More efficiently parse shas for shaPostProcessor (#16101)

* More efficiently parse shas for shaPostProcessor

The shaPostProcessor currently calls `git rev-parse --verify` once for every candidate
sha on both backends. This is fine if only one thing in the content matches a sha;
however, if there are multiple matches then this becomes wildly inefficient.

This PR provides functions for both backends which are much faster to use.

Fix #16092

* Add ShaExistCache to RenderContext

Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: 6543 <6543@obermui.de>
This commit is contained in:
zeripath 2021-06-20 23:39:12 +01:00 committed by GitHub
parent 23358bc55d
commit 196593e2e9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 122 additions and 10 deletions

View file

@ -13,6 +13,30 @@ import (
"github.com/go-git/go-git/v5/plumbing"
)
// IsObjectExist reports whether name can be resolved as a revision in the
// repository. An empty name is never considered to exist.
func (repo *Repository) IsObjectExist(name string) bool {
	if len(name) == 0 {
		return false
	}

	// Only success or failure of the lookup matters; the resolved hash is discarded.
	if _, err := repo.gogitRepo.ResolveRevision(plumbing.Revision(name)); err != nil {
		return false
	}
	return true
}
// IsReferenceExist reports whether a reference called name exists in the
// repository. An empty name is never considered to exist.
//
// The reference is looked up with resolution enabled, and it only counts as
// existing when the lookup succeeds and the result is not an invalid reference.
func (repo *Repository) IsReferenceExist(name string) bool {
	if len(name) == 0 {
		return false
	}

	ref, err := repo.gogitRepo.Reference(plumbing.ReferenceName(name), true)
	return err == nil && ref.Type() != plumbing.InvalidReference
}
// IsBranchExist returns true if given branch exists in current repository.
func (repo *Repository) IsBranchExist(name string) bool {
if name == "" {

View file

@ -9,10 +9,28 @@ package git
import (
"bufio"
"bytes"
"io"
"strings"
)
// IsObjectExist reports whether name identifies an object in the repository.
// An empty name is never considered to exist.
//
// The name is written, newline-terminated, to the writer obtained from
// CatFileBatchCheck and a single response line is read back. The object counts
// as existing only when that read succeeds and the returned sha starts with
// the whitespace-trimmed name.
func (repo *Repository) IsObjectExist(name string) bool {
	if len(name) == 0 {
		return false
	}

	writer, reader, cancel := repo.CatFileBatchCheck()
	defer cancel()

	if _, err := writer.Write([]byte(name + "\n")); err != nil {
		log("Error writing to CatFileBatchCheck %v", err)
		return false
	}

	sha, _, _, err := ReadBatchLine(reader)
	if err != nil {
		return false
	}
	return bytes.HasPrefix(sha, []byte(strings.TrimSpace(name)))
}
// IsReferenceExist returns true if given reference exists in the repository.
func (repo *Repository) IsReferenceExist(name string) bool {
if name == "" {

View file

@ -286,6 +286,7 @@ var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM
var nulCleaner = strings.NewReplacer("\000", "")
func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output io.Writer) error {
defer ctx.Cancel()
// FIXME: don't read all content to memory
rawHTML, err := ioutil.ReadAll(input)
if err != nil {
@ -996,6 +997,9 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
start := 0
next := node.NextSibling
if ctx.ShaExistCache == nil {
ctx.ShaExistCache = make(map[string]bool)
}
for node != nil && node != next && start < len(node.Data) {
m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:])
if m == nil {
@ -1013,10 +1017,28 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
// as used by git and github for linking and thus we have to do similar.
// Because of this, we check to make sure that a matched hash is actually
// a commit in the repository before making it a link.
if _, err := git.NewCommand("rev-parse", "--verify", hash).RunInDirBytes(ctx.Metas["repoPath"]); err != nil {
if !strings.Contains(err.Error(), "fatal: Needed a single revision") {
log.Debug("sha1CurrentPatternProcessor git rev-parse: %v", err)
// check cache first
exist, inCache := ctx.ShaExistCache[hash]
if !inCache {
if ctx.GitRepo == nil {
var err error
ctx.GitRepo, err = git.OpenRepository(ctx.Metas["repoPath"])
if err != nil {
log.Error("unable to open repository: %s Error: %v", ctx.Metas["repoPath"], err)
return
}
ctx.AddCancel(func() {
ctx.GitRepo.Close()
ctx.GitRepo = nil
})
}
exist = ctx.GitRepo.IsObjectExist(hash)
ctx.ShaExistCache[hash] = exist
}
if !exist {
start = m[3]
continue
}

View file

@ -13,6 +13,7 @@ import (
"strings"
"sync"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
)
@ -35,13 +36,44 @@ func Init() {
// RenderContext represents a render context
//
// Besides the rendering inputs it carries per-render state: a repository
// handle, a cache of sha existence lookups, and a chain of cleanup functions
// that Cancel runs.
//
// NOTE(review): the first seven fields appear twice below; this looks like a
// diff hunk showing both the old and the realigned field lines - confirm
// against the real file.
type RenderContext struct {
Ctx context.Context
Filename string
Type string
IsWiki bool
URLPrefix string
Metas map[string]string
DefaultLink string
Ctx context.Context
Filename string
Type string
IsWiki bool
URLPrefix string
Metas map[string]string
DefaultLink string
// GitRepo is the repository used for sha existence checks. When it is nil,
// sha1CurrentPatternProcessor opens it from Metas["repoPath"] and registers
// a cleanup that closes it and resets it to nil.
GitRepo *git.Repository
// ShaExistCache maps a matched hash to the result of IsObjectExist for it.
// Cancel replaces it with an empty map.
ShaExistCache map[string]bool
// cancelFn is the cleanup chain built by AddCancel and invoked by Cancel.
cancelFn func()
}
// Cancel runs any cleanup functions that have been registered for this Ctx.
//
// It is a no-op on a nil receiver. ShaExistCache is replaced with an empty map
// on every call. The registered cleanup chain is detached before it is run, so
// each cleanup executes at most once: calling Cancel again does not re-run it,
// and cleanups added afterwards via AddCancel start a fresh chain.
func (ctx *RenderContext) Cancel() {
	if ctx == nil {
		return
	}
	ctx.ShaExistCache = map[string]bool{}

	cleanup := ctx.cancelFn
	if cleanup == nil {
		return
	}
	// Detach before running so a repeated Cancel, or a reused context, never
	// re-runs stale cleanups (e.g. closing a repository that is already nil).
	ctx.cancelFn = nil
	cleanup()
}
// AddCancel adds the provided fn as a Cleanup for this Ctx.
//
// It is a no-op on a nil receiver or when fn is nil, so a nil cleanup can never
// be stored and later invoked. Cleanups run in reverse order of registration:
// the most recently added fn runs first. Earlier cleanups are deferred, so they
// still run if a later-registered one panics.
func (ctx *RenderContext) AddCancel(fn func()) {
	if ctx == nil || fn == nil {
		return
	}

	previous := ctx.cancelFn
	if previous == nil {
		ctx.cancelFn = fn
		return
	}
	ctx.cancelFn = func() {
		defer previous()
		fn()
	}
}
// Renderer defines an interface for rendering markup file to HTML