diff --git a/modules/git/batch.go b/modules/git/batch.go new file mode 100644 index 000000000..3ec4f1ddc --- /dev/null +++ b/modules/git/batch.go @@ -0,0 +1,46 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "bufio" + "context" +) + +type Batch struct { + cancel context.CancelFunc + Reader *bufio.Reader + Writer WriteCloserError +} + +func (repo *Repository) NewBatch(ctx context.Context) (*Batch, error) { + // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! + if err := ensureValidGitRepository(ctx, repo.Path); err != nil { + return nil, err + } + + var batch Batch + batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repo.Path) + return &batch, nil +} + +func (repo *Repository) NewBatchCheck(ctx context.Context) (*Batch, error) { + // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! + if err := ensureValidGitRepository(ctx, repo.Path); err != nil { + return nil, err + } + + var check Batch + check.Writer, check.Reader, check.cancel = catFileBatchCheck(ctx, repo.Path) + return &check, nil +} + +func (b *Batch) Close() { + if b.cancel != nil { + b.cancel() + b.Reader = nil + b.Writer = nil + b.cancel = nil + } +} diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go index c988d6ab8..3b1a466b2 100644 --- a/modules/git/batch_reader.go +++ b/modules/git/batch_reader.go @@ -26,10 +26,10 @@ type WriteCloserError interface { CloseWithError(err error) error } -// EnsureValidGitRepository runs git rev-parse in the repository path - thus ensuring that the repository is a valid repository. +// ensureValidGitRepository runs git rev-parse in the repository path - thus ensuring that the repository is a valid repository. // Run before opening git cat-file. // This is needed otherwise the git cat-file will hang for invalid repositories. -func EnsureValidGitRepository(ctx context.Context, repoPath string) error { +func ensureValidGitRepository(ctx context.Context, repoPath string) error { stderr := strings.Builder{} err := NewCommand(ctx, "rev-parse"). SetDescription(fmt.Sprintf("%s rev-parse [repo_path: %s]", GitExecutable, repoPath)). @@ -43,8 +43,8 @@ func EnsureValidGitRepository(ctx context.Context, repoPath string) error { return nil } -// CatFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function -func CatFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { +// catFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function +func catFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { batchStdinReader, batchStdinWriter := io.Pipe() batchStdoutReader, batchStdoutWriter := io.Pipe() ctx, ctxCancel := context.WithCancel(ctx) @@ -93,8 +93,8 @@ func CatFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, return batchStdinWriter, batchReader, cancel } -// CatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function -func CatFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { +// catFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function +func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout batchStdinReader, batchStdinWriter := io.Pipe() diff --git a/modules/git/blob.go b/modules/git/blob.go index 3de8ce8e9..2f0269342 100644 --- a/modules/git/blob.go +++ b/modules/git/blob.go @@ -28,9 +28,12 @@ type Blob struct { // DataAsync gets a ReadCloser for the contents of a blob without reading it all. // Calling the Close function on the result will discard all unread output. func (b *Blob) DataAsync() (io.ReadCloser, error) { - wr, rd, cancel := b.repo.CatFileBatch(b.repo.Ctx) + wr, rd, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) + if err != nil { + return nil, err + } - _, err := wr.Write([]byte(b.ID.String() + "\n")) + _, err = wr.Write([]byte(b.ID.String() + "\n")) if err != nil { cancel() return nil, err @@ -66,9 +69,13 @@ func (b *Blob) Size() int64 { return b.size } - wr, rd, cancel := b.repo.CatFileBatchCheck(b.repo.Ctx) + wr, rd, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx) + if err != nil { + log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) + return 0 + } defer cancel() - _, err := wr.Write([]byte(b.ID.String() + "\n")) + _, err = wr.Write([]byte(b.ID.String() + "\n")) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 diff --git a/modules/git/commit_info.go b/modules/git/commit_info.go index a26d74932..3b34b7933 100644 --- a/modules/git/commit_info.go +++ b/modules/git/commit_info.go @@ -129,7 +129,10 @@ func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string, return nil, err } - batchStdinWriter, batchReader, cancel := commit.repo.CatFileBatch(ctx) + batchStdinWriter, batchReader, cancel, err := commit.repo.CatFileBatch(ctx) + if err != nil { + return nil, err + } defer cancel() commitsMap := map[string]*Commit{} diff --git a/modules/git/pipeline/lfs.go b/modules/git/pipeline/lfs.go index 55c49aaf3..3407eb983 100644 --- a/modules/git/pipeline/lfs.go +++ b/modules/git/pipeline/lfs.go @@ -67,7 +67,10 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout - batchStdinWriter, batchReader, cancel := repo.CatFileBatch(repo.Ctx) + batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx) + if err != nil { + return nil, err + } defer cancel() // We'll use a scanner for the revList because it's simpler than a bufio.Reader diff --git a/modules/git/repo_base.go b/modules/git/repo_base.go index 8c34efc2c..5f17bc14f 100644 --- a/modules/git/repo_base.go +++ b/modules/git/repo_base.go @@ -21,15 +21,11 @@ type Repository struct { gpgSettings *GPGSettings - batchInUse bool - batchCancel context.CancelFunc - batchReader *bufio.Reader - batchWriter WriteCloserError + batchInUse bool + batch *Batch - checkInUse bool - checkCancel context.CancelFunc - checkReader *bufio.Reader - checkWriter WriteCloserError + checkInUse bool + check *Batch Ctx context.Context LastCommitCache *LastCommitCache @@ -51,63 +47,75 @@ func OpenRepository(ctx context.Context, repoPath string) (*Repository, error) { return nil, errors.New("no such file or directory") } - // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! - if err := EnsureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - repo := &Repository{ + return &Repository{ Path: repoPath, tagCache: newObjectCache(), Ctx: ctx, - } - - repo.batchWriter, repo.batchReader, repo.batchCancel = CatFileBatch(ctx, repoPath) - repo.checkWriter, repo.checkReader, repo.checkCancel = CatFileBatchCheck(ctx, repoPath) - - return repo, nil + }, nil } // CatFileBatch obtains a CatFileBatch for this repository -func (repo *Repository) CatFileBatch(ctx context.Context) (WriteCloserError, *bufio.Reader, func()) { - if repo.batchCancel == nil || repo.batchInUse { - log.Debug("Opening temporary cat file batch for: %s", repo.Path) - return CatFileBatch(ctx, repo.Path) +func (repo *Repository) CatFileBatch(ctx context.Context) (WriteCloserError, *bufio.Reader, func(), error) { + if repo.batch == nil { + var err error + repo.batch, err = repo.NewBatch(ctx) + if err != nil { + return nil, nil, nil, err + } } - repo.batchInUse = true - return repo.batchWriter, repo.batchReader, func() { - repo.batchInUse = false + + if !repo.batchInUse { + repo.batchInUse = true + return repo.batch.Writer, repo.batch.Reader, func() { + repo.batchInUse = false + }, nil } + + log.Debug("Opening temporary cat file batch for: %s", repo.Path) + tempBatch, err := repo.NewBatch(ctx) + if err != nil { + return nil, nil, nil, err + } + return tempBatch.Writer, tempBatch.Reader, tempBatch.Close, nil } // CatFileBatchCheck obtains a CatFileBatchCheck for this repository -func (repo *Repository) CatFileBatchCheck(ctx context.Context) (WriteCloserError, *bufio.Reader, func()) { - if repo.checkCancel == nil || repo.checkInUse { - log.Debug("Opening temporary cat file batch-check for: %s", repo.Path) - return CatFileBatchCheck(ctx, repo.Path) +func (repo *Repository) CatFileBatchCheck(ctx context.Context) (WriteCloserError, *bufio.Reader, func(), error) { + if repo.check == nil { + var err error + repo.check, err = repo.NewBatchCheck(ctx) + if err != nil { + return nil, nil, nil, err + } } - repo.checkInUse = true - return repo.checkWriter, repo.checkReader, func() { - repo.checkInUse = false + + if !repo.checkInUse { + repo.checkInUse = true + return repo.check.Writer, repo.check.Reader, func() { + repo.checkInUse = false + }, nil } + + log.Debug("Opening temporary cat file batch-check for: %s", repo.Path) + tempBatchCheck, err := repo.NewBatchCheck(ctx) + if err != nil { + return nil, nil, nil, err + } + return tempBatchCheck.Writer, tempBatchCheck.Reader, tempBatchCheck.Close, nil } func (repo *Repository) Close() error { if repo == nil { return nil } - if repo.batchCancel != nil { - repo.batchCancel() - repo.batchReader = nil - repo.batchWriter = nil - repo.batchCancel = nil + if repo.batch != nil { + repo.batch.Close() + repo.batch = nil repo.batchInUse = false } - if repo.checkCancel != nil { - repo.checkCancel() - repo.checkCancel = nil - repo.checkReader = nil - repo.checkWriter = nil + if repo.check != nil { + repo.check.Close() + repo.check = nil repo.checkInUse = false } repo.LastCommitCache = nil diff --git a/modules/git/repo_branch.go b/modules/git/repo_branch.go index dff3a43fa..7339c7db0 100644 --- a/modules/git/repo_branch.go +++ b/modules/git/repo_branch.go @@ -169,9 +169,13 @@ func (repo *Repository) IsObjectExist(name string) bool { return false } - wr, rd, cancel := repo.CatFileBatchCheck(repo.Ctx) + wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + if err != nil { + log.Debug("Error writing to CatFileBatchCheck %v", err) + return false + } defer cancel() - _, err := wr.Write([]byte(name + "\n")) + _, err = wr.Write([]byte(name + "\n")) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false @@ -186,9 +190,13 @@ func (repo *Repository) IsReferenceExist(name string) bool { return false } - wr, rd, cancel := repo.CatFileBatchCheck(repo.Ctx) + wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + if err != nil { + log.Debug("Error writing to CatFileBatchCheck %v", err) + return false + } defer cancel() - _, err := wr.Write([]byte(name + "\n")) + _, err = wr.Write([]byte(name + "\n")) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false diff --git a/modules/git/repo_commit.go b/modules/git/repo_commit.go index 2a325d364..1f3d64fe0 100644 --- a/modules/git/repo_commit.go +++ b/modules/git/repo_commit.go @@ -536,9 +536,12 @@ func (repo *Repository) ResolveReference(name string) (string, error) { // GetRefCommitID returns the last commit ID string of given reference (branch or tag). func (repo *Repository) GetRefCommitID(name string) (string, error) { - wr, rd, cancel := repo.CatFileBatchCheck(repo.Ctx) + wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + if err != nil { + return "", err + } defer cancel() - _, err := wr.Write([]byte(name + "\n")) + _, err = wr.Write([]byte(name + "\n")) if err != nil { return "", err } @@ -564,12 +567,19 @@ func (repo *Repository) RemoveReference(name string) error { // IsCommitExist returns true if given commit exists in current repository. func (repo *Repository) IsCommitExist(name string) bool { + if err := ensureValidGitRepository(repo.Ctx, repo.Path); err != nil { + log.Error("IsCommitExist: %v", err) + return false + } _, _, err := NewCommand(repo.Ctx, "cat-file", "-e").AddDynamicArguments(name).RunStdString(&RunOpts{Dir: repo.Path}) return err == nil } func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { - wr, rd, cancel := repo.CatFileBatch(repo.Ctx) + wr, rd, cancel, err := repo.CatFileBatch(repo.Ctx) + if err != nil { + return nil, err + } defer cancel() _, _ = wr.Write([]byte(id.String() + "\n")) @@ -646,7 +656,10 @@ func (repo *Repository) ConvertToGitID(commitID string) (ObjectID, error) { } } - wr, rd, cancel := repo.CatFileBatchCheck(repo.Ctx) + wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + if err != nil { + return nil, err + } defer cancel() _, err = wr.Write([]byte(commitID + "\n")) if err != nil { diff --git a/modules/git/repo_language_stats.go b/modules/git/repo_language_stats.go index 84638b3ce..37c23faf6 100644 --- a/modules/git/repo_language_stats.go +++ b/modules/git/repo_language_stats.go @@ -60,7 +60,10 @@ func mergeLanguageStats(stats map[string]int64) map[string]int64 { func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. // so let's create a batch stdin and stdout - batchStdinWriter, batchReader, cancel := repo.CatFileBatch(repo.Ctx) + batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx) + if err != nil { + return nil, err + } defer cancel() writeID := func(id string) error { diff --git a/modules/git/repo_tag.go b/modules/git/repo_tag.go index d925d4a7d..12b0c022c 100644 --- a/modules/git/repo_tag.go +++ b/modules/git/repo_tag.go @@ -257,9 +257,12 @@ func (repo *Repository) GetTags(skip, limit int) (tags []string, err error) { // GetTagType gets the type of the tag, either commit (simple) or tag (annotated) func (repo *Repository) GetTagType(id ObjectID) (string, error) { - wr, rd, cancel := repo.CatFileBatchCheck(repo.Ctx) + wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + if err != nil { + return "", err + } defer cancel() - _, err := wr.Write([]byte(id.String() + "\n")) + _, err = wr.Write([]byte(id.String() + "\n")) if err != nil { return "", err } @@ -315,7 +318,10 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { } // The tag is an annotated tag with a message. - wr, rd, cancel := repo.CatFileBatch(repo.Ctx) + wr, rd, cancel, err := repo.CatFileBatch(repo.Ctx) + if err != nil { + return nil, err + } defer cancel() if _, err := wr.Write([]byte(tagID.String() + "\n")); err != nil { diff --git a/modules/git/repo_tree.go b/modules/git/repo_tree.go index 79a7e50eb..53d94d9d7 100644 --- a/modules/git/repo_tree.go +++ b/modules/git/repo_tree.go @@ -68,7 +68,10 @@ func (repo *Repository) CommitTree(author, committer *Signature, tree *Tree, opt } func (repo *Repository) getTree(id ObjectID) (*Tree, error) { - wr, rd, cancel := repo.CatFileBatch(repo.Ctx) + wr, rd, cancel, err := repo.CatFileBatch(repo.Ctx) + if err != nil { + return nil, err + } defer cancel() _, _ = wr.Write([]byte(id.String() + "\n")) diff --git a/modules/git/tree.go b/modules/git/tree.go index 422fe68ca..5b06cbf35 100644 --- a/modules/git/tree.go +++ b/modules/git/tree.go @@ -41,7 +41,10 @@ func (t *Tree) ListEntries() (Entries, error) { } if t.repo != nil { - wr, rd, cancel := t.repo.CatFileBatch(t.repo.Ctx) + wr, rd, cancel, err := t.repo.CatFileBatch(t.repo.Ctx) + if err != nil { + return nil, err + } defer cancel() _, _ = wr.Write([]byte(t.ID.String() + "\n")) diff --git a/modules/git/tree_entry.go b/modules/git/tree_entry.go index b5dd80130..0d9cfd225 100644 --- a/modules/git/tree_entry.go +++ b/modules/git/tree_entry.go @@ -47,9 +47,13 @@ func (te *TreeEntry) Size() int64 { return te.size } - wr, rd, cancel := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) + wr, rd, cancel, err := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) + if err != nil { + log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) + return 0 + } defer cancel() - _, err := wr.Write([]byte(te.ID.String() + "\n")) + _, err = wr.Write([]byte(te.ID.String() + "\n")) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 66724a344..cf9fcbd8b 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -16,10 +16,10 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/indexer/code/internal" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" - "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" @@ -193,21 +193,23 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) if len(changes.Updates) > 0 { - // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! - if err := git.EnsureValidGitRepository(ctx, repo.RepoPath()); err != nil { - log.Error("Unable to open git repo: %s for %-v: %v", repo.RepoPath(), repo, err) + r, err := gitrepo.OpenRepository(ctx, repo) + if err != nil { return err } - - batchWriter, batchReader, cancel := git.CatFileBatch(ctx, repo.RepoPath()) - defer cancel() + defer r.Close() + gitBatch, err := r.NewBatch(ctx) + if err != nil { + return err + } + defer gitBatch.Close() for _, update := range changes.Updates { - if err := b.addUpdate(ctx, batchWriter, batchReader, sha, update, repo, batch); err != nil { + if err := b.addUpdate(ctx, gitBatch.Writer, gitBatch.Reader, sha, update, repo, batch); err != nil { return err } } - cancel() + gitBatch.Close() } for _, filename := range changes.RemovedFilenames { if err := b.addDelete(filename, repo, batch); err != nil { diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index e4622fd66..0bda180fa 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -15,11 +15,11 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/indexer/code/internal" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/json" - "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" @@ -154,17 +154,19 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { reqs := make([]elastic.BulkableRequest, 0) if len(changes.Updates) > 0 { - // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! - if err := git.EnsureValidGitRepository(ctx, repo.RepoPath()); err != nil { - log.Error("Unable to open git repo: %s for %-v: %v", repo.RepoPath(), repo, err) + r, err := gitrepo.OpenRepository(ctx, repo) + if err != nil { return err } - - batchWriter, batchReader, cancel := git.CatFileBatch(ctx, repo.RepoPath()) - defer cancel() + defer r.Close() + batch, err := r.NewBatch(ctx) + if err != nil { + return err + } + defer batch.Close() for _, update := range changes.Updates { - updateReqs, err := b.addUpdate(ctx, batchWriter, batchReader, sha, update, repo) + updateReqs, err := b.addUpdate(ctx, batch.Writer, batch.Reader, sha, update, repo) if err != nil { return err } @@ -172,7 +174,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st reqs = append(reqs, updateReqs...) } } - cancel() + batch.Close() } for _, filename := range changes.RemovedFilenames {