diff --git a/models/git/branch.go b/models/git/branch.go index 6baad65ab..a5ee2bde6 100644 --- a/models/git/branch.go +++ b/models/git/branch.go @@ -162,6 +162,11 @@ func GetBranch(ctx context.Context, repoID int64, branchName string) (*Branch, e return &branch, nil } +func GetBranches(ctx context.Context, repoID int64, branchNames []string) ([]*Branch, error) { + branches := make([]*Branch, 0, len(branchNames)) + return branches, db.GetEngine(ctx).Where("repo_id=?", repoID).In("name", branchNames).Find(&branches) +} + func AddBranches(ctx context.Context, branches []*Branch) error { for _, branch := range branches { if _, err := db.GetEngine(ctx).Insert(branch); err != nil { diff --git a/routers/private/hook_post_receive.go b/routers/private/hook_post_receive.go index f4c698d3e..f5527cb15 100644 --- a/routers/private/hook_post_receive.go +++ b/routers/private/hook_post_receive.go @@ -8,9 +8,11 @@ import ( "net/http" "strconv" + git_model "code.gitea.io/gitea/models/git" issues_model "code.gitea.io/gitea/models/issues" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/private" repo_module "code.gitea.io/gitea/modules/repository" @@ -27,6 +29,7 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) { // We don't rely on RepoAssignment here because: // a) we don't need the git repo in this function + // OUT OF DATE: we do need the git repo to sync the branch to the db now. // b) our update function will likely change the repository in the db so we will need to refresh it // c) we don't always need the repo @@ -34,7 +37,11 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) { repoName := ctx.Params(":repo") // defer getting the repository at this point - as we should only retrieve it if we're going to call update - var repo *repo_model.Repository + var ( + repo *repo_model.Repository + gitRepo *git.Repository + ) + defer gitRepo.Close() // it's safe to call Close on a nil pointer updates := make([]*repo_module.PushUpdateOptions, 0, len(opts.OldCommitIDs)) wasEmpty := false @@ -75,6 +82,61 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) { } if repo != nil && len(updates) > 0 { + branchesToSync := make([]*repo_module.PushUpdateOptions, 0, len(updates)) + for _, update := range updates { + if !update.RefFullName.IsBranch() { + continue + } + if repo == nil { + repo = loadRepository(ctx, ownerName, repoName) + if ctx.Written() { + return + } + wasEmpty = repo.IsEmpty + } + + if update.IsDelRef() { + if err := git_model.AddDeletedBranch(ctx, repo.ID, update.RefFullName.BranchName(), update.PusherID); err != nil { + log.Error("Failed to add deleted branch: %s/%s Error: %v", ownerName, repoName, err) + ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{ + Err: fmt.Sprintf("Failed to add deleted branch: %s/%s Error: %v", ownerName, repoName, err), + }) + return + } + } else { + branchesToSync = append(branchesToSync, update) + } + } + if len(branchesToSync) > 0 { + if gitRepo == nil { + var err error + gitRepo, err = gitrepo.OpenRepository(ctx, repo) + if err != nil { + log.Error("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err) + ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{ + Err: fmt.Sprintf("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err), + }) + return + } + } + + var ( + branchNames = make([]string, 0, len(branchesToSync)) + commitIDs = make([]string, 0, len(branchesToSync)) + ) + for _, update := range branchesToSync { + branchNames = append(branchNames, update.RefFullName.BranchName()) + commitIDs = append(commitIDs, update.NewCommitID) + } + + if err := repo_service.SyncBranchesToDB(ctx, repo.ID, opts.UserID, branchNames, commitIDs, gitRepo.GetCommit); err != nil { + ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{ + Err: fmt.Sprintf("Failed to sync branch to DB in repository: %s/%s Error: %v", ownerName, repoName, err), + }) + return + } + } + if err := repo_service.PushUpdates(updates); err != nil { log.Error("Failed to Update: %s/%s Total Updates: %d", ownerName, repoName, len(updates)) for i, update := range updates { diff --git a/services/repository/branch.go b/services/repository/branch.go index d349db7f7..b68355324 100644 --- a/services/repository/branch.go +++ b/services/repository/branch.go @@ -225,44 +225,91 @@ func checkBranchName(ctx context.Context, repo *repo_model.Repository, name stri return err } -// syncBranchToDB sync the branch information in the database. It will try to update the branch first, -// if updated success with affect records > 0, then all are done. Because that means the branch has been in the database. -// If no record is affected, that means the branch does not exist in database. So there are two possibilities. -// One is this is a new branch, then we just need to insert the record. Another is the branches haven't been synced, -// then we need to sync all the branches into database. -func syncBranchToDB(ctx context.Context, repoID, pusherID int64, branchName string, commit *git.Commit) error { - cnt, err := git_model.UpdateBranch(ctx, repoID, pusherID, branchName, commit) - if err != nil { - return fmt.Errorf("git_model.UpdateBranch %d:%s failed: %v", repoID, branchName, err) - } - if cnt > 0 { // This means branch does exist, so it's a normal update. It also means the branch has been synced. - return nil +// SyncBranchesToDB sync the branch information in the database. +// It will check whether the branches of the repository have never been synced before. +// If so, it will sync all branches of the repository. +// Otherwise, it will sync the branches that need to be updated. +func SyncBranchesToDB(ctx context.Context, repoID, pusherID int64, branchNames, commitIDs []string, getCommit func(commitID string) (*git.Commit, error)) error { + // Some designs that make the code look strange but are made for performance optimization purposes: + // 1. Sync branches in a batch to reduce the number of DB queries. + // 2. Lazy load commit information since it may be not necessary. + // 3. Exit early if synced all branches of git repo when there's no branch in DB. + // 4. Check the branches in DB if they are already synced. + // + // If the user pushes many branches at once, the Git hook will call the internal API in batches, rather than all at once. + // See https://github.com/go-gitea/gitea/blob/cb52b17f92e2d2293f7c003649743464492bca48/cmd/hook.go#L27 + // For the first batch, it will hit optimization 3. + // For other batches, it will hit optimization 4. + + if len(branchNames) != len(commitIDs) { + return fmt.Errorf("branchNames and commitIDs length not match") } - // if user haven't visit UI but directly push to a branch after upgrading from 1.20 -> 1.21, - // we cannot simply insert the branch but need to check we have branches or not - hasBranch, err := db.Exist[git_model.Branch](ctx, git_model.FindBranchOptions{ - RepoID: repoID, - IsDeletedBranch: optional.Some(false), - }.ToConds()) - if err != nil { - return err - } - if !hasBranch { - if _, err = repo_module.SyncRepoBranches(ctx, repoID, pusherID); err != nil { - return fmt.Errorf("repo_module.SyncRepoBranches %d:%s failed: %v", repoID, branchName, err) + return db.WithTx(ctx, func(ctx context.Context) error { + branches, err := git_model.GetBranches(ctx, repoID, branchNames) + if err != nil { + return fmt.Errorf("git_model.GetBranches: %v", err) + } + + if len(branches) == 0 { + // if user haven't visit UI but directly push to a branch after upgrading from 1.20 -> 1.21, + // we cannot simply insert the branch but need to check we have branches or not + hasBranch, err := db.Exist[git_model.Branch](ctx, git_model.FindBranchOptions{ + RepoID: repoID, + IsDeletedBranch: optional.Some(false), + }.ToConds()) + if err != nil { + return err + } + if !hasBranch { + if _, err = repo_module.SyncRepoBranches(ctx, repoID, pusherID); err != nil { + return fmt.Errorf("repo_module.SyncRepoBranches %d failed: %v", repoID, err) + } + return nil + } + } + + branchMap := make(map[string]*git_model.Branch, len(branches)) + for _, branch := range branches { + branchMap[branch.Name] = branch + } + + newBranches := make([]*git_model.Branch, 0, len(branchNames)) + + for i, branchName := range branchNames { + commitID := commitIDs[i] + branch, exist := branchMap[branchName] + if exist && branch.CommitID == commitID && !branch.IsDeleted { + continue + } + + commit, err := getCommit(commitID) + if err != nil { + return fmt.Errorf("get commit of %s failed: %v", branchName, err) + } + + if exist { + if _, err := git_model.UpdateBranch(ctx, repoID, pusherID, branchName, commit); err != nil { + return fmt.Errorf("git_model.UpdateBranch %d:%s failed: %v", repoID, branchName, err) + } + return nil + } + + // if database have branches but not this branch, it means this is a new branch + newBranches = append(newBranches, &git_model.Branch{ + RepoID: repoID, + Name: branchName, + CommitID: commit.ID.String(), + CommitMessage: commit.Summary(), + PusherID: pusherID, + CommitTime: timeutil.TimeStamp(commit.Committer.When.Unix()), + }) + } + + if len(newBranches) > 0 { + return db.Insert(ctx, newBranches) } return nil - } - - // if database have branches but not this branch, it means this is a new branch - return db.Insert(ctx, &git_model.Branch{ - RepoID: repoID, - Name: branchName, - CommitID: commit.ID.String(), - CommitMessage: commit.Summary(), - PusherID: pusherID, - CommitTime: timeutil.TimeStamp(commit.Committer.When.Unix()), }) } diff --git a/services/repository/push.go b/services/repository/push.go index 5d1023784..8b27f0719 100644 --- a/services/repository/push.go +++ b/services/repository/push.go @@ -11,7 +11,6 @@ import ( "time" "code.gitea.io/gitea/models/db" - git_model "code.gitea.io/gitea/models/git" repo_model "code.gitea.io/gitea/models/repo" user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/cache" @@ -259,10 +258,6 @@ func pushUpdates(optsList []*repo_module.PushUpdateOptions) error { commits.Commits = commits.Commits[:setting.UI.FeedMaxCommitNum] } - if err = syncBranchToDB(ctx, repo.ID, opts.PusherID, branch, newCommit); err != nil { - return fmt.Errorf("git_model.UpdateBranch %s:%s failed: %v", repo.FullName(), branch, err) - } - notify_service.PushCommits(ctx, pusher, repo, opts, commits) // Cache for big repository @@ -275,10 +270,6 @@ func pushUpdates(optsList []*repo_module.PushUpdateOptions) error { // close all related pulls log.Error("close related pull request failed: %v", err) } - - if err := git_model.AddDeletedBranch(ctx, repo.ID, branch, pusher.ID); err != nil { - return fmt.Errorf("AddDeletedBranch %s:%s failed: %v", repo.FullName(), branch, err) - } } // Even if user delete a branch on a repository which he didn't watch, he will be watch that. diff --git a/tests/integration/git_push_test.go b/tests/integration/git_push_test.go new file mode 100644 index 000000000..cb2910b17 --- /dev/null +++ b/tests/integration/git_push_test.go @@ -0,0 +1,131 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package integration + +import ( + "fmt" + "net/url" + "testing" + + "code.gitea.io/gitea/models/db" + git_model "code.gitea.io/gitea/models/git" + "code.gitea.io/gitea/models/unittest" + user_model "code.gitea.io/gitea/models/user" + "code.gitea.io/gitea/modules/git" + repo_service "code.gitea.io/gitea/services/repository" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGitPush(t *testing.T) { + onGiteaRun(t, testGitPush) +} + +func testGitPush(t *testing.T, u *url.URL) { + t.Run("Push branches at once", func(t *testing.T) { + runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) { + for i := 0; i < 100; i++ { + branchName := fmt.Sprintf("branch-%d", i) + pushed = append(pushed, branchName) + doGitCreateBranch(gitPath, branchName)(t) + } + pushed = append(pushed, "master") + doGitPushTestRepository(gitPath, "origin", "--all")(t) + return pushed, deleted + }) + }) + + t.Run("Push branches one by one", func(t *testing.T) { + runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) { + for i := 0; i < 100; i++ { + branchName := fmt.Sprintf("branch-%d", i) + doGitCreateBranch(gitPath, branchName)(t) + doGitPushTestRepository(gitPath, "origin", branchName)(t) + pushed = append(pushed, branchName) + } + return pushed, deleted + }) + }) + + t.Run("Delete branches", func(t *testing.T) { + runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) { + doGitPushTestRepository(gitPath, "origin", "master")(t) // make sure master is the default branch instead of a branch we are going to delete + pushed = append(pushed, "master") + + for i := 0; i < 100; i++ { + branchName := fmt.Sprintf("branch-%d", i) + pushed = append(pushed, branchName) + doGitCreateBranch(gitPath, branchName)(t) + } + doGitPushTestRepository(gitPath, "origin", "--all")(t) + + for i := 0; i < 10; i++ { + branchName := fmt.Sprintf("branch-%d", i) + doGitPushTestRepository(gitPath, "origin", "--delete", branchName)(t) + deleted = append(deleted, branchName) + } + return pushed, deleted + }) + }) +} + +func runTestGitPush(t *testing.T, u *url.URL, gitOperation func(t *testing.T, gitPath string) (pushed, deleted []string)) { + user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2}) + repo, err := repo_service.CreateRepository(db.DefaultContext, user, user, repo_service.CreateRepoOptions{ + Name: "repo-to-push", + Description: "test git push", + AutoInit: false, + DefaultBranch: "main", + IsPrivate: false, + }) + require.NoError(t, err) + require.NotEmpty(t, repo) + + gitPath := t.TempDir() + + doGitInitTestRepository(gitPath)(t) + + oldPath := u.Path + oldUser := u.User + defer func() { + u.Path = oldPath + u.User = oldUser + }() + u.Path = repo.FullName() + ".git" + u.User = url.UserPassword(user.LowerName, userPassword) + + doGitAddRemote(gitPath, "origin", u)(t) + + gitRepo, err := git.OpenRepository(git.DefaultContext, gitPath) + require.NoError(t, err) + defer gitRepo.Close() + + pushedBranches, deletedBranches := gitOperation(t, gitPath) + + dbBranches := make([]*git_model.Branch, 0) + require.NoError(t, db.GetEngine(db.DefaultContext).Where("repo_id=?", repo.ID).Find(&dbBranches)) + assert.Equalf(t, len(pushedBranches), len(dbBranches), "mismatched number of branches in db") + dbBranchesMap := make(map[string]*git_model.Branch, len(dbBranches)) + for _, branch := range dbBranches { + dbBranchesMap[branch.Name] = branch + } + + deletedBranchesMap := make(map[string]bool, len(deletedBranches)) + for _, branchName := range deletedBranches { + deletedBranchesMap[branchName] = true + } + + for _, branchName := range pushedBranches { + branch, ok := dbBranchesMap[branchName] + deleted := deletedBranchesMap[branchName] + assert.True(t, ok, "branch %s not found in database", branchName) + assert.Equal(t, deleted, branch.IsDeleted, "IsDeleted of %s is %v, but it's expected to be %v", branchName, branch.IsDeleted, deleted) + commitID, err := gitRepo.GetBranchCommitID(branchName) + require.NoError(t, err) + assert.Equal(t, commitID, branch.CommitID) + } + + require.NoError(t, repo_service.DeleteRepositoryDirectly(db.DefaultContext, user, repo.ID)) +}