From 70a7d6a0de05dee76c4d06716aec5ebe8894a245 Mon Sep 17 00:00:00 2001 From: Gusted Date: Sat, 6 Jul 2024 13:08:26 +0200 Subject: [PATCH] [ENHANCEMENT] Improve caching of contributor stats - It was noticed [in the Forgejo matrix channel](https://matrix.to/#/!qjPHwFPdxhpLkXMkyP:matrix.org/$vk78UR0eFCwQMDMTZ7-DWjMVB_LIAwHW6SkjhEcGkQQ?via=matrix.org) that the generation of the Forgejo project contributor stats was taking quite a while on codeberg.org. This was compounded by the fact that a few moments later it was generating them once again; it seemed like they weren't being cached, even though they were. - The problem was that the cache TTL was hardcoded to ten minutes instead of using the configured TTL. This patch changes that by using the configured TTL for the contributor cache, as this is a computationally heavy operation and should be cached for as long as possible for a good user experience. This doesn't impact the accuracy of this feature because the commit ID of the default branch is used as a cache key. - Also changed in this patch: errors are no longer cached and are logged instead, which is more helpful to the administrator. For the user, essentially nothing changes on this side; the contributor stats just look like they are loading indefinitely. - Realistically, testing this isn't possible, as the cache library Forgejo currently uses doesn't expose the TTL or expiration time of a key. Manually testing this behavior is quite lengthy, as one of the steps would need to be "wait for ten minutes" and describe how you can notice the data was cached or was just generated, and because you could use different types of cache, it will be quite hard to write down how you could check the TTL of a key for a particular cache (I'm not sure it's even possible for some). 
--- release-notes/9.0.0/4367.md | 1 + services/repository/contributors_graph.go | 20 +++++++------------ .../repository/contributors_graph_test.go | 14 +++++++++---- 3 files changed, 18 insertions(+), 17 deletions(-) create mode 100644 release-notes/9.0.0/4367.md diff --git a/release-notes/9.0.0/4367.md b/release-notes/9.0.0/4367.md new file mode 100644 index 000000000..b5528617f --- /dev/null +++ b/release-notes/9.0.0/4367.md @@ -0,0 +1 @@ +The caching of contributor stats was improved (the data used by `///activity/recent-commits`) to use the configured cache TTL from the config (`[cache].ITEM_TTL`) instead of a hardcoded TTL of ten minutes. The computation of this operation is computationally heavy and makes a lot of requests to the database and Git on repositories with a lot of commits. It should be cached for longer than what was previously hardcoded, ten minutes. diff --git a/services/repository/contributors_graph.go b/services/repository/contributors_graph.go index f26a87e6a..6b35c8272 100644 --- a/services/repository/contributors_graph.go +++ b/services/repository/contributors_graph.go @@ -22,15 +22,13 @@ import ( "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" api "code.gitea.io/gitea/modules/structs" "gitea.com/go-chi/cache" ) -const ( - contributorStatsCacheKey = "GetContributorStats/%s/%s" - contributorStatsCacheTimeout int64 = 60 * 10 -) +const contributorStatsCacheKey = "GetContributorStats/%s/%s" var ( ErrAwaitGeneration = errors.New("generation took longer than ") @@ -211,8 +209,7 @@ func generateContributorStats(genDone chan struct{}, cache cache.Cache, cacheKey gitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, repo) if err != nil { - err := fmt.Errorf("OpenRepository: %w", err) - _ = cache.Put(cacheKey, err, contributorStatsCacheTimeout) + log.Error("OpenRepository[repo=%q]: %v", repo.FullName(), err) return } defer closer.Close() @@ 
-222,13 +219,11 @@ func generateContributorStats(genDone chan struct{}, cache cache.Cache, cacheKey } extendedCommitStats, err := getExtendedCommitStats(gitRepo, revision) if err != nil { - err := fmt.Errorf("ExtendedCommitStats: %w", err) - _ = cache.Put(cacheKey, err, contributorStatsCacheTimeout) + log.Error("getExtendedCommitStats[repo=%q revision=%q]: %v", repo.FullName(), revision, err) return } if len(extendedCommitStats) == 0 { - err := fmt.Errorf("no commit stats returned for revision '%s'", revision) - _ = cache.Put(cacheKey, err, contributorStatsCacheTimeout) + log.Error("No commit stats were returned [repo=%q revision=%q]", repo.FullName(), revision) return } @@ -312,14 +307,13 @@ func generateContributorStats(genDone chan struct{}, cache cache.Cache, cacheKey data, err := json.Marshal(contributorsCommitStats) if err != nil { - err := fmt.Errorf("couldn't marshal the data: %w", err) - _ = cache.Put(cacheKey, err, contributorStatsCacheTimeout) + log.Error("json.Marshal[repo=%q revision=%q]: %v", repo.FullName(), revision, err) return } // Store the data as an string, to make it uniform what data type is returned // from caches. 
- _ = cache.Put(cacheKey, string(data), contributorStatsCacheTimeout) + _ = cache.Put(cacheKey, string(data), setting.CacheService.TTLSeconds()) generateLock.Delete(cacheKey) if genDone != nil { genDone <- struct{}{} diff --git a/services/repository/contributors_graph_test.go b/services/repository/contributors_graph_test.go index 2c6102005..a04587e24 100644 --- a/services/repository/contributors_graph_test.go +++ b/services/repository/contributors_graph_test.go @@ -6,12 +6,14 @@ package repository import ( "slices" "testing" + "time" "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/models/unittest" - "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/json" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/test" "gitea.com/go-chi/cache" "github.com/stretchr/testify/assert" @@ -27,10 +29,14 @@ func TestRepository_ContributorsGraph(t *testing.T) { }) assert.NoError(t, err) + lc, cleanup := test.NewLogChecker(log.DEFAULT, log.INFO) + lc.StopMark(`getExtendedCommitStats[repo="user2/repo2" revision="404ref"]: object does not exist [id: 404ref, rel_path: ]`) + defer cleanup() + generateContributorStats(nil, mockCache, "key", repo, "404ref") - err, isErr := mockCache.Get("key").(error) - assert.True(t, isErr) - assert.ErrorAs(t, err, &git.ErrNotExist{}) + assert.False(t, mockCache.IsExist("key")) + _, stopped := lc.Check(100 * time.Millisecond) + assert.True(t, stopped) generateContributorStats(nil, mockCache, "key2", repo, "master") dataString, isData := mockCache.Get("key2").(string)