Refactor LFS GC functions
- Remove options that currently aren't set
on `GarbageCollectLFSMetaObjectsOptions` and
`IterateLFSMetaObjectsForRepoOptions`.
- Simplify `IterateRepositoryIDsWithLFSMetaObjects` and
`IterateLFSMetaObjectsForRepo`.
- `IterateLFSMetaObjectsForRepo` could previously get stuck in an
infinite loop (the `gc-lfs` doctor check was able to reproduce this)
because the code expected that processed records would be updated so
that they no longer matched the SQL query, but that wasn't the case.
Simply enforce that only records with an `id` higher than the last `id`
from the previous iteration are selected.
- For the `gc-lfs` doctor check this happened because the
`UpdatedLessRecentlyThan` option was not set, so records that had just
been marked as active during the iteration were not filtered out.
- Add unit tests
- Most likely a regression from 2cc3a6381c.
- The bug with `gc-lfs` was found on Codeberg.
This commit is contained in:
parent
11fd8d5109
commit
7ffa7f5bce
5 changed files with 132 additions and 56 deletions
|
@ -0,0 +1,7 @@
|
||||||
|
-
|
||||||
|
|
||||||
|
id: 1000
|
||||||
|
oid: 9d172e5c64b4f0024b9901ec6afe9ea052f3c9b6ff9f4b07956d8c48c86fca82
|
||||||
|
size: 25
|
||||||
|
repository_id: 1
|
||||||
|
created_unix: 1712309123
|
|
@ -337,58 +337,47 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
|
||||||
func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error {
|
func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error {
|
||||||
batchSize := setting.Database.IterateBufferSize
|
batchSize := setting.Database.IterateBufferSize
|
||||||
sess := db.GetEngine(ctx)
|
sess := db.GetEngine(ctx)
|
||||||
id := int64(0)
|
var start int
|
||||||
type RepositoryCount struct {
|
type RepositoryCount struct {
|
||||||
RepositoryID int64
|
RepositoryID int64
|
||||||
Count int64
|
Count int64
|
||||||
}
|
}
|
||||||
for {
|
for {
|
||||||
counts := make([]*RepositoryCount, 0, batchSize)
|
counts := make([]*RepositoryCount, 0, batchSize)
|
||||||
sess.Select("repository_id, COUNT(id) AS count").
|
if err := sess.Select("repository_id, COUNT(id) AS count").
|
||||||
Table("lfs_meta_object").
|
Table("lfs_meta_object").
|
||||||
Where("repository_id > ?", id).
|
|
||||||
GroupBy("repository_id").
|
GroupBy("repository_id").
|
||||||
OrderBy("repository_id ASC")
|
OrderBy("repository_id ASC").Limit(batchSize, start).Find(&counts); err != nil {
|
||||||
|
|
||||||
if err := sess.Limit(batchSize, 0).Find(&counts); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if len(counts) == 0 {
|
if len(counts) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
start += len(counts)
|
||||||
|
|
||||||
for _, count := range counts {
|
for _, count := range counts {
|
||||||
if err := f(ctx, count.RepositoryID, count.Count); err != nil {
|
if err := f(ctx, count.RepositoryID, count.Count); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
id = counts[len(counts)-1].RepositoryID
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo
|
// IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo
|
||||||
type IterateLFSMetaObjectsForRepoOptions struct {
|
type IterateLFSMetaObjectsForRepoOptions struct {
|
||||||
OlderThan timeutil.TimeStamp
|
OlderThan timeutil.TimeStamp
|
||||||
UpdatedLessRecentlyThan timeutil.TimeStamp
|
UpdatedLessRecentlyThan timeutil.TimeStamp
|
||||||
OrderByUpdated bool
|
|
||||||
LoopFunctionAlwaysUpdates bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo
|
// IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo
|
||||||
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
|
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
|
||||||
var start int
|
|
||||||
batchSize := setting.Database.IterateBufferSize
|
batchSize := setting.Database.IterateBufferSize
|
||||||
engine := db.GetEngine(ctx)
|
engine := db.GetEngine(ctx)
|
||||||
type CountLFSMetaObject struct {
|
|
||||||
Count int64
|
|
||||||
LFSMetaObject `xorm:"extends"`
|
|
||||||
}
|
|
||||||
|
|
||||||
id := int64(0)
|
id := int64(0)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
beans := make([]*CountLFSMetaObject, 0, batchSize)
|
beans := make([]*LFSMetaObject, 0, batchSize)
|
||||||
sess := engine.Table("lfs_meta_object").Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`").
|
sess := engine.Table("lfs_meta_object").Select("`lfs_meta_object`.*").
|
||||||
Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
|
Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
|
||||||
Where("`lfs_meta_object`.repository_id = ?", repoID)
|
Where("`lfs_meta_object`.repository_id = ?", repoID)
|
||||||
if !opts.OlderThan.IsZero() {
|
if !opts.OlderThan.IsZero() {
|
||||||
|
@ -397,25 +386,19 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
|
||||||
if !opts.UpdatedLessRecentlyThan.IsZero() {
|
if !opts.UpdatedLessRecentlyThan.IsZero() {
|
||||||
sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan)
|
sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan)
|
||||||
}
|
}
|
||||||
sess.GroupBy("`lfs_meta_object`.id")
|
sess.GroupBy("`lfs_meta_object`.id").
|
||||||
if opts.OrderByUpdated {
|
And("`lfs_meta_object`.id > ?", id).
|
||||||
sess.OrderBy("`lfs_meta_object`.updated_unix ASC")
|
OrderBy("`lfs_meta_object`.id ASC")
|
||||||
} else {
|
|
||||||
sess.And("`lfs_meta_object`.id > ?", id)
|
if err := sess.Limit(batchSize, 0).Find(&beans); err != nil {
|
||||||
sess.OrderBy("`lfs_meta_object`.id ASC")
|
|
||||||
}
|
|
||||||
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if len(beans) == 0 {
|
if len(beans) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if !opts.LoopFunctionAlwaysUpdates {
|
|
||||||
start += len(beans)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, bean := range beans {
|
for _, bean := range beans {
|
||||||
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
|
if err := f(ctx, bean); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
101
models/git/lfs_test.go
Normal file
101
models/git/lfs_test.go
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
// Copyright 2024 The Forgejo Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
package git
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"code.gitea.io/gitea/models/db"
|
||||||
|
"code.gitea.io/gitea/models/unittest"
|
||||||
|
"code.gitea.io/gitea/modules/setting"
|
||||||
|
"code.gitea.io/gitea/modules/test"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestIterateRepositoryIDsWithLFSMetaObjects(t *testing.T) {
|
||||||
|
defer unittest.OverrideFixtures(
|
||||||
|
unittest.FixturesOptions{
|
||||||
|
Dir: filepath.Join(setting.AppWorkPath, "models/fixtures/"),
|
||||||
|
Base: setting.AppWorkPath,
|
||||||
|
Dirs: []string{"models/git/TestIterateRepositoryIDsWithLFSMetaObjects/"},
|
||||||
|
},
|
||||||
|
)()
|
||||||
|
assert.NoError(t, unittest.PrepareTestDatabase())
|
||||||
|
|
||||||
|
type repocount struct {
|
||||||
|
repoid int64
|
||||||
|
count int64
|
||||||
|
}
|
||||||
|
expected := []repocount{{1, 1}, {54, 4}}
|
||||||
|
|
||||||
|
t.Run("Normal batch size", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 20)()
|
||||||
|
cases := []repocount{}
|
||||||
|
|
||||||
|
err := IterateRepositoryIDsWithLFSMetaObjects(db.DefaultContext, func(ctx context.Context, repoID, count int64) error {
|
||||||
|
cases = append(cases, repocount{repoID, count})
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, expected, cases)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Low batch size", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 1)()
|
||||||
|
cases := []repocount{}
|
||||||
|
|
||||||
|
err := IterateRepositoryIDsWithLFSMetaObjects(db.DefaultContext, func(ctx context.Context, repoID, count int64) error {
|
||||||
|
cases = append(cases, repocount{repoID, count})
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, expected, cases)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIterateLFSMetaObjectsForRepo(t *testing.T) {
|
||||||
|
assert.NoError(t, unittest.PrepareTestDatabase())
|
||||||
|
|
||||||
|
expectedIDs := []int64{1, 2, 3, 4}
|
||||||
|
|
||||||
|
t.Run("Normal batch size", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 20)()
|
||||||
|
actualIDs := []int64{}
|
||||||
|
|
||||||
|
err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error {
|
||||||
|
actualIDs = append(actualIDs, lo.ID)
|
||||||
|
return nil
|
||||||
|
}, &IterateLFSMetaObjectsForRepoOptions{})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, expectedIDs, actualIDs)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Low batch size", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 1)()
|
||||||
|
actualIDs := []int64{}
|
||||||
|
|
||||||
|
err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error {
|
||||||
|
actualIDs = append(actualIDs, lo.ID)
|
||||||
|
return nil
|
||||||
|
}, &IterateLFSMetaObjectsForRepoOptions{})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, expectedIDs, actualIDs)
|
||||||
|
|
||||||
|
t.Run("Batch handles updates", func(t *testing.T) {
|
||||||
|
actualIDs := []int64{}
|
||||||
|
|
||||||
|
err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error {
|
||||||
|
actualIDs = append(actualIDs, lo.ID)
|
||||||
|
_, err := db.DeleteByID[LFSMetaObject](ctx, lo.ID)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
return nil
|
||||||
|
}, &IterateLFSMetaObjectsForRepoOptions{})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, expectedIDs, actualIDs)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
|
@ -44,6 +44,7 @@ func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool
|
||||||
OlderThan: time.Now().Add(-24 * time.Hour * 7),
|
OlderThan: time.Now().Add(-24 * time.Hour * 7),
|
||||||
// We don't set the UpdatedLessRecentlyThan because we want to do a full GC
|
// We don't set the UpdatedLessRecentlyThan because we want to do a full GC
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
|
logger.Error("Couldn't garabage collect LFS objects: %v", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,6 @@ package repository
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -21,12 +20,10 @@ import (
|
||||||
|
|
||||||
// GarbageCollectLFSMetaObjectsOptions provides options for GarbageCollectLFSMetaObjects function
|
// GarbageCollectLFSMetaObjectsOptions provides options for GarbageCollectLFSMetaObjects function
|
||||||
type GarbageCollectLFSMetaObjectsOptions struct {
|
type GarbageCollectLFSMetaObjectsOptions struct {
|
||||||
LogDetail func(format string, v ...any)
|
LogDetail func(format string, v ...any)
|
||||||
AutoFix bool
|
AutoFix bool
|
||||||
OlderThan time.Time
|
OlderThan time.Time
|
||||||
UpdatedLessRecentlyThan time.Time
|
UpdatedLessRecentlyThan time.Time
|
||||||
NumberToCheckPerRepo int64
|
|
||||||
ProportionToCheckPerRepo float64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
|
// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
|
||||||
|
@ -49,9 +46,6 @@ func GarbageCollectLFSMetaObjects(ctx context.Context, opts GarbageCollectLFSMet
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if newMinimum := int64(float64(count) * opts.ProportionToCheckPerRepo); newMinimum > opts.NumberToCheckPerRepo && opts.NumberToCheckPerRepo != 0 {
|
|
||||||
opts.NumberToCheckPerRepo = newMinimum
|
|
||||||
}
|
|
||||||
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
|
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -78,13 +72,9 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
|
||||||
defer gitRepo.Close()
|
defer gitRepo.Close()
|
||||||
|
|
||||||
store := lfs.NewContentStore()
|
store := lfs.NewContentStore()
|
||||||
errStop := errors.New("STOPERR")
|
|
||||||
objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName)
|
objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName)
|
||||||
|
|
||||||
err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
|
err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject) error {
|
||||||
if opts.NumberToCheckPerRepo > 0 && total > opts.NumberToCheckPerRepo {
|
|
||||||
return errStop
|
|
||||||
}
|
|
||||||
total++
|
total++
|
||||||
pointerSha := git.ComputeBlobHash(objectFormat, []byte(metaObject.Pointer.StringContent()))
|
pointerSha := git.ComputeBlobHash(objectFormat, []byte(metaObject.Pointer.StringContent()))
|
||||||
|
|
||||||
|
@ -123,16 +113,10 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
|
||||||
//
|
//
|
||||||
// It is likely that a week is potentially excessive but it should definitely be enough that any
|
// It is likely that a week is potentially excessive but it should definitely be enough that any
|
||||||
// unassociated LFS object is genuinely unassociated.
|
// unassociated LFS object is genuinely unassociated.
|
||||||
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
|
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
|
||||||
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
|
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
|
||||||
OrderByUpdated: true,
|
|
||||||
LoopFunctionAlwaysUpdates: true,
|
|
||||||
})
|
})
|
||||||
|
if err != nil {
|
||||||
if err == errStop {
|
|
||||||
opts.LogDetail("Processing stopped at %d total LFSMetaObjects in %-v", total, repo)
|
|
||||||
return nil
|
|
||||||
} else if err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
|
Loading…
Reference in a new issue