Add cron method to gc LFS MetaObjects (#22385)
This PR adds a task to the cron service to allow garbage collection of LFS meta objects. As repositories may have a large number of LFSMetaObjects, an updated column is added to this table and it is used to perform a generational GC to attempt to reduce the amount of work. (There may need to be a bit more work here but this is probably enough for the moment.) Fix #7045 Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
parent
04c97aa364
commit
2cc3a6381c
9 changed files with 255 additions and 35 deletions
|
@ -2213,6 +2213,28 @@ ROUTER = console
|
||||||
;SCHEDULE = @every 168h
|
;SCHEDULE = @every 168h
|
||||||
;OLDER_THAN = 8760h
|
;OLDER_THAN = 8760h
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; Garbage collect LFS pointers in repositories
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;[cron.gc_lfs]
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;ENABLED = false
|
||||||
|
;; Run the LFS pointer garbage collection at start up time, if enabled (default false)
|
||||||
|
;RUN_AT_START = false
|
||||||
|
;; Interval as a duration between each gc run (default every 24h)
|
||||||
|
;SCHEDULE = @every 24h
|
||||||
|
;; Only attempt to garbage collect LFSMetaObjects older than this (default 7 days)
|
||||||
|
;OLDER_THAN = 168h
|
||||||
|
;; Only attempt to garbage collect LFSMetaObjects that have not been attempted to be garbage collected for this long (default 3 days)
|
||||||
|
;LAST_UPDATED_MORE_THAN_AGO = 72h
|
||||||
|
;; Minimum number of stale LFSMetaObjects to check per repo. Set to `0` to always check all.
|
||||||
|
;NUMBER_TO_CHECK_PER_REPO = 100
|
||||||
|
;; Check at least this proportion of LFSMetaObjects per repo. (This may cause all stale LFSMetaObjects to be checked.)
|
||||||
|
;PROPORTION_TO_CHECK_PER_REPO = 0.6
|
||||||
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; Git Operation timeout in seconds
|
;; Git Operation timeout in seconds
|
||||||
|
|
|
@ -1039,6 +1039,16 @@ Default templates for project boards:
|
||||||
- `SCHEDULE`: **@every 168h**: Cron syntax to set how often to check.
|
- `SCHEDULE`: **@every 168h**: Cron syntax to set how often to check.
|
||||||
- `OLDER_THAN`: **8760h**: any system notice older than this expression will be deleted from database.
|
- `OLDER_THAN`: **8760h**: any system notice older than this expression will be deleted from database.
|
||||||
|
|
||||||
|
#### Cron - Garbage collect LFS pointers in repositories ('cron.gc_lfs')
|
||||||
|
|
||||||
|
- `ENABLED`: **false**: Enable service.
|
||||||
|
- `RUN_AT_START`: **false**: Run tasks at start up time (if ENABLED).
|
||||||
|
- `SCHEDULE`: **@every 24h**: Cron syntax to set how often to check.
|
||||||
|
- `OLDER_THAN`: **168h**: Only attempt to garbage collect LFSMetaObjects older than this (default 7 days)
|
||||||
|
- `LAST_UPDATED_MORE_THAN_AGO`: **72h**: Only attempt to garbage collect LFSMetaObjects that have not been attempted to be garbage collected for this long (default 3 days)
|
||||||
|
- `NUMBER_TO_CHECK_PER_REPO`: **100**: Minimum number of stale LFSMetaObjects to check per repo. Set to `0` to always check all.
|
||||||
|
- `PROPORTION_TO_CHECK_PER_REPO`: **0.6**: Check at least this proportion of LFSMetaObjects per repo. (This may cause all stale LFSMetaObjects to be checked.)
|
||||||
|
|
||||||
## Git (`git`)
|
## Git (`git`)
|
||||||
|
|
||||||
- `PATH`: **""**: The path of Git executable. If empty, Gitea searches through the PATH environment.
|
- `PATH`: **""**: The path of Git executable. If empty, Gitea searches through the PATH environment.
|
||||||
|
|
|
@ -115,6 +115,7 @@ type LFSMetaObject struct {
|
||||||
RepositoryID int64 `xorm:"UNIQUE(s) INDEX NOT NULL"`
|
RepositoryID int64 `xorm:"UNIQUE(s) INDEX NOT NULL"`
|
||||||
Existing bool `xorm:"-"`
|
Existing bool `xorm:"-"`
|
||||||
CreatedUnix timeutil.TimeStamp `xorm:"created"`
|
CreatedUnix timeutil.TimeStamp `xorm:"created"`
|
||||||
|
UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
|
@ -334,8 +335,45 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
|
||||||
return lfsSize, nil
|
return lfsSize, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IterateRepositoryIDsWithLFSMetaObjects iterates across the repositories that have LFSMetaObjects
|
||||||
|
func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error {
|
||||||
|
batchSize := setting.Database.IterateBufferSize
|
||||||
|
sess := db.GetEngine(ctx)
|
||||||
|
id := int64(0)
|
||||||
|
type RepositoryCount struct {
|
||||||
|
RepositoryID int64
|
||||||
|
Count int64
|
||||||
|
}
|
||||||
|
for {
|
||||||
|
counts := make([]*RepositoryCount, 0, batchSize)
|
||||||
|
sess.Select("repository_id, COUNT(id) AS count").
|
||||||
|
Table("lfs_meta_object").
|
||||||
|
Where("repository_id > ?", id).
|
||||||
|
GroupBy("repository_id").
|
||||||
|
OrderBy("repository_id ASC")
|
||||||
|
|
||||||
|
if err := sess.Limit(batchSize, 0).Find(&counts); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if len(counts) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, count := range counts {
|
||||||
|
if err := f(ctx, count.RepositoryID, count.Count); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
id = counts[len(counts)-1].RepositoryID
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo
|
||||||
type IterateLFSMetaObjectsForRepoOptions struct {
|
type IterateLFSMetaObjectsForRepoOptions struct {
|
||||||
OlderThan time.Time
|
OlderThan time.Time
|
||||||
|
UpdatedLessRecentlyThan time.Time
|
||||||
|
OrderByUpdated bool
|
||||||
|
LoopFunctionAlwaysUpdates bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// IterateLFSMetaObjectsForRepo provides an iterator for LFSMetaObjects per Repo
|
// IterateLFSMetaObjectsForRepo provides an iterator for LFSMetaObjects per Repo
|
||||||
|
@ -348,28 +386,53 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
|
||||||
LFSMetaObject
|
LFSMetaObject
|
||||||
}
|
}
|
||||||
|
|
||||||
|
id := int64(0)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
beans := make([]*CountLFSMetaObject, 0, batchSize)
|
beans := make([]*CountLFSMetaObject, 0, batchSize)
|
||||||
// SELECT `lfs_meta_object`.*, COUNT(`l1`.id) as `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id = ? GROUP BY lfs_meta_object.id
|
|
||||||
sess := engine.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`").
|
sess := engine.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`").
|
||||||
Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
|
Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
|
||||||
Where("`lfs_meta_object`.repository_id = ?", repoID)
|
Where("`lfs_meta_object`.repository_id = ?", repoID)
|
||||||
if !opts.OlderThan.IsZero() {
|
if !opts.OlderThan.IsZero() {
|
||||||
sess.And("`lfs_meta_object`.created_unix < ?", opts.OlderThan)
|
sess.And("`lfs_meta_object`.created_unix < ?", opts.OlderThan)
|
||||||
}
|
}
|
||||||
|
if !opts.UpdatedLessRecentlyThan.IsZero() {
|
||||||
|
sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan)
|
||||||
|
}
|
||||||
sess.GroupBy("`lfs_meta_object`.id")
|
sess.GroupBy("`lfs_meta_object`.id")
|
||||||
|
if opts.OrderByUpdated {
|
||||||
|
sess.OrderBy("`lfs_meta_object`.updated_unix ASC")
|
||||||
|
} else {
|
||||||
|
sess.And("`lfs_meta_object`.id > ?", id)
|
||||||
|
sess.OrderBy("`lfs_meta_object`.id ASC")
|
||||||
|
}
|
||||||
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
|
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if len(beans) == 0 {
|
if len(beans) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
if !opts.LoopFunctionAlwaysUpdates {
|
||||||
start += len(beans)
|
start += len(beans)
|
||||||
|
}
|
||||||
|
|
||||||
for _, bean := range beans {
|
for _, bean := range beans {
|
||||||
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
|
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
id = beans[len(beans)-1].ID
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MarkLFSMetaObject updates the updated time for the provided LFSMetaObject
|
||||||
|
func MarkLFSMetaObject(ctx context.Context, id int64) error {
|
||||||
|
obj := &LFSMetaObject{
|
||||||
|
UpdatedUnix: timeutil.TimeStampNow(),
|
||||||
|
}
|
||||||
|
count, err := db.GetEngine(ctx).ID(id).Update(obj)
|
||||||
|
if count != 1 {
|
||||||
|
log.Error("Unexpectedly updated %d LFSMetaObjects with ID: %d", count, id)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
|
@ -432,6 +432,9 @@ var migrations = []Migration{
|
||||||
NewMigration("Update counts of all open milestones", v1_18.UpdateOpenMilestoneCounts),
|
NewMigration("Update counts of all open milestones", v1_18.UpdateOpenMilestoneCounts),
|
||||||
// v230 -> v231
|
// v230 -> v231
|
||||||
NewMigration("Add ConfidentialClient column (default true) to OAuth2Application table", v1_18.AddConfidentialClientColumnToOAuth2ApplicationTable),
|
NewMigration("Add ConfidentialClient column (default true) to OAuth2Application table", v1_18.AddConfidentialClientColumnToOAuth2ApplicationTable),
|
||||||
|
|
||||||
|
// Gitea 1.18.0 ends at v231
|
||||||
|
|
||||||
// v231 -> v232
|
// v231 -> v232
|
||||||
NewMigration("Add index for hook_task", v1_19.AddIndexForHookTask),
|
NewMigration("Add index for hook_task", v1_19.AddIndexForHookTask),
|
||||||
// v232 -> v233
|
// v232 -> v233
|
||||||
|
@ -446,6 +449,8 @@ var migrations = []Migration{
|
||||||
NewMigration("Create secrets table", v1_19.CreateSecretsTable),
|
NewMigration("Create secrets table", v1_19.CreateSecretsTable),
|
||||||
// v237 -> v238
|
// v237 -> v238
|
||||||
NewMigration("Drop ForeignReference table", v1_19.DropForeignReferenceTable),
|
NewMigration("Drop ForeignReference table", v1_19.DropForeignReferenceTable),
|
||||||
|
// v238 -> v239
|
||||||
|
NewMigration("Add updated unix to LFSMetaObject", v1_19.AddUpdatedUnixToLFSMetaObject),
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetCurrentDBVersion returns the current db version
|
// GetCurrentDBVersion returns the current db version
|
||||||
|
|
27
models/migrations/v1_19/v238.go
Normal file
27
models/migrations/v1_19/v238.go
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
// Copyright 2022 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
package v1_19 //nolint
|
||||||
|
|
||||||
|
import (
|
||||||
|
"code.gitea.io/gitea/modules/timeutil"
|
||||||
|
|
||||||
|
"xorm.io/xorm"
|
||||||
|
)
|
||||||
|
|
||||||
|
// AddUpdatedUnixToLFSMetaObject adds an updated column to the LFSMetaObject to allow for garbage collection
|
||||||
|
func AddUpdatedUnixToLFSMetaObject(x *xorm.Engine) error {
|
||||||
|
// Sync the table against the struct declared below so that the indexed UpdatedUnix column is added.
|
||||||
|
// NOTE(review): the remaining fields presumably mirror the existing table definition — confirm against the model.
|
||||||
|
// LFSMetaObject stores metadata for LFS tracked files.
|
||||||
|
type LFSMetaObject struct {
|
||||||
|
ID int64 `xorm:"pk autoincr"`
|
||||||
|
Oid string `json:"oid" xorm:"UNIQUE(s) INDEX NOT NULL"`
|
||||||
|
Size int64 `json:"size" xorm:"NOT NULL"`
|
||||||
|
RepositoryID int64 `xorm:"UNIQUE(s) INDEX NOT NULL"`
|
||||||
|
CreatedUnix timeutil.TimeStamp `xorm:"created"`
|
||||||
|
UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
|
||||||
|
}
|
||||||
|
|
||||||
|
return x.Sync(new(LFSMetaObject))
|
||||||
|
}
|
|
@ -6,6 +6,7 @@ package doctor
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
"code.gitea.io/gitea/modules/log"
|
"code.gitea.io/gitea/modules/log"
|
||||||
"code.gitea.io/gitea/modules/setting"
|
"code.gitea.io/gitea/modules/setting"
|
||||||
|
@ -29,7 +30,20 @@ func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool
|
||||||
return fmt.Errorf("LFS support is disabled")
|
return fmt.Errorf("LFS support is disabled")
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := repository.GarbageCollectLFSMetaObjects(ctx, logger, autofix); err != nil {
|
if err := repository.GarbageCollectLFSMetaObjects(ctx, repository.GarbageCollectLFSMetaObjectsOptions{
|
||||||
|
Logger: logger,
|
||||||
|
AutoFix: autofix,
|
||||||
|
// Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload
|
||||||
|
// and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby
|
||||||
|
// an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid
|
||||||
|
// changes in new branches that might lead to lfs objects becoming temporarily unassociated with git
|
||||||
|
// objects.
|
||||||
|
//
|
||||||
|
// It is likely that a week is potentially excessive but it should definitely be enough that any
|
||||||
|
// unassociated LFS object is genuinely unassociated.
|
||||||
|
OlderThan: time.Now().Add(-24 * time.Hour * 7),
|
||||||
|
// We don't set the UpdatedLessRecentlyThan because we want to do a full GC
|
||||||
|
}); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2554,6 +2554,7 @@ dashboard.delete_old_actions = Delete all old actions from database
|
||||||
dashboard.delete_old_actions.started = Delete all old actions from database started.
|
dashboard.delete_old_actions.started = Delete all old actions from database started.
|
||||||
dashboard.update_checker = Update checker
|
dashboard.update_checker = Update checker
|
||||||
dashboard.delete_old_system_notices = Delete all old system notices from database
|
dashboard.delete_old_system_notices = Delete all old system notices from database
|
||||||
|
dashboard.gc_lfs = Garbage collect LFS meta objects
|
||||||
|
|
||||||
users.user_manage_panel = User Account Management
|
users.user_manage_panel = User Account Management
|
||||||
users.new_account = Create User Account
|
users.new_account = Create User Account
|
||||||
|
|
|
@ -175,6 +175,48 @@ func registerDeleteOldSystemNotices() {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func registerGCLFS() {
|
||||||
|
if !setting.LFS.StartServer {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
type GCLFSConfig struct {
|
||||||
|
OlderThanConfig
|
||||||
|
LastUpdatedMoreThanAgo time.Duration
|
||||||
|
NumberToCheckPerRepo int64
|
||||||
|
ProportionToCheckPerRepo float64
|
||||||
|
}
|
||||||
|
|
||||||
|
RegisterTaskFatal("gc_lfs", &GCLFSConfig{
|
||||||
|
OlderThanConfig: OlderThanConfig{
|
||||||
|
BaseConfig: BaseConfig{
|
||||||
|
Enabled: false,
|
||||||
|
RunAtStart: false,
|
||||||
|
Schedule: "@every 24h",
|
||||||
|
},
|
||||||
|
// Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload
|
||||||
|
// and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby
|
||||||
|
// an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid
|
||||||
|
// changes in new branches that might lead to lfs objects becoming temporarily unassociated with git
|
||||||
|
// objects.
|
||||||
|
//
|
||||||
|
// It is likely that a week is potentially excessive but it should definitely be enough that any
|
||||||
|
// unassociated LFS object is genuinely unassociated.
|
||||||
|
OlderThan: 24 * time.Hour * 7,
|
||||||
|
},
|
||||||
|
// Only GC things that haven't been looked at in the past 3 days
|
||||||
|
LastUpdatedMoreThanAgo: 24 * time.Hour * 3,
|
||||||
|
NumberToCheckPerRepo: 100,
|
||||||
|
ProportionToCheckPerRepo: 0.6,
|
||||||
|
}, func(ctx context.Context, _ *user_model.User, config Config) error {
|
||||||
|
gcLFSConfig := config.(*GCLFSConfig)
|
||||||
|
return repo_service.GarbageCollectLFSMetaObjects(ctx, repo_service.GarbageCollectLFSMetaObjectsOptions{
|
||||||
|
AutoFix: true,
|
||||||
|
OlderThan: time.Now().Add(-gcLFSConfig.OlderThan),
|
||||||
|
UpdatedLessRecentlyThan: time.Now().Add(-gcLFSConfig.LastUpdatedMoreThanAgo),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func initExtendedTasks() {
|
func initExtendedTasks() {
|
||||||
registerDeleteInactiveUsers()
|
registerDeleteInactiveUsers()
|
||||||
registerDeleteRepositoryArchives()
|
registerDeleteRepositoryArchives()
|
||||||
|
@ -188,4 +230,5 @@ func initExtendedTasks() {
|
||||||
registerDeleteOldActions()
|
registerDeleteOldActions()
|
||||||
registerUpdateGiteaChecker()
|
registerUpdateGiteaChecker()
|
||||||
registerDeleteOldSystemNotices()
|
registerDeleteOldSystemNotices()
|
||||||
|
registerGCLFS()
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,49 +5,67 @@ package repository
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"code.gitea.io/gitea/models/db"
|
|
||||||
git_model "code.gitea.io/gitea/models/git"
|
git_model "code.gitea.io/gitea/models/git"
|
||||||
repo_model "code.gitea.io/gitea/models/repo"
|
repo_model "code.gitea.io/gitea/models/repo"
|
||||||
"code.gitea.io/gitea/modules/git"
|
"code.gitea.io/gitea/modules/git"
|
||||||
"code.gitea.io/gitea/modules/lfs"
|
"code.gitea.io/gitea/modules/lfs"
|
||||||
"code.gitea.io/gitea/modules/log"
|
"code.gitea.io/gitea/modules/log"
|
||||||
|
"code.gitea.io/gitea/modules/setting"
|
||||||
"xorm.io/builder"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func GarbageCollectLFSMetaObjects(ctx context.Context, logger log.Logger, autofix bool) error {
|
// GarbageCollectLFSMetaObjectsOptions provides options for GarbageCollectLFSMetaObjects function
|
||||||
log.Trace("Doing: GarbageCollectLFSMetaObjects")
|
type GarbageCollectLFSMetaObjectsOptions struct {
|
||||||
|
Logger log.Logger
|
||||||
if err := db.Iterate(
|
AutoFix bool
|
||||||
ctx,
|
OlderThan time.Time
|
||||||
builder.And(builder.Gt{"id": 0}),
|
UpdatedLessRecentlyThan time.Time
|
||||||
func(ctx context.Context, repo *repo_model.Repository) error {
|
NumberToCheckPerRepo int64
|
||||||
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, logger, autofix)
|
ProportionToCheckPerRepo float64
|
||||||
},
|
|
||||||
); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Trace("Finished: GarbageCollectLFSMetaObjects")
|
// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
|
||||||
|
func GarbageCollectLFSMetaObjects(ctx context.Context, opts GarbageCollectLFSMetaObjectsOptions) error {
|
||||||
|
log.Trace("Doing: GarbageCollectLFSMetaObjects")
|
||||||
|
defer log.Trace("Finished: GarbageCollectLFSMetaObjects")
|
||||||
|
|
||||||
|
if !setting.LFS.StartServer {
|
||||||
|
if opts.Logger != nil {
|
||||||
|
opts.Logger.Info("LFS support is disabled")
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, logger log.Logger, autofix bool) error {
|
return git_model.IterateRepositoryIDsWithLFSMetaObjects(ctx, func(ctx context.Context, repoID, count int64) error {
|
||||||
if logger != nil {
|
repo, err := repo_model.GetRepositoryByID(ctx, repoID)
|
||||||
logger.Info("Checking %-v", repo)
|
if err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
total, orphaned, collected, deleted := 0, 0, 0, 0
|
|
||||||
if logger != nil {
|
if newMinimum := int64(float64(count) * opts.ProportionToCheckPerRepo); newMinimum > opts.NumberToCheckPerRepo && opts.NumberToCheckPerRepo != 0 {
|
||||||
|
opts.NumberToCheckPerRepo = newMinimum
|
||||||
|
}
|
||||||
|
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// GarbageCollectLFSMetaObjectsForRepo garbage collects LFS objects for a specific repository
|
||||||
|
func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, opts GarbageCollectLFSMetaObjectsOptions) error {
|
||||||
|
if opts.Logger != nil {
|
||||||
|
opts.Logger.Info("Checking %-v", repo)
|
||||||
|
}
|
||||||
|
total, orphaned, collected, deleted := int64(0), 0, 0, 0
|
||||||
|
if opts.Logger != nil {
|
||||||
defer func() {
|
defer func() {
|
||||||
if orphaned == 0 {
|
if orphaned == 0 {
|
||||||
logger.Info("Found %d total LFSMetaObjects in %-v", total, repo)
|
opts.Logger.Info("Found %d total LFSMetaObjects in %-v", total, repo)
|
||||||
} else if !autofix {
|
} else if !opts.AutoFix {
|
||||||
logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
|
opts.Logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
|
||||||
} else {
|
} else {
|
||||||
logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
|
opts.Logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
@ -60,17 +78,21 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
|
||||||
defer gitRepo.Close()
|
defer gitRepo.Close()
|
||||||
|
|
||||||
store := lfs.NewContentStore()
|
store := lfs.NewContentStore()
|
||||||
|
errStop := errors.New("STOPERR")
|
||||||
|
|
||||||
return git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
|
err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
|
||||||
|
if opts.NumberToCheckPerRepo > 0 && total > opts.NumberToCheckPerRepo {
|
||||||
|
return errStop
|
||||||
|
}
|
||||||
total++
|
total++
|
||||||
pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent()))
|
pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent()))
|
||||||
|
|
||||||
if gitRepo.IsObjectExist(pointerSha.String()) {
|
if gitRepo.IsObjectExist(pointerSha.String()) {
|
||||||
return nil
|
return git_model.MarkLFSMetaObject(ctx, metaObject.ID)
|
||||||
}
|
}
|
||||||
orphaned++
|
orphaned++
|
||||||
|
|
||||||
if !autofix {
|
if !opts.AutoFix {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// Non-existent pointer file
|
// Non-existent pointer file
|
||||||
|
@ -100,6 +122,19 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
|
||||||
//
|
//
|
||||||
// It is likely that a week is potentially excessive but it should definitely be enough that any
|
// It is likely that a week is potentially excessive but it should definitely be enough that any
|
||||||
// unassociated LFS object is genuinely unassociated.
|
// unassociated LFS object is genuinely unassociated.
|
||||||
OlderThan: time.Now().Add(-24 * 7 * time.Hour),
|
OlderThan: opts.OlderThan,
|
||||||
|
UpdatedLessRecentlyThan: opts.UpdatedLessRecentlyThan,
|
||||||
|
OrderByUpdated: true,
|
||||||
|
LoopFunctionAlwaysUpdates: true,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
if err == errStop {
|
||||||
|
if opts.Logger != nil {
|
||||||
|
opts.Logger.Info("Processing stopped at %d total LFSMetaObjects in %-v", total, repo)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
} else if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue