Rework repository archive (#14723)

* Use storage to store archive files

* Fix backend lint

* Add archiver table to the database

* Finish archive download

* Fix test

* Add database migrations

* Add status for archiver

* Fix lint

* Add queue

* Add doctor to check and delete old archives

* Improve archive queue

* Fix tests

* Improve archive storage

* Delete repo archives

* Add missing fixture

* fix fixture

* Fix fixture

* Fix test

* Fix archiver cleaning

* Fix bug

* Add docs for repository archive storage

* remove repo-archive configuration

* Fix test

* Fix test

* Fix lint

Co-authored-by: 6543 <6543@obermui.de>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
This commit is contained in:
Lunny Xiao 2021-06-24 05:12:38 +08:00 committed by GitHub
parent c9c7afda1a
commit b223d36195
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
25 changed files with 648 additions and 480 deletions

View file

@ -0,0 +1 @@
[] # empty

View file

@ -319,6 +319,8 @@ var migrations = []Migration{
NewMigration("Create PushMirror table", createPushMirrorTable),
// v184 -> v185
NewMigration("Rename Task errors to message", renameTaskErrorsToMessage),
// v185 -> v186
NewMigration("Add new table repo_archiver", addRepoArchiver),
}
// GetCurrentDBVersion returns the current db version

View file

@ -1,3 +1,4 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

22
models/migrations/v185.go Normal file
View file

@ -0,0 +1,22 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package migrations
import (
"xorm.io/xorm"
)
// addRepoArchiver is the migration step that syncs the RepoArchiver schema
// declared below into the database through the given engine.
func addRepoArchiver(x *xorm.Engine) error {
	// RepoArchiver is a migration-local copy of the schema, so this step does
	// not depend on any struct declared outside this function.
	type RepoArchiver struct {
		ID          int64  `xorm:"pk autoincr"`
		RepoID      int64  `xorm:"index unique(s)"`
		Type        int    `xorm:"unique(s)"`
		Status      int
		CommitID    string `xorm:"VARCHAR(40) unique(s)"`
		CreatedUnix int64  `xorm:"INDEX NOT NULL created"`
	}

	return x.Sync2(&RepoArchiver{})
}

View file

@ -136,6 +136,7 @@ func init() {
new(RepoTransfer),
new(IssueIndex),
new(PushMirror),
new(RepoArchiver),
)
gonicNames := []string{"SSL", "UID"}

View file

@ -1587,6 +1587,22 @@ func DeleteRepository(doer *User, uid, repoID int64) error {
return err
}
// Remove archives
var archives []*RepoArchiver
if err = sess.Where("repo_id=?", repoID).Find(&archives); err != nil {
return err
}
for _, v := range archives {
v.Repo = repo
p, _ := v.RelativePath()
removeStorageWithNotice(sess, storage.RepoArchives, "Delete repo archive file", p)
}
if _, err := sess.Delete(&RepoArchiver{RepoID: repoID}); err != nil {
return err
}
if repo.NumForks > 0 {
if _, err = sess.Exec("UPDATE `repository` SET fork_id=0,is_fork=? WHERE fork_id=?", false, repo.ID); err != nil {
log.Error("reset 'fork_id' and 'is_fork': %v", err)
@ -1768,64 +1784,45 @@ func DeleteRepositoryArchives(ctx context.Context) error {
func DeleteOldRepositoryArchives(ctx context.Context, olderThan time.Duration) error {
log.Trace("Doing: ArchiveCleanup")
if err := x.Where("id > 0").Iterate(new(Repository), func(idx int, bean interface{}) error {
return deleteOldRepositoryArchives(ctx, olderThan, idx, bean)
}); err != nil {
log.Trace("Error: ArchiveClean: %v", err)
return err
for {
var archivers []RepoArchiver
err := x.Where("created_unix < ?", time.Now().Add(-olderThan).Unix()).
Asc("created_unix").
Limit(100).
Find(&archivers)
if err != nil {
log.Trace("Error: ArchiveClean: %v", err)
return err
}
for _, archiver := range archivers {
if err := deleteOldRepoArchiver(ctx, &archiver); err != nil {
return err
}
}
if len(archivers) < 100 {
break
}
}
log.Trace("Finished: ArchiveCleanup")
return nil
}
func deleteOldRepositoryArchives(ctx context.Context, olderThan time.Duration, idx int, bean interface{}) error {
repo := bean.(*Repository)
basePath := filepath.Join(repo.RepoPath(), "archives")
var delRepoArchiver = new(RepoArchiver)
for _, ty := range []string{"zip", "targz"} {
select {
case <-ctx.Done():
return ErrCancelledf("before deleting old repository archives with filetype %s for %s", ty, repo.FullName())
default:
}
path := filepath.Join(basePath, ty)
file, err := os.Open(path)
if err != nil {
if !os.IsNotExist(err) {
log.Warn("Unable to open directory %s: %v", path, err)
return err
}
// If the directory doesn't exist, that's okay.
continue
}
files, err := file.Readdir(0)
file.Close()
if err != nil {
log.Warn("Unable to read directory %s: %v", path, err)
return err
}
minimumOldestTime := time.Now().Add(-olderThan)
for _, info := range files {
if info.ModTime().Before(minimumOldestTime) && !info.IsDir() {
select {
case <-ctx.Done():
return ErrCancelledf("before deleting old repository archive file %s with filetype %s for %s", info.Name(), ty, repo.FullName())
default:
}
toDelete := filepath.Join(path, info.Name())
// This is a best-effort purge, so we do not check error codes to confirm removal.
if err = util.Remove(toDelete); err != nil {
log.Trace("Unable to delete %s, but proceeding: %v", toDelete, err)
}
}
}
// deleteOldRepoArchiver deletes one archiver row and then the archive file it
// points at in the repo-archive storage.
//
// A failure to resolve the file path or to delete the row is returned to the
// caller. A failure to delete the stored file is only logged, so the function
// still returns nil in that case.
func deleteOldRepoArchiver(ctx context.Context, archiver *RepoArchiver) error {
	// Resolve the storage path before the row is deleted.
	relPath, err := archiver.RelativePath()
	if err != nil {
		return err
	}

	if _, err = x.ID(archiver.ID).Delete(delRepoArchiver); err != nil {
		return err
	}

	// File removal is best effort: log and carry on.
	if rmErr := storage.RepoArchives.Delete(relPath); rmErr != nil {
		log.Error("delete repo archive file failed: %v", rmErr)
	}
	return nil
}

86
models/repo_archiver.go Normal file
View file

@ -0,0 +1,86 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package models
import (
"fmt"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/timeutil"
)
// RepoArchiverStatus is the state of a repository archive.
type RepoArchiverStatus int

// Known repository archive states.
//
// NOTE(review): these constants are untyped integers, not RepoArchiverStatus
// values; they convert implicitly wherever a RepoArchiverStatus is expected.
const (
	// RepoArchiverGenerating means the archive is still being generated.
	RepoArchiverGenerating = iota
	// RepoArchiverReady means the archive is ready.
	RepoArchiverReady
)
// RepoArchiver is the database record describing one archive of a repository.
// RepoID, Type and CommitID share the `unique(s)` tag, i.e. they are declared
// as one composite unique constraint.
type RepoArchiver struct {
// ID is the auto-incremented primary key.
ID int64 `xorm:"pk autoincr"`
// RepoID is the ID LoadRepo uses to look up the owning repository.
RepoID int64 `xorm:"index unique(s)"`
// Repo is excluded from the xorm mapping (`xorm:"-"`); when it is non-nil
// LoadRepo returns it instead of querying the database.
Repo *Repository `xorm:"-"`
// Type is the archive type; its String() form is the file extension used by
// RelativePath.
Type git.ArchiveType `xorm:"unique(s)"`
// Status is the archive state (see RepoArchiverGenerating / RepoArchiverReady).
Status RepoArchiverStatus
// CommitID names the archive file in RelativePath, and its first two
// characters form a directory level there.
// NOTE(review): presumably the full commit hash the archive was built from — confirm.
CommitID string `xorm:"VARCHAR(40) unique(s)"`
// CreatedUnix carries xorm's `created` tag.
// NOTE(review): presumably filled in by xorm on insert — confirm.
CreatedUnix timeutil.TimeStamp `xorm:"INDEX NOT NULL created"`
}
// LoadRepo returns the repository this archiver belongs to.
//
// If archiver.Repo is already set it is returned as is. Otherwise the
// repository is looked up by archiver.RepoID, stored in archiver.Repo so that
// later calls on the same archiver do not query the database again, and
// returned. ErrRepoNotExist is returned when no repository has that ID; any
// other lookup error is returned unchanged.
func (archiver *RepoArchiver) LoadRepo() (*Repository, error) {
	if archiver.Repo != nil {
		return archiver.Repo, nil
	}

	var repo Repository
	has, err := x.ID(archiver.RepoID).Get(&repo)
	if err != nil {
		return nil, err
	}
	if !has {
		return nil, ErrRepoNotExist{
			ID: archiver.RepoID,
		}
	}

	// Remember the result: without this the nil check above could only ever
	// succeed when a caller had assigned Repo by hand.
	archiver.Repo = &repo
	return archiver.Repo, nil
}
// RelativePath returns the path of the archive file relative to the archive
// storage root, in the form
//
//	<repo full name>/<first two chars of commit ID>/<commit ID>.<archive type>
//
// It returns an error when the repository cannot be loaded, or when CommitID
// is shorter than two characters and therefore cannot provide the directory
// prefix.
func (archiver *RepoArchiver) RelativePath() (string, error) {
	repo, err := archiver.LoadRepo()
	if err != nil {
		return "", err
	}

	// Slicing CommitID[:2] on a shorter string would panic; report it instead.
	if len(archiver.CommitID) < 2 {
		return "", fmt.Errorf("repo archiver %d has invalid commit id %q", archiver.ID, archiver.CommitID)
	}

	return fmt.Sprintf("%s/%s/%s.%s", repo.FullName(), archiver.CommitID[:2], archiver.CommitID, archiver.Type.String()), nil
}
// GetRepoArchiver looks up the archiver record matching the given repository
// ID, archive type and commit ID.
//
// It returns (nil, nil) when no such record exists and (nil, err) when the
// query fails.
func GetRepoArchiver(ctx DBContext, repoID int64, tp git.ArchiveType, commitID string) (*RepoArchiver, error) {
	found := new(RepoArchiver)
	query := ctx.e.
		Where("repo_id=?", repoID).
		And("`type`=?", tp).
		And("commit_id=?", commitID)

	exists, err := query.Get(found)
	switch {
	case err != nil:
		return nil, err
	case !exists:
		return nil, nil
	}
	return found, nil
}
// AddRepoArchiver inserts the given archiver record using the engine carried
// by ctx and returns any insert error.
func AddRepoArchiver(ctx DBContext, archiver *RepoArchiver) error {
	if _, err := ctx.e.Insert(archiver); err != nil {
		return err
	}
	return nil
}
// UpdateRepoArchiverStatus writes archiver.Status to the row identified by
// archiver.ID. Only the "status" column is included in the update.
func UpdateRepoArchiverStatus(ctx DBContext, archiver *RepoArchiver) error {
	if _, err := ctx.e.ID(archiver.ID).Cols("status").Update(archiver); err != nil {
		return err
	}
	return nil
}

View file

@ -74,6 +74,8 @@ func MainTest(m *testing.M, pathToGiteaRoot string) {
setting.RepoAvatar.Storage.Path = filepath.Join(setting.AppDataPath, "repo-avatars")
setting.RepoArchive.Storage.Path = filepath.Join(setting.AppDataPath, "repo-archive")
if err = storage.Init(); err != nil {
fatalTestError("storage.Init: %v\n", err)
}