Code/repo search (#2582)
Indexed search of repository contents (for default branch only)
This commit is contained in:
parent
762f1d7237
commit
5866eb2321
33 changed files with 1214 additions and 31 deletions
1
models/fixtures/repo_indexer_status.yml
Normal file
1
models/fixtures/repo_indexer_status.yml
Normal file
|
@ -0,0 +1 @@
|
|||
[] # empty
|
|
@ -144,6 +144,8 @@ var migrations = []Migration{
|
|||
NewMigration("remove organization watch repositories", removeOrganizationWatchRepo),
|
||||
// v47 -> v48
|
||||
NewMigration("add deleted branches", addDeletedBranch),
|
||||
// v48 -> v49
|
||||
NewMigration("add repo indexer status", addRepoIndexerStatus),
|
||||
}
|
||||
|
||||
// Migrate database to current version
|
||||
|
|
25
models/migrations/v48.go
Normal file
25
models/migrations/v48.go
Normal file
|
@ -0,0 +1,25 @@
|
|||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package migrations
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-xorm/xorm"
|
||||
)
|
||||
|
||||
func addRepoIndexerStatus(x *xorm.Engine) error {
|
||||
// RepoIndexerStatus see models/repo_indexer.go
|
||||
type RepoIndexerStatus struct {
|
||||
ID int64 `xorm:"pk autoincr"`
|
||||
RepoID int64 `xorm:"INDEX NOT NULL"`
|
||||
CommitSha string `xorm:"VARCHAR(40)"`
|
||||
}
|
||||
|
||||
if err := x.Sync2(new(RepoIndexerStatus)); err != nil {
|
||||
return fmt.Errorf("Sync2: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -13,6 +13,10 @@ import (
|
|||
"path"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
|
||||
// Needed for the MySQL driver
|
||||
_ "github.com/go-sql-driver/mysql"
|
||||
"github.com/go-xorm/core"
|
||||
|
@ -23,9 +27,6 @@ import (
|
|||
|
||||
// Needed for the MSSQL driver
|
||||
_ "github.com/denisenkom/go-mssqldb"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
)
|
||||
|
||||
// Engine represents a xorm engine or session.
|
||||
|
@ -115,6 +116,7 @@ func init() {
|
|||
new(Stopwatch),
|
||||
new(TrackedTime),
|
||||
new(DeletedBranch),
|
||||
new(RepoIndexerStatus),
|
||||
)
|
||||
|
||||
gonicNames := []string{"SSL", "UID"}
|
||||
|
@ -150,8 +152,13 @@ func LoadConfigs() {
|
|||
DbCfg.Timeout = sec.Key("SQLITE_TIMEOUT").MustInt(500)
|
||||
|
||||
sec = setting.Cfg.Section("indexer")
|
||||
setting.Indexer.IssuePath = sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve")
|
||||
setting.Indexer.IssuePath = absolutePath(
|
||||
sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve"))
|
||||
setting.Indexer.RepoIndexerEnabled = sec.Key("REPO_INDEXER_ENABLED").MustBool(false)
|
||||
setting.Indexer.RepoPath = absolutePath(
|
||||
sec.Key("REPO_INDEXER_PATH").MustString("indexers/repos.bleve"))
|
||||
setting.Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
|
||||
setting.Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(512 * 1024 * 1024)
|
||||
}
|
||||
|
||||
// parsePostgreSQLHostPort parses given input in various forms defined in
|
||||
|
@ -336,3 +343,12 @@ func DumpDatabase(filePath string, dbType string) error {
|
|||
}
|
||||
return x.DumpTablesToFile(tbs, filePath)
|
||||
}
|
||||
|
||||
// absolutePath make path absolute if it is relative
|
||||
func absolutePath(path string) string {
|
||||
workDir, err := setting.WorkDir()
|
||||
if err != nil {
|
||||
log.Fatal(4, "Failed to get work directory: %v", err)
|
||||
}
|
||||
return util.EnsureAbsolutePath(path, workDir)
|
||||
}
|
||||
|
|
|
@ -205,10 +205,11 @@ type Repository struct {
|
|||
ExternalMetas map[string]string `xorm:"-"`
|
||||
Units []*RepoUnit `xorm:"-"`
|
||||
|
||||
IsFork bool `xorm:"INDEX NOT NULL DEFAULT false"`
|
||||
ForkID int64 `xorm:"INDEX"`
|
||||
BaseRepo *Repository `xorm:"-"`
|
||||
Size int64 `xorm:"NOT NULL DEFAULT 0"`
|
||||
IsFork bool `xorm:"INDEX NOT NULL DEFAULT false"`
|
||||
ForkID int64 `xorm:"INDEX"`
|
||||
BaseRepo *Repository `xorm:"-"`
|
||||
Size int64 `xorm:"NOT NULL DEFAULT 0"`
|
||||
IndexerStatus *RepoIndexerStatus `xorm:"-"`
|
||||
|
||||
Created time.Time `xorm:"-"`
|
||||
CreatedUnix int64 `xorm:"INDEX created"`
|
||||
|
@ -782,8 +783,10 @@ func UpdateLocalCopyBranch(repoPath, localPath, branch string) error {
|
|||
if err != nil {
|
||||
return fmt.Errorf("git fetch origin: %v", err)
|
||||
}
|
||||
if err := git.ResetHEAD(localPath, true, "origin/"+branch); err != nil {
|
||||
return fmt.Errorf("git reset --hard origin/%s: %v", branch, err)
|
||||
if len(branch) > 0 {
|
||||
if err := git.ResetHEAD(localPath, true, "origin/"+branch); err != nil {
|
||||
return fmt.Errorf("git reset --hard origin/%s: %v", branch, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
@ -989,6 +992,7 @@ func MigrateRepository(doer, u *User, opts MigrateRepoOptions) (*Repository, err
|
|||
if err = SyncReleasesWithTags(repo, gitRepo); err != nil {
|
||||
log.Error(4, "Failed to synchronize tags to releases for repository: %v", err)
|
||||
}
|
||||
UpdateRepoIndexer(repo)
|
||||
}
|
||||
|
||||
if err = repo.UpdateSize(); err != nil {
|
||||
|
@ -1883,6 +1887,7 @@ func DeleteRepository(doer *User, uid, repoID int64) error {
|
|||
go HookQueue.Add(repo.ID)
|
||||
}
|
||||
|
||||
DeleteRepoFromIndexer(repo)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -178,6 +178,8 @@ func (repo *Repository) UpdateRepoFile(doer *User, opts UpdateRepoFileOptions) (
|
|||
if err != nil {
|
||||
return fmt.Errorf("PushUpdate: %v", err)
|
||||
}
|
||||
UpdateRepoIndexer(repo)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
302
models/repo_indexer.go
Normal file
302
models/repo_indexer.go
Normal file
|
@ -0,0 +1,302 @@
|
|||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package models
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/git"
|
||||
"code.gitea.io/gitea/modules/base"
|
||||
"code.gitea.io/gitea/modules/indexer"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
|
||||
"github.com/Unknwon/com"
|
||||
)
|
||||
|
||||
// RepoIndexerStatus records the state of a repository's entry in the repo
// indexer. For now it implicitly refers to the repository's default branch.
type RepoIndexerStatus struct {
	// ID is the auto-incremented primary key of the status row.
	ID int64 `xorm:"pk autoincr"`
	// RepoID is the ID of the repository this status belongs to.
	RepoID int64 `xorm:"INDEX"`
	// CommitSha is the commit the index was last synchronized to; it is
	// empty when the repository has not been indexed yet.
	CommitSha string `xorm:"VARCHAR(40)"`
}
|
||||
|
||||
func (repo *Repository) getIndexerStatus() error {
|
||||
if repo.IndexerStatus != nil {
|
||||
return nil
|
||||
}
|
||||
status := &RepoIndexerStatus{RepoID: repo.ID}
|
||||
has, err := x.Get(status)
|
||||
if err != nil {
|
||||
return err
|
||||
} else if !has {
|
||||
status.CommitSha = ""
|
||||
}
|
||||
repo.IndexerStatus = status
|
||||
return nil
|
||||
}
|
||||
|
||||
func (repo *Repository) updateIndexerStatus(sha string) error {
|
||||
if err := repo.getIndexerStatus(); err != nil {
|
||||
return err
|
||||
}
|
||||
if len(repo.IndexerStatus.CommitSha) == 0 {
|
||||
repo.IndexerStatus.CommitSha = sha
|
||||
_, err := x.Insert(repo.IndexerStatus)
|
||||
return err
|
||||
}
|
||||
repo.IndexerStatus.CommitSha = sha
|
||||
_, err := x.ID(repo.IndexerStatus.ID).Cols("commit_sha").
|
||||
Update(repo.IndexerStatus)
|
||||
return err
|
||||
}
|
||||
|
||||
type repoIndexerOperation struct {
|
||||
repo *Repository
|
||||
deleted bool
|
||||
}
|
||||
|
||||
var repoIndexerOperationQueue chan repoIndexerOperation
|
||||
|
||||
// InitRepoIndexer initialize the repo indexer
|
||||
func InitRepoIndexer() {
|
||||
if !setting.Indexer.RepoIndexerEnabled {
|
||||
return
|
||||
}
|
||||
indexer.InitRepoIndexer(populateRepoIndexer)
|
||||
repoIndexerOperationQueue = make(chan repoIndexerOperation, setting.Indexer.UpdateQueueLength)
|
||||
go processRepoIndexerOperationQueue()
|
||||
}
|
||||
|
||||
// populateRepoIndexer populate the repo indexer with data
|
||||
func populateRepoIndexer() error {
|
||||
log.Info("Populating repository indexer (this may take a while)")
|
||||
for page := 1; ; page++ {
|
||||
repos, _, err := SearchRepositoryByName(&SearchRepoOptions{
|
||||
Page: page,
|
||||
PageSize: 10,
|
||||
OrderBy: SearchOrderByID,
|
||||
Private: true,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
} else if len(repos) == 0 {
|
||||
return nil
|
||||
}
|
||||
for _, repo := range repos {
|
||||
if err = updateRepoIndexer(repo); err != nil {
|
||||
// only log error, since this should not prevent
|
||||
// gitea from starting up
|
||||
log.Error(4, "updateRepoIndexer: repoID=%d, %v", repo.ID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type updateBatch struct {
|
||||
updates []indexer.RepoIndexerUpdate
|
||||
}
|
||||
|
||||
func updateRepoIndexer(repo *Repository) error {
|
||||
changes, err := getRepoChanges(repo)
|
||||
if err != nil {
|
||||
return err
|
||||
} else if changes == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
batch := indexer.RepoIndexerBatch()
|
||||
for _, filename := range changes.UpdatedFiles {
|
||||
if err := addUpdate(filename, repo, batch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, filename := range changes.RemovedFiles {
|
||||
if err := addDelete(filename, repo, batch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err = batch.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
return updateLastIndexSync(repo)
|
||||
}
|
||||
|
||||
// repoChanges describes the file-level changes (additions, updates and
// removals) that have to be applied to a repository's index entries.
type repoChanges struct {
	// UpdatedFiles lists files that were added or modified.
	UpdatedFiles []string
	// RemovedFiles lists files that were deleted.
	RemovedFiles []string
}
|
||||
|
||||
// getRepoChanges returns changes to repo since last indexer update
|
||||
func getRepoChanges(repo *Repository) (*repoChanges, error) {
|
||||
repoWorkingPool.CheckIn(com.ToStr(repo.ID))
|
||||
defer repoWorkingPool.CheckOut(com.ToStr(repo.ID))
|
||||
|
||||
if err := repo.UpdateLocalCopyBranch(""); err != nil {
|
||||
return nil, err
|
||||
} else if !git.IsBranchExist(repo.LocalCopyPath(), repo.DefaultBranch) {
|
||||
// repo does not have any commits yet, so nothing to update
|
||||
return nil, nil
|
||||
} else if err = repo.UpdateLocalCopyBranch(repo.DefaultBranch); err != nil {
|
||||
return nil, err
|
||||
} else if err = repo.getIndexerStatus(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(repo.IndexerStatus.CommitSha) == 0 {
|
||||
return genesisChanges(repo)
|
||||
}
|
||||
return nonGenesisChanges(repo)
|
||||
}
|
||||
|
||||
func addUpdate(filename string, repo *Repository, batch *indexer.Batch) error {
|
||||
filepath := path.Join(repo.LocalCopyPath(), filename)
|
||||
if stat, err := os.Stat(filepath); err != nil {
|
||||
return err
|
||||
} else if stat.Size() > setting.Indexer.MaxIndexerFileSize {
|
||||
return nil
|
||||
}
|
||||
fileContents, err := ioutil.ReadFile(filepath)
|
||||
if err != nil {
|
||||
return err
|
||||
} else if !base.IsTextFile(fileContents) {
|
||||
return nil
|
||||
}
|
||||
return batch.Add(indexer.RepoIndexerUpdate{
|
||||
Filepath: filename,
|
||||
Op: indexer.RepoIndexerOpUpdate,
|
||||
Data: &indexer.RepoIndexerData{
|
||||
RepoID: repo.ID,
|
||||
Content: string(fileContents),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func addDelete(filename string, repo *Repository, batch *indexer.Batch) error {
|
||||
return batch.Add(indexer.RepoIndexerUpdate{
|
||||
Filepath: filename,
|
||||
Op: indexer.RepoIndexerOpDelete,
|
||||
Data: &indexer.RepoIndexerData{
|
||||
RepoID: repo.ID,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// genesisChanges get changes to add repo to the indexer for the first time
|
||||
func genesisChanges(repo *Repository) (*repoChanges, error) {
|
||||
var changes repoChanges
|
||||
stdout, err := git.NewCommand("ls-files").RunInDir(repo.LocalCopyPath())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, line := range strings.Split(stdout, "\n") {
|
||||
filename := strings.TrimSpace(line)
|
||||
if len(filename) == 0 {
|
||||
continue
|
||||
} else if filename[0] == '"' {
|
||||
filename, err = strconv.Unquote(filename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
changes.UpdatedFiles = append(changes.UpdatedFiles, filename)
|
||||
}
|
||||
return &changes, nil
|
||||
}
|
||||
|
||||
// nonGenesisChanges get changes since the previous indexer update
|
||||
func nonGenesisChanges(repo *Repository) (*repoChanges, error) {
|
||||
diffCmd := git.NewCommand("diff", "--name-status",
|
||||
repo.IndexerStatus.CommitSha, "HEAD")
|
||||
stdout, err := diffCmd.RunInDir(repo.LocalCopyPath())
|
||||
if err != nil {
|
||||
// previous commit sha may have been removed by a force push, so
|
||||
// try rebuilding from scratch
|
||||
if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return genesisChanges(repo)
|
||||
}
|
||||
var changes repoChanges
|
||||
for _, line := range strings.Split(stdout, "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
filename := strings.TrimSpace(line[1:])
|
||||
if len(filename) == 0 {
|
||||
continue
|
||||
} else if filename[0] == '"' {
|
||||
filename, err = strconv.Unquote(filename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
switch status := line[0]; status {
|
||||
case 'M', 'A':
|
||||
changes.UpdatedFiles = append(changes.UpdatedFiles, filename)
|
||||
case 'D':
|
||||
changes.RemovedFiles = append(changes.RemovedFiles, filename)
|
||||
default:
|
||||
log.Warn("Unrecognized status: %c (line=%s)", status, line)
|
||||
}
|
||||
}
|
||||
return &changes, nil
|
||||
}
|
||||
|
||||
func updateLastIndexSync(repo *Repository) error {
|
||||
stdout, err := git.NewCommand("rev-parse", "HEAD").RunInDir(repo.LocalCopyPath())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sha := strings.TrimSpace(stdout)
|
||||
return repo.updateIndexerStatus(sha)
|
||||
}
|
||||
|
||||
func processRepoIndexerOperationQueue() {
|
||||
for {
|
||||
op := <-repoIndexerOperationQueue
|
||||
if op.deleted {
|
||||
if err := indexer.DeleteRepoFromIndexer(op.repo.ID); err != nil {
|
||||
log.Error(4, "DeleteRepoFromIndexer: %v", err)
|
||||
}
|
||||
} else {
|
||||
if err := updateRepoIndexer(op.repo); err != nil {
|
||||
log.Error(4, "updateRepoIndexer: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DeleteRepoFromIndexer remove all of a repository's entries from the indexer
|
||||
func DeleteRepoFromIndexer(repo *Repository) {
|
||||
addOperationToQueue(repoIndexerOperation{repo: repo, deleted: true})
|
||||
}
|
||||
|
||||
// UpdateRepoIndexer update a repository's entries in the indexer
|
||||
func UpdateRepoIndexer(repo *Repository) {
|
||||
addOperationToQueue(repoIndexerOperation{repo: repo, deleted: false})
|
||||
}
|
||||
|
||||
func addOperationToQueue(op repoIndexerOperation) {
|
||||
if !setting.Indexer.RepoIndexerEnabled {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case repoIndexerOperationQueue <- op:
|
||||
break
|
||||
default:
|
||||
go func() {
|
||||
repoIndexerOperationQueue <- op
|
||||
}()
|
||||
}
|
||||
}
|
|
@ -263,6 +263,10 @@ func pushUpdate(opts PushUpdateOptions) (repo *Repository, err error) {
|
|||
commits = ListToPushCommits(l)
|
||||
}
|
||||
|
||||
if opts.RefFullName == git.BranchPrefix+repo.DefaultBranch {
|
||||
UpdateRepoIndexer(repo)
|
||||
}
|
||||
|
||||
if err := CommitRepoAction(CommitRepoActionOptions{
|
||||
PusherName: opts.PusherName,
|
||||
RepoOwnerID: owner.ID,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue