Code/repo search (#2582)

Indexed search of repository contents (for default branch only)
This commit is contained in:
Ethan Koenig 2017-10-26 23:10:54 -07:00 committed by Lauris BH
parent 762f1d7237
commit 5866eb2321
33 changed files with 1214 additions and 31 deletions

View file

@ -0,0 +1 @@
[] # empty

View file

@ -144,6 +144,8 @@ var migrations = []Migration{
NewMigration("remove organization watch repositories", removeOrganizationWatchRepo),
// v47 -> v48
NewMigration("add deleted branches", addDeletedBranch),
// v48 -> v49
NewMigration("add repo indexer status", addRepoIndexerStatus),
}
// Migrate database to current version

25
models/migrations/v48.go Normal file
View file

@ -0,0 +1,25 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package migrations
import (
"fmt"
"github.com/go-xorm/xorm"
)
// addRepoIndexerStatus is the v48 -> v49 migration: it syncs the database
// schema for the repo indexer status table. Any error from the sync is
// returned wrapped with a "Sync2: " prefix.
func addRepoIndexerStatus(x *xorm.Engine) error {
	// RepoIndexerStatus see models/repo_indexer.go
	type RepoIndexerStatus struct {
		ID        int64  `xorm:"pk autoincr"`
		RepoID    int64  `xorm:"INDEX NOT NULL"`
		CommitSha string `xorm:"VARCHAR(40)"`
	}

	syncErr := x.Sync2(new(RepoIndexerStatus))
	if syncErr == nil {
		return nil
	}
	return fmt.Errorf("Sync2: %v", syncErr)
}

View file

@ -13,6 +13,10 @@ import (
"path"
"strings"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
// Needed for the MySQL driver
_ "github.com/go-sql-driver/mysql"
"github.com/go-xorm/core"
@ -23,9 +27,6 @@ import (
// Needed for the MSSQL driver
_ "github.com/denisenkom/go-mssqldb"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
)
// Engine represents a xorm engine or session.
@ -115,6 +116,7 @@ func init() {
new(Stopwatch),
new(TrackedTime),
new(DeletedBranch),
new(RepoIndexerStatus),
)
gonicNames := []string{"SSL", "UID"}
@ -150,8 +152,13 @@ func LoadConfigs() {
DbCfg.Timeout = sec.Key("SQLITE_TIMEOUT").MustInt(500)
sec = setting.Cfg.Section("indexer")
setting.Indexer.IssuePath = sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve")
setting.Indexer.IssuePath = absolutePath(
sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve"))
setting.Indexer.RepoIndexerEnabled = sec.Key("REPO_INDEXER_ENABLED").MustBool(false)
setting.Indexer.RepoPath = absolutePath(
sec.Key("REPO_INDEXER_PATH").MustString("indexers/repos.bleve"))
setting.Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
setting.Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(512 * 1024 * 1024)
}
// parsePostgreSQLHostPort parses given input in various forms defined in
@ -336,3 +343,12 @@ func DumpDatabase(filePath string, dbType string) error {
}
return x.DumpTablesToFile(tbs, filePath)
}
// absolutePath makes path absolute if it is relative, resolving it against
// the work directory reported by setting.WorkDir. A failure to determine the
// work directory is reported through log.Fatal.
func absolutePath(path string) string {
	baseDir, err := setting.WorkDir()
	if err != nil {
		log.Fatal(4, "Failed to get work directory: %v", err)
	}
	resolved := util.EnsureAbsolutePath(path, baseDir)
	return resolved
}

View file

@ -205,10 +205,11 @@ type Repository struct {
ExternalMetas map[string]string `xorm:"-"`
Units []*RepoUnit `xorm:"-"`
IsFork bool `xorm:"INDEX NOT NULL DEFAULT false"`
ForkID int64 `xorm:"INDEX"`
BaseRepo *Repository `xorm:"-"`
Size int64 `xorm:"NOT NULL DEFAULT 0"`
IsFork bool `xorm:"INDEX NOT NULL DEFAULT false"`
ForkID int64 `xorm:"INDEX"`
BaseRepo *Repository `xorm:"-"`
Size int64 `xorm:"NOT NULL DEFAULT 0"`
IndexerStatus *RepoIndexerStatus `xorm:"-"`
Created time.Time `xorm:"-"`
CreatedUnix int64 `xorm:"INDEX created"`
@ -782,8 +783,10 @@ func UpdateLocalCopyBranch(repoPath, localPath, branch string) error {
if err != nil {
return fmt.Errorf("git fetch origin: %v", err)
}
if err := git.ResetHEAD(localPath, true, "origin/"+branch); err != nil {
return fmt.Errorf("git reset --hard origin/%s: %v", branch, err)
if len(branch) > 0 {
if err := git.ResetHEAD(localPath, true, "origin/"+branch); err != nil {
return fmt.Errorf("git reset --hard origin/%s: %v", branch, err)
}
}
}
return nil
@ -989,6 +992,7 @@ func MigrateRepository(doer, u *User, opts MigrateRepoOptions) (*Repository, err
if err = SyncReleasesWithTags(repo, gitRepo); err != nil {
log.Error(4, "Failed to synchronize tags to releases for repository: %v", err)
}
UpdateRepoIndexer(repo)
}
if err = repo.UpdateSize(); err != nil {
@ -1883,6 +1887,7 @@ func DeleteRepository(doer *User, uid, repoID int64) error {
go HookQueue.Add(repo.ID)
}
DeleteRepoFromIndexer(repo)
return nil
}

View file

@ -178,6 +178,8 @@ func (repo *Repository) UpdateRepoFile(doer *User, opts UpdateRepoFileOptions) (
if err != nil {
return fmt.Errorf("PushUpdate: %v", err)
}
UpdateRepoIndexer(repo)
return nil
}

302
models/repo_indexer.go Normal file
View file

@ -0,0 +1,302 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package models
import (
"io/ioutil"
"os"
"path"
"strconv"
"strings"
"code.gitea.io/git"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"github.com/Unknwon/com"
)
// RepoIndexerStatus records the status of a repository's entry in the repo
// indexer. For now, it implicitly refers to the repository's default branch.
type RepoIndexerStatus struct {
// ID is the auto-incremented primary key of the status row.
ID int64 `xorm:"pk autoincr"`
// RepoID is the ID of the repository this status belongs to.
RepoID int64 `xorm:"INDEX"`
// CommitSha is the commit that was HEAD of the local copy at the last
// successful indexer sync; empty means the repository was never indexed.
CommitSha string `xorm:"VARCHAR(40)"`
}
// getIndexerStatus populates repo.IndexerStatus from the database unless it
// is already set. When no row exists for the repository, a status with an
// empty CommitSha is stored on repo instead.
func (repo *Repository) getIndexerStatus() error {
	if repo.IndexerStatus != nil {
		// already loaded; nothing to do
		return nil
	}

	loaded := &RepoIndexerStatus{RepoID: repo.ID}
	found, err := x.Get(loaded)
	switch {
	case err != nil:
		return err
	case !found:
		loaded.CommitSha = ""
	}
	repo.IndexerStatus = loaded
	return nil
}
// updateIndexerStatus records sha as the last indexed commit of repo. A
// status whose CommitSha is still empty is treated as not yet persisted and
// is inserted; otherwise only the commit_sha column of the existing row is
// updated.
func (repo *Repository) updateIndexerStatus(sha string) error {
	if err := repo.getIndexerStatus(); err != nil {
		return err
	}

	status := repo.IndexerStatus
	neverIndexed := len(status.CommitSha) == 0
	status.CommitSha = sha

	var err error
	if neverIndexed {
		_, err = x.Insert(status)
	} else {
		_, err = x.ID(status.ID).Cols("commit_sha").Update(status)
	}
	return err
}
// repoIndexerOperation is a unit of work for the repo indexer queue: either
// an update of a repository's entries or their removal.
type repoIndexerOperation struct {
// repo is the repository the operation applies to.
repo *Repository
// deleted selects the operation: true removes the repository from the
// indexer, false updates its entries.
deleted bool
}
// repoIndexerOperationQueue carries pending operations to
// processRepoIndexerOperationQueue. It is created by InitRepoIndexer and
// remains nil while the repo indexer is disabled.
var repoIndexerOperationQueue chan repoIndexerOperation
// InitRepoIndexer initializes the repo indexer and starts the goroutine that
// processes queued indexer operations. It does nothing when the repo indexer
// is disabled in the settings.
func InitRepoIndexer() {
	if !setting.Indexer.RepoIndexerEnabled {
		return
	}
	// Create the queue before initializing (and possibly populating) the
	// indexer: while the queue is nil, addOperationToQueue can only park the
	// operation in a goroutine that blocks forever on the nil channel, so any
	// update requested in the meantime would be lost. With the queue in place
	// such operations are buffered and handled once processing starts below.
	repoIndexerOperationQueue = make(chan repoIndexerOperation, setting.Indexer.UpdateQueueLength)
	indexer.InitRepoIndexer(populateRepoIndexer)
	go processRepoIndexerOperationQueue()
}
// populateRepoIndexer populates the repo indexer with data by walking all
// repositories page by page (ordered by ID) and indexing each one. A failing
// search aborts the walk with that error; a failure to index an individual
// repository is only logged.
func populateRepoIndexer() error {
	log.Info("Populating repository indexer (this may take a while)")

	page := 1
	for {
		repos, _, err := SearchRepositoryByName(&SearchRepoOptions{
			Page:     page,
			PageSize: 10,
			OrderBy:  SearchOrderByID,
			Private:  true,
		})
		if err != nil {
			return err
		}
		if len(repos) == 0 {
			// ran past the last page
			return nil
		}

		for _, repo := range repos {
			indexErr := updateRepoIndexer(repo)
			if indexErr == nil {
				continue
			}
			// only log error, since this should not prevent
			// gitea from starting up
			log.Error(4, "updateRepoIndexer: repoID=%d, %v", repo.ID, indexErr)
		}
		page++
	}
}
// updateBatch groups a list of repo indexer updates.
// NOTE(review): nothing in the visible part of this file references this
// type — confirm whether it is still needed.
type updateBatch struct {
// updates holds the individual indexer updates of the batch.
updates []indexer.RepoIndexerUpdate
}
// updateRepoIndexer brings the indexer entries of repo up to date: it
// determines the changes since the last sync, queues every updated and
// removed file in one batch, flushes the batch and finally records the new
// sync point. It returns nil without doing anything when there are no changes
// to compute (getRepoChanges returned nil).
func updateRepoIndexer(repo *Repository) error {
	changes, err := getRepoChanges(repo)
	if err != nil {
		return err
	}
	if changes == nil {
		return nil
	}

	batch := indexer.RepoIndexerBatch()
	for i := range changes.UpdatedFiles {
		if addErr := addUpdate(changes.UpdatedFiles[i], repo, batch); addErr != nil {
			return addErr
		}
	}
	for i := range changes.RemovedFiles {
		if delErr := addDelete(changes.RemovedFiles[i], repo, batch); delErr != nil {
			return delErr
		}
	}

	if flushErr := batch.Flush(); flushErr != nil {
		return flushErr
	}
	return updateLastIndexSync(repo)
}
// repoChanges describes the changes (file additions/updates/removals) to a
// repo that still have to be applied to the indexer.
type repoChanges struct {
// UpdatedFiles lists the files, by path relative to the repository root,
// that were added or modified.
UpdatedFiles []string
// RemovedFiles lists the files, by path relative to the repository root,
// that were deleted.
RemovedFiles []string
}
// getRepoChanges returns the changes to repo since the last indexer update.
// It refreshes the repository's local copy while holding the repository's
// slot in repoWorkingPool. A nil result with a nil error means the default
// branch does not exist yet, so there is nothing to index.
func getRepoChanges(repo *Repository) (*repoChanges, error) {
	poolKey := com.ToStr(repo.ID)
	repoWorkingPool.CheckIn(poolKey)
	defer repoWorkingPool.CheckOut(poolKey)

	// Refresh the local copy without selecting a branch first, so that the
	// existence of the default branch can be checked.
	if err := repo.UpdateLocalCopyBranch(""); err != nil {
		return nil, err
	}
	if !git.IsBranchExist(repo.LocalCopyPath(), repo.DefaultBranch) {
		// repo does not have any commits yet, so nothing to update
		return nil, nil
	}
	if err := repo.UpdateLocalCopyBranch(repo.DefaultBranch); err != nil {
		return nil, err
	}
	if err := repo.getIndexerStatus(); err != nil {
		return nil, err
	}

	if neverIndexed := len(repo.IndexerStatus.CommitSha) == 0; neverIndexed {
		return genesisChanges(repo)
	}
	return nonGenesisChanges(repo)
}
// addUpdate queues an indexer update for filename (a path relative to the
// root of repo's local copy) on batch.
//
// Nothing is queued, and nil is returned, when the path is a directory, when
// the file is larger than setting.Indexer.MaxIndexerFileSize, or when its
// contents are not text. Errors from inspecting or reading the file and from
// batch.Add are returned.
func addUpdate(filename string, repo *Repository, batch *indexer.Batch) error {
	localPath := path.Join(repo.LocalCopyPath(), filename)

	stat, err := os.Stat(localPath)
	if err != nil {
		return err
	}
	if stat.IsDir() {
		// The path may be a directory when it is the root of a submodule:
		// git lists the submodule entry like a file. Submodule contents are
		// not indexed, and trying to read the directory below would fail the
		// update of the whole repository.
		return nil
	}
	if stat.Size() > setting.Indexer.MaxIndexerFileSize {
		return nil
	}

	fileContents, err := ioutil.ReadFile(localPath)
	if err != nil {
		return err
	}
	if !base.IsTextFile(fileContents) {
		return nil
	}

	return batch.Add(indexer.RepoIndexerUpdate{
		Filepath: filename,
		Op:       indexer.RepoIndexerOpUpdate,
		Data: &indexer.RepoIndexerData{
			RepoID:  repo.ID,
			Content: string(fileContents),
		},
	})
}
// addDelete queues, on batch, the removal of filename's entry for repo from
// the indexer and returns the result of batch.Add.
func addDelete(filename string, repo *Repository, batch *indexer.Batch) error {
	removal := indexer.RepoIndexerUpdate{
		Filepath: filename,
		Op:       indexer.RepoIndexerOpDelete,
		Data:     &indexer.RepoIndexerData{RepoID: repo.ID},
	}
	return batch.Add(removal)
}
// genesisChanges gets the changes needed to add repo to the indexer for the
// first time: every path reported by `git ls-files` in the local copy is
// returned as an updated file.
func genesisChanges(repo *Repository) (*repoChanges, error) {
	output, err := git.NewCommand("ls-files").RunInDir(repo.LocalCopyPath())
	if err != nil {
		return nil, err
	}

	changes := new(repoChanges)
	for _, rawLine := range strings.Split(output, "\n") {
		name := strings.TrimSpace(rawLine)
		if name == "" {
			continue
		}
		if name[0] == '"' {
			// the name is printed in double quotes with escapes; decode it
			unquoted, unquoteErr := strconv.Unquote(name)
			if unquoteErr != nil {
				return nil, unquoteErr
			}
			name = unquoted
		}
		changes.UpdatedFiles = append(changes.UpdatedFiles, name)
	}
	return changes, nil
}
// nonGenesisChanges gets the changes since the previous indexer update by
// parsing `git diff --name-status <last indexed commit> HEAD` run in the
// repository's local copy.
//
// Modified and added files are reported as updated, deleted files as removed.
// A rename is reported as a removal of the old path plus an update of the new
// path, and a copy as an update of the new path. Lines with any other status
// are logged and skipped.
//
// If the diff command fails, the repository's entries are deleted from the
// indexer and the result of genesisChanges is returned instead.
func nonGenesisChanges(repo *Repository) (*repoChanges, error) {
	diffCmd := git.NewCommand("diff", "--name-status",
		repo.IndexerStatus.CommitSha, "HEAD")
	stdout, err := diffCmd.RunInDir(repo.LocalCopyPath())
	if err != nil {
		// previous commit sha may have been removed by a force push, so
		// try rebuilding from scratch
		if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil {
			return nil, err
		}
		return genesisChanges(repo)
	}

	// unquote trims a path and decodes it when it is printed in double quotes.
	unquote := func(name string) (string, error) {
		name = strings.TrimSpace(name)
		if len(name) > 0 && name[0] == '"' {
			return strconv.Unquote(name)
		}
		return name, nil
	}

	var changes repoChanges
	for _, line := range strings.Split(stdout, "\n") {
		line = strings.TrimSpace(line)
		if len(line) == 0 {
			continue
		}

		switch status := line[0]; status {
		case 'M', 'A', 'D':
			// "<status>\t<path>"
			filename, err := unquote(line[1:])
			if err != nil {
				return nil, err
			}
			if len(filename) == 0 {
				continue
			}
			if status == 'D' {
				changes.RemovedFiles = append(changes.RemovedFiles, filename)
			} else {
				changes.UpdatedFiles = append(changes.UpdatedFiles, filename)
			}
		case 'R', 'C':
			// "<status><score>\t<source path>\t<destination path>"
			fields := strings.Split(line, "\t")
			if len(fields) != 3 {
				log.Warn("Unrecognized status: %c (line=%s)", status, line)
				continue
			}
			source, err := unquote(fields[1])
			if err != nil {
				return nil, err
			}
			dest, err := unquote(fields[2])
			if err != nil {
				return nil, err
			}
			// A rename leaves nothing at the old path, so its entry must go;
			// a copy keeps the source untouched.
			if status == 'R' && len(source) > 0 {
				changes.RemovedFiles = append(changes.RemovedFiles, source)
			}
			if len(dest) > 0 {
				changes.UpdatedFiles = append(changes.UpdatedFiles, dest)
			}
		default:
			log.Warn("Unrecognized status: %c (line=%s)", status, line)
		}
	}
	return &changes, nil
}
// updateLastIndexSync stores the commit currently checked out as HEAD in the
// repository's local copy as the last commit synced into the indexer.
func updateLastIndexSync(repo *Repository) error {
	revParse := git.NewCommand("rev-parse", "HEAD")
	output, err := revParse.RunInDir(repo.LocalCopyPath())
	if err != nil {
		return err
	}
	return repo.updateIndexerStatus(strings.TrimSpace(output))
}
// processRepoIndexerOperationQueue receives operations from
// repoIndexerOperationQueue in an endless loop and applies them one at a
// time. Failures are logged and do not stop the loop.
func processRepoIndexerOperationQueue() {
	for {
		op := <-repoIndexerOperationQueue

		if !op.deleted {
			if err := updateRepoIndexer(op.repo); err != nil {
				log.Error(4, "updateRepoIndexer: %v", err)
			}
			continue
		}

		if err := indexer.DeleteRepoFromIndexer(op.repo.ID); err != nil {
			log.Error(4, "DeleteRepoFromIndexer: %v", err)
		}
	}
}
// DeleteRepoFromIndexer queues the removal of all of a repository's entries
// from the indexer.
func DeleteRepoFromIndexer(repo *Repository) {
	removal := repoIndexerOperation{
		repo:    repo,
		deleted: true,
	}
	addOperationToQueue(removal)
}
// UpdateRepoIndexer queues an update of a repository's entries in the
// indexer.
func UpdateRepoIndexer(repo *Repository) {
	update := repoIndexerOperation{
		repo:    repo,
		deleted: false,
	}
	addOperationToQueue(update)
}
// addOperationToQueue hands op to the indexer queue without blocking the
// caller: if the queue cannot accept the operation right away, the send is
// completed from a new goroutine. Operations are dropped when the repo
// indexer is disabled.
func addOperationToQueue(op repoIndexerOperation) {
	if !setting.Indexer.RepoIndexerEnabled {
		return
	}

	sendLater := func() {
		repoIndexerOperationQueue <- op
	}

	select {
	case repoIndexerOperationQueue <- op:
		// accepted immediately
	default:
		go sendLater()
	}
}

View file

@ -263,6 +263,10 @@ func pushUpdate(opts PushUpdateOptions) (repo *Repository, err error) {
commits = ListToPushCommits(l)
}
if opts.RefFullName == git.BranchPrefix+repo.DefaultBranch {
UpdateRepoIndexer(repo)
}
if err := CommitRepoAction(CommitRepoActionOptions{
PusherName: opts.PusherName,
RepoOwnerID: owner.ID,