diff --git a/docs/content/administration/repo-indexer.en-us.md b/docs/content/administration/repo-indexer.en-us.md index 6dec2d63f..aa8222291 100644 --- a/docs/content/administration/repo-indexer.en-us.md +++ b/docs/content/administration/repo-indexer.en-us.md @@ -17,6 +17,12 @@ menu: # Repository indexer +## Builtin repository code search without indexer + +Users can do repository-level code search without setting up a repository indexer. +The builtin code search is based on the `git grep` command, which is fast and efficient for small repositories. +Better code search support can be achieved by setting up the repository indexer. + ## Setting up the repository indexer Gitea can search through the files of the repositories by enabling this function in your [`app.ini`](administration/config-cheat-sheet.md): diff --git a/docs/content/installation/comparison.en-us.md b/docs/content/installation/comparison.en-us.md index 1ba4f7ecc..3fb6561f3 100644 --- a/docs/content/installation/comparison.en-us.md +++ b/docs/content/installation/comparison.en-us.md @@ -87,6 +87,9 @@ _Symbols used in table:_ | Git Blame | ✓ | ✘ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Visual comparison of image changes | ✓ | ✘ | ✓ | ? | ? | ? 
| ✘ | ✘ | +- Gitea has builtin repository-level code search +- Better code search support can be achieved by [using a repository indexer](administration/repo-indexer.md) + ## Issue Tracker | Feature | Gitea | Gogs | GitHub EE | GitLab CE | GitLab EE | BitBucket | RhodeCode CE | RhodeCode EE | diff --git a/modules/git/command.go b/modules/git/command.go index 371109730..22cb275ab 100644 --- a/modules/git/command.go +++ b/modules/git/command.go @@ -367,7 +367,6 @@ type RunStdError interface { error Unwrap() error Stderr() string - IsExitCode(code int) bool } type runStdError struct { @@ -392,9 +391,9 @@ func (r *runStdError) Stderr() string { return r.stderr } -func (r *runStdError) IsExitCode(code int) bool { +func IsErrorExitCode(err error, code int) bool { var exitError *exec.ExitError - if errors.As(r.err, &exitError) { + if errors.As(err, &exitError) { return exitError.ExitCode() == code } return false diff --git a/modules/git/git.go b/modules/git/git.go index 13a312749..a2e65257a 100644 --- a/modules/git/git.go +++ b/modules/git/git.go @@ -340,7 +340,7 @@ func CheckGitVersionEqual(equal string) error { func configSet(key, value string) error { stdout, _, err := NewCommand(DefaultContext, "config", "--global", "--get").AddDynamicArguments(key).RunStdString(nil) - if err != nil && !err.IsExitCode(1) { + if err != nil && !IsErrorExitCode(err, 1) { return fmt.Errorf("failed to get git config %s, err: %w", key, err) } @@ -363,7 +363,7 @@ func configSetNonExist(key, value string) error { // already exist return nil } - if err.IsExitCode(1) { + if IsErrorExitCode(err, 1) { // not exist, set new config _, _, err = NewCommand(DefaultContext, "config", "--global").AddDynamicArguments(key, value).RunStdString(nil) if err != nil { @@ -381,7 +381,7 @@ func configAddNonExist(key, value string) error { // already exist return nil } - if err.IsExitCode(1) { + if IsErrorExitCode(err, 1) { // not exist, add new config _, _, err = NewCommand(DefaultContext, "config", 
"--global", "--add").AddDynamicArguments(key, value).RunStdString(nil) if err != nil { @@ -402,7 +402,7 @@ func configUnsetAll(key, value string) error { } return nil } - if err.IsExitCode(1) { + if IsErrorExitCode(err, 1) { // not exist return nil } diff --git a/modules/git/grep.go b/modules/git/grep.go new file mode 100644 index 000000000..a6c486112 --- /dev/null +++ b/modules/git/grep.go @@ -0,0 +1,118 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "bufio" + "bytes" + "context" + "errors" + "fmt" + "os" + "strconv" + "strings" + + "code.gitea.io/gitea/modules/util" +) + +type GrepResult struct { + Filename string + LineNumbers []int + LineCodes []string +} + +type GrepOptions struct { + RefName string + MaxResultLimit int + ContextLineNumber int + IsFuzzy bool +} + +func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) { + stdoutReader, stdoutWriter, err := os.Pipe() + if err != nil { + return nil, fmt.Errorf("unable to create os pipe to grep: %w", err) + } + defer func() { + _ = stdoutReader.Close() + _ = stdoutWriter.Close() + }() + + /* + The output is like this ( "^@" means \x00): + + HEAD:.air.toml + 6^@bin = "gitea" + + HEAD:.changelog.yml + 2^@repo: go-gitea/gitea + */ + var results []*GrepResult + cmd := NewCommand(ctx, "grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name") + cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber)) + if opts.IsFuzzy { + words := strings.Fields(search) + for _, word := range words { + cmd.AddOptionValues("-e", strings.TrimLeft(word, "-")) + } + } else { + cmd.AddOptionValues("-e", strings.TrimLeft(search, "-")) + } + cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD")) + opts.MaxResultLimit = util.IfZero(opts.MaxResultLimit, 50) + stderr := bytes.Buffer{} + err = cmd.Run(&RunOpts{ + Dir: repo.Path, + Stdout: 
stdoutWriter, + Stderr: &stderr, + PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error { + _ = stdoutWriter.Close() + defer stdoutReader.Close() + + isInBlock := false + scanner := bufio.NewScanner(stdoutReader) + var res *GrepResult + for scanner.Scan() { + line := scanner.Text() + if !isInBlock { + if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok { + isInBlock = true + res = &GrepResult{Filename: filename} + results = append(results, res) + } + continue + } + if line == "" { + if len(results) >= opts.MaxResultLimit { + cancel() + break + } + isInBlock = false + continue + } + if line == "--" { + continue + } + if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok { + lineNumInt, _ := strconv.Atoi(lineNum) + res.LineNumbers = append(res.LineNumbers, lineNumInt) + res.LineCodes = append(res.LineCodes, lineCode) + } + } + return scanner.Err() + }, + }) + // git grep exits by cancel (killed), usually it is caused by the limit of results + if IsErrorExitCode(err, -1) && stderr.Len() == 0 { + return results, nil + } + // git grep exits with 1 if no results are found + if IsErrorExitCode(err, 1) && stderr.Len() == 0 { + return nil, nil + } + if err != nil && !errors.Is(err, context.Canceled) { + return nil, fmt.Errorf("unable to run git grep: %w, stderr: %s", err, stderr.String()) + } + return results, nil +} diff --git a/modules/git/grep_test.go b/modules/git/grep_test.go new file mode 100644 index 000000000..b5fa437c5 --- /dev/null +++ b/modules/git/grep_test.go @@ -0,0 +1,51 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package git + +import ( + "context" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGrepSearch(t *testing.T) { + repo, err := openRepositoryWithDefaultContext(filepath.Join(testReposDir, "language_stats_repo")) + assert.NoError(t, err) + defer repo.Close() + + res, err := GrepSearch(context.Background(), repo, "void", GrepOptions{}) + assert.NoError(t, err) + assert.Equal(t, []*GrepResult{ + { + Filename: "java-hello/main.java", + LineNumbers: []int{3}, + LineCodes: []string{" public static void main(String[] args)"}, + }, + { + Filename: "main.vendor.java", + LineNumbers: []int{3}, + LineCodes: []string{" public static void main(String[] args)"}, + }, + }, res) + + res, err = GrepSearch(context.Background(), repo, "void", GrepOptions{MaxResultLimit: 1}) + assert.NoError(t, err) + assert.Equal(t, []*GrepResult{ + { + Filename: "java-hello/main.java", + LineNumbers: []int{3}, + LineCodes: []string{" public static void main(String[] args)"}, + }, + }, res) + + res, err = GrepSearch(context.Background(), repo, "no-such-content", GrepOptions{}) + assert.NoError(t, err) + assert.Len(t, res, 0) + + res, err = GrepSearch(context.Background(), &Repository{Path: "no-such-git-repo"}, "no-such-content", GrepOptions{}) + assert.Error(t, err) + assert.Len(t, res, 0) +} diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 51c7595cf..5f35e8073 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -70,13 +70,27 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error { return nil } +func HighlightSearchResultCode(filename string, lineNums []int, code string) []ResultLine { + // we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting + hl, _ := highlight.Code(filename, "", code) + highlightedLines := strings.Split(string(hl), "\n") + + // The lineNums outputted by highlight.Code might not 
match the original lineNums, because "highlight" removes the last `\n` + lines := make([]ResultLine, min(len(highlightedLines), len(lineNums))) + for i := 0; i < len(lines); i++ { + lines[i].Num = lineNums[i] + lines[i].FormattedContent = template.HTML(highlightedLines[i]) + } + return lines +} + func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Result, error) { startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n") var formattedLinesBuffer bytes.Buffer contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n") - lines := make([]ResultLine, 0, len(contentLines)) + lineNums := make([]int, 0, len(contentLines)) index := startIndex for i, line := range contentLines { var err error @@ -91,29 +105,16 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res line[closeActiveIndex:], ) } else { - err = writeStrings(&formattedLinesBuffer, - line, - ) + err = writeStrings(&formattedLinesBuffer, line) } if err != nil { return nil, err } - lines = append(lines, ResultLine{Num: startLineNum + i}) + lineNums = append(lineNums, startLineNum+i) index += len(line) } - // we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting - hl, _ := highlight.Code(result.Filename, "", formattedLinesBuffer.String()) - highlightedLines := strings.Split(string(hl), "\n") - - // The lines outputted by highlight.Code might not match the original lines, because "highlight" removes the last `\n` - lines = lines[:min(len(highlightedLines), len(lines))] - highlightedLines = highlightedLines[:len(lines)] - for i := 0; i < len(lines); i++ { - lines[i].FormattedContent = template.HTML(highlightedLines[i]) - } - return &Result{ RepoID: result.RepoID, Filename: result.Filename, @@ -121,7 +122,7 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res UpdatedUnix: result.UpdatedUnix, Language: result.Language, Color: result.Color, - Lines: 
lines, + Lines: HighlightSearchResultCode(result.Filename, lineNums, formattedLinesBuffer.String()), }, nil } diff --git a/modules/util/util.go b/modules/util/util.go index 5c7515819..c94fb9104 100644 --- a/modules/util/util.go +++ b/modules/util/util.go @@ -212,3 +212,12 @@ func ToFloat64(number any) (float64, error) { func ToPointer[T any](val T) *T { return &val } + +// IfZero returns "def" if "v" is a zero value, otherwise "v" +func IfZero[T comparable](v, def T) T { + var zero T + if v == zero { + return def + } + return v +} diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index a58305a83..17a8180ec 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -171,6 +171,7 @@ org_kind = Search orgs... team_kind = Search teams... code_kind = Search code... code_search_unavailable = Code search is currently not available. Please contact the site administrator. +code_search_by_git_grep = Current code search results are provided by "git grep". There might be better results if site administrator enables Repository Indexer. package_kind = Search packages... project_kind = Search projects... branch_kind = Search branches... 
diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go index 460378ce0..6e7042854 100644 --- a/routers/web/repo/search.go +++ b/routers/web/repo/search.go @@ -5,13 +5,14 @@ package repo import ( "net/http" + "strings" "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/git" code_indexer "code.gitea.io/gitea/modules/indexer/code" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/services/context" - "code.gitea.io/gitea/services/repository/files" ) const tplSearch base.TplName = "repo/search" @@ -33,17 +34,17 @@ func Search(ctx *context.Context) { return } - ctx.Data["Repo"] = ctx.Repo.Repository - page := ctx.FormInt("page") if page <= 0 { page = 1 } + var total int + var searchResults []*code_indexer.Result + var searchResultLanguages []*code_indexer.SearchResultLanguages if setting.Indexer.RepoIndexerEnabled { - ctx.Data["CodeIndexerDisabled"] = false - - total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ + var err error + total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ RepoIDs: []int64{ctx.Repo.Repository.ID}, Keyword: keyword, IsKeywordFuzzy: isFuzzy, @@ -62,28 +63,39 @@ func Search(ctx *context.Context) { } else { ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx) } - - ctx.Data["SearchResults"] = searchResults - ctx.Data["SearchResultLanguages"] = searchResultLanguages - - pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5) - pager.SetDefaultParams(ctx) - pager.AddParam(ctx, "l", "Language") - ctx.Data["Page"] = pager } else { - data, err := files.NewRepoGrep(ctx, ctx.Repo.Repository, keyword) + res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, git.GrepOptions{ContextLineNumber: 3, IsFuzzy: isFuzzy}) if err != nil { - ctx.ServerError("NewRepoGrep", err) + ctx.ServerError("GrepSearch", err) return } - - 
ctx.Data["CodeIndexerDisabled"] = true - ctx.Data["SearchResults"] = data - - pager := context.NewPagination(len(data), setting.UI.RepoSearchPagingNum, page, 5) - pager.SetDefaultParams(ctx) - ctx.Data["Page"] = pager + total = len(res) + pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res)) + pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res)) + res = res[pageStart:pageEnd] + for _, r := range res { + searchResults = append(searchResults, &code_indexer.Result{ + RepoID: ctx.Repo.Repository.ID, + Filename: r.Filename, + CommitID: ctx.Repo.CommitID, + // UpdatedUnix: not supported yet + // Language: not supported yet + // Color: not supported yet + Lines: code_indexer.HighlightSearchResultCode(r.Filename, r.LineNumbers, strings.Join(r.LineCodes, "\n")), + }) + } } + ctx.Data["CodeIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled + ctx.Data["Repo"] = ctx.Repo.Repository + ctx.Data["SourcePath"] = ctx.Repo.Repository.Link() + ctx.Data["SearchResults"] = searchResults + ctx.Data["SearchResultLanguages"] = searchResultLanguages + + pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5) + pager.SetDefaultParams(ctx) + pager.AddParam(ctx, "l", "Language") + ctx.Data["Page"] = pager + ctx.HTML(http.StatusOK, tplSearch) } diff --git a/services/repository/files/search.go b/services/repository/files/search.go deleted file mode 100644 index 09c3ab5bf..000000000 --- a/services/repository/files/search.go +++ /dev/null @@ -1,111 +0,0 @@ -package files - -import ( - "context" - "html/template" - "strconv" - "strings" - - repo_model "code.gitea.io/gitea/models/repo" - "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/gitrepo" - "code.gitea.io/gitea/modules/highlight" - "code.gitea.io/gitea/modules/timeutil" - - "github.com/go-enry/go-enry/v2" -) - -type Result struct { - RepoID int64 // ignored - Filename string - CommitID string // branch - UpdatedUnix timeutil.TimeStamp // ignored - Language string - Color string 
- Lines []ResultLine -} - -type ResultLine struct { - Num int64 - FormattedContent template.HTML -} - -const pHEAD = "HEAD:" - -func NewRepoGrep(ctx context.Context, repo *repo_model.Repository, keyword string) ([]*Result, error) { - t, _, err := gitrepo.RepositoryFromContextOrOpen(ctx, repo) - if err != nil { - return nil, err - } - - data := []*Result{} - - stdout, _, err := git.NewCommand(ctx, - "grep", - "-1", // n before and after lines - "-z", - "--heading", - "--break", // easier parsing - "--fixed-strings", // disallow regex for now - "-n", // line nums - "-i", // ignore case - "--full-name", // full file path, rel to repo - //"--column", // for adding better highlighting support - "-e", // for queries starting with "-" - ). - AddDynamicArguments(keyword). - AddArguments("HEAD"). - RunStdString(&git.RunOpts{Dir: t.Path}) - if err != nil { - return data, nil // non zero exit code when there are no results - } - - for _, block := range strings.Split(stdout, "\n\n") { - res := Result{CommitID: repo.DefaultBranch} - - linenum := []int64{} - code := []string{} - - for _, line := range strings.Split(block, "\n") { - if strings.HasPrefix(line, pHEAD) { - res.Filename = strings.TrimPrefix(line, pHEAD) - continue - } - - if ln, after, ok := strings.Cut(line, "\x00"); ok { - i, err := strconv.ParseInt(ln, 10, 64) - if err != nil { - continue - } - - linenum = append(linenum, i) - code = append(code, after) - } - } - - if res.Filename == "" || len(code) == 0 || len(linenum) == 0 { - continue - } - - var hl template.HTML - - hl, res.Language = highlight.Code(res.Filename, "", strings.Join(code, "\n")) - res.Color = enry.GetColor(res.Language) - - hlCode := strings.Split(string(hl), "\n") - n := min(len(hlCode), len(linenum)) - - res.Lines = make([]ResultLine, n) - - for i := 0; i < n; i++ { - res.Lines[i] = ResultLine{ - Num: linenum[i], - FormattedContent: template.HTML(hlCode[i]), - } - } - - data = append(data, &res) - } - - return data, nil -} diff --git 
a/services/repository/files/search_test.go b/services/repository/files/search_test.go deleted file mode 100644 index 2f2f87368..000000000 --- a/services/repository/files/search_test.go +++ /dev/null @@ -1,50 +0,0 @@ -package files - -import ( - "testing" - - "code.gitea.io/gitea/models/unittest" - "code.gitea.io/gitea/services/contexttest" - - "github.com/stretchr/testify/assert" -) - -func TestNewRepoGrep(t *testing.T) { - unittest.PrepareTestEnv(t) - ctx, _ := contexttest.MockContext(t, "user2/repo1") - ctx.SetParams(":id", "1") - contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) - contexttest.LoadUser(t, ctx, 2) - contexttest.LoadGitRepo(t, ctx) - defer ctx.Repo.GitRepo.Close() - - t.Run("with result", func(t *testing.T) { - res, err := NewRepoGrep(ctx, ctx.Repo.Repository, "Description") - assert.NoError(t, err) - - expected := []*Result{ - { - RepoID: 0, - Filename: "README.md", - CommitID: "master", - UpdatedUnix: 0, - Language: "Markdown", - Color: "#083fa1", - Lines: []ResultLine{ - {Num: 2, FormattedContent: ""}, - {Num: 3, FormattedContent: "Description for repo1"}, - }, - }, - } - - assert.EqualValues(t, res, expected) - }) - - t.Run("empty result", func(t *testing.T) { - res, err := NewRepoGrep(ctx, ctx.Repo.Repository, "keyword that does not match in the repo") - assert.NoError(t, err) - - assert.EqualValues(t, res, []*Result{}) - }) -} diff --git a/templates/repo/home.tmpl b/templates/repo/home.tmpl index bfb5ab523..94cff1d6c 100644 --- a/templates/repo/home.tmpl +++ b/templates/repo/home.tmpl @@ -5,27 +5,18 @@ {{template "base/alert" .}} {{template "repo/code/recently_pushed_new_branches" .}} {{if and (not .HideRepoInfo) (not .IsBlame)}} -