From 06d2e90fa4000e5965b4f7a1dc52b53f8d7cf639 Mon Sep 17 00:00:00 2001 From: Shiny Nematoda Date: Tue, 6 Aug 2024 05:57:25 +0000 Subject: [PATCH] feat: highlighted code search results (#4749) closes #4534
Screenshots ![](https://codeberg.org/attachments/0ab8a7b0-6485-46dc-a730-c016abb1f287)
Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/4749 Reviewed-by: 0ko <0ko@noreply.codeberg.org> Co-authored-by: Shiny Nematoda Co-committed-by: Shiny Nematoda --- modules/git/grep.go | 48 +++++++++++---- modules/git/grep_test.go | 59 +++++++++++------- modules/indexer/code/search.go | 83 +++++++++++++++++++++++++- routers/web/repo/search.go | 2 +- services/gitdiff/gitdiff.go | 2 +- services/gitdiff/highlightdiff.go | 52 ++++++++-------- services/gitdiff/highlightdiff_test.go | 22 +++---- tests/integration/explore_code_test.go | 14 +++-- tests/integration/repo_search_test.go | 3 +- web_src/css/repo.css | 4 ++ 10 files changed, 214 insertions(+), 75 deletions(-) diff --git a/modules/git/grep.go b/modules/git/grep.go index 7cd1a96da..0f4d29718 100644 --- a/modules/git/grep.go +++ b/modules/git/grep.go @@ -1,4 +1,5 @@ // Copyright 2024 The Gitea Authors. All rights reserved. +// Copyright 2024 The Forgejo Authors. All rights reserved. // SPDX-License-Identifier: MIT package git @@ -19,9 +20,10 @@ import ( ) type GrepResult struct { - Filename string - LineNumbers []int - LineCodes []string + Filename string + LineNumbers []int + LineCodes []string + HighlightedRanges [][3]int } type GrepOptions struct { @@ -33,6 +35,13 @@ type GrepOptions struct { PathSpec []setting.Glob } +func hasPrefixFold(s, t string) bool { + if len(s) < len(t) { + return false + } + return strings.EqualFold(s[:len(t)], t) +} + func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) { stdoutReader, stdoutWriter, err := os.Pipe() if err != nil { @@ -53,18 +62,19 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO 2^@repo: go-gitea/gitea */ var results []*GrepResult - cmd := NewCommand(ctx, "grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name") + cmd := NewCommand(ctx, "grep", + "--null", "--break", "--heading", "--column", + "--fixed-strings", 
"--line-number", "--ignore-case", "--full-name") cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber)) if opts.MatchesPerFile > 0 { cmd.AddOptionValues("--max-count", fmt.Sprint(opts.MatchesPerFile)) } + words := []string{search} if opts.IsFuzzy { - words := strings.Fields(search) - for _, word := range words { - cmd.AddOptionValues("-e", strings.TrimLeft(word, "-")) - } - } else { - cmd.AddOptionValues("-e", strings.TrimLeft(search, "-")) + words = strings.Fields(search) + } + for _, word := range words { + cmd.AddOptionValues("-e", strings.TrimLeft(word, "-")) } // pathspec @@ -128,6 +138,24 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok { lineNumInt, _ := strconv.Atoi(lineNum) res.LineNumbers = append(res.LineNumbers, lineNumInt) + if lineCol, lineCode2, ok := strings.Cut(lineCode, "\x00"); ok { + lineColInt, _ := strconv.Atoi(lineCol) + start := lineColInt - 1 + matchLen := len(lineCode2) + for _, word := range words { + if hasPrefixFold(lineCode2[start:], word) { + matchLen = len(word) + break + } + } + res.HighlightedRanges = append(res.HighlightedRanges, [3]int{ + len(res.LineCodes), + start, + start + matchLen, + }) + res.LineCodes = append(res.LineCodes, lineCode2) + continue + } res.LineCodes = append(res.LineCodes, lineCode) } } diff --git a/modules/git/grep_test.go b/modules/git/grep_test.go index 486b5bc56..bb7db7d58 100644 --- a/modules/git/grep_test.go +++ b/modules/git/grep_test.go @@ -20,28 +20,43 @@ func TestGrepSearch(t *testing.T) { require.NoError(t, err) defer repo.Close() - res, err := GrepSearch(context.Background(), repo, "void", GrepOptions{}) + res, err := GrepSearch(context.Background(), repo, "public", GrepOptions{}) require.NoError(t, err) assert.Equal(t, []*GrepResult{ { Filename: "java-hello/main.java", - LineNumbers: []int{3}, - LineCodes: []string{" public static void main(String[] args)"}, + LineNumbers: []int{1, 3}, + 
LineCodes: []string{ + "public class HelloWorld", + " public static void main(String[] args)", + }, + HighlightedRanges: [][3]int{{0, 0, 6}, {1, 1, 7}}, }, { Filename: "main.vendor.java", - LineNumbers: []int{3}, - LineCodes: []string{" public static void main(String[] args)"}, + LineNumbers: []int{1, 3}, + LineCodes: []string{ + "public class HelloWorld", + " public static void main(String[] args)", + }, + HighlightedRanges: [][3]int{{0, 0, 6}, {1, 1, 7}}, }, }, res) - res, err = GrepSearch(context.Background(), repo, "void", GrepOptions{MaxResultLimit: 1}) + res, err = GrepSearch(context.Background(), repo, "void", GrepOptions{MaxResultLimit: 1, ContextLineNumber: 2}) require.NoError(t, err) assert.Equal(t, []*GrepResult{ { Filename: "java-hello/main.java", - LineNumbers: []int{3}, - LineCodes: []string{" public static void main(String[] args)"}, + LineNumbers: []int{1, 2, 3, 4, 5}, + LineCodes: []string{ + "public class HelloWorld", + "{", + " public static void main(String[] args)", + " {", + " System.out.println(\"Hello world!\");", + }, + HighlightedRanges: [][3]int{{2, 15, 19}}, }, }, res) @@ -49,24 +64,28 @@ func TestGrepSearch(t *testing.T) { require.NoError(t, err) assert.Equal(t, []*GrepResult{ { - Filename: "i-am-a-python.p", - LineNumbers: []int{1}, - LineCodes: []string{"## This is a simple file to do a hello world"}, + Filename: "i-am-a-python.p", + LineNumbers: []int{1}, + LineCodes: []string{"## This is a simple file to do a hello world"}, + HighlightedRanges: [][3]int{{0, 39, 44}}, }, { - Filename: "java-hello/main.java", - LineNumbers: []int{1}, - LineCodes: []string{"public class HelloWorld"}, + Filename: "java-hello/main.java", + LineNumbers: []int{1}, + LineCodes: []string{"public class HelloWorld"}, + HighlightedRanges: [][3]int{{0, 18, 23}}, }, { - Filename: "main.vendor.java", - LineNumbers: []int{1}, - LineCodes: []string{"public class HelloWorld"}, + Filename: "main.vendor.java", + LineNumbers: []int{1}, + LineCodes: []string{"public 
class HelloWorld"}, + HighlightedRanges: [][3]int{{0, 18, 23}}, }, { - Filename: "python-hello/hello.py", - LineNumbers: []int{1}, - LineCodes: []string{"## This is a simple file to do a hello world"}, + Filename: "python-hello/hello.py", + LineNumbers: []int{1}, + LineCodes: []string{"## This is a simple file to do a hello world"}, + HighlightedRanges: [][3]int{{0, 39, 44}}, }, }, res) diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 5f35e8073..04af733cd 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -12,6 +12,7 @@ import ( "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/indexer/code/internal" "code.gitea.io/gitea/modules/timeutil" + "code.gitea.io/gitea/services/gitdiff" ) // Result a search result to display @@ -70,11 +71,85 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error { return nil } -func HighlightSearchResultCode(filename string, lineNums []int, code string) []ResultLine { +const ( + highlightTagStart = "" + highlightTagEnd = "" +) + +func HighlightSearchResultCode(filename string, lineNums []int, highlightRanges [][3]int, code string) []ResultLine { + hcd := gitdiff.NewHighlightCodeDiff() + hcd.CollectUsedRunes(code) + startTag, endTag := hcd.NextPlaceholder(), hcd.NextPlaceholder() + hcd.PlaceholderTokenMap[startTag] = highlightTagStart + hcd.PlaceholderTokenMap[endTag] = highlightTagEnd + // we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting hl, _ := highlight.Code(filename, "", code) - highlightedLines := strings.Split(string(hl), "\n") + conv := hcd.ConvertToPlaceholders(string(hl)) + convLines := strings.Split(conv, "\n") + // each highlightRange is of the form [line number, start pos, end pos] + for _, highlightRange := range highlightRanges { + ln, start, end := highlightRange[0], highlightRange[1], highlightRange[2] + line := convLines[ln] + if line == "" || len(line) <= start || 
len(line) < end { + continue + } + + sb := strings.Builder{} + count := -1 + isOpen := false + for _, r := range line { + if token, ok := hcd.PlaceholderTokenMap[r]; + // token was not found + !ok || + // token was marked as used + token == "" || + // the token is not a valid html tag emitted by chroma + !(len(token) > 6 && (token[0:5] == "{TEXT}" => "\uE000{TEXT}\uE001" // These Unicode placeholders are friendly to the diff. // Then after diff, the placeholders in diff result will be recovered to the HTML tags and entities. // It's guaranteed that the tags in final diff result are paired correctly. -type highlightCodeDiff struct { +type HighlightCodeDiff struct { placeholderBegin rune placeholderMaxCount int placeholderIndex int - placeholderTokenMap map[rune]string + PlaceholderTokenMap map[rune]string tokenPlaceholderMap map[string]rune placeholderOverflowCount int @@ -49,52 +49,52 @@ type highlightCodeDiff struct { lineWrapperTags []string } -func newHighlightCodeDiff() *highlightCodeDiff { - return &highlightCodeDiff{ +func NewHighlightCodeDiff() *HighlightCodeDiff { + return &HighlightCodeDiff{ placeholderBegin: rune(0x100000), // Plane 16: Supplementary Private Use Area B (U+100000..U+10FFFD) placeholderMaxCount: 64000, - placeholderTokenMap: map[rune]string{}, + PlaceholderTokenMap: map[rune]string{}, tokenPlaceholderMap: map[string]rune{}, } } -// nextPlaceholder returns 0 if no more placeholder can be used +// NextPlaceholder returns 0 if no more placeholder can be used // the diff is done line by line, usually there are only a few (no more than 10) placeholders in one line // so the placeholderMaxCount is impossible to be exhausted in real cases. 
-func (hcd *highlightCodeDiff) nextPlaceholder() rune { +func (hcd *HighlightCodeDiff) NextPlaceholder() rune { for hcd.placeholderIndex < hcd.placeholderMaxCount { r := hcd.placeholderBegin + rune(hcd.placeholderIndex) hcd.placeholderIndex++ // only use non-existing (not used by code) rune as placeholders - if _, ok := hcd.placeholderTokenMap[r]; !ok { + if _, ok := hcd.PlaceholderTokenMap[r]; !ok { return r } } return 0 // no more available placeholder } -func (hcd *highlightCodeDiff) isInPlaceholderRange(r rune) bool { +func (hcd *HighlightCodeDiff) isInPlaceholderRange(r rune) bool { return hcd.placeholderBegin <= r && r < hcd.placeholderBegin+rune(hcd.placeholderMaxCount) } -func (hcd *highlightCodeDiff) collectUsedRunes(code string) { +func (hcd *HighlightCodeDiff) CollectUsedRunes(code string) { for _, r := range code { if hcd.isInPlaceholderRange(r) { // put the existing rune (used by code) in map, then this rune won't be used a placeholder anymore. - hcd.placeholderTokenMap[r] = "" + hcd.PlaceholderTokenMap[r] = "" } } } -func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff { - hcd.collectUsedRunes(codeA) - hcd.collectUsedRunes(codeB) +func (hcd *HighlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff { + hcd.CollectUsedRunes(codeA) + hcd.CollectUsedRunes(codeB) highlightCodeA, _ := highlight.Code(filename, language, codeA) highlightCodeB, _ := highlight.Code(filename, language, codeB) - convertedCodeA := hcd.convertToPlaceholders(string(highlightCodeA)) - convertedCodeB := hcd.convertToPlaceholders(string(highlightCodeB)) + convertedCodeA := hcd.ConvertToPlaceholders(string(highlightCodeA)) + convertedCodeB := hcd.ConvertToPlaceholders(string(highlightCodeB)) diffs := diffMatchPatch.DiffMain(convertedCodeA, convertedCodeB, true) diffs = diffMatchPatch.DiffCleanupEfficiency(diffs) @@ -106,7 +106,7 @@ func (hcd *highlightCodeDiff) 
diffWithHighlight(filename, language, codeA, codeB } // convertToPlaceholders totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes. -func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { +func (hcd *HighlightCodeDiff) ConvertToPlaceholders(htmlCode string) string { var tagStack []string res := strings.Builder{} @@ -153,10 +153,10 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { // remember the placeholder and token in the map placeholder, ok := hcd.tokenPlaceholderMap[tokenInMap] if !ok { - placeholder = hcd.nextPlaceholder() + placeholder = hcd.NextPlaceholder() if placeholder != 0 { hcd.tokenPlaceholderMap[tokenInMap] = placeholder - hcd.placeholderTokenMap[placeholder] = tokenInMap + hcd.PlaceholderTokenMap[placeholder] = tokenInMap } } @@ -179,12 +179,16 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { return res.String() } -func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { +func (hcd *HighlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { + diff.Text = hcd.Recover(diff.Text) +} + +func (hcd *HighlightCodeDiff) Recover(src string) string { sb := strings.Builder{} var tagStack []string - for _, r := range diff.Text { - token, ok := hcd.placeholderTokenMap[r] + for _, r := range src { + token, ok := hcd.PlaceholderTokenMap[r] if !ok || token == "" { sb.WriteRune(r) // if the rune is not a placeholder, write it as it is continue @@ -218,5 +222,5 @@ func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { } } - diff.Text = sb.String() + return sb.String() } diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go index 545a060e2..2ff4472bc 100644 --- a/services/gitdiff/highlightdiff_test.go +++ b/services/gitdiff/highlightdiff_test.go @@ -13,7 +13,7 @@ import ( ) func TestDiffWithHighlight(t *testing.T) { - hcd := newHighlightCodeDiff() + 
hcd := NewHighlightCodeDiff() diffs := hcd.diffWithHighlight( "main.v", "", " run('<>')\n", @@ -28,9 +28,9 @@ func TestDiffWithHighlight(t *testing.T) { output = diffToHTML(nil, diffs, DiffLineAdd) assert.Equal(t, expected, output) - hcd = newHighlightCodeDiff() - hcd.placeholderTokenMap['O'] = "" - hcd.placeholderTokenMap['C'] = "" + hcd = NewHighlightCodeDiff() + hcd.PlaceholderTokenMap['O'] = "" + hcd.PlaceholderTokenMap['C'] = "" diff := diffmatchpatch.Diff{} diff.Text = "OC" @@ -47,20 +47,20 @@ func TestDiffWithHighlight(t *testing.T) { } func TestDiffWithHighlightPlaceholder(t *testing.T) { - hcd := newHighlightCodeDiff() + hcd := NewHighlightCodeDiff() diffs := hcd.diffWithHighlight( "main.js", "", "a='\U00100000'", "a='\U0010FFFD''", ) - assert.Equal(t, "", hcd.placeholderTokenMap[0x00100000]) - assert.Equal(t, "", hcd.placeholderTokenMap[0x0010FFFD]) + assert.Equal(t, "", hcd.PlaceholderTokenMap[0x00100000]) + assert.Equal(t, "", hcd.PlaceholderTokenMap[0x0010FFFD]) expected := fmt.Sprintf(`a='%s'`, "\U00100000") output := diffToHTML(hcd.lineWrapperTags, diffs, DiffLineDel) assert.Equal(t, expected, output) - hcd = newHighlightCodeDiff() + hcd = NewHighlightCodeDiff() diffs = hcd.diffWithHighlight( "main.js", "", "a='\U00100000'", @@ -72,7 +72,7 @@ func TestDiffWithHighlightPlaceholder(t *testing.T) { } func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) { - hcd := newHighlightCodeDiff() + hcd := NewHighlightCodeDiff() hcd.placeholderMaxCount = 0 diffs := hcd.diffWithHighlight( "main.js", "", @@ -83,7 +83,7 @@ func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) { expected := fmt.Sprintf(`%s#39;`, "\uFFFD") assert.Equal(t, expected, output) - hcd = newHighlightCodeDiff() + hcd = NewHighlightCodeDiff() hcd.placeholderMaxCount = 0 diffs = hcd.diffWithHighlight( "main.js", "", @@ -102,7 +102,7 @@ func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) { func TestDiffWithHighlightTagMatch(t *testing.T) { totalOverflow := 0 for i := 0; i < 
100; i++ { - hcd := newHighlightCodeDiff() + hcd := NewHighlightCodeDiff() hcd.placeholderMaxCount = i diffs := hcd.diffWithHighlight( "main.js", "", diff --git a/tests/integration/explore_code_test.go b/tests/integration/explore_code_test.go index 1634f70d3..d84b47cf0 100644 --- a/tests/integration/explore_code_test.go +++ b/tests/integration/explore_code_test.go @@ -8,6 +8,7 @@ import ( "code.gitea.io/gitea/modules/test" "code.gitea.io/gitea/tests" + "github.com/PuerkitoBio/goquery" "github.com/stretchr/testify/assert" ) @@ -15,11 +16,16 @@ func TestExploreCodeSearchIndexer(t *testing.T) { defer tests.PrepareTestEnv(t)() defer test.MockVariableValue(&setting.Indexer.RepoIndexerEnabled, true)() - req := NewRequest(t, "GET", "/explore/code") + req := NewRequest(t, "GET", "/explore/code?q=file&fuzzy=true") resp := MakeRequest(t, req, http.StatusOK) + doc := NewHTMLParser(t, resp.Body).Find(".explore") - doc := NewHTMLParser(t, resp.Body) - msg := doc.Find(".explore").Find(".ui.container").Find(".ui.message[data-test-tag=grep]") + msg := doc. + Find(".ui.container"). 
+ Find(".ui.message[data-test-tag=grep]") + assert.EqualValues(t, 0, msg.Length()) - assert.Empty(t, msg.Nodes) + doc.Find(".file-body").Each(func(i int, sel *goquery.Selection) { + assert.Positive(t, sel.Find(".code-inner").Find(".search-highlight").Length(), 0) + }) } diff --git a/tests/integration/repo_search_test.go b/tests/integration/repo_search_test.go index a2b458889..c7a31f473 100644 --- a/tests/integration/repo_search_test.go +++ b/tests/integration/repo_search_test.go @@ -27,7 +27,8 @@ func resultFilenames(t testing.TB, doc *HTMLDoc) []string { result := make([]string, resultSelections.Length()) resultSelections.Each(func(i int, selection *goquery.Selection) { - assert.Positive(t, resultSelections.Find("div ol li").Length(), 0) + assert.Positive(t, selection.Find("div ol li").Length(), 0) + assert.Positive(t, selection.Find(".code-inner").Find(".search-highlight").Length(), 0) result[i] = selection. Find(".header"). Find("span.file a.file-link"). diff --git a/web_src/css/repo.css b/web_src/css/repo.css index bf0366add..c628ac5e0 100644 --- a/web_src/css/repo.css +++ b/web_src/css/repo.css @@ -1752,6 +1752,10 @@ td .commit-summary { color: inherit; } +.search-highlight { + background: var(--color-primary-alpha-40); +} + .repository.quickstart .guide .item { padding: 1em; }