Server-side syntax highlighting for all code (#12047)

* Server-side syntax highlighting for all code

This PR does a few things:

* Remove all traces of highlight.js
* Use the chroma library to provide fast syntax highlighting directly on the server
* Provide syntax highlighting for diffs
* Re-style both unified and split diff views
* Add custom syntax highlighting styling for both the regular and arc-green themes

Fixes #7729
Fixes #10157
Fixes #11825
Fixes #7728
Fixes #3872
Fixes #3682

And perhaps gets closer to #9553

* fix line marker

* fix repo search

* Fix single line select

* properly load settings

* npm uninstall highlight.js

* review suggestion

* code review

* forgot to call function

* fix test

* Apply suggestions from code review

suggestions from @silverwind thanks

Co-authored-by: silverwind <me@silverwind.io>

* code review

* copy/paste error

* Use const for highlight size limit

* Update web_src/less/_repository.less

Co-authored-by: Lauris BH <lauris@nix.lv>

* update size limit to 1MB and other styling tweaks

* fix highlighting for certain diff sections

* fix test

* add worker back as suggested

Co-authored-by: silverwind <me@silverwind.io>
Co-authored-by: Lauris BH <lauris@nix.lv>
This commit is contained in:
mrsdizzie 2020-06-30 17:34:03 -04:00 committed by GitHub
parent ce5f2b9845
commit af7ffaa279
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
336 changed files with 37293 additions and 769 deletions

View file

@ -1,151 +1,148 @@
// Copyright 2015 The Gogs Authors. All rights reserved.
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package highlight
import (
	"bufio"
	"bytes"
	gohtml "html"
	"path"
	"path/filepath"
	"strings"
	"sync"

	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/setting"

	"github.com/alecthomas/chroma/formatters/html"
	"github.com/alecthomas/chroma/lexers"
	"github.com/alecthomas/chroma/styles"
)
// sizeLimit is the largest input, in bytes, that Code and File will highlight.
// Anything longer is returned without highlighting, for performance reasons.
const sizeLimit = 1000000
// Package-level lookup tables used to choose a highlight language from a file
// name or extension, plus the guard for the one-time config load.
//
// NOTE(review): highlightMapping is declared twice in this block (an empty map
// and a populated one). Go rejects a redeclaration in the same scope, so only
// one of the two can remain - confirm which is intended.
var (
// Lower-cased file names that must not be highlighted.
ignoreFileNames = map[string]bool{
"license": true,
"copying": true,
}
// Custom user mapping; NewContext fills it from the "highlight.mapping"
// config section (key name -> value).
highlightMapping = map[string]string{}
// Whole file names (lower-cased) that map directly to a highlight class.
highlightFileNames = map[string]string{
"dockerfile": "dockerfile",
"makefile": "makefile",
"gnumakefile": "makefile",
"cmakelists.txt": "cmake",
}
// Extensions that are same as highlight classes (the class is the extension
// without its leading dot).
// See hljs.listLanguages() for list of language names.
highlightExts = map[string]struct{}{
".applescript": {},
".arm": {},
".as": {},
".bash": {},
".bat": {},
".c": {},
".cmake": {},
".cpp": {},
".cs": {},
".css": {},
".dart": {},
".diff": {},
".django": {},
".go": {},
".gradle": {},
".groovy": {},
".haml": {},
".handlebars": {},
".html": {},
".ini": {},
".java": {},
".json": {},
".less": {},
".lua": {},
".php": {},
".scala": {},
".scss": {},
".sql": {},
".swift": {},
".ts": {},
".xml": {},
".yaml": {},
}
// Extensions that are not same as highlight classes: extension -> class name.
highlightMapping = map[string]string{
".ahk": "autohotkey",
".crmsh": "crmsh",
".dash": "shell",
".erl": "erlang",
".escript": "erlang",
".ex": "elixir",
".exs": "elixir",
".f": "fortran",
".f77": "fortran",
".f90": "fortran",
".f95": "fortran",
".feature": "gherkin",
".fish": "shell",
".for": "fortran",
".hbs": "handlebars",
".hs": "haskell",
".hx": "haxe",
".js": "javascript",
".jsx": "javascript",
".ksh": "shell",
".kt": "kotlin",
".l": "ocaml",
".ls": "livescript",
".md": "markdown",
".mjs": "javascript",
".mli": "ocaml",
".mll": "ocaml",
".mly": "ocaml",
".patch": "diff",
".pl": "perl",
".pm": "perl",
".ps1": "powershell",
".psd1": "powershell",
".psm1": "powershell",
".py": "python",
".pyw": "python",
".rb": "ruby",
".rs": "rust",
".scpt": "applescript",
".scptd": "applescript",
".sh": "bash",
".tcsh": "shell",
".ts": "typescript",
".tsx": "typescript",
".txt": "plaintext",
".vb": "vbnet",
".vbs": "vbscript",
".yml": "yaml",
".zsh": "shell",
}
// once guards the config load in NewContext so that it runs a single time.
once sync.Once
)
// NewContext loads highlight map
// NewContext loads custom highlight map from local config
func NewContext() {
keys := setting.Cfg.Section("highlight.mapping").Keys()
for i := range keys {
highlightMapping[keys[i].Name()] = keys[i].Value()
}
once.Do(func() {
keys := setting.Cfg.Section("highlight.mapping").Keys()
for i := range keys {
highlightMapping[keys[i].Name()] = keys[i].Value()
}
})
}
// FileNameToHighlightClass returns the best match for highlight class name
// based on the rule of highlight.js.
func FileNameToHighlightClass(fname string) string {
fname = strings.ToLower(fname)
if ignoreFileNames[fname] {
return "nohighlight"
// Code returns a HTML version of code string with chroma syntax highlighting classes
func Code(fileName, code string) string {
NewContext()
if len(code) > sizeLimit {
return code
}
formatter := html.New(html.WithClasses(true),
html.WithLineNumbers(false),
html.PreventSurroundingPre(true),
)
if formatter == nil {
log.Error("Couldn't create chroma formatter")
return code
}
if name, ok := highlightFileNames[fname]; ok {
return name
htmlbuf := bytes.Buffer{}
htmlw := bufio.NewWriter(&htmlbuf)
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
//change file name to one with mapped extension so we look that up instead
fileName = "mapped." + val
}
ext := path.Ext(fname)
if _, ok := highlightExts[ext]; ok {
return ext[1:]
lexer := lexers.Match(fileName)
if lexer == nil {
lexer = lexers.Fallback
}
name, ok := highlightMapping[ext]
if ok {
return name
iterator, err := lexer.Tokenise(nil, string(code))
if err != nil {
log.Error("Can't tokenize code: %v", err)
return code
}
// style not used for live site but need to pass something
err = formatter.Format(htmlw, styles.GitHub, iterator)
if err != nil {
log.Error("Can't format code: %v", err)
return code
}
return ""
htmlw.Flush()
return htmlbuf.String()
}
// File highlights code with chroma and returns the generated HTML keyed by
// 1-based line number. The output is split into at most numLines entries.
//
// When code is larger than sizeLimit, or the formatter cannot be created, or
// tokenising/formatting fails, the result of plainText is returned instead.
func File(numLines int, fileName string, code []byte) map[int]string {
	NewContext()

	source := string(code)
	if len(code) > sizeLimit {
		return plainText(source, numLines)
	}

	formatter := html.New(
		html.WithClasses(true),
		html.WithLineNumbers(false),
		html.PreventSurroundingPre(true),
	)
	if formatter == nil {
		log.Error("Couldn't create chroma formatter")
		return plainText(source, numLines)
	}

	out := bytes.Buffer{}
	writer := bufio.NewWriter(&out)

	// A custom mapping for this extension takes precedence: swap in a dummy
	// file name carrying the mapped value so the lexer lookup uses that instead.
	if mapped, found := highlightMapping[filepath.Ext(fileName)]; found {
		fileName = "test." + mapped
	}

	// Pick a lexer by file name first, then by content, then the fallback.
	lexer := lexers.Match(fileName)
	if lexer == nil {
		lexer = lexers.Analyse(source)
	}
	if lexer == nil {
		lexer = lexers.Fallback
	}

	tokens, err := lexer.Tokenise(nil, source)
	if err != nil {
		log.Error("Can't tokenize code: %v", err)
		return plainText(source, numLines)
	}

	// The style argument is required by Format; GitHub is passed here.
	if err = formatter.Format(writer, styles.GitHub, tokens); err != nil {
		log.Error("Can't format code: %v", err)
		return plainText(source, numLines)
	}

	writer.Flush()

	highlighted := make(map[int]string, numLines)
	rendered := strings.SplitN(out.String(), "\n", numLines)
	for idx := range rendered {
		highlighted[idx+1] = rendered[idx]
	}
	return highlighted
}
// plainText is the fallback used when code is not highlighted. It splits code
// on "\n" into at most numLines pieces (the last piece keeps any remaining
// text) and returns them keyed by 1-based line number.
//
// Every line is HTML-escaped: File returns this map in place of the HTML
// produced by chroma, so it must be safe to render the same way.
func plainText(code string, numLines int) map[int]string {
	lines := strings.SplitN(code, "\n", numLines)
	m := make(map[int]string, len(lines))
	for i, line := range lines {
		m[i+1] = gohtml.EscapeString(line)
	}
	return m
}

View file

@ -6,8 +6,6 @@ package code
import (
"bytes"
"html"
gotemplate "html/template"
"strings"
"code.gitea.io/gitea/modules/highlight"
@ -23,9 +21,8 @@ type Result struct {
UpdatedUnix timeutil.TimeStamp
Language string
Color string
HighlightClass string
LineNumbers []int
FormattedLines gotemplate.HTML
FormattedLines string
}
func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) {
@ -80,19 +77,13 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro
openActiveIndex := util.Max(result.StartIndex-index, 0)
closeActiveIndex := util.Min(result.EndIndex-index, len(line))
err = writeStrings(&formattedLinesBuffer,
`<li>`,
html.EscapeString(line[:openActiveIndex]),
`<span class='active'>`,
html.EscapeString(line[openActiveIndex:closeActiveIndex]),
`</span>`,
html.EscapeString(line[closeActiveIndex:]),
`</li>`,
line[:openActiveIndex],
line[openActiveIndex:closeActiveIndex],
line[closeActiveIndex:],
)
} else {
err = writeStrings(&formattedLinesBuffer,
`<li>`,
html.EscapeString(line),
`</li>`,
line,
)
}
if err != nil {
@ -109,9 +100,8 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro
UpdatedUnix: result.UpdatedUnix,
Language: result.Language,
Color: result.Color,
HighlightClass: highlight.FileNameToHighlightClass(result.Filename),
LineNumbers: lineNumbers,
FormattedLines: gotemplate.HTML(formattedLinesBuffer.String()),
FormattedLines: highlight.Code(result.Filename, formattedLinesBuffer.String()),
}, nil
}

View file

@ -15,7 +15,9 @@ import (
"code.gitea.io/gitea/modules/setting"
giteautil "code.gitea.io/gitea/modules/util"
chromahtml "github.com/alecthomas/chroma/formatters/html"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark-highlighting"
meta "github.com/yuin/goldmark-meta"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
@ -49,6 +51,30 @@ func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown
extension.TaskList,
extension.DefinitionList,
common.FootnoteExtension,
highlighting.NewHighlighting(
highlighting.WithFormatOptions(
chromahtml.WithClasses(true),
chromahtml.PreventSurroundingPre(true),
),
highlighting.WithWrapperRenderer(func(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) {
language, _ := c.Language()
if language == nil {
language = []byte("text")
}
if entering {
// include language-x class as part of commonmark spec
_, err := w.WriteString("<pre><code class=\"chroma language-" + string(language) + "\">")
if err != nil {
return
}
} else {
_, err := w.WriteString("</code></pre>")
if err != nil {
return
}
}
}),
),
meta.Meta,
),
goldmark.WithParserOptions(

View file

@ -37,8 +37,8 @@ func NewSanitizer() {
// ReplaceSanitizer replaces the current sanitizer to account for changes in settings
func ReplaceSanitizer() {
sanitizer.policy = bluemonday.UGCPolicy()
// We only want to allow HighlightJS specific classes for code blocks
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^language-[\w-]+$`)).OnElements("code")
// For Chroma markdown plugin
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code")
// Checkboxes
sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
@ -65,8 +65,8 @@ func ReplaceSanitizer() {
// Allow classes for emojis
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
// Allow icons, checkboxes and emojis on span
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(ui checkbox)|(ui checked checkbox)|(emoji))$`)).OnElements("span")
// Allow icons, checkboxes, emojis, and chroma syntax on span
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(ui checkbox)|(ui checked checkbox)|(emoji))$|^([a-z][a-z0-9]{0,2})$`)).OnElements("span")
// Allow generally safe attributes
generalSafeAttrs := []string{"abbr", "accept", "accept-charset",

View file

@ -47,7 +47,8 @@ func TestGetDiffPreview(t *testing.T) {
IsSubmodule: false,
Sections: []*gitdiff.DiffSection{
{
Name: "",
FileName: "README.md",
Name: "",
Lines: []*gitdiff.DiffLine{
{
LeftIdx: 0,