From f6920696161e1750cbbe89dc9f934f637549c9e3 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Thu, 28 Mar 2024 10:26:13 +0800 Subject: [PATCH] Refactor markdown render (#30139) Only split the file into small ones (and rename AttentionTypes to attentionTypes) (cherry picked from commit 71706126b56616750a65290460fd211b9b8449da) Signed-off-by: Gergely Nagy Conflicts: - modules/markup/markdown/goldmark.go - modules/markup/markdown/transform_blockquote.go Conflicts were resolved by favouring the Forgejo implementation: I copied the Forgejo code to the same place Gitea copied them to, and adjusted the imports accordingly. Apart from conflict resolution, this also moves `applyElementDir` from a local func in `goldmark.Transform` to a method on `*ASTTransformer`, to make it callable from the extracted functions. --- modules/markup/markdown/goldmark.go | 263 +----------------- modules/markup/markdown/prefixed_id.go | 59 ++++ modules/markup/markdown/transform_codespan.go | 56 ++++ modules/markup/markdown/transform_heading.go | 32 +++ modules/markup/markdown/transform_image.go | 66 +++++ modules/markup/markdown/transform_link.go | 47 ++++ modules/markup/markdown/transform_list.go | 86 ++++++ 7 files changed, 358 insertions(+), 251 deletions(-) create mode 100644 modules/markup/markdown/prefixed_id.go create mode 100644 modules/markup/markdown/transform_codespan.go create mode 100644 modules/markup/markdown/transform_heading.go create mode 100644 modules/markup/markdown/transform_image.go create mode 100644 modules/markup/markdown/transform_link.go create mode 100644 modules/markup/markdown/transform_list.go diff --git a/modules/markup/markdown/goldmark.go b/modules/markup/markdown/goldmark.go index 5a481a31f..8289e2867 100644 --- a/modules/markup/markdown/goldmark.go +++ b/modules/markup/markdown/goldmark.go @@ -4,17 +4,12 @@ package markdown import ( - "bytes" "fmt" "regexp" - "slices" "strings" - "code.gitea.io/gitea/modules/container" "code.gitea.io/gitea/modules/markup" - "code.gitea.io/gitea/modules/markup/common" "code.gitea.io/gitea/modules/setting" - giteautil "code.gitea.io/gitea/modules/util" "github.com/yuin/goldmark/ast" east "github.com/yuin/goldmark/extension/ast" @@ -30,6 +25,12 @@ var byteMailto = []byte("mailto:") // ASTTransformer is a default transformer of the goldmark tree. type ASTTransformer struct{} +func (g *ASTTransformer) applyElementDir(n ast.Node) { + if markup.DefaultProcessorHelper.ElementDir != "" { + n.SetAttributeString("dir", []byte(markup.DefaultProcessorHelper.ElementDir)) + } +} + // Transform transforms the given AST tree. func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { firstChild := node.FirstChild() @@ -46,12 +47,6 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa tocMode = rc.TOC } - applyElementDir := func(n ast.Node) { - if markup.DefaultProcessorHelper.ElementDir != "" { - n.SetAttributeString("dir", []byte(markup.DefaultProcessorHelper.ElementDir)) - } - } - _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { if !entering { return ast.WalkContinue, nil @@ -59,135 +54,15 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa switch v := n.(type) { case *ast.Heading: - for _, attr := range v.Attributes() { - if _, ok := attr.Value.([]byte); !ok { - v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value))) - } - } - txt := n.Text(reader.Source()) - header := markup.Header{ - Text: util.BytesToReadOnlyString(txt), - Level: v.Level, - } - if id, found := v.AttributeString("id"); found { - header.ID = util.BytesToReadOnlyString(id.([]byte)) - } - tocList = append(tocList, header) - applyElementDir(v) + g.transformHeading(ctx, v, reader, &tocList) case *ast.Paragraph: - applyElementDir(v) + g.applyElementDir(v) case *ast.Image: - // Images need two things: - // - // 1. Their src needs to munged to be a real value - // 2. If they're not wrapped with a link they need a link wrapper - - // Check if the destination is a real link - if len(v.Destination) > 0 && !markup.IsLink(v.Destination) { - v.Destination = []byte(giteautil.URLJoin( - ctx.Links.ResolveMediaLink(ctx.IsWiki), - strings.TrimLeft(string(v.Destination), "/"), - )) - } - - parent := n.Parent() - // Create a link around image only if parent is not already a link - if _, ok := parent.(*ast.Link); !ok && parent != nil { - next := n.NextSibling() - - // Create a link wrapper - wrap := ast.NewLink() - wrap.Destination = v.Destination - wrap.Title = v.Title - wrap.SetAttributeString("target", []byte("_blank")) - - // Duplicate the current image node - image := ast.NewImage(ast.NewLink()) - image.Destination = v.Destination - image.Title = v.Title - for _, attr := range v.Attributes() { - image.SetAttribute(attr.Name, attr.Value) - } - for child := v.FirstChild(); child != nil; { - next := child.NextSibling() - image.AppendChild(image, child) - child = next - } - - // Append our duplicate image to the wrapper link - wrap.AppendChild(wrap, image) - - // Wire in the next sibling - wrap.SetNextSibling(next) - - // Replace the current node with the wrapper link - parent.ReplaceChild(parent, n, wrap) - - // But most importantly ensure the next sibling is still on the old image too - v.SetNextSibling(next) - } + g.transformImage(ctx, v, reader) case *ast.Link: - // Links need their href to munged to be a real value - link := v.Destination - - // Do not process the link if it's not a link, starts with an hashtag - // (indicating it's an anchor link), starts with `mailto:` or any of the - // custom markdown URLs. - processLink := len(link) > 0 && !markup.IsLink(link) && - link[0] != '#' && !bytes.HasPrefix(link, byteMailto) && - !slices.ContainsFunc(setting.Markdown.CustomURLSchemes, func(s string) bool { - return bytes.HasPrefix(link, []byte(s+":")) - }) - - if processLink { - var base string - if ctx.IsWiki { - base = ctx.Links.WikiLink() - } else if ctx.Links.HasBranchInfo() { - base = ctx.Links.SrcLink() - } else { - base = ctx.Links.Base - } - - link = []byte(giteautil.URLJoin(base, string(link))) - } - if len(link) > 0 && link[0] == '#' { - link = []byte("#user-content-" + string(link)[1:]) - } - v.Destination = link + g.transformLink(ctx, v, reader) case *ast.List: - if v.HasChildren() { - children := make([]ast.Node, 0, v.ChildCount()) - child := v.FirstChild() - for child != nil { - children = append(children, child) - child = child.NextSibling() - } - v.RemoveChildren(v) - - for _, child := range children { - listItem := child.(*ast.ListItem) - if !child.HasChildren() || !child.FirstChild().HasChildren() { - v.AppendChild(v, child) - continue - } - taskCheckBox, ok := child.FirstChild().FirstChild().(*east.TaskCheckBox) - if !ok { - v.AppendChild(v, child) - continue - } - newChild := NewTaskCheckBoxListItem(listItem) - newChild.IsChecked = taskCheckBox.IsChecked - newChild.SetAttributeString("class", []byte("task-list-item")) - segments := newChild.FirstChild().Lines() - if segments.Len() > 0 { - segment := segments.At(0) - newChild.SourcePosition = rc.metaLength + segment.Start - } - v.AppendChild(v, newChild) - } - } - applyElementDir(v) + g.transformList(ctx, v, reader, rc) case *ast.Text: if v.SoftLineBreak() && !v.HardLineBreak() { if ctx.Metas["mode"] != "document" { @@ -197,10 +72,7 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa } } case *ast.CodeSpan: - colorContent := n.Text(reader.Source()) - if matchColor(strings.ToLower(string(colorContent))) { - v.AppendChild(v, NewColorPreview(colorContent)) - } + g.transformCodeSpan(ctx, v, reader) } return ast.WalkContinue, nil }) @@ -222,50 +94,6 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa } } -type prefixedIDs struct { - values container.Set[string] -} - -// Generate generates a new element id. -func (p *prefixedIDs) Generate(value []byte, kind ast.NodeKind) []byte { - dft := []byte("id") - if kind == ast.KindHeading { - dft = []byte("heading") - } - return p.GenerateWithDefault(value, dft) -} - -// Generate generates a new element id. -func (p *prefixedIDs) GenerateWithDefault(value, dft []byte) []byte { - result := common.CleanValue(value) - if len(result) == 0 { - result = dft - } - if !bytes.HasPrefix(result, []byte("user-content-")) { - result = append([]byte("user-content-"), result...) - } - if p.values.Add(util.BytesToReadOnlyString(result)) { - return result - } - for i := 1; ; i++ { - newResult := fmt.Sprintf("%s-%d", result, i) - if p.values.Add(newResult) { - return []byte(newResult) - } - } -} - -// Put puts a given element id to the used ids table. -func (p *prefixedIDs) Put(value []byte) { - p.values.Add(util.BytesToReadOnlyString(value)) -} - -func newPrefixedIDs() *prefixedIDs { - return &prefixedIDs{ - values: make(container.Set[string]), - } -} - // NewHTMLRenderer creates a HTMLRenderer to render // in the gitea form. func NewHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { @@ -295,38 +123,6 @@ func (r *HTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { reg.Register(east.KindTaskCheckBox, r.renderTaskCheckBox) } -// renderCodeSpan renders CodeSpan elements (like goldmark upstream does) but also renders ColorPreview elements. -// See #21474 for reference -func (r *HTMLRenderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { - if entering { - if n.Attributes() != nil { - _, _ = w.WriteString("') - } else { - _, _ = w.WriteString("") - } - for c := n.FirstChild(); c != nil; c = c.NextSibling() { - switch v := c.(type) { - case *ast.Text: - segment := v.Segment - value := segment.Value(source) - if bytes.HasSuffix(value, []byte("\n")) { - r.Writer.RawWrite(w, value[:len(value)-1]) - r.Writer.RawWrite(w, []byte(" ")) - } else { - r.Writer.RawWrite(w, value) - } - case *ColorPreview: - _, _ = w.WriteString(fmt.Sprintf(``, string(v.Color))) - } - } - return ast.WalkSkipChildren, nil - } - _, _ = w.WriteString("") - return ast.WalkContinue, nil -} - func (r *HTMLRenderer) renderDocument(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { n := node.(*ast.Document) @@ -415,38 +211,3 @@ func (r *HTMLRenderer) renderIcon(w util.BufWriter, source []byte, node ast.Node return ast.WalkContinue, nil } - -func (r *HTMLRenderer) renderTaskCheckBoxListItem(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { - n := node.(*TaskCheckBoxListItem) - if entering { - if n.Attributes() != nil { - _, _ = w.WriteString("') - } else { - _, _ = w.WriteString("
  • ") - } - fmt.Fprintf(w, ``) - } else { - _ = w.WriteByte('>') - } - fc := n.FirstChild() - if fc != nil { - if _, ok := fc.(*ast.TextBlock); !ok { - _ = w.WriteByte('\n') - } - } - } else { - _, _ = w.WriteString("
  • \n") - } - return ast.WalkContinue, nil -} - -func (r *HTMLRenderer) renderTaskCheckBox(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { - return ast.WalkContinue, nil -} diff --git a/modules/markup/markdown/prefixed_id.go b/modules/markup/markdown/prefixed_id.go new file mode 100644 index 000000000..9c6094920 --- /dev/null +++ b/modules/markup/markdown/prefixed_id.go @@ -0,0 +1,59 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package markdown + +import ( + "bytes" + "fmt" + + "code.gitea.io/gitea/modules/container" + "code.gitea.io/gitea/modules/markup/common" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/util" +) + +type prefixedIDs struct { + values container.Set[string] +} + +// Generate generates a new element id. +func (p *prefixedIDs) Generate(value []byte, kind ast.NodeKind) []byte { + dft := []byte("id") + if kind == ast.KindHeading { + dft = []byte("heading") + } + return p.GenerateWithDefault(value, dft) +} + +// GenerateWithDefault generates a new element id. +func (p *prefixedIDs) GenerateWithDefault(value, dft []byte) []byte { + result := common.CleanValue(value) + if len(result) == 0 { + result = dft + } + if !bytes.HasPrefix(result, []byte("user-content-")) { + result = append([]byte("user-content-"), result...) + } + if p.values.Add(util.BytesToReadOnlyString(result)) { + return result + } + for i := 1; ; i++ { + newResult := fmt.Sprintf("%s-%d", result, i) + if p.values.Add(newResult) { + return []byte(newResult) + } + } +} + +// Put puts a given element id to the used ids table. +func (p *prefixedIDs) Put(value []byte) { + p.values.Add(util.BytesToReadOnlyString(value)) +} + +func newPrefixedIDs() *prefixedIDs { + return &prefixedIDs{ + values: make(container.Set[string]), + } +} diff --git a/modules/markup/markdown/transform_codespan.go b/modules/markup/markdown/transform_codespan.go new file mode 100644 index 000000000..0cf1169de --- /dev/null +++ b/modules/markup/markdown/transform_codespan.go @@ -0,0 +1,56 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package markdown + +import ( + "bytes" + "fmt" + "strings" + + "code.gitea.io/gitea/modules/markup" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +// renderCodeSpan renders CodeSpan elements (like goldmark upstream does) but also renders ColorPreview elements. +// See #21474 for reference +func (r *HTMLRenderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { + if entering { + if n.Attributes() != nil { + _, _ = w.WriteString("') + } else { + _, _ = w.WriteString("") + } + for c := n.FirstChild(); c != nil; c = c.NextSibling() { + switch v := c.(type) { + case *ast.Text: + segment := v.Segment + value := segment.Value(source) + if bytes.HasSuffix(value, []byte("\n")) { + r.Writer.RawWrite(w, value[:len(value)-1]) + r.Writer.RawWrite(w, []byte(" ")) + } else { + r.Writer.RawWrite(w, value) + } + case *ColorPreview: + _, _ = w.WriteString(fmt.Sprintf(``, string(v.Color))) + } + } + return ast.WalkSkipChildren, nil + } + _, _ = w.WriteString("") + return ast.WalkContinue, nil +} + +func (g *ASTTransformer) transformCodeSpan(ctx *markup.RenderContext, v *ast.CodeSpan, reader text.Reader) { + colorContent := v.Text(reader.Source()) + if matchColor(strings.ToLower(string(colorContent))) { + v.AppendChild(v, NewColorPreview(colorContent)) + } +} diff --git a/modules/markup/markdown/transform_heading.go b/modules/markup/markdown/transform_heading.go new file mode 100644 index 000000000..ce585a37d --- /dev/null +++ b/modules/markup/markdown/transform_heading.go @@ -0,0 +1,32 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package markdown + +import ( + "fmt" + + "code.gitea.io/gitea/modules/markup" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +func (g *ASTTransformer) transformHeading(ctx *markup.RenderContext, v *ast.Heading, reader text.Reader, tocList *[]markup.Header) { + for _, attr := range v.Attributes() { + if _, ok := attr.Value.([]byte); !ok { + v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value))) + } + } + txt := v.Text(reader.Source()) + header := markup.Header{ + Text: util.BytesToReadOnlyString(txt), + Level: v.Level, + } + if id, found := v.AttributeString("id"); found { + header.ID = util.BytesToReadOnlyString(id.([]byte)) + } + *tocList = append(*tocList, header) + g.applyElementDir(v) +} diff --git a/modules/markup/markdown/transform_image.go b/modules/markup/markdown/transform_image.go new file mode 100644 index 000000000..db449e9dd --- /dev/null +++ b/modules/markup/markdown/transform_image.go @@ -0,0 +1,66 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package markdown + +import ( + "strings" + + "code.gitea.io/gitea/modules/markup" + giteautil "code.gitea.io/gitea/modules/util" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" +) + +func (g *ASTTransformer) transformImage(ctx *markup.RenderContext, v *ast.Image, reader text.Reader) { + // Images need two things: + // + // 1. Their src needs to munged to be a real value + // 2. If they're not wrapped with a link they need a link wrapper + + // Check if the destination is a real link + if len(v.Destination) > 0 && !markup.IsLink(v.Destination) { + v.Destination = []byte(giteautil.URLJoin( + ctx.Links.ResolveMediaLink(ctx.IsWiki), + strings.TrimLeft(string(v.Destination), "/"), + )) + } + + parent := v.Parent() + // Create a link around image only if parent is not already a link + if _, ok := parent.(*ast.Link); !ok && parent != nil { + next := v.NextSibling() + + // Create a link wrapper + wrap := ast.NewLink() + wrap.Destination = v.Destination + wrap.Title = v.Title + wrap.SetAttributeString("target", []byte("_blank")) + + // Duplicate the current image node + image := ast.NewImage(ast.NewLink()) + image.Destination = v.Destination + image.Title = v.Title + for _, attr := range v.Attributes() { + image.SetAttribute(attr.Name, attr.Value) + } + for child := v.FirstChild(); child != nil; { + next := child.NextSibling() + image.AppendChild(image, child) + child = next + } + + // Append our duplicate image to the wrapper link + wrap.AppendChild(wrap, image) + + // Wire in the next sibling + wrap.SetNextSibling(next) + + // Replace the current node with the wrapper link + parent.ReplaceChild(parent, v, wrap) + + // But most importantly ensure the next sibling is still on the old image too + v.SetNextSibling(next) + } +} diff --git a/modules/markup/markdown/transform_link.go b/modules/markup/markdown/transform_link.go new file mode 100644 index 000000000..aceae5b74 --- /dev/null +++ b/modules/markup/markdown/transform_link.go @@ -0,0 +1,47 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package markdown + +import ( + "bytes" + "slices" + + "code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/setting" + giteautil "code.gitea.io/gitea/modules/util" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" +) + +func (g *ASTTransformer) transformLink(ctx *markup.RenderContext, v *ast.Link, reader text.Reader) { + // Links need their href to munged to be a real value + link := v.Destination + + // Do not process the link if it's not a link, starts with an hashtag + // (indicating it's an anchor link), starts with `mailto:` or any of the + // custom markdown URLs. + processLink := len(link) > 0 && !markup.IsLink(link) && + link[0] != '#' && !bytes.HasPrefix(link, byteMailto) && + !slices.ContainsFunc(setting.Markdown.CustomURLSchemes, func(s string) bool { + return bytes.HasPrefix(link, []byte(s+":")) + }) + + if processLink { + var base string + if ctx.IsWiki { + base = ctx.Links.WikiLink() + } else if ctx.Links.HasBranchInfo() { + base = ctx.Links.SrcLink() + } else { + base = ctx.Links.Base + } + + link = []byte(giteautil.URLJoin(base, string(link))) + } + if len(link) > 0 && link[0] == '#' { + link = []byte("#user-content-" + string(link)[1:]) + } + v.Destination = link +} diff --git a/modules/markup/markdown/transform_list.go b/modules/markup/markdown/transform_list.go new file mode 100644 index 000000000..6563e2dd6 --- /dev/null +++ b/modules/markup/markdown/transform_list.go @@ -0,0 +1,86 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package markdown + +import ( + "fmt" + + "code.gitea.io/gitea/modules/markup" + + "github.com/yuin/goldmark/ast" + east "github.com/yuin/goldmark/extension/ast" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +func (r *HTMLRenderer) renderTaskCheckBoxListItem(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*TaskCheckBoxListItem) + if entering { + if n.Attributes() != nil { + _, _ = w.WriteString("') + } else { + _, _ = w.WriteString("
  • ") + } + fmt.Fprintf(w, ``) + } else { + _ = w.WriteByte('>') + } + fc := n.FirstChild() + if fc != nil { + if _, ok := fc.(*ast.TextBlock); !ok { + _ = w.WriteByte('\n') + } + } + } else { + _, _ = w.WriteString("
  • \n") + } + return ast.WalkContinue, nil +} + +func (r *HTMLRenderer) renderTaskCheckBox(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + return ast.WalkContinue, nil +} + +func (g *ASTTransformer) transformList(ctx *markup.RenderContext, v *ast.List, reader text.Reader, rc *RenderConfig) { + if v.HasChildren() { + children := make([]ast.Node, 0, v.ChildCount()) + child := v.FirstChild() + for child != nil { + children = append(children, child) + child = child.NextSibling() + } + v.RemoveChildren(v) + + for _, child := range children { + listItem := child.(*ast.ListItem) + if !child.HasChildren() || !child.FirstChild().HasChildren() { + v.AppendChild(v, child) + continue + } + taskCheckBox, ok := child.FirstChild().FirstChild().(*east.TaskCheckBox) + if !ok { + v.AppendChild(v, child) + continue + } + newChild := NewTaskCheckBoxListItem(listItem) + newChild.IsChecked = taskCheckBox.IsChecked + newChild.SetAttributeString("class", []byte("task-list-item")) + segments := newChild.FirstChild().Lines() + if segments.Len() > 0 { + segment := segments.At(0) + newChild.SourcePosition = rc.metaLength + segment.Start + } + v.AppendChild(v, newChild) + } + } + g.applyElementDir(v) +}