[FEAT] Enable ambiguous character detection in configured contexts
- The ambiguous character detection is an important security feature to combat Trojan Source attacks (https://trojansource.codes/). - However, there are a few problems with the feature as it stands today: (i) it's apparently a big performance hit, being twice as slow as syntax highlighting; (ii) it produces false positives, because it reports characters that are genuinely ambiguous but that cannot lead to security issues within the context of a programming language (ambiguous characters in code comments being a prime example); (iii) characters from certain languages are always marked as ambiguous. It's a lot of effort to fix the aforementioned issues. - Therefore, make it configurable in which contexts the ambiguous character detection should be run. This avoids running detection in all contexts, such as file views, but still enables it in commit and pull request diffs, where it matters the most. Ideally this would also become a per-repository setting, but the code architecture doesn't allow for a clean implementation of that. - Adds unit tests. - Adds integration tests to ensure that the contexts and the instance-wide setting are respected (and that ambiguous character detection actually works in different places). - Ref: https://codeberg.org/forgejo/forgejo/pulls/2395#issuecomment-1575547 - Ref: https://codeberg.org/forgejo/forgejo/issues/564
This commit is contained in:
parent
0081e59243
commit
5b3a82d621
9 changed files with 151 additions and 15 deletions
|
@ -10,6 +10,7 @@ package charset
|
|||
import (
|
||||
"html/template"
|
||||
"io"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
|
@ -20,16 +21,29 @@ import (
|
|||
// RuneNBSP is the codepoint for NBSP
|
||||
const RuneNBSP = 0xa0
|
||||
|
||||
type escapeContext string
|
||||
|
||||
// Keep this consistent with the documentation of [ui].SKIP_ESCAPE_CONTEXTS
|
||||
// Defines the different contexts in which escaping can be applied.
|
||||
const (
|
||||
// Wiki pages.
|
||||
WikiContext escapeContext = "wiki"
|
||||
// Rendered content (except markup), source code and blames.
|
||||
FileviewContext escapeContext = "file-view"
|
||||
// Commit or pull request diffs.
|
||||
DiffContext escapeContext = "diff"
|
||||
)
|
||||
|
||||
// EscapeControlHTML escapes the unicode control sequences in a provided html document
|
||||
func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) {
|
||||
func EscapeControlHTML(html template.HTML, locale translation.Locale, context escapeContext, allowed ...rune) (escaped *EscapeStatus, output template.HTML) {
|
||||
sb := &strings.Builder{}
|
||||
escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader
|
||||
escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, context, allowed...) // err has been handled in EscapeControlReader
|
||||
return escaped, template.HTML(sb.String())
|
||||
}
|
||||
|
||||
// EscapeControlReader escapes the unicode control sequences in the HTML content read from reader, writes the result to writer using the given locale, and returns the findings as an EscapeStatus
|
||||
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
|
||||
if !setting.UI.AmbiguousUnicodeDetection {
|
||||
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, context escapeContext, allowed ...rune) (escaped *EscapeStatus, err error) {
|
||||
if !setting.UI.AmbiguousUnicodeDetection || slices.Contains(setting.UI.SkipEscapeContexts, string(context)) {
|
||||
_, err = io.Copy(writer, reader)
|
||||
return &EscapeStatus{}, err
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
package charset
|
||||
|
||||
import (
|
||||
"html/template"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
|
@ -14,6 +15,8 @@ import (
|
|||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
var testContext = escapeContext("test")
|
||||
|
||||
type escapeControlTest struct {
|
||||
name string
|
||||
text string
|
||||
|
@ -159,7 +162,7 @@ func TestEscapeControlReader(t *testing.T) {
|
|||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
output := &strings.Builder{}
|
||||
status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{})
|
||||
status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{}, testContext)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tt.status, *status)
|
||||
assert.Equal(t, tt.result, output.String())
|
||||
|
@ -169,9 +172,22 @@ func TestEscapeControlReader(t *testing.T) {
|
|||
|
||||
func TestSettingAmbiguousUnicodeDetection(t *testing.T) {
|
||||
defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, true)()
|
||||
_, out := EscapeControlHTML("a test", &translation.MockLocale{})
|
||||
|
||||
_, out := EscapeControlHTML("a test", &translation.MockLocale{}, testContext)
|
||||
assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out)
|
||||
setting.UI.AmbiguousUnicodeDetection = false
|
||||
_, out = EscapeControlHTML("a test", &translation.MockLocale{})
|
||||
_, out = EscapeControlHTML("a test", &translation.MockLocale{}, testContext)
|
||||
assert.EqualValues(t, `a test`, out)
|
||||
}
|
||||
|
||||
func TestAmbiguousUnicodeDetectionContext(t *testing.T) {
|
||||
defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"test"})()
|
||||
|
||||
input := template.HTML("a test")
|
||||
|
||||
_, out := EscapeControlHTML(input, &translation.MockLocale{}, escapeContext("not-test"))
|
||||
assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out)
|
||||
|
||||
_, out = EscapeControlHTML(input, &translation.MockLocale{}, testContext)
|
||||
assert.EqualValues(t, input, out)
|
||||
}
|
||||
|
|
|
@ -38,6 +38,7 @@ var UI = struct {
|
|||
PreferredTimestampTense string
|
||||
|
||||
AmbiguousUnicodeDetection bool
|
||||
SkipEscapeContexts []string
|
||||
|
||||
Notification struct {
|
||||
MinTimeout time.Duration
|
||||
|
@ -89,6 +90,7 @@ var UI = struct {
|
|||
PreferredTimestampTense: "mixed",
|
||||
|
||||
AmbiguousUnicodeDetection: true,
|
||||
SkipEscapeContexts: []string{},
|
||||
|
||||
Notification: struct {
|
||||
MinTimeout time.Duration
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue