Normalize oauth email username (#28561)

This commit is contained in:
Kyle D 2024-01-03 16:48:20 -08:00 committed by GitHub
parent 657b23d635
commit 54acf7b0d4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 80 additions and 10 deletions

View file

@ -10,8 +10,10 @@ import (
"fmt"
"net/url"
"path/filepath"
"regexp"
"strings"
"time"
"unicode"
_ "image/jpeg" // Needed for jpeg support
@ -29,6 +31,9 @@ import (
"code.gitea.io/gitea/modules/util"
"code.gitea.io/gitea/modules/validation"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
"xorm.io/builder"
)
@ -515,6 +520,26 @@ func GetUserSalt() (string, error) {
return hex.EncodeToString(rBytes), nil
}
// Building blocks used by NormalizeUserName, listed in the order they are applied.
//
// Note: the set of characters handled here can safely expand without a
// breaking change, but removing a character from the set can cause user
// account linking to break.
var (
	// customCharsReplacement expands "Æ" to "AE".
	customCharsReplacement = strings.NewReplacer("Æ", "AE")

	// removeDiacriticsTransform decomposes the input (NFD), drops every rune
	// in unicode.Mn, and recomposes the remainder (NFC).
	// NOTE(review): this single Transformer value is shared by all callers —
	// confirm that is safe if NormalizeUserName can be called concurrently.
	removeDiacriticsTransform = transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)

	// removeCharsRE matches the quote-like characters that are deleted outright:
	// apostrophe, acute accent and backtick (\x60).
	removeCharsRE = regexp.MustCompile(`['´\x60]`)

	// replaceCharsHyphenRE matches whitespace, `~` and `+`, each of which is
	// replaced with a `-`.
	replaceCharsHyphenRE = regexp.MustCompile(`[\s~+]`)
)
// normalizeUserName returns a string with single-quotes and diacritics
// removed, and any other non-supported username characters replaced with
// a `-` character
func NormalizeUserName(s string) (string, error) {
strDiacriticsRemoved, n, err := transform.String(removeDiacriticsTransform, customCharsReplacement.Replace(s))
if err != nil {
return "", fmt.Errorf("Failed to normalize character `%v` in provided username `%v`", s[n], s)
}
return replaceCharsHyphenRE.ReplaceAllLiteralString(removeCharsRE.ReplaceAllLiteralString(strDiacriticsRemoved, ""), "-"), nil
}
var (
reservedUsernames = []string{
".",

View file

@ -544,3 +544,31 @@ func Test_ValidateUser(t *testing.T) {
assert.EqualValues(t, expected, err == nil, fmt.Sprintf("case: %+v", kase))
}
}
// Test_NormalizeUserFromEmail feeds a table of raw names through
// user_model.NormalizeUserName and checks both the normalized output and
// whether user_model.IsUsableUsername accepts that output.
func Test_NormalizeUserFromEmail(t *testing.T) {
	testCases := []struct {
		Input             string
		Expected          string
		IsNormalizedValid bool
	}{
		{"test", "test", true},
		{"Sinéad.O'Connor", "Sinead.OConnor", true},
		{"Æsir", "AEsir", true},
		// Precomposed U+00E9 followed by "e" + combining acute accent U+0301.
		// Written with escapes so editors or tooling that normalize Unicode
		// cannot silently collapse the two forms into one.
		{"\u00e9\u0065\u0301", "ee", true},
		{"Awareness Hub", "Awareness-Hub", true},
		{"double__underscore", "double__underscore", false}, // We should consider squashing double non-alpha characters
		{".bad.", ".bad.", false},
		{"new😀user", "new😀user", false}, // No plans to support
	}
	for _, testCase := range testCases {
		// Attach the case to every assertion so a failure identifies its input.
		msg := fmt.Sprintf("case: %+v", testCase)

		normalizedName, err := user_model.NormalizeUserName(testCase.Input)
		if !assert.NoError(t, err, msg) {
			// The remaining checks are meaningless without a normalized name.
			continue
		}
		assert.EqualValues(t, testCase.Expected, normalizedName, msg)
		if testCase.IsNormalizedValid {
			assert.NoError(t, user_model.IsUsableUsername(normalizedName), msg)
		} else {
			assert.Error(t, user_model.IsUsableUsername(normalizedName), msg)
		}
	}
}