Add support for different Maven POM encoding (#25873)

Fixes #25853

- Maven POM files aren't always UTF-8 encoded.
- Reject the upload of unparsable POM files
This commit is contained in:
KN4CK3R 2023-07-14 11:39:15 +02:00 committed by GitHub
parent dc679fc9fa
commit bd82d8974e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 30 additions and 2 deletions

View file

@ -8,6 +8,8 @@ import (
"io"
"code.gitea.io/gitea/modules/validation"
"golang.org/x/net/html/charset"
)
// Metadata represents the metadata of a Maven package
@ -52,7 +54,10 @@ type pomStruct struct {
// ParsePackageMetaData parses the metadata of a pom file
func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
var pom pomStruct
if err := xml.NewDecoder(r).Decode(&pom); err != nil {
dec := xml.NewDecoder(r)
dec.CharsetReader = charset.NewReaderLabel
if err := dec.Decode(&pom); err != nil {
return nil, err
}

View file

@ -8,6 +8,7 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"golang.org/x/text/encoding/charmap"
)
const (
@ -69,4 +70,20 @@ func TestParsePackageMetaData(t *testing.T) {
assert.Equal(t, dependencyArtifactID, m.Dependencies[0].ArtifactID)
assert.Equal(t, dependencyVersion, m.Dependencies[0].Version)
})
t.Run("Encoding", func(t *testing.T) {
// UTF-8 is default but the metadata could be encoded differently
pomContent8859_1, err := charmap.ISO8859_1.NewEncoder().String(
strings.ReplaceAll(
pomContent,
`<?xml version="1.0"?>`,
`<?xml version="1.0" encoding="ISO-8859-1"?>`,
),
)
assert.NoError(t, err)
m, err := ParsePackageMetaData(strings.NewReader(pomContent8859_1))
assert.NoError(t, err)
assert.NotNil(t, m)
})
}