Properly determine CSV delimiter (#17459)

* Fixes #16558 CSV delimiter determiner

* Fixes #16558 - properly determine CSV delimiter

* Moves quoteString to a new function

* Adds big test with lots of commas for tab delimited csv

* Adds comments

* Shortens the text of the test

* Removes single quotes from regexp as only double quotes need to be searched

* Fixes spelling

* Fixes check of length as it probably will only be 1e4, not greater

* Makes sample size a const, properly removes truncated line

* Makes sample size a const, properly removes truncated line

* Fixes comment

* Fixes comment

* tests for FormatError() function

* Adds logic to find the limiter before or after a quoted value

* Simplifies regex

* Error tests

* Error tests

* Update modules/csv/csv.go

Co-authored-by: delvh <dev.lh@web.de>

* Update modules/csv/csv.go

Co-authored-by: delvh <dev.lh@web.de>

* Adds comments

* Update modules/csv/csv.go

Co-authored-by: delvh <dev.lh@web.de>

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
Co-authored-by: zeripath <art27@cantab.net>
Co-authored-by: delvh <dev.lh@web.de>
This commit is contained in:
Richard Mahn 2021-10-30 09:50:40 -06:00 committed by GitHub
parent 63c0dc89ef
commit 40c8451b7d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 642 additions and 94 deletions

View file

@ -23,6 +23,7 @@ import (
csv_module "code.gitea.io/gitea/modules/csv"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/upload"
"code.gitea.io/gitea/modules/util"
@ -106,7 +107,7 @@ func setCsvCompareContext(ctx *context.Context) {
errTooLarge := errors.New(ctx.Locale.Tr("repo.error.csv.too_large"))
csvReaderFromCommit := func(c *git.Commit) (*csv.Reader, io.Closer, error) {
csvReaderFromCommit := func(ctx *markup.RenderContext, c *git.Commit) (*csv.Reader, io.Closer, error) {
blob, err := c.GetBlobByPath(diffFile.Name)
if err != nil {
return nil, nil, err
@ -121,18 +122,18 @@ func setCsvCompareContext(ctx *context.Context) {
return nil, nil, err
}
csvReader, err := csv_module.CreateReaderAndGuessDelimiter(charset.ToUTF8WithFallbackReader(reader))
csvReader, err := csv_module.CreateReaderAndDetermineDelimiter(ctx, charset.ToUTF8WithFallbackReader(reader))
return csvReader, reader, err
}
baseReader, baseBlobCloser, err := csvReaderFromCommit(baseCommit)
baseReader, baseBlobCloser, err := csvReaderFromCommit(&markup.RenderContext{Filename: diffFile.OldName}, baseCommit)
if baseBlobCloser != nil {
defer baseBlobCloser.Close()
}
if err == errTooLarge {
return CsvDiffResult{nil, err.Error()}
}
headReader, headBlobCloser, err := csvReaderFromCommit(headCommit)
headReader, headBlobCloser, err := csvReaderFromCommit(&markup.RenderContext{Filename: diffFile.Name}, headCommit)
if headBlobCloser != nil {
defer headBlobCloser.Close()
}