forgejo/modules/lfs/content_store.go
Gusted fa37a211fb
[GITEA] Drop sha256-simd in favor of stdlib
- In Go 1.21 the crypto/sha256 [got a massive
improvement](https://go.dev/doc/go1.21#crypto/sha256) by utilizing the
SHA instructions for AMD64 CPUs, which sha256-simd already was doing.
The performance is now on par and I think it's preferable to use the
standard library rather than a package when possible.

```
cpu: AMD Ryzen 5 3600X 6-Core Processor
                │  simd.txt   │               go.txt                │
                │   sec/op    │    sec/op     vs base               │
Hash/8Bytes-12    63.25n ± 1%    73.38n ± 1%  +16.02% (p=0.002 n=6)
Hash/64Bytes-12   98.73n ± 1%   105.30n ± 1%   +6.65% (p=0.002 n=6)
Hash/1K-12        567.2n ± 1%    572.8n ± 1%   +0.99% (p=0.002 n=6)
Hash/8K-12        4.062µ ± 1%    4.062µ ± 1%        ~ (p=0.396 n=6)
Hash/1M-12        512.1µ ± 0%    510.6µ ± 1%        ~ (p=0.485 n=6)
Hash/5M-12        2.556m ± 1%    2.564m ± 0%        ~ (p=0.093 n=6)
Hash/10M-12       5.112m ± 0%    5.127m ± 0%        ~ (p=0.093 n=6)
geomean           13.82µ         14.27µ        +3.28%

                │   simd.txt   │               go.txt                │
                │     B/s      │     B/s       vs base               │
Hash/8Bytes-12    120.6Mi ± 1%   104.0Mi ± 1%  -13.81% (p=0.002 n=6)
Hash/64Bytes-12   618.2Mi ± 1%   579.8Mi ± 1%   -6.22% (p=0.002 n=6)
Hash/1K-12        1.682Gi ± 1%   1.665Gi ± 1%   -0.98% (p=0.002 n=6)
Hash/8K-12        1.878Gi ± 1%   1.878Gi ± 1%        ~ (p=0.310 n=6)
Hash/1M-12        1.907Gi ± 0%   1.913Gi ± 1%        ~ (p=0.485 n=6)
Hash/5M-12        1.911Gi ± 1%   1.904Gi ± 0%        ~ (p=0.093 n=6)
Hash/10M-12       1.910Gi ± 0%   1.905Gi ± 0%        ~ (p=0.093 n=6)
geomean           1.066Gi        1.032Gi        -3.18%
```

(cherry picked from commit abd94ff5b59c86e793fd9bf12187ea6cfd1f3fa1)
(cherry picked from commit 15e81637abf70576a564cf9eecaa9640228afb5b)

Conflicts:
	go.mod
	https://codeberg.org/forgejo/forgejo/pulls/1581
(cherry picked from commit 325d92917f655c999b81b08832ee623d6b669f0f)

Conflicts:
	modules/context/context_cookie.go
	https://codeberg.org/forgejo/forgejo/pulls/1617
(cherry picked from commit 358819e8959886faa171ac16541097500d0a703e)
(cherry picked from commit 362fd7aae17832fa922fa017794bc564ca43060d)
(cherry picked from commit 4f64ee294ee05c93042b6ec68f0a179ec249dab9)
(cherry picked from commit 4bde77f7b13c5f961c141c01b6da1f9eda5ec387)
(cherry picked from commit 1311e30a811675eb623692349e4e808a85aabef6)
(cherry picked from commit 57b69e334c2973118488b9b5dbdc8a2c88135756)
(cherry picked from commit 52dc892fadecf39e89c3c351edc9efb42522257b)
(cherry picked from commit 77f54f4187869c6eabcc837742fd3f908093a76c)
(cherry picked from commit 0d0392f3a510ce3683bb649dee1e65b45dd91354)

Conflicts:
	go.mod
	https://codeberg.org/forgejo/forgejo/pulls/2034
(cherry picked from commit 92798364e8fe3188a2100b54f3adea943f8309e9)
(cherry picked from commit 43d218127752aa9251c4c3ef71b9c060f109dffc)
(cherry picked from commit 45c88b86a35729fc0b2dc6b72bc33caf9f69265f)
(cherry picked from commit a1cd6f4e3a7956773cbc0aef8abb80d17b62eb49)
(cherry picked from commit 01191dc2adf8c57ae448be37e73158005a8ff74d)
(cherry picked from commit 151e07f37e2854ad633f1352fb0ce3cd06f4b2ae)
2024-02-05 16:09:40 +01:00

163 lines
4.6 KiB
Go

// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package lfs
import (
"crypto/sha256"
"encoding/hex"
"errors"
"hash"
"io"
"os"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/storage"
)
var (
// ErrHashMismatch occurs if the content has does not match OID
ErrHashMismatch = errors.New("content hash does not match OID")
// ErrSizeMismatch occurs if the content size does not match
ErrSizeMismatch = errors.New("content size does not match")
)
// ContentStore provides a simple file system based storage.
type ContentStore struct {
storage.ObjectStorage
}
// NewContentStore creates the default ContentStore
func NewContentStore() *ContentStore {
contentStore := &ContentStore{ObjectStorage: storage.LFS}
return contentStore
}
// Get takes a Meta object and retrieves the content from the store, returning
// it as an io.ReadSeekCloser.
func (s *ContentStore) Get(pointer Pointer) (storage.Object, error) {
f, err := s.Open(pointer.RelativePath())
if err != nil {
log.Error("Whilst trying to read LFS OID[%s]: Unable to open Error: %v", pointer.Oid, err)
return nil, err
}
return f, err
}
// Put takes a Meta object and an io.Reader and writes the content to the store.
func (s *ContentStore) Put(pointer Pointer, r io.Reader) error {
p := pointer.RelativePath()
// Wrap the provided reader with an inline hashing and size checker
wrappedRd := newHashingReader(pointer.Size, pointer.Oid, r)
// now pass the wrapped reader to Save - if there is a size mismatch or hash mismatch then
// the errors returned by the newHashingReader should percolate up to here
written, err := s.Save(p, wrappedRd, pointer.Size)
if err != nil {
log.Error("Whilst putting LFS OID[%s]: Failed to copy to tmpPath: %s Error: %v", pointer.Oid, p, err)
return err
}
// check again whether there is any error during the Save operation
// because some errors might be ignored by the Reader's caller
if wrappedRd.lastError != nil && !errors.Is(wrappedRd.lastError, io.EOF) {
err = wrappedRd.lastError
} else if written != pointer.Size {
err = ErrSizeMismatch
}
// if the upload failed, try to delete the file
if err != nil {
if errDel := s.Delete(p); errDel != nil {
log.Error("Cleaning the LFS OID[%s] failed: %v", pointer.Oid, errDel)
}
}
return err
}
// Exists returns true if the object exists in the content store.
func (s *ContentStore) Exists(pointer Pointer) (bool, error) {
_, err := s.ObjectStorage.Stat(pointer.RelativePath())
if err != nil {
if os.IsNotExist(err) {
return false, nil
}
return false, err
}
return true, nil
}
// Verify returns true if the object exists in the content store and size is correct.
func (s *ContentStore) Verify(pointer Pointer) (bool, error) {
p := pointer.RelativePath()
fi, err := s.ObjectStorage.Stat(p)
if os.IsNotExist(err) || (err == nil && fi.Size() != pointer.Size) {
return false, nil
} else if err != nil {
log.Error("Unable stat file: %s for LFS OID[%s] Error: %v", p, pointer.Oid, err)
return false, err
}
return true, nil
}
// ReadMetaObject will read a git_model.LFSMetaObject and return a reader
func ReadMetaObject(pointer Pointer) (io.ReadSeekCloser, error) {
contentStore := NewContentStore()
return contentStore.Get(pointer)
}
type hashingReader struct {
internal io.Reader
currentSize int64
expectedSize int64
hash hash.Hash
expectedHash string
lastError error
}
// recordError records the last error during the Save operation
// Some callers of the Reader doesn't respect the returned "err"
// For example, MinIO's Put will ignore errors if the written size could equal to expected size
// So we must remember the error by ourselves,
// and later check again whether ErrSizeMismatch or ErrHashMismatch occurs during the Save operation
func (r *hashingReader) recordError(err error) error {
r.lastError = err
return err
}
func (r *hashingReader) Read(b []byte) (int, error) {
n, err := r.internal.Read(b)
if n > 0 {
r.currentSize += int64(n)
wn, werr := r.hash.Write(b[:n])
if wn != n || werr != nil {
return n, r.recordError(werr)
}
}
if errors.Is(err, io.EOF) || r.currentSize >= r.expectedSize {
if r.currentSize != r.expectedSize {
return n, r.recordError(ErrSizeMismatch)
}
shaStr := hex.EncodeToString(r.hash.Sum(nil))
if shaStr != r.expectedHash {
return n, r.recordError(ErrHashMismatch)
}
}
return n, r.recordError(err)
}
func newHashingReader(expectedSize int64, expectedHash string, reader io.Reader) *hashingReader {
return &hashingReader{
internal: reader,
expectedSize: expectedSize,
expectedHash: expectedHash,
hash: sha256.New(),
}
}