mirror of
https://codeberg.org/davrot/forgejo.git
synced 2025-06-26 08:00:02 +02:00
blob: GetBlobContent: reduce allocations (#8223)
See #8222 for context. ## git.Blob.NewTruncatedReader This introduce a new `NewTruncatedReader` method to return a blob-reader which silently truncates when the limit is reached (io.EOF will be returned). Since the actual size is also returned `GetBlobContent` can pre-allocate a `[]byte` of the full-size (min of the asked size and the actual size) and call `io.ReadFull(rc, buf)` (instead of `util.ReadWithLimit(dataRc, int(limit))` which is convoluted and not used anywhere else). ### Tests - I added test coverage for Go changes... - [x] in their respective `*_test.go` for unit tests. ### Documentation - [x] I did not document these changes and I do not expect someone else to do it. ### Release notes - [x] I do not want this change to show in the release notes. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/8223 Reviewed-by: Lucas <sclu1034@noreply.codeberg.org> Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org> Co-authored-by: oliverpool <git@olivier.pfad.fr> Co-committed-by: oliverpool <git@olivier.pfad.fr>
This commit is contained in:
parent
913eaffb8a
commit
31ad7c9353
4 changed files with 95 additions and 135 deletions
|
@ -12,7 +12,6 @@ import (
|
||||||
|
|
||||||
"forgejo.org/modules/log"
|
"forgejo.org/modules/log"
|
||||||
"forgejo.org/modules/typesniffer"
|
"forgejo.org/modules/typesniffer"
|
||||||
"forgejo.org/modules/util"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Blob represents a Git object.
|
// Blob represents a Git object.
|
||||||
|
@ -25,42 +24,25 @@ type Blob struct {
|
||||||
repo *Repository
|
repo *Repository
|
||||||
}
|
}
|
||||||
|
|
||||||
// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
|
func (b *Blob) newReader() (*bufio.Reader, int64, func(), error) {
|
||||||
// Calling the Close function on the result will discard all unread output.
|
|
||||||
func (b *Blob) DataAsync() (io.ReadCloser, error) {
|
|
||||||
wr, rd, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
|
wr, rd, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, 0, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = wr.Write([]byte(b.ID.String() + "\n"))
|
_, err = wr.Write([]byte(b.ID.String() + "\n"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cancel()
|
cancel()
|
||||||
return nil, err
|
return nil, 0, nil, err
|
||||||
}
|
}
|
||||||
_, _, size, err := ReadBatchLine(rd)
|
_, _, size, err := ReadBatchLine(rd)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cancel()
|
cancel()
|
||||||
return nil, err
|
return nil, 0, nil, err
|
||||||
}
|
}
|
||||||
b.gotSize = true
|
b.gotSize = true
|
||||||
b.size = size
|
b.size = size
|
||||||
|
return rd, size, cancel, err
|
||||||
if size < 4096 {
|
|
||||||
bs, err := io.ReadAll(io.LimitReader(rd, size))
|
|
||||||
defer cancel()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
_, err = rd.Discard(1)
|
|
||||||
return io.NopCloser(bytes.NewReader(bs)), err
|
|
||||||
}
|
|
||||||
|
|
||||||
return &blobReader{
|
|
||||||
rd: rd,
|
|
||||||
n: size,
|
|
||||||
cancel: cancel,
|
|
||||||
}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Size returns the uncompressed size of the blob
|
// Size returns the uncompressed size of the blob
|
||||||
|
@ -91,10 +73,36 @@ func (b *Blob) Size() int64 {
|
||||||
return b.size
|
return b.size
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
|
||||||
|
// Calling the Close function on the result will discard all unread output.
|
||||||
|
func (b *Blob) DataAsync() (io.ReadCloser, error) {
|
||||||
|
rd, size, cancel, err := b.newReader()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if size < 4096 {
|
||||||
|
bs, err := io.ReadAll(io.LimitReader(rd, size))
|
||||||
|
defer cancel()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
_, err = rd.Discard(1)
|
||||||
|
return io.NopCloser(bytes.NewReader(bs)), err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &blobReader{
|
||||||
|
rd: rd,
|
||||||
|
n: size,
|
||||||
|
cancel: cancel,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
type blobReader struct {
|
type blobReader struct {
|
||||||
rd *bufio.Reader
|
rd *bufio.Reader
|
||||||
n int64
|
n int64 // number of bytes to read
|
||||||
cancel func()
|
additionalDiscard int64 // additional number of bytes to discard
|
||||||
|
cancel func()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *blobReader) Read(p []byte) (n int, err error) {
|
func (b *blobReader) Read(p []byte) (n int, err error) {
|
||||||
|
@ -117,7 +125,8 @@ func (b *blobReader) Close() error {
|
||||||
|
|
||||||
defer b.cancel()
|
defer b.cancel()
|
||||||
|
|
||||||
if err := DiscardFull(b.rd, b.n+1); err != nil {
|
// discard the unread bytes, the truncated bytes and the trailing newline
|
||||||
|
if err := DiscardFull(b.rd, b.n+b.additionalDiscard+1); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -131,17 +140,35 @@ func (b *Blob) Name() string {
|
||||||
return b.name
|
return b.name
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetBlobContent Gets the limited content of the blob as raw text
|
// NewTruncatedReader return a blob-reader which silently truncates when the limit is reached (io.EOF will be returned)
|
||||||
|
func (b *Blob) NewTruncatedReader(limit int64) (rc io.ReadCloser, fullSize int64, err error) {
|
||||||
|
r, fullSize, cancel, err := b.newReader()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fullSize, err
|
||||||
|
}
|
||||||
|
|
||||||
|
limit = min(limit, fullSize)
|
||||||
|
return &blobReader{
|
||||||
|
rd: r,
|
||||||
|
n: limit,
|
||||||
|
additionalDiscard: fullSize - limit,
|
||||||
|
cancel: cancel,
|
||||||
|
}, fullSize, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetBlobContent Gets the truncated content of the blob as raw text
|
||||||
func (b *Blob) GetBlobContent(limit int64) (string, error) {
|
func (b *Blob) GetBlobContent(limit int64) (string, error) {
|
||||||
if limit <= 0 {
|
if limit <= 0 {
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
dataRc, err := b.DataAsync()
|
rc, fullSize, err := b.NewTruncatedReader(limit)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
defer dataRc.Close()
|
defer rc.Close()
|
||||||
buf, err := util.ReadWithLimit(dataRc, int(limit))
|
|
||||||
|
buf := make([]byte, min(fullSize, limit))
|
||||||
|
_, err = io.ReadFull(rc, buf)
|
||||||
return string(buf), err
|
return string(buf), err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -35,6 +35,43 @@ func TestBlob_Data(t *testing.T) {
|
||||||
assert.Equal(t, output, string(data))
|
assert.Equal(t, output, string(data))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBlob_GetBlobContent(t *testing.T) {
|
||||||
|
bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
|
||||||
|
repo, err := openRepositoryWithDefaultContext(bareRepo1Path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
defer repo.Close()
|
||||||
|
|
||||||
|
testBlob, err := repo.GetBlob("6c493ff740f9380390d5c9ddef4af18697ac9375")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
r, err := testBlob.GetBlobContent(100)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, "file2\n", r)
|
||||||
|
|
||||||
|
r, err = testBlob.GetBlobContent(-1)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Empty(t, r)
|
||||||
|
|
||||||
|
r, err = testBlob.GetBlobContent(4)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, "file", r)
|
||||||
|
|
||||||
|
r, err = testBlob.GetBlobContent(6)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, "file2\n", r)
|
||||||
|
|
||||||
|
t.Run("non-existing blob", func(t *testing.T) {
|
||||||
|
inexistingBlob, err := repo.GetBlob("00003ff740f9380390d5c9ddef4af18690000000")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
r, err := inexistingBlob.GetBlobContent(100)
|
||||||
|
require.Error(t, err)
|
||||||
|
require.IsType(t, ErrNotExist{}, err)
|
||||||
|
require.Empty(t, r)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func Benchmark_Blob_Data(b *testing.B) {
|
func Benchmark_Blob_Data(b *testing.B) {
|
||||||
bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
|
bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
|
||||||
repo, err := openRepositoryWithDefaultContext(bareRepo1Path)
|
repo, err := openRepositoryWithDefaultContext(bareRepo1Path)
|
||||||
|
|
|
@ -4,7 +4,6 @@
|
||||||
package util
|
package util
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"errors"
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
)
|
)
|
||||||
|
@ -20,42 +19,6 @@ func ReadAtMost(r io.Reader, buf []byte) (n int, err error) {
|
||||||
return n, err
|
return n, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// ReadWithLimit reads at most "limit" bytes from r into buf.
|
|
||||||
// If EOF or ErrUnexpectedEOF occurs while reading, err will be nil.
|
|
||||||
func ReadWithLimit(r io.Reader, n int) (buf []byte, err error) {
|
|
||||||
return readWithLimit(r, 1024, n)
|
|
||||||
}
|
|
||||||
|
|
||||||
func readWithLimit(r io.Reader, batch, limit int) ([]byte, error) {
|
|
||||||
if limit <= batch {
|
|
||||||
buf := make([]byte, limit)
|
|
||||||
n, err := ReadAtMost(r, buf)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return buf[:n], nil
|
|
||||||
}
|
|
||||||
res := bytes.NewBuffer(make([]byte, 0, batch))
|
|
||||||
bufFix := make([]byte, batch)
|
|
||||||
eof := false
|
|
||||||
for res.Len() < limit && !eof {
|
|
||||||
bufTmp := bufFix
|
|
||||||
if res.Len()+batch > limit {
|
|
||||||
bufTmp = bufFix[:limit-res.Len()]
|
|
||||||
}
|
|
||||||
n, err := io.ReadFull(r, bufTmp)
|
|
||||||
if err == io.EOF || err == io.ErrUnexpectedEOF {
|
|
||||||
eof = true
|
|
||||||
} else if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if _, err = res.Write(bufTmp[:n]); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return res.Bytes(), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// ErrNotEmpty is an error reported when there is a non-empty reader
|
// ErrNotEmpty is an error reported when there is a non-empty reader
|
||||||
var ErrNotEmpty = errors.New("not-empty")
|
var ErrNotEmpty = errors.New("not-empty")
|
||||||
|
|
||||||
|
|
|
@ -1,67 +0,0 @@
|
||||||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
|
|
||||||
package util
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"errors"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
type readerWithError struct {
|
|
||||||
buf *bytes.Buffer
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *readerWithError) Read(p []byte) (n int, err error) {
|
|
||||||
if r.buf.Len() < 2 {
|
|
||||||
return 0, errors.New("test error")
|
|
||||||
}
|
|
||||||
return r.buf.Read(p)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestReadWithLimit(t *testing.T) {
|
|
||||||
bs := []byte("0123456789abcdef")
|
|
||||||
|
|
||||||
// normal test
|
|
||||||
buf, err := readWithLimit(bytes.NewBuffer(bs), 5, 2)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, []byte("01"), buf)
|
|
||||||
|
|
||||||
buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 5)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, []byte("01234"), buf)
|
|
||||||
|
|
||||||
buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 6)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, []byte("012345"), buf)
|
|
||||||
|
|
||||||
buf, err = readWithLimit(bytes.NewBuffer(bs), 5, len(bs))
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, []byte("0123456789abcdef"), buf)
|
|
||||||
|
|
||||||
buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 100)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, []byte("0123456789abcdef"), buf)
|
|
||||||
|
|
||||||
// test with error
|
|
||||||
buf, err = readWithLimit(&readerWithError{bytes.NewBuffer(bs)}, 5, 10)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, []byte("0123456789"), buf)
|
|
||||||
|
|
||||||
buf, err = readWithLimit(&readerWithError{bytes.NewBuffer(bs)}, 5, 100)
|
|
||||||
require.ErrorContains(t, err, "test error")
|
|
||||||
assert.Empty(t, buf)
|
|
||||||
|
|
||||||
// test public function
|
|
||||||
buf, err = ReadWithLimit(bytes.NewBuffer(bs), 2)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, []byte("01"), buf)
|
|
||||||
|
|
||||||
buf, err = ReadWithLimit(bytes.NewBuffer(bs), 9999999)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, []byte("0123456789abcdef"), buf)
|
|
||||||
}
|
|
Loading…
Add table
Add a link
Reference in a new issue