mirror of
https://codeberg.org/davrot/forgejo.git
synced 2025-06-26 17:00:04 +02:00
blob: GetBlobContent: reduce allocations (#8223)
See #8222 for context. ## git.Blob.NewTruncatedReader This introduce a new `NewTruncatedReader` method to return a blob-reader which silently truncates when the limit is reached (io.EOF will be returned). Since the actual size is also returned `GetBlobContent` can pre-allocate a `[]byte` of the full-size (min of the asked size and the actual size) and call `io.ReadFull(rc, buf)` (instead of `util.ReadWithLimit(dataRc, int(limit))` which is convoluted and not used anywhere else). ### Tests - I added test coverage for Go changes... - [x] in their respective `*_test.go` for unit tests. ### Documentation - [x] I did not document these changes and I do not expect someone else to do it. ### Release notes - [x] I do not want this change to show in the release notes. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/8223 Reviewed-by: Lucas <sclu1034@noreply.codeberg.org> Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org> Co-authored-by: oliverpool <git@olivier.pfad.fr> Co-committed-by: oliverpool <git@olivier.pfad.fr>
This commit is contained in:
parent
913eaffb8a
commit
31ad7c9353
4 changed files with 95 additions and 135 deletions
|
@ -12,7 +12,6 @@ import (
|
|||
|
||||
"forgejo.org/modules/log"
|
||||
"forgejo.org/modules/typesniffer"
|
||||
"forgejo.org/modules/util"
|
||||
)
|
||||
|
||||
// Blob represents a Git object.
|
||||
|
@ -25,42 +24,25 @@ type Blob struct {
|
|||
repo *Repository
|
||||
}
|
||||
|
||||
// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
|
||||
// Calling the Close function on the result will discard all unread output.
|
||||
func (b *Blob) DataAsync() (io.ReadCloser, error) {
|
||||
func (b *Blob) newReader() (*bufio.Reader, int64, func(), error) {
|
||||
wr, rd, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, 0, nil, err
|
||||
}
|
||||
|
||||
_, err = wr.Write([]byte(b.ID.String() + "\n"))
|
||||
if err != nil {
|
||||
cancel()
|
||||
return nil, err
|
||||
return nil, 0, nil, err
|
||||
}
|
||||
_, _, size, err := ReadBatchLine(rd)
|
||||
if err != nil {
|
||||
cancel()
|
||||
return nil, err
|
||||
return nil, 0, nil, err
|
||||
}
|
||||
b.gotSize = true
|
||||
b.size = size
|
||||
|
||||
if size < 4096 {
|
||||
bs, err := io.ReadAll(io.LimitReader(rd, size))
|
||||
defer cancel()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
_, err = rd.Discard(1)
|
||||
return io.NopCloser(bytes.NewReader(bs)), err
|
||||
}
|
||||
|
||||
return &blobReader{
|
||||
rd: rd,
|
||||
n: size,
|
||||
cancel: cancel,
|
||||
}, nil
|
||||
return rd, size, cancel, err
|
||||
}
|
||||
|
||||
// Size returns the uncompressed size of the blob
|
||||
|
@ -91,9 +73,35 @@ func (b *Blob) Size() int64 {
|
|||
return b.size
|
||||
}
|
||||
|
||||
// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
|
||||
// Calling the Close function on the result will discard all unread output.
|
||||
func (b *Blob) DataAsync() (io.ReadCloser, error) {
|
||||
rd, size, cancel, err := b.newReader()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if size < 4096 {
|
||||
bs, err := io.ReadAll(io.LimitReader(rd, size))
|
||||
defer cancel()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
_, err = rd.Discard(1)
|
||||
return io.NopCloser(bytes.NewReader(bs)), err
|
||||
}
|
||||
|
||||
return &blobReader{
|
||||
rd: rd,
|
||||
n: size,
|
||||
cancel: cancel,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type blobReader struct {
|
||||
rd *bufio.Reader
|
||||
n int64
|
||||
n int64 // number of bytes to read
|
||||
additionalDiscard int64 // additional number of bytes to discard
|
||||
cancel func()
|
||||
}
|
||||
|
||||
|
@ -117,7 +125,8 @@ func (b *blobReader) Close() error {
|
|||
|
||||
defer b.cancel()
|
||||
|
||||
if err := DiscardFull(b.rd, b.n+1); err != nil {
|
||||
// discard the unread bytes, the truncated bytes and the trailing newline
|
||||
if err := DiscardFull(b.rd, b.n+b.additionalDiscard+1); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -131,17 +140,35 @@ func (b *Blob) Name() string {
|
|||
return b.name
|
||||
}
|
||||
|
||||
// GetBlobContent Gets the limited content of the blob as raw text
|
||||
// NewTruncatedReader return a blob-reader which silently truncates when the limit is reached (io.EOF will be returned)
|
||||
func (b *Blob) NewTruncatedReader(limit int64) (rc io.ReadCloser, fullSize int64, err error) {
|
||||
r, fullSize, cancel, err := b.newReader()
|
||||
if err != nil {
|
||||
return nil, fullSize, err
|
||||
}
|
||||
|
||||
limit = min(limit, fullSize)
|
||||
return &blobReader{
|
||||
rd: r,
|
||||
n: limit,
|
||||
additionalDiscard: fullSize - limit,
|
||||
cancel: cancel,
|
||||
}, fullSize, nil
|
||||
}
|
||||
|
||||
// GetBlobContent Gets the truncated content of the blob as raw text
|
||||
func (b *Blob) GetBlobContent(limit int64) (string, error) {
|
||||
if limit <= 0 {
|
||||
return "", nil
|
||||
}
|
||||
dataRc, err := b.DataAsync()
|
||||
rc, fullSize, err := b.NewTruncatedReader(limit)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer dataRc.Close()
|
||||
buf, err := util.ReadWithLimit(dataRc, int(limit))
|
||||
defer rc.Close()
|
||||
|
||||
buf := make([]byte, min(fullSize, limit))
|
||||
_, err = io.ReadFull(rc, buf)
|
||||
return string(buf), err
|
||||
}
|
||||
|
||||
|
|
|
@ -35,6 +35,43 @@ func TestBlob_Data(t *testing.T) {
|
|||
assert.Equal(t, output, string(data))
|
||||
}
|
||||
|
||||
func TestBlob_GetBlobContent(t *testing.T) {
|
||||
bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
|
||||
repo, err := openRepositoryWithDefaultContext(bareRepo1Path)
|
||||
require.NoError(t, err)
|
||||
|
||||
defer repo.Close()
|
||||
|
||||
testBlob, err := repo.GetBlob("6c493ff740f9380390d5c9ddef4af18697ac9375")
|
||||
require.NoError(t, err)
|
||||
|
||||
r, err := testBlob.GetBlobContent(100)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "file2\n", r)
|
||||
|
||||
r, err = testBlob.GetBlobContent(-1)
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, r)
|
||||
|
||||
r, err = testBlob.GetBlobContent(4)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "file", r)
|
||||
|
||||
r, err = testBlob.GetBlobContent(6)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "file2\n", r)
|
||||
|
||||
t.Run("non-existing blob", func(t *testing.T) {
|
||||
inexistingBlob, err := repo.GetBlob("00003ff740f9380390d5c9ddef4af18690000000")
|
||||
require.NoError(t, err)
|
||||
|
||||
r, err := inexistingBlob.GetBlobContent(100)
|
||||
require.Error(t, err)
|
||||
require.IsType(t, ErrNotExist{}, err)
|
||||
require.Empty(t, r)
|
||||
})
|
||||
}
|
||||
|
||||
func Benchmark_Blob_Data(b *testing.B) {
|
||||
bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
|
||||
repo, err := openRepositoryWithDefaultContext(bareRepo1Path)
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
@ -20,42 +19,6 @@ func ReadAtMost(r io.Reader, buf []byte) (n int, err error) {
|
|||
return n, err
|
||||
}
|
||||
|
||||
// ReadWithLimit reads at most "limit" bytes from r into buf.
|
||||
// If EOF or ErrUnexpectedEOF occurs while reading, err will be nil.
|
||||
func ReadWithLimit(r io.Reader, n int) (buf []byte, err error) {
|
||||
return readWithLimit(r, 1024, n)
|
||||
}
|
||||
|
||||
func readWithLimit(r io.Reader, batch, limit int) ([]byte, error) {
|
||||
if limit <= batch {
|
||||
buf := make([]byte, limit)
|
||||
n, err := ReadAtMost(r, buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return buf[:n], nil
|
||||
}
|
||||
res := bytes.NewBuffer(make([]byte, 0, batch))
|
||||
bufFix := make([]byte, batch)
|
||||
eof := false
|
||||
for res.Len() < limit && !eof {
|
||||
bufTmp := bufFix
|
||||
if res.Len()+batch > limit {
|
||||
bufTmp = bufFix[:limit-res.Len()]
|
||||
}
|
||||
n, err := io.ReadFull(r, bufTmp)
|
||||
if err == io.EOF || err == io.ErrUnexpectedEOF {
|
||||
eof = true
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err = res.Write(bufTmp[:n]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return res.Bytes(), nil
|
||||
}
|
||||
|
||||
// ErrNotEmpty is an error reported when there is a non-empty reader
|
||||
var ErrNotEmpty = errors.New("not-empty")
|
||||
|
||||
|
|
|
@ -1,67 +0,0 @@
|
|||
// Copyright 2023 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package util
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type readerWithError struct {
|
||||
buf *bytes.Buffer
|
||||
}
|
||||
|
||||
func (r *readerWithError) Read(p []byte) (n int, err error) {
|
||||
if r.buf.Len() < 2 {
|
||||
return 0, errors.New("test error")
|
||||
}
|
||||
return r.buf.Read(p)
|
||||
}
|
||||
|
||||
func TestReadWithLimit(t *testing.T) {
|
||||
bs := []byte("0123456789abcdef")
|
||||
|
||||
// normal test
|
||||
buf, err := readWithLimit(bytes.NewBuffer(bs), 5, 2)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, []byte("01"), buf)
|
||||
|
||||
buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 5)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, []byte("01234"), buf)
|
||||
|
||||
buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 6)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, []byte("012345"), buf)
|
||||
|
||||
buf, err = readWithLimit(bytes.NewBuffer(bs), 5, len(bs))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, []byte("0123456789abcdef"), buf)
|
||||
|
||||
buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 100)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, []byte("0123456789abcdef"), buf)
|
||||
|
||||
// test with error
|
||||
buf, err = readWithLimit(&readerWithError{bytes.NewBuffer(bs)}, 5, 10)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, []byte("0123456789"), buf)
|
||||
|
||||
buf, err = readWithLimit(&readerWithError{bytes.NewBuffer(bs)}, 5, 100)
|
||||
require.ErrorContains(t, err, "test error")
|
||||
assert.Empty(t, buf)
|
||||
|
||||
// test public function
|
||||
buf, err = ReadWithLimit(bytes.NewBuffer(bs), 2)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, []byte("01"), buf)
|
||||
|
||||
buf, err = ReadWithLimit(bytes.NewBuffer(bs), 9999999)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, []byte("0123456789abcdef"), buf)
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue