git-annex: add tests for archives of annex repos
Some checks failed
Integration tests for the release process / release-simulation (push) Has been cancelled

Tests include:
- Compare the list of files in the resulting archive with the list of
  files as tracked in the archived git tree.
- Compare the content of each file with what it should be (git blob
  content or the annexed file, respectively).
- Check that the file mode matches the expected file mode for all
  archived files.
- Check that the resulting archive has the archived commitID set as a
  comment (as `git archive` does as well).

The tests are done for both the "web" endpoints at
`/<user>/<repo>/archive/<git-ref>.{tar.gz,zip}` and the "api-v1"
endpoints at `/api/v1/repos/<user>/<repo>/archive/<git-ref>.{tar.gz,zip}`.
This commit is contained in:
Matthias Riße 2023-08-01 17:30:23 +02:00
parent 02ad4ab33d
commit 1101799da6
2 changed files with 317 additions and 0 deletions

View file

@ -0,0 +1,292 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package integration
import (
"archive/tar"
"archive/zip"
"bytes"
"compress/gzip"
"fmt"
"io"
"io/fs"
"net/http"
"net/url"
"path"
"sort"
"strings"
"sync"
"testing"
auth_model "code.gitea.io/gitea/models/auth"
"code.gitea.io/gitea/modules/annex"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
"github.com/stretchr/testify/require"
)
// record holds what the tests expect an archive to contain for one
// non-directory entry of the archived git tree.
type record struct {
// name is the entry name as reported by the git tree entry; it is also the
// key under which the record is stored in the filesInGit map.
name string
// isAnnexed is the result of annex.IsAnnexed for the entry's blob.
isAnnexed bool
// annexedMode is the mode reported by Stat on the annexed content. The
// comparison code only consults it when isAnnexed is true.
annexedMode fs.FileMode
// gitMode is the mode of the entry in the git tree.
gitMode git.EntryMode
// content is the expected file content: the annexed content for annexed
// files, the git blob data otherwise.
content []byte
}
// TestGitAnnexArchive checks that archives generated for a repository that
// contains git-annex files match the git tree they were created from.
//
// It creates a remote annex repository, records the expected name, mode and
// content of every non-directory entry of the "master" tree, and then runs
// the archive checks against both the API and the web archive endpoints.
func TestGitAnnexArchive(t *testing.T) {
	onGiteaRun(t, func(t *testing.T, u *url.URL) {
		ctx := NewAPITestContext(t, "user2", "annex-archive-test", auth_model.AccessTokenScopeWriteRepository)
		require.NoError(t, doCreateRemoteAnnexRepository(t, u, ctx, false))

		req := NewRequest(t, "GET", u.String())
		_ = ctx.Session.MakeRequest(t, req, http.StatusOK)

		remoteRepoPath := path.Join(setting.RepoRootPath, ctx.GitPath())

		// get the commitID of the master branch
		repo, err := git.OpenRepository(git.DefaultContext, remoteRepoPath)
		require.NoError(t, err)
		// Release the repository handle once all subtests have finished.
		defer repo.Close()

		commitID, err := repo.GetBranchCommitID("master")
		require.NoError(t, err)

		tree, err := repo.GetTree("master")
		require.NoError(t, err)

		entries, err := tree.ListEntriesRecursiveFast()
		require.NoError(t, err)

		filesInGit := make(map[string]record, len(entries))
		for _, entry := range entries {
			if entry.IsDir() {
				continue
			}

			// Each entry is handled in its own function call so that the
			// deferred Close runs at the end of the iteration instead of
			// keeping every reader open until the whole test returns.
			func() {
				name := entry.Name()
				blob := entry.Blob()

				isAnnexed, err := annex.IsAnnexed(blob)
				require.NoError(t, err)

				// Scoped per entry so that the mode of a previously seen
				// annexed file never ends up in the record of another file.
				var annexedMode fs.FileMode
				var r io.Reader
				if isAnnexed {
					fa, err := annex.Content(blob)
					require.NoError(t, err)
					defer fa.Close()
					r = fa

					stat, err := fa.Stat()
					require.NoError(t, err)
					annexedMode = stat.Mode()
				} else {
					// standard git file
					br, err := blob.DataAsync()
					require.NoError(t, err)
					defer br.Close()
					r = br
				}

				expectedContent, err := io.ReadAll(r)
				require.NoError(t, err)

				filesInGit[name] = record{
					name:        name,
					isAnnexed:   isAnnexed,
					annexedMode: annexedMode,
					gitMode:     entry.Mode(),
					content:     expectedContent,
				}
			}()
		}

		t.Run("api-v1", func(t *testing.T) {
			urlBase := fmt.Sprintf("/api/v1/repos/%s/%s/archive/master", ctx.Username, ctx.Reponame)
			doTestArchive(t, ctx, urlBase, commitID, filesInGit)
		})

		t.Run("web", func(t *testing.T) {
			urlBase := fmt.Sprintf("/%s/%s/archive/master", ctx.Username, ctx.Reponame)
			doTestArchive(t, ctx, urlBase, commitID, filesInGit)
		})
	})
}
// doTestArchive removes previously generated repository archives through the
// admin cron API and then verifies the tar.gz and zip archives served below
// urlBase (the archive URL without its file extension).
//
// commitID is the commit the archives are expected to reference and
// filesInGit maps every expected file name to its expected record.
func doTestArchive(t *testing.T, ctx APITestContext, urlBase, commitID string, filesInGit map[string]record) {
	// Number of simultaneous requests per archive format; presumably chosen
	// to exercise concurrent generation of the same archive.
	const parallelRequests = 100

	// cleanup previously generated archives
	adminSession := loginUser(t, "user1")
	adminToken := getTokenForLoggedInUser(t, adminSession, auth_model.AccessTokenScopeWriteAdmin)
	link, err := url.Parse("/api/v1/admin/cron/delete_repo_archives")
	require.NoError(t, err)
	link.RawQuery = url.Values{"token": {adminToken}}.Encode()
	resp := adminSession.MakeRequest(t, NewRequest(t, "POST", link.String()), http.StatusNoContent)
	bs, err := io.ReadAll(resp.Body)
	require.NoError(t, err)
	require.Len(t, bs, 0)

	// runConcurrently starts parallelRequests goroutines that each run check
	// and waits until all of them have returned.
	//
	// NOTE(review): check uses require.*, which calls t.FailNow from a
	// goroutine other than the one running the test. A failure still marks
	// the test as failed and ends that goroutine, but the testing package
	// documents FailNow as only valid on the test goroutine.
	runConcurrently := func(check func()) {
		var wg sync.WaitGroup
		for i := 0; i < parallelRequests; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()
				check()
			}()
		}
		wg.Wait()
	}

	t.Run("TARGZ", func(t *testing.T) {
		runConcurrently(func() {
			doTestTarGzArchive(t, urlBase+".tar.gz", ctx, commitID, filesInGit)
		})
	})

	t.Run("ZIP", func(t *testing.T) {
		runConcurrently(func() {
			doTestZipArchive(t, urlBase+".zip", ctx, commitID, filesInGit)
		})
	})
}
// getArchiveFromEndpoint requests endpointURL with the session of ctx,
// expects an HTTP 200 response and returns the complete response body.
//
// A malformed endpointURL fails the test instead of causing a nil pointer
// dereference.
func getArchiveFromEndpoint(t *testing.T, ctx APITestContext, endpointURL string) []byte {
	t.Helper()

	link, err := url.Parse(endpointURL)
	require.NoError(t, err)

	resp := ctx.Session.MakeRequest(t, NewRequest(t, "GET", link.String()), http.StatusOK)
	bs, err := io.ReadAll(resp.Body)
	require.NoError(t, err)
	return bs
}
// doTestTarGzArchive downloads the tar.gz archive at endpointURL and checks
// that
//   - it carries a pax_global_header whose "comment" record equals commitID,
//   - every non-directory entry is a file known from filesInGit with the
//     expected content and file mode, and
//   - the set of archived files equals the set of files in filesInGit.
func doTestTarGzArchive(t *testing.T, endpointURL string, ctx APITestContext, commitID string, filesInGit map[string]record) {
	// request a tar.gz archive of the repo
	bs := getArchiveFromEndpoint(t, ctx, endpointURL)

	// open the archive for reading
	gzrd, err := gzip.NewReader(bytes.NewReader(bs))
	require.NoError(t, err)
	defer gzrd.Close()

	rd := tar.NewReader(gzrd)
	filesInArchive := make([]string, 0, len(filesInGit))
	sawCommitComment := false
	for {
		header, err := rd.Next()
		if err == io.EOF {
			break
		}
		require.NoError(t, err)

		// skip directories
		if header.Typeflag == tar.TypeDir {
			continue
		}

		// check that the pax_global_header comment field is correctly set
		if path.Base(header.Name) == "pax_global_header" {
			require.Equal(t, commitID, header.PAXRecords["comment"])
			sawCommitComment = true
			continue // skip the remaining checks since this file does not exist in git
		}

		name := strings.TrimPrefix(header.Name, ctx.Reponame+"/")
		filesInArchive = append(filesInArchive, name)

		// Fail with a clear message instead of comparing against a
		// zero-valued record when the archive holds an unexpected file.
		gitRecord, ok := filesInGit[name]
		require.True(t, ok, "archive contains %q which is not tracked in git", name)

		// make sure all files are the same as in the repo itself
		actualContent, err := io.ReadAll(rd)
		require.NoError(t, err)
		actualFileMode := header.FileInfo().Mode()
		compareToGitRecord(t, git.TARGZ, gitRecord, actualContent, actualFileMode)
	}

	// Without this check an archive that lacks the global header entirely
	// would pass without its commit ID ever being verified.
	require.True(t, sawCommitComment, "archive has no pax_global_header carrying the commit ID")

	// check that all files that are in git are also present in the archive
	compareListOfStrings(t, mapKeys(filesInGit), filesInArchive)
}
// doTestZipArchive downloads the zip archive at endpointURL and checks that
//   - the archive comment equals commitID,
//   - every non-directory entry is a file known from filesInGit with the
//     expected content and file mode, and
//   - the set of archived files equals the set of files in filesInGit.
func doTestZipArchive(t *testing.T, endpointURL string, ctx APITestContext, commitID string, filesInGit map[string]record) {
	// request a zip archive of the repo
	bs := getArchiveFromEndpoint(t, ctx, endpointURL)

	// open the archive for reading
	r, err := zip.NewReader(bytes.NewReader(bs), int64(len(bs)))
	require.NoError(t, err)

	// check that the comment field is correctly set
	require.Equal(t, commitID, r.Comment)

	filesInArchive := make([]string, 0, len(filesInGit))
	for _, f := range r.File {
		// skip directories
		if f.FileInfo().IsDir() {
			continue
		}

		name := strings.TrimPrefix(f.Name, ctx.Reponame+"/")
		filesInArchive = append(filesInArchive, name)

		// Fail with a clear message instead of comparing against a
		// zero-valued record when the archive holds an unexpected file.
		gitRecord, ok := filesInGit[name]
		require.True(t, ok, "archive contains %q which is not tracked in git", name)

		// make sure all files are the same as in the repo itself
		frd, err := f.Open()
		require.NoError(t, err)
		actualContent, readErr := io.ReadAll(frd)
		// Close right away rather than deferring, so that entry readers do
		// not pile up until the whole archive has been processed.
		closeErr := frd.Close()
		require.NoError(t, readErr)
		require.NoError(t, closeErr)

		actualFileMode := f.Mode()
		compareToGitRecord(t, git.ZIP, gitRecord, actualContent, actualFileMode)
	}

	// check that all files that are in git are also present in the archive
	compareListOfStrings(t, mapKeys(filesInGit), filesInArchive)
}
// compareToGitRecord asserts that a file read from an archive of the given
// type has the file mode and content expected for gitRecord.
//
// The expected mode depends on the archive type and on the kind of entry in
// git (regular, executable, symlink, annex pointer, annex symlink). If none
// of the handled kinds applies, the expected mode stays at its zero value.
func compareToGitRecord(t *testing.T, archiveType git.ArchiveType, gitRecord record, actualContent []byte, actualFileMode fs.FileMode) {
	isSymlink := gitRecord.gitMode == git.EntryModeSymlink
	annexedLink := isSymlink && gitRecord.isAnnexed
	annexedExec := gitRecord.annexedMode&0o100 != 0

	// Executable: an executable git file (plain-git or annex pointer file)
	// or a symlink to an executable annexed file.
	executable := gitRecord.gitMode == git.EntryModeExec || (annexedLink && annexedExec)
	// Regular: a non-executable git file (plain-git or annex pointer file)
	// or a symlink to a non-executable annexed file.
	regular := gitRecord.gitMode == git.EntryModeBlob || (annexedLink && !annexedExec)

	wantContent := gitRecord.content
	var wantMode fs.FileMode

	switch archiveType {
	case git.TARGZ:
		switch {
		case executable:
			// 0775, just like what `git archive` would do for executables.
			wantMode = 0o775
		case regular:
			// 0664, like what `git archive` would do.
			wantMode = 0o664
		case isSymlink:
			// A plain-git symlink is archived as a normal symlink.
			wantMode = 0o777 | fs.ModeSymlink
			// gitRecord.content holds the link target, but a symlink entry
			// in a tar.gz has no content, so expect an empty byte slice.
			wantContent = []byte{}
		}
	case git.ZIP:
		switch {
		case executable:
			wantMode = 0o755
		case regular:
			// Such files should be archived with creatorFAT and rw
			// permissions, like what `git archive` would do, which means go
			// should read them in with 0666 permissions.
			wantMode = 0o666
		case isSymlink:
			// A plain-git symlink is archived as a normal symlink.
			wantMode = 0o777 | fs.ModeSymlink
		}
	}

	// file type and permissions must match
	require.Equal(t, wantMode.String(), actualFileMode.String())
	// content must match
	require.Equal(t, wantContent, actualContent)
}
// compareListOfStrings asserts that l1 and l2 contain the same strings,
// ignoring order.
//
// The comparison works on sorted copies, so the callers' slices are left
// untouched, and a nil slice is treated the same as an empty one.
func compareListOfStrings(t *testing.T, l1, l2 []string) {
	// Appending to a nil slice copies the elements and yields nil for any
	// empty input, which makes nil and empty inputs compare equal.
	sorted1 := append([]string(nil), l1...)
	sorted2 := append([]string(nil), l2...)
	sort.Strings(sorted1)
	sort.Strings(sorted2)
	require.Equal(t, sorted1, sorted2)
}
// mapKeys returns the keys of m in unspecified order. The result is always a
// non-nil slice with exactly len(m) elements.
func mapKeys[K comparable, V any](m map[K]V) []K {
	result := make([]K, 0, len(m))
	for key := range m {
		result = append(result, key)
	}
	return result
}

View file

@ -2739,6 +2739,16 @@ func doInitAnnexRepository(repoPath string) error {
return err
}
// // a file in a subdirectory
err = os.Mkdir(path.Join(repoPath, "subdir"), 0o755)
if err != nil {
return err
}
err = generateRandomFile(1024*1024/4, path.Join(repoPath, "subdir/subdir-annexed.tiff"))
if err != nil {
return err
}
// // a text file
err = os.WriteFile(path.Join(repoPath, "annexed.md"), []byte("Overview\n=====\n\n1. Profit\n2. ???\n3. Review Life Activations\n"), 0o777)
if err != nil {
@ -2780,6 +2790,21 @@ func doInitAnnexRepository(repoPath string) error {
return err
}
// add standard symlinks to git
err = os.Symlink(".gitattributes", path.Join(repoPath, "link-to-.gitattributes"))
if err != nil {
return err
}
err = os.Symlink("missing-file", path.Join(repoPath, "link-to-missing-file"))
if err != nil {
return err
}
err = git.AddChanges(repoPath, false, ".")
if err != nil {
return err
}
// save everything
err = git.CommitChanges(repoPath, git.CommitChangesOptions{Message: "Annex files"})
if err != nil {