git-annex: include annexed files in zip and tar.gz

This extends the archive creation logic to add annexed files to the
created archives. The basic flow is this:
1. Create an archive using `git archive`
2. Read in that archive and write out a new one, replacing all annexed
   files with their annexed content; leaving the git-only files as-is

The file permissions with which the annexed files are put into the
archive are decided based on what `git archive` does for other files as
well:
- For tar.gz archives, executable files get permissions 0775 and regular
  files get 0664.
- For zip archives, executable files get permissions 0755 and regular
  files are archived with "FAT permissions" rw, instead of unix
  permissions.

If for a given archive request an annexed file is not present on the
gitea instance then the content as tracked by git (i.e. a symlink or
pointer file) is silently put into the resulting archive instead.

Co-authored-by: Nick Guenther <nick.guenther@polymtl.ca>
This commit is contained in:
Matthias Riße 2023-06-26 15:12:06 +02:00
parent 1f4fe5bfe6
commit 02ad4ab33d
3 changed files with 283 additions and 7 deletions

View file

@ -0,0 +1,258 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package annex
import (
"archive/tar"
"archive/zip"
"bytes"
"compress/gzip"
"context"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"strings"
"code.gitea.io/gitea/modules/git"
)
// createArchiveTargz writes a gzip-compressed tar archive of repo at commitID
// to target, replacing annexed files with their annexed content.
//
// A plain tar archive is first produced by repo.CreateArchive into memory and
// then re-written entry by entry. Entries for which ContentLocation yields a
// path are written with the content of that file; all other entries are copied
// through unchanged. prefix is the path prefix that was prepended to entry
// names (empty if none) and is stripped before looking entries up in the tree.
//
// Errors from closing the tar and gzip writers are reported if no earlier
// error occurred, since a failed close means the archive is truncated.
func createArchiveTargz(ctx context.Context, repo *git.Repository, target io.Writer, prefix, commitID string) (retErr error) {
	// create a plain git tar archive
	var tarB bytes.Buffer
	err := repo.CreateArchive(ctx, git.TAR, &tarB, prefix != "", commitID)
	if err != nil {
		return err
	}
	gitFilesR := tar.NewReader(&tarB)

	// Deferred calls run in reverse order: the tar writer is closed first
	// (writing the tar footer), then the gzip writer flushes the stream.
	gzipW := gzip.NewWriter(target)
	defer func() {
		if cerr := gzipW.Close(); cerr != nil && retErr == nil {
			retErr = fmt.Errorf("closing gzip stream failed: %w", cerr)
		}
	}()
	tarW := tar.NewWriter(gzipW)
	defer func() {
		if cerr := tarW.Close(); cerr != nil && retErr == nil {
			retErr = fmt.Errorf("closing tar archive failed: %w", cerr)
		}
	}()

	tree, err := repo.GetTree(commitID)
	if err != nil {
		return err
	}

	// copyEntry writes a single archive entry. It is a separate function so
	// that an opened annexed file is closed as soon as the entry is written,
	// instead of accumulating open descriptors for the whole archive.
	copyEntry := func(oldHeader *tar.Header) error {
		// default to copying the current file from the archive created by repo.CreateArchive
		header := oldHeader
		dataR := io.Reader(gitFilesR)

		// if we can get an annex content location for the file we use that instead
		te, err := tree.GetTreeEntryByPath(strings.TrimPrefix(oldHeader.Name, prefix))
		if err == nil && (te.IsRegular() || te.IsExecutable() || te.IsLink()) {
			if annexPath, err := ContentLocation(te.Blob()); err == nil {
				// blob corresponds to an annexed file
				// build a tar header for the annexed file
				file, err := os.Open(annexPath)
				if err != nil {
					return fmt.Errorf("opening %s failed: %w", annexPath, err)
				}
				// The file is only read, so a close error is not actionable.
				defer file.Close()

				stat, err := file.Stat()
				if err != nil {
					return fmt.Errorf("getting FileInfo for %s failed: %w", file.Name(), err)
				}

				// https://pkg.go.dev/archive/tar#Header:
				// > For forward compatibility, users that retrieve a Header from Reader.Next,
				// > mutate it in some ways, and then pass it back to Writer.WriteHeader should
				// > do so by creating a new Header and copying the fields that they are interested in preserving.
				header, err = tar.FileInfoHeader(stat, "")
				if err != nil {
					return fmt.Errorf("creating header failed: %w", err)
				}
				header.Size = stat.Size()

				annexedExecutable := stat.Mode().Perm()&0o100 != 0
				switch {
				case te.IsExecutable() || (te.IsLink() && annexedExecutable):
					// If the file is executable in git, or is a symlink to an annexed executable
					// file, archive it with permissions 0775, as `git archive` would do for
					// executables as well.
					header.Mode = int64(fs.FileMode(0o775))
				case te.IsRegular() || (te.IsLink() && !annexedExecutable):
					// If the file is not executable in git (i.e. a regular file), or a symlink to
					// an annexed file that is not executable, archive it with permissions 0664.
					header.Mode = int64(fs.FileMode(0o664))
				}

				// preserve these
				header.Name = oldHeader.Name
				header.Linkname = oldHeader.Linkname
				header.Uid = oldHeader.Uid
				header.Gid = oldHeader.Gid
				header.Uname = oldHeader.Uname
				header.Gname = oldHeader.Gname
				header.ModTime = oldHeader.ModTime
				header.AccessTime = oldHeader.AccessTime
				header.ChangeTime = oldHeader.ChangeTime
				header.PAXRecords = oldHeader.PAXRecords
				header.Format = oldHeader.Format

				// set the data reader
				dataR = file
			}
		}

		// write header
		if err := tarW.WriteHeader(header); err != nil {
			return fmt.Errorf("writing header for %s failed: %w", header.Name, err)
		}

		// write data
		if _, err := io.Copy(tarW, dataR); err != nil {
			return fmt.Errorf("writing data for %s failed: %w", header.Name, err)
		}
		return nil
	}

	for {
		oldHeader, err := gitFilesR.Next()
		// TODO: handle non-local names in tar archives?
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		if err := copyEntry(oldHeader); err != nil {
			return err
		}
	}

	return nil
}
// createArchiveZip writes a zip archive of repo at commitID to target,
// replacing annexed files with their annexed content.
//
// A plain zip archive is first produced by repo.CreateArchive into memory and
// then re-written entry by entry. Entries for which ContentLocation yields a
// path are written with the content of that file; all other entries are copied
// from the git-created archive. prefix is the path prefix that was prepended to
// entry names (empty if none) and is stripped before looking entries up in the
// tree.
//
// The error from closing the zip writer is reported if no earlier error
// occurred, since Close writes the central directory and a failure there
// leaves the archive unreadable.
func createArchiveZip(ctx context.Context, repo *git.Repository, target io.Writer, prefix, commitID string) (retErr error) {
	// create a plain git zip archive
	var zipB bytes.Buffer
	err := repo.CreateArchive(ctx, git.ZIP, &zipB, prefix != "", commitID)
	if err != nil {
		return err
	}

	gitFilesR, err := zip.NewReader(bytes.NewReader(zipB.Bytes()), int64(zipB.Len()))
	if err != nil {
		return err
	}

	tree, err := repo.GetTree(commitID)
	if err != nil {
		return err
	}

	zipW := zip.NewWriter(target)
	defer func() {
		if cerr := zipW.Close(); cerr != nil && retErr == nil {
			retErr = fmt.Errorf("closing zip archive failed: %w", cerr)
		}
	}()

	err = zipW.SetComment(gitFilesR.Comment)
	if err != nil {
		return fmt.Errorf("setting archive comment field failed: %w", err)
	}

	// copyEntry writes a single archive entry. It is a separate function so
	// that the reader opened for the entry (annexed file or zip member) is
	// closed as soon as the entry is written.
	copyEntry := func(f *zip.File) error {
		oldHeader := f.FileHeader

		// default to copying the current file from the archive created by repo.CreateArchive
		// dataR is set later to avoid unnecessarily opening a file here
		header := &oldHeader
		var dataR io.Reader

		te, err := tree.GetTreeEntryByPath(strings.TrimPrefix(oldHeader.Name, prefix))
		if err == nil && (te.IsRegular() || te.IsExecutable() || te.IsLink()) {
			if annexPath, err := ContentLocation(te.Blob()); err == nil {
				// blob corresponds to an annexed file
				// build a zip header for the file
				file, err := os.Open(annexPath)
				if err != nil {
					return fmt.Errorf("opening %s failed: %w", annexPath, err)
				}
				// The file is only read, so a close error is not actionable.
				defer file.Close()

				stat, err := file.Stat()
				if err != nil {
					return fmt.Errorf("getting FileInfo for %s failed: %w", file.Name(), err)
				}

				header, err = zip.FileInfoHeader(stat)
				if err != nil {
					return fmt.Errorf("creating header failed: %w", err)
				}
				header.Name = oldHeader.Name
				header.Method = zip.Deflate

				annexedExecutable := stat.Mode().Perm()&0o100 != 0
				switch {
				case te.IsExecutable() || (te.IsLink() && annexedExecutable):
					// If the file is executable in git, or is a symlink to an annexed executable
					// file, archive it with permissions 0755, as `git archive` would do for
					// executables as well.
					header.SetMode(fs.FileMode(0o755))
				case te.IsRegular() || (te.IsLink() && !annexedExecutable):
					// If the file is not executable in git (i.e. a regular file), or a symlink to
					// an annexed file that is not executable, archive it with the "FAT creator"
					// in zip and set rw permissions through the external attrs.
					// `git archive` does the same for regular files.
					header.CreatorVersion = 0
					header.ExternalAttrs = 0
				}

				// set the data reader
				dataR = file
			}
		}

		if dataR == nil {
			// data reader was not yet set, take the data from the archive created by repo.CreateArchive
			member, err := f.Open()
			if err != nil {
				return fmt.Errorf("opening %s failed: %w", f.Name, err)
			}
			defer member.Close()
			dataR = member
		}

		// write header
		fileW, err := zipW.CreateHeader(header)
		if err != nil {
			return fmt.Errorf("writing header for %s failed: %w", header.Name, err)
		}

		// write data
		if _, err := io.Copy(fileW, dataR); err != nil {
			return fmt.Errorf("writing data for %s failed: %w", header.Name, err)
		}
		return nil
	}

	for _, f := range gitFilesR.File {
		if err := copyEntry(f); err != nil {
			return err
		}
	}

	return nil
}
// CreateArchive builds an archive of repo at commitID in the requested format
// and streams it to target. When usePrefix is true, every path inside the
// archive is prefixed with the repository's directory name (without a trailing
// ".git"). Only the tar.gz and zip formats are handled here; any other format
// yields an error.
// It is an annex-aware alternative to Repository.CreateArchive in the git package.
func CreateArchive(ctx context.Context, repo *git.Repository, format git.ArchiveType, target io.Writer, usePrefix bool, commitID string) error {
	if format.String() == "unknown" {
		return fmt.Errorf("unknown format: %v", format)
	}

	// The zero value "" means "no prefix".
	prefix := ""
	if usePrefix {
		repoDir := strings.TrimSuffix(repo.Path, ".git")
		prefix = filepath.Base(repoDir) + "/"
	}

	var archiveErr error
	switch format {
	case git.TARGZ:
		archiveErr = createArchiveTargz(ctx, repo, target, prefix, commitID)
	case git.ZIP:
		archiveErr = createArchiveZip(ctx, repo, target, prefix, commitID)
	default:
		return fmt.Errorf("unsupported format: %v", format)
	}

	if archiveErr != nil {
		return fmt.Errorf("failed to create archive: %w", archiveErr)
	}
	return nil
}

View file

@ -19,6 +19,8 @@ type ArchiveType int
const (
// ZIP zip archive type
ZIP ArchiveType = iota + 1
// TAR tar archive type
TAR
// TARGZ tar gz archive type
TARGZ
// BUNDLE bundle archive type
@ -30,6 +32,8 @@ func (a ArchiveType) String() string {
switch a {
case ZIP:
return "zip"
case TAR:
return "tar"
case TARGZ:
return "tar.gz"
case BUNDLE:
@ -42,6 +46,8 @@ func ToArchiveType(s string) ArchiveType {
switch s {
case "zip":
return ZIP
case "tar":
return TAR
case "tar.gz":
return TARGZ
case "bundle":

View file

@ -14,6 +14,7 @@ import (
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/annex"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/graceful"
@ -254,13 +255,24 @@ func doArchive(ctx context.Context, r *ArchiveRequest) (*repo_model.RepoArchiver
w,
)
} else {
err = gitRepo.CreateArchive(
ctx,
archiver.Type,
w,
setting.Repository.PrefixArchiveFiles,
archiver.CommitID,
)
if annex.IsAnnexRepo(gitRepo) {
err = annex.CreateArchive(
ctx,
gitRepo,
archiver.Type,
w,
setting.Repository.PrefixArchiveFiles,
archiver.CommitID,
)
} else {
err = gitRepo.CreateArchive(
ctx,
archiver.Type,
w,
setting.Repository.PrefixArchiveFiles,
archiver.CommitID,
)
}
}
_ = w.CloseWithError(err)
done <- err