mirror of
https://codeberg.org/forgejo-aneksajo/forgejo-aneksajo.git
synced 2025-04-20 21:26:42 +02:00

This changes the PR merge process such that annexed files contained in the to-be-merged commits are copied from the head repository to the base repository as part of the merge, similar to how it is done for LFS files. Fixes #11. ## Checklist The [contributor guide](https://forgejo.org/docs/next/contributor/) contains information that will be helpful to first time contributors. There also are a few [conditions for merging Pull Requests in Forgejo repositories](https://codeberg.org/forgejo/governance/src/branch/main/PullRequestsAgreement.md). You are also welcome to join the [Forgejo development chatroom](https://matrix.to/#/#forgejo-development:matrix.org). ### Tests - I added test coverage for Go changes... - [ ] in their respective `*_test.go` for unit tests. - [x] in the `tests/integration` directory if it involves interactions with a live Forgejo server. - I added test coverage for JavaScript changes... - [ ] in `web_src/js/*.test.js` if it can be unit tested. - [ ] in `tests/e2e/*.test.e2e.js` if it requires interactions with a live Forgejo server (see also the [developer guide for JavaScript testing](https://codeberg.org/forgejo/forgejo/src/branch/forgejo/tests/e2e/README.md#end-to-end-tests)). ### Documentation - [ ] I created a pull request [to the documentation](https://codeberg.org/forgejo/docs) to explain to Forgejo users how to use this change. - [ ] I did not document these changes and I do not expect someone else to do it. ### Release notes - [x] I do not want this change to show in the release notes. - [ ] I want the title to show in the release notes with a link to this pull request. - [ ] I want the content of the `release-notes/<pull request number>.md` to be used for the release notes instead of the title. Reviewed-on: https://codeberg.org/forgejo-aneksajo/forgejo-aneksajo/pulls/62 Co-authored-by: Matthias Riße <m.risse@fz-juelich.de> Co-committed-by: Matthias Riße <m.risse@fz-juelich.de>
256 lines
8 KiB
Go
256 lines
8 KiB
Go
// Copyright 2022 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
// Unlike modules/lfs, which operates mainly on git.Blobs, this operates on git.TreeEntrys.
|
|
// The motivation for this is that TreeEntrys have an easy pointer to the on-disk repo path,
|
|
// while blobs do not (in fact, if building with TAGS=gogit, blobs might exist only in a mock
|
|
// filesystem, living only in process RAM). We must have the on-disk path to do anything
|
|
// useful with git-annex because all of its interesting data is on-disk under .git/annex/.
|
|
|
|
package annex
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"code.gitea.io/gitea/modules/git"
|
|
"code.gitea.io/gitea/modules/log"
|
|
"code.gitea.io/gitea/modules/setting"
|
|
"code.gitea.io/gitea/modules/typesniffer"
|
|
|
|
"gopkg.in/ini.v1" //nolint:depguard // This import is forbidden in favor of using the setting module, but we need ini parsing for something other than Forgejo settings
|
|
)
|
|
|
|
var (
	// ErrBlobIsNotAnnexed is the sentinel error reported when a blob cannot
	// be resolved to a git-annex key, i.e. it is not a git-annex pointer.
	ErrBlobIsNotAnnexed = errors.New("not a git-annex pointer")
)
|
|
|
|
func PrivateInit(ctx context.Context, repoPath string) error {
|
|
if _, _, err := git.NewCommand(ctx, "config", "annex.private", "true").RunStdString(&git.RunOpts{Dir: repoPath}); err != nil {
|
|
return err
|
|
}
|
|
if _, _, err := git.NewCommand(ctx, "annex", "init").RunStdString(&git.RunOpts{Dir: repoPath}); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func LookupKey(blob *git.Blob) (string, error) {
|
|
stdout, _, err := git.NewCommand(git.DefaultContext, "annex", "lookupkey", "--ref").AddDynamicArguments(blob.ID.String()).RunStdString(&git.RunOpts{Dir: blob.Repo().Path})
|
|
if err != nil {
|
|
return "", ErrBlobIsNotAnnexed
|
|
}
|
|
key := strings.TrimSpace(stdout)
|
|
return key, nil
|
|
}
|
|
|
|
// LookupKeyBatch runs git annex lookupkey --batch --ref
|
|
func LookupKeyBatch(ctx context.Context, shasToBatchReader *io.PipeReader, lookupKeyBatchWriter *io.PipeWriter, wg *sync.WaitGroup, repoPath string) {
|
|
defer wg.Done()
|
|
defer shasToBatchReader.Close()
|
|
defer lookupKeyBatchWriter.Close()
|
|
|
|
stderr := new(bytes.Buffer)
|
|
var errbuf strings.Builder
|
|
if err := git.NewCommand(ctx, "annex", "lookupkey", "--batch", "--ref").Run(&git.RunOpts{
|
|
Dir: repoPath,
|
|
Stdout: lookupKeyBatchWriter,
|
|
Stdin: shasToBatchReader,
|
|
Stderr: stderr,
|
|
}); err != nil {
|
|
_ = lookupKeyBatchWriter.CloseWithError(fmt.Errorf("git annex lookupkey --batch --ref [%s]: %w - %s", repoPath, err, errbuf.String()))
|
|
}
|
|
}
|
|
|
|
// CopyFromToBatch runs git -c annex.hardlink=true annex copy --batch-keys --from <remote> --to <remote>
|
|
func CopyFromToBatch(ctx context.Context, from, to string, keysToCopyReader *io.PipeReader, wg *sync.WaitGroup, repoPath string) {
|
|
defer wg.Done()
|
|
defer keysToCopyReader.Close()
|
|
|
|
stdout := new(bytes.Buffer)
|
|
stderr := new(bytes.Buffer)
|
|
var errbuf strings.Builder
|
|
if err := git.NewCommand(ctx, "-c", "annex.hardlink=true", "annex", "copy", "--batch-keys", "--from").AddDynamicArguments(from).AddArguments("--to").AddDynamicArguments(to).Run(&git.RunOpts{
|
|
Dir: repoPath,
|
|
Stdout: stdout,
|
|
Stdin: keysToCopyReader,
|
|
Stderr: stderr,
|
|
}); err != nil {
|
|
_ = keysToCopyReader.CloseWithError(fmt.Errorf("git annex copy --batch-keys --from <remote> --to <remote> [%s]: %w - %s", repoPath, err, errbuf.String()))
|
|
}
|
|
}
|
|
|
|
func ContentLocationFromKey(repoPath, key string) (string, error) {
|
|
contentLocation, _, err := git.NewCommandContextNoGlobals(git.DefaultContext, "annex", "contentlocation").AddDynamicArguments(key).RunStdString(&git.RunOpts{Dir: repoPath})
|
|
if err != nil {
|
|
return "", fmt.Errorf("in %s: %s does not seem to be a valid annexed file: %w", repoPath, key, err)
|
|
}
|
|
contentLocation = strings.TrimSpace(contentLocation)
|
|
contentLocation = path.Clean("/" + contentLocation)[1:] // prevent directory traversals
|
|
contentLocation = path.Join(repoPath, contentLocation)
|
|
|
|
return contentLocation, nil
|
|
}
|
|
|
|
// return the absolute path of the content pointed to by the annex pointer stored in the git object
|
|
// errors if the content is not found in this repo
|
|
func ContentLocation(blob *git.Blob) (string, error) {
|
|
key, err := LookupKey(blob)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return ContentLocationFromKey(blob.Repo().Path, key)
|
|
}
|
|
|
|
// returns a stream open to the annex content
|
|
func Content(blob *git.Blob) (*os.File, error) {
|
|
contentLocation, err := ContentLocation(blob)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return os.Open(contentLocation)
|
|
}
|
|
|
|
// whether the object appears to be a valid annex pointer
|
|
// does *not* verify if the content is actually in this repo;
|
|
// for that, use ContentLocation()
|
|
func IsAnnexed(blob *git.Blob) (bool, error) {
|
|
if !setting.Annex.Enabled {
|
|
return false, nil
|
|
}
|
|
|
|
// LookupKey is written to only return well-formed keys
|
|
// so the test is just to see if it errors
|
|
_, err := LookupKey(blob)
|
|
if err != nil {
|
|
if errors.Is(err, ErrBlobIsNotAnnexed) {
|
|
return false, nil
|
|
}
|
|
return false, err
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
// PathIsAnnexRepo determines if repoPath is a git-annex enabled repository
|
|
func PathIsAnnexRepo(repoPath string) bool {
|
|
_, _, err := git.NewCommand(git.DefaultContext, "config", "annex.uuid").RunStdString(&git.RunOpts{Dir: repoPath})
|
|
return err == nil
|
|
}
|
|
|
|
// IsAnnexRepo determines if repo is a git-annex enabled repository
|
|
func IsAnnexRepo(repo *git.Repository) bool {
|
|
_, _, err := git.NewCommand(repo.Ctx, "config", "annex.uuid").RunStdString(&git.RunOpts{Dir: repo.Path})
|
|
return err == nil
|
|
}
|
|
|
|
// In-memory lookup tables between git-annex repository UUIDs and repository
// paths. Both maps are plain package-level maps; no locking around them is
// visible in this file.
var (
	// uuid2repoPathCache maps an annex.uuid value to the path of the
	// repository whose config it was read from.
	uuid2repoPathCache = map[string]string{}
	// repoPath2uuidCache is the reverse mapping: repository path to the
	// annex.uuid value read from that repository's config.
	repoPath2uuidCache = map[string]string{}
)
|
|
|
|
func Init() error {
|
|
if !setting.Annex.Enabled {
|
|
return nil
|
|
}
|
|
if !setting.Annex.DisableP2PHTTP {
|
|
log.Info("Populating the git-annex UUID cache with existing repositories")
|
|
start := time.Now()
|
|
if err := updateUUID2RepoPathCache(); err != nil {
|
|
return err
|
|
}
|
|
log.Info("Populating the git-annex UUID cache took %v", time.Since(start))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func updateUUID2RepoPathCache() error {
|
|
configFiles, err := filepath.Glob(filepath.Join(setting.RepoRootPath, "*", "*", "config"))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, configFile := range configFiles {
|
|
repoPath := strings.TrimSuffix(configFile, "/config")
|
|
_, ok := repoPath2uuidCache[repoPath]
|
|
if ok {
|
|
continue
|
|
}
|
|
config, err := ini.Load(configFile)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
repoUUID := config.Section("annex").Key("uuid").Value()
|
|
if repoUUID != "" {
|
|
uuid2repoPathCache[repoUUID] = repoPath
|
|
repoPath2uuidCache[repoPath] = repoUUID
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func repoPathFromUUIDCache(uuid string) (string, error) {
|
|
if repoPath, ok := uuid2repoPathCache[uuid]; ok {
|
|
return repoPath, nil
|
|
}
|
|
// If the cache didn't contain an entry for the UUID then update the cache and try again
|
|
if err := updateUUID2RepoPathCache(); err != nil {
|
|
return "", err
|
|
}
|
|
if repoPath, ok := uuid2repoPathCache[uuid]; ok {
|
|
return repoPath, nil
|
|
}
|
|
return "", fmt.Errorf("no repository known for UUID '%s'", uuid)
|
|
}
|
|
|
|
func checkValidity(uuid, repoPath string) (bool, error) {
|
|
stdout, _, err := git.NewCommand(git.DefaultContext, "config", "annex.uuid").RunStdString(&git.RunOpts{Dir: repoPath})
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
repoUUID := strings.TrimSpace(stdout)
|
|
return uuid == repoUUID, nil
|
|
}
|
|
|
|
func removeCachedEntries(uuid, repoPath string) {
|
|
delete(uuid2repoPathCache, uuid)
|
|
delete(repoPath2uuidCache, repoPath)
|
|
}
|
|
|
|
func UUID2RepoPath(uuid string) (string, error) {
|
|
// Get the current cache entry for the UUID
|
|
repoPath, err := repoPathFromUUIDCache(uuid)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
// Check if it is still up-to-date
|
|
valid, err := checkValidity(uuid, repoPath)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if !valid {
|
|
// If it isn't, remove the cache entry and try again
|
|
removeCachedEntries(uuid, repoPath)
|
|
return UUID2RepoPath(uuid)
|
|
}
|
|
// Otherwise just return the cached entry
|
|
return repoPath, nil
|
|
}
|
|
|
|
// GuessContentType guesses the content type of the annexed blob.
|
|
func GuessContentType(blob *git.Blob) (typesniffer.SniffedType, error) {
|
|
r, err := Content(blob)
|
|
if err != nil {
|
|
return typesniffer.SniffedType{}, err
|
|
}
|
|
defer r.Close()
|
|
|
|
return typesniffer.DetectContentTypeFromReader(r)
|
|
}
|