From e83f575da5cb9fbf98309cea6cdc809e1cc261bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Ri=C3=9Fe?= Date: Thu, 18 Jul 2024 18:18:06 +0000 Subject: [PATCH] Git-annex web uploads (#21) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements support for uploading files into the annex using the web interface. If a repository is a git-annex-enabled repository all files will be added to it using git annex add. This means that the repository's configuration for what to put into the annex (annex.largefiles in gitattributes) will be respected. Plain git repositories without git-annex will work as before, directly uploading to git. Fixes #5. Reviewed-on: https://codeberg.org/matrss/forgejo-aneksajo/pulls/21 Co-authored-by: Matthias Riße Co-committed-by: Matthias Riße --- modules/annex/annex.go | 6 ++ modules/util/remove.go | 11 ++-- services/repository/files/temp_repo.go | 20 +++++++ services/repository/files/upload.go | 83 +++++++++++++++++++++++++- tests/integration/git_annex_test.go | 81 +++++++++++++++++++++++++ 5 files changed, 194 insertions(+), 7 deletions(-) diff --git a/modules/annex/annex.go b/modules/annex/annex.go index bb049d77ed..bab5a7e0b2 100644 --- a/modules/annex/annex.go +++ b/modules/annex/annex.go @@ -152,3 +152,9 @@ func IsAnnexed(blob *git.Blob) (bool, error) { } return true, nil } + +// IsAnnexRepo determines if repo is a git-annex enabled repository +func IsAnnexRepo(repo *git.Repository) bool { + _, _, err := git.NewCommand(repo.Ctx, "config", "annex.uuid").RunStdString(&git.RunOpts{Dir: repo.Path}) + return err == nil +} diff --git a/modules/util/remove.go b/modules/util/remove.go index f2a61ae467..39556e5e0b 100644 --- a/modules/util/remove.go +++ b/modules/util/remove.go @@ -53,10 +53,13 @@ func MakeWritable(name string) error { return err } - // 0200 == u+w, in octal unix permission notation - err = os.Chmod(path, info.Mode()|0o200) - if err != nil { - return err + // Don't try chmod'ing symlinks (will fail with broken symlinks) + if info.Mode()&os.ModeSymlink != os.ModeSymlink { + // 0200 == u+w, in octal unix permission notation + err = os.Chmod(path, info.Mode()|0o200) + if err != nil { + return err + } } } return nil diff --git a/services/repository/files/temp_repo.go b/services/repository/files/temp_repo.go index 6e7570b82c..566ae5ff8f 100644 --- a/services/repository/files/temp_repo.go +++ b/services/repository/files/temp_repo.go @@ -202,6 +202,26 @@ func (t *TemporaryUploadRepository) AddObjectToIndex(mode, objectHash, objectPat return nil } +// InitPrivateAnnex initializes a private annex in the repository +func (t *TemporaryUploadRepository) InitPrivateAnnex() error { + if _, _, err := git.NewCommand(t.ctx, "config", "annex.private", "true").RunStdString(&git.RunOpts{Dir: t.basePath}); err != nil { + return err + } + if _, _, err := git.NewCommand(t.ctx, "annex", "init").RunStdString(&git.RunOpts{Dir: t.basePath}); err != nil { + return err + } + return nil +} + +// AddAnnex adds the file at path to the repository using git annex add +// This requires a non-bare repository +func (t *TemporaryUploadRepository) AddAnnex(path string) error { + if _, _, err := git.NewCommand(t.ctx, "annex", "add").AddDynamicArguments(path).RunStdString(&git.RunOpts{Dir: t.basePath}); err != nil { + return err + } + return nil +} + // WriteTree writes the current index as a tree to the object db and returns its hash func (t *TemporaryUploadRepository) WriteTree() (string, error) { stdout, _, err := git.NewCommand(t.ctx, "write-tree").RunStdString(&git.RunOpts{Dir: t.basePath}) diff --git a/services/repository/files/upload.go b/services/repository/files/upload.go index 1330116889..21cd5a8344 100644 --- a/services/repository/files/upload.go +++ b/services/repository/files/upload.go @@ -6,13 +6,16 @@ package files import ( "context" "fmt" + "io" "os" "path" + "path/filepath" "strings" git_model "code.gitea.io/gitea/models/git" repo_model "code.gitea.io/gitea/models/repo" user_model "code.gitea.io/gitea/models/user" + "code.gitea.io/gitea/modules/annex" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/setting" @@ -89,7 +92,7 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use defer t.Close() hasOldBranch := true - if err = t.Clone(opts.OldBranch, true); err != nil { + if err = t.Clone(opts.OldBranch, false); err != nil { if !git.IsErrBranchNotExist(err) || !repo.IsEmpty { return err } @@ -105,10 +108,30 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use } } - // Copy uploaded files into repository. - if err := copyUploadedLFSFilesIntoRepository(infos, t, opts.TreePath); err != nil { + r, err := git.OpenRepository(ctx, repo.RepoPath()) + if err != nil { return err } + if annex.IsAnnexRepo(r) { + // Initialize annex privately in temporary clone + if err := t.InitPrivateAnnex(); err != nil { + return err + } + // Copy uploaded files into git-annex repository + if err := copyUploadedFilesIntoAnnexRepository(infos, t, opts.TreePath); err != nil { + return err + } + // Move all annexed content in the temporary repository, i.e. everything we have just added, to the origin + author, committer := GetAuthorAndCommitterUsers(opts.Author, opts.Committer, doer) + if err := moveAnnexedFilesToOrigin(t, author, committer); err != nil { + return err + } + } else { + // Copy uploaded files into repository. + if err := copyUploadedLFSFilesIntoRepository(infos, t, opts.TreePath); err != nil { + return err + } + } // Now write the tree treeHash, err := t.WriteTree() @@ -246,3 +269,57 @@ func uploadToLFSContentStore(info uploadInfo, contentStore *lfs.ContentStore) er } return nil } + +func copyUploadedFilesIntoAnnexRepository(infos []uploadInfo, t *TemporaryUploadRepository, treePath string) error { + for i := range len(infos) { + if err := copyUploadedFileIntoAnnexRepository(&infos[i], t, treePath); err != nil { + return err + } + } + return nil +} + +func copyUploadedFileIntoAnnexRepository(info *uploadInfo, t *TemporaryUploadRepository, treePath string) error { + pathInRepo := path.Join(t.basePath, treePath, info.upload.Name) + if err := os.MkdirAll(filepath.Dir(pathInRepo), 0o700); err != nil { + return err + } + if err := os.Rename(info.upload.LocalPath(), pathInRepo); err != nil { + // Rename didn't work, try copy and remove + inputFile, err := os.Open(info.upload.LocalPath()) + if err != nil { + return fmt.Errorf("could not open source file: %v", err) + } + defer inputFile.Close() + outputFile, err := os.Create(pathInRepo) + if err != nil { + return fmt.Errorf("could not open dest file: %v", err) + } + defer outputFile.Close() + _, err = io.Copy(outputFile, inputFile) + if err != nil { + return fmt.Errorf("could not copy to dest from source: %v", err) + } + inputFile.Close() + err = os.Remove(info.upload.LocalPath()) + if err != nil { + return fmt.Errorf("could not remove source file: %v", err) + } + } + return t.AddAnnex(pathInRepo) +} + +func moveAnnexedFilesToOrigin(t *TemporaryUploadRepository, author, committer *user_model.User) error { + authorSig := author.NewGitSig() + committerSig := committer.NewGitSig() + env := append(os.Environ(), + "GIT_AUTHOR_NAME="+authorSig.Name, + "GIT_AUTHOR_EMAIL="+authorSig.Email, + "GIT_COMMITTER_NAME="+committerSig.Name, + "GIT_COMMITTER_EMAIL="+committerSig.Email, + ) + if _, _, err := git.NewCommand(t.ctx, "annex", "move", "--to", "origin").RunStdString(&git.RunOpts{Dir: t.basePath, Env: env}); err != nil { + return err + } + return nil +} diff --git a/tests/integration/git_annex_test.go b/tests/integration/git_annex_test.go index 4d4a0da88e..ff840c8b68 100644 --- a/tests/integration/git_annex_test.go +++ b/tests/integration/git_annex_test.go @@ -5,14 +5,17 @@ package integration import ( + "bytes" "errors" "fmt" "io" "math/rand" + "mime/multipart" "net/http" "net/url" "os" "path" + "path/filepath" "regexp" "strings" "testing" @@ -59,6 +62,84 @@ func doCreateRemoteAnnexRepository(t *testing.T, u *url.URL, ctx APITestContext, return nil } +func TestGitAnnexWebUpload(t *testing.T) { + if !setting.Annex.Enabled { + t.Skip("Skipping since annex support is disabled.") + } + + onGiteaRun(t, func(t *testing.T, u *url.URL) { + forEachObjectFormat(t, func(t *testing.T, objectFormat git.ObjectFormat) { + ctx := NewAPITestContext(t, "user2", "annex-web-upload-test"+objectFormat.Name(), auth_model.AccessTokenScopeWriteRepository) + require.NoError(t, doCreateRemoteAnnexRepository(t, u, ctx, false, objectFormat)) + + uploadFile := func(t *testing.T, path string) string { + t.Helper() + + body := &bytes.Buffer{} + mpForm := multipart.NewWriter(body) + err := mpForm.WriteField("_csrf", GetCSRF(t, ctx.Session, ctx.Username+"/"+ctx.Reponame+"/_upload/"+setting.Repository.DefaultBranch)) + require.NoError(t, err) + + file, err := mpForm.CreateFormFile("file", filepath.Base(path)) + require.NoError(t, err) + + srcFile, err := os.Open(path) + require.NoError(t, err) + + io.Copy(file, srcFile) + require.NoError(t, mpForm.Close()) + + req := NewRequestWithBody(t, "POST", "/"+ctx.Username+"/"+ctx.Reponame+"/upload-file", body) + req.Header.Add("Content-Type", mpForm.FormDataContentType()) + resp := ctx.Session.MakeRequest(t, req, http.StatusOK) + + respMap := map[string]string{} + DecodeJSON(t, resp, &respMap) + return respMap["uuid"] + } + + // Generate random file + tmpFile := path.Join(t.TempDir(), "web-upload-test-file.bin") + require.NoError(t, generateRandomFile(1024*1024/4, tmpFile)) + expectedContent, err := os.ReadFile(tmpFile) + require.NoError(t, err) + + // Upload generated file + fileUUID := uploadFile(t, tmpFile) + req := NewRequestWithValues(t, "POST", ctx.Username+"/"+ctx.Reponame+"/_upload/"+setting.Repository.DefaultBranch, map[string]string{ + "commit_choice": "direct", + "files": fileUUID, + "_csrf": GetCSRF(t, ctx.Session, ctx.Username+"/"+ctx.Reponame+"/_upload/"+setting.Repository.DefaultBranch), + "commit_mail_id": "-1", + }) + ctx.Session.MakeRequest(t, req, http.StatusSeeOther) + + // Get some handles on the target repository and file + remoteRepoPath := path.Join(setting.RepoRootPath, ctx.GitPath()) + repo, err := git.OpenRepository(git.DefaultContext, remoteRepoPath) + require.NoError(t, err) + defer repo.Close() + tree, err := repo.GetTree(setting.Repository.DefaultBranch) + require.NoError(t, err) + treeEntry, err := tree.GetTreeEntryByPath(filepath.Base(tmpFile)) + require.NoError(t, err) + blob := treeEntry.Blob() + + // Check that the uploaded file is annexed + isAnnexed, err := annex.IsAnnexed(blob) + require.NoError(t, err) + require.True(t, isAnnexed) + + // Check that the uploaded file has the correct content + annexedFile, err := annex.Content(blob) + require.NoError(t, err) + actualContent, err := io.ReadAll(annexedFile) + require.NoError(t, err) + require.Equal(t, expectedContent, actualContent) + }) + }) +} + func TestGitAnnexMedia(t *testing.T) { if !setting.Annex.Enabled { t.Skip("Skipping since annex support is disabled.")