fix: improve git-annex UUID cache update times (#65)
Some checks are pending
/ build-oci-image (rootful) (push) Waiting to run
/ build-oci-image (rootless) (push) Waiting to run
/ release (push) Waiting to run
testing / backend-checks (push) Waiting to run
testing / frontend-checks (push) Waiting to run
testing / test-unit (push) Blocked by required conditions
testing / test-e2e (push) Blocked by required conditions
testing / test-remote-cacher (redis) (push) Blocked by required conditions
testing / test-remote-cacher (valkey) (push) Blocked by required conditions
testing / test-remote-cacher (garnet) (push) Blocked by required conditions
testing / test-remote-cacher (redict) (push) Blocked by required conditions
testing / test-mysql (push) Blocked by required conditions
testing / test-pgsql (push) Blocked by required conditions
testing / test-sqlite (push) Blocked by required conditions
testing / security-check (push) Blocked by required conditions

Globbing for the config files is marginally faster than walking the
directory and checking for config files. Replacing the expensive calls
to `git config` for each repository with reading the repository's config
as an ini file is two orders of magnitude faster.

All in all, this reduces the time required to initialize the cache for
approx. 3000 repositories from approx. 5s to 50ms.

The server startup now also logs how long the cache update took, and the
cache update is only performed if p2phttp support is not disabled, because
p2phttp support is currently the only feature that requires the UUID
cache.

Fixes #63, fixes #64.

## Checklist

The [contributor guide](https://forgejo.org/docs/next/contributor/) contains information that will be helpful to first time contributors. There also are a few [conditions for merging Pull Requests in Forgejo repositories](https://codeberg.org/forgejo/governance/src/branch/main/PullRequestsAgreement.md). You are also welcome to join the [Forgejo development chatroom](https://matrix.to/#/#forgejo-development:matrix.org).

### Tests

- I added test coverage for Go changes...
  - [ ] in their respective `*_test.go` for unit tests.
  - [ ] in the `tests/integration` directory if it involves interactions with a live Forgejo server.
- I added test coverage for JavaScript changes...
  - [ ] in `web_src/js/*.test.js` if it can be unit tested.
  - [ ] in `tests/e2e/*.test.e2e.js` if it requires interactions with a live Forgejo server (see also the [developer guide for JavaScript testing](https://codeberg.org/forgejo/forgejo/src/branch/forgejo/tests/e2e/README.md#end-to-end-tests)).

### Documentation

- [ ] I created a pull request [to the documentation](https://codeberg.org/forgejo/docs) to explain to Forgejo users how to use this change.
- [x] I did not document these changes and I do not expect someone else to do it.

### Release notes

- [x] I do not want this change to show in the release notes.
- [ ] I want the title to show in the release notes with a link to this pull request.
- [ ] I want the content of the `release-notes/<pull request number>.md` to be used for the release notes instead of the title.

Reviewed-on: https://codeberg.org/forgejo-aneksajo/forgejo-aneksajo/pulls/65
Co-authored-by: Matthias Riße <m.risse@fz-juelich.de>
Co-committed-by: Matthias Riße <m.risse@fz-juelich.de>
This commit is contained in:
Matthias Riße 2025-02-21 13:59:06 +00:00 committed by matrss
parent c5b3812553
commit 1298a315bd

View file

@ -12,17 +12,18 @@ package annex
import (
"errors"
"fmt"
"io/fs"
"os"
"path"
"path/filepath"
"regexp"
"strings"
"time"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/typesniffer"
"gopkg.in/ini.v1" //nolint:depguard // This import is forbidden in favor of using the setting module, but we need ini parsing for something other than Forgejo settings
)
// ErrBlobIsNotAnnexed occurs if a blob does not contain a valid annex key
@ -95,8 +96,6 @@ func IsAnnexRepo(repo *git.Repository) bool {
return err == nil
}
var repoConfigFileRe = regexp.MustCompile("[^/]+/[^/]+.git/config$")
var (
uuid2repoPathCache = make(map[string]string)
repoPath2uuidCache = make(map[string]string)
@ -106,30 +105,39 @@ func Init() error {
if !setting.Annex.Enabled {
return nil
}
log.Info("Populating the git-annex UUID cache with existing repositories")
return updateUUID2RepoPathCache()
if !setting.Annex.DisableP2PHTTP {
log.Info("Populating the git-annex UUID cache with existing repositories")
start := time.Now()
if err := updateUUID2RepoPathCache(); err != nil {
return err
}
log.Info("Populating the git-annex UUID cache took %v", time.Since(start))
}
return nil
}
// updateUUID2RepoPathCache records the annex.uuid of repositories found below
// setting.RepoRootPath in the two package-level maps uuid2repoPathCache
// (UUID -> repository path) and repoPath2uuidCache (repository path -> UUID).
// Repository paths that already have an entry in repoPath2uuidCache are
// skipped, and repositories with an empty UUID are not recorded.
//
// NOTE(review): this listing is a rendered diff without +/- markers, so two
// implementations are interleaved below. Going by the commit message, the
// filepath.WalkDir / `git config` lines appear to be the removed version and
// the filepath.Glob / ini.Load lines the added one -- confirm against the
// actual file before editing.
func updateUUID2RepoPathCache() error {
// Presumably the removed implementation: walk the whole repository root and
// act on every path matching repoConfigFileRe.
return filepath.WalkDir(setting.RepoRootPath, func(path string, d fs.DirEntry, err error) error {
if err == nil && repoConfigFileRe.MatchString(path) {
// The repository path is the config file path without its "/config" suffix.
thisRepoPath := strings.TrimSuffix(path, "/config")
_, ok := repoPath2uuidCache[thisRepoPath]
if ok {
// Already cached; nothing to do for this repository.
return nil
}
// Ask git for the UUID, running the command inside the repository directory.
stdout, _, err := git.NewCommand(git.DefaultContext, "config", "annex.uuid").RunStdString(&git.RunOpts{Dir: thisRepoPath})
if err != nil {
// A failing `git config` call is ignored and the walk continues.
return nil
}
repoUUID := strings.TrimSpace(stdout)
if repoUUID != "" {
uuid2repoPathCache[repoUUID] = thisRepoPath
repoPath2uuidCache[thisRepoPath] = repoUUID
}
// Presumably the added implementation: collect every file named "config" that
// sits exactly two directory levels below the repository root.
configFiles, err := filepath.Glob(filepath.Join(setting.RepoRootPath, "*", "*", "config"))
if err != nil {
// A glob failure is returned to the caller unchanged.
return err
}
for _, configFile := range configFiles {
// NOTE(review): the suffix uses a literal "/" while the pattern is built with
// filepath.Join (OS-specific separator) -- confirm this is fine on all
// supported platforms.
repoPath := strings.TrimSuffix(configFile, "/config")
_, ok := repoPath2uuidCache[repoPath]
if ok {
// Already cached; move on to the next config file.
continue
}
return nil
})
// Read the repository config directly as an ini file.
config, err := ini.Load(configFile)
if err != nil {
// Config files that fail to load are skipped without reporting an error.
continue
}
repoUUID := config.Section("annex").Key("uuid").Value()
if repoUUID != "" {
// Keep both lookup directions in sync.
uuid2repoPathCache[repoUUID] = repoPath
repoPath2uuidCache[repoPath] = repoUUID
}
}
return nil
}
func repoPathFromUUIDCache(uuid string) (string, error) {