From b9437c7817ddfcc9e9e0a973b058d2e27a43a2af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Ri=C3=9Fe?= Date: Fri, 21 Feb 2025 11:55:17 +0100 Subject: [PATCH] fix: improve git-annex UUID cache update times Globbing for the config files is marginally faster than walking the directory and checking for config files. Replacing the expensive calls to `git config` for each repository with reading the repository's config as an ini file is two orders of magnitude faster. All in all, this reduces the time required to initialize the cache for approx. 3000 repositories from approx. 5s to 50ms. The server startup now also logs how long the cache update took, and the cache update is only done if p2phttp support is not disabled, because p2phttp support is currently the only feature that requires the UUID cache. --- modules/annex/annex.go | 56 ++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/modules/annex/annex.go b/modules/annex/annex.go index dee24d21bc..26baedc0ba 100644 --- a/modules/annex/annex.go +++ b/modules/annex/annex.go @@ -12,17 +12,18 @@ package annex import ( "errors" "fmt" - "io/fs" "os" "path" "path/filepath" - "regexp" "strings" + "time" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/typesniffer" + + "gopkg.in/ini.v1" //nolint:depguard // This import is forbidden in favor of using the setting module, but we need ini parsing for something other than Forgejo settings ) // ErrBlobIsNotAnnexed occurs if a blob does not contain a valid annex key @@ -95,8 +96,6 @@ func IsAnnexRepo(repo *git.Repository) bool { return err == nil } -var repoConfigFileRe = regexp.MustCompile("[^/]+/[^/]+.git/config$") - var ( uuid2repoPathCache = make(map[string]string) repoPath2uuidCache = make(map[string]string) @@ -106,30 +105,39 @@ func Init() error { if !setting.Annex.Enabled { return nil } - log.Info("Populating the git-annex UUID cache 
with existing repositories") - return updateUUID2RepoPathCache() + if !setting.Annex.DisableP2PHTTP { + log.Info("Populating the git-annex UUID cache with existing repositories") + start := time.Now() + if err := updateUUID2RepoPathCache(); err != nil { + return err + } + log.Info("Populating the git-annex UUID cache took %v", time.Since(start)) + } + return nil } func updateUUID2RepoPathCache() error { - return filepath.WalkDir(setting.RepoRootPath, func(path string, d fs.DirEntry, err error) error { - if err == nil && repoConfigFileRe.MatchString(path) { - thisRepoPath := strings.TrimSuffix(path, "/config") - _, ok := repoPath2uuidCache[thisRepoPath] - if ok { - return nil - } - stdout, _, err := git.NewCommand(git.DefaultContext, "config", "annex.uuid").RunStdString(&git.RunOpts{Dir: thisRepoPath}) - if err != nil { - return nil - } - repoUUID := strings.TrimSpace(stdout) - if repoUUID != "" { - uuid2repoPathCache[repoUUID] = thisRepoPath - repoPath2uuidCache[thisRepoPath] = repoUUID - } + configFiles, err := filepath.Glob(filepath.Join(setting.RepoRootPath, "*", "*", "config")) + if err != nil { + return err + } + for _, configFile := range configFiles { + repoPath := strings.TrimSuffix(configFile, "/config") + _, ok := repoPath2uuidCache[repoPath] + if ok { + continue } - return nil - }) + config, err := ini.Load(configFile) + if err != nil { + continue + } + repoUUID := config.Section("annex").Key("uuid").Value() + if repoUUID != "" { + uuid2repoPathCache[repoUUID] = repoPath + repoPath2uuidCache[repoPath] = repoUUID + } + } + return nil } func repoPathFromUUIDCache(uuid string) (string, error) {