fix: improve git-annex UUID cache update times

Globbing for the config files is marginally faster than walking the
directory and checking for config files. Replacing the expensive calls
to `git config` for each repository with reading the repository's config
as an ini file is two orders of magnitude faster.

All in all, this reduces the time required to initialize the cache for
approx. 3000 repositories from approx. 5s to 50ms.

Server startup now also logs how long the cache update took. In addition,
the cache update is only performed if p2phttp support is not disabled,
because p2phttp support is currently the only feature that requires the
UUID cache.
This commit is contained in:
Matthias Riße 2025-02-21 11:55:17 +01:00
parent c5b3812553
commit b9437c7817

View file

@ -12,17 +12,18 @@ package annex
import (
"errors"
"fmt"
"io/fs"
"os"
"path"
"path/filepath"
"regexp"
"strings"
"time"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/typesniffer"
"gopkg.in/ini.v1" //nolint:depguard // This import is forbidden in favor of using the setting module, but we need ini parsing for something other than Forgejo settings
)
// ErrBlobIsNotAnnexed occurs if a blob does not contain a valid annex key
@ -95,8 +96,6 @@ func IsAnnexRepo(repo *git.Repository) bool {
return err == nil
}
// repoConfigFileRe matches paths ending in two non-empty path segments
// followed by "/config", where the second segment ends in one arbitrary
// character plus "git" (intended: "<owner>/<repo>.git/config").
// NOTE(review): the "." before "git" is not escaped, so it matches any
// character rather than only a literal dot.
var repoConfigFileRe = regexp.MustCompile("[^/]+/[^/]+.git/config$")
var (
uuid2repoPathCache = make(map[string]string)
repoPath2uuidCache = make(map[string]string)
@ -106,30 +105,39 @@ func Init() error {
// Init does nothing and reports success when git-annex support is disabled.
if !setting.Annex.Enabled {
return nil
}
// Previous behaviour shown by this diff: always populate the UUID cache and
// return the result of the update directly.
log.Info("Populating the git-annex UUID cache with existing repositories")
return updateUUID2RepoPathCache()
// New behaviour shown by this diff: populate the UUID cache only when p2phttp
// support is not disabled (the commit message states that p2phttp is
// currently the only feature requiring the cache). An error from the update
// is returned unchanged; on success the elapsed time is logged.
if !setting.Annex.DisableP2PHTTP {
log.Info("Populating the git-annex UUID cache with existing repositories")
start := time.Now()
if err := updateUUID2RepoPathCache(); err != nil {
return err
}
log.Info("Populating the git-annex UUID cache took %v", time.Since(start))
}
return nil
}
// updateUUID2RepoPathCache records the annex UUID of repositories found under
// setting.RepoRootPath in uuid2repoPathCache and repoPath2uuidCache.
// Repositories already present in repoPath2uuidCache are skipped, and
// repositories whose annex UUID is empty are not cached.
//
// This diff view interleaves the removed WalkDir/`git config` implementation
// with the added Glob/ini implementation; the comments below mark which is
// which.
func updateUUID2RepoPathCache() error {
// Removed implementation: walk everything below the repository root and, for
// each path matching repoConfigFileRe, run `git config annex.uuid` inside the
// corresponding repository.
return filepath.WalkDir(setting.RepoRootPath, func(path string, d fs.DirEntry, err error) error {
if err == nil && repoConfigFileRe.MatchString(path) {
thisRepoPath := strings.TrimSuffix(path, "/config")
_, ok := repoPath2uuidCache[thisRepoPath]
if ok {
return nil
}
stdout, _, err := git.NewCommand(git.DefaultContext, "config", "annex.uuid").RunStdString(&git.RunOpts{Dir: thisRepoPath})
if err != nil {
// A failing `git config` call is not reported; the repository is skipped.
return nil
}
repoUUID := strings.TrimSpace(stdout)
if repoUUID != "" {
uuid2repoPathCache[repoUUID] = thisRepoPath
repoPath2uuidCache[thisRepoPath] = repoUUID
}
// Added implementation: list candidate config files two directory levels
// below the repository root with a single glob.
// NOTE(review): unlike repoConfigFileRe, this pattern does not require the
// repository directory name to end in ".git" — confirm that is intended.
configFiles, err := filepath.Glob(filepath.Join(setting.RepoRootPath, "*", "*", "config"))
if err != nil {
return err
}
for _, configFile := range configFiles {
// NOTE(review): the "/" separator is hard-coded here although the path was
// built with filepath.Join — confirm this is fine on all supported platforms.
repoPath := strings.TrimSuffix(configFile, "/config")
_, ok := repoPath2uuidCache[repoPath]
if ok {
continue
}
// Removed implementation: end of the WalkDir callback.
return nil
})
// Added implementation: read annex.uuid directly from the repository's config
// file, parsed as ini, instead of spawning `git config`.
// NOTE(review): presumably equivalent to `git config annex.uuid` for this key;
// verify that only the repository-local config file matters here.
config, err := ini.Load(configFile)
if err != nil {
// Config files that cannot be loaded are skipped without reporting the error.
continue
}
repoUUID := config.Section("annex").Key("uuid").Value()
if repoUUID != "" {
uuid2repoPathCache[repoUUID] = repoPath
repoPath2uuidCache[repoPath] = repoUUID
}
}
return nil
}
func repoPathFromUUIDCache(uuid string) (string, error) {