chore: simplify GetDiff (#7682)

- Split `GetDiff` into two functions, `GetDiffSimple` and `GetDiffFull`. The former will do the bare minimum and really only get a Git diff while the latter does some extra stuff that's relevant for the frontend to show extra relevant.
- Use `GetDiffSimple` for API related calls, as they do not benefit nor are returning the extra information that `GetDiffFull` provides, this should show a measurable performance increase for API calls that returns commits and `/repos/{owner}/{repo}/pulls/{index}/files`.
- `GetDiffSimple` contains extra code comments about its interesting way to determine the before commit.
- Add unit tests to demonstrates that the logic for determining the before commit didn't change and the function still yields correct information.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/7682
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Co-authored-by: Gusted <postmaster@gusted.xyz>
Co-committed-by: Gusted <postmaster@gusted.xyz>
This commit is contained in:
Gusted 2025-04-27 19:49:59 +00:00 committed by Earl Warren
parent afcb7262ea
commit 00761a15d1
7 changed files with 124 additions and 39 deletions

View file

@ -1628,7 +1628,7 @@ func GetPullRequestFiles(ctx *context.APIContext) {
maxLines := setting.Git.MaxGitDiffLines maxLines := setting.Git.MaxGitDiffLines
// FIXME: If there are too many files in the repo, may cause some unpredictable issues. // FIXME: If there are too many files in the repo, may cause some unpredictable issues.
diff, err := gitdiff.GetDiff(ctx, baseGitRepo, diff, _, err := gitdiff.GetDiffSimple(ctx, baseGitRepo,
&gitdiff.DiffOptions{ &gitdiff.DiffOptions{
BeforeCommitID: startCommitID, BeforeCommitID: startCommitID,
AfterCommitID: endCommitID, AfterCommitID: endCommitID,

View file

@ -329,7 +329,7 @@ func Diff(ctx *context.Context) {
maxLines, maxFiles = -1, -1 maxLines, maxFiles = -1, -1
} }
diff, err := gitdiff.GetDiff(ctx, gitRepo, &gitdiff.DiffOptions{ diff, err := gitdiff.GetDiffFull(ctx, gitRepo, &gitdiff.DiffOptions{
AfterCommitID: commitID, AfterCommitID: commitID,
SkipTo: ctx.FormString("skip-to"), SkipTo: ctx.FormString("skip-to"),
MaxLines: maxLines, MaxLines: maxLines,

View file

@ -621,7 +621,7 @@ func PrepareCompareDiff(
fileOnly := ctx.FormBool("file-only") fileOnly := ctx.FormBool("file-only")
diff, err := gitdiff.GetDiff(ctx, ci.HeadGitRepo, diff, err := gitdiff.GetDiffFull(ctx, ci.HeadGitRepo,
&gitdiff.DiffOptions{ &gitdiff.DiffOptions{
BeforeCommitID: beforeCommitID, BeforeCommitID: beforeCommitID,
AfterCommitID: headCommitID, AfterCommitID: headCommitID,

View file

@ -981,7 +981,7 @@ func viewPullFiles(ctx *context.Context, specifiedStartCommit, specifiedEndCommi
// as the viewed information is designed to be loaded only on latest PR // as the viewed information is designed to be loaded only on latest PR
// diff and if you're signed in. // diff and if you're signed in.
if !ctx.IsSigned || willShowSpecifiedCommit || willShowSpecifiedCommitRange { if !ctx.IsSigned || willShowSpecifiedCommit || willShowSpecifiedCommitRange {
diff, err = gitdiff.GetDiff(ctx, gitRepo, diffOptions, files...) diff, err = gitdiff.GetDiffFull(ctx, gitRepo, diffOptions, files...)
methodWithError = "GetDiff" methodWithError = "GetDiff"
} else { } else {
diff, err = gitdiff.SyncAndGetUserSpecificDiff(ctx, ctx.Doer.ID, pull, gitRepo, diffOptions, files...) diff, err = gitdiff.SyncAndGetUserSpecificDiff(ctx, ctx.Doer.ID, pull, gitRepo, diffOptions, files...)

View file

@ -210,7 +210,7 @@ func ToCommit(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Rep
// Get diff stats for commit // Get diff stats for commit
if opts.Stat { if opts.Stat {
diff, err := gitdiff.GetDiff(ctx, gitRepo, &gitdiff.DiffOptions{ diff, _, err := gitdiff.GetDiffSimple(ctx, gitRepo, &gitdiff.DiffOptions{
AfterCommitID: commit.ID.String(), AfterCommitID: commit.ID.String(),
}) })
if err != nil { if err != nil {

View file

@ -1088,52 +1088,52 @@ type DiffOptions struct {
FileOnly bool FileOnly bool
} }
// GetDiff builds a Diff between two commits of a repository. // GetDiffSimple builds a Diff between two commits of a repository.
// Passing the empty string as beforeCommitID returns a diff from the parent commit. // Passing the empty string as beforeCommitID returns a diff from the parent commit.
// The whitespaceBehavior is either an empty string or a git flag // The whitespaceBehavior is either an empty string or a git flag.
func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { func GetDiffSimple(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (diff *Diff, afterCommit *git.Commit, err error) {
repoPath := gitRepo.Path afterCommit, err = gitRepo.GetCommit(opts.AfterCommitID)
var beforeCommit *git.Commit
commit, err := gitRepo.GetCommit(opts.AfterCommitID)
if err != nil { if err != nil {
return nil, err return nil, nil, fmt.Errorf("unable to get the after commit %q: %w", opts.AfterCommitID, err)
} }
cmdCtx, cmdCancel := context.WithCancel(ctx) cmdCtx, cmdCancel := context.WithCancel(ctx)
defer cmdCancel() defer cmdCancel()
cmdDiff := git.NewCommand(cmdCtx) cmdDiff := git.NewCommand(cmdCtx).
AddArguments("diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M").
AddArguments(opts.WhitespaceBehavior...)
objectFormat, err := gitRepo.GetObjectFormat() objectFormat, err := gitRepo.GetObjectFormat()
if err != nil { if err != nil {
return nil, err return nil, nil, fmt.Errorf("not able to determine the object format: %w", err)
} }
if (len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String()) && commit.ParentCount() == 0 { // If before commit is empty or the empty object and the after commit has no
cmdDiff.AddArguments("diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M"). // parents, then use the empty tree as before commit.
AddArguments(opts.WhitespaceBehavior...). //
AddDynamicArguments(objectFormat.EmptyTree().String()). // This is the case for a 'initial commit' of a Git tree, which obviously has
AddDynamicArguments(opts.AfterCommitID) // no parents.
if (len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String()) && afterCommit.ParentCount() == 0 {
// Reset before commit ID to indicate empty tree was used.
opts.BeforeCommitID = ""
// Add enpty tree as before commit.
cmdDiff.AddDynamicArguments(objectFormat.EmptyTree().String())
} else { } else {
actualBeforeCommitID := opts.BeforeCommitID // If before commit ID is empty, use the first parent of the after commit.
if len(actualBeforeCommitID) == 0 { if len(opts.BeforeCommitID) == 0 {
parentCommit, err := commit.Parent(0) parentCommit, err := afterCommit.Parent(0)
if err != nil { if err != nil {
return nil, err return nil, nil, fmt.Errorf("not able to get first parent of %q: %w", afterCommit.ID.String(), err)
} }
actualBeforeCommitID = parentCommit.ID.String() opts.BeforeCommitID = parentCommit.ID.String()
} }
cmdDiff.AddArguments("diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M"). cmdDiff.AddDynamicArguments(opts.BeforeCommitID)
AddArguments(opts.WhitespaceBehavior...). }
AddDynamicArguments(actualBeforeCommitID, opts.AfterCommitID)
opts.BeforeCommitID = actualBeforeCommitID
beforeCommit, err = gitRepo.GetCommit(opts.BeforeCommitID) // Add the after commit to the diff command.
if err != nil { cmdDiff.AddDynamicArguments(opts.AfterCommitID)
return nil, err
}
}
// In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file // In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file
// so if we are using at least this version of git we don't have to tell ParsePatch to do // so if we are using at least this version of git we don't have to tell ParsePatch to do
@ -1144,6 +1144,7 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
parsePatchSkipToFile = "" parsePatchSkipToFile = ""
} }
// If we only want to diff for some files, add that as well.
cmdDiff.AddDashesAndList(files...) cmdDiff.AddDashesAndList(files...)
reader, writer := io.Pipe() reader, writer := io.Pipe()
@ -1153,6 +1154,7 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
}() }()
go func() { go func() {
repoPath := gitRepo.Path
stderr := &bytes.Buffer{} stderr := &bytes.Buffer{}
cmdDiff.SetDescription(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath)) cmdDiff.SetDescription(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath))
if err := cmdDiff.Run(&git.RunOpts{ if err := cmdDiff.Run(&git.RunOpts{
@ -1167,14 +1169,33 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
_ = writer.Close() _ = writer.Close()
}() }()
diff, err := ParsePatch(cmdCtx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile) diff, err = ParsePatch(cmdCtx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
// Ensure the git process is killed if it didn't exit already // Ensure the git process is killed if it didn't exit already
cmdCancel() cmdCancel()
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to ParsePatch: %w", err) return nil, nil, fmt.Errorf("unable to parse a git diff: %w", err)
} }
diff.Start = opts.SkipTo diff.Start = opts.SkipTo
return diff, afterCommit, nil
}
func GetDiffFull(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
diff, afterCommit, err := GetDiffSimple(ctx, gitRepo, opts, files...)
if err != nil {
return nil, err
}
// If there's a before commit, then GetDiffSimple will set it, otherwise it
// is empty.
var beforeCommit *git.Commit
if len(opts.BeforeCommitID) != 0 {
beforeCommit, err = gitRepo.GetCommit(opts.BeforeCommitID)
if err != nil {
return nil, fmt.Errorf("unable to get before commit %q: %w", opts.BeforeCommitID, err)
}
}
checker, err := gitRepo.GitAttributeChecker(opts.AfterCommitID, git.LinguistAttributes...) checker, err := gitRepo.GitAttributeChecker(opts.AfterCommitID, git.LinguistAttributes...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to GitAttributeChecker: %w", err) return nil, fmt.Errorf("unable to GitAttributeChecker: %w", err)
@ -1210,7 +1231,7 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
diffFile.IsGenerated = analyze.IsGenerated(diffFile.Name) diffFile.IsGenerated = analyze.IsGenerated(diffFile.Name)
} }
tailSection := diffFile.GetTailSection(gitRepo, beforeCommit, commit) tailSection := diffFile.GetTailSection(gitRepo, beforeCommit, afterCommit)
if tailSection != nil { if tailSection != nil {
diffFile.Sections = append(diffFile.Sections, tailSection) diffFile.Sections = append(diffFile.Sections, tailSection)
} }
@ -1272,7 +1293,7 @@ func GetPullDiffStats(gitRepo *git.Repository, opts *DiffOptions) (*PullDiffStat
// SyncAndGetUserSpecificDiff is like GetDiff, except that user specific data such as which files the given user has already viewed on the given PR will also be set // SyncAndGetUserSpecificDiff is like GetDiff, except that user specific data such as which files the given user has already viewed on the given PR will also be set
// Additionally, the database asynchronously is updated if files have changed since the last review // Additionally, the database asynchronously is updated if files have changed since the last review
func SyncAndGetUserSpecificDiff(ctx context.Context, userID int64, pull *issues_model.PullRequest, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { func SyncAndGetUserSpecificDiff(ctx context.Context, userID int64, pull *issues_model.PullRequest, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
diff, err := GetDiff(ctx, gitRepo, opts, files...) diff, err := GetDiffFull(ctx, gitRepo, opts, files...)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View file

@ -635,7 +635,7 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) {
defer gitRepo.Close() defer gitRepo.Close()
for _, behavior := range []git.TrustedCmdArgs{{"-w"}, {"--ignore-space-at-eol"}, {"-b"}, nil} { for _, behavior := range []git.TrustedCmdArgs{{"-w"}, {"--ignore-space-at-eol"}, {"-b"}, nil} {
diffs, err := GetDiff(db.DefaultContext, gitRepo, diffs, _, err := GetDiffSimple(db.DefaultContext, gitRepo,
&DiffOptions{ &DiffOptions{
AfterCommitID: "bd7063cc7c04689c4d082183d32a604ed27a24f9", AfterCommitID: "bd7063cc7c04689c4d082183d32a604ed27a24f9",
BeforeCommitID: "559c156f8e0178b71cb44355428f24001b08fc68", BeforeCommitID: "559c156f8e0178b71cb44355428f24001b08fc68",
@ -651,6 +651,70 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) {
} }
} }
func TestGetDiffFull(t *testing.T) {
gitRepo, err := git.OpenRepository(git.DefaultContext, "./../../modules/git/tests/repos/language_stats_repo")
require.NoError(t, err)
defer gitRepo.Close()
t.Run("Initial commit", func(t *testing.T) {
diff, err := GetDiffFull(db.DefaultContext, gitRepo,
&DiffOptions{
AfterCommitID: "8fee858da5796dfb37704761701bb8e800ad9ef3",
MaxLines: setting.Git.MaxGitDiffLines,
MaxLineCharacters: setting.Git.MaxGitDiffLineCharacters,
MaxFiles: setting.Git.MaxGitDiffFiles,
})
require.NoError(t, err)
assert.Empty(t, diff.Start)
assert.Empty(t, diff.End)
assert.False(t, diff.IsIncomplete)
assert.Equal(t, 5, diff.NumFiles)
assert.Equal(t, 23, diff.TotalAddition)
assert.Len(t, diff.Files, 5)
assert.True(t, diff.Files[0].IsVendored)
assert.Equal(t, ".gitattributes", diff.Files[0].Name)
assert.Equal(t, "24139dae656713ba861751fb2c2ac38839349a7a", diff.Files[0].NameHash)
assert.Equal(t, "Python", diff.Files[1].Language)
assert.Equal(t, "i-am-a-python.p", diff.Files[1].Name)
assert.Equal(t, "32154957b043de62cbcdbe254a53ec4c3e00c5a0", diff.Files[1].NameHash)
assert.Equal(t, "java-hello/main.java", diff.Files[2].Name)
assert.Equal(t, "ef9f6a406a4cde7bb5480ba7b027bdc8cd6fa11d", diff.Files[2].NameHash)
assert.Equal(t, "main.vendor.java", diff.Files[3].Name)
assert.Equal(t, "c94fd7272f109d4d21d6df2b637c864a5ab63f46", diff.Files[3].NameHash)
assert.Equal(t, "python-hello/hello.py", diff.Files[4].Name)
assert.Equal(t, "021705ba8b98778dc4e277d3a6ea1b8c6122a7f9", diff.Files[4].NameHash)
})
t.Run("Normal diff", func(t *testing.T) {
diff, err := GetDiffFull(db.DefaultContext, gitRepo,
&DiffOptions{
AfterCommitID: "341fca5b5ea3de596dc483e54c2db28633cd2f97",
BeforeCommitID: "8fee858da5796dfb37704761701bb8e800ad9ef3",
MaxLines: setting.Git.MaxGitDiffLines,
MaxLineCharacters: setting.Git.MaxGitDiffLineCharacters,
MaxFiles: setting.Git.MaxGitDiffFiles,
})
require.NoError(t, err)
assert.Empty(t, diff.Start)
assert.Empty(t, diff.End)
assert.False(t, diff.IsIncomplete)
assert.Equal(t, 1, diff.NumFiles)
assert.Equal(t, 1, diff.TotalAddition)
assert.Len(t, diff.Files, 1)
assert.Equal(t, ".gitattributes", diff.Files[0].Name)
assert.Equal(t, "24139dae656713ba861751fb2c2ac38839349a7a", diff.Files[0].NameHash)
})
}
func TestNoCrashes(t *testing.T) { func TestNoCrashes(t *testing.T) {
type testcase struct { type testcase struct {
gitdiff string gitdiff string