chore: simplify GetDiff (#7682)

- Split `GetDiff` into two functions, `GetDiffSimple` and `GetDiffFull`. The former will do the bare minimum and really only get a Git diff while the latter does some extra stuff that's relevant for the frontend to show extra relevant.
- Use `GetDiffSimple` for API related calls, as they do not benefit nor are returning the extra information that `GetDiffFull` provides, this should show a measurable performance increase for API calls that returns commits and `/repos/{owner}/{repo}/pulls/{index}/files`.
- `GetDiffSimple` contains extra code comments about its interesting way to determine the before commit.
- Add unit tests to demonstrates that the logic for determining the before commit didn't change and the function still yields correct information.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/7682
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Co-authored-by: Gusted <postmaster@gusted.xyz>
Co-committed-by: Gusted <postmaster@gusted.xyz>
This commit is contained in:
Gusted 2025-04-27 19:49:59 +00:00 committed by Earl Warren
parent afcb7262ea
commit 00761a15d1
7 changed files with 124 additions and 39 deletions

View file

@ -1088,53 +1088,53 @@ type DiffOptions struct {
FileOnly bool
}
// GetDiff builds a Diff between two commits of a repository.
// GetDiffSimple builds a Diff between two commits of a repository.
// Passing the empty string as beforeCommitID returns a diff from the parent commit.
// The whitespaceBehavior is either an empty string or a git flag
func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
repoPath := gitRepo.Path
var beforeCommit *git.Commit
commit, err := gitRepo.GetCommit(opts.AfterCommitID)
// The whitespaceBehavior is either an empty string or a git flag.
func GetDiffSimple(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (diff *Diff, afterCommit *git.Commit, err error) {
afterCommit, err = gitRepo.GetCommit(opts.AfterCommitID)
if err != nil {
return nil, err
return nil, nil, fmt.Errorf("unable to get the after commit %q: %w", opts.AfterCommitID, err)
}
cmdCtx, cmdCancel := context.WithCancel(ctx)
defer cmdCancel()
cmdDiff := git.NewCommand(cmdCtx)
cmdDiff := git.NewCommand(cmdCtx).
AddArguments("diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M").
AddArguments(opts.WhitespaceBehavior...)
objectFormat, err := gitRepo.GetObjectFormat()
if err != nil {
return nil, err
return nil, nil, fmt.Errorf("not able to determine the object format: %w", err)
}
if (len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String()) && commit.ParentCount() == 0 {
cmdDiff.AddArguments("diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M").
AddArguments(opts.WhitespaceBehavior...).
AddDynamicArguments(objectFormat.EmptyTree().String()).
AddDynamicArguments(opts.AfterCommitID)
// If before commit is empty or the empty object and the after commit has no
// parents, then use the empty tree as before commit.
//
// This is the case for a 'initial commit' of a Git tree, which obviously has
// no parents.
if (len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String()) && afterCommit.ParentCount() == 0 {
// Reset before commit ID to indicate empty tree was used.
opts.BeforeCommitID = ""
// Add enpty tree as before commit.
cmdDiff.AddDynamicArguments(objectFormat.EmptyTree().String())
} else {
actualBeforeCommitID := opts.BeforeCommitID
if len(actualBeforeCommitID) == 0 {
parentCommit, err := commit.Parent(0)
// If before commit ID is empty, use the first parent of the after commit.
if len(opts.BeforeCommitID) == 0 {
parentCommit, err := afterCommit.Parent(0)
if err != nil {
return nil, err
return nil, nil, fmt.Errorf("not able to get first parent of %q: %w", afterCommit.ID.String(), err)
}
actualBeforeCommitID = parentCommit.ID.String()
opts.BeforeCommitID = parentCommit.ID.String()
}
cmdDiff.AddArguments("diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M").
AddArguments(opts.WhitespaceBehavior...).
AddDynamicArguments(actualBeforeCommitID, opts.AfterCommitID)
opts.BeforeCommitID = actualBeforeCommitID
beforeCommit, err = gitRepo.GetCommit(opts.BeforeCommitID)
if err != nil {
return nil, err
}
cmdDiff.AddDynamicArguments(opts.BeforeCommitID)
}
// Add the after commit to the diff command.
cmdDiff.AddDynamicArguments(opts.AfterCommitID)
// In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file
// so if we are using at least this version of git we don't have to tell ParsePatch to do
// the skipping for us
@ -1144,6 +1144,7 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
parsePatchSkipToFile = ""
}
// If we only want to diff for some files, add that as well.
cmdDiff.AddDashesAndList(files...)
reader, writer := io.Pipe()
@ -1153,6 +1154,7 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
}()
go func() {
repoPath := gitRepo.Path
stderr := &bytes.Buffer{}
cmdDiff.SetDescription(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath))
if err := cmdDiff.Run(&git.RunOpts{
@ -1167,14 +1169,33 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
_ = writer.Close()
}()
diff, err := ParsePatch(cmdCtx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
diff, err = ParsePatch(cmdCtx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
// Ensure the git process is killed if it didn't exit already
cmdCancel()
if err != nil {
return nil, fmt.Errorf("unable to ParsePatch: %w", err)
return nil, nil, fmt.Errorf("unable to parse a git diff: %w", err)
}
diff.Start = opts.SkipTo
return diff, afterCommit, nil
}
func GetDiffFull(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
diff, afterCommit, err := GetDiffSimple(ctx, gitRepo, opts, files...)
if err != nil {
return nil, err
}
// If there's a before commit, then GetDiffSimple will set it, otherwise it
// is empty.
var beforeCommit *git.Commit
if len(opts.BeforeCommitID) != 0 {
beforeCommit, err = gitRepo.GetCommit(opts.BeforeCommitID)
if err != nil {
return nil, fmt.Errorf("unable to get before commit %q: %w", opts.BeforeCommitID, err)
}
}
checker, err := gitRepo.GitAttributeChecker(opts.AfterCommitID, git.LinguistAttributes...)
if err != nil {
return nil, fmt.Errorf("unable to GitAttributeChecker: %w", err)
@ -1210,7 +1231,7 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
diffFile.IsGenerated = analyze.IsGenerated(diffFile.Name)
}
tailSection := diffFile.GetTailSection(gitRepo, beforeCommit, commit)
tailSection := diffFile.GetTailSection(gitRepo, beforeCommit, afterCommit)
if tailSection != nil {
diffFile.Sections = append(diffFile.Sections, tailSection)
}
@ -1272,7 +1293,7 @@ func GetPullDiffStats(gitRepo *git.Repository, opts *DiffOptions) (*PullDiffStat
// SyncAndGetUserSpecificDiff is like GetDiff, except that user specific data such as which files the given user has already viewed on the given PR will also be set
// Additionally, the database asynchronously is updated if files have changed since the last review
func SyncAndGetUserSpecificDiff(ctx context.Context, userID int64, pull *issues_model.PullRequest, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
diff, err := GetDiff(ctx, gitRepo, opts, files...)
diff, err := GetDiffFull(ctx, gitRepo, opts, files...)
if err != nil {
return nil, err
}