From a3540ed20efad4e1aebb71edac2fc74604f2122e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Sun, 24 Aug 2025 21:06:41 +0200 Subject: [PATCH 1/4] line-log: avoid unnecessary tree diffs when processing merge commits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In process_ranges_merge_commit(), the line-level log first creates an array of diff queues by iterating over all parents of a merge commit and computing a tree diff for each. Then in a second loop it iterates over those diff queues, and if it finds that none of the interesting paths were modified in one of them, then it will return early. This means that when none of the interesting paths were modified between a merge and its first parent, then the tree diff between the merge and its second (Nth...) parent was computed in vain. Unify these two loops, so when it iterates over all parents of a merge commit, then it first computes the tree diff between the merge and that particular parent and then processes the resulting diff queue right away. This way we can spare some tree diff computing, thereby speeding up line-level log in repositories with mergy history: # git.git, 25.8% of commits are merges: Benchmark 1: ./git_v2.51.0 -C ~/src/git log -L:'lookup_commit(':commit.c v2.51.0 Time (mean ± σ): 1.001 s ± 0.009 s [User: 0.906 s, System: 0.095 s] Range (min … max): 0.991 s … 1.023 s 10 runs Benchmark 2: ./git -C ~/src/git log -L:'lookup_commit(':commit.c v2.51.0 Time (mean ± σ): 445.5 ms ± 3.4 ms [User: 358.8 ms, System: 84.3 ms] Range (min … max): 440.1 ms … 450.3 ms 10 runs Summary './git -C ~/src/git log -L:'lookup_commit(':commit.c v2.51.0' ran 2.25 ± 0.03 times faster than './git_v2.51.0 -C ~/src/git log -L:'lookup_commit(':commit.c v2.51.0' # linux.git, 7.5% of commits are merges: Benchmark 1: ./git_v2.51.0 -C ~/src/linux.git log -L:build_restore_work_registers:arch/mips/mm/tlbex.c v6.16 Time (mean ± σ): 3.246 s ± 0.007 s [User: 2.835 s, System: 0.409 s] Range (min … max): 3.232 s … 3.255 s 10 runs Benchmark 2: ./git -C ~/src/linux.git log -L:build_restore_work_registers:arch/mips/mm/tlbex.c v6.16 Time (mean ± σ): 2.467 s ± 0.014 s [User: 2.113 s, System: 0.353 s] Range (min … max): 2.455 s … 2.505 s 10 runs Summary './git -C ~/src/linux.git log -L:build_restore_work_registers:arch/mips/mm/tlbex.c v6.16' ran 1.32 ± 0.01 times faster than './git_v2.51.0 -C ~/src/linux.git log -L:build_restore_work_registers:arch/mips/mm/tlbex.c v6.16' And since now each iteration computes a tree diff and processes its result, there is no reason to store the diff queues for each merge parent anymore, so replace that diff queue array with a loop-local diff queue variable. With this change the static free_diffqueues() helper function in 'line-log.c' has no more callers left, remove it. Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- line-log.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/line-log.c b/line-log.c index 07f2154e84..cf30915c94 100644 --- a/line-log.c +++ b/line-log.c @@ -1087,13 +1087,6 @@ static struct diff_filepair *diff_filepair_dup(struct diff_filepair *pair) return new_filepair; } -static void free_diffqueues(int n, struct diff_queue_struct *dq) -{ - for (int i = 0; i < n; i++) - diff_queue_clear(&dq[i]); - free(dq); -} - static int process_all_files(struct line_log_data **range_out, struct rev_info *rev, struct diff_queue_struct *queue, @@ -1209,7 +1202,6 @@ static int process_ranges_ordinary_commit(struct rev_info *rev, struct commit *c static int process_ranges_merge_commit(struct rev_info *rev, struct commit *commit, struct line_log_data *range) { - struct diff_queue_struct *diffqueues; struct line_log_data **cand; struct commit **parents; struct commit_list *p; @@ -1220,20 +1212,19 @@ static int process_ranges_merge_commit(struct rev_info *rev, struct commit *comm if (nparents > 1 && rev->first_parent_only) nparents = 1; - ALLOC_ARRAY(diffqueues, nparents); CALLOC_ARRAY(cand, nparents); ALLOC_ARRAY(parents, nparents); p = commit->parents; for (i = 0; i < nparents; i++) { + struct diff_queue_struct diffqueue = DIFF_QUEUE_INIT; + int changed; parents[i] = p->item; p = p->next; - queue_diffs(range, &rev->diffopt, &diffqueues[i], commit, parents[i]); - } + queue_diffs(range, &rev->diffopt, &diffqueue, commit, parents[i]); - for (i = 0; i < nparents; i++) { - int changed; - changed = process_all_files(&cand[i], rev, &diffqueues[i], range); + changed = process_all_files(&cand[i], rev, &diffqueue, range); + diff_queue_clear(&diffqueue); if (!changed) { /* * This parent can take all the blame, so we @@ -1267,7 +1258,6 @@ out: free(cand[i]); } free(cand); - free_diffqueues(nparents, diffqueues); return ret; /* NEEDSWORK evil merge detection stuff */ From 9df27c258edf89ea8ea0472a0a9c260e026f197f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Sun, 24 Aug 2025 21:06:42 +0200 Subject: [PATCH 2/4] line-log: get rid of the parents array in process_ranges_merge_commit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can easily iterate through the parents of a merge commit without turning the list of parents into a dynamically allocated array of parents, so let's do so. This way we can avoid a memory allocation for each processed merge commit, though its effect on runtime seems to be unmeasurable. Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- line-log.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/line-log.c b/line-log.c index cf30915c94..b2a31ae956 100644 --- a/line-log.c +++ b/line-log.c @@ -1203,7 +1203,6 @@ static int process_ranges_merge_commit(struct rev_info *rev, struct commit *comm struct line_log_data *range) { struct line_log_data **cand; - struct commit **parents; struct commit_list *p; int i; int nparents = commit_list_count(commit->parents); @@ -1213,15 +1212,15 @@ static int process_ranges_merge_commit(struct rev_info *rev, struct commit *comm nparents = 1; CALLOC_ARRAY(cand, nparents); - ALLOC_ARRAY(parents, nparents); - p = commit->parents; - for (i = 0; i < nparents; i++) { + for (p = commit->parents, i = 0; + p && i < nparents; + p = p->next, i++) { + struct commit *parent = p->item; struct diff_queue_struct diffqueue = DIFF_QUEUE_INIT; int changed; - parents[i] = p->item; - p = p->next; - queue_diffs(range, &rev->diffopt, &diffqueue, commit, parents[i]); + + queue_diffs(range, &rev->diffopt, &diffqueue, commit, parent); changed = process_all_files(&cand[i], rev, &diffqueue, range); diff_queue_clear(&diffqueue); @@ -1230,9 +1229,9 @@ static int process_ranges_merge_commit(struct rev_info *rev, struct commit *comm * This parent can take all the blame, so we * don't follow any other path in history */ - add_line_range(rev, parents[i], cand[i]); + add_line_range(rev, parent, cand[i]); free_commit_list(commit->parents); - commit_list_append(parents[i], &commit->parents); + commit_list_append(parent, &commit->parents); ret = 0; goto out; @@ -1243,14 +1242,15 @@ static int process_ranges_merge_commit(struct rev_info *rev, struct commit *comm * No single parent took the blame. We add the candidates * from the above loop to the parents. */ - for (i = 0; i < nparents; i++) - add_line_range(rev, parents[i], cand[i]); + for (p = commit->parents, i = 0; + p && i < nparents; + p = p->next, i++) + add_line_range(rev, p->item, cand[i]); ret = 1; out: clear_commit_line_range(rev, commit); - free(parents); for (i = 0; i < nparents; i++) { if (!cand[i]) continue; From 62e4ef85fbc5574fd80caababbf41bd33f53a46d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Sun, 24 Aug 2025 21:06:43 +0200 Subject: [PATCH 3/4] line-log: initialize diff queue in process_ranges_ordinary_commit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit process_ranges_ordinary_commit() uses a local diff queue variable, which it leaves uninitialized before passing its address to queue_diffs(). This is not an issue, because at the end of that function the contents of an other diff queue is moved into it by simply overwriting whatever is in there, i.e. without reading any uninitialized memory. Still, seeing the uninitialized diff queue being passed around scared me more than once, so out of caution let's make sure that it's initialized. Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- line-log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/line-log.c b/line-log.c index b2a31ae956..71fa857ee8 100644 --- a/line-log.c +++ b/line-log.c @@ -1182,7 +1182,7 @@ static int process_ranges_ordinary_commit(struct rev_info *rev, struct commit *c struct line_log_data *range) { struct commit *parent = NULL; - struct diff_queue_struct queue; + struct diff_queue_struct queue = DIFF_QUEUE_INIT; struct line_log_data *parent_range; int changed; From 0a15bb634cf005a0266ee1108ac31aa75649a61c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Sun, 24 Aug 2025 21:06:44 +0200 Subject: [PATCH 4/4] line-log: simplify condition checking for merge commits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In process_ranges_arbitrary_commit() the condition deciding whether the given commit is not a merge, i.e. that it doesn't have more than one parent, is head-scratchingly backwards, flip it. Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- line-log.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/line-log.c b/line-log.c index 71fa857ee8..188d387d40 100644 --- a/line-log.c +++ b/line-log.c @@ -1273,10 +1273,10 @@ int line_log_process_ranges_arbitrary_commit(struct rev_info *rev, struct commit struct line_log_data *prange = line_log_data_copy(range); add_line_range(rev, commit->parents->item, prange); clear_commit_line_range(rev, commit); - } else if (!commit->parents || !commit->parents->next) - changed = process_ranges_ordinary_commit(rev, commit, range); - else + } else if (commit->parents && commit->parents->next) changed = process_ranges_merge_commit(rev, commit, range); + else + changed = process_ranges_ordinary_commit(rev, commit, range); } if (!changed)