From c0bec06cfedb930ecb3cd8c8cdb0f3e14e5e9308 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 24 Sep 2025 13:57:15 -0700 Subject: [PATCH 01/92] diff --no-index: fix logic for paths ending in '/' If one of the two provided paths for git diff --no-index ends in a '/', a failure similar to the following occurs: $ git diff --no-index -- /tmp/ /tmp/ ':!' fatal: `pos + len' is too far after the end of the buffer This occurs because of an incorrect calculation of the skip lengths in diff_no_index(). The code wants to calculate the length of the string, but add one in case the string doesn't end with a slash. The method it uses is incorrect, as it always checks the trailing NUL character of the string. This will never be a '/', so we always add one. In the event that we *do* have a trailing slash, this will create an off-by-one length error later when using the skip value. The most straightforward fix would be to correct the skip1 and skip2 lengths by using ends_with(). However, Johannes made a good point that the existing logic is wasting a lot of computation. We generate the match string by copying the path in and then skipping almost all of it immediately with a potentially expensive memmove() from the strbuf_remove() call. We also re-initialize the match stringbuf each time we call read_directory_contents. The read_directory_contents really wants a path that is rooted at the start of the directory scan. We're currently building this by taking the full path and stripping out the start portion. Instead, replace this logic by building up the portion of the match as we go. Start by initializing two strbuf in diff_no_index containing the empty string. Pass these into queue_diff, which in turn passes the appropriate left or right side into read_directory_contents. As before, we build up the matches by appending elements to the match path and then clearing them using strbuf_setlen. In the recursive portion of the queue_diff algorithm, we build up new match paths the same way that we build up new buffer paths, by appending the elements and then clearing them with strbuf_setlen after each iteration. This is cheaper as it avoids repeated allocations, and is a bit simpler to track what is going on. Add a couple of test cases that pass in paths already ending in '/', to ensure the tests cover this regression. Reported-by: Johannes Schindelin Closes: https://lore.kernel.org/git/c75ec5f9-407a-6555-d4fb-bb629d54ec61@gmx.de/ Signed-off-by: Jacob Keller [jc: small leakfixes at the end of diff_no_index()] Signed-off-by: Junio C Hamano --- diff-no-index.c | 63 ++++++++++++++++++++++------------------ t/t4053-diff-no-index.sh | 16 ++++++++++ 2 files changed, 51 insertions(+), 28 deletions(-) diff --git a/diff-no-index.c b/diff-no-index.c index 88ae4cee56..f320424f05 100644 --- a/diff-no-index.c +++ b/diff-no-index.c @@ -21,30 +21,21 @@ static int read_directory_contents(const char *path, struct string_list *list, const struct pathspec *pathspec, - int skip) + struct strbuf *match) { - struct strbuf match = STRBUF_INIT; - int len; + int len = match->len; DIR *dir; struct dirent *e; if (!(dir = opendir(path))) return error("Could not open directory %s", path); - if (pathspec) { - strbuf_addstr(&match, path); - strbuf_complete(&match, '/'); - strbuf_remove(&match, 0, skip); - - len = match.len; - } - while ((e = readdir_skip_dot_and_dotdot(dir))) { if (pathspec) { int is_dir = 0; - strbuf_setlen(&match, len); - strbuf_addstr(&match, e->d_name); + strbuf_setlen(match, len); + strbuf_addstr(match, e->d_name); if (NOT_CONSTANT(DTYPE(e)) != DT_UNKNOWN) { is_dir = (DTYPE(e) == DT_DIR); } else { @@ -57,7 +48,7 @@ static int read_directory_contents(const char *path, struct string_list *list, } if (!match_leading_pathspec(NULL, pathspec, - match.buf, match.len, + match->buf, match->len, 0, NULL, is_dir)) continue; } @@ -65,7 +56,7 @@ static int read_directory_contents(const char *path, struct string_list *list, string_list_insert(list, e->d_name); } - strbuf_release(&match); + strbuf_setlen(match, len); closedir(dir); return 0; } @@ -169,7 +160,8 @@ static struct diff_filespec *noindex_filespec(const struct git_hash_algo *algop, static int queue_diff(struct diff_options *o, const struct git_hash_algo *algop, const char *name1, const char *name2, int recursing, - const struct pathspec *ps, int skip1, int skip2) + const struct pathspec *ps, + struct strbuf *ps_match1, struct strbuf *ps_match2) { int mode1 = 0, mode2 = 0; enum special special1 = SPECIAL_NONE, special2 = SPECIAL_NONE; @@ -208,10 +200,12 @@ static int queue_diff(struct diff_options *o, const struct git_hash_algo *algop, struct string_list p2 = STRING_LIST_INIT_DUP; int i1, i2, ret = 0; size_t len1 = 0, len2 = 0; + size_t match1_len = ps_match1->len; + size_t match2_len = ps_match2->len; - if (name1 && read_directory_contents(name1, &p1, ps, skip1)) + if (name1 && read_directory_contents(name1, &p1, ps, ps_match1)) return -1; - if (name2 && read_directory_contents(name2, &p2, ps, skip2)) { + if (name2 && read_directory_contents(name2, &p2, ps, ps_match2)) { string_list_clear(&p1, 0); return -1; } @@ -235,6 +229,11 @@ static int queue_diff(struct diff_options *o, const struct git_hash_algo *algop, strbuf_setlen(&buffer1, len1); strbuf_setlen(&buffer2, len2); + if (ps) { + strbuf_setlen(ps_match1, match1_len); + strbuf_setlen(ps_match2, match2_len); + } + if (i1 == p1.nr) comp = 1; else if (i2 == p2.nr) @@ -245,18 +244,28 @@ static int queue_diff(struct diff_options *o, const struct git_hash_algo *algop, if (comp > 0) n1 = NULL; else { - strbuf_addstr(&buffer1, p1.items[i1++].string); + strbuf_addstr(&buffer1, p1.items[i1].string); + if (ps) { + strbuf_addstr(ps_match1, p1.items[i1].string); + strbuf_complete(ps_match1, '/'); + } n1 = buffer1.buf; + i1++; } if (comp < 0) n2 = NULL; else { - strbuf_addstr(&buffer2, p2.items[i2++].string); + strbuf_addstr(&buffer2, p2.items[i2].string); + if (ps) { + strbuf_addstr(ps_match2, p2.items[i2].string); + strbuf_complete(ps_match2, '/'); + } n2 = buffer2.buf; + i2++; } - ret = queue_diff(o, algop, n1, n2, 1, ps, skip1, skip2); + ret = queue_diff(o, algop, n1, n2, 1, ps, ps_match1, ps_match2); } string_list_clear(&p1, 0); string_list_clear(&p2, 0); @@ -346,7 +355,8 @@ int diff_no_index(struct rev_info *revs, const struct git_hash_algo *algop, int implicit_no_index, int argc, const char **argv) { struct pathspec pathspec, *ps = NULL; - int i, no_index, skip1 = 0, skip2 = 0; + struct strbuf ps_match1 = STRBUF_INIT, ps_match2 = STRBUF_INIT; + int i, no_index; int ret = 1; const char *paths[2]; char *to_free[ARRAY_SIZE(paths)] = { 0 }; @@ -387,11 +397,6 @@ int diff_no_index(struct rev_info *revs, const struct git_hash_algo *algop, NULL, &argv[2]); if (pathspec.nr) ps = &pathspec; - - skip1 = strlen(paths[0]); - skip1 += paths[0][skip1] == '/' ? 0 : 1; - skip2 = strlen(paths[1]); - skip2 += paths[1][skip2] == '/' ? 0 : 1; } else if (argc > 2) { warning(_("Limiting comparison with pathspecs is only " "supported if both paths are directories.")); @@ -415,7 +420,7 @@ int diff_no_index(struct rev_info *revs, const struct git_hash_algo *algop, revs->diffopt.flags.exit_with_status = 1; if (queue_diff(&revs->diffopt, algop, paths[0], paths[1], 0, ps, - skip1, skip2)) + &ps_match1, &ps_match2)) goto out; diff_set_mnemonic_prefix(&revs->diffopt, "1/", "2/"); diffcore_std(&revs->diffopt); @@ -431,6 +436,8 @@ out: for (i = 0; i < ARRAY_SIZE(to_free); i++) free(to_free[i]); strbuf_release(&replacement); + strbuf_release(&ps_match1); + strbuf_release(&ps_match2); if (ps) clear_pathspec(ps); return ret; diff --git a/t/t4053-diff-no-index.sh b/t/t4053-diff-no-index.sh index 01db9243ab..e0ea437685 100755 --- a/t/t4053-diff-no-index.sh +++ b/t/t4053-diff-no-index.sh @@ -322,6 +322,22 @@ test_expect_success 'diff --no-index with pathspec' ' test_cmp expect actual ' +test_expect_success 'diff --no-index first path ending in slash with pathspec' ' + test_expect_code 1 git diff --name-status --no-index a/ b 1 >actual && + cat >expect <<-EOF && + D a/1 + EOF + test_cmp expect actual +' + +test_expect_success 'diff --no-index second path ending in slash with pathspec' ' + test_expect_code 1 git diff --name-status --no-index a b/ 1 >actual && + cat >expect <<-EOF && + D a/1 + EOF + test_cmp expect actual +' + test_expect_success 'diff --no-index with pathspec no matches' ' test_expect_code 0 git diff --name-status --no-index a b missing ' From cf680cdb9543095bf75eefce7489c34282506353 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Thu, 2 Oct 2025 23:27:26 +0000 Subject: [PATCH 02/92] make: delete XDIFF_LIB, add xdiff to LIB_OBJS In a future patch series the 'xdiff' Rust crate will be added. Delete the creation of the static library file for xdiff to avoid a name conflict. This also moves toward the goal of Rust only needing to link against libgit.a. Changes to Meson are not required as the xdiff library is already included in Meson's libgit.a. xdiff-objs was a historical make target to allow building just the objects in xdiff. Since it was defined in terms of XDIFF_OBJS (which no longer exists) this convenience make target no longer makes sense. Remove it. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- Makefile | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 4c95affadb..1a607eff93 100644 --- a/Makefile +++ b/Makefile @@ -916,7 +916,6 @@ export PYTHON_PATH TEST_SHELL_PATH = $(SHELL_PATH) LIB_FILE = libgit.a -XDIFF_LIB = xdiff/lib.a REFTABLE_LIB = reftable/libreftable.a GENERATED_H += command-list.h @@ -1207,6 +1206,13 @@ LIB_OBJS += write-or-die.o LIB_OBJS += ws.o LIB_OBJS += wt-status.o LIB_OBJS += xdiff-interface.o +LIB_OBJS += xdiff/xdiffi.o +LIB_OBJS += xdiff/xemit.o +LIB_OBJS += xdiff/xhistogram.o +LIB_OBJS += xdiff/xmerge.o +LIB_OBJS += xdiff/xpatience.o +LIB_OBJS += xdiff/xprepare.o +LIB_OBJS += xdiff/xutils.o BUILTIN_OBJS += builtin/add.o BUILTIN_OBJS += builtin/am.o @@ -1388,8 +1394,8 @@ CLAR_TEST_OBJS += $(UNIT_TEST_DIR)/unit-test.o UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o -# xdiff and reftable libs may in turn depend on what is in libgit.a -GITLIBS = common-main.o $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB) $(LIB_FILE) +# reftable lib may in turn depend on what is in libgit.a +GITLIBS = common-main.o $(LIB_FILE) $(REFTABLE_LIB) $(LIB_FILE) EXTLIBS = GIT_USER_AGENT = git/$(GIT_VERSION) @@ -2721,16 +2727,6 @@ reconfigure config.mak.autogen: config.status .PHONY: reconfigure # This is a convenience target. endif -XDIFF_OBJS += xdiff/xdiffi.o -XDIFF_OBJS += xdiff/xemit.o -XDIFF_OBJS += xdiff/xhistogram.o -XDIFF_OBJS += xdiff/xmerge.o -XDIFF_OBJS += xdiff/xpatience.o -XDIFF_OBJS += xdiff/xprepare.o -XDIFF_OBJS += xdiff/xutils.o -.PHONY: xdiff-objs -xdiff-objs: $(XDIFF_OBJS) - REFTABLE_OBJS += reftable/basics.o REFTABLE_OBJS += reftable/error.o REFTABLE_OBJS += reftable/block.o @@ -2765,7 +2761,6 @@ OBJECTS += $(GIT_OBJS) OBJECTS += $(SCALAR_OBJS) OBJECTS += $(PROGRAM_OBJS) OBJECTS += $(TEST_OBJS) -OBJECTS += $(XDIFF_OBJS) OBJECTS += $(FUZZ_OBJS) OBJECTS += $(REFTABLE_OBJS) $(REFTABLE_TEST_OBJS) OBJECTS += $(UNIT_TEST_OBJS) @@ -2919,9 +2914,6 @@ scalar$X: scalar.o GIT-LDFLAGS $(GITLIBS) $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^ -$(XDIFF_LIB): $(XDIFF_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^ - $(REFTABLE_LIB): $(REFTABLE_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^ @@ -3763,7 +3755,7 @@ clean: profile-clean coverage-clean cocciclean $(RM) git.rc git.res $(RM) $(OBJECTS) $(RM) headless-git.o - $(RM) $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB) + $(RM) $(LIB_FILE) $(REFTABLE_LIB) $(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) $(OTHER_PROGRAMS) $(RM) $(TEST_PROGRAMS) $(RM) $(FUZZ_PROGRAMS) @@ -3957,7 +3949,6 @@ endif LIBGIT_PUB_OBJS += contrib/libgit-sys/public_symbol_export.o LIBGIT_PUB_OBJS += libgit.a LIBGIT_PUB_OBJS += reftable/libreftable.a -LIBGIT_PUB_OBJS += xdiff/lib.a LIBGIT_PARTIAL_EXPORT = contrib/libgit-sys/partial_symbol_export.o From f3b4c89d59f15f3b67f29bff6f1f53dbc11a5b58 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Thu, 2 Oct 2025 23:27:27 +0000 Subject: [PATCH 03/92] make: delete REFTABLE_LIB, add reftable to LIB_OBJS Same idea as the previous commit except that I don't know when or if reftable will be turned into a Rust crate. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- Makefile | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index 1a607eff93..a6a14b038b 100644 --- a/Makefile +++ b/Makefile @@ -916,7 +916,6 @@ export PYTHON_PATH TEST_SHELL_PATH = $(SHELL_PATH) LIB_FILE = libgit.a -REFTABLE_LIB = reftable/libreftable.a GENERATED_H += command-list.h GENERATED_H += config-list.h @@ -1134,6 +1133,19 @@ LIB_OBJS += refs/iterator.o LIB_OBJS += refs/packed-backend.o LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o +LIB_OBJS += reftable/basics.o +LIB_OBJS += reftable/error.o +LIB_OBJS += reftable/block.o +LIB_OBJS += reftable/blocksource.o +LIB_OBJS += reftable/iter.o +LIB_OBJS += reftable/merged.o +LIB_OBJS += reftable/pq.o +LIB_OBJS += reftable/record.o +LIB_OBJS += reftable/stack.o +LIB_OBJS += reftable/system.o +LIB_OBJS += reftable/table.o +LIB_OBJS += reftable/tree.o +LIB_OBJS += reftable/writer.o LIB_OBJS += remote.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o @@ -1394,8 +1406,7 @@ CLAR_TEST_OBJS += $(UNIT_TEST_DIR)/unit-test.o UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o -# reftable lib may in turn depend on what is in libgit.a -GITLIBS = common-main.o $(LIB_FILE) $(REFTABLE_LIB) $(LIB_FILE) +GITLIBS = common-main.o $(LIB_FILE) EXTLIBS = GIT_USER_AGENT = git/$(GIT_VERSION) @@ -2727,20 +2738,6 @@ reconfigure config.mak.autogen: config.status .PHONY: reconfigure # This is a convenience target. endif -REFTABLE_OBJS += reftable/basics.o -REFTABLE_OBJS += reftable/error.o -REFTABLE_OBJS += reftable/block.o -REFTABLE_OBJS += reftable/blocksource.o -REFTABLE_OBJS += reftable/iter.o -REFTABLE_OBJS += reftable/merged.o -REFTABLE_OBJS += reftable/pq.o -REFTABLE_OBJS += reftable/record.o -REFTABLE_OBJS += reftable/stack.o -REFTABLE_OBJS += reftable/system.o -REFTABLE_OBJS += reftable/table.o -REFTABLE_OBJS += reftable/tree.o -REFTABLE_OBJS += reftable/writer.o - TEST_OBJS := $(patsubst %$X,%.o,$(TEST_PROGRAMS)) $(patsubst %,t/helper/%,$(TEST_BUILTINS_OBJS)) .PHONY: test-objs @@ -2762,7 +2759,7 @@ OBJECTS += $(SCALAR_OBJS) OBJECTS += $(PROGRAM_OBJS) OBJECTS += $(TEST_OBJS) OBJECTS += $(FUZZ_OBJS) -OBJECTS += $(REFTABLE_OBJS) $(REFTABLE_TEST_OBJS) +OBJECTS += $(REFTABLE_TEST_OBJS) OBJECTS += $(UNIT_TEST_OBJS) OBJECTS += $(CLAR_TEST_OBJS) OBJECTS += $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(UNIT_TEST_PROGRAMS)) @@ -2914,9 +2911,6 @@ scalar$X: scalar.o GIT-LDFLAGS $(GITLIBS) $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^ -$(REFTABLE_LIB): $(REFTABLE_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^ - export DEFAULT_EDITOR DEFAULT_PAGER Documentation/GIT-EXCLUDED-PROGRAMS: FORCE @@ -3755,7 +3749,7 @@ clean: profile-clean coverage-clean cocciclean $(RM) git.rc git.res $(RM) $(OBJECTS) $(RM) headless-git.o - $(RM) $(LIB_FILE) $(REFTABLE_LIB) + $(RM) $(LIB_FILE) $(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) $(OTHER_PROGRAMS) $(RM) $(TEST_PROGRAMS) $(RM) $(FUZZ_PROGRAMS) @@ -3948,7 +3942,6 @@ endif LIBGIT_PUB_OBJS += contrib/libgit-sys/public_symbol_export.o LIBGIT_PUB_OBJS += libgit.a -LIBGIT_PUB_OBJS += reftable/libreftable.a LIBGIT_PARTIAL_EXPORT = contrib/libgit-sys/partial_symbol_export.o From 2c3cc43f96f9568d5475e46bd1442c5551129ce8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 6 Oct 2025 19:19:23 +0200 Subject: [PATCH 04/92] add-patch: improve help for options j, J, k, and K MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The options j, J, k, and K don't affect the status of the current hunk. They just go to a different one. This is true whether the current hunk is undecided or not. Avoid misunderstanding by no longer mentioning the current hunk explicitly in their help texts. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- Documentation/git-add.adoc | 8 ++++---- add-patch.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/git-add.adoc b/Documentation/git-add.adoc index ad629c46c5..3266ccf105 100644 --- a/Documentation/git-add.adoc +++ b/Documentation/git-add.adoc @@ -342,10 +342,10 @@ patch:: d - do not stage this hunk or any of the later hunks in the file g - select a hunk to go to / - search for a hunk matching the given regex - j - leave this hunk undecided, see next undecided hunk - J - leave this hunk undecided, see next hunk - k - leave this hunk undecided, see previous undecided hunk - K - leave this hunk undecided, see previous hunk + j - go to the next undecided hunk + J - go to the next hunk + k - go to the previous undecided hunk + K - go to the previous hunk s - split the current hunk into smaller hunks e - manually edit the current hunk p - print the current hunk diff --git a/add-patch.c b/add-patch.c index b0389c5d5b..912266a3f8 100644 --- a/add-patch.c +++ b/add-patch.c @@ -1397,10 +1397,10 @@ static size_t display_hunks(struct add_p_state *s, } static const char help_patch_remainder[] = -N_("j - leave this hunk undecided, see next undecided hunk\n" - "J - leave this hunk undecided, see next hunk\n" - "k - leave this hunk undecided, see previous undecided hunk\n" - "K - leave this hunk undecided, see previous hunk\n" +N_("j - go to the next undecided hunk\n" + "J - go to the next hunk\n" + "k - go to the previous undecided hunk\n" + "K - go to the previous hunk\n" "g - select a hunk to go to\n" "/ - search for a hunk matching the given regex\n" "s - split the current hunk into smaller hunks\n" From c309b65a7c8a0dc8a1566ac3587d37d935632e4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 6 Oct 2025 19:20:31 +0200 Subject: [PATCH 05/92] add-patch: document that option J rolls over MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The variable "permitted" is not reset after moving to a different hunk, so it only accumulates permission and doesn't necessarily reflect those of the current hunk. This may be a bug, but is actually useful with the option J, which can be used at the last hunk to roll over to the first hunk. Make this particular behavior official. Also adjust the error message, as it will only be shown if there's just a single hunk. Suggested-by: Junio C Hamano Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- Documentation/git-add.adoc | 2 +- add-patch.c | 6 +++--- t/t3701-add-interactive.sh | 18 ++++++++++++++---- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/Documentation/git-add.adoc b/Documentation/git-add.adoc index 3266ccf105..5c05a3a7f9 100644 --- a/Documentation/git-add.adoc +++ b/Documentation/git-add.adoc @@ -343,7 +343,7 @@ patch:: g - select a hunk to go to / - search for a hunk matching the given regex j - go to the next undecided hunk - J - go to the next hunk + J - go to the next hunk, roll over at the bottom k - go to the previous undecided hunk K - go to the previous hunk s - split the current hunk into smaller hunks diff --git a/add-patch.c b/add-patch.c index 912266a3f8..1f466ec9c0 100644 --- a/add-patch.c +++ b/add-patch.c @@ -1398,7 +1398,7 @@ static size_t display_hunks(struct add_p_state *s, static const char help_patch_remainder[] = N_("j - go to the next undecided hunk\n" - "J - go to the next hunk\n" + "J - go to the next hunk, roll over at the bottom\n" "k - go to the previous undecided hunk\n" "K - go to the previous hunk\n" "g - select a hunk to go to\n" @@ -1493,7 +1493,7 @@ static int patch_update_file(struct add_p_state *s, permitted |= ALLOW_GOTO_NEXT_UNDECIDED_HUNK; strbuf_addstr(&s->buf, ",j"); } - if (hunk_index + 1 < file_diff->hunk_nr) { + if (file_diff->hunk_nr > 1) { permitted |= ALLOW_GOTO_NEXT_HUNK; strbuf_addstr(&s->buf, ",J"); } @@ -1584,7 +1584,7 @@ soft_increment: if (permitted & ALLOW_GOTO_NEXT_HUNK) hunk_index++; else - err(s, _("No next hunk")); + err(s, _("No other hunk")); } else if (s->answer.buf[0] == 'k') { if (permitted & ALLOW_GOTO_PREVIOUS_UNDECIDED_HUNK) hunk_index = undecided_previous; diff --git a/t/t3701-add-interactive.sh b/t/t3701-add-interactive.sh index d9fe289a7a..d5d2e120ab 100755 --- a/t/t3701-add-interactive.sh +++ b/t/t3701-add-interactive.sh @@ -334,7 +334,7 @@ test_expect_success 'different prompts for mode change/deleted' ' cat >expect <<-\EOF && (1/1) Stage deletion [y,n,q,a,d,p,?]? (1/2) Stage mode change [y,n,q,a,d,j,J,g,/,p,?]? - (2/2) Stage this hunk [y,n,q,a,d,K,g,/,e,p,?]? + (2/2) Stage this hunk [y,n,q,a,d,K,J,g,/,e,p,?]? EOF test_cmp expect actual.filtered ' @@ -521,7 +521,7 @@ test_expect_success 'split hunk setup' ' test_expect_success 'goto hunk 1 with "g 1"' ' test_when_finished "git reset" && tr _ " " >expect <<-EOF && - (2/2) Stage this hunk [y,n,q,a,d,K,g,/,e,p,?]? + 1: -1,2 +1,3 +15 + (2/2) Stage this hunk [y,n,q,a,d,K,J,g,/,e,p,?]? + 1: -1,2 +1,3 +15 _ 2: -2,4 +3,8 +21 go to which hunk? @@ -1,2 +1,3 @@ _10 @@ -550,7 +550,7 @@ test_expect_success 'goto hunk 1 with "g1"' ' test_expect_success 'navigate to hunk via regex /pattern' ' test_when_finished "git reset" && tr _ " " >expect <<-EOF && - (2/2) Stage this hunk [y,n,q,a,d,K,g,/,e,p,?]? @@ -1,2 +1,3 @@ + (2/2) Stage this hunk [y,n,q,a,d,K,J,g,/,e,p,?]? @@ -1,2 +1,3 @@ _10 +15 _20 @@ -805,7 +805,7 @@ test_expect_success 'colors can be overridden' ' (1/2) Stage this hunk [y,n,q,a,d,j,J,g,/,e,p,?]? @@ -3 +3,2 @@ more-context +another-one - (2/2) Stage this hunk [y,n,q,a,d,K,g,/,e,p,?]? @@ -1,3 +1,3 @@ + (2/2) Stage this hunk [y,n,q,a,d,K,J,g,/,e,p,?]? @@ -1,3 +1,3 @@ context -old +new @@ -1354,4 +1354,14 @@ do ' done +test_expect_success 'option J rolls over' ' + test_write_lines a b c d e f g h i >file && + git add file && + test_write_lines X b c d e f g h X >file && + test_write_lines J J q | git add -p >out && + test_write_lines 1 2 1 >expect && + sed -n -e "s-/.*--" -e "s/^(//p" actual && + test_cmp expect actual +' + test_done From 171c1688ccbe5e6d709444a65a5ca2e0a9175b16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 6 Oct 2025 19:21:19 +0200 Subject: [PATCH 06/92] add-patch: let options y, n, j, and e roll over to next undecided MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The options y, n, and e mark the current hunk as decided. If there's another undecided hunk towards the bottom of the hunk array they go there. If there isn't, but there is another undecided hunk towards the top then they go to the very first hunk, no matter if it has already been decided on. The option j does basically the same move. Technically it is not allowed if there's no undecided hunk towards the bottom, but the variable "permitted" is never reset, so this permission is retained from the very first hunk. That may a bug, but this behavior is at least consistent with y, n, and e and arguably more useful than refusing to move. Improve the roll-over behavior of these four options by moving to the first undecided hunk instead of hunk 1, consistent with what they do when not rolling over. Also adjust the error message for j, as it will only be shown if there's no other undecided hunk in either direction. Reported-by: Windl, Ulrich Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- Documentation/git-add.adoc | 2 +- add-patch.c | 13 ++++++++++--- t/t3701-add-interactive.sh | 22 ++++++++++++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/Documentation/git-add.adoc b/Documentation/git-add.adoc index 5c05a3a7f9..596cdeff93 100644 --- a/Documentation/git-add.adoc +++ b/Documentation/git-add.adoc @@ -342,7 +342,7 @@ patch:: d - do not stage this hunk or any of the later hunks in the file g - select a hunk to go to / - search for a hunk matching the given regex - j - go to the next undecided hunk + j - go to the next undecided hunk, roll over at the bottom J - go to the next hunk, roll over at the bottom k - go to the previous undecided hunk K - go to the previous hunk diff --git a/add-patch.c b/add-patch.c index 1f466ec9c0..106bfcb275 100644 --- a/add-patch.c +++ b/add-patch.c @@ -1397,7 +1397,7 @@ static size_t display_hunks(struct add_p_state *s, } static const char help_patch_remainder[] = -N_("j - go to the next undecided hunk\n" +N_("j - go to the next undecided hunk, roll over at the bottom\n" "J - go to the next hunk, roll over at the bottom\n" "k - go to the previous undecided hunk\n" "K - go to the previous hunk\n" @@ -1408,6 +1408,11 @@ N_("j - go to the next undecided hunk\n" "p - print the current hunk, 'P' to use the pager\n" "? - print help\n"); +static size_t inc_mod(size_t a, size_t m) +{ + return a < m - 1 ? a + 1 : 0; +} + static int patch_update_file(struct add_p_state *s, struct file_diff *file_diff) { @@ -1451,7 +1456,9 @@ static int patch_update_file(struct add_p_state *s, break; } - for (i = hunk_index + 1; i < file_diff->hunk_nr; i++) + for (i = inc_mod(hunk_index, file_diff->hunk_nr); + i != hunk_index; + i = inc_mod(i, file_diff->hunk_nr)) if (file_diff->hunk[i].use == UNDECIDED_HUNK) { undecided_next = i; break; @@ -1594,7 +1601,7 @@ soft_increment: if (permitted & ALLOW_GOTO_NEXT_UNDECIDED_HUNK) hunk_index = undecided_next; else - err(s, _("No next hunk")); + err(s, _("No other undecided hunk")); } else if (s->answer.buf[0] == 'g') { char *pend; unsigned long response; diff --git a/t/t3701-add-interactive.sh b/t/t3701-add-interactive.sh index d5d2e120ab..8086d3da71 100755 --- a/t/t3701-add-interactive.sh +++ b/t/t3701-add-interactive.sh @@ -1364,4 +1364,26 @@ test_expect_success 'option J rolls over' ' test_cmp expect actual ' +test_expect_success 'options y, n, j, e roll over to next undecided (1)' ' + test_write_lines a b c d e f g h i j k l m n o p q >file && + git add file && + test_write_lines X b c d e f g h X j k l m n o p X >file && + test_set_editor : && + test_write_lines g3 y g3 n g3 j g3 e q | git add -p >out && + test_write_lines 1 3 1 3 1 3 1 3 1 >expect && + sed -n -e "s-/.*--" -e "s/^(//p" actual && + test_cmp expect actual +' + +test_expect_success 'options y, n, j, e roll over to next undecided (2)' ' + test_write_lines a b c d e f g h i j k l m n o p q >file && + git add file && + test_write_lines X b c d e f g h X j k l m n o p X >file && + test_set_editor : && + test_write_lines y g3 y g3 n g3 j g3 e q | git add -p >out && + test_write_lines 1 2 3 2 3 2 3 2 3 2 >expect && + sed -n -e "s-/.*--" -e "s/^(//p" actual && + test_cmp expect actual +' + test_done From 1967b60681256ed452ed70dedf381b5380697901 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 6 Oct 2025 19:22:38 +0200 Subject: [PATCH 07/92] add-patch: let options k and K roll over like j and J MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Options j and J roll over at the bottom and go to the first undecided hunk and hunk 1, respectively. Let options k and K do the same when they reach the top of the hunk array, so let them go to the last undecided hunk and the last hunk, respectively, for consistency. Also use the same direction-neutral error messages. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- Documentation/git-add.adoc | 4 ++-- add-patch.c | 22 ++++++++++++++------- t/t3701-add-interactive.sh | 40 +++++++++++++++++++------------------- 3 files changed, 37 insertions(+), 29 deletions(-) diff --git a/Documentation/git-add.adoc b/Documentation/git-add.adoc index 596cdeff93..3116a2cac5 100644 --- a/Documentation/git-add.adoc +++ b/Documentation/git-add.adoc @@ -344,8 +344,8 @@ patch:: / - search for a hunk matching the given regex j - go to the next undecided hunk, roll over at the bottom J - go to the next hunk, roll over at the bottom - k - go to the previous undecided hunk - K - go to the previous hunk + k - go to the previous undecided hunk, roll over at the top + K - go to the previous hunk, roll over at the top s - split the current hunk into smaller hunks e - manually edit the current hunk p - print the current hunk diff --git a/add-patch.c b/add-patch.c index 106bfcb275..4f314c16ec 100644 --- a/add-patch.c +++ b/add-patch.c @@ -1399,8 +1399,8 @@ static size_t display_hunks(struct add_p_state *s, static const char help_patch_remainder[] = N_("j - go to the next undecided hunk, roll over at the bottom\n" "J - go to the next hunk, roll over at the bottom\n" - "k - go to the previous undecided hunk\n" - "K - go to the previous hunk\n" + "k - go to the previous undecided hunk, roll over at the top\n" + "K - go to the previous hunk, roll over at the top\n" "g - select a hunk to go to\n" "/ - search for a hunk matching the given regex\n" "s - split the current hunk into smaller hunks\n" @@ -1408,6 +1408,11 @@ N_("j - go to the next undecided hunk, roll over at the bottom\n" "p - print the current hunk, 'P' to use the pager\n" "? - print help\n"); +static size_t dec_mod(size_t a, size_t m) +{ + return a > 0 ? a - 1 : m - 1; +} + static size_t inc_mod(size_t a, size_t m) { return a < m - 1 ? a + 1 : 0; @@ -1450,7 +1455,9 @@ static int patch_update_file(struct add_p_state *s, undecided_next = -1; if (file_diff->hunk_nr) { - for (i = hunk_index - 1; i >= 0; i--) + for (i = dec_mod(hunk_index, file_diff->hunk_nr); + i != hunk_index; + i = dec_mod(i, file_diff->hunk_nr)) if (file_diff->hunk[i].use == UNDECIDED_HUNK) { undecided_previous = i; break; @@ -1492,7 +1499,7 @@ static int patch_update_file(struct add_p_state *s, permitted |= ALLOW_GOTO_PREVIOUS_UNDECIDED_HUNK; strbuf_addstr(&s->buf, ",k"); } - if (hunk_index) { + if (file_diff->hunk_nr > 1) { permitted |= ALLOW_GOTO_PREVIOUS_HUNK; strbuf_addstr(&s->buf, ",K"); } @@ -1584,9 +1591,10 @@ soft_increment: } } else if (s->answer.buf[0] == 'K') { if (permitted & ALLOW_GOTO_PREVIOUS_HUNK) - hunk_index--; + hunk_index = dec_mod(hunk_index, + file_diff->hunk_nr); else - err(s, _("No previous hunk")); + err(s, _("No other hunk")); } else if (s->answer.buf[0] == 'J') { if (permitted & ALLOW_GOTO_NEXT_HUNK) hunk_index++; @@ -1596,7 +1604,7 @@ soft_increment: if (permitted & ALLOW_GOTO_PREVIOUS_UNDECIDED_HUNK) hunk_index = undecided_previous; else - err(s, _("No previous hunk")); + err(s, _("No other undecided hunk")); } else if (s->answer.buf[0] == 'j') { if (permitted & ALLOW_GOTO_NEXT_UNDECIDED_HUNK) hunk_index = undecided_next; diff --git a/t/t3701-add-interactive.sh b/t/t3701-add-interactive.sh index 8086d3da71..385e55c783 100755 --- a/t/t3701-add-interactive.sh +++ b/t/t3701-add-interactive.sh @@ -333,7 +333,7 @@ test_expect_success 'different prompts for mode change/deleted' ' sed -n "s/^\(([0-9/]*) Stage .*?\).*/\1/p" actual >actual.filtered && cat >expect <<-\EOF && (1/1) Stage deletion [y,n,q,a,d,p,?]? - (1/2) Stage mode change [y,n,q,a,d,j,J,g,/,p,?]? + (1/2) Stage mode change [y,n,q,a,d,k,K,j,J,g,/,p,?]? (2/2) Stage this hunk [y,n,q,a,d,K,J,g,/,e,p,?]? EOF test_cmp expect actual.filtered @@ -527,7 +527,7 @@ test_expect_success 'goto hunk 1 with "g 1"' ' _10 +15 _20 - (1/2) Stage this hunk [y,n,q,a,d,j,J,g,/,e,p,?]?_ + (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,?]?_ EOF test_write_lines s y g 1 | git add -p >actual && tail -n 7 actual.trimmed && @@ -540,7 +540,7 @@ test_expect_success 'goto hunk 1 with "g1"' ' _10 +15 _20 - (1/2) Stage this hunk [y,n,q,a,d,j,J,g,/,e,p,?]?_ + (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,?]?_ EOF test_write_lines s y g1 | git add -p >actual && tail -n 4 actual.trimmed && @@ -554,7 +554,7 @@ test_expect_success 'navigate to hunk via regex /pattern' ' _10 +15 _20 - (1/2) Stage this hunk [y,n,q,a,d,j,J,g,/,e,p,?]?_ + (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,?]?_ EOF test_write_lines s y /1,2 | git add -p >actual && tail -n 5 actual.trimmed && @@ -567,7 +567,7 @@ test_expect_success 'navigate to hunk via regex / pattern' ' _10 +15 _20 - (1/2) Stage this hunk [y,n,q,a,d,j,J,g,/,e,p,?]?_ + (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,?]?_ EOF test_write_lines s y / 1,2 | git add -p >actual && tail -n 4 actual.trimmed && @@ -579,11 +579,11 @@ test_expect_success 'print again the hunk' ' tr _ " " >expect <<-EOF && +15 20 - (1/2) Stage this hunk [y,n,q,a,d,j,J,g,/,e,p,?]? @@ -1,2 +1,3 @@ + (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,?]? @@ -1,2 +1,3 @@ 10 +15 20 - (1/2) Stage this hunk [y,n,q,a,d,j,J,g,/,e,p,?]?_ + (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,?]?_ EOF test_write_lines s y g 1 p | git add -p >actual && tail -n 7 actual.trimmed && @@ -595,11 +595,11 @@ test_expect_success TTY 'print again the hunk (PAGER)' ' cat >expect <<-EOF && +15 20 - (1/2) Stage this hunk [y,n,q,a,d,j,J,g,/,e,p,?]? PAGER @@ -1,2 +1,3 @@ + (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,?]? PAGER @@ -1,2 +1,3 @@ PAGER 10 PAGER +15 PAGER 20 - (1/2) Stage this hunk [y,n,q,a,d,j,J,g,/,e,p,?]? + (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,?]? EOF test_write_lines s y g 1 P | ( @@ -802,7 +802,7 @@ test_expect_success 'colors can be overridden' ' -old +new more-context - (1/2) Stage this hunk [y,n,q,a,d,j,J,g,/,e,p,?]? @@ -3 +3,2 @@ + (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,?]? @@ -3 +3,2 @@ more-context +another-one (2/2) Stage this hunk [y,n,q,a,d,K,J,g,/,e,p,?]? @@ -1,3 +1,3 @@ @@ -810,7 +810,7 @@ test_expect_success 'colors can be overridden' ' -old +new more-context - (1/2) Stage this hunk [y,n,q,a,d,j,J,g,/,e,p,?]? + (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,?]? EOF test_cmp expect actual ' @@ -1354,34 +1354,34 @@ do ' done -test_expect_success 'option J rolls over' ' +test_expect_success 'options J, K roll over' ' test_write_lines a b c d e f g h i >file && git add file && test_write_lines X b c d e f g h X >file && - test_write_lines J J q | git add -p >out && - test_write_lines 1 2 1 >expect && + test_write_lines J J K q | git add -p >out && + test_write_lines 1 2 1 2 >expect && sed -n -e "s-/.*--" -e "s/^(//p" actual && test_cmp expect actual ' -test_expect_success 'options y, n, j, e roll over to next undecided (1)' ' +test_expect_success 'options y, n, j, k, e roll over to next undecided (1)' ' test_write_lines a b c d e f g h i j k l m n o p q >file && git add file && test_write_lines X b c d e f g h X j k l m n o p X >file && test_set_editor : && - test_write_lines g3 y g3 n g3 j g3 e q | git add -p >out && - test_write_lines 1 3 1 3 1 3 1 3 1 >expect && + test_write_lines g3 y g3 n g3 j g3 e k q | git add -p >out && + test_write_lines 1 3 1 3 1 3 1 3 1 2 >expect && sed -n -e "s-/.*--" -e "s/^(//p" actual && test_cmp expect actual ' -test_expect_success 'options y, n, j, e roll over to next undecided (2)' ' +test_expect_success 'options y, n, j, k, e roll over to next undecided (2)' ' test_write_lines a b c d e f g h i j k l m n o p q >file && git add file && test_write_lines X b c d e f g h X j k l m n o p X >file && test_set_editor : && - test_write_lines y g3 y g3 n g3 j g3 e q | git add -p >out && - test_write_lines 1 2 3 2 3 2 3 2 3 2 >expect && + test_write_lines y g3 y g3 n g3 j g3 e g1 k q | git add -p >out && + test_write_lines 1 2 3 2 3 2 3 2 3 2 1 2 >expect && sed -n -e "s-/.*--" -e "s/^(//p" actual && test_cmp expect actual ' From e8c744dd9a0270d616ab10aaddfa07cfc071e382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 6 Oct 2025 19:23:34 +0200 Subject: [PATCH 08/92] add-patch: let options a and d roll over like y and n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Options a and d stage and unstage all undecided hunks towards the bottom of the array of hunks, respectively, and then roll over to the very first hunk. The first part is similar to y and n if the current hunk is the last one in the array, but they roll over to the next undecided hunk if there is any. That's more useful; do it for a and d as well. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- add-patch.c | 15 +++++++++++++++ t/t3701-add-interactive.sh | 12 ++++++------ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/add-patch.c b/add-patch.c index 4f314c16ec..6da13a78b5 100644 --- a/add-patch.c +++ b/add-patch.c @@ -1418,6 +1418,17 @@ static size_t inc_mod(size_t a, size_t m) return a < m - 1 ? a + 1 : 0; } +static bool get_first_undecided(const struct file_diff *file_diff, size_t *idx) +{ + for (size_t i = 0; i < file_diff->hunk_nr; i++) { + if (file_diff->hunk[i].use == UNDECIDED_HUNK) { + *idx = i; + return true; + } + } + return false; +} + static int patch_update_file(struct add_p_state *s, struct file_diff *file_diff) { @@ -1572,6 +1583,8 @@ soft_increment: if (hunk->use == UNDECIDED_HUNK) hunk->use = USE_HUNK; } + if (!get_first_undecided(file_diff, &hunk_index)) + hunk_index = 0; } else if (hunk->use == UNDECIDED_HUNK) { hunk->use = USE_HUNK; } @@ -1582,6 +1595,8 @@ soft_increment: if (hunk->use == UNDECIDED_HUNK) hunk->use = SKIP_HUNK; } + if (!get_first_undecided(file_diff, &hunk_index)) + hunk_index = 0; } else if (hunk->use == UNDECIDED_HUNK) { hunk->use = SKIP_HUNK; } diff --git a/t/t3701-add-interactive.sh b/t/t3701-add-interactive.sh index 385e55c783..9d81b0542e 100755 --- a/t/t3701-add-interactive.sh +++ b/t/t3701-add-interactive.sh @@ -1364,24 +1364,24 @@ test_expect_success 'options J, K roll over' ' test_cmp expect actual ' -test_expect_success 'options y, n, j, k, e roll over to next undecided (1)' ' +test_expect_success 'options y, n, a, d, j, k, e roll over to next undecided (1)' ' test_write_lines a b c d e f g h i j k l m n o p q >file && git add file && test_write_lines X b c d e f g h X j k l m n o p X >file && test_set_editor : && - test_write_lines g3 y g3 n g3 j g3 e k q | git add -p >out && - test_write_lines 1 3 1 3 1 3 1 3 1 2 >expect && + test_write_lines g3 y g3 n g3 a g3 d g3 j g3 e k q | git add -p >out && + test_write_lines 1 3 1 3 1 3 1 3 1 3 1 3 1 2 >expect && sed -n -e "s-/.*--" -e "s/^(//p" actual && test_cmp expect actual ' -test_expect_success 'options y, n, j, k, e roll over to next undecided (2)' ' +test_expect_success 'options y, n, a, d, j, k, e roll over to next undecided (2)' ' test_write_lines a b c d e f g h i j k l m n o p q >file && git add file && test_write_lines X b c d e f g h X j k l m n o p X >file && test_set_editor : && - test_write_lines y g3 y g3 n g3 j g3 e g1 k q | git add -p >out && - test_write_lines 1 2 3 2 3 2 3 2 3 2 1 2 >expect && + test_write_lines y g3 y g3 n g3 a g3 d g3 j g3 e g1 k q | git add -p >out && + test_write_lines 1 2 3 2 3 2 3 2 3 2 3 2 3 2 1 2 >expect && sed -n -e "s-/.*--" -e "s/^(//p" actual && test_cmp expect actual ' From 208e23ea47ad71c20246ff234efa3abc8080513f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 6 Oct 2025 19:24:28 +0200 Subject: [PATCH 09/92] add-patch: reset "permitted" at loop start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't accumulate allowed options from any visited hunks, start fresh at the top of the loop instead and only record the allowed options for the current hunk. Reported-by: Junio C Hamano Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- add-patch.c | 19 ++++++++++--------- t/t3701-add-interactive.sh | 14 ++++++++++++++ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/add-patch.c b/add-patch.c index 6da13a78b5..45839ceac5 100644 --- a/add-patch.c +++ b/add-patch.c @@ -1439,15 +1439,6 @@ static int patch_update_file(struct add_p_state *s, struct child_process cp = CHILD_PROCESS_INIT; int colored = !!s->colored.len, quit = 0, use_pager = 0; enum prompt_mode_type prompt_mode_type; - enum { - ALLOW_GOTO_PREVIOUS_HUNK = 1 << 0, - ALLOW_GOTO_PREVIOUS_UNDECIDED_HUNK = 1 << 1, - ALLOW_GOTO_NEXT_HUNK = 1 << 2, - ALLOW_GOTO_NEXT_UNDECIDED_HUNK = 1 << 3, - ALLOW_SEARCH_AND_GOTO = 1 << 4, - ALLOW_SPLIT = 1 << 5, - ALLOW_EDIT = 1 << 6 - } permitted = 0; /* Empty added files have no hunks */ if (!file_diff->hunk_nr && !file_diff->added) @@ -1457,6 +1448,16 @@ static int patch_update_file(struct add_p_state *s, render_diff_header(s, file_diff, colored, &s->buf); fputs(s->buf.buf, stdout); for (;;) { + enum { + ALLOW_GOTO_PREVIOUS_HUNK = 1 << 0, + ALLOW_GOTO_PREVIOUS_UNDECIDED_HUNK = 1 << 1, + ALLOW_GOTO_NEXT_HUNK = 1 << 2, + ALLOW_GOTO_NEXT_UNDECIDED_HUNK = 1 << 3, + ALLOW_SEARCH_AND_GOTO = 1 << 4, + ALLOW_SPLIT = 1 << 5, + ALLOW_EDIT = 1 << 6 + } permitted = 0; + if (hunk_index >= file_diff->hunk_nr) hunk_index = 0; hunk = file_diff->hunk_nr diff --git a/t/t3701-add-interactive.sh b/t/t3701-add-interactive.sh index 9d81b0542e..403aaee356 100755 --- a/t/t3701-add-interactive.sh +++ b/t/t3701-add-interactive.sh @@ -1386,4 +1386,18 @@ test_expect_success 'options y, n, a, d, j, k, e roll over to next undecided (2) test_cmp expect actual ' +test_expect_success 'invalid option s is rejected' ' + test_write_lines a b c d e f g h i j k >file && + git add file && + test_write_lines X b X d e f g h i j X >file && + test_write_lines j s q | git add -p >out && + sed -ne "s/ @@.*//" -e "s/ \$//" -e "/^(/p" actual && + cat >expect <<-EOF && + (1/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,s,e,p,?]? + (2/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,?]? Sorry, cannot split this hunk + (2/2) Stage this hunk [y,n,q,a,d,k,K,j,J,g,/,e,p,?]? + EOF + test_cmp expect actual +' + test_done From 881445157279bc2319b7b3c1392d5083453f4662 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 7 Oct 2025 17:39:37 -0400 Subject: [PATCH 10/92] SubmittingPatches: extend release-notes experiment to topic names In d255105c99 (SubmittingPatches: release-notes entry experiment, 2024-03-25), we began an experiment to have contributors suggest a topic description to appear in our RelNotes and "What's cooking?" reports. Extend that experiment to also welcome suggested topic branch names in addition to descriptions. Suggested-by: Junio C Hamano Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/SubmittingPatches | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 86ca7f6a78..f48688e370 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -579,14 +579,19 @@ line via `git format-patch --notes`. [[the-topic-summary]] *This is EXPERIMENTAL*. -When sending a topic, you can propose a one-paragraph summary that -should appear in the "What's cooking" report when it is picked up to -explain the topic. If you choose to do so, please write a 2-5 line -paragraph that will fit well in our release notes (see many bulleted -entries in the Documentation/RelNotes/* files for examples), and make -it the first paragraph of the cover letter. For a single-patch -series, use the space between the three-dash line and the diffstat, as -described earlier. +When sending a topic, you can optionally propose a topic name and/or a +one-paragraph summary that should appear in the "What's cooking" +report when it is picked up to explain the topic. If you choose to do +so, please write a 2-5 line paragraph that will fit well in our +release notes (see many bulleted entries in the +Documentation/RelNotes/* files for examples), and make it the first +(or second, if including a suggested topic name) paragraph of the +cover letter. If suggesting a topic name, use the format +"XX/your-topic-name", where "XX" is a stand-in for the primary +author's initials, and "your-topic-name" is a brief, dash-delimited +description of what your topic does. For a single-patch series, use +the space between the three-dash line and the diffstat, as described +earlier. [[attachment]] Do not attach the patch as a MIME attachment, compressed or not. From 1a41698841065f7911f31f20cd1ba9ec7c297aae Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 7 Oct 2025 17:39:41 -0400 Subject: [PATCH 11/92] SubmittingPatches: guidance for multi-series efforts Occasionally there are efforts to contribute to the Git project that span more than one patch series in order to achieve a broader goal. By convention, the maintainer has typically suffixed the topic names with "-part-one", or "-part-1" and so on. Document that convention and suggest some guidance on how to structure proposed topic names for multi-series efforts. Suggested-by: Junio C Hamano Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/SubmittingPatches | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index f48688e370..d620bd93bd 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -593,6 +593,14 @@ description of what your topic does. For a single-patch series, use the space between the three-dash line and the diffstat, as described earlier. +[[multi-series-efforts]] +If your patch series is part of a larger effort spanning multiple +patch series, briefly describe the broader goal, and state where the +current series fits into that goal. If you are suggesting a topic +name as in <>, consider +"XX/the-broader-goal-part-one", "XX/the-broader-goal-part-two", and so +on. + [[attachment]] Do not attach the patch as a MIME attachment, compressed or not. Do not let your e-mail client send quoted-printable. Do not let From b9c6962ad5b2f735d8281eb03646cefcf01886de Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 9 Oct 2025 07:46:00 +0000 Subject: [PATCH 12/92] mingw: avoid relative `#include`s We want to make them relative to the top-level directory. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- compat/mingw.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 8538e3d172..da99473f56 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1,22 +1,22 @@ #define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS -#include "../git-compat-util.h" +#include "git-compat-util.h" #include "win32.h" #include #include #include #include -#include "../strbuf.h" -#include "../run-command.h" -#include "../abspath.h" -#include "../alloc.h" +#include "strbuf.h" +#include "run-command.h" +#include "abspath.h" +#include "alloc.h" #include "win32/lazyload.h" -#include "../config.h" -#include "../environment.h" -#include "../trace2.h" -#include "../symlinks.h" -#include "../wrapper.h" +#include "config.h" +#include "environment.h" +#include "trace2.h" +#include "symlinks.h" +#include "wrapper.h" #include "dir.h" #include "gettext.h" #define SECURITY_WIN32 From 15b8abde07de2cbce2dac3630324ead349e168ee Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 9 Oct 2025 07:46:01 +0000 Subject: [PATCH 13/92] mingw: order `#include`s alphabetically It allows for more consistent patches that way. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- compat/mingw.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index da99473f56..736a07a028 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2,25 +2,25 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" -#include "win32.h" -#include -#include -#include -#include -#include "strbuf.h" -#include "run-command.h" #include "abspath.h" #include "alloc.h" -#include "win32/lazyload.h" #include "config.h" -#include "environment.h" -#include "trace2.h" -#include "symlinks.h" -#include "wrapper.h" #include "dir.h" +#include "environment.h" #include "gettext.h" +#include "run-command.h" +#include "strbuf.h" +#include "symlinks.h" +#include "trace2.h" +#include "win32.h" +#include "win32/lazyload.h" +#include "wrapper.h" +#include +#include +#include #define SECURITY_WIN32 #include +#include #include #define STATUS_DELETE_PENDING ((NTSTATUS) 0xC0000056) From 3860985105a40f425effc49642693e90e84e1863 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 9 Oct 2025 07:46:22 +0000 Subject: [PATCH 14/92] refs: forbid clang to complain about unreachable code When `NO_SYMLINK_HEAD` is defined, `create_ref_symlink()` is hard-coded as `(-1)`, and as a consequence the condition `!create_ref_symlink()` always evaluates to false, rendering any code guarded by that condition unreachable. Therefore, clang is _technically_ correct when it complains about unreachable code. It does completely miss the fact that this is okay because on _other_ platforms, where `NO_SYMLINK_HEAD` is not defined, the code isn't unreachable at all. Let's use the same trick as in 82e79c63642c (git-compat-util: add NOT_CONSTANT macro and use it in atfork_prepare(), 2025-03-17) to appease clang while at the same time keeping the `-Wunreachable` flag to potentially find _actually_ unreachable code. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- refs/files-backend.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index 088b52c740..814decf323 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3186,7 +3186,13 @@ static int files_transaction_finish(struct ref_store *ref_store, * next update. If not, we try and create a regular symref. */ if (update->new_target && refs->prefer_symlink_refs) - if (!create_ref_symlink(lock, update->new_target)) + /* + * By using the `NOT_CONSTANT()` trick, we can avoid + * errors by `clang`'s `-Wunreachable` logic that would + * report that the `continue` statement is not reachable + * when `NO_SYMLINK_HEAD` is `#define`d. + */ + if (NOT_CONSTANT(!create_ref_symlink(lock, update->new_target))) continue; if (update->flags & REF_NEEDS_COMMIT) { From 55269ece0473833af19958672f58d7b85cfb4b7c Mon Sep 17 00:00:00 2001 From: "D. Ben Knoble" Date: Mon, 6 Oct 2025 08:59:29 -0400 Subject: [PATCH 15/92] doc: explain the impact of stash.index on --autostash options With 9842c0c749 (stash: honor stash.index in apply, pop modes, 2025-09-21) merged in a5d4779e6e (Merge branch 'dk/stash-apply-index', 2025-09-29), we did not advertise the connection between the new config option stash.index and the implicit use of git-stash via --autostash (which may also be configured). Do so. Signed-off-by: D. Ben Knoble Signed-off-by: Junio C Hamano --- Documentation/config/stash.adoc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/config/stash.adoc b/Documentation/config/stash.adoc index e556105a15..fcb9a4a7a0 100644 --- a/Documentation/config/stash.adoc +++ b/Documentation/config/stash.adoc @@ -2,6 +2,10 @@ stash.index:: If this is set to true, `git stash apply` and `git stash pop` will behave as if `--index` was supplied. Defaults to false. See the descriptions in linkgit:git-stash[1]. ++ +This also affects invocations of linkgit:git-stash[1] via `--autostash` from +commands like linkgit:git-merge[1], linkgit:git-rebase[1], and +linkgit:git-pull[1]. stash.showIncludeUntracked:: If this is set to true, the `git stash show` command will show From b3ac6e737db8635aaed0c355ebaf291b63fb0461 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Wed, 8 Oct 2025 13:48:46 +0200 Subject: [PATCH 16/92] doc: fix accidental literal blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that normal paragraphs in most user-facing docs[1] don’t use literal blocks. This can easily happen if you try to maintain indentation in order to continue a block; that might work in e.g. Markdown variants, but not in AsciiDoc. The fixes are straightforward, i.e. just deindent the block and maybe add line continuations. The only exception is git-sparse-checkout(1) where we also replace indentation used for *intended* literal blocks with `----`. † 1: These have not been considered: • `Documentation/howto/` • `Documentation/technical/` • `Documentation/gitprotocol*` Signed-off-by: Kristoffer Haugsbakk Acked-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/config/core.adoc | 4 +- Documentation/git-config.adoc | 18 +++---- Documentation/git-rev-parse.adoc | 14 ++--- Documentation/git-shortlog.adoc | 4 +- Documentation/git-sparse-checkout.adoc | 72 ++++++++++++++++---------- 5 files changed, 64 insertions(+), 48 deletions(-) diff --git a/Documentation/config/core.adoc b/Documentation/config/core.adoc index 3fbe83eef1..8866ed2771 100644 --- a/Documentation/config/core.adoc +++ b/Documentation/config/core.adoc @@ -75,8 +75,8 @@ The built-in file system monitor is currently available only on a limited set of supported platforms. Currently, this includes Windows and MacOS. + - Otherwise, this variable contains the pathname of the "fsmonitor" - hook command. +Otherwise, this variable contains the pathname of the "fsmonitor" +hook command. + This hook command is used to identify all files that may have changed since the requested date/time. This information is used to speed up diff --git a/Documentation/git-config.adoc b/Documentation/git-config.adoc index 511b2e26bf..a633ab8ec3 100644 --- a/Documentation/git-config.adoc +++ b/Documentation/git-config.adoc @@ -117,15 +117,15 @@ OPTIONS --comment :: Append a comment at the end of new or modified lines. - - If __ begins with one or more whitespaces followed - by "#", it is used as-is. If it begins with "#", a space is - prepended before it is used. Otherwise, a string " # " (a - space followed by a hash followed by a space) is prepended - to it. And the resulting string is placed immediately after - the value defined for the variable. The __ must - not contain linefeed characters (no multi-line comments are - permitted). ++ +If __ begins with one or more whitespaces followed +by "#", it is used as-is. If it begins with "#", a space is +prepended before it is used. Otherwise, a string " # " (a +space followed by a hash followed by a space) is prepended +to it. And the resulting string is placed immediately after +the value defined for the variable. The __ must +not contain linefeed characters (no multi-line comments are +permitted). --all:: With `get`, return all values for a multi-valued key. diff --git a/Documentation/git-rev-parse.adoc b/Documentation/git-rev-parse.adoc index cc32b4b4f0..18383e52af 100644 --- a/Documentation/git-rev-parse.adoc +++ b/Documentation/git-rev-parse.adoc @@ -174,13 +174,13 @@ for another option. Allow oids to be input from any object format that the current repository supports. - - Specifying "sha1" translates if necessary and returns a sha1 oid. - - Specifying "sha256" translates if necessary and returns a sha256 oid. - - Specifying "storage" translates if necessary and returns an oid in - encoded in the storage hash algorithm. ++ +Specifying "sha1" translates if necessary and returns a sha1 oid. ++ +Specifying "sha256" translates if necessary and returns a sha256 oid. ++ +Specifying "storage" translates if necessary and returns an oid in +encoded in the storage hash algorithm. Options for Objects ~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/git-shortlog.adoc b/Documentation/git-shortlog.adoc index d8ab38dcc1..aa92800c69 100644 --- a/Documentation/git-shortlog.adoc +++ b/Documentation/git-shortlog.adoc @@ -44,8 +44,8 @@ OPTIONS describe each commit. '' can be any string accepted by the `--format` option of 'git log', such as '* [%h] %s'. (See the "PRETTY FORMATS" section of linkgit:git-log[1].) - - Each pretty-printed commit will be rewrapped before it is shown. ++ +Each pretty-printed commit will be rewrapped before it is shown. --date=:: Show dates formatted according to the given date string. (See diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc index 529a8edd9c..b5fe5da041 100644 --- a/Documentation/git-sparse-checkout.adoc +++ b/Documentation/git-sparse-checkout.adoc @@ -264,34 +264,50 @@ patterns in non-cone mode has a number of shortcomings: inconsistent. * It has edge cases where the "right" behavior is unclear. Two examples: - - First, two users are in a subdirectory, and the first runs - git sparse-checkout set '/toplevel-dir/*.c' - while the second runs - git sparse-checkout set relative-dir - Should those arguments be transliterated into - current/subdirectory/toplevel-dir/*.c - and - current/subdirectory/relative-dir - before inserting into the sparse-checkout file? The user who typed - the first command is probably aware that arguments to set/add are - supposed to be patterns in non-cone mode, and probably would not be - happy with such a transliteration. However, many gitignore-style - patterns are just paths, which might be what the user who typed the - second command was thinking, and they'd be upset if their argument - wasn't transliterated. - - Second, what should bash-completion complete on for set/add commands - for non-cone users? If it suggests paths, is it exacerbating the - problem above? Also, if it suggests paths, what if the user has a - file or directory that begins with either a '!' or '#' or has a '*', - '\', '?', '[', or ']' in its name? And if it suggests paths, will - it complete "/pro" to "/proc" (in the root filesystem) rather than to - "/progress.txt" in the current directory? (Note that users are - likely to want to start paths with a leading '/' in non-cone mode, - for the same reason that .gitignore files often have one.) - Completing on files or directories might give nasty surprises in - all these cases. ++ +First, two users are in a subdirectory, and the first runs ++ +---- +git sparse-checkout set '/toplevel-dir/*.c' +---- ++ +while the second runs ++ +---- +git sparse-checkout set relative-dir +---- ++ +Should those arguments be transliterated into ++ +---- +current/subdirectory/toplevel-dir/*.c +---- ++ +and ++ +---- +current/subdirectory/relative-dir +---- ++ +before inserting into the sparse-checkout file? The user who typed +the first command is probably aware that arguments to set/add are +supposed to be patterns in non-cone mode, and probably would not be +happy with such a transliteration. However, many gitignore-style +patterns are just paths, which might be what the user who typed the +second command was thinking, and they'd be upset if their argument +wasn't transliterated. ++ +Second, what should bash-completion complete on for set/add commands +for non-cone users? If it suggests paths, is it exacerbating the +problem above? Also, if it suggests paths, what if the user has a +file or directory that begins with either a '!' or '#' or has a '*', +'\', '?', '[', or ']' in its name? And if it suggests paths, will +it complete "/pro" to "/proc" (in the root filesystem) rather than to +"/progress.txt" in the current directory? (Note that users are +likely to want to start paths with a leading '/' in non-cone mode, +for the same reason that .gitignore files often have one.) +Completing on files or directories might give nasty surprises in +all these cases. * The excessive flexibility made other extensions essentially impractical. `--sparse-index` is likely impossible in non-cone From 2cebca05824057493f4b2ef9cd86333a04ed4a7e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Oct 2025 17:56:01 -0400 Subject: [PATCH 17/92] builtin/cat-file.c: simplify calling `report_object_status()` In b0b910e052 (cat-file.c: add batch handling for submodules, 2025-06-02), we began handling submodule entries specially when batching cat-file like so: $ echo :sha1collisiondetection | git.compile cat-file --batch-check 855827c583bc30645ba427885caa40c5b81764d2 submodule Commit b0b910e052 notes that submodules are handled differently than non-existent objects, which print " ", since there is (a) no object to resolve the OID of in the first place, and as commit b0b910e052 notes, (b) for submodules in particular, it is useful to know what commit it points at without having to spawn another Git process. That commit does so by calling report_object_status() and passing in "oid_to_hex(&data->oid)" for the "obj_name" parameter. This is unnecessary, however, since report_object_status() will do the same automatically if given a NULL "obj_name" argument. That behavior dates back to 6a951937ae (cat-file: add --batch-all-objects option, 2015-06-22), so rely on that instead of having the caller open-code that part of report_object_status(). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 4b23fcecbd..71b94c8b3f 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -497,7 +497,7 @@ static void batch_object_write(const char *obj_name, OBJECT_INFO_LOOKUP_REPLACE); if (ret < 0) { if (data->mode == S_IFGITLINK) - report_object_status(opt, oid_to_hex(&data->oid), &data->oid, "submodule"); + report_object_status(opt, NULL, &data->oid, "submodule"); else report_object_status(opt, obj_name, &data->oid, "missing"); return; From 4253630c6f07a4bdcc9aa62a50e26a4d466219d1 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Thu, 16 Oct 2025 12:31:43 +0200 Subject: [PATCH 18/92] RelNotes: sync with Git 2.51.1 fixups Carry over the fixups from 8c3d7c5f (RelNotes: minor fixups before 2.51.1, 2025-10-15). Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.52.0.adoc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/RelNotes/2.52.0.adoc b/Documentation/RelNotes/2.52.0.adoc index 67b0ac7c45..8c4ed4eef4 100644 --- a/Documentation/RelNotes/2.52.0.adoc +++ b/Documentation/RelNotes/2.52.0.adoc @@ -100,7 +100,7 @@ Performance, Internal Implementation, Development Support etc. * CodingGuidelines now spells out how bitfields are to be written. - * Adjust to the way newer versions of cURL selectivel enables tracing + * Adjust to the way newer versions of cURL selectively enable tracing options, so that our tests can continue to work. (merge 1b5a6bfff3 jk/curl-global-trace-components later to maint). @@ -212,13 +212,13 @@ including security updates, are included in this release. name. (merge bcb20dda83 js/doc-gitk-history later to maint). - * Update the instruction to use of GGG in the MyFirstContribution + * Update the instructions for using GGG in the MyFirstContribution document to say that a GitHub PR could be made against `git/git` instead of `gitgitgadget/git`. (merge 37001cdbc4 ds/doc-ggg-pr-fork-clarify later to maint). * Makefile tried to run multiple "cargo build" which would not work - very well; serialize their execution to work it around. + very well; serialize their execution to work around this problem. (merge 0eeacde50e da/cargo-serialize later to maint). * "git repack --path-walk" lost objects in some corner cases, which @@ -294,12 +294,12 @@ including security updates, are included in this release. updated. (merge 54a60e5b38 kh/you-still-use-whatchanged-fix later to maint). - * Clang-format update to let our control macros formatted the way we + * Clang-format update to let our control macros be formatted the way we had them traditionally, e.g., "for_each_string_list_item()" without space before the parentheses. (merge 3721541d35 jt/clang-format-foreach-wo-space-before-parenthesis later to maint). - * A few places where an size_t value was cast to curl_off_t without + * A few places where a size_t value was cast to curl_off_t without checking has been updated to use the existing helper function. (merge ecc5749578 js/curl-off-t-fixes later to maint). From 20b4eeddce165f11d7c5bffb1ecb69017df4a05e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:18 -0400 Subject: [PATCH 19/92] builtin/repack.c: avoid "the_repository" in `cmd_repack()` Reduce builtin/repack.c's reliance on `the_repository` by using the currently-UNUSED "repo" parameter within cmd_repack(). The following commits will continue to reduce the usage of the_repository in other places within builtin/repack.c. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e8730808c5..305782b2c9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1247,7 +1247,7 @@ static const char *find_pack_prefix(const char *packdir, const char *packtmp) int cmd_repack(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; @@ -1344,7 +1344,7 @@ int cmd_repack(int argc, list_objects_filter_init(&po_args.filter_options); - repo_config(the_repository, repack_config, &cruft_po_args); + repo_config(repo, repack_config, &cruft_po_args); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); @@ -1354,7 +1354,7 @@ int cmd_repack(int argc, po_args.depth = xstrdup_or_null(opt_depth); po_args.threads = xstrdup_or_null(opt_threads); - if (delete_redundant && the_repository->repository_format_precious_objects) + if (delete_redundant && repo->repository_format_precious_objects) die(_("cannot delete packs in a precious-objects repo")); die_for_incompatible_opt3(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE), "-A", @@ -1376,7 +1376,7 @@ int cmd_repack(int argc, die(_(incremental_bitmap_conflict_error)); if (write_bitmaps && po_args.local && - odb_has_alternates(the_repository->objects)) { + odb_has_alternates(repo->objects)) { /* * When asked to do a local repack, but we have * packfiles that are inherited from an alternate, then @@ -1391,7 +1391,8 @@ int cmd_repack(int argc, if (write_midx && write_bitmaps) { struct strbuf path = STRBUF_INIT; - strbuf_addf(&path, "%s/%s_XXXXXX", repo_get_object_directory(the_repository), + strbuf_addf(&path, "%s/%s_XXXXXX", + repo_get_object_directory(repo), "bitmap-ref-tips"); refs_snapshot = xmks_tempfile(path.buf); @@ -1400,7 +1401,7 @@ int cmd_repack(int argc, strbuf_release(&path); } - packdir = mkpathdup("%s/pack", repo_get_object_directory(the_repository)); + packdir = mkpathdup("%s/pack", repo_get_object_directory(repo)); packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); @@ -1439,7 +1440,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--reflog"); strvec_push(&cmd.args, "--indexed-objects"); } - if (repo_has_promisor_remote(the_repository)) + if (repo_has_promisor_remote(repo)) strvec_push(&cmd.args, "--exclude-promisor-objects"); if (!write_midx) { if (write_bitmaps > 0) @@ -1535,7 +1536,7 @@ int cmd_repack(int argc, * midx_has_unknown_packs() will make the decision for * us. */ - if (!get_multi_pack_index(the_repository->objects->sources)) + if (!get_multi_pack_index(repo->objects->sources)) midx_must_contain_cruft = 1; } @@ -1618,9 +1619,9 @@ int cmd_repack(int argc, string_list_sort(&names); - if (get_multi_pack_index(the_repository->objects->sources)) { + if (get_multi_pack_index(repo->objects->sources)) { struct multi_pack_index *m = - get_multi_pack_index(the_repository->objects->sources); + get_multi_pack_index(repo->objects->sources); ALLOC_ARRAY(midx_pack_names, m->num_packs + m->num_packs_in_base); @@ -1631,7 +1632,7 @@ int cmd_repack(int argc, xstrdup(m->pack_names[i]); } - close_object_store(the_repository->objects); + close_object_store(repo->objects); /* * Ok we have prepared all new packfiles. @@ -1688,7 +1689,7 @@ int cmd_repack(int argc, goto cleanup; } - odb_reprepare(the_repository->objects); + odb_reprepare(repo->objects); if (delete_redundant) { int opts = 0; @@ -1704,18 +1705,18 @@ int cmd_repack(int argc, if (!keep_unreachable && (!(pack_everything & LOOSEN_UNREACHABLE) || unpack_unreachable) && - is_repository_shallow(the_repository)) + is_repository_shallow(repo)) prune_shallow(PRUNE_QUICK); } if (run_update_server_info) - update_server_info(the_repository, 0); + update_server_info(repo, 0); if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) { unsigned flags = 0; if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0)) flags |= MIDX_WRITE_INCREMENTAL; - write_midx_file(the_repository->objects->sources, + write_midx_file(repo->objects->sources, NULL, NULL, flags); } From df3a499bd6b951a9e23894793afec11f0850834a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:21 -0400 Subject: [PATCH 20/92] builtin/repack.c: avoid "the_repository" in existing packs API There are a number of spots within builtin/repack.c which refer to "the_repository", and either make use of the "existing packs" API or otherwise have a 'struct existing_packs *' in scope. Add a "repo" member to "struct existing_packs" and use that instead of "the_repository" in such locations. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 305782b2c9..7223553bed 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -126,6 +126,7 @@ static void pack_objects_args_release(struct pack_objects_args *args) } struct existing_packs { + struct repository *repo; struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; @@ -265,7 +266,7 @@ static void existing_packs_release(struct existing_packs *existing) static void collect_pack_filenames(struct existing_packs *existing, const struct string_list *extra_keep) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; @@ -498,7 +499,7 @@ static void init_pack_geometry(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; @@ -1139,7 +1140,7 @@ static int write_filtered_pack(const struct pack_objects_args *args, static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, struct existing_packs *existing) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; size_t i; @@ -1405,6 +1406,7 @@ int cmd_repack(int argc, packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); + existing.repo = repo; collect_pack_filenames(&existing, &keep_pack_list); if (geometry.split_factor) { From 94d99de7724bce0325de8293fa1c2312d5960d7c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:24 -0400 Subject: [PATCH 21/92] builtin/repack.c: avoid "the_repository" when taking a ref snapshot Avoid using "the_repository" in various MIDX-related ref snapshotting functions. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 7223553bed..113f5fc67f 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -771,6 +771,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, } struct midx_snapshot_ref_data { + struct repository *repo; struct tempfile *f; struct oidset seen; int preferred; @@ -784,13 +785,13 @@ static int midx_snapshot_ref_one(const char *refname UNUSED, struct midx_snapshot_ref_data *data = _data; struct object_id peeled; - if (!peel_iterated_oid(the_repository, oid, &peeled)) + if (!peel_iterated_oid(data->repo, oid, &peeled)) oid = &peeled; if (oidset_insert(&data->seen, oid)) return 0; /* already seen */ - if (odb_read_object_info(the_repository->objects, oid, NULL) != OBJ_COMMIT) + if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) return 0; fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", @@ -799,11 +800,12 @@ static int midx_snapshot_ref_one(const char *refname UNUSED, return 0; } -static void midx_snapshot_refs(struct tempfile *f) +static void midx_snapshot_refs(struct repository *repo, struct tempfile *f) { struct midx_snapshot_ref_data data; - const struct string_list *preferred = bitmap_preferred_tips(the_repository); + const struct string_list *preferred = bitmap_preferred_tips(repo); + data.repo = repo; data.f = f; data.preferred = 0; oidset_init(&data.seen, 0); @@ -817,13 +819,13 @@ static void midx_snapshot_refs(struct tempfile *f) data.preferred = 1; for_each_string_list_item(item, preferred) - refs_for_each_ref_in(get_main_ref_store(the_repository), + refs_for_each_ref_in(get_main_ref_store(repo), item->string, midx_snapshot_ref_one, &data); data.preferred = 0; } - refs_for_each_ref(get_main_ref_store(the_repository), + refs_for_each_ref(get_main_ref_store(repo), midx_snapshot_ref_one, &data); if (close_tempfile_gently(f)) { @@ -1397,7 +1399,7 @@ int cmd_repack(int argc, "bitmap-ref-tips"); refs_snapshot = xmks_tempfile(path.buf); - midx_snapshot_refs(refs_snapshot); + midx_snapshot_refs(repo, refs_snapshot); strbuf_release(&path); } From 03015747584e9f96c7ad6b57ecd99aa694312333 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:27 -0400 Subject: [PATCH 22/92] builtin/repack.c: avoid "the_repository" when removing packs The 'remove_redundant_pack()' function uses "the_repository" to obtain, and optionally remove, the repository's MIDX. Instead of relying on "the_repository", pass around a "struct repository *" parameter through its callers, and use that instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 113f5fc67f..93802531e1 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -221,33 +221,35 @@ static void mark_packs_for_deletion(struct existing_packs *existing, mark_packs_for_deletion_1(names, &existing->cruft_packs); } -static void remove_redundant_pack(const char *dir_name, const char *base_name) +static void remove_redundant_pack(struct repository *repo, + const char *dir_name, const char *base_name) { struct strbuf buf = STRBUF_INIT; - struct odb_source *source = the_repository->objects->sources; + struct odb_source *source = repo->objects->sources; struct multi_pack_index *m = get_multi_pack_index(source); strbuf_addf(&buf, "%s.pack", base_name); if (m && source->local && midx_contains_pack(m, buf.buf)) - clear_midx_file(the_repository); + clear_midx_file(repo); strbuf_insertf(&buf, 0, "%s/", dir_name); unlink_pack_path(buf.buf, 1); strbuf_release(&buf); } -static void remove_redundant_packs_1(struct string_list *packs) +static void remove_redundant_packs_1(struct repository *repo, + struct string_list *packs) { struct string_list_item *item; for_each_string_list_item(item, packs) { if (!pack_is_marked_for_deletion(item)) continue; - remove_redundant_pack(packdir, item->string); + remove_redundant_pack(repo, packdir, item->string); } } static void remove_redundant_existing_packs(struct existing_packs *existing) { - remove_redundant_packs_1(&existing->non_kept_packs); - remove_redundant_packs_1(&existing->cruft_packs); + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs); } static void existing_packs_release(struct existing_packs *existing) @@ -685,7 +687,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, (string_list_has_string(&existing->kept_packs, buf.buf))) continue; - remove_redundant_pack(packdir, buf.buf); + remove_redundant_pack(existing->repo, packdir, buf.buf); } strbuf_release(&buf); From cae9e2abbd8fb2fd483e101275cee15ef27d5953 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:30 -0400 Subject: [PATCH 23/92] builtin/repack.c: avoid "the_repository" when repacking promisor objects Pass a "struct repository" pointer to the 'repack_promisor_objects()' function to avoid using "the_repository". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 93802531e1..4f08b57ddb 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -407,7 +407,8 @@ static int has_pack_ext(const struct generated_pack_data *data, BUG("unknown pack extension: '%s'", ext); } -static void repack_promisor_objects(const struct pack_objects_args *args, +static void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, struct string_list *names) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -424,7 +425,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args, * {type -> existing pack order} ordering when computing deltas instead * of a {type -> size} ordering, which may produce better deltas. */ - for_each_packed_object(the_repository, write_oid, &cmd, + for_each_packed_object(repo, write_oid, &cmd, FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { @@ -1458,7 +1459,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--delta-islands"); if (pack_everything & ALL_INTO_ONE) { - repack_promisor_objects(&po_args, &names); + repack_promisor_objects(repo, &po_args, &names); if (has_existing_non_kept_packs(&existing) && delete_redundant && From 3758052c0f43fd01d25fc7381c7939daba66c015 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:33 -0400 Subject: [PATCH 24/92] builtin/repack.c: avoid "the_hash_algo" when deleting packs The "mark_packs_for_deletion_1" function uses "the_hash_algo->hexsz" to isolate a pack's checksum before deleting it to avoid deleting a newly written pack having the same checksum (that is, some generated pack wound up identical to an existing pack). Avoid this by passing down a "struct git_hash_algo" pointer, and refer to the hash algorithm through it instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 4f08b57ddb..094f5a0cc2 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -168,11 +168,12 @@ static int pack_is_retained(struct string_list_item *item) return (uintptr_t)item->util & RETAIN_PACK; } -static void mark_packs_for_deletion_1(struct string_list *names, +static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, struct string_list *list) { struct string_list_item *item; - const int hexsz = the_hash_algo->hexsz; + const int hexsz = algop->hexsz; for_each_string_list_item(item, list) { char *sha1; @@ -217,8 +218,9 @@ static void mark_packs_for_deletion(struct existing_packs *existing, struct string_list *names) { - mark_packs_for_deletion_1(names, &existing->non_kept_packs); - mark_packs_for_deletion_1(names, &existing->cruft_packs); + const struct git_hash_algo *algop = existing->repo->hash_algo; + mark_packs_for_deletion_1(algop, names, &existing->non_kept_packs); + mark_packs_for_deletion_1(algop, names, &existing->cruft_packs); } static void remove_redundant_pack(struct repository *repo, From 9a53583b77c35576f87b7e29cb109b46d29ad803 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:36 -0400 Subject: [PATCH 25/92] builtin/repack.c: avoid "the_hash_algo" in `write_oid()` In a similar spirit as the previous commit, avoid referring directly to "the_hash_algo" within builtin/repack.c::write_oid(). Unlike the previous commit, we are within a callback function, so must introduce a new struct to pass additional data through its "data" pointer. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 094f5a0cc2..7d62959dc2 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -339,6 +339,11 @@ static void prepare_pack_objects(struct child_process *cmd, cmd->out = -1; } +struct write_oid_context { + struct child_process *cmd; + const struct git_hash_algo *algop; +}; + /* * Write oid to the given struct child_process's stdin, starting it first if * necessary. @@ -347,14 +352,15 @@ static int write_oid(const struct object_id *oid, struct packed_git *pack UNUSED, uint32_t pos UNUSED, void *data) { - struct child_process *cmd = data; + struct write_oid_context *ctx = data; + struct child_process *cmd = ctx->cmd; if (cmd->in == -1) { if (start_command(cmd)) die(_("could not start pack-objects to repack promisor objects")); } - if (write_in_full(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 || + if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || write_in_full(cmd->in, "\n", 1) < 0) die(_("failed to feed promisor objects to pack-objects")); return 0; @@ -413,6 +419,7 @@ static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) { + struct write_oid_context ctx; struct child_process cmd = CHILD_PROCESS_INIT; FILE *out; struct strbuf line = STRBUF_INIT; @@ -427,7 +434,9 @@ static void repack_promisor_objects(struct repository *repo, * {type -> existing pack order} ordering when computing deltas instead * of a {type -> size} ordering, which may produce better deltas. */ - for_each_packed_object(repo, write_oid, &cmd, + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { From a7a5a607b9c21c7988782cf8ed04078ca320c784 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:38 -0400 Subject: [PATCH 26/92] builtin/repack: avoid "the_hash_algo" in `repack_promisor_objects()` In a similar spirit as the previous commits, avoid referring directly to "the_hash_algo" within builtin/repack.c::repack_promisor_objects(). Since there is already a repository pointer in scope, use its hash_algo value instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/repack.c b/builtin/repack.c index 7d62959dc2..a7e94ed03c 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -452,7 +452,7 @@ static void repack_promisor_objects(struct repository *repo, struct string_list_item *item; char *promisor_name; - if (line.len != the_hash_algo->hexsz) + if (line.len != repo->hash_algo->hexsz) die(_("repack: Expecting full hex object ID lines only from pack-objects.")); item = string_list_append(names, line.buf); From c660b0dbcbb70647f5103a4573963397522a1f0f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:41 -0400 Subject: [PATCH 27/92] builtin/repack.c: avoid "the_hash_algo" in `finish_pack_objects_cmd()` In a similar spirit as previous commits, avoid referring directly to "the_hash_algo" in builtin/repack.c::finish_pack_objects_cmd() and instead accept one as a parameter to the function. Since this function has a number of callers throughout the builtin, the diff is a little noisier than previous commits. However, each hunk is limited to passing the hash_algo parameter from a repository pointer that is already in scope. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a7e94ed03c..a043704aa8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1073,7 +1073,8 @@ static void remove_redundant_bitmaps(struct string_list *include, strbuf_release(&path); } -static int finish_pack_objects_cmd(struct child_process *cmd, +static int finish_pack_objects_cmd(const struct git_hash_algo *algop, + struct child_process *cmd, struct string_list *names, int local) { @@ -1084,7 +1085,7 @@ static int finish_pack_objects_cmd(struct child_process *cmd, while (strbuf_getline_lf(&line, out) != EOF) { struct string_list_item *item; - if (line.len != the_hash_algo->hexsz) + if (line.len != algop->hexsz) die(_("repack: Expecting full hex object ID lines only " "from pack-objects.")); /* @@ -1150,7 +1151,8 @@ static int write_filtered_pack(const struct pack_objects_args *args, fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); - return finish_pack_objects_cmd(&cmd, names, local); + return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, + local); } static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, @@ -1247,7 +1249,8 @@ static int write_cruft_pack(const struct pack_objects_args *args, fprintf(in, "%s.pack\n", item->string); fclose(in); - return finish_pack_objects_cmd(&cmd, names, local); + return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, + local); } static const char *find_pack_prefix(const char *packdir, const char *packtmp) @@ -1534,7 +1537,7 @@ int cmd_repack(int argc, fclose(in); } - ret = finish_pack_objects_cmd(&cmd, &names, 1); + ret = finish_pack_objects_cmd(repo->hash_algo, &cmd, &names, 1); if (ret) goto cleanup; From 8a5d4bd87d3fa8e9de9bc3b2ddb7ca527fcfeb68 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:50 -0400 Subject: [PATCH 28/92] builtin/repack.c: avoid using `hash_to_hex()` in pack geometry In previous commits, we started passing either repository or git_hash_algo pointers around to various spots within builtin/repack.c to reduce our dependency on the_repository in the hope of undef'ing USE_THE_REPOSITORY_VARIABLE. This commit takes us as far as we can (easily) go in that direction by removing the only use of a convenience function that only exists when USE_THE_REPOSITORY_VARIABLE is defined. Unfortunately, the only other such function is "is_bare_repository()", which is less than straightforward to convert into, say, "repo_is_bare()", the latter of the two accepting a repository pointer. Punt on that for now, and declare this commit as the stopping point for our efforts in the direction of undef'ing USE_THE_REPOSITORY_VARIABLE. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/builtin/repack.c b/builtin/repack.c index a043704aa8..0d35f15b4b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -683,12 +683,14 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, struct string_list *names, struct existing_packs *existing) { + const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; uint32_t i; for (i = 0; i < geometry->split; i++) { struct packed_git *p = geometry->pack[i]; - if (string_list_has_string(names, hash_to_hex(p->hash))) + if (string_list_has_string(names, hash_to_hex_algop(p->hash, + algop))) continue; strbuf_reset(&buf); From c7a120722ed60c07fa6a32f43b56f8361bfe38af Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:53 -0400 Subject: [PATCH 29/92] repack: introduce new compilation unit Over the years, builtin/repack.c has turned into a grab-bag of functionality powering the 'git repack' builtin. Among its many capabilities, it: - can build and spawn 'git pack-objects' commands, which in turn generate new packs - has infrastructure to manage the set of existing packs in a repository - has infrastructure to split a sequence of packs into a geometric progression based on object size - can manage both generating and combining cruft packs together - can write new MIDXs to name a few. As a result, this builtin has accumulated a lot of code, making adding new functionality difficult. In the future, 'repack' will learn how to manage a chain of incremental MIDXs, adding yet more functionality into the builtin. As a prerequisite step, let's first move some of the functionality in the builtin into its own repack.[ch]. This will be done over the course of many steps, since there are many individual components, some of which will end up in other, yet-to-exist compilation units of their own. Some of the code movement here is also non-trivial, so performing it in individual steps will make it easier to verify. Let's start by migrating 'struct pack_objects_args' (and the related corresponding pack_objects_args_release() function) into repack.h, and teach both the Makefile and Meson how to build the new compilation unit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 25 +------------------------ meson.build | 1 + repack.c | 11 +++++++++++ repack.h | 23 +++++++++++++++++++++++ 5 files changed, 37 insertions(+), 24 deletions(-) create mode 100644 repack.c create mode 100644 repack.h diff --git a/Makefile b/Makefile index 4c95affadb..c0df6da237 100644 --- a/Makefile +++ b/Makefile @@ -1136,6 +1136,7 @@ LIB_OBJS += refs/packed-backend.o LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o +LIB_OBJS += repack.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o diff --git a/builtin/repack.c b/builtin/repack.c index 0d35f15b4b..6dfcb3327e 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -19,6 +19,7 @@ #include "prune-packed.h" #include "odb.h" #include "promisor-remote.h" +#include "repack.h" #include "shallow.h" #include "pack.h" #include "pack-bitmap.h" @@ -53,21 +54,6 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." ); -struct pack_objects_args { - char *window; - char *window_memory; - char *depth; - char *threads; - unsigned long max_pack_size; - int no_reuse_delta; - int no_reuse_object; - int quiet; - int local; - int name_hash_version; - int path_walk; - struct list_objects_filter_options filter_options; -}; - static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { @@ -116,15 +102,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static void pack_objects_args_release(struct pack_objects_args *args) -{ - free(args->window); - free(args->window_memory); - free(args->depth); - free(args->threads); - list_objects_filter_release(&args->filter_options); -} - struct existing_packs { struct repository *repo; struct string_list kept_packs; diff --git a/meson.build b/meson.build index b3dfcc0497..993e8f368f 100644 --- a/meson.build +++ b/meson.build @@ -462,6 +462,7 @@ libgit_sources = [ 'reftable/tree.c', 'reftable/writer.c', 'remote.c', + 'repack.c', 'replace-object.c', 'repo-settings.c', 'repository.c', diff --git a/repack.c b/repack.c new file mode 100644 index 0000000000..a1f5b796fb --- /dev/null +++ b/repack.c @@ -0,0 +1,11 @@ +#include "git-compat-util.h" +#include "repack.h" + +void pack_objects_args_release(struct pack_objects_args *args) +{ + free(args->window); + free(args->window_memory); + free(args->depth); + free(args->threads); + list_objects_filter_release(&args->filter_options); +} diff --git a/repack.h b/repack.h new file mode 100644 index 0000000000..421d439d5a --- /dev/null +++ b/repack.h @@ -0,0 +1,23 @@ +#ifndef REPACK_H +#define REPACK_H + +#include "list-objects-filter-options.h" + +struct pack_objects_args { + char *window; + char *window_memory; + char *depth; + char *threads; + unsigned long max_pack_size; + int no_reuse_delta; + int no_reuse_object; + int quiet; + int local; + int name_hash_version; + int path_walk; + struct list_objects_filter_options filter_options; +}; + +void pack_objects_args_release(struct pack_objects_args *args); + +#endif /* REPACK_H */ From 19f6e8d023057113fe8c5890349593e70541bec2 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:56 -0400 Subject: [PATCH 30/92] builtin/repack.c: pass both pack_objects args to repack_config A subsequent commit will remove 'delta_base_offset' as a static variable within builtin/repack.c, and reintroduce it as a member of the 'struct pack_objects_args'. As a result, the repack_config callback will need to have both the cruft- and non-cruft 'struct pack_objects_args's in scope. Introduce a new 'struct repack_config_ctx' to allow the callee to provide both pointers to the callback. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 6dfcb3327e..af6de8d77a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -54,10 +54,16 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." ); +struct repack_config_ctx { + struct pack_objects_args *po_args; + struct pack_objects_args *cruft_po_args; +}; + static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { - struct pack_objects_args *cruft_po_args = cb; + struct repack_config_ctx *repack_ctx = cb; + struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args; if (!strcmp(var, "repack.usedeltabaseoffset")) { delta_base_offset = git_config_bool(var, value); return 0; @@ -1260,6 +1266,7 @@ int cmd_repack(int argc, size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ + struct repack_config_ctx config_ctx; int delete_redundant = 0; const char *unpack_unreachable = NULL; int keep_unreachable = 0; @@ -1343,7 +1350,11 @@ int cmd_repack(int argc, list_objects_filter_init(&po_args.filter_options); - repo_config(repo, repack_config, &cruft_po_args); + memset(&config_ctx, 0, sizeof(config_ctx)); + config_ctx.po_args = &po_args; + config_ctx.cruft_po_args = &cruft_po_args; + + repo_config(repo, repack_config, &config_ctx); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); From e35ef71e003cb0731d9f33605f598e1b99746441 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:27:58 -0400 Subject: [PATCH 31/92] repack: move 'delta_base_offset' to 'struct pack_objects_args' The static variable 'delta_base_offset' determines whether or not we pass the "--delta-base-offset" command-line argument when spawning pack-objects as a child process. Its introduction dates back to when repack was rewritten in C, all the way back in a1bbc6c017 (repack: rewrite the shell script in C, 2013-09-15). 'struct pack_objects_args' was introduced much later on in 4571324b99 (builtin/repack.c: allow configuring cruft pack generation, 2022-05-20), but did not move the 'delta_base_offset' variable. Since the 'delta_base_offset' is a property of an individual pack-objects command, re-introduce that variable as a member of 'struct pack_objects_args', which will enable further code movement in the subsequent commits. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 11 ++++++----- repack.h | 3 +++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index af6de8d77a..f4af830353 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -34,7 +34,6 @@ #define RETAIN_PACK 2 static int pack_everything; -static int delta_base_offset = 1; static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; @@ -63,9 +62,10 @@ static int repack_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { struct repack_config_ctx *repack_ctx = cb; + struct pack_objects_args *po_args = repack_ctx->po_args; struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args; if (!strcmp(var, "repack.usedeltabaseoffset")) { - delta_base_offset = git_config_bool(var, value); + po_args->delta_base_offset = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.packkeptobjects")) { @@ -315,7 +315,7 @@ static void prepare_pack_objects(struct child_process *cmd, strvec_push(&cmd->args, "--local"); if (args->quiet) strvec_push(&cmd->args, "--quiet"); - if (delta_base_offset) + if (args->delta_base_offset) strvec_push(&cmd->args, "--delta-base-offset"); strvec_push(&cmd->args, out); cmd->git_cmd = 1; @@ -1271,8 +1271,8 @@ int cmd_repack(int argc, const char *unpack_unreachable = NULL; int keep_unreachable = 0; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; - struct pack_objects_args po_args = { 0 }; - struct pack_objects_args cruft_po_args = { 0 }; + struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT; + struct pack_objects_args cruft_po_args = PACK_OBJECTS_ARGS_INIT; int write_midx = 0; const char *cruft_expiration = NULL; const char *expire_to = NULL; @@ -1567,6 +1567,7 @@ int cmd_repack(int argc, cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; + cruft_po_args.delta_base_offset = po_args.delta_base_offset; ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, cruft_expiration, diff --git a/repack.h b/repack.h index 421d439d5a..12632d7fec 100644 --- a/repack.h +++ b/repack.h @@ -15,9 +15,12 @@ struct pack_objects_args { int local; int name_hash_version; int path_walk; + int delta_base_offset; struct list_objects_filter_options filter_options; }; +#define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } + void pack_objects_args_release(struct pack_objects_args *args); #endif /* REPACK_H */ From 7005d2594b73d30beae7abebdd035becca05299d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:01 -0400 Subject: [PATCH 32/92] repack: remove 'prepare_pack_objects' from the builtin Now that the 'prepare_pack_objects' function no longer refers to external, static variables, move it out to repack.h as generic functionality. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 34 ---------------------------------- repack.c | 35 +++++++++++++++++++++++++++++++++++ repack.h | 5 +++++ 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f4af830353..ff93654cfe 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -288,40 +288,6 @@ static void collect_pack_filenames(struct existing_packs *existing, strbuf_release(&buf); } -static void prepare_pack_objects(struct child_process *cmd, - const struct pack_objects_args *args, - const char *out) -{ - strvec_push(&cmd->args, "pack-objects"); - if (args->window) - strvec_pushf(&cmd->args, "--window=%s", args->window); - if (args->window_memory) - strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); - if (args->depth) - strvec_pushf(&cmd->args, "--depth=%s", args->depth); - if (args->threads) - strvec_pushf(&cmd->args, "--threads=%s", args->threads); - if (args->max_pack_size) - strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); - if (args->no_reuse_delta) - strvec_pushf(&cmd->args, "--no-reuse-delta"); - if (args->no_reuse_object) - strvec_pushf(&cmd->args, "--no-reuse-object"); - if (args->name_hash_version) - strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); - if (args->path_walk) - strvec_pushf(&cmd->args, "--path-walk"); - if (args->local) - strvec_push(&cmd->args, "--local"); - if (args->quiet) - strvec_push(&cmd->args, "--quiet"); - if (args->delta_base_offset) - strvec_push(&cmd->args, "--delta-base-offset"); - strvec_push(&cmd->args, out); - cmd->git_cmd = 1; - cmd->out = -1; -} - struct write_oid_context { struct child_process *cmd; const struct git_hash_algo *algop; diff --git a/repack.c b/repack.c index a1f5b796fb..91b6e1cc09 100644 --- a/repack.c +++ b/repack.c @@ -1,5 +1,40 @@ #include "git-compat-util.h" #include "repack.h" +#include "run-command.h" + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out) +{ + strvec_push(&cmd->args, "pack-objects"); + if (args->window) + strvec_pushf(&cmd->args, "--window=%s", args->window); + if (args->window_memory) + strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory); + if (args->depth) + strvec_pushf(&cmd->args, "--depth=%s", args->depth); + if (args->threads) + strvec_pushf(&cmd->args, "--threads=%s", args->threads); + if (args->max_pack_size) + strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); + if (args->no_reuse_delta) + strvec_pushf(&cmd->args, "--no-reuse-delta"); + if (args->no_reuse_object) + strvec_pushf(&cmd->args, "--no-reuse-object"); + if (args->name_hash_version) + strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); + if (args->path_walk) + strvec_pushf(&cmd->args, "--path-walk"); + if (args->local) + strvec_push(&cmd->args, "--local"); + if (args->quiet) + strvec_push(&cmd->args, "--quiet"); + if (args->delta_base_offset) + strvec_push(&cmd->args, "--delta-base-offset"); + strvec_push(&cmd->args, out); + cmd->git_cmd = 1; + cmd->out = -1; +} void pack_objects_args_release(struct pack_objects_args *args) { diff --git a/repack.h b/repack.h index 12632d7fec..3f7ec20735 100644 --- a/repack.h +++ b/repack.h @@ -21,6 +21,11 @@ struct pack_objects_args { #define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } +struct child_process; + +void prepare_pack_objects(struct child_process *cmd, + const struct pack_objects_args *args, + const char *out); void pack_objects_args_release(struct pack_objects_args *args); #endif /* REPACK_H */ From a0dcecb14613e5bdfdc06616271bffac9e1366e8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:04 -0400 Subject: [PATCH 33/92] builtin/repack.c: rename many 'struct existing_packs' functions Rename many of the 'struct existing_packs'-related functions according to the convention introduced in and described by 541204aabe (Documentation: document naming schema for structs and their functions, 2024-07-30). Note that some functions which operate over an individual entry in the list of existing packs are prefixed with "existing_pack_" instead of the plural form. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 66 +++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index ff93654cfe..f82e6c3930 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -121,39 +121,39 @@ struct existing_packs { .cruft_packs = STRING_LIST_INIT_DUP, \ } -static int has_existing_non_kept_packs(const struct existing_packs *existing) +static int existing_packs_has_non_kept(const struct existing_packs *existing) { return existing->non_kept_packs.nr || existing->cruft_packs.nr; } -static void pack_mark_for_deletion(struct string_list_item *item) +static void existing_pack_mark_for_deletion(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util | DELETE_PACK); } -static void pack_unmark_for_deletion(struct string_list_item *item) +static void existing_pack_unmark_for_deletion(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); } -static int pack_is_marked_for_deletion(struct string_list_item *item) +static int existing_pack_is_marked_for_deletion(struct string_list_item *item) { return (uintptr_t)item->util & DELETE_PACK; } -static void pack_mark_retained(struct string_list_item *item) +static void existing_packs_mark_retained(struct string_list_item *item) { item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); } -static int pack_is_retained(struct string_list_item *item) +static int existing_pack_is_retained(struct string_list_item *item) { return (uintptr_t)item->util & RETAIN_PACK; } -static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, - struct string_list *names, - struct string_list *list) +static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, + struct string_list *list) { struct string_list_item *item; const int hexsz = algop->hexsz; @@ -165,8 +165,8 @@ static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, continue; sha1 = item->string + len - hexsz; - if (pack_is_retained(item)) { - pack_unmark_for_deletion(item); + if (existing_pack_is_retained(item)) { + existing_pack_unmark_for_deletion(item); } else if (!string_list_has_string(names, sha1)) { /* * Mark this pack for deletion, which ensures @@ -175,13 +175,13 @@ static void mark_packs_for_deletion_1(const struct git_hash_algo *algop, * will actually delete this pack (if `-d` was * given). */ - pack_mark_for_deletion(item); + existing_pack_mark_for_deletion(item); } } } -static void retain_cruft_pack(struct existing_packs *existing, - struct packed_git *cruft) +static void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft) { struct strbuf buf = STRBUF_INIT; struct string_list_item *item; @@ -193,17 +193,19 @@ static void retain_cruft_pack(struct existing_packs *existing, if (!item) BUG("could not find cruft pack '%s'", pack_basename(cruft)); - pack_mark_retained(item); + existing_packs_mark_retained(item); strbuf_release(&buf); } -static void mark_packs_for_deletion(struct existing_packs *existing, - struct string_list *names) +static void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names) { const struct git_hash_algo *algop = existing->repo->hash_algo; - mark_packs_for_deletion_1(algop, names, &existing->non_kept_packs); - mark_packs_for_deletion_1(algop, names, &existing->cruft_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->non_kept_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->cruft_packs); } static void remove_redundant_pack(struct repository *repo, @@ -225,13 +227,13 @@ static void remove_redundant_packs_1(struct repository *repo, { struct string_list_item *item; for_each_string_list_item(item, packs) { - if (!pack_is_marked_for_deletion(item)) + if (!existing_pack_is_marked_for_deletion(item)) continue; remove_redundant_pack(repo, packdir, item->string); } } -static void remove_redundant_existing_packs(struct existing_packs *existing) +static void existing_packs_remove_redundant(struct existing_packs *existing) { remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); remove_redundant_packs_1(existing->repo, &existing->cruft_packs); @@ -250,7 +252,7 @@ static void existing_packs_release(struct existing_packs *existing) * .keep file or not. Packs without a .keep file are not to be kept * if we are going to pack everything into one file. */ -static void collect_pack_filenames(struct existing_packs *existing, +static void existing_packs_collect(struct existing_packs *existing, const struct string_list *extra_keep) { struct packfile_store *packs = existing->repo->objects->packfiles; @@ -721,7 +723,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, item = string_list_lookup(&existing->non_kept_packs, pack_name); - if (item && !pack_is_marked_for_deletion(item)) + if (item && !existing_pack_is_marked_for_deletion(item)) continue; } @@ -851,7 +853,7 @@ static void midx_included_packs(struct string_list *include, } } else { for_each_string_list_item(item, &existing->non_kept_packs) { - if (pack_is_marked_for_deletion(item)) + if (existing_pack_is_marked_for_deletion(item)) continue; strbuf_reset(&buf); @@ -888,10 +890,10 @@ static void midx_included_packs(struct string_list *include, * --geometric case, but doing so is unnecessary * since no packs are marked as pending * deletion (since we only call - * `mark_packs_for_deletion()` when doing an - * all-into-one repack). + * `existing_packs_mark_for_deletion()` when + * doing an all-into-one repack). */ - if (pack_is_marked_for_deletion(item)) + if (existing_pack_is_marked_for_deletion(item)) continue; strbuf_reset(&buf); @@ -1128,7 +1130,7 @@ static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, if (p->pack_size < combine_cruft_below_size) { fprintf(in, "-%s\n", pack_basename(p)); } else { - retain_cruft_pack(existing, p); + existing_packs_retain_cruft(existing, p); fprintf(in, "%s\n", pack_basename(p)); } } @@ -1382,7 +1384,7 @@ int cmd_repack(int argc, packtmp = mkpathdup("%s/%s", packdir, packtmp_name); existing.repo = repo; - collect_pack_filenames(&existing, &keep_pack_list); + existing_packs_collect(&existing, &keep_pack_list); if (geometry.split_factor) { if (pack_everything) @@ -1431,7 +1433,7 @@ int cmd_repack(int argc, if (pack_everything & ALL_INTO_ONE) { repack_promisor_objects(repo, &po_args, &names); - if (has_existing_non_kept_packs(&existing) && + if (existing_packs_has_non_kept(&existing) && delete_redundant && !(pack_everything & PACK_CRUFT)) { for_each_string_list_item(item, &names) { @@ -1647,7 +1649,7 @@ int cmd_repack(int argc, /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) - mark_packs_for_deletion(&existing, &names); + existing_packs_mark_for_deletion(&existing, &names); if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; @@ -1671,7 +1673,7 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - remove_redundant_existing_packs(&existing); + existing_packs_remove_redundant(&existing); if (geometry.split_factor) geometry_remove_redundant_packs(&geometry, &names, From f905f49c68f9cf3aff93f0dcd065dd95345c21d5 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:07 -0400 Subject: [PATCH 34/92] repack: remove 'remove_redundant_pack' from the builtin Extract "remove_redundant_pack()" as generic repack-related functionality by moving its implementation to the repack.[ch] compilation unit. This is a prerequisite to moving the "existing_packs" API, which is one of the callers of this function. (The remaining caller in the pack geometry code will eventually move to its own compilation unit as well, and will likewise rely on this function.) While moving it over, prefix the function name with "repack_" to indicate that it belongs to the repack-subsystem. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 18 ++---------------- repack.c | 18 ++++++++++++++++++ repack.h | 3 +++ 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f82e6c3930..31137cf711 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -208,20 +208,6 @@ static void existing_packs_mark_for_deletion(struct existing_packs *existing, &existing->cruft_packs); } -static void remove_redundant_pack(struct repository *repo, - const char *dir_name, const char *base_name) -{ - struct strbuf buf = STRBUF_INIT; - struct odb_source *source = repo->objects->sources; - struct multi_pack_index *m = get_multi_pack_index(source); - strbuf_addf(&buf, "%s.pack", base_name); - if (m && source->local && midx_contains_pack(m, buf.buf)) - clear_midx_file(repo); - strbuf_insertf(&buf, 0, "%s/", dir_name); - unlink_pack_path(buf.buf, 1); - strbuf_release(&buf); -} - static void remove_redundant_packs_1(struct repository *repo, struct string_list *packs) { @@ -229,7 +215,7 @@ static void remove_redundant_packs_1(struct repository *repo, for_each_string_list_item(item, packs) { if (!existing_pack_is_marked_for_deletion(item)) continue; - remove_redundant_pack(repo, packdir, item->string); + repack_remove_redundant_pack(repo, packdir, item->string); } } @@ -652,7 +638,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, (string_list_has_string(&existing->kept_packs, buf.buf))) continue; - remove_redundant_pack(existing->repo, packdir, buf.buf); + repack_remove_redundant_pack(existing->repo, packdir, buf.buf); } strbuf_release(&buf); diff --git a/repack.c b/repack.c index 91b6e1cc09..3aaa351b5b 100644 --- a/repack.c +++ b/repack.c @@ -1,5 +1,9 @@ #include "git-compat-util.h" +#include "midx.h" +#include "odb.h" +#include "packfile.h" #include "repack.h" +#include "repository.h" #include "run-command.h" void prepare_pack_objects(struct child_process *cmd, @@ -44,3 +48,17 @@ void pack_objects_args_release(struct pack_objects_args *args) free(args->threads); list_objects_filter_release(&args->filter_options); } + +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name) +{ + struct strbuf buf = STRBUF_INIT; + struct odb_source *source = repo->objects->sources; + struct multi_pack_index *m = get_multi_pack_index(source); + strbuf_addf(&buf, "%s.pack", base_name); + if (m && source->local && midx_contains_pack(m, buf.buf)) + clear_midx_file(repo); + strbuf_insertf(&buf, 0, "%s/", dir_name); + unlink_pack_path(buf.buf, 1); + strbuf_release(&buf); +} diff --git a/repack.h b/repack.h index 3f7ec20735..a62bfa2ff9 100644 --- a/repack.h +++ b/repack.h @@ -28,4 +28,7 @@ void prepare_pack_objects(struct child_process *cmd, const char *out); void pack_objects_args_release(struct pack_objects_args *args); +void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, + const char *base_name); + #endif /* REPACK_H */ From 9574e8f31d6d920973213ae5dbab6b77d2deeadf Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:10 -0400 Subject: [PATCH 35/92] builtin/repack.c: pass "packdir" when removing packs builtin/repack.c defines a static "packdir" to instruct pack-objects on where to write any new packfiles. This is also the directory scanned when removing any packfiles which were made redundant by the latest repack. Prepare to move the "existing_packs_remove_redundant" function to its own compilation unit by passing in this information as a parameter to that function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 31137cf711..c5a88eda12 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -209,7 +209,8 @@ static void existing_packs_mark_for_deletion(struct existing_packs *existing, } static void remove_redundant_packs_1(struct repository *repo, - struct string_list *packs) + struct string_list *packs, + const char *packdir) { struct string_list_item *item; for_each_string_list_item(item, packs) { @@ -219,10 +220,13 @@ static void remove_redundant_packs_1(struct repository *repo, } } -static void existing_packs_remove_redundant(struct existing_packs *existing) +static void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir) { - remove_redundant_packs_1(existing->repo, &existing->non_kept_packs); - remove_redundant_packs_1(existing->repo, &existing->cruft_packs); + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, + packdir); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs, + packdir); } static void existing_packs_release(struct existing_packs *existing) @@ -1659,7 +1663,7 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - existing_packs_remove_redundant(&existing); + existing_packs_remove_redundant(&existing, packdir); if (geometry.split_factor) geometry_remove_redundant_packs(&geometry, &names, From dab24e4bcbd8499c9262da5e259212765b28b77c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:12 -0400 Subject: [PATCH 36/92] builtin/repack.c: avoid unnecessary numeric casts in existing_packs There are a couple of spots that cause warnings within the existing_packs API without DISABLE_SIGN_COMPARE_WARNINGS under DEVELOPER=1 mode. In both cases, we have int values that are being compared against size_t ones. Neither of these two cases are incorrect, and the cast is completely OK in practice. But both are unnecessary, since: - in existing_packs_mark_for_deletion_1(), 'hexsz' should be defined as a size_t anyway, since algop->hexsz is. - in existing_packs_collect(), 'i' should be defined as a size_t since it is counting up to the value of a string_list's 'nr' field. (This patch is a little bit of noise, but I would rather see us squelch these warnings ahead of moving the existing_packs API into a separate compilation unit to avoid having to define DISABLE_SIGN_COMPARE_WARNINGS in repack.c.) Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index c5a88eda12..e13943b637 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -156,7 +156,7 @@ static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop struct string_list *list) { struct string_list_item *item; - const int hexsz = algop->hexsz; + const size_t hexsz = algop->hexsz; for_each_string_list_item(item, list) { char *sha1; @@ -250,7 +250,7 @@ static void existing_packs_collect(struct existing_packs *existing, struct strbuf buf = STRBUF_INIT; for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - int i; + size_t i; const char *base; if (!p->pack_local) From 7d1f4425889ea7f663ca30dd1d63591e52a628f6 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:15 -0400 Subject: [PATCH 37/92] repack: remove 'existing_packs' API from the builtin The repack builtin defines an API for keeping track of which packs were found in the repository at the beginning of the repack operation. This is used to classify what state a pack was in (kept, non-kept, or cruft), and is also used to mark which packs to delete (or keep) at the end of a repack operation. Now that the prerequisite refactoring is complete, this API is isolated enough that it can be moved out to repack.[ch] and removed from the builtin entirely. As a result, some of its functions become static within repack.c, cleaning up the visible API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 173 ----------------------------------------------- repack.c | 157 ++++++++++++++++++++++++++++++++++++++++++ repack.h | 35 ++++++++++ 3 files changed, 192 insertions(+), 173 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index e13943b637..a168c88791 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -3,7 +3,6 @@ #include "builtin.h" #include "config.h" -#include "dir.h" #include "environment.h" #include "gettext.h" #include "hex.h" @@ -108,178 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct existing_packs { - struct repository *repo; - struct string_list kept_packs; - struct string_list non_kept_packs; - struct string_list cruft_packs; -}; - -#define EXISTING_PACKS_INIT { \ - .kept_packs = STRING_LIST_INIT_DUP, \ - .non_kept_packs = STRING_LIST_INIT_DUP, \ - .cruft_packs = STRING_LIST_INIT_DUP, \ -} - -static int existing_packs_has_non_kept(const struct existing_packs *existing) -{ - return existing->non_kept_packs.nr || existing->cruft_packs.nr; -} - -static void existing_pack_mark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | DELETE_PACK); -} - -static void existing_pack_unmark_for_deletion(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); -} - -static int existing_pack_is_marked_for_deletion(struct string_list_item *item) -{ - return (uintptr_t)item->util & DELETE_PACK; -} - -static void existing_packs_mark_retained(struct string_list_item *item) -{ - item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); -} - -static int existing_pack_is_retained(struct string_list_item *item) -{ - return (uintptr_t)item->util & RETAIN_PACK; -} - -static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, - struct string_list *names, - struct string_list *list) -{ - struct string_list_item *item; - const size_t hexsz = algop->hexsz; - - for_each_string_list_item(item, list) { - char *sha1; - size_t len = strlen(item->string); - if (len < hexsz) - continue; - sha1 = item->string + len - hexsz; - - if (existing_pack_is_retained(item)) { - existing_pack_unmark_for_deletion(item); - } else if (!string_list_has_string(names, sha1)) { - /* - * Mark this pack for deletion, which ensures - * that this pack won't be included in a MIDX - * (if `--write-midx` was given) and that we - * will actually delete this pack (if `-d` was - * given). - */ - existing_pack_mark_for_deletion(item); - } - } -} - -static void existing_packs_retain_cruft(struct existing_packs *existing, - struct packed_git *cruft) -{ - struct strbuf buf = STRBUF_INIT; - struct string_list_item *item; - - strbuf_addstr(&buf, pack_basename(cruft)); - strbuf_strip_suffix(&buf, ".pack"); - - item = string_list_lookup(&existing->cruft_packs, buf.buf); - if (!item) - BUG("could not find cruft pack '%s'", pack_basename(cruft)); - - existing_packs_mark_retained(item); - strbuf_release(&buf); -} - -static void existing_packs_mark_for_deletion(struct existing_packs *existing, - struct string_list *names) - -{ - const struct git_hash_algo *algop = existing->repo->hash_algo; - existing_packs_mark_for_deletion_1(algop, names, - &existing->non_kept_packs); - existing_packs_mark_for_deletion_1(algop, names, - &existing->cruft_packs); -} - -static void remove_redundant_packs_1(struct repository *repo, - struct string_list *packs, - const char *packdir) -{ - struct string_list_item *item; - for_each_string_list_item(item, packs) { - if (!existing_pack_is_marked_for_deletion(item)) - continue; - repack_remove_redundant_pack(repo, packdir, item->string); - } -} - -static void existing_packs_remove_redundant(struct existing_packs *existing, - const char *packdir) -{ - remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, - packdir); - remove_redundant_packs_1(existing->repo, &existing->cruft_packs, - packdir); -} - -static void existing_packs_release(struct existing_packs *existing) -{ - string_list_clear(&existing->kept_packs, 0); - string_list_clear(&existing->non_kept_packs, 0); - string_list_clear(&existing->cruft_packs, 0); -} - -/* - * Adds all packs hex strings (pack-$HASH) to either packs->non_kept - * or packs->kept based on whether each pack has a corresponding - * .keep file or not. Packs without a .keep file are not to be kept - * if we are going to pack everything into one file. - */ -static void existing_packs_collect(struct existing_packs *existing, - const struct string_list *extra_keep) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - size_t i; - const char *base; - - if (!p->pack_local) - continue; - - base = pack_basename(p); - - for (i = 0; i < extra_keep->nr; i++) - if (!fspathcmp(base, extra_keep->items[i].string)) - break; - - strbuf_reset(&buf); - strbuf_addstr(&buf, base); - strbuf_strip_suffix(&buf, ".pack"); - - if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) - string_list_append(&existing->kept_packs, buf.buf); - else if (p->is_cruft) - string_list_append(&existing->cruft_packs, buf.buf); - else - string_list_append(&existing->non_kept_packs, buf.buf); - } - - string_list_sort(&existing->kept_packs); - string_list_sort(&existing->non_kept_packs); - string_list_sort(&existing->cruft_packs); - strbuf_release(&buf); -} - struct write_oid_context { struct child_process *cmd; const struct git_hash_algo *algop; diff --git a/repack.c b/repack.c index 3aaa351b5b..9182e1c50b 100644 --- a/repack.c +++ b/repack.c @@ -1,4 +1,5 @@ #include "git-compat-util.h" +#include "dir.h" #include "midx.h" #include "odb.h" #include "packfile.h" @@ -62,3 +63,159 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, unlink_pack_path(buf.buf, 1); strbuf_release(&buf); } + +#define DELETE_PACK 1 +#define RETAIN_PACK 2 + +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + size_t i; + const char *base; + + if (!p->pack_local) + continue; + + base = pack_basename(p); + + for (i = 0; i < extra_keep->nr; i++) + if (!fspathcmp(base, extra_keep->items[i].string)) + break; + + strbuf_reset(&buf); + strbuf_addstr(&buf, base); + strbuf_strip_suffix(&buf, ".pack"); + + if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) + string_list_append(&existing->kept_packs, buf.buf); + else if (p->is_cruft) + string_list_append(&existing->cruft_packs, buf.buf); + else + string_list_append(&existing->non_kept_packs, buf.buf); + } + + string_list_sort(&existing->kept_packs); + string_list_sort(&existing->non_kept_packs); + string_list_sort(&existing->cruft_packs); + strbuf_release(&buf); +} + +int existing_packs_has_non_kept(const struct existing_packs *existing) +{ + return existing->non_kept_packs.nr || existing->cruft_packs.nr; +} + +static void existing_pack_mark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | DELETE_PACK); +} + +static void existing_pack_unmark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); +} + +int existing_pack_is_marked_for_deletion(struct string_list_item *item) +{ + return (uintptr_t)item->util & DELETE_PACK; +} + +static void existing_packs_mark_retained(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); +} + +static int existing_pack_is_retained(struct string_list_item *item) +{ + return (uintptr_t)item->util & RETAIN_PACK; +} + +static void existing_packs_mark_for_deletion_1(const struct git_hash_algo *algop, + struct string_list *names, + struct string_list *list) +{ + struct string_list_item *item; + const size_t hexsz = algop->hexsz; + + for_each_string_list_item(item, list) { + char *sha1; + size_t len = strlen(item->string); + if (len < hexsz) + continue; + sha1 = item->string + len - hexsz; + + if (existing_pack_is_retained(item)) { + existing_pack_unmark_for_deletion(item); + } else if (!string_list_has_string(names, sha1)) { + /* + * Mark this pack for deletion, which ensures + * that this pack won't be included in a MIDX + * (if `--write-midx` was given) and that we + * will actually delete this pack (if `-d` was + * given). + */ + existing_pack_mark_for_deletion(item); + } + } +} + +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list_item *item; + + strbuf_addstr(&buf, pack_basename(cruft)); + strbuf_strip_suffix(&buf, ".pack"); + + item = string_list_lookup(&existing->cruft_packs, buf.buf); + if (!item) + BUG("could not find cruft pack '%s'", pack_basename(cruft)); + + existing_packs_mark_retained(item); + strbuf_release(&buf); +} + +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names) + +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + existing_packs_mark_for_deletion_1(algop, names, + &existing->non_kept_packs); + existing_packs_mark_for_deletion_1(algop, names, + &existing->cruft_packs); +} + +static void remove_redundant_packs_1(struct repository *repo, + struct string_list *packs, + const char *packdir) +{ + struct string_list_item *item; + for_each_string_list_item(item, packs) { + if (!existing_pack_is_marked_for_deletion(item)) + continue; + repack_remove_redundant_pack(repo, packdir, item->string); + } +} + +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir) +{ + remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, + packdir); + remove_redundant_packs_1(existing->repo, &existing->cruft_packs, + packdir); +} + +void existing_packs_release(struct existing_packs *existing) +{ + string_list_clear(&existing->kept_packs, 0); + string_list_clear(&existing->non_kept_packs, 0); + string_list_clear(&existing->cruft_packs, 0); +} diff --git a/repack.h b/repack.h index a62bfa2ff9..19796e2243 100644 --- a/repack.h +++ b/repack.h @@ -2,6 +2,7 @@ #define REPACK_H #include "list-objects-filter-options.h" +#include "string-list.h" struct pack_objects_args { char *window; @@ -31,4 +32,38 @@ void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, const char *base_name); +struct repository; +struct packed_git; + +struct existing_packs { + struct repository *repo; + struct string_list kept_packs; + struct string_list non_kept_packs; + struct string_list cruft_packs; +}; + +#define EXISTING_PACKS_INIT { \ + .kept_packs = STRING_LIST_INIT_DUP, \ + .non_kept_packs = STRING_LIST_INIT_DUP, \ + .cruft_packs = STRING_LIST_INIT_DUP, \ +} + +/* + * Adds all packs hex strings (pack-$HASH) to either packs->non_kept + * or packs->kept based on whether each pack has a corresponding + * .keep file or not. Packs without a .keep file are not to be kept + * if we are going to pack everything into one file. + */ +void existing_packs_collect(struct existing_packs *existing, + const struct string_list *extra_keep); +int existing_packs_has_non_kept(const struct existing_packs *existing); +int existing_pack_is_marked_for_deletion(struct string_list_item *item); +void existing_packs_retain_cruft(struct existing_packs *existing, + struct packed_git *cruft); +void existing_packs_mark_for_deletion(struct existing_packs *existing, + struct string_list *names); +void existing_packs_remove_redundant(struct existing_packs *existing, + const char *packdir); +void existing_packs_release(struct existing_packs *existing); + #endif /* REPACK_H */ From 2b72c1236725915b353b9740a27a32c107dfe3b0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:18 -0400 Subject: [PATCH 38/92] builtin/repack.c: rename "struct generated_pack_data" The name "generated_pack_data" is somewhat redundant, since the contents of the struct *is* the data associated with the generated pack. Rename the structure to just "generated_pack", resulting in less awkward function names, like "generated_pack_has_ext()" which is preferable to "generated_pack_data_has_ext()". Rename a few related functions to align with the convention that functions to do with a struct "S" should be prefixed with "S_". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a168c88791..a4d80b6b04 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -146,15 +146,15 @@ static struct { {".idx"}, }; -struct generated_pack_data { +struct generated_pack { struct tempfile *tempfiles[ARRAY_SIZE(exts)]; }; -static struct generated_pack_data *populate_pack_exts(const char *name) +static struct generated_pack *generated_pack_populate(const char *name) { struct stat statbuf; struct strbuf path = STRBUF_INIT; - struct generated_pack_data *data = xcalloc(1, sizeof(*data)); + struct generated_pack *pack = xcalloc(1, sizeof(*pack)); int i; for (i = 0; i < ARRAY_SIZE(exts); i++) { @@ -164,21 +164,21 @@ static struct generated_pack_data *populate_pack_exts(const char *name) if (stat(path.buf, &statbuf)) continue; - data->tempfiles[i] = register_tempfile(path.buf); + pack->tempfiles[i] = register_tempfile(path.buf); } strbuf_release(&path); - return data; + return pack; } -static int has_pack_ext(const struct generated_pack_data *data, - const char *ext) +static int generated_pack_has_ext(const struct generated_pack *pack, + const char *ext) { int i; for (i = 0; i < ARRAY_SIZE(exts); i++) { if (strcmp(exts[i].name, ext)) continue; - return !!data->tempfiles[i]; + return !!pack->tempfiles[i]; } BUG("unknown pack extension: '%s'", ext); } @@ -239,7 +239,7 @@ static void repack_promisor_objects(struct repository *repo, line.buf); write_promisor_file(promisor_name, NULL, 0); - item->util = populate_pack_exts(item->string); + item->util = generated_pack_populate(item->string); free(promisor_name); } @@ -780,8 +780,8 @@ static int write_midx_included_packs(struct string_list *include, * will suffice, so pick the first one.) */ for_each_string_list_item(item, names) { - struct generated_pack_data *data = item->util; - if (has_pack_ext(data, ".mtimes")) + struct generated_pack *pack = item->util; + if (generated_pack_has_ext(pack, ".mtimes")) continue; strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", @@ -864,7 +864,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = populate_pack_exts(line.buf); + item->util = generated_pack_populate(line.buf); } } fclose(out); @@ -1435,7 +1435,7 @@ int cmd_repack(int argc, * Ok we have prepared all new packfiles. */ for_each_string_list_item(item, &names) { - struct generated_pack_data *data = item->util; + struct generated_pack *pack = item->util; for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { char *fname; @@ -1443,8 +1443,8 @@ int cmd_repack(int argc, fname = mkpathdup("%s/pack-%s%s", packdir, item->string, exts[ext].name); - if (data->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(data->tempfiles[ext]); + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); struct stat statbuffer; if (!stat(fname_old, &statbuffer)) { @@ -1452,7 +1452,7 @@ int cmd_repack(int argc, chmod(fname_old, statbuffer.st_mode); } - if (rename_tempfile(&data->tempfiles[ext], fname)) + if (rename_tempfile(&pack->tempfiles[ext], fname)) die_errno(_("renaming pack to '%s' failed"), fname); } else if (!exts[ext].optional) die(_("pack-objects did not write a '%s' file for pack %s-%s"), From c0427692cb0fe03eb32fffc5bd06fad4ee434561 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:20 -0400 Subject: [PATCH 39/92] builtin/repack.c: factor out "generated_pack_install" Once all new packs are known to exist, 'repack' installs their contents from their temporary location into their permanent one. This is a semi-involved procedure for each pack, since for each extension (e.g., ".idx", ".pack", ".mtimes", and so on) we have to either: - adjust the filemode of the temporary file before renaming it into place, or - die() if we are missing a non-optional extension, or - unlink() any existing file for extensions that we did not generate (e.g., if a non-cruft pack we generated was identical to, say, a cruft pack which existed at the beginning of the process, we have to remove the ".mtimes" file). Extract this procedure into its own function, and call it "generated_pack_install"(). This will set us up for pulling this function out of the builtin entirely and making it part of the repack.h API, which will be done in a future commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 65 ++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a4d80b6b04..bf413a6ee2 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -183,6 +183,38 @@ static int generated_pack_has_ext(const struct generated_pack *pack, BUG("unknown pack extension: '%s'", ext); } +static void generated_pack_install(struct generated_pack *pack, + const char *name) +{ + int ext; + for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { + char *fname; + + fname = mkpathdup("%s/pack-%s%s", packdir, name, + exts[ext].name); + + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); + struct stat statbuffer; + + if (!stat(fname_old, &statbuffer)) { + statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + chmod(fname_old, statbuffer.st_mode); + } + + if (rename_tempfile(&pack->tempfiles[ext], fname)) + die_errno(_("renaming pack to '%s' failed"), + fname); + } else if (!exts[ext].optional) + die(_("pack-objects did not write a '%s' file for pack %s-%s"), + exts[ext].name, packtmp, name); + else if (unlink(fname) < 0 && errno != ENOENT) + die_errno(_("could not unlink: %s"), fname); + + free(fname); + } +} + static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) @@ -1045,7 +1077,7 @@ int cmd_repack(int argc, struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; - int i, ext, ret; + int i, ret; int show_progress; char **midx_pack_names = NULL; size_t midx_pack_names_nr = 0; @@ -1434,35 +1466,8 @@ int cmd_repack(int argc, /* * Ok we have prepared all new packfiles. */ - for_each_string_list_item(item, &names) { - struct generated_pack *pack = item->util; - - for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname; - - fname = mkpathdup("%s/pack-%s%s", - packdir, item->string, exts[ext].name); - - if (pack->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); - struct stat statbuffer; - - if (!stat(fname_old, &statbuffer)) { - statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); - } - - if (rename_tempfile(&pack->tempfiles[ext], fname)) - die_errno(_("renaming pack to '%s' failed"), fname); - } else if (!exts[ext].optional) - die(_("pack-objects did not write a '%s' file for pack %s-%s"), - exts[ext].name, packtmp, item->string); - else if (unlink(fname) < 0 && errno != ENOENT) - die_errno(_("could not unlink: %s"), fname); - - free(fname); - } - } + for_each_string_list_item(item, &names) + generated_pack_install(item->util, item->string); /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) From 184f0abeb802f44c0e23abe3c8a3fc7448c78b99 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:23 -0400 Subject: [PATCH 40/92] builtin/repack.c: pass "packtmp" to `generated_pack_populate()` In a similar spirit as previous commits, this function needs to know the temporary pack prefix, which it currently accesses through the static "packtmp" variable within builtin/repack.c. Pass it explicitly as a function parameter to facilitate moving this function out of builtin/repack.c entirely. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index bf413a6ee2..bed902adde 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -150,7 +150,8 @@ struct generated_pack { struct tempfile *tempfiles[ARRAY_SIZE(exts)]; }; -static struct generated_pack *generated_pack_populate(const char *name) +static struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp) { struct stat statbuf; struct strbuf path = STRBUF_INIT; @@ -271,7 +272,7 @@ static void repack_promisor_objects(struct repository *repo, line.buf); write_promisor_file(promisor_name, NULL, 0); - item->util = generated_pack_populate(item->string); + item->util = generated_pack_populate(item->string, packtmp); free(promisor_name); } @@ -896,7 +897,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf); + item->util = generated_pack_populate(line.buf, packtmp); } } fclose(out); From 7036d131ae514f1bc854670a9d26b31064fcd88d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:26 -0400 Subject: [PATCH 41/92] builtin/repack.c: provide pack locations to `generated_pack_install()` Repeat what was done in the preceding commit for the `generated_pack_install()` function, which needs both "packdir" and "packtmp". (As an aside, it is somewhat unfortunate that the final three parameters to this function are all "const char *", making errors like passing "packdir" and "packtmp" in the wrong order easy. We could define a new structure here, but that may be too heavy-handed.) Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index bed902adde..966db27613 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -185,7 +185,8 @@ static int generated_pack_has_ext(const struct generated_pack *pack, } static void generated_pack_install(struct generated_pack *pack, - const char *name) + const char *name, + const char *packdir, const char *packtmp) { int ext; for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { @@ -1468,7 +1469,8 @@ int cmd_repack(int argc, * Ok we have prepared all new packfiles. */ for_each_string_list_item(item, &names) - generated_pack_install(item->util, item->string); + generated_pack_install(item->util, item->string, packdir, + packtmp); /* End of pack replacement. */ if (delete_redundant && pack_everything & ALL_INTO_ONE) From f053ab6c2be6a9869cbdfaabe5bd844a2471f8b7 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:29 -0400 Subject: [PATCH 42/92] repack: remove 'generated_pack' API from the builtin Now that we have factored the "generated_pack" API, we can move it to repack.ch, further slimming down builtin/repack.c. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 83 ------------------------------------------------ repack.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 8 +++++ 3 files changed, 91 insertions(+), 83 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 966db27613..0e11c3b2c9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -134,89 +134,6 @@ static int write_oid(const struct object_id *oid, return 0; } -static struct { - const char *name; - unsigned optional:1; -} exts[] = { - {".pack"}, - {".rev", 1}, - {".mtimes", 1}, - {".bitmap", 1}, - {".promisor", 1}, - {".idx"}, -}; - -struct generated_pack { - struct tempfile *tempfiles[ARRAY_SIZE(exts)]; -}; - -static struct generated_pack *generated_pack_populate(const char *name, - const char *packtmp) -{ - struct stat statbuf; - struct strbuf path = STRBUF_INIT; - struct generated_pack *pack = xcalloc(1, sizeof(*pack)); - int i; - - for (i = 0; i < ARRAY_SIZE(exts); i++) { - strbuf_reset(&path); - strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); - - if (stat(path.buf, &statbuf)) - continue; - - pack->tempfiles[i] = register_tempfile(path.buf); - } - - strbuf_release(&path); - return pack; -} - -static int generated_pack_has_ext(const struct generated_pack *pack, - const char *ext) -{ - int i; - for (i = 0; i < ARRAY_SIZE(exts); i++) { - if (strcmp(exts[i].name, ext)) - continue; - return !!pack->tempfiles[i]; - } - BUG("unknown pack extension: '%s'", ext); -} - -static void generated_pack_install(struct generated_pack *pack, - const char *name, - const char *packdir, const char *packtmp) -{ - int ext; - for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname; - - fname = mkpathdup("%s/pack-%s%s", packdir, name, - exts[ext].name); - - if (pack->tempfiles[ext]) { - const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); - struct stat statbuffer; - - if (!stat(fname_old, &statbuffer)) { - statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); - chmod(fname_old, statbuffer.st_mode); - } - - if (rename_tempfile(&pack->tempfiles[ext], fname)) - die_errno(_("renaming pack to '%s' failed"), - fname); - } else if (!exts[ext].optional) - die(_("pack-objects did not write a '%s' file for pack %s-%s"), - exts[ext].name, packtmp, name); - else if (unlink(fname) < 0 && errno != ENOENT) - die_errno(_("could not unlink: %s"), fname); - - free(fname); - } -} - static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names) diff --git a/repack.c b/repack.c index 9182e1c50b..d8afdd352d 100644 --- a/repack.c +++ b/repack.c @@ -3,9 +3,11 @@ #include "midx.h" #include "odb.h" #include "packfile.h" +#include "path.h" #include "repack.h" #include "repository.h" #include "run-command.h" +#include "tempfile.h" void prepare_pack_objects(struct child_process *cmd, const struct pack_objects_args *args, @@ -219,3 +221,84 @@ void existing_packs_release(struct existing_packs *existing) string_list_clear(&existing->non_kept_packs, 0); string_list_clear(&existing->cruft_packs, 0); } + +static struct { + const char *name; + unsigned optional:1; +} exts[] = { + {".pack"}, + {".rev", 1}, + {".mtimes", 1}, + {".bitmap", 1}, + {".promisor", 1}, + {".idx"}, +}; + +struct generated_pack { + struct tempfile *tempfiles[ARRAY_SIZE(exts)]; +}; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp) +{ + struct stat statbuf; + struct strbuf path = STRBUF_INIT; + struct generated_pack *pack = xcalloc(1, sizeof(*pack)); + size_t i; + + for (i = 0; i < ARRAY_SIZE(exts); i++) { + strbuf_reset(&path); + strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name); + + if (stat(path.buf, &statbuf)) + continue; + + pack->tempfiles[i] = register_tempfile(path.buf); + } + + strbuf_release(&path); + return pack; +} + +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext) +{ + size_t i; + for (i = 0; i < ARRAY_SIZE(exts); i++) { + if (strcmp(exts[i].name, ext)) + continue; + return !!pack->tempfiles[i]; + } + BUG("unknown pack extension: '%s'", ext); +} + +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp) +{ + size_t ext; + for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { + char *fname; + + fname = mkpathdup("%s/pack-%s%s", packdir, name, + exts[ext].name); + + if (pack->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(pack->tempfiles[ext]); + struct stat statbuffer; + + if (!stat(fname_old, &statbuffer)) { + statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + chmod(fname_old, statbuffer.st_mode); + } + + if (rename_tempfile(&pack->tempfiles[ext], fname)) + die_errno(_("renaming pack to '%s' failed"), + fname); + } else if (!exts[ext].optional) + die(_("pack-objects did not write a '%s' file for pack %s-%s"), + exts[ext].name, packtmp, name); + else if (unlink(fname) < 0 && errno != ENOENT) + die_errno(_("could not unlink: %s"), fname); + + free(fname); + } +} diff --git a/repack.h b/repack.h index 19796e2243..f37eb49524 100644 --- a/repack.h +++ b/repack.h @@ -66,4 +66,12 @@ void existing_packs_remove_redundant(struct existing_packs *existing, const char *packdir); void existing_packs_release(struct existing_packs *existing); +struct generated_pack; + +struct generated_pack *generated_pack_populate(const char *name, + const char *packtmp); +int generated_pack_has_ext(const struct generated_pack *pack, const char *ext); +void generated_pack_install(struct generated_pack *pack, const char *name, + const char *packdir, const char *packtmp); + #endif /* REPACK_H */ From bebf941f7db5de3de88962199b4400de8207f9b1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:32 -0400 Subject: [PATCH 43/92] builtin/repack.c: pass "packtmp" to `repack_promisor_objects()` In a similar spirit as previous commit(s), pass the "packtmp" variable to "repack_promisor_objects()" as an explicit parameter of the function, preparing us to move this function in a following commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 0e11c3b2c9..2c67111b33 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -136,7 +136,8 @@ static int write_oid(const struct object_id *oid, static void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, - struct string_list *names) + struct string_list *names, + const char *packtmp) { struct write_oid_context ctx; struct child_process cmd = CHILD_PROCESS_INIT; @@ -1199,7 +1200,7 @@ int cmd_repack(int argc, strvec_push(&cmd.args, "--delta-islands"); if (pack_everything & ALL_INTO_ONE) { - repack_promisor_objects(repo, &po_args, &names); + repack_promisor_objects(repo, &po_args, &names, packtmp); if (existing_packs_has_non_kept(&existing) && delete_redundant && From 29e935515d1b49fa08b2781371625e5c55d2bf13 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:35 -0400 Subject: [PATCH 44/92] builtin/repack.c: remove "repack_promisor_objects()" from the builtin Now that we have properly factored the portion of the builtin which is responsible for repacking promisor objects, we can move that function (and associated dependencies) out of the builtin entirely. Similar to previous extractions, this function is declared in repack.h, but implemented in a separate repack-promisor.c file. This is done to separate promisor-specific repacking functionality from generic repack utilities (like "existing_packs", and "generated_pack" APIs). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 95 ------------------------------------------ meson.build | 1 + repack-promisor.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 ++ 5 files changed, 108 insertions(+), 95 deletions(-) create mode 100644 repack-promisor.c diff --git a/Makefile b/Makefile index c0df6da237..2a01bd92dc 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o LIB_OBJS += repository.o diff --git a/builtin/repack.c b/builtin/repack.c index 2c67111b33..24b5e5049b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,101 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct write_oid_context { - struct child_process *cmd; - const struct git_hash_algo *algop; -}; - -/* - * Write oid to the given struct child_process's stdin, starting it first if - * necessary. - */ -static int write_oid(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, void *data) -{ - struct write_oid_context *ctx = data; - struct child_process *cmd = ctx->cmd; - - if (cmd->in == -1) { - if (start_command(cmd)) - die(_("could not start pack-objects to repack promisor objects")); - } - - if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || - write_in_full(cmd->in, "\n", 1) < 0) - die(_("failed to feed promisor objects to pack-objects")); - return 0; -} - -static void repack_promisor_objects(struct repository *repo, - const struct pack_objects_args *args, - struct string_list *names, - const char *packtmp) -{ - struct write_oid_context ctx; - struct child_process cmd = CHILD_PROCESS_INIT; - FILE *out; - struct strbuf line = STRBUF_INIT; - - prepare_pack_objects(&cmd, args, packtmp); - cmd.in = -1; - - /* - * NEEDSWORK: Giving pack-objects only the OIDs without any ordering - * hints may result in suboptimal deltas in the resulting pack. See if - * the OIDs can be sent with fake paths such that pack-objects can use a - * {type -> existing pack order} ordering when computing deltas instead - * of a {type -> size} ordering, which may produce better deltas. - */ - ctx.cmd = &cmd; - ctx.algop = repo->hash_algo; - for_each_packed_object(repo, write_oid, &ctx, - FOR_EACH_OBJECT_PROMISOR_ONLY); - - if (cmd.in == -1) { - /* No packed objects; cmd was never started */ - child_process_clear(&cmd); - return; - } - - close(cmd.in); - - out = xfdopen(cmd.out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - char *promisor_name; - - if (line.len != repo->hash_algo->hexsz) - die(_("repack: Expecting full hex object ID lines only from pack-objects.")); - item = string_list_append(names, line.buf); - - /* - * pack-objects creates the .pack and .idx files, but not the - * .promisor file. Create the .promisor file, which is empty. - * - * NEEDSWORK: fetch-pack sometimes generates non-empty - * .promisor files containing the ref names and associated - * hashes at the point of generation of the corresponding - * packfile, but this would not preserve their contents. Maybe - * concatenate the contents of all .promisor files instead of - * just creating a new empty file. - */ - promisor_name = mkpathdup("%s-%s.promisor", packtmp, - line.buf); - write_promisor_file(promisor_name, NULL, 0); - - item->util = generated_pack_populate(item->string, packtmp); - - free(promisor_name); - } - - fclose(out); - if (finish_command(&cmd)) - die(_("could not finish pack-objects to repack promisor objects")); - strbuf_release(&line); -} - struct pack_geometry { struct packed_git **pack; uint32_t pack_nr, pack_alloc; diff --git a/meson.build b/meson.build index 993e8f368f..1fbb8c52a6 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', 'repository.c', diff --git a/repack-promisor.c b/repack-promisor.c new file mode 100644 index 0000000000..ee6e0669f6 --- /dev/null +++ b/repack-promisor.c @@ -0,0 +1,102 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "hex.h" +#include "pack.h" +#include "packfile.h" +#include "path.h" +#include "repository.h" +#include "run-command.h" + +struct write_oid_context { + struct child_process *cmd; + const struct git_hash_algo *algop; +}; + +/* + * Write oid to the given struct child_process's stdin, starting it first if + * necessary. + */ +static int write_oid(const struct object_id *oid, + struct packed_git *pack UNUSED, + uint32_t pos UNUSED, void *data) +{ + struct write_oid_context *ctx = data; + struct child_process *cmd = ctx->cmd; + + if (cmd->in == -1) { + if (start_command(cmd)) + die(_("could not start pack-objects to repack promisor objects")); + } + + if (write_in_full(cmd->in, oid_to_hex(oid), ctx->algop->hexsz) < 0 || + write_in_full(cmd->in, "\n", 1) < 0) + die(_("failed to feed promisor objects to pack-objects")); + return 0; +} + +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp) +{ + struct write_oid_context ctx; + struct child_process cmd = CHILD_PROCESS_INIT; + FILE *out; + struct strbuf line = STRBUF_INIT; + + prepare_pack_objects(&cmd, args, packtmp); + cmd.in = -1; + + /* + * NEEDSWORK: Giving pack-objects only the OIDs without any ordering + * hints may result in suboptimal deltas in the resulting pack. See if + * the OIDs can be sent with fake paths such that pack-objects can use a + * {type -> existing pack order} ordering when computing deltas instead + * of a {type -> size} ordering, which may produce better deltas. + */ + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, + FOR_EACH_OBJECT_PROMISOR_ONLY); + + if (cmd.in == -1) { + /* No packed objects; cmd was never started */ + child_process_clear(&cmd); + return; + } + + close(cmd.in); + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + char *promisor_name; + + if (line.len != repo->hash_algo->hexsz) + die(_("repack: Expecting full hex object ID lines only from pack-objects.")); + item = string_list_append(names, line.buf); + + /* + * pack-objects creates the .pack and .idx files, but not the + * .promisor file. Create the .promisor file, which is empty. + * + * NEEDSWORK: fetch-pack sometimes generates non-empty + * .promisor files containing the ref names and associated + * hashes at the point of generation of the corresponding + * packfile, but this would not preserve their contents. Maybe + * concatenate the contents of all .promisor files instead of + * just creating a new empty file. + */ + promisor_name = mkpathdup("%s-%s.promisor", packtmp, + line.buf); + write_promisor_file(promisor_name, NULL, 0); + + item->util = generated_pack_populate(item->string, packtmp); + + free(promisor_name); + } + + fclose(out); + if (finish_command(&cmd)) + die(_("could not finish pack-objects to repack promisor objects")); + strbuf_release(&line); +} diff --git a/repack.h b/repack.h index f37eb49524..19dc4fd738 100644 --- a/repack.h +++ b/repack.h @@ -74,4 +74,8 @@ int generated_pack_has_ext(const struct generated_pack *pack, const char *ext); void generated_pack_install(struct generated_pack *pack, const char *name, const char *packdir, const char *packtmp); +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp); + #endif /* REPACK_H */ From e05c2d55668dcaa6a912372d93fb8f82d418d390 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:38 -0400 Subject: [PATCH 45/92] builtin/repack.c: rename various pack_geometry functions Rename functions which work with 'struct pack_geometry' to begin with "pack_geometry_". While we're at it, change `free_pack_geometry()` to instead be named `pack_geometry_release()` to match our conventions, and make clear that that function frees the contents of the struct, not the memory allocated to hold the struct itself. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 52 ++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 24b5e5049b..42f05d2ebf 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -115,17 +115,17 @@ struct pack_geometry { int split_factor; }; -static uint32_t geometry_pack_weight(struct packed_git *p) +static uint32_t pack_geometry_weight(struct packed_git *p) { if (open_pack_index(p)) die(_("cannot open index for %s"), p->pack_name); return p->num_objects; } -static int geometry_cmp(const void *va, const void *vb) +static int pack_geometry_cmp(const void *va, const void *vb) { - uint32_t aw = geometry_pack_weight(*(struct packed_git **)va), - bw = geometry_pack_weight(*(struct packed_git **)vb); + uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), + bw = pack_geometry_weight(*(struct packed_git **)vb); if (aw < bw) return -1; @@ -134,7 +134,7 @@ static int geometry_cmp(const void *va, const void *vb) return 0; } -static void init_pack_geometry(struct pack_geometry *geometry, +static void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { @@ -184,11 +184,11 @@ static void init_pack_geometry(struct pack_geometry *geometry, geometry->pack_nr++; } - QSORT(geometry->pack, geometry->pack_nr, geometry_cmp); + QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); strbuf_release(&buf); } -static void split_pack_geometry(struct pack_geometry *geometry) +static void pack_geometry_split(struct pack_geometry *geometry) { uint32_t i; uint32_t split; @@ -208,13 +208,13 @@ static void split_pack_geometry(struct pack_geometry *geometry) struct packed_git *prev = geometry->pack[i - 1]; if (unsigned_mult_overflows(geometry->split_factor, - geometry_pack_weight(prev))) + pack_geometry_weight(prev))) die(_("pack %s too large to consider in geometric " "progression"), prev->pack_name); - if (geometry_pack_weight(ours) < - geometry->split_factor * geometry_pack_weight(prev)) + if (pack_geometry_weight(ours) < + geometry->split_factor * pack_geometry_weight(prev)) break; } @@ -242,9 +242,9 @@ static void split_pack_geometry(struct pack_geometry *geometry) for (i = 0; i < split; i++) { struct packed_git *p = geometry->pack[i]; - if (unsigned_add_overflows(total_size, geometry_pack_weight(p))) + if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) die(_("pack %s too large to roll up"), p->pack_name); - total_size += geometry_pack_weight(p); + total_size += pack_geometry_weight(p); } for (i = split; i < geometry->pack_nr; i++) { struct packed_git *ours = geometry->pack[i]; @@ -253,15 +253,15 @@ static void split_pack_geometry(struct pack_geometry *geometry) total_size)) die(_("pack %s too large to roll up"), ours->pack_name); - if (geometry_pack_weight(ours) < + if (pack_geometry_weight(ours) < geometry->split_factor * total_size) { if (unsigned_add_overflows(total_size, - geometry_pack_weight(ours))) + pack_geometry_weight(ours))) die(_("pack %s too large to roll up"), ours->pack_name); split++; - total_size += geometry_pack_weight(ours); + total_size += pack_geometry_weight(ours); } else break; } @@ -269,7 +269,7 @@ static void split_pack_geometry(struct pack_geometry *geometry) geometry->split = split; } -static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) +static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) { uint32_t i; @@ -304,9 +304,9 @@ static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) return NULL; } -static void geometry_remove_redundant_packs(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing) +static void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -332,7 +332,7 @@ static void geometry_remove_redundant_packs(struct pack_geometry *geometry, strbuf_release(&buf); } -static void free_pack_geometry(struct pack_geometry *geometry) +static void pack_geometry_release(struct pack_geometry *geometry) { if (!geometry) return; @@ -599,7 +599,7 @@ static int write_midx_included_packs(struct string_list *include, { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; - struct packed_git *preferred = get_preferred_pack(geometry); + struct packed_git *preferred = pack_geometry_preferred_pack(geometry); FILE *in; int ret; @@ -1063,8 +1063,8 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - init_pack_geometry(&geometry, &existing, &po_args); - split_pack_geometry(&geometry); + pack_geometry_init(&geometry, &existing, &po_args); + pack_geometry_split(&geometry); } prepare_pack_objects(&cmd, &po_args, packtmp); @@ -1324,8 +1324,8 @@ int cmd_repack(int argc, existing_packs_remove_redundant(&existing, packdir); if (geometry.split_factor) - geometry_remove_redundant_packs(&geometry, &names, - &existing); + pack_geometry_remove_redundant(&geometry, &names, + &existing); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); @@ -1352,7 +1352,7 @@ cleanup: string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); existing_packs_release(&existing); - free_pack_geometry(&geometry); + pack_geometry_release(&geometry); for (size_t i = 0; i < midx_pack_names_nr; i++) free(midx_pack_names[i]); free(midx_pack_names); From 2a15a739a231d3eac774e13b53003faa7377719c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:41 -0400 Subject: [PATCH 46/92] builtin/repack.c: pass 'pack_kept_objects' to `pack_geometry_init()` Prepare to move pack_geometry-related APIs to their own compilation unit by passing in the static "pack_kept_objects" variable directly as a parameter to the 'pack_geometry_init()' function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 42f05d2ebf..ac8c80d0a5 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -136,7 +136,8 @@ static int pack_geometry_cmp(const void *va, const void *vb) static void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args) + const struct pack_objects_args *args, + int pack_kept_objects) { struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; @@ -1063,7 +1064,8 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - pack_geometry_init(&geometry, &existing, &po_args); + pack_geometry_init(&geometry, &existing, &po_args, + pack_kept_objects); pack_geometry_split(&geometry); } From b2ebeed1d82c5da8f7bb604594701629dcaf472b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:44 -0400 Subject: [PATCH 47/92] builtin/repack.c: pass 'packdir' to `pack_geometry_remove_redundant()` For similar reasons as the preceding commit, pass the "packdir" variable directly to `pack_geometry_remove_redundant()` as a parameter to the function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index ac8c80d0a5..f6d04b33a7 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -307,7 +307,8 @@ static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geo static void pack_geometry_remove_redundant(struct pack_geometry *geometry, struct string_list *names, - struct existing_packs *existing) + struct existing_packs *existing, + const char *packdir) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -1327,7 +1328,7 @@ int cmd_repack(int argc, if (geometry.split_factor) pack_geometry_remove_redundant(&geometry, &names, - &existing); + &existing, packdir); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); From 62d3fa09b3890631af7c572cb6132088a14d2653 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:47 -0400 Subject: [PATCH 48/92] repack: remove pack_geometry API from the builtin Now that the pack_geometry API is fully factored and isolated from the rest of the builtin, declare it within repack.h and move its implementation to "repack-geometry.c" as a separate component. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 235 ---------------------------------------------- meson.build | 1 + repack-geometry.c | 234 +++++++++++++++++++++++++++++++++++++++++++++ repack.h | 20 ++++ 5 files changed, 256 insertions(+), 235 deletions(-) create mode 100644 repack-geometry.c diff --git a/Makefile b/Makefile index 2a01bd92dc..3ee8d27dba 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-geometry.o LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o diff --git a/builtin/repack.c b/builtin/repack.c index f6d04b33a7..e2313c80c3 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,241 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct pack_geometry { - struct packed_git **pack; - uint32_t pack_nr, pack_alloc; - uint32_t split; - - int split_factor; -}; - -static uint32_t pack_geometry_weight(struct packed_git *p) -{ - if (open_pack_index(p)) - die(_("cannot open index for %s"), p->pack_name); - return p->num_objects; -} - -static int pack_geometry_cmp(const void *va, const void *vb) -{ - uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), - bw = pack_geometry_weight(*(struct packed_git **)vb); - - if (aw < bw) - return -1; - if (aw > bw) - return 1; - return 0; -} - -static void pack_geometry_init(struct pack_geometry *geometry, - struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (args->local && !p->pack_local) - /* - * When asked to only repack local packfiles we skip - * over any packfiles that are borrowed from alternate - * object directories. - */ - continue; - - if (!pack_kept_objects) { - /* - * Any pack that has its pack_keep bit set will - * appear in existing->kept_packs below, but - * this saves us from doing a more expensive - * check. - */ - if (p->pack_keep) - continue; - - /* - * The pack may be kept via the --keep-pack - * option; check 'existing->kept_packs' to - * determine whether to ignore it. - */ - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (string_list_has_string(&existing->kept_packs, buf.buf)) - continue; - } - if (p->is_cruft) - continue; - - ALLOC_GROW(geometry->pack, - geometry->pack_nr + 1, - geometry->pack_alloc); - - geometry->pack[geometry->pack_nr] = p; - geometry->pack_nr++; - } - - QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); - strbuf_release(&buf); -} - -static void pack_geometry_split(struct pack_geometry *geometry) -{ - uint32_t i; - uint32_t split; - off_t total_size = 0; - - if (!geometry->pack_nr) { - geometry->split = geometry->pack_nr; - return; - } - - /* - * First, count the number of packs (in descending order of size) which - * already form a geometric progression. - */ - for (i = geometry->pack_nr - 1; i > 0; i--) { - struct packed_git *ours = geometry->pack[i]; - struct packed_git *prev = geometry->pack[i - 1]; - - if (unsigned_mult_overflows(geometry->split_factor, - pack_geometry_weight(prev))) - die(_("pack %s too large to consider in geometric " - "progression"), - prev->pack_name); - - if (pack_geometry_weight(ours) < - geometry->split_factor * pack_geometry_weight(prev)) - break; - } - - split = i; - - if (split) { - /* - * Move the split one to the right, since the top element in the - * last-compared pair can't be in the progression. Only do this - * when we split in the middle of the array (otherwise if we got - * to the end, then the split is in the right place). - */ - split++; - } - - /* - * Then, anything to the left of 'split' must be in a new pack. But, - * creating that new pack may cause packs in the heavy half to no longer - * form a geometric progression. - * - * Compute an expected size of the new pack, and then determine how many - * packs in the heavy half need to be joined into it (if any) to restore - * the geometric progression. - */ - for (i = 0; i < split; i++) { - struct packed_git *p = geometry->pack[i]; - - if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) - die(_("pack %s too large to roll up"), p->pack_name); - total_size += pack_geometry_weight(p); - } - for (i = split; i < geometry->pack_nr; i++) { - struct packed_git *ours = geometry->pack[i]; - - if (unsigned_mult_overflows(geometry->split_factor, - total_size)) - die(_("pack %s too large to roll up"), ours->pack_name); - - if (pack_geometry_weight(ours) < - geometry->split_factor * total_size) { - if (unsigned_add_overflows(total_size, - pack_geometry_weight(ours))) - die(_("pack %s too large to roll up"), - ours->pack_name); - - split++; - total_size += pack_geometry_weight(ours); - } else - break; - } - - geometry->split = split; -} - -static struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) -{ - uint32_t i; - - if (!geometry) { - /* - * No geometry means either an all-into-one repack (in which - * case there is only one pack left and it is the largest) or an - * incremental one. - * - * If repacking incrementally, then we could check the size of - * all packs to determine which should be preferred, but leave - * this for later. - */ - return NULL; - } - if (geometry->split == geometry->pack_nr) - return NULL; - - /* - * The preferred pack is the largest pack above the split line. In - * other words, it is the largest pack that does not get rolled up in - * the geometric repack. - */ - for (i = geometry->pack_nr; i > geometry->split; i--) - /* - * A pack that is not local would never be included in a - * multi-pack index. We thus skip over any non-local packs. - */ - if (geometry->pack[i - 1]->pack_local) - return geometry->pack[i - 1]; - - return NULL; -} - -static void pack_geometry_remove_redundant(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing, - const char *packdir) -{ - const struct git_hash_algo *algop = existing->repo->hash_algo; - struct strbuf buf = STRBUF_INIT; - uint32_t i; - - for (i = 0; i < geometry->split; i++) { - struct packed_git *p = geometry->pack[i]; - if (string_list_has_string(names, hash_to_hex_algop(p->hash, - algop))) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if ((p->pack_keep) || - (string_list_has_string(&existing->kept_packs, buf.buf))) - continue; - - repack_remove_redundant_pack(existing->repo, packdir, buf.buf); - } - - strbuf_release(&buf); -} - -static void pack_geometry_release(struct pack_geometry *geometry) -{ - if (!geometry) - return; - - free(geometry->pack); -} - static int midx_has_unknown_packs(char **midx_pack_names, size_t midx_pack_names_nr, struct string_list *include, diff --git a/meson.build b/meson.build index 1fbb8c52a6..47b05089ee 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-geometry.c', 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', diff --git a/repack-geometry.c b/repack-geometry.c new file mode 100644 index 0000000000..f58f1fc7f0 --- /dev/null +++ b/repack-geometry.c @@ -0,0 +1,234 @@ +#define DISABLE_SIGN_COMPARE_WARNINGS + +#include "git-compat-util.h" +#include "repack.h" +#include "repository.h" +#include "hex.h" +#include "packfile.h" + +static uint32_t pack_geometry_weight(struct packed_git *p) +{ + if (open_pack_index(p)) + die(_("cannot open index for %s"), p->pack_name); + return p->num_objects; +} + +static int pack_geometry_cmp(const void *va, const void *vb) +{ + uint32_t aw = pack_geometry_weight(*(struct packed_git **)va), + bw = pack_geometry_weight(*(struct packed_git **)vb); + + if (aw < bw) + return -1; + if (aw > bw) + return 1; + return 0; +} + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args, + int pack_kept_objects) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + if (args->local && !p->pack_local) + /* + * When asked to only repack local packfiles we skip + * over any packfiles that are borrowed from alternate + * object directories. + */ + continue; + + if (!pack_kept_objects) { + /* + * Any pack that has its pack_keep bit set will + * appear in existing->kept_packs below, but + * this saves us from doing a more expensive + * check. + */ + if (p->pack_keep) + continue; + + /* + * The pack may be kept via the --keep-pack + * option; check 'existing->kept_packs' to + * determine whether to ignore it. + */ + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (string_list_has_string(&existing->kept_packs, buf.buf)) + continue; + } + if (p->is_cruft) + continue; + + ALLOC_GROW(geometry->pack, + geometry->pack_nr + 1, + geometry->pack_alloc); + + geometry->pack[geometry->pack_nr] = p; + geometry->pack_nr++; + } + + QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); + strbuf_release(&buf); +} + +void pack_geometry_split(struct pack_geometry *geometry) +{ + uint32_t i; + uint32_t split; + off_t total_size = 0; + + if (!geometry->pack_nr) { + geometry->split = geometry->pack_nr; + return; + } + + /* + * First, count the number of packs (in descending order of size) which + * already form a geometric progression. + */ + for (i = geometry->pack_nr - 1; i > 0; i--) { + struct packed_git *ours = geometry->pack[i]; + struct packed_git *prev = geometry->pack[i - 1]; + + if (unsigned_mult_overflows(geometry->split_factor, + pack_geometry_weight(prev))) + die(_("pack %s too large to consider in geometric " + "progression"), + prev->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * pack_geometry_weight(prev)) + break; + } + + split = i; + + if (split) { + /* + * Move the split one to the right, since the top element in the + * last-compared pair can't be in the progression. Only do this + * when we split in the middle of the array (otherwise if we got + * to the end, then the split is in the right place). + */ + split++; + } + + /* + * Then, anything to the left of 'split' must be in a new pack. But, + * creating that new pack may cause packs in the heavy half to no longer + * form a geometric progression. + * + * Compute an expected size of the new pack, and then determine how many + * packs in the heavy half need to be joined into it (if any) to restore + * the geometric progression. + */ + for (i = 0; i < split; i++) { + struct packed_git *p = geometry->pack[i]; + + if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) + die(_("pack %s too large to roll up"), p->pack_name); + total_size += pack_geometry_weight(p); + } + for (i = split; i < geometry->pack_nr; i++) { + struct packed_git *ours = geometry->pack[i]; + + if (unsigned_mult_overflows(geometry->split_factor, + total_size)) + die(_("pack %s too large to roll up"), ours->pack_name); + + if (pack_geometry_weight(ours) < + geometry->split_factor * total_size) { + if (unsigned_add_overflows(total_size, + pack_geometry_weight(ours))) + die(_("pack %s too large to roll up"), + ours->pack_name); + + split++; + total_size += pack_geometry_weight(ours); + } else + break; + } + + geometry->split = split; +} + +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) +{ + uint32_t i; + + if (!geometry) { + /* + * No geometry means either an all-into-one repack (in which + * case there is only one pack left and it is the largest) or an + * incremental one. + * + * If repacking incrementally, then we could check the size of + * all packs to determine which should be preferred, but leave + * this for later. + */ + return NULL; + } + if (geometry->split == geometry->pack_nr) + return NULL; + + /* + * The preferred pack is the largest pack above the split line. In + * other words, it is the largest pack that does not get rolled up in + * the geometric repack. + */ + for (i = geometry->pack_nr; i > geometry->split; i--) + /* + * A pack that is not local would never be included in a + * multi-pack index. We thus skip over any non-local packs. + */ + if (geometry->pack[i - 1]->pack_local) + return geometry->pack[i - 1]; + + return NULL; +} + +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir) +{ + const struct git_hash_algo *algop = existing->repo->hash_algo; + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + for (i = 0; i < geometry->split; i++) { + struct packed_git *p = geometry->pack[i]; + if (string_list_has_string(names, hash_to_hex_algop(p->hash, + algop))) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if ((p->pack_keep) || + (string_list_has_string(&existing->kept_packs, buf.buf))) + continue; + + repack_remove_redundant_pack(existing->repo, packdir, buf.buf); + } + + strbuf_release(&buf); +} + +void pack_geometry_release(struct pack_geometry *geometry) +{ + if (!geometry) + return; + + free(geometry->pack); +} diff --git a/repack.h b/repack.h index 19dc4fd738..cea7969ae4 100644 --- a/repack.h +++ b/repack.h @@ -78,4 +78,24 @@ void repack_promisor_objects(struct repository *repo, const struct pack_objects_args *args, struct string_list *names, const char *packtmp); +struct pack_geometry { + struct packed_git **pack; + uint32_t pack_nr, pack_alloc; + uint32_t split; + + int split_factor; +}; + +void pack_geometry_init(struct pack_geometry *geometry, + struct existing_packs *existing, + const struct pack_objects_args *args, + int pack_kept_objects); +void pack_geometry_split(struct pack_geometry *geometry); +struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir); +void pack_geometry_release(struct pack_geometry *geometry); + #endif /* REPACK_H */ From ccb7f822d520472026a12250e1390683706a8154 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:50 -0400 Subject: [PATCH 49/92] builtin/repack.c: remove ref snapshotting from builtin When writing a MIDX, 'git repack' takes a snapshot of the repository's references and writes the result out to a file, which it then passes to 'git multi-pack-index write' via the '--refs-snapshot'. This is done in order to make bitmap selections with respect to what we are packing, thus avoiding a race where an incoming reference update causes us to try and write a bitmap for a commit not present in the MIDX. Extract this functionality out into a new repack-midx.c compilation unit, and expose the necessary functions via the repack.h API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 68 ------------------------------------------ meson.build | 1 + repack-midx.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 +++ 5 files changed, 83 insertions(+), 68 deletions(-) create mode 100644 repack-midx.c diff --git a/Makefile b/Makefile index 3ee8d27dba..b214277163 100644 --- a/Makefile +++ b/Makefile @@ -1138,6 +1138,7 @@ LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o LIB_OBJS += repack-geometry.o +LIB_OBJS += repack-midx.o LIB_OBJS += repack-promisor.o LIB_OBJS += replace-object.o LIB_OBJS += repo-settings.o diff --git a/builtin/repack.c b/builtin/repack.c index e2313c80c3..7713721826 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -178,74 +178,6 @@ static int midx_has_unknown_packs(char **midx_pack_names, return 0; } -struct midx_snapshot_ref_data { - struct repository *repo; - struct tempfile *f; - struct oidset seen; - int preferred; -}; - -static int midx_snapshot_ref_one(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *_data) -{ - struct midx_snapshot_ref_data *data = _data; - struct object_id peeled; - - if (!peel_iterated_oid(data->repo, oid, &peeled)) - oid = &peeled; - - if (oidset_insert(&data->seen, oid)) - return 0; /* already seen */ - - if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) - return 0; - - fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", - oid_to_hex(oid)); - - return 0; -} - -static void midx_snapshot_refs(struct repository *repo, struct tempfile *f) -{ - struct midx_snapshot_ref_data data; - const struct string_list *preferred = bitmap_preferred_tips(repo); - - data.repo = repo; - data.f = f; - data.preferred = 0; - oidset_init(&data.seen, 0); - - if (!fdopen_tempfile(f, "w")) - die(_("could not open tempfile %s for writing"), - get_tempfile_path(f)); - - if (preferred) { - struct string_list_item *item; - - data.preferred = 1; - for_each_string_list_item(item, preferred) - refs_for_each_ref_in(get_main_ref_store(repo), - item->string, - midx_snapshot_ref_one, &data); - data.preferred = 0; - } - - refs_for_each_ref(get_main_ref_store(repo), - midx_snapshot_ref_one, &data); - - if (close_tempfile_gently(f)) { - int save_errno = errno; - delete_tempfile(&f); - errno = save_errno; - die_errno(_("could not close refs snapshot tempfile")); - } - - oidset_clear(&data.seen); -} - static void midx_included_packs(struct string_list *include, struct existing_packs *existing, char **midx_pack_names, diff --git a/meson.build b/meson.build index 47b05089ee..0423ed30c4 100644 --- a/meson.build +++ b/meson.build @@ -464,6 +464,7 @@ libgit_sources = [ 'remote.c', 'repack.c', 'repack-geometry.c', + 'repack-midx.c', 'repack-promisor.c', 'replace-object.c', 'repo-settings.c', diff --git a/repack-midx.c b/repack-midx.c new file mode 100644 index 0000000000..354df729a5 --- /dev/null +++ b/repack-midx.c @@ -0,0 +1,77 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "hash.h" +#include "hex.h" +#include "odb.h" +#include "oidset.h" +#include "pack-bitmap.h" +#include "refs.h" +#include "tempfile.h" + +struct midx_snapshot_ref_data { + struct repository *repo; + struct tempfile *f; + struct oidset seen; + int preferred; +}; + +static int midx_snapshot_ref_one(const char *refname UNUSED, + const char *referent UNUSED, + const struct object_id *oid, + int flag UNUSED, void *_data) +{ + struct midx_snapshot_ref_data *data = _data; + struct object_id peeled; + + if (!peel_iterated_oid(data->repo, oid, &peeled)) + oid = &peeled; + + if (oidset_insert(&data->seen, oid)) + return 0; /* already seen */ + + if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) + return 0; + + fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", + oid_to_hex(oid)); + + return 0; +} + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f) +{ + struct midx_snapshot_ref_data data; + const struct string_list *preferred = bitmap_preferred_tips(repo); + + data.repo = repo; + data.f = f; + data.preferred = 0; + oidset_init(&data.seen, 0); + + if (!fdopen_tempfile(f, "w")) + die(_("could not open tempfile %s for writing"), + get_tempfile_path(f)); + + if (preferred) { + struct string_list_item *item; + + data.preferred = 1; + for_each_string_list_item(item, preferred) + refs_for_each_ref_in(get_main_ref_store(repo), + item->string, + midx_snapshot_ref_one, &data); + data.preferred = 0; + } + + refs_for_each_ref(get_main_ref_store(repo), + midx_snapshot_ref_one, &data); + + if (close_tempfile_gently(f)) { + int save_errno = errno; + delete_tempfile(&f); + errno = save_errno; + die_errno(_("could not close refs snapshot tempfile")); + } + + oidset_clear(&data.seen); +} diff --git a/repack.h b/repack.h index cea7969ae4..803e129224 100644 --- a/repack.h +++ b/repack.h @@ -98,4 +98,8 @@ void pack_geometry_remove_redundant(struct pack_geometry *geometry, const char *packdir); void pack_geometry_release(struct pack_geometry *geometry); +struct tempfile; + +void midx_snapshot_refs(struct repository *repo, struct tempfile *f); + #endif /* REPACK_H */ From e6b09077216ecc1c767506f39be736ba3dcccecb Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:53 -0400 Subject: [PATCH 50/92] builtin/repack.c: extract opts struct for 'write_midx_included_packs()' The function 'write_midx_included_packs()', which is responsible for writing a new MIDX with a given set of included packs, currently takes a list of six arguments. In order to extract this function out of the builtin, we have to pass in a few additional parameters, like 'midx_must_contain_cruft' and 'packdir', which are currently declared as static variables within the builtin/repack.c compilation unit. Instead of adding additional parameters to `write_midx_included_packs()` extract out an "opts" struct that names these parameters, and pass a pointer to that, making it less cumbersome to add additional parameters. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 52 +++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 7713721826..1a79d1d834 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,6 +107,17 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } +struct repack_write_midx_opts { + struct string_list *include; + struct pack_geometry *geometry; + struct string_list *names; + const char *refs_snapshot; + const char *packdir; + int show_progress; + int write_bitmaps; + int midx_must_contain_cruft; +}; + static int midx_has_unknown_packs(char **midx_pack_names, size_t midx_pack_names_nr, struct string_list *include, @@ -290,19 +301,15 @@ static void midx_included_packs(struct string_list *include, strbuf_release(&buf); } -static int write_midx_included_packs(struct string_list *include, - struct pack_geometry *geometry, - struct string_list *names, - const char *refs_snapshot, - int show_progress, int write_bitmaps) +static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; - struct packed_git *preferred = pack_geometry_preferred_pack(geometry); + struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; int ret; - if (!include->nr) + if (!opts->include->nr) return 0; cmd.in = -1; @@ -311,18 +318,18 @@ static int write_midx_included_packs(struct string_list *include, strvec_push(&cmd.args, "multi-pack-index"); strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - if (show_progress) + if (opts->show_progress) strvec_push(&cmd.args, "--progress"); else strvec_push(&cmd.args, "--no-progress"); - if (write_bitmaps) + if (opts->write_bitmaps) strvec_push(&cmd.args, "--bitmap"); if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", pack_basename(preferred)); - else if (names->nr) { + else if (opts->names->nr) { /* The largest pack was repacked, meaning that either * one or two packs exist depending on whether the * repository has a cruft pack or not. @@ -335,7 +342,7 @@ static int write_midx_included_packs(struct string_list *include, * `--max-pack-size` was given, but any one of them * will suffice, so pick the first one.) */ - for_each_string_list_item(item, names) { + for_each_string_list_item(item, opts->names) { struct generated_pack *pack = item->util; if (generated_pack_has_ext(pack, ".mtimes")) continue; @@ -355,15 +362,16 @@ static int write_midx_included_packs(struct string_list *include, ; } - if (refs_snapshot) - strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot); + if (opts->refs_snapshot) + strvec_pushf(&cmd.args, "--refs-snapshot=%s", + opts->refs_snapshot); ret = start_command(&cmd); if (ret) return ret; in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, include) + for_each_string_list_item(item, opts->include) fprintf(in, "%s\n", item->string); fclose(in); @@ -1001,15 +1009,23 @@ int cmd_repack(int argc, if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; + struct repack_write_midx_opts opts = { + .include = &include, + .geometry = &geometry, + .names = &names, + .refs_snapshot = refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, + .packdir = packdir, + .show_progress = show_progress, + .write_bitmaps = write_bitmaps > 0, + .midx_must_contain_cruft = midx_must_contain_cruft + }; midx_included_packs(&include, &existing, midx_pack_names, midx_pack_names_nr, &names, &geometry); - ret = write_midx_included_packs(&include, &geometry, &names, - refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, - show_progress, write_bitmaps > 0); + ret = write_midx_included_packs(&opts); if (!ret && write_bitmaps) - remove_redundant_bitmaps(&include, packdir); + remove_redundant_bitmaps(&include, opts.packdir); string_list_clear(&include, 0); From c3690c97d7b08d9876fcaf0a572b4956bc9b4c33 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:56 -0400 Subject: [PATCH 51/92] builtin/repack.c: use a string_list for 'midx_pack_names' When writing a new MIDX, repack must determine whether or not there are any packs in the MIDX it is replacing (if one exists) that are not somehow represented in the new MIDX (e.g., either by preserving the pack verbatim, or rolling it up as part of a geometric repack, etc.). In order to do this, it keeps track of a list of pack names from the MIDX present in the repository at the start of the repack operation. Since we manipulate and close the object store, we cannot rely on the repository's in-core representation of the MIDX, since this is subject to change and/or go away. When this behavior was introduced in 5ee86c273b (repack: exclude cruft pack(s) from the MIDX where possible, 2025-06-23), we maintained an array of character pointers instead of using a convenience API, such as string-list.h. Store the list of MIDX pack names in a string_list, thereby reducing the number of parameters we have to pass to `midx_has_unknown_packs()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 1a79d1d834..dda533f171 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -118,18 +118,17 @@ struct repack_write_midx_opts { int midx_must_contain_cruft; }; -static int midx_has_unknown_packs(char **midx_pack_names, - size_t midx_pack_names_nr, +static int midx_has_unknown_packs(struct string_list *midx_pack_names, struct string_list *include, struct pack_geometry *geometry, struct existing_packs *existing) { - size_t i; + struct string_list_item *item; string_list_sort(include); - for (i = 0; i < midx_pack_names_nr; i++) { - const char *pack_name = midx_pack_names[i]; + for_each_string_list_item(item, midx_pack_names) { + const char *pack_name = item->string; /* * Determine whether or not each MIDX'd pack from the existing @@ -191,8 +190,7 @@ static int midx_has_unknown_packs(char **midx_pack_names, static void midx_included_packs(struct string_list *include, struct existing_packs *existing, - char **midx_pack_names, - size_t midx_pack_names_nr, + struct string_list *midx_pack_names, struct string_list *names, struct pack_geometry *geometry) { @@ -247,8 +245,8 @@ static void midx_included_packs(struct string_list *include, } if (midx_must_contain_cruft || - midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr, - include, geometry, existing)) { + midx_has_unknown_packs(midx_pack_names, include, geometry, + existing)) { /* * If there are one or more unknown pack(s) present (see * midx_has_unknown_packs() for what makes a pack @@ -606,13 +604,12 @@ int cmd_repack(int argc, struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; struct string_list names = STRING_LIST_INIT_DUP; + struct string_list midx_pack_names = STRING_LIST_INIT_DUP; struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; int i, ret; int show_progress; - char **midx_pack_names = NULL; - size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ struct repack_config_ctx config_ctx; @@ -985,13 +982,12 @@ int cmd_repack(int argc, struct multi_pack_index *m = get_multi_pack_index(repo->objects->sources); - ALLOC_ARRAY(midx_pack_names, - m->num_packs + m->num_packs_in_base); - - for (; m; m = m->base_midx) - for (uint32_t i = 0; i < m->num_packs; i++) - midx_pack_names[midx_pack_names_nr++] = - xstrdup(m->pack_names[i]); + for (; m; m = m->base_midx) { + for (uint32_t i = 0; i < m->num_packs; i++) { + string_list_append(&midx_pack_names, + m->pack_names[i]); + } + } } close_object_store(repo->objects); @@ -1019,8 +1015,8 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, midx_pack_names, - midx_pack_names_nr, &names, &geometry); + midx_included_packs(&include, &existing, &midx_pack_names, + &names, &geometry); ret = write_midx_included_packs(&opts); @@ -1067,11 +1063,9 @@ int cmd_repack(int argc, cleanup: string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); + string_list_clear(&midx_pack_names, 0); existing_packs_release(&existing); pack_geometry_release(&geometry); - for (size_t i = 0; i < midx_pack_names_nr; i++) - free(midx_pack_names[i]); - free(midx_pack_names); pack_objects_args_release(&po_args); pack_objects_args_release(&cruft_po_args); From 2fee63a71ae8113fd91d8e5924ae4a5619ad0cd3 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:28:59 -0400 Subject: [PATCH 52/92] repack: keep track of MIDX pack names using existing_packs Instead of storing the list of MIDX pack names separately, let's inline it into the existing_packs struct, further reducing the number of parameters we have to pass around. This amounts to adding a new string_list to the existing_packs struct, and populating it via `existing_packs_collect()`. This is fairly straightforward to do, since we are already looping over all packs, all we need to do is: if (p->multi_pack_index) string_list_append(&existing->midx_packs, pack_basename(p)); Note, however, that this check *must* come before other conditions where we discard and do not keep track of a pack, including the condition "if (!p->pack_local)" immediately below. This is because the existing routine which collects MIDX pack names does so blindly, and does not discard, for example, non-local packs. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 26 ++++---------------------- repack.c | 5 +++++ repack.h | 1 + 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index dda533f171..a57a14ef60 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -118,8 +118,7 @@ struct repack_write_midx_opts { int midx_must_contain_cruft; }; -static int midx_has_unknown_packs(struct string_list *midx_pack_names, - struct string_list *include, +static int midx_has_unknown_packs(struct string_list *include, struct pack_geometry *geometry, struct existing_packs *existing) { @@ -127,7 +126,7 @@ static int midx_has_unknown_packs(struct string_list *midx_pack_names, string_list_sort(include); - for_each_string_list_item(item, midx_pack_names) { + for_each_string_list_item(item, &existing->midx_packs) { const char *pack_name = item->string; /* @@ -190,7 +189,6 @@ static int midx_has_unknown_packs(struct string_list *midx_pack_names, static void midx_included_packs(struct string_list *include, struct existing_packs *existing, - struct string_list *midx_pack_names, struct string_list *names, struct pack_geometry *geometry) { @@ -245,8 +243,7 @@ static void midx_included_packs(struct string_list *include, } if (midx_must_contain_cruft || - midx_has_unknown_packs(midx_pack_names, include, geometry, - existing)) { + midx_has_unknown_packs(include, geometry, existing)) { /* * If there are one or more unknown pack(s) present (see * midx_has_unknown_packs() for what makes a pack @@ -604,7 +601,6 @@ int cmd_repack(int argc, struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; struct string_list names = STRING_LIST_INIT_DUP; - struct string_list midx_pack_names = STRING_LIST_INIT_DUP; struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct tempfile *refs_snapshot = NULL; @@ -978,18 +974,6 @@ int cmd_repack(int argc, string_list_sort(&names); - if (get_multi_pack_index(repo->objects->sources)) { - struct multi_pack_index *m = - get_multi_pack_index(repo->objects->sources); - - for (; m; m = m->base_midx) { - for (uint32_t i = 0; i < m->num_packs; i++) { - string_list_append(&midx_pack_names, - m->pack_names[i]); - } - } - } - close_object_store(repo->objects); /* @@ -1015,8 +999,7 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, &midx_pack_names, - &names, &geometry); + midx_included_packs(&include, &existing, &names, &geometry); ret = write_midx_included_packs(&opts); @@ -1063,7 +1046,6 @@ int cmd_repack(int argc, cleanup: string_list_clear(&keep_pack_list, 0); string_list_clear(&names, 1); - string_list_clear(&midx_pack_names, 0); existing_packs_release(&existing); pack_geometry_release(&geometry); pack_objects_args_release(&po_args); diff --git a/repack.c b/repack.c index d8afdd352d..1d485e0112 100644 --- a/repack.c +++ b/repack.c @@ -80,6 +80,9 @@ void existing_packs_collect(struct existing_packs *existing, size_t i; const char *base; + if (p->multi_pack_index) + string_list_append(&existing->midx_packs, + pack_basename(p)); if (!p->pack_local) continue; @@ -104,6 +107,7 @@ void existing_packs_collect(struct existing_packs *existing, string_list_sort(&existing->kept_packs); string_list_sort(&existing->non_kept_packs); string_list_sort(&existing->cruft_packs); + string_list_sort(&existing->midx_packs); strbuf_release(&buf); } @@ -220,6 +224,7 @@ void existing_packs_release(struct existing_packs *existing) string_list_clear(&existing->kept_packs, 0); string_list_clear(&existing->non_kept_packs, 0); string_list_clear(&existing->cruft_packs, 0); + string_list_clear(&existing->midx_packs, 0); } static struct { diff --git a/repack.h b/repack.h index 803e129224..6aa5b4e0f0 100644 --- a/repack.h +++ b/repack.h @@ -40,6 +40,7 @@ struct existing_packs { struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; + struct string_list midx_packs; }; #define EXISTING_PACKS_INIT { \ From 42088e3d4ae5c5bc77a49fcbba79832d10d03499 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:02 -0400 Subject: [PATCH 53/92] builtin/repack.c: reorder `remove_redundant_bitmaps()` The next commit will inline the call to `remove_redundant_bitmaps()` into `write_midx_included_packs()`. Reorder these two functions to avoid a forward declaration to `remove_redundant_bitmaps()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 58 ++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a57a14ef60..865e0af039 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -296,6 +296,35 @@ static void midx_included_packs(struct string_list *include, strbuf_release(&buf); } +static void remove_redundant_bitmaps(struct string_list *include, + const char *packdir) +{ + struct strbuf path = STRBUF_INIT; + struct string_list_item *item; + size_t packdir_len; + + strbuf_addstr(&path, packdir); + strbuf_addch(&path, '/'); + packdir_len = path.len; + + /* + * Remove any pack bitmaps corresponding to packs which are now + * included in the MIDX. + */ + for_each_string_list_item(item, include) { + strbuf_addstr(&path, item->string); + strbuf_strip_suffix(&path, ".idx"); + strbuf_addstr(&path, ".bitmap"); + + if (unlink(path.buf) && errno != ENOENT) + warning_errno(_("could not remove stale bitmap: %s"), + path.buf); + + strbuf_setlen(&path, packdir_len); + } + strbuf_release(&path); +} + static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -373,35 +402,6 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) return finish_command(&cmd); } -static void remove_redundant_bitmaps(struct string_list *include, - const char *packdir) -{ - struct strbuf path = STRBUF_INIT; - struct string_list_item *item; - size_t packdir_len; - - strbuf_addstr(&path, packdir); - strbuf_addch(&path, '/'); - packdir_len = path.len; - - /* - * Remove any pack bitmaps corresponding to packs which are now - * included in the MIDX. - */ - for_each_string_list_item(item, include) { - strbuf_addstr(&path, item->string); - strbuf_strip_suffix(&path, ".idx"); - strbuf_addstr(&path, ".bitmap"); - - if (unlink(path.buf) && errno != ENOENT) - warning_errno(_("could not remove stale bitmap: %s"), - path.buf); - - strbuf_setlen(&path, packdir_len); - } - strbuf_release(&path); -} - static int finish_pack_objects_cmd(const struct git_hash_algo *algop, struct child_process *cmd, struct string_list *names, From 337baea7212f0cf1aaa00a885d75098e260a22b0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:05 -0400 Subject: [PATCH 54/92] builtin/repack.c: inline `remove_redundant_bitmaps()` After writing a new MIDX, the repack command removes any bitmaps belonging to packs which were written into the MIDX. This is currently done in a separate function outside of `write_midx_included_packs()`, which forces the caller to keep track of the set of packs written into the MIDX. Prepare to no longer require the caller to keep track of such information by inlining the clean-up into `write_midx_included_packs()`. Future commits will make the caller oblivious to the set of packs included in the MIDX altogether. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 865e0af039..271c869268 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -331,10 +331,10 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; - int ret; + int ret = 0; if (!opts->include->nr) - return 0; + goto done; cmd.in = -1; cmd.git_cmd = 1; @@ -392,14 +392,18 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) ret = start_command(&cmd); if (ret) - return ret; + goto done; in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, opts->include) fprintf(in, "%s\n", item->string); fclose(in); - return finish_command(&cmd); + ret = finish_command(&cmd); +done: + if (!ret && opts->write_bitmaps) + remove_redundant_bitmaps(opts->include, opts->packdir); + return ret; } static int finish_pack_objects_cmd(const struct git_hash_algo *algop, @@ -1003,9 +1007,6 @@ int cmd_repack(int argc, ret = write_midx_included_packs(&opts); - if (!ret && write_bitmaps) - remove_redundant_bitmaps(&include, opts.packdir); - string_list_clear(&include, 0); if (ret) From f07263fd9fcb6b03f1e1db041269e2d5b85ccff8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:08 -0400 Subject: [PATCH 55/92] builtin/repack.c: pass `repack_write_midx_opts` to `midx_included_packs` Instead of passing individual parameters (in this case, "existing", "names", and "geometry") to `midx_included_packs()`, pass a pointer to a `repack_write_midx_opts` structure instead. Besides reducing the number of parameters necessary to call the `midx_included_packs` function, this refactoring sets us up nicely to inline the call to `midx_included_packs()` into `write_midx_included_packs()`, thus making the caller (in this case, `cmd_repack()`) oblivious to the set of packs being written into the MIDX. In order to do this, `repack_write_midx_opts` has to keep track of the set of existing packs, so add an additional field to point to that set. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 271c869268..a518a2b2f3 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -108,6 +108,7 @@ static int repack_config(const char *var, const char *value, } struct repack_write_midx_opts { + struct existing_packs *existing; struct string_list *include; struct pack_geometry *geometry; struct string_list *names; @@ -188,10 +189,11 @@ static int midx_has_unknown_packs(struct string_list *include, } static void midx_included_packs(struct string_list *include, - struct existing_packs *existing, - struct string_list *names, - struct pack_geometry *geometry) + struct repack_write_midx_opts *opts) { + struct existing_packs *existing = opts->existing; + struct pack_geometry *geometry = opts->geometry; + struct string_list *names = opts->names; struct string_list_item *item; struct strbuf buf = STRBUF_INIT; @@ -242,7 +244,7 @@ static void midx_included_packs(struct string_list *include, } } - if (midx_must_contain_cruft || + if (opts->midx_must_contain_cruft || midx_has_unknown_packs(include, geometry, existing)) { /* * If there are one or more unknown pack(s) present (see @@ -994,6 +996,7 @@ int cmd_repack(int argc, if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; struct repack_write_midx_opts opts = { + .existing = &existing, .include = &include, .geometry = &geometry, .names = &names, @@ -1003,7 +1006,7 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &existing, &names, &geometry); + midx_included_packs(&include, &opts); ret = write_midx_included_packs(&opts); From f17757487b2d212f86edaaf02306972e1a555bbd Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:10 -0400 Subject: [PATCH 56/92] builtin/repack.c: inline packs within `write_midx_included_packs()` To write a MIDX at the end of a repack operation, 'git repack' presently computes the set of packs to write into the MIDX, before invoking `write_midx_included_packs()` with a `string_list` containing those packs. The logic for computing which packs are supposed to appear in the resulting MIDX is within `midx_included_packs()`, where it is aware of details like which cruft pack(s) were written/combined, if/how we did a geometric repack, etc. Computing this list ourselves before providing it to the sole function to make use of that list `write_midx_included_packs()` is somewhat awkward. In the future, repack will learn how to write incremental MIDXs, which will use a very different pack selection routine. Instead of doing something like: struct string_list included_packs = STRING_LIST_INIT_DUP; if (incremental) { midx_incremental_included_packs(&included_packs, ...): write_midx_incremental_included_packs(&included_packs, ...); } else { midx_included_packs(&included_packs, ...): write_midx_included_packs(&included_packs, ...); } in the future, let's have each function that writes a MIDX be responsible for itself computing the list of included packs. Inline the declaration and initialization of `included_packs` into the `write_midx_included_packs()` function itself, and repeat that pattern in the future when we introduce new ways to write MIDXs. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a518a2b2f3..fad10be42a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -109,7 +109,6 @@ static int repack_config(const char *var, const char *value, struct repack_write_midx_opts { struct existing_packs *existing; - struct string_list *include; struct pack_geometry *geometry; struct string_list *names; const char *refs_snapshot; @@ -330,12 +329,14 @@ static void remove_redundant_bitmaps(struct string_list *include, static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list include = STRING_LIST_INIT_DUP; struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); FILE *in; int ret = 0; - if (!opts->include->nr) + midx_included_packs(&include, opts); + if (!include.nr) goto done; cmd.in = -1; @@ -397,14 +398,17 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) goto done; in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, opts->include) + for_each_string_list_item(item, &include) fprintf(in, "%s\n", item->string); fclose(in); ret = finish_command(&cmd); done: if (!ret && opts->write_bitmaps) - remove_redundant_bitmaps(opts->include, opts->packdir); + remove_redundant_bitmaps(&include, opts->packdir); + + string_list_clear(&include, 0); + return ret; } @@ -994,10 +998,8 @@ int cmd_repack(int argc, existing_packs_mark_for_deletion(&existing, &names); if (write_midx) { - struct string_list include = STRING_LIST_INIT_DUP; struct repack_write_midx_opts opts = { .existing = &existing, - .include = &include, .geometry = &geometry, .names = &names, .refs_snapshot = refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, @@ -1006,12 +1008,9 @@ int cmd_repack(int argc, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft }; - midx_included_packs(&include, &opts); ret = write_midx_included_packs(&opts); - string_list_clear(&include, 0); - if (ret) goto cleanup; } From 6d05eb135f67d2d45a0fbd110a32d28b1e28c95d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:13 -0400 Subject: [PATCH 57/92] repack: 'write_midx_included_packs' API from the builtin Now that we have sufficiently cleaned up the write_midx_included_packs() function, we can move it (along with the struct repack_write_midx_opts) out of the builtin, and into the repack.h header. Since this function (and the static ones that it depends on) are MIDX-specific details of the repacking process, move them to the repack-midx.c compilation unit instead of the general repack.c one. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 305 ----------------------------------------------- repack-midx.c | 295 +++++++++++++++++++++++++++++++++++++++++++++ repack.h | 12 ++ 3 files changed, 307 insertions(+), 305 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index fad10be42a..2f49a18283 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,311 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -struct repack_write_midx_opts { - struct existing_packs *existing; - struct pack_geometry *geometry; - struct string_list *names; - const char *refs_snapshot; - const char *packdir; - int show_progress; - int write_bitmaps; - int midx_must_contain_cruft; -}; - -static int midx_has_unknown_packs(struct string_list *include, - struct pack_geometry *geometry, - struct existing_packs *existing) -{ - struct string_list_item *item; - - string_list_sort(include); - - for_each_string_list_item(item, &existing->midx_packs) { - const char *pack_name = item->string; - - /* - * Determine whether or not each MIDX'd pack from the existing - * MIDX (if any) is represented in the new MIDX. For each pack - * in the MIDX, it must either be: - * - * - In the "include" list of packs to be included in the new - * MIDX. Note this function is called before the include - * list is populated with any cruft pack(s). - * - * - Below the geometric split line (if using pack geometry), - * indicating that the pack won't be included in the new - * MIDX, but its contents were rolled up as part of the - * geometric repack. - * - * - In the existing non-kept packs list (if not using pack - * geometry), and marked as non-deleted. - */ - if (string_list_has_string(include, pack_name)) { - continue; - } else if (geometry) { - struct strbuf buf = STRBUF_INIT; - uint32_t j; - - for (j = 0; j < geometry->split; j++) { - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(geometry->pack[j])); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - if (!strcmp(pack_name, buf.buf)) { - strbuf_release(&buf); - break; - } - } - - strbuf_release(&buf); - - if (j < geometry->split) - continue; - } else { - struct string_list_item *item; - - item = string_list_lookup(&existing->non_kept_packs, - pack_name); - if (item && !existing_pack_is_marked_for_deletion(item)) - continue; - } - - /* - * If we got to this point, the MIDX includes some pack that we - * don't know about. - */ - return 1; - } - - return 0; -} - -static void midx_included_packs(struct string_list *include, - struct repack_write_midx_opts *opts) -{ - struct existing_packs *existing = opts->existing; - struct pack_geometry *geometry = opts->geometry; - struct string_list *names = opts->names; - struct string_list_item *item; - struct strbuf buf = STRBUF_INIT; - - for_each_string_list_item(item, &existing->kept_packs) { - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - for_each_string_list_item(item, names) { - strbuf_reset(&buf); - strbuf_addf(&buf, "pack-%s.idx", item->string); - string_list_insert(include, buf.buf); - } - - if (geometry->split_factor) { - uint32_t i; - - for (i = geometry->split; i < geometry->pack_nr; i++) { - struct packed_git *p = geometry->pack[i]; - - /* - * The multi-pack index never refers to packfiles part - * of an alternate object database, so we skip these. - * While git-multi-pack-index(1) would silently ignore - * them anyway, this allows us to skip executing the - * command completely when we have only non-local - * packfiles. - */ - if (!p->pack_local) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - strbuf_addstr(&buf, ".idx"); - - string_list_insert(include, buf.buf); - } - } else { - for_each_string_list_item(item, &existing->non_kept_packs) { - if (existing_pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } - - if (opts->midx_must_contain_cruft || - midx_has_unknown_packs(include, geometry, existing)) { - /* - * If there are one or more unknown pack(s) present (see - * midx_has_unknown_packs() for what makes a pack - * "unknown") in the MIDX before the repack, keep them - * as they may be required to form a reachability - * closure if the MIDX is bitmapped. - * - * For example, a cruft pack can be required to form a - * reachability closure if the MIDX is bitmapped and one - * or more of the bitmap's selected commits reaches a - * once-cruft object that was later made reachable. - */ - for_each_string_list_item(item, &existing->cruft_packs) { - /* - * When doing a --geometric repack, there is no - * need to check for deleted packs, since we're - * by definition not doing an ALL_INTO_ONE - * repack (hence no packs will be deleted). - * Otherwise we must check for and exclude any - * packs which are enqueued for deletion. - * - * So we could omit the conditional below in the - * --geometric case, but doing so is unnecessary - * since no packs are marked as pending - * deletion (since we only call - * `existing_packs_mark_for_deletion()` when - * doing an all-into-one repack). - */ - if (existing_pack_is_marked_for_deletion(item)) - continue; - - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); - } - } else { - /* - * Modern versions of Git (with the appropriate - * configuration setting) will write new copies of - * once-cruft objects when doing a --geometric repack. - * - * If the MIDX has no cruft pack, new packs written - * during a --geometric repack will not rely on the - * cruft pack to form a reachability closure, so we can - * avoid including them in the MIDX in that case. - */ - ; - } - - strbuf_release(&buf); -} - -static void remove_redundant_bitmaps(struct string_list *include, - const char *packdir) -{ - struct strbuf path = STRBUF_INIT; - struct string_list_item *item; - size_t packdir_len; - - strbuf_addstr(&path, packdir); - strbuf_addch(&path, '/'); - packdir_len = path.len; - - /* - * Remove any pack bitmaps corresponding to packs which are now - * included in the MIDX. - */ - for_each_string_list_item(item, include) { - strbuf_addstr(&path, item->string); - strbuf_strip_suffix(&path, ".idx"); - strbuf_addstr(&path, ".bitmap"); - - if (unlink(path.buf) && errno != ENOENT) - warning_errno(_("could not remove stale bitmap: %s"), - path.buf); - - strbuf_setlen(&path, packdir_len); - } - strbuf_release(&path); -} - -static int write_midx_included_packs(struct repack_write_midx_opts *opts) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list include = STRING_LIST_INIT_DUP; - struct string_list_item *item; - struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); - FILE *in; - int ret = 0; - - midx_included_packs(&include, opts); - if (!include.nr) - goto done; - - cmd.in = -1; - cmd.git_cmd = 1; - - strvec_push(&cmd.args, "multi-pack-index"); - strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - - if (opts->show_progress) - strvec_push(&cmd.args, "--progress"); - else - strvec_push(&cmd.args, "--no-progress"); - - if (opts->write_bitmaps) - strvec_push(&cmd.args, "--bitmap"); - - if (preferred) - strvec_pushf(&cmd.args, "--preferred-pack=%s", - pack_basename(preferred)); - else if (opts->names->nr) { - /* The largest pack was repacked, meaning that either - * one or two packs exist depending on whether the - * repository has a cruft pack or not. - * - * Select the non-cruft one as preferred to encourage - * pack-reuse among packs containing reachable objects - * over unreachable ones. - * - * (Note we could write multiple packs here if - * `--max-pack-size` was given, but any one of them - * will suffice, so pick the first one.) - */ - for_each_string_list_item(item, opts->names) { - struct generated_pack *pack = item->util; - if (generated_pack_has_ext(pack, ".mtimes")) - continue; - - strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", - item->string); - break; - } - } else { - /* - * No packs were kept, and no packs were written. The - * only thing remaining are .keep packs (unless - * --pack-kept-objects was given). - * - * Set the `--preferred-pack` arbitrarily here. - */ - ; - } - - if (opts->refs_snapshot) - strvec_pushf(&cmd.args, "--refs-snapshot=%s", - opts->refs_snapshot); - - ret = start_command(&cmd); - if (ret) - goto done; - - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, &include) - fprintf(in, "%s\n", item->string); - fclose(in); - - ret = finish_command(&cmd); -done: - if (!ret && opts->write_bitmaps) - remove_redundant_bitmaps(&include, opts->packdir); - - string_list_clear(&include, 0); - - return ret; -} - static int finish_pack_objects_cmd(const struct git_hash_algo *algop, struct child_process *cmd, struct string_list *names, diff --git a/repack-midx.c b/repack-midx.c index 354df729a5..6f6202c5bc 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -6,6 +6,7 @@ #include "oidset.h" #include "pack-bitmap.h" #include "refs.h" +#include "run-command.h" #include "tempfile.h" struct midx_snapshot_ref_data { @@ -75,3 +76,297 @@ void midx_snapshot_refs(struct repository *repo, struct tempfile *f) oidset_clear(&data.seen); } + +static int midx_has_unknown_packs(struct string_list *include, + struct pack_geometry *geometry, + struct existing_packs *existing) +{ + struct string_list_item *item; + + string_list_sort(include); + + for_each_string_list_item(item, &existing->midx_packs) { + const char *pack_name = item->string; + + /* + * Determine whether or not each MIDX'd pack from the existing + * MIDX (if any) is represented in the new MIDX. For each pack + * in the MIDX, it must either be: + * + * - In the "include" list of packs to be included in the new + * MIDX. Note this function is called before the include + * list is populated with any cruft pack(s). + * + * - Below the geometric split line (if using pack geometry), + * indicating that the pack won't be included in the new + * MIDX, but its contents were rolled up as part of the + * geometric repack. + * + * - In the existing non-kept packs list (if not using pack + * geometry), and marked as non-deleted. + */ + if (string_list_has_string(include, pack_name)) { + continue; + } else if (geometry) { + struct strbuf buf = STRBUF_INIT; + uint32_t j; + + for (j = 0; j < geometry->split; j++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(geometry->pack[j])); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (!strcmp(pack_name, buf.buf)) { + strbuf_release(&buf); + break; + } + } + + strbuf_release(&buf); + + if (j < geometry->split) + continue; + } else { + struct string_list_item *item; + + item = string_list_lookup(&existing->non_kept_packs, + pack_name); + if (item && !existing_pack_is_marked_for_deletion(item)) + continue; + } + + /* + * If we got to this point, the MIDX includes some pack that we + * don't know about. + */ + return 1; + } + + return 0; +} + +static void midx_included_packs(struct string_list *include, + struct repack_write_midx_opts *opts) +{ + struct existing_packs *existing = opts->existing; + struct pack_geometry *geometry = opts->geometry; + struct string_list *names = opts->names; + struct string_list_item *item; + struct strbuf buf = STRBUF_INIT; + + for_each_string_list_item(item, &existing->kept_packs) { + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + for_each_string_list_item(item, names) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", item->string); + string_list_insert(include, buf.buf); + } + + if (geometry->split_factor) { + uint32_t i; + + for (i = geometry->split; i < geometry->pack_nr; i++) { + struct packed_git *p = geometry->pack[i]; + + /* + * The multi-pack index never refers to packfiles part + * of an alternate object database, so we skip these. + * While git-multi-pack-index(1) would silently ignore + * them anyway, this allows us to skip executing the + * command completely when we have only non-local + * packfiles. + */ + if (!p->pack_local) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + string_list_insert(include, buf.buf); + } + } else { + for_each_string_list_item(item, &existing->non_kept_packs) { + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } + + if (opts->midx_must_contain_cruft || + midx_has_unknown_packs(include, geometry, existing)) { + /* + * If there are one or more unknown pack(s) present (see + * midx_has_unknown_packs() for what makes a pack + * "unknown") in the MIDX before the repack, keep them + * as they may be required to form a reachability + * closure if the MIDX is bitmapped. + * + * For example, a cruft pack can be required to form a + * reachability closure if the MIDX is bitmapped and one + * or more of the bitmap's selected commits reaches a + * once-cruft object that was later made reachable. + */ + for_each_string_list_item(item, &existing->cruft_packs) { + /* + * When doing a --geometric repack, there is no + * need to check for deleted packs, since we're + * by definition not doing an ALL_INTO_ONE + * repack (hence no packs will be deleted). + * Otherwise we must check for and exclude any + * packs which are enqueued for deletion. + * + * So we could omit the conditional below in the + * --geometric case, but doing so is unnecessary + * since no packs are marked as pending + * deletion (since we only call + * `existing_packs_mark_for_deletion()` when + * doing an all-into-one repack). + */ + if (existing_pack_is_marked_for_deletion(item)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } else { + /* + * Modern versions of Git (with the appropriate + * configuration setting) will write new copies of + * once-cruft objects when doing a --geometric repack. + * + * If the MIDX has no cruft pack, new packs written + * during a --geometric repack will not rely on the + * cruft pack to form a reachability closure, so we can + * avoid including them in the MIDX in that case. + */ + ; + } + + strbuf_release(&buf); +} + +static void remove_redundant_bitmaps(struct string_list *include, + const char *packdir) +{ + struct strbuf path = STRBUF_INIT; + struct string_list_item *item; + size_t packdir_len; + + strbuf_addstr(&path, packdir); + strbuf_addch(&path, '/'); + packdir_len = path.len; + + /* + * Remove any pack bitmaps corresponding to packs which are now + * included in the MIDX. + */ + for_each_string_list_item(item, include) { + strbuf_addstr(&path, item->string); + strbuf_strip_suffix(&path, ".idx"); + strbuf_addstr(&path, ".bitmap"); + + if (unlink(path.buf) && errno != ENOENT) + warning_errno(_("could not remove stale bitmap: %s"), + path.buf); + + strbuf_setlen(&path, packdir_len); + } + strbuf_release(&path); +} + +int write_midx_included_packs(struct repack_write_midx_opts *opts) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list include = STRING_LIST_INIT_DUP; + struct string_list_item *item; + struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); + FILE *in; + int ret = 0; + + midx_included_packs(&include, opts); + if (!include.nr) + goto done; + + cmd.in = -1; + cmd.git_cmd = 1; + + strvec_push(&cmd.args, "multi-pack-index"); + strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); + + if (opts->show_progress) + strvec_push(&cmd.args, "--progress"); + else + strvec_push(&cmd.args, "--no-progress"); + + if (opts->write_bitmaps) + strvec_push(&cmd.args, "--bitmap"); + + if (preferred) + strvec_pushf(&cmd.args, "--preferred-pack=%s", + pack_basename(preferred)); + else if (opts->names->nr) { + /* The largest pack was repacked, meaning that either + * one or two packs exist depending on whether the + * repository has a cruft pack or not. + * + * Select the non-cruft one as preferred to encourage + * pack-reuse among packs containing reachable objects + * over unreachable ones. + * + * (Note we could write multiple packs here if + * `--max-pack-size` was given, but any one of them + * will suffice, so pick the first one.) + */ + for_each_string_list_item(item, opts->names) { + struct generated_pack *pack = item->util; + if (generated_pack_has_ext(pack, ".mtimes")) + continue; + + strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", + item->string); + break; + } + } else { + /* + * No packs were kept, and no packs were written. The + * only thing remaining are .keep packs (unless + * --pack-kept-objects was given). + * + * Set the `--preferred-pack` arbitrarily here. + */ + ; + } + + if (opts->refs_snapshot) + strvec_pushf(&cmd.args, "--refs-snapshot=%s", + opts->refs_snapshot); + + ret = start_command(&cmd); + if (ret) + goto done; + + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, &include) + fprintf(in, "%s\n", item->string); + fclose(in); + + ret = finish_command(&cmd); +done: + if (!ret && opts->write_bitmaps) + remove_redundant_bitmaps(&include, opts->packdir); + + string_list_clear(&include, 0); + + return ret; +} diff --git a/repack.h b/repack.h index 6aa5b4e0f0..25a31ac0a0 100644 --- a/repack.h +++ b/repack.h @@ -101,6 +101,18 @@ void pack_geometry_release(struct pack_geometry *geometry); struct tempfile; +struct repack_write_midx_opts { + struct existing_packs *existing; + struct pack_geometry *geometry; + struct string_list *names; + const char *refs_snapshot; + const char *packdir; + int show_progress; + int write_bitmaps; + int midx_must_contain_cruft; +}; + void midx_snapshot_refs(struct repository *repo, struct tempfile *f); +int write_midx_included_packs(struct repack_write_midx_opts *opts); #endif /* REPACK_H */ From 7a9c81a38ddb3b382103ccd45345c4892053fdfc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:16 -0400 Subject: [PATCH 58/92] builtin/repack.c: introduce `struct write_pack_opts` There are various functions within the 'repack' builtin which are responsible for writing different kinds of packs. They include: - `static int write_filtered_pack(...)` - `static int write_cruft_pack(...)` as well as the function `finish_pack_objects_cmd()`, which is responsible for finalizing a new pack write, and recording the checksum of its contents in the 'names' list. Both of these `write_` functions have a few things in common. They both take a pointer to the 'pack_objects_args' struct, as well as a pair of character pointers for `destination` and `pack_prefix`. Instead of repeating those arguments for each function, let's extract an options struct called "write_pack_opts" which has these three parameters as member fields. While we're at it, add fields for "packdir," and "packtmp", both of which are static variables within the builtin, and need to be read from within these two functions. This will shorten the list of parameters that callers have to provide to `write_filtered_pack()`, avoid ambiguity when passing multiple variables of the same type, and provide a unified interface for the two functions mentioned earlier. (Note that "pack_prefix" can be derived on the fly as a function of "packdir" and "packtmp", making it unnecessary to store "pack_prefix" explicitly. This commit ignores that potential cleanup in the name of doing as few things as possible, but a later commit will make that change.) Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 28 +++++++++++++++------------- repack.h | 8 ++++++++ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 2f49a18283..45ce469898 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -138,9 +138,7 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, return finish_command(cmd); } -static int write_filtered_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, +static int write_filtered_pack(const struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names) { @@ -150,9 +148,9 @@ static int write_filtered_pack(const struct pack_objects_args *args, int ret; const char *caret; const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); + int local = skip_prefix(opts->destination, opts->packdir, &scratch); - prepare_pack_objects(&cmd, args, destination); + prepare_pack_objects(&cmd, opts->po_args, opts->destination); strvec_push(&cmd.args, "--stdin-packs"); @@ -175,7 +173,7 @@ static int write_filtered_pack(const struct pack_objects_args *args, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + fprintf(in, "^%s-%s.pack\n", opts->pack_prefix, item->string); for_each_string_list_item(item, &existing->non_kept_packs) fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) @@ -665,14 +663,18 @@ int cmd_repack(int argc, } if (po_args.filter_options.choice) { - if (!filter_to) - filter_to = packtmp; + struct write_pack_opts opts = { + .po_args = &po_args, + .destination = filter_to, + .pack_prefix = find_pack_prefix(packdir, packtmp), + .packdir = packdir, + .packtmp = packtmp, + }; - ret = write_filtered_pack(&po_args, - filter_to, - find_pack_prefix(packdir, packtmp), - &existing, - &names); + if (!opts.destination) + opts.destination = packtmp; + + ret = write_filtered_pack(&opts, &existing, &names); if (ret) goto cleanup; } diff --git a/repack.h b/repack.h index 25a31ac0a0..6ef503f623 100644 --- a/repack.h +++ b/repack.h @@ -32,6 +32,14 @@ void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, const char *base_name); +struct write_pack_opts { + struct pack_objects_args *po_args; + const char *destination; + const char *pack_prefix; + const char *packdir; + const char *packtmp; +}; + struct repository; struct packed_git; From 3d2ac2065e2ac230c92cb87cc46053a0f3db1616 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:19 -0400 Subject: [PATCH 59/92] builtin/repack.c: use `write_pack_opts` within `write_cruft_pack()` Similar to the changes made in the previous commit to `write_filtered_pack()`, teach `write_cruft_pack()` to take a `write_pack_opts` struct and use that where possible. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 45ce469898..7295135ec2 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -221,9 +221,7 @@ static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, strbuf_release(&buf); } -static int write_cruft_pack(const struct pack_objects_args *args, - const char *destination, - const char *pack_prefix, +static int write_cruft_pack(const struct write_pack_opts *opts, const char *cruft_expiration, unsigned long combine_cruft_below_size, struct string_list *names, @@ -234,9 +232,9 @@ static int write_cruft_pack(const struct pack_objects_args *args, FILE *in; int ret; const char *scratch; - int local = skip_prefix(destination, packdir, &scratch); + int local = skip_prefix(opts->destination, opts->packdir, &scratch); - prepare_pack_objects(&cmd, args, destination); + prepare_pack_objects(&cmd, opts->po_args, opts->destination); strvec_push(&cmd.args, "--cruft"); if (cruft_expiration) @@ -267,7 +265,7 @@ static int write_cruft_pack(const struct pack_objects_args *args, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + fprintf(in, "%s-%s.pack\n", opts->pack_prefix, item->string); if (combine_cruft_below_size && !cruft_expiration) { combine_small_cruft_packs(in, combine_cruft_below_size, existing); @@ -599,6 +597,13 @@ int cmd_repack(int argc, if (pack_everything & PACK_CRUFT) { const char *pack_prefix = find_pack_prefix(packdir, packtmp); + struct write_pack_opts opts = { + .po_args = &cruft_po_args, + .destination = packtmp, + .pack_prefix = pack_prefix, + .packtmp = packtmp, + .packdir = packdir, + }; if (!cruft_po_args.window) cruft_po_args.window = xstrdup_or_null(po_args.window); @@ -615,8 +620,7 @@ int cmd_repack(int argc, cruft_po_args.quiet = po_args.quiet; cruft_po_args.delta_base_offset = po_args.delta_base_offset; - ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, - cruft_expiration, + ret = write_cruft_pack(&opts, cruft_expiration, combine_cruft_below_size, &names, &existing); if (ret) @@ -651,11 +655,8 @@ int cmd_repack(int argc, * pack, but rather removing all cruft packs from the * main repository regardless of size. */ - ret = write_cruft_pack(&cruft_po_args, expire_to, - pack_prefix, - NULL, - 0ul, - &names, + opts.destination = expire_to; + ret = write_cruft_pack(&opts, NULL, 0ul, &names, &existing); if (ret) goto cleanup; From 98fa0d50a75099df3f2d62f9181e4c1bbf70f063 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:21 -0400 Subject: [PATCH 60/92] repack: move `find_pack_prefix()` out of the builtin Both callers within the repack builtin which call functions that take a 'write_pack_opts' structure have the following pattern: struct write_pack_opts opts = { .packdir = packdir, .packtmp = packtmp, .pack_prefix = find_pack_prefix(packdir, packtmp), /* ... */ }; int ret = write_some_kind_of_pack(&opts, /* ... */); , but both "packdir" and "packtmp" are fields within the write_pack_opts struct itself! Instead of also computing the pack_prefix ahead of time, let's have the callees compute it themselves by moving `find_pack_prefix()` out of the repack builtin, and have it take a write_pack_opts pointer instead of the "packdir" and "packtmp" fields directly. This avoids the callers having to do some prep work that is common between the two of them, but also avoids the potential pitfall of accidentally writing: .pack_prefix = find_pack_prefix(packtmp, packdir), (which is well-typed) when the caller meant to instead write: .pack_prefix = find_pack_prefix(packdir, packtmp), Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 20 ++++---------------- repack.c | 11 +++++++++++ repack.h | 3 ++- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 7295135ec2..b21799c650 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -149,6 +149,7 @@ static int write_filtered_pack(const struct write_pack_opts *opts, const char *caret; const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); + const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -173,7 +174,7 @@ static int write_filtered_pack(const struct write_pack_opts *opts, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", opts->pack_prefix, item->string); + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); for_each_string_list_item(item, &existing->non_kept_packs) fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) @@ -233,6 +234,7 @@ static int write_cruft_pack(const struct write_pack_opts *opts, int ret; const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); + const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -265,7 +267,7 @@ static int write_cruft_pack(const struct write_pack_opts *opts, */ in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", opts->pack_prefix, item->string); + fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); if (combine_cruft_below_size && !cruft_expiration) { combine_small_cruft_packs(in, combine_cruft_below_size, existing); @@ -283,17 +285,6 @@ static int write_cruft_pack(const struct write_pack_opts *opts, local); } -static const char *find_pack_prefix(const char *packdir, const char *packtmp) -{ - const char *pack_prefix; - if (!skip_prefix(packtmp, packdir, &pack_prefix)) - die(_("pack prefix %s does not begin with objdir %s"), - packtmp, packdir); - if (*pack_prefix == '/') - pack_prefix++; - return pack_prefix; -} - int cmd_repack(int argc, const char **argv, const char *prefix, @@ -596,11 +587,9 @@ int cmd_repack(int argc, } if (pack_everything & PACK_CRUFT) { - const char *pack_prefix = find_pack_prefix(packdir, packtmp); struct write_pack_opts opts = { .po_args = &cruft_po_args, .destination = packtmp, - .pack_prefix = pack_prefix, .packtmp = packtmp, .packdir = packdir, }; @@ -667,7 +656,6 @@ int cmd_repack(int argc, struct write_pack_opts opts = { .po_args = &po_args, .destination = filter_to, - .pack_prefix = find_pack_prefix(packdir, packtmp), .packdir = packdir, .packtmp = packtmp, }; diff --git a/repack.c b/repack.c index 1d485e0112..19fd1d6d5b 100644 --- a/repack.c +++ b/repack.c @@ -66,6 +66,17 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, strbuf_release(&buf); } +const char *write_pack_opts_pack_prefix(const struct write_pack_opts *opts) +{ + const char *pack_prefix; + if (!skip_prefix(opts->packtmp, opts->packdir, &pack_prefix)) + die(_("pack prefix %s does not begin with objdir %s"), + opts->packtmp, opts->packdir); + if (*pack_prefix == '/') + pack_prefix++; + return pack_prefix; +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 6ef503f623..5852e2407f 100644 --- a/repack.h +++ b/repack.h @@ -35,11 +35,12 @@ void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, struct write_pack_opts { struct pack_objects_args *po_args; const char *destination; - const char *pack_prefix; const char *packdir; const char *packtmp; }; +const char *write_pack_opts_pack_prefix(const struct write_pack_opts *opts); + struct repository; struct packed_git; From 2f79c79bba0da415eed3a8e1b32823b7c388b7f4 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:24 -0400 Subject: [PATCH 61/92] repack: extract `write_pack_opts_is_local()` Similar to the previous commit, the functions `write_cruft_pack()` and `write_filtered_pack()` both compute a "local" variable via the exact same mechanism: const char *scratch; int local = skip_prefix(opts->destination, opts->packdir, &scratch); Not only does this cause us to repeat the same pair of lines, it also introduces an unnecessary "scratch" variable that is common between both functions. Instead of repeating ourselves, let's extract that functionality into a new function in the repack.h API called "write_pack_opts_is_local()". That function takes a pointer to a "struct write_pack_opts" (which has as fields both "destination" and "packdir"), and can encapsulate the dangling "scratch" field. Extract that function and make it visible within the repack.h API, and use it within both `write_cruft_pack()` and `write_filtered_pack()`. While we're at it, match our modern conventions by returning a "bool" instead of "int", and use `starts_with()` instead of `skip_prefix()` to avoid storing the dummy "scratch" variable. The remaining duplication (that is, that both `write_cruft_pack()` and `write_filtered_pack()` still both call `write_pack_opts_is_local()`) will be addressed in the following commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 6 ++---- repack.c | 5 +++++ repack.h | 1 + 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index b21799c650..d1449cfe13 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -147,8 +147,7 @@ static int write_filtered_pack(const struct write_pack_opts *opts, FILE *in; int ret; const char *caret; - const char *scratch; - int local = skip_prefix(opts->destination, opts->packdir, &scratch); + bool local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -232,8 +231,7 @@ static int write_cruft_pack(const struct write_pack_opts *opts, struct string_list_item *item; FILE *in; int ret; - const char *scratch; - int local = skip_prefix(opts->destination, opts->packdir, &scratch); + bool local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); diff --git a/repack.c b/repack.c index 19fd1d6d5b..d2ee9f2460 100644 --- a/repack.c +++ b/repack.c @@ -77,6 +77,11 @@ const char *write_pack_opts_pack_prefix(const struct write_pack_opts *opts) return pack_prefix; } +bool write_pack_opts_is_local(const struct write_pack_opts *opts) +{ + return starts_with(opts->destination, opts->packdir); +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 5852e2407f..26d1954ae2 100644 --- a/repack.h +++ b/repack.h @@ -40,6 +40,7 @@ struct write_pack_opts { }; const char *write_pack_opts_pack_prefix(const struct write_pack_opts *opts); +bool write_pack_opts_is_local(const struct write_pack_opts *opts); struct repository; struct packed_git; From 80db3cd18985609340f40b2b06f4ef9f86a2cbe0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:27 -0400 Subject: [PATCH 62/92] builtin/repack.c: pass `write_pack_opts` to `finish_pack_objects_cmd()` To prepare to move the `finish_pack_objects_cmd()` function out of the builtin and into the repack.h API, there are a couple of things we need to do first: - First, let's take advantage of `write_pack_opts_is_local()` function introduced in the previous commit instead of passing "local" explicitly. - Let's also avoid referring to the static 'packtmp' field within builtin/repack.c by instead accessing it through the write_pack_opts argument. There are three callers which need to adjust themselves in order to account for this change. The callers which reside in write_cruft_pack() and write_filtered_pack() both already have an "opts" in scope, so they can pass it through transparently. The other call (at the bottom of `cmd_repack()`) needs to initialize its own write_pack_opts to pass the necessary fields over to the direct call to `finish_pack_objects_cmd()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index d1449cfe13..5f382aaf19 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -108,11 +108,12 @@ static int repack_config(const char *var, const char *value, } static int finish_pack_objects_cmd(const struct git_hash_algo *algop, + const struct write_pack_opts *opts, struct child_process *cmd, - struct string_list *names, - int local) + struct string_list *names) { FILE *out; + bool local = write_pack_opts_is_local(opts); struct strbuf line = STRBUF_INIT; out = xfdopen(cmd->out, "r"); @@ -128,7 +129,8 @@ static int finish_pack_objects_cmd(const struct git_hash_algo *algop, */ if (local) { item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf, packtmp); + item->util = generated_pack_populate(line.buf, + opts->packtmp); } } fclose(out); @@ -147,7 +149,6 @@ static int write_filtered_pack(const struct write_pack_opts *opts, FILE *in; int ret; const char *caret; - bool local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -183,8 +184,8 @@ static int write_filtered_pack(const struct write_pack_opts *opts, fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); - return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, - local); + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); } static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, @@ -231,7 +232,6 @@ static int write_cruft_pack(const struct write_pack_opts *opts, struct string_list_item *item; FILE *in; int ret; - bool local = write_pack_opts_is_local(opts); const char *pack_prefix = write_pack_opts_pack_prefix(opts); prepare_pack_objects(&cmd, opts->po_args, opts->destination); @@ -279,8 +279,8 @@ static int write_cruft_pack(const struct write_pack_opts *opts, fprintf(in, "%s.pack\n", item->string); fclose(in); - return finish_pack_objects_cmd(existing->repo->hash_algo, &cmd, names, - local); + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); } int cmd_repack(int argc, @@ -560,9 +560,17 @@ int cmd_repack(int argc, fclose(in); } - ret = finish_pack_objects_cmd(repo->hash_algo, &cmd, &names, 1); - if (ret) - goto cleanup; + { + struct write_pack_opts opts = { + .packdir = packdir, + .destination = packdir, + .packtmp = packtmp, + }; + ret = finish_pack_objects_cmd(repo->hash_algo, &opts, &cmd, + &names); + if (ret) + goto cleanup; + } if (!names.nr) { if (!po_args.quiet) From fa0787a6cc1d8e7ef1e2e8398bdc13b987c61d69 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:30 -0400 Subject: [PATCH 63/92] repack: move `finish_pack_objects_cmd()` out of the builtin In a similar spirit as the previous commit(s), now that the function `finish_pack_objects_cmd()` has no explicit dependencies within the repack builtin, let's extract it. This prepares us to extract the remaining two functions within the repack builtin that explicitly write packfiles, which are `write_cruft_pack()` and `write_filtered_pack()`, which will be done in the future commits. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 33 --------------------------------- repack.c | 33 +++++++++++++++++++++++++++++++++ repack.h | 5 +++++ 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 5f382aaf19..71abcfa0b7 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -107,39 +107,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static int finish_pack_objects_cmd(const struct git_hash_algo *algop, - const struct write_pack_opts *opts, - struct child_process *cmd, - struct string_list *names) -{ - FILE *out; - bool local = write_pack_opts_is_local(opts); - struct strbuf line = STRBUF_INIT; - - out = xfdopen(cmd->out, "r"); - while (strbuf_getline_lf(&line, out) != EOF) { - struct string_list_item *item; - - if (line.len != algop->hexsz) - die(_("repack: Expecting full hex object ID lines only " - "from pack-objects.")); - /* - * Avoid putting packs written outside of the repository in the - * list of names. - */ - if (local) { - item = string_list_append(names, line.buf); - item->util = generated_pack_populate(line.buf, - opts->packtmp); - } - } - fclose(out); - - strbuf_release(&line); - - return finish_command(cmd); -} - static int write_filtered_pack(const struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names) diff --git a/repack.c b/repack.c index d2ee9f2460..2c478970f3 100644 --- a/repack.c +++ b/repack.c @@ -82,6 +82,39 @@ bool write_pack_opts_is_local(const struct write_pack_opts *opts) return starts_with(opts->destination, opts->packdir); } +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + const struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names) +{ + FILE *out; + bool local = write_pack_opts_is_local(opts); + struct strbuf line = STRBUF_INIT; + + out = xfdopen(cmd->out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + + if (line.len != algop->hexsz) + die(_("repack: Expecting full hex object ID lines only " + "from pack-objects.")); + /* + * Avoid putting packs written outside of the repository in the + * list of names. + */ + if (local) { + item = string_list_append(names, line.buf); + item->util = generated_pack_populate(line.buf, + opts->packtmp); + } + } + fclose(out); + + strbuf_release(&line); + + return finish_command(cmd); +} + #define DELETE_PACK 1 #define RETAIN_PACK 2 diff --git a/repack.h b/repack.h index 26d1954ae2..3244f601e2 100644 --- a/repack.h +++ b/repack.h @@ -42,6 +42,11 @@ struct write_pack_opts { const char *write_pack_opts_pack_prefix(const struct write_pack_opts *opts); bool write_pack_opts_is_local(const struct write_pack_opts *opts); +int finish_pack_objects_cmd(const struct git_hash_algo *algop, + const struct write_pack_opts *opts, + struct child_process *cmd, + struct string_list *names); + struct repository; struct packed_git; From d278970aef66e2cfcbcbab650c1fc1b6613b40db Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:33 -0400 Subject: [PATCH 64/92] repack: move `pack_kept_objects` to `struct pack_objects_args` The "pack_kept_objects" variable is defined as static to the repack builtin, but is inherently related to the pack-objects arguments that the builtin uses when generating new packs. Move that field into the "struct pack_objects_args", and shuffle around where we append the corresponding command-line option when preparing a pack-objects process. Specifically: - `write_cruft_pack()` always wants to pass "--honor-pack-keep", so explicitly set the `pack_kept_objects` field to "0" when initializing the `write_pack_opts` struct before calling `write_cruft_pack()`. - `write_filtered_pack()` no longer needs to handle writing the command-line option "--honor-pack-keep" when preparing a pack-objects process, since its call to `prepare_pack_objects()` will have already taken care of that. `write_filtered_pack()` also reads the `pack_kept_objects` field to determine whether to write the existing kept packs with a leading "^" character, so update that to read through the `po_args` pointer instead. - `cmd_repack()` also no longer has to write the "--honor-pack-keep" flag explicitly, since this is also handled via its call to `prepare_pack_objects()`. Since there is a default value for "pack_kept_objects" that relies on whether or not we are writing a bitmap (and not writing a MIDX), extract a default initializer for `struct pack_objects_args` that keeps this conditional default behavior. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 20 +++++++------------- repack-geometry.c | 5 ++--- repack.c | 2 ++ repack.h | 9 ++++++--- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 71abcfa0b7..3c6d7e91fd 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -33,7 +33,6 @@ #define RETAIN_PACK 2 static int pack_everything; -static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; @@ -68,7 +67,7 @@ static int repack_config(const char *var, const char *value, return 0; } if (!strcmp(var, "repack.packkeptobjects")) { - pack_kept_objects = git_config_bool(var, value); + po_args->pack_kept_objects = git_config_bool(var, value); return 0; } if (!strcmp(var, "repack.writebitmaps") || @@ -122,8 +121,6 @@ static int write_filtered_pack(const struct write_pack_opts *opts, strvec_push(&cmd.args, "--stdin-packs"); - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); for_each_string_list_item(item, &existing->kept_packs) strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); @@ -146,7 +143,7 @@ static int write_filtered_pack(const struct write_pack_opts *opts, fprintf(in, "%s.pack\n", item->string); for_each_string_list_item(item, &existing->cruft_packs) fprintf(in, "%s.pack\n", item->string); - caret = pack_kept_objects ? "" : "^"; + caret = opts->po_args->pack_kept_objects ? "" : "^"; for_each_string_list_item(item, &existing->kept_packs) fprintf(in, "%s%s.pack\n", caret, item->string); fclose(in); @@ -208,7 +205,6 @@ static int write_cruft_pack(const struct write_pack_opts *opts, strvec_pushf(&cmd.args, "--cruft-expiration=%s", cruft_expiration); - strvec_push(&cmd.args, "--honor-pack-keep"); strvec_push(&cmd.args, "--non-empty"); cmd.in = -1; @@ -332,7 +328,7 @@ int cmd_repack(int argc, OPT_UNSIGNED(0, "max-pack-size", &po_args.max_pack_size, N_("maximum size of each packfile")), OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options), - OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects, + OPT_BOOL(0, "pack-kept-objects", &po_args.pack_kept_objects, N_("repack objects in packs marked with .keep")), OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"), N_("do not repack this pack")), @@ -378,8 +374,8 @@ int cmd_repack(int argc, (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) write_bitmaps = 0; } - if (pack_kept_objects < 0) - pack_kept_objects = write_bitmaps > 0 && !write_midx; + if (po_args.pack_kept_objects < 0) + po_args.pack_kept_objects = write_bitmaps > 0 && !write_midx; if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) die(_(incremental_bitmap_conflict_error)); @@ -420,8 +416,7 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - pack_geometry_init(&geometry, &existing, &po_args, - pack_kept_objects); + pack_geometry_init(&geometry, &existing, &po_args); pack_geometry_split(&geometry); } @@ -430,8 +425,6 @@ int cmd_repack(int argc, show_progress = !po_args.quiet && isatty(2); strvec_push(&cmd.args, "--keep-true-parents"); - if (!pack_kept_objects) - strvec_push(&cmd.args, "--honor-pack-keep"); for (i = 0; i < keep_pack_list.nr; i++) strvec_pushf(&cmd.args, "--keep-pack=%s", keep_pack_list.items[i].string); @@ -581,6 +574,7 @@ int cmd_repack(int argc, cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; cruft_po_args.delta_base_offset = po_args.delta_base_offset; + cruft_po_args.pack_kept_objects = 0; ret = write_cruft_pack(&opts, cruft_expiration, combine_cruft_below_size, &names, diff --git a/repack-geometry.c b/repack-geometry.c index f58f1fc7f0..e2f9794d7d 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -27,8 +27,7 @@ static int pack_geometry_cmp(const void *va, const void *vb) void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects) + const struct pack_objects_args *args) { struct packfile_store *packs = existing->repo->objects->packfiles; struct packed_git *p; @@ -43,7 +42,7 @@ void pack_geometry_init(struct pack_geometry *geometry, */ continue; - if (!pack_kept_objects) { + if (!args->pack_kept_objects) { /* * Any pack that has its pack_keep bit set will * appear in existing->kept_packs below, but diff --git a/repack.c b/repack.c index 2c478970f3..2ab33c665a 100644 --- a/repack.c +++ b/repack.c @@ -38,6 +38,8 @@ void prepare_pack_objects(struct child_process *cmd, strvec_push(&cmd->args, "--quiet"); if (args->delta_base_offset) strvec_push(&cmd->args, "--delta-base-offset"); + if (!args->pack_kept_objects) + strvec_push(&cmd->args, "--honor-pack-keep"); strvec_push(&cmd->args, out); cmd->git_cmd = 1; cmd->out = -1; diff --git a/repack.h b/repack.h index 3244f601e2..0432379815 100644 --- a/repack.h +++ b/repack.h @@ -17,10 +17,14 @@ struct pack_objects_args { int name_hash_version; int path_walk; int delta_base_offset; + int pack_kept_objects; struct list_objects_filter_options filter_options; }; -#define PACK_OBJECTS_ARGS_INIT { .delta_base_offset = 1 } +#define PACK_OBJECTS_ARGS_INIT { \ + .delta_base_offset = 1, \ + .pack_kept_objects = -1, \ +} struct child_process; @@ -104,8 +108,7 @@ struct pack_geometry { void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, - const struct pack_objects_args *args, - int pack_kept_objects); + const struct pack_objects_args *args); void pack_geometry_split(struct pack_geometry *geometry); struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); void pack_geometry_remove_redundant(struct pack_geometry *geometry, From 7ac4231b4283f4f8dc8447439730a5a2b8ed7eb4 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:35 -0400 Subject: [PATCH 65/92] repack: move `write_filtered_pack()` out of the builtin In a similar fashion as in previous commits, move the function `write_filtered_pack()` out of the builtin and into its own compilation unit. This function is now part of the repack.h API, but implemented in its own "repack-filtered.c" unit as it is a separate component from other kinds of repacking operations. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 46 ------------------------------------------ meson.build | 1 + repack-filtered.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 4 ++++ 5 files changed, 57 insertions(+), 46 deletions(-) create mode 100644 repack-filtered.c diff --git a/Makefile b/Makefile index b214277163..ba4f3bbfa2 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-filtered.o LIB_OBJS += repack-geometry.o LIB_OBJS += repack-midx.o LIB_OBJS += repack-promisor.o diff --git a/builtin/repack.c b/builtin/repack.c index 3c6d7e91fd..f65880d8f6 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -106,52 +106,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static int write_filtered_pack(const struct write_pack_opts *opts, - struct existing_packs *existing, - struct string_list *names) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *caret; - const char *pack_prefix = write_pack_opts_pack_prefix(opts); - - prepare_pack_objects(&cmd, opts->po_args, opts->destination); - - strvec_push(&cmd.args, "--stdin-packs"); - - for_each_string_list_item(item, &existing->kept_packs) - strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * Here 'names' contains only the pack(s) that were just - * written, which is exactly the packs we want to keep. Also - * 'existing_kept_packs' already contains the packs in - * 'keep_pack_list'. - */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "%s.pack\n", item->string); - caret = opts->po_args->pack_kept_objects ? "" : "^"; - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s%s.pack\n", caret, item->string); - fclose(in); - - return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, - names); -} - static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, struct existing_packs *existing) { diff --git a/meson.build b/meson.build index 0423ed30c4..7124b158ae 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-filtered.c', 'repack-geometry.c', 'repack-midx.c', 'repack-promisor.c', diff --git a/repack-filtered.c b/repack-filtered.c new file mode 100644 index 0000000000..edcf7667c5 --- /dev/null +++ b/repack-filtered.c @@ -0,0 +1,51 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "repository.h" +#include "run-command.h" +#include "string-list.h" + +int write_filtered_pack(const struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *caret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--stdin-packs"); + + for_each_string_list_item(item, &existing->kept_packs) + strvec_pushf(&cmd.args, "--keep-pack=%s", item->string); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * Here 'names' contains only the pack(s) that were just + * written, which is exactly the packs we want to keep. Also + * 'existing_kept_packs' already contains the packs in + * 'keep_pack_list'. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string); + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "%s.pack\n", item->string); + caret = opts->po_args->pack_kept_objects ? "" : "^"; + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s%s.pack\n", caret, item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack.h b/repack.h index 0432379815..c790c90ef8 100644 --- a/repack.h +++ b/repack.h @@ -133,4 +133,8 @@ struct repack_write_midx_opts { void midx_snapshot_refs(struct repository *repo, struct tempfile *f); int write_midx_included_packs(struct repack_write_midx_opts *opts); +int write_filtered_pack(const struct write_pack_opts *opts, + struct existing_packs *existing, + struct string_list *names); + #endif /* REPACK_H */ From 09797bd9666bb9cc6232e414498578deb2697c2a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:38 -0400 Subject: [PATCH 66/92] repack: move `write_cruft_pack()` out of the builtin In an identical fashion as the previous commit, move the function `write_cruft_pack()` into its own compilation unit, and make the function visible through the repack.h API. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/repack.c | 94 --------------------------------------------- meson.build | 1 + repack-cruft.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ repack.h | 6 +++ 5 files changed, 107 insertions(+), 94 deletions(-) create mode 100644 repack-cruft.c diff --git a/Makefile b/Makefile index ba4f3bbfa2..e3c4bf1b4a 100644 --- a/Makefile +++ b/Makefile @@ -1137,6 +1137,7 @@ LIB_OBJS += refs/ref-cache.o LIB_OBJS += refspec.o LIB_OBJS += remote.o LIB_OBJS += repack.o +LIB_OBJS += repack-cruft.o LIB_OBJS += repack-filtered.o LIB_OBJS += repack-geometry.o LIB_OBJS += repack-midx.o diff --git a/builtin/repack.c b/builtin/repack.c index f65880d8f6..a68c22f605 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -106,100 +106,6 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } -static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, - struct existing_packs *existing) -{ - struct packfile_store *packs = existing->repo->objects->packfiles; - struct packed_git *p; - struct strbuf buf = STRBUF_INIT; - size_t i; - - for (p = packfile_store_get_all_packs(packs); p; p = p->next) { - if (!(p->is_cruft && p->pack_local)) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if (!string_list_has_string(&existing->cruft_packs, buf.buf)) - continue; - - if (p->pack_size < combine_cruft_below_size) { - fprintf(in, "-%s\n", pack_basename(p)); - } else { - existing_packs_retain_cruft(existing, p); - fprintf(in, "%s\n", pack_basename(p)); - } - } - - for (i = 0; i < existing->non_kept_packs.nr; i++) - fprintf(in, "-%s.pack\n", - existing->non_kept_packs.items[i].string); - - strbuf_release(&buf); -} - -static int write_cruft_pack(const struct write_pack_opts *opts, - const char *cruft_expiration, - unsigned long combine_cruft_below_size, - struct string_list *names, - struct existing_packs *existing) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - struct string_list_item *item; - FILE *in; - int ret; - const char *pack_prefix = write_pack_opts_pack_prefix(opts); - - prepare_pack_objects(&cmd, opts->po_args, opts->destination); - - strvec_push(&cmd.args, "--cruft"); - if (cruft_expiration) - strvec_pushf(&cmd.args, "--cruft-expiration=%s", - cruft_expiration); - - strvec_push(&cmd.args, "--non-empty"); - - cmd.in = -1; - - ret = start_command(&cmd); - if (ret) - return ret; - - /* - * names has a confusing double use: it both provides the list - * of just-written new packs, and accepts the name of the cruft - * pack we are writing. - * - * By the time it is read here, it contains only the pack(s) - * that were just written, which is exactly the set of packs we - * want to consider kept. - * - * If `--expire-to` is given, the double-use served by `names` - * ensures that the pack written to `--expire-to` excludes any - * objects contained in the cruft pack. - */ - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, names) - fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); - if (combine_cruft_below_size && !cruft_expiration) { - combine_small_cruft_packs(in, combine_cruft_below_size, - existing); - } else { - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "-%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "-%s.pack\n", item->string); - } - for_each_string_list_item(item, &existing->kept_packs) - fprintf(in, "%s.pack\n", item->string); - fclose(in); - - return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, - names); -} - int cmd_repack(int argc, const char **argv, const char *prefix, diff --git a/meson.build b/meson.build index 7124b158ae..39152b37ba 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ libgit_sources = [ 'reftable/writer.c', 'remote.c', 'repack.c', + 'repack-cruft.c', 'repack-filtered.c', 'repack-geometry.c', 'repack-midx.c', diff --git a/repack-cruft.c b/repack-cruft.c new file mode 100644 index 0000000000..c51df36722 --- /dev/null +++ b/repack-cruft.c @@ -0,0 +1,99 @@ +#include "git-compat-util.h" +#include "repack.h" +#include "packfile.h" +#include "repository.h" +#include "run-command.h" + +static void combine_small_cruft_packs(FILE *in, off_t combine_cruft_below_size, + struct existing_packs *existing) +{ + struct packfile_store *packs = existing->repo->objects->packfiles; + struct packed_git *p; + struct strbuf buf = STRBUF_INIT; + size_t i; + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { + if (!(p->is_cruft && p->pack_local)) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (!string_list_has_string(&existing->cruft_packs, buf.buf)) + continue; + + if (p->pack_size < combine_cruft_below_size) { + fprintf(in, "-%s\n", pack_basename(p)); + } else { + existing_packs_retain_cruft(existing, p); + fprintf(in, "%s\n", pack_basename(p)); + } + } + + for (i = 0; i < existing->non_kept_packs.nr; i++) + fprintf(in, "-%s.pack\n", + existing->non_kept_packs.items[i].string); + + strbuf_release(&buf); +} + +int write_cruft_pack(const struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list_item *item; + FILE *in; + int ret; + const char *pack_prefix = write_pack_opts_pack_prefix(opts); + + prepare_pack_objects(&cmd, opts->po_args, opts->destination); + + strvec_push(&cmd.args, "--cruft"); + if (cruft_expiration) + strvec_pushf(&cmd.args, "--cruft-expiration=%s", + cruft_expiration); + + strvec_push(&cmd.args, "--non-empty"); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * names has a confusing double use: it both provides the list + * of just-written new packs, and accepts the name of the cruft + * pack we are writing. + * + * By the time it is read here, it contains only the pack(s) + * that were just written, which is exactly the set of packs we + * want to consider kept. + * + * If `--expire-to` is given, the double-use served by `names` + * ensures that the pack written to `--expire-to` excludes any + * objects contained in the cruft pack. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + if (combine_cruft_below_size && !cruft_expiration) { + combine_small_cruft_packs(in, combine_cruft_below_size, + existing); + } else { + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "-%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "-%s.pack\n", item->string); + } + for_each_string_list_item(item, &existing->kept_packs) + fprintf(in, "%s.pack\n", item->string); + fclose(in); + + return finish_pack_objects_cmd(existing->repo->hash_algo, opts, &cmd, + names); +} diff --git a/repack.h b/repack.h index c790c90ef8..3a688a12ee 100644 --- a/repack.h +++ b/repack.h @@ -137,4 +137,10 @@ int write_filtered_pack(const struct write_pack_opts *opts, struct existing_packs *existing, struct string_list *names); +int write_cruft_pack(const struct write_pack_opts *opts, + const char *cruft_expiration, + unsigned long combine_cruft_below_size, + struct string_list *names, + struct existing_packs *existing); + #endif /* REPACK_H */ From 935ab44a0a4fae54f9cd378ede16f19e563e53d9 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 15 Oct 2025 18:29:41 -0400 Subject: [PATCH 67/92] builtin/repack.c: clean up unused `#include`s Over the past several dozen commits, we have moved a large amount of functionality out of the repack builtin and into other files like repack.c, repack-cruft.c, repack-filtered.c, repack-midx.c, and repack-promisor.c. These files specify the minimal set of `#include`s that they need to compile successfully, but we did not change the set of `#include`s in the repack builtin itself. Now that the code movement is complete, let's clean up that set of `#include`s and trim down the builtin to include the minimal amount of external headers necessary to compile. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a68c22f605..cfdb4c0920 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -4,26 +4,17 @@ #include "builtin.h" #include "config.h" #include "environment.h" -#include "gettext.h" -#include "hex.h" #include "parse-options.h" #include "path.h" #include "run-command.h" #include "server-info.h" -#include "strbuf.h" #include "string-list.h" -#include "strvec.h" #include "midx.h" #include "packfile.h" #include "prune-packed.h" -#include "odb.h" #include "promisor-remote.h" #include "repack.h" #include "shallow.h" -#include "pack.h" -#include "pack-bitmap.h" -#include "refs.h" -#include "list-objects-filter-options.h" #define ALL_INTO_ONE 1 #define LOOSEN_UNREACHABLE 2 From f229982df19c327876ce7ded40f6efefe20da5d4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 17 Oct 2025 14:02:03 -0700 Subject: [PATCH 68/92] The twentieth batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.52.0.adoc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/RelNotes/2.52.0.adoc b/Documentation/RelNotes/2.52.0.adoc index 8c4ed4eef4..ef5f91fcc0 100644 --- a/Documentation/RelNotes/2.52.0.adoc +++ b/Documentation/RelNotes/2.52.0.adoc @@ -127,6 +127,10 @@ Performance, Internal Implementation, Development Support etc. * Documentation for "git log --pretty" options has been updated to make it easier to translate. + * Instead of three library archives (one for git, one for reftable, + and one for xdiff), roll everything into a single libgit.a archive. + This would help later effort to FFI into Rust. + Fixes since v2.51 ----------------- @@ -329,6 +333,19 @@ including security updates, are included in this release. you would get from "git format-patch --notes=..." for a singleton patch. + * The code in "git add -p" and friends to iterate over hunks was + riddled with bugs, which has been corrected. + + * A few more things that patch authors can do to help maintainer to + keep track of their topics better. + (merge 1a41698841 tb/doc-submitting-patches later to maint). + + * An earlier addition to "git diff --no-index A B" to limit the + output with pathspec after the two directories misbehaved when + these directories were given with a trailing slash, which has been + corrected. + (merge c0bec06cfe jk/diff-no-index-with-pathspec-fix later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 823d537fa7 kh/doc-git-log-markup-fix later to maint). (merge cf7efa4f33 rj/t6137-cygwin-fix later to maint). From 133d151831d32bdcc02422599a3f26cef44f929b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 20 Oct 2025 14:11:52 -0700 Subject: [PATCH 69/92] The twenty-first batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.52.0.adoc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/RelNotes/2.52.0.adoc b/Documentation/RelNotes/2.52.0.adoc index ef5f91fcc0..1e41b7380a 100644 --- a/Documentation/RelNotes/2.52.0.adoc +++ b/Documentation/RelNotes/2.52.0.adoc @@ -376,3 +376,7 @@ including security updates, are included in this release. (merge 1c573a3451 en/doc-merge-tree-describe-merge-base later to maint). (merge 84a6bf7965 ja/doc-markup-attached-paragraph-fix later to maint). (merge 399694384b kh/doc-patch-id-markup-fix later to maint). + (merge 15b8abde07 js/mingw-includes-cleanup later to maint). + (merge 3860985105 js/unreachable-workaround-for-no-symlink-head later to maint). + (merge b3ac6e737d kh/doc-continued-paragraph-fix later to maint). + (merge 2cebca0582 tb/cat-file-objectmode-update later to maint). From 026ad6016070748a66ed9a977ad90efc08df2225 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:25:55 -0500 Subject: [PATCH 70/92] builtin/repo: rename repo_info() to cmd_repo_info() Subcommand functions are often prefixed with `cmd_` to denote that they are an entrypoint. Rename repo_info() to cmd_repo_info() accordingly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index bbb0966f2d..eeeab8fbd2 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -136,8 +136,8 @@ static int parse_format_cb(const struct option *opt, return 0; } -static int repo_info(int argc, const char **argv, const char *prefix, - struct repository *repo) +static int cmd_repo_info(int argc, const char **argv, const char *prefix, + struct repository *repo) { enum output_format format = FORMAT_KEYVALUE; struct option options[] = { @@ -161,7 +161,7 @@ int cmd_repo(int argc, const char **argv, const char *prefix, { parse_opt_subcommand_fn *fn = NULL; struct option options[] = { - OPT_SUBCOMMAND("info", &fn, repo_info), + OPT_SUBCOMMAND("info", &fn, cmd_repo_info), OPT_END() }; From eafc03dbe316478acff5eef3b70c037de4758f08 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:25:56 -0500 Subject: [PATCH 71/92] ref-filter: allow NULL filter pattern When setting up `struct ref_filter` for filter_refs(), the `name_patterns` field must point to an array of pattern strings even if no patterns are required. To improve this interface, treat a NULL `name_patterns` field the same as when it points to an empty array. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- ref-filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index 520d2539c9..2cb5a166d6 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2664,7 +2664,7 @@ static int match_name_as_path(const char **pattern, const char *refname, /* Return 1 if the refname matches one of the patterns, otherwise 0. */ static int filter_pattern_match(struct ref_filter *filter, const char *refname) { - if (!*filter->name_patterns) + if (!filter->name_patterns || !*filter->name_patterns) return 1; /* No pattern always matches */ if (filter->match_as_path) return match_name_as_path(filter->name_patterns, refname, @@ -2751,7 +2751,7 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter, return for_each_fullref_with_seek(filter, cb, cb_data, 0); } - if (!filter->name_patterns[0]) { + if (!filter->name_patterns || !filter->name_patterns[0]) { /* no patterns; we have to look at everything */ return for_each_fullref_with_seek(filter, cb, cb_data, 0); } From 6d1997f6cbc10ac03bc450630de4410762f77b6f Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:25:57 -0500 Subject: [PATCH 72/92] ref-filter: export ref_kind_from_refname() When filtering refs, `ref_kind_from_refname()` is used to determine the ref type. In a subsequent commit, this same logic is reused when counting refs by type. Export the function to prepare for this change. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- ref-filter.c | 2 +- ref-filter.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ref-filter.c b/ref-filter.c index 2cb5a166d6..30cc488d8a 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2833,7 +2833,7 @@ struct ref_array_item *ref_array_push(struct ref_array *array, return ref; } -static int ref_kind_from_refname(const char *refname) +int ref_kind_from_refname(const char *refname) { unsigned int i; diff --git a/ref-filter.h b/ref-filter.h index f22ca94b49..4ed1edf09a 100644 --- a/ref-filter.h +++ b/ref-filter.h @@ -135,6 +135,8 @@ struct ref_format { OPT_STRVEC(0, "exclude", &(var)->exclude, \ N_("pattern"), N_("exclude refs which match pattern")) +/* Get the reference kind from the provided reference name. */ +int ref_kind_from_refname(const char *refname); /* * API for filtering a set of refs. Based on the type of refs the user * has requested, we iterate through those refs and apply filters From bbb2b9334856ae0a2b18e65e5924a42c31a83c6b Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:25:58 -0500 Subject: [PATCH 73/92] builtin/repo: introduce structure subcommand The structure of a repository's history can have huge impacts on the performance and health of the repository itself. Currently, Git lacks a means to surface repository metrics regarding its structure/shape via a single command. Acquiring this information requires users to be familiar with the relevant data points and the various Git commands required to surface them. To fill this gap, supplemental tools such as git-sizer(1) have been developed. To allow users to more readily identify repository structure related information, introduce the "structure" subcommand in git-repo(1). The goal of this subcommand is to eventually provide similar functionality to git-sizer(1), but natively in Git. The initial version of this command only iterates through all references in the repository and tracks the count of branches, tags, remote refs, and other reference types. The corresponding information is displayed in a human-friendly table formatted in a very similar manner to git-sizer(1). The width of each table column is adjusted automatically to satisfy the requirements of the widest row contained. Subsequent commits will surface additional relevant data points to output and also provide other more machine-friendly output formats. Based-on-patch-by: Derrick Stolee Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 10 ++ builtin/repo.c | 200 ++++++++++++++++++++++++++++++++++++ t/meson.build | 1 + t/t1901-repo-structure.sh | 61 +++++++++++ 4 files changed, 272 insertions(+) create mode 100755 t/t1901-repo-structure.sh diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 209afd1b61..8193298dd5 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -9,6 +9,7 @@ SYNOPSIS -------- [synopsis] git repo info [--format=(keyvalue|nul)] [-z] [...] +git repo structure DESCRIPTION ----------- @@ -43,6 +44,15 @@ supported: + `-z` is an alias for `--format=nul`. +`structure`:: + Retrieve statistics about the current repository structure. The + following kinds of information are reported: ++ +* Reference counts categorized by type + ++ +The table output format may change and is not intended for machine parsing. + INFO KEYS --------- In order to obtain a set of values from `git repo info`, you should provide diff --git a/builtin/repo.c b/builtin/repo.c index eeeab8fbd2..e77e8db563 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -4,12 +4,16 @@ #include "environment.h" #include "parse-options.h" #include "quote.h" +#include "ref-filter.h" #include "refs.h" #include "strbuf.h" +#include "string-list.h" #include "shallow.h" +#include "utf8.h" static const char *const repo_usage[] = { "git repo info [--format=(keyvalue|nul)] [-z] [...]", + "git repo structure", NULL }; @@ -156,12 +160,208 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix, return print_fields(argc, argv, repo, format); } +struct ref_stats { + size_t branches; + size_t remotes; + size_t tags; + size_t others; +}; + +struct stats_table { + struct string_list rows; + + int name_col_width; + int value_col_width; +}; + +/* + * Holds column data that gets stored for each row. + */ +struct stats_table_entry { + char *value; +}; + +static void stats_table_vaddf(struct stats_table *table, + struct stats_table_entry *entry, + const char *format, va_list ap) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list_item *item; + char *formatted_name; + int name_width; + + strbuf_vaddf(&buf, format, ap); + formatted_name = strbuf_detach(&buf, NULL); + name_width = utf8_strwidth(formatted_name); + + item = string_list_append_nodup(&table->rows, formatted_name); + item->util = entry; + + if (name_width > table->name_col_width) + table->name_col_width = name_width; + if (entry) { + int value_width = utf8_strwidth(entry->value); + if (value_width > table->value_col_width) + table->value_col_width = value_width; + } +} + +static void stats_table_addf(struct stats_table *table, const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + stats_table_vaddf(table, NULL, format, ap); + va_end(ap); +} + +static void stats_table_count_addf(struct stats_table *table, size_t value, + const char *format, ...) +{ + struct stats_table_entry *entry; + va_list ap; + + CALLOC_ARRAY(entry, 1); + entry->value = xstrfmt("%" PRIuMAX, (uintmax_t)value); + + va_start(ap, format); + stats_table_vaddf(table, entry, format, ap); + va_end(ap); +} + +static inline size_t get_total_reference_count(struct ref_stats *stats) +{ + return stats->branches + stats->remotes + stats->tags + stats->others; +} + +static void stats_table_setup_structure(struct stats_table *table, + struct ref_stats *refs) +{ + size_t ref_total; + + ref_total = get_total_reference_count(refs); + stats_table_addf(table, "* %s", _("References")); + stats_table_count_addf(table, ref_total, " * %s", _("Count")); + stats_table_count_addf(table, refs->branches, " * %s", _("Branches")); + stats_table_count_addf(table, refs->tags, " * %s", _("Tags")); + stats_table_count_addf(table, refs->remotes, " * %s", _("Remotes")); + stats_table_count_addf(table, refs->others, " * %s", _("Others")); +} + +static void stats_table_print_structure(const struct stats_table *table) +{ + const char *name_col_title = _("Repository structure"); + const char *value_col_title = _("Value"); + int name_col_width = utf8_strwidth(name_col_title); + int value_col_width = utf8_strwidth(value_col_title); + struct string_list_item *item; + + if (table->name_col_width > name_col_width) + name_col_width = table->name_col_width; + if (table->value_col_width > value_col_width) + value_col_width = table->value_col_width; + + printf("| %-*s | %-*s |\n", name_col_width, name_col_title, + value_col_width, value_col_title); + printf("| "); + for (int i = 0; i < name_col_width; i++) + putchar('-'); + printf(" | "); + for (int i = 0; i < value_col_width; i++) + putchar('-'); + printf(" |\n"); + + for_each_string_list_item(item, &table->rows) { + struct stats_table_entry *entry = item->util; + const char *value = ""; + + if (entry) { + struct stats_table_entry *entry = item->util; + value = entry->value; + } + + printf("| %-*s | %*s |\n", name_col_width, item->string, + value_col_width, value); + } +} + +static void stats_table_clear(struct stats_table *table) +{ + struct stats_table_entry *entry; + struct string_list_item *item; + + for_each_string_list_item(item, &table->rows) { + entry = item->util; + if (entry) + free(entry->value); + } + + string_list_clear(&table->rows, 1); +} + +static int count_references(const char *refname, + const char *referent UNUSED, + const struct object_id *oid UNUSED, + int flags UNUSED, void *cb_data) +{ + struct ref_stats *stats = cb_data; + + switch (ref_kind_from_refname(refname)) { + case FILTER_REFS_BRANCHES: + stats->branches++; + break; + case FILTER_REFS_REMOTES: + stats->remotes++; + break; + case FILTER_REFS_TAGS: + stats->tags++; + break; + case FILTER_REFS_OTHERS: + stats->others++; + break; + default: + BUG("unexpected reference type"); + } + + return 0; +} + +static void structure_count_references(struct ref_stats *stats, + struct repository *repo) +{ + refs_for_each_ref(get_main_ref_store(repo), count_references, &stats); +} + +static int cmd_repo_structure(int argc, const char **argv, const char *prefix, + struct repository *repo) +{ + struct stats_table table = { + .rows = STRING_LIST_INIT_DUP, + }; + struct ref_stats stats = { 0 }; + struct option options[] = { 0 }; + + argc = parse_options(argc, argv, prefix, options, repo_usage, 0); + if (argc) + usage(_("too many arguments")); + + structure_count_references(&stats, repo); + + stats_table_setup_structure(&table, &stats); + stats_table_print_structure(&table); + + stats_table_clear(&table); + + return 0; +} + int cmd_repo(int argc, const char **argv, const char *prefix, struct repository *repo) { parse_opt_subcommand_fn *fn = NULL; struct option options[] = { OPT_SUBCOMMAND("info", &fn, cmd_repo_info), + OPT_SUBCOMMAND("structure", &fn, cmd_repo_structure), OPT_END() }; diff --git a/t/meson.build b/t/meson.build index 7974795fe4..9e426f8edc 100644 --- a/t/meson.build +++ b/t/meson.build @@ -236,6 +236,7 @@ integration_tests = [ 't1701-racy-split-index.sh', 't1800-hook.sh', 't1900-repo.sh', + 't1901-repo-structure.sh', 't2000-conflict-when-checking-files-out.sh', 't2002-checkout-cache-u.sh', 't2003-checkout-cache-mkdir.sh', diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh new file mode 100755 index 0000000000..e592eea0eb --- /dev/null +++ b/t/t1901-repo-structure.sh @@ -0,0 +1,61 @@ +#!/bin/sh + +test_description='test git repo structure' + +. ./test-lib.sh + +test_expect_success 'empty repository' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + cat >expect <<-\EOF && + | Repository structure | Value | + | -------------------- | ----- | + | * References | | + | * Count | 0 | + | * Branches | 0 | + | * Tags | 0 | + | * Remotes | 0 | + | * Others | 0 | + EOF + + git repo structure >out 2>err && + + test_cmp expect out && + test_line_count = 0 err + ) +' + +test_expect_success 'repository with references' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git commit --allow-empty -m init && + git tag -a foo -m bar && + + oid="$(git rev-parse HEAD)" && + git update-ref refs/remotes/origin/foo "$oid" && + + git notes add -m foo && + + cat >expect <<-\EOF && + | Repository structure | Value | + | -------------------- | ----- | + | * References | | + | * Count | 4 | + | * Branches | 1 | + | * Tags | 1 | + | * Remotes | 1 | + | * Others | 1 | + EOF + + git repo structure >out 2>err && + + test_cmp expect out && + test_line_count = 0 err + ) +' + +test_done From eb5cf58ffcd4bb117c870d448b0df0193df52c82 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:25:59 -0500 Subject: [PATCH 74/92] builtin/repo: add object counts in structure output The amount of objects in a repository can provide insight regarding its shape. To surface this information, use the path-walk API to count the number of reachable objects in the repository by object type. All regular references are used to determine the reachable set of objects. The object counts are appended to the same table containing the reference information. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 1 + builtin/repo.c | 107 +++++++++++++++++++++++++++++++++--- t/t1901-repo-structure.sh | 19 ++++++- 3 files changed, 118 insertions(+), 9 deletions(-) diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 8193298dd5..ae62d2415f 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -49,6 +49,7 @@ supported: following kinds of information are reported: + * Reference counts categorized by type +* Reachable object counts categorized by type + The table output format may change and is not intended for machine parsing. diff --git a/builtin/repo.c b/builtin/repo.c index e77e8db563..f39f06ee8c 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -3,9 +3,11 @@ #include "builtin.h" #include "environment.h" #include "parse-options.h" +#include "path-walk.h" #include "quote.h" #include "ref-filter.h" #include "refs.h" +#include "revision.h" #include "strbuf.h" #include "string-list.h" #include "shallow.h" @@ -167,6 +169,18 @@ struct ref_stats { size_t others; }; +struct object_stats { + size_t tags; + size_t commits; + size_t trees; + size_t blobs; +}; + +struct repo_structure { + struct ref_stats refs; + struct object_stats objects; +}; + struct stats_table { struct string_list rows; @@ -234,9 +248,17 @@ static inline size_t get_total_reference_count(struct ref_stats *stats) return stats->branches + stats->remotes + stats->tags + stats->others; } -static void stats_table_setup_structure(struct stats_table *table, - struct ref_stats *refs) +static inline size_t get_total_object_count(struct object_stats *stats) { + return stats->tags + stats->commits + stats->trees + stats->blobs; +} + +static void stats_table_setup_structure(struct stats_table *table, + struct repo_structure *stats) +{ + struct object_stats *objects = &stats->objects; + struct ref_stats *refs = &stats->refs; + size_t object_total; size_t ref_total; ref_total = get_total_reference_count(refs); @@ -246,6 +268,15 @@ static void stats_table_setup_structure(struct stats_table *table, stats_table_count_addf(table, refs->tags, " * %s", _("Tags")); stats_table_count_addf(table, refs->remotes, " * %s", _("Remotes")); stats_table_count_addf(table, refs->others, " * %s", _("Others")); + + object_total = get_total_object_count(objects); + stats_table_addf(table, ""); + stats_table_addf(table, "* %s", _("Reachable objects")); + stats_table_count_addf(table, object_total, " * %s", _("Count")); + stats_table_count_addf(table, objects->commits, " * %s", _("Commits")); + stats_table_count_addf(table, objects->trees, " * %s", _("Trees")); + stats_table_count_addf(table, objects->blobs, " * %s", _("Blobs")); + stats_table_count_addf(table, objects->tags, " * %s", _("Tags")); } static void stats_table_print_structure(const struct stats_table *table) @@ -299,12 +330,18 @@ static void stats_table_clear(struct stats_table *table) string_list_clear(&table->rows, 1); } +struct count_references_data { + struct ref_stats *stats; + struct rev_info *revs; +}; + static int count_references(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, + const struct object_id *oid, int flags UNUSED, void *cb_data) { - struct ref_stats *stats = cb_data; + struct count_references_data *data = cb_data; + struct ref_stats *stats = data->stats; switch (ref_kind_from_refname(refname)) { case FILTER_REFS_BRANCHES: @@ -323,13 +360,64 @@ static int count_references(const char *refname, BUG("unexpected reference type"); } + /* + * While iterating through references for counting, also add OIDs in + * preparation for the path walk. + */ + add_pending_oid(data->revs, NULL, oid, 0); + return 0; } static void structure_count_references(struct ref_stats *stats, + struct rev_info *revs, struct repository *repo) { - refs_for_each_ref(get_main_ref_store(repo), count_references, &stats); + struct count_references_data data = { + .stats = stats, + .revs = revs, + }; + + refs_for_each_ref(get_main_ref_store(repo), count_references, &data); +} + + +static int count_objects(const char *path UNUSED, struct oid_array *oids, + enum object_type type, void *cb_data) +{ + struct object_stats *stats = cb_data; + + switch (type) { + case OBJ_TAG: + stats->tags += oids->nr; + break; + case OBJ_COMMIT: + stats->commits += oids->nr; + break; + case OBJ_TREE: + stats->trees += oids->nr; + break; + case OBJ_BLOB: + stats->blobs += oids->nr; + break; + default: + BUG("invalid object type"); + } + + return 0; +} + +static void structure_count_objects(struct object_stats *stats, + struct rev_info *revs) +{ + struct path_walk_info info = PATH_WALK_INFO_INIT; + + info.revs = revs; + info.path_fn = count_objects; + info.path_fn_data = stats; + + walk_objects_by_path(&info); + path_walk_info_clear(&info); } static int cmd_repo_structure(int argc, const char **argv, const char *prefix, @@ -338,19 +426,24 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, struct stats_table table = { .rows = STRING_LIST_INIT_DUP, }; - struct ref_stats stats = { 0 }; + struct repo_structure stats = { 0 }; + struct rev_info revs; struct option options[] = { 0 }; argc = parse_options(argc, argv, prefix, options, repo_usage, 0); if (argc) usage(_("too many arguments")); - structure_count_references(&stats, repo); + repo_init_revisions(repo, &revs, prefix); + + structure_count_references(&stats.refs, &revs, repo); + structure_count_objects(&stats.objects, &revs); stats_table_setup_structure(&table, &stats); stats_table_print_structure(&table); stats_table_clear(&table); + release_revisions(&revs); return 0; } diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index e592eea0eb..c32cf4e239 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -18,6 +18,13 @@ test_expect_success 'empty repository' ' | * Tags | 0 | | * Remotes | 0 | | * Others | 0 | + | | | + | * Reachable objects | | + | * Count | 0 | + | * Commits | 0 | + | * Trees | 0 | + | * Blobs | 0 | + | * Tags | 0 | EOF git repo structure >out 2>err && @@ -27,17 +34,18 @@ test_expect_success 'empty repository' ' ) ' -test_expect_success 'repository with references' ' +test_expect_success 'repository with references and objects' ' test_when_finished "rm -rf repo" && git init repo && ( cd repo && - git commit --allow-empty -m init && + test_commit_bulk 42 && git tag -a foo -m bar && oid="$(git rev-parse HEAD)" && git update-ref refs/remotes/origin/foo "$oid" && + # Also creates a commit, tree, and blob. git notes add -m foo && cat >expect <<-\EOF && @@ -49,6 +57,13 @@ test_expect_success 'repository with references' ' | * Tags | 1 | | * Remotes | 1 | | * Others | 1 | + | | | + | * Reachable objects | | + | * Count | 130 | + | * Commits | 43 | + | * Trees | 43 | + | * Blobs | 43 | + | * Tags | 1 | EOF git repo structure >out 2>err && From 17215675b5a2c2eab54b295a7e92d953af2e8779 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:26:00 -0500 Subject: [PATCH 75/92] builtin/repo: add keyvalue and nul format for structure stats All repository structure stats are outputted in a human-friendly table form. This format is not suitable for machine parsing. Add a --format option that supports three output modes: `table`, `keyvalue`, and `nul`. The `table` mode is the default format and prints the same table output as before. With the `keyvalue` mode, each line of output contains a key-value pair of a repository stat. The '=' character is used to delimit between keys and values. The `nul` mode is similar to `keyvalue`, but key-values are delimited by a NUL character instead of a newline. Also, instead of a '=' character to delimit between keys and values, a newline character is used. This allows stat values to support special characters without having to cquote them. These two new modes provides output that is more machine-friendly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 25 +++++++++++++++-- builtin/repo.c | 55 ++++++++++++++++++++++++++++++++++--- t/t1901-repo-structure.sh | 33 ++++++++++++++++++++++ 3 files changed, 106 insertions(+), 7 deletions(-) diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index ae62d2415f..ce43cb19c8 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -9,7 +9,7 @@ SYNOPSIS -------- [synopsis] git repo info [--format=(keyvalue|nul)] [-z] [...] -git repo structure +git repo structure [--format=(table|keyvalue|nul)] DESCRIPTION ----------- @@ -44,7 +44,7 @@ supported: + `-z` is an alias for `--format=nul`. -`structure`:: +`structure [--format=(table|keyvalue|nul)]`:: Retrieve statistics about the current repository structure. The following kinds of information are reported: + @@ -52,7 +52,26 @@ supported: * Reachable object counts categorized by type + -The table output format may change and is not intended for machine parsing. +The output format can be chosen through the flag `--format`. Three formats are +supported: ++ +`table`::: + Outputs repository stats in a human-friendly table. This format may + change and is not intended for machine parsing. This is the default + format. + +`keyvalue`::: + Each line of output contains a key-value pair for a repository stat. + The '=' character is used to delimit between the key and the value. + Values containing "unusual" characters are quoted as explained for the + configuration variable `core.quotePath` (see linkgit:git-config[1]). + +`nul`::: + Similar to `keyvalue`, but uses a NUL character to delimit between + key-value pairs instead of a newline. Also uses a newline character as + the delimiter between the key and value instead of '='. Unlike the + `keyvalue` format, values containing "unusual" characters are never + quoted. INFO KEYS --------- diff --git a/builtin/repo.c b/builtin/repo.c index f39f06ee8c..1754cc7e5d 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -15,13 +15,14 @@ static const char *const repo_usage[] = { "git repo info [--format=(keyvalue|nul)] [-z] [...]", - "git repo structure", + "git repo structure [--format=(table|keyvalue|nul)]", NULL }; typedef int get_value_fn(struct repository *repo, struct strbuf *buf); enum output_format { + FORMAT_TABLE, FORMAT_KEYVALUE, FORMAT_NUL_TERMINATED, }; @@ -136,6 +137,8 @@ static int parse_format_cb(const struct option *opt, *format = FORMAT_NUL_TERMINATED; else if (!strcmp(arg, "keyvalue")) *format = FORMAT_KEYVALUE; + else if (!strcmp(arg, "table")) + *format = FORMAT_TABLE; else die(_("invalid format '%s'"), arg); @@ -158,6 +161,8 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix, }; argc = parse_options(argc, argv, prefix, options, repo_usage, 0); + if (format != FORMAT_KEYVALUE && format != FORMAT_NUL_TERMINATED) + die(_("unsupported output format")); return print_fields(argc, argv, repo, format); } @@ -330,6 +335,30 @@ static void stats_table_clear(struct stats_table *table) string_list_clear(&table->rows, 1); } +static void structure_keyvalue_print(struct repo_structure *stats, + char key_delim, char value_delim) +{ + printf("references.branches.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->refs.branches, value_delim); + printf("references.tags.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->refs.tags, value_delim); + printf("references.remotes.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->refs.remotes, value_delim); + printf("references.others.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->refs.others, value_delim); + + printf("objects.commits.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.commits, value_delim); + printf("objects.trees.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.trees, value_delim); + printf("objects.blobs.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.blobs, value_delim); + printf("objects.tags.count%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.tags, value_delim); + + fflush(stdout); +} + struct count_references_data { struct ref_stats *stats; struct rev_info *revs; @@ -426,9 +455,15 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, struct stats_table table = { .rows = STRING_LIST_INIT_DUP, }; + enum output_format format = FORMAT_TABLE; struct repo_structure stats = { 0 }; struct rev_info revs; - struct option options[] = { 0 }; + struct option options[] = { + OPT_CALLBACK_F(0, "format", &format, N_("format"), + N_("output format"), + PARSE_OPT_NONEG, parse_format_cb), + OPT_END() + }; argc = parse_options(argc, argv, prefix, options, repo_usage, 0); if (argc) @@ -439,8 +474,20 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, structure_count_references(&stats.refs, &revs, repo); structure_count_objects(&stats.objects, &revs); - stats_table_setup_structure(&table, &stats); - stats_table_print_structure(&table); + switch (format) { + case FORMAT_TABLE: + stats_table_setup_structure(&table, &stats); + stats_table_print_structure(&table); + break; + case FORMAT_KEYVALUE: + structure_keyvalue_print(&stats, '=', '\n'); + break; + case FORMAT_NUL_TERMINATED: + structure_keyvalue_print(&stats, '\n', '\0'); + break; + default: + BUG("invalid output format"); + } stats_table_clear(&table); release_revisions(&revs); diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index c32cf4e239..14bd8aede5 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -73,4 +73,37 @@ test_expect_success 'repository with references and objects' ' ) ' +test_expect_success 'keyvalue and nul format' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit_bulk 42 && + git tag -a foo -m bar && + + cat >expect <<-\EOF && + references.branches.count=1 + references.tags.count=1 + references.remotes.count=0 + references.others.count=0 + objects.commits.count=42 + objects.trees.count=42 + objects.blobs.count=42 + objects.tags.count=1 + EOF + + git repo structure --format=keyvalue >out 2>err && + + test_cmp expect out && + test_line_count = 0 err && + + # Replace key and value delimiters for nul format. + tr "\n=" "\0\n" expect_nul && + git repo structure --format=nul >out 2>err && + + test_cmp expect_nul out && + test_line_count = 0 err + ) +' + test_done From 16a93c03c7824a40b034a6ee1cb1c68c8ef48682 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 21 Oct 2025 13:26:01 -0500 Subject: [PATCH 76/92] builtin/repo: add progress meter for structure stats When using the structure subcommand for git-repo(1), evaluating a repository may take some time depending on its shape. Add a progress meter to provide feedback to the user about what is happening. The progress meter is enabled by default when the command is executed from a tty. It can also be explicitly enabled/disabled via the --[no-]progress option. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 46 ++++++++++++++++++++++++++++++++++----- t/t1901-repo-structure.sh | 20 +++++++++++++++++ 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index 1754cc7e5d..9d4749f79b 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -4,6 +4,7 @@ #include "environment.h" #include "parse-options.h" #include "path-walk.h" +#include "progress.h" #include "quote.h" #include "ref-filter.h" #include "refs.h" @@ -362,6 +363,7 @@ static void structure_keyvalue_print(struct repo_structure *stats, struct count_references_data { struct ref_stats *stats; struct rev_info *revs; + struct progress *progress; }; static int count_references(const char *refname, @@ -371,6 +373,7 @@ static int count_references(const char *refname, { struct count_references_data *data = cb_data; struct ref_stats *stats = data->stats; + size_t ref_count; switch (ref_kind_from_refname(refname)) { case FILTER_REFS_BRANCHES: @@ -395,26 +398,41 @@ static int count_references(const char *refname, */ add_pending_oid(data->revs, NULL, oid, 0); + ref_count = get_total_reference_count(stats); + display_progress(data->progress, ref_count); + return 0; } static void structure_count_references(struct ref_stats *stats, struct rev_info *revs, - struct repository *repo) + struct repository *repo, + int show_progress) { struct count_references_data data = { .stats = stats, .revs = revs, }; + if (show_progress) + data.progress = start_delayed_progress(repo, + _("Counting references"), 0); + refs_for_each_ref(get_main_ref_store(repo), count_references, &data); + stop_progress(&data.progress); } +struct count_objects_data { + struct object_stats *stats; + struct progress *progress; +}; static int count_objects(const char *path UNUSED, struct oid_array *oids, enum object_type type, void *cb_data) { - struct object_stats *stats = cb_data; + struct count_objects_data *data = cb_data; + struct object_stats *stats = data->stats; + size_t object_count; switch (type) { case OBJ_TAG: @@ -433,20 +451,31 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, BUG("invalid object type"); } + object_count = get_total_object_count(stats); + display_progress(data->progress, object_count); + return 0; } static void structure_count_objects(struct object_stats *stats, - struct rev_info *revs) + struct rev_info *revs, + struct repository *repo, int show_progress) { struct path_walk_info info = PATH_WALK_INFO_INIT; + struct count_objects_data data = { + .stats = stats, + }; info.revs = revs; info.path_fn = count_objects; - info.path_fn_data = stats; + info.path_fn_data = &data; + + if (show_progress) + data.progress = start_delayed_progress(repo, _("Counting objects"), 0); walk_objects_by_path(&info); path_walk_info_clear(&info); + stop_progress(&data.progress); } static int cmd_repo_structure(int argc, const char **argv, const char *prefix, @@ -458,10 +487,12 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, enum output_format format = FORMAT_TABLE; struct repo_structure stats = { 0 }; struct rev_info revs; + int show_progress = -1; struct option options[] = { OPT_CALLBACK_F(0, "format", &format, N_("format"), N_("output format"), PARSE_OPT_NONEG, parse_format_cb), + OPT_BOOL(0, "progress", &show_progress, N_("show progress")), OPT_END() }; @@ -471,8 +502,11 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, repo_init_revisions(repo, &revs, prefix); - structure_count_references(&stats.refs, &revs, repo); - structure_count_objects(&stats.objects, &revs); + if (show_progress < 0) + show_progress = isatty(2); + + structure_count_references(&stats.refs, &revs, repo, show_progress); + structure_count_objects(&stats.objects, &revs, repo, show_progress); switch (format) { case FORMAT_TABLE: diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 14bd8aede5..36a71a144e 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -106,4 +106,24 @@ test_expect_success 'keyvalue and nul format' ' ) ' +test_expect_success 'progress meter option' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit foo && + + GIT_PROGRESS_DELAY=0 git repo structure --progress >out 2>err && + + test_file_not_empty out && + test_grep "Counting references: 2, done." err && + test_grep "Counting objects: 3, done." err && + + GIT_PROGRESS_DELAY=0 git repo structure --no-progress >out 2>err && + + test_file_not_empty out && + test_line_count = 0 err + ) +' + test_done From bdbebe5714b25dc9d215b48efbb80f410925d7dd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:10 +0200 Subject: [PATCH 77/92] refs: introduce wrapper struct for `each_ref_fn` The `each_ref_fn` callback function type is used across our code base for several different functions that iterate through reference. There's a bunch of callbacks implementing this type, which makes any changes to the callback signature extremely noisy. An example of the required churn is e8207717f1 (refs: add referent to each_ref_fn, 2024-08-09): adding a single argument required us to change 48 files. It was already proposed back then [1] that we might want to introduce a wrapper structure to alleviate the pain going forward. While this of course requires the same kind of global refactoring as just introducing a new parameter, it at least allows us to more change the callback type afterwards by just extending the wrapper structure. One counterargument to this refactoring is that it makes the structure more opaque. While it is obvious which callsites need to be fixed up when we change the function type, it's not obvious anymore once we use a structure. That being said, we only have a handful of sites that actually need to populate this wrapper structure: our ref backends, "refs/iterator.c" as well as very few sites that invoke the iterator callback functions directly. Introduce this wrapper structure so that we can adapt the iterator interfaces more readily. [1]: Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- bisect.c | 24 ++++++------- builtin/bisect.c | 17 +++------- builtin/checkout.c | 6 ++-- builtin/describe.c | 18 +++++----- builtin/fetch.c | 13 +++---- builtin/fsck.c | 33 ++++++++++-------- builtin/gc.c | 15 ++++----- builtin/name-rev.c | 17 +++++----- builtin/pack-objects.c | 27 ++++++--------- builtin/receive-pack.c | 13 ++++--- builtin/remote.c | 44 +++++++++++------------- builtin/replace.c | 21 +++++------- builtin/repo.c | 9 ++--- builtin/rev-parse.c | 12 +++---- builtin/show-branch.c | 35 +++++++++---------- builtin/show-ref.c | 20 +++++------ builtin/submodule--helper.c | 10 ++---- builtin/worktree.c | 6 +--- commit-graph.c | 14 ++++---- delta-islands.c | 9 +++-- fetch-pack.c | 16 +++------ help.c | 10 +++--- http-backend.c | 20 +++++------ log-tree.c | 24 ++++++------- ls-refs.c | 36 ++++++++++++-------- midx-write.c | 17 +++++----- negotiator/default.c | 7 ++-- negotiator/skipping.c | 7 ++-- notes.c | 8 ++--- object-name.c | 10 +++--- pseudo-merge.c | 21 +++++------- reachable.c | 9 +++-- ref-filter.c | 24 +++++++------ reflog.c | 9 ++--- refs.c | 67 +++++++++++++++++++++---------------- refs.h | 26 +++++++++++--- refs/files-backend.c | 7 ++-- refs/iterator.c | 9 ++++- remote.c | 27 +++++++-------- repack-midx.c | 16 ++++----- replace-object.c | 16 ++++----- revision.c | 12 +++---- server-info.c | 12 +++---- shallow.c | 16 +++------ submodule.c | 12 ++----- t/helper/test-ref-store.c | 5 ++- upload-pack.c | 29 +++++++--------- walker.c | 8 ++--- worktree.c | 11 ++++-- 49 files changed, 392 insertions(+), 462 deletions(-) diff --git a/bisect.c b/bisect.c index a6dc76b15c..326b59c0dc 100644 --- a/bisect.c +++ b/bisect.c @@ -450,21 +450,20 @@ void find_bisection(struct commit_list **commit_list, int *reaches, clear_commit_weight(&commit_weight); } -static int register_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, void *cb_data UNUSED) +static int register_ref(const struct reference *ref, void *cb_data UNUSED) { struct strbuf good_prefix = STRBUF_INIT; strbuf_addstr(&good_prefix, term_good); strbuf_addstr(&good_prefix, "-"); - if (!strcmp(refname, term_bad)) { + if (!strcmp(ref->name, term_bad)) { free(current_bad_oid); current_bad_oid = xmalloc(sizeof(*current_bad_oid)); - oidcpy(current_bad_oid, oid); - } else if (starts_with(refname, good_prefix.buf)) { - oid_array_append(&good_revs, oid); - } else if (starts_with(refname, "skip-")) { - oid_array_append(&skipped_revs, oid); + oidcpy(current_bad_oid, ref->oid); + } else if (starts_with(ref->name, good_prefix.buf)) { + oid_array_append(&good_revs, ref->oid); + } else if (starts_with(ref->name, "skip-")) { + oid_array_append(&skipped_revs, ref->oid); } strbuf_release(&good_prefix); @@ -1178,14 +1177,11 @@ int estimate_bisect_steps(int all) return (e < 3 * x) ? n : n - 1; } -static int mark_for_removal(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag UNUSED, void *cb_data) +static int mark_for_removal(const struct reference *ref, void *cb_data) { struct string_list *refs = cb_data; - char *ref = xstrfmt("refs/bisect%s", refname); - string_list_append(refs, ref); + char *bisect_ref = xstrfmt("refs/bisect%s", ref->name); + string_list_append(refs, bisect_ref); return 0; } diff --git a/builtin/bisect.c b/builtin/bisect.c index 8b8d870cd1..5b2024be62 100644 --- a/builtin/bisect.c +++ b/builtin/bisect.c @@ -358,10 +358,7 @@ static int check_and_set_terms(struct bisect_terms *terms, const char *cmd) return 0; } -static int inc_nr(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag UNUSED, void *cb_data) +static int inc_nr(const struct reference *ref UNUSED, void *cb_data) { unsigned int *nr = (unsigned int *)cb_data; (*nr)++; @@ -549,12 +546,11 @@ finish: return res; } -static int add_bisect_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, void *cb) +static int add_bisect_ref(const struct reference *ref, void *cb) { struct add_bisect_ref_data *data = cb; - add_pending_oid(data->revs, refname, oid, data->object_flags); + add_pending_oid(data->revs, ref->name, ref->oid, data->object_flags); return 0; } @@ -1165,12 +1161,9 @@ static int bisect_visualize(struct bisect_terms *terms, int argc, return run_command(&cmd); } -static int get_first_good(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int get_first_good(const struct reference *ref, void *cb_data) { - oidcpy(cb_data, oid); + oidcpy(cb_data, ref->oid); return 1; } diff --git a/builtin/checkout.c b/builtin/checkout.c index f9453473fe..66b69df6e6 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -1063,11 +1063,9 @@ static void update_refs_for_switch(const struct checkout_opts *opts, report_tracking(new_branch_info); } -static int add_pending_uninteresting_ref(const char *refname, const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int add_pending_uninteresting_ref(const struct reference *ref, void *cb_data) { - add_pending_oid(cb_data, refname, oid, UNINTERESTING); + add_pending_oid(cb_data, ref->name, ref->oid, UNINTERESTING); return 0; } diff --git a/builtin/describe.c b/builtin/describe.c index ffaf8d9f0a..7954535044 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -154,20 +154,19 @@ static void add_to_known_names(const char *path, } } -static int get_name(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int get_name(const struct reference *ref, void *cb_data UNUSED) { int is_tag = 0; struct object_id peeled; int is_annotated, prio; const char *path_to_match = NULL; - if (skip_prefix(path, "refs/tags/", &path_to_match)) { + if (skip_prefix(ref->name, "refs/tags/", &path_to_match)) { is_tag = 1; } else if (all) { if ((exclude_patterns.nr || patterns.nr) && - !skip_prefix(path, "refs/heads/", &path_to_match) && - !skip_prefix(path, "refs/remotes/", &path_to_match)) { + !skip_prefix(ref->name, "refs/heads/", &path_to_match) && + !skip_prefix(ref->name, "refs/remotes/", &path_to_match)) { /* Only accept reference of known type if there are match/exclude patterns */ return 0; } @@ -209,10 +208,10 @@ static int get_name(const char *path, const char *referent UNUSED, const struct } /* Is it annotated? */ - if (!peel_iterated_oid(the_repository, oid, &peeled)) { - is_annotated = !oideq(oid, &peeled); + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) { + is_annotated = !oideq(ref->oid, &peeled); } else { - oidcpy(&peeled, oid); + oidcpy(&peeled, ref->oid); is_annotated = 0; } @@ -229,7 +228,8 @@ static int get_name(const char *path, const char *referent UNUSED, const struct else prio = 0; - add_to_known_names(all ? path + 5 : path + 10, &peeled, prio, oid); + add_to_known_names(all ? ref->name + 5 : ref->name + 10, + &peeled, prio, ref->oid); return 0; } diff --git a/builtin/fetch.c b/builtin/fetch.c index c7ff3480fb..7052e6ff21 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -289,13 +289,11 @@ static struct refname_hash_entry *refname_hash_add(struct hashmap *map, return ent; } -static int add_one_refname(const char *refname, const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *cbdata) +static int add_one_refname(const struct reference *ref, void *cbdata) { struct hashmap *refname_map = cbdata; - (void) refname_hash_add(refname_map, refname, oid); + (void) refname_hash_add(refname_map, ref->name, ref->oid); return 0; } @@ -1416,14 +1414,11 @@ static void set_option(struct transport *transport, const char *name, const char } -static int add_oid(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int add_oid(const struct reference *ref, void *cb_data) { struct oid_array *oids = cb_data; - oid_array_append(oids, oid); + oid_array_append(oids, ref->oid); return 0; } diff --git a/builtin/fsck.c b/builtin/fsck.c index 8ee95e0d67..ed4eea1680 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -530,14 +530,13 @@ static int fsck_handle_reflog(const char *logname, void *cb_data) return 0; } -static int fsck_handle_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int fsck_handle_ref(const struct reference *ref, void *cb_data UNUSED) { struct object *obj; - obj = parse_object(the_repository, oid); + obj = parse_object(the_repository, ref->oid); if (!obj) { - if (is_promisor_object(the_repository, oid)) { + if (is_promisor_object(the_repository, ref->oid)) { /* * Increment default_refs anyway, because this is a * valid ref. @@ -546,19 +545,19 @@ static int fsck_handle_ref(const char *refname, const char *referent UNUSED, con return 0; } error(_("%s: invalid sha1 pointer %s"), - refname, oid_to_hex(oid)); + ref->name, oid_to_hex(ref->oid)); errors_found |= ERROR_REACHABLE; /* We'll continue with the rest despite the error.. */ return 0; } - if (obj->type != OBJ_COMMIT && is_branch(refname)) { - error(_("%s: not a commit"), refname); + if (obj->type != OBJ_COMMIT && is_branch(ref->name)) { + error(_("%s: not a commit"), ref->name); errors_found |= ERROR_REFS; } default_refs++; obj->flags |= USED; fsck_put_object_name(&fsck_walk_options, - oid, "%s", refname); + ref->oid, "%s", ref->name); mark_object_reachable(obj); return 0; @@ -580,13 +579,19 @@ static void get_default_heads(void) worktrees = get_worktrees(); for (p = worktrees; *p; p++) { struct worktree *wt = *p; - struct strbuf ref = STRBUF_INIT; + struct strbuf refname = STRBUF_INIT; - strbuf_worktree_ref(wt, &ref, "HEAD"); - fsck_head_link(ref.buf, &head_points_at, &head_oid); - if (head_points_at && !is_null_oid(&head_oid)) - fsck_handle_ref(ref.buf, NULL, &head_oid, 0, NULL); - strbuf_release(&ref); + strbuf_worktree_ref(wt, &refname, "HEAD"); + fsck_head_link(refname.buf, &head_points_at, &head_oid); + if (head_points_at && !is_null_oid(&head_oid)) { + struct reference ref = { + .name = refname.buf, + .oid = &head_oid, + }; + + fsck_handle_ref(&ref, NULL); + } + strbuf_release(&refname); if (include_reflogs) refs_for_each_reflog(get_worktree_ref_store(wt), diff --git a/builtin/gc.c b/builtin/gc.c index e19e13d978..9de5de175f 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1100,24 +1100,21 @@ struct cg_auto_data { int limit; }; -static int dfs_on_ref(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *cb_data) +static int dfs_on_ref(const struct reference *ref, void *cb_data) { struct cg_auto_data *data = (struct cg_auto_data *)cb_data; int result = 0; + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct commit_list *stack = NULL; struct commit *commit; - if (!peel_iterated_oid(the_repository, oid, &peeled)) - oid = &peeled; - if (odb_read_object_info(the_repository->objects, oid, NULL) != OBJ_COMMIT) + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + maybe_peeled = &peeled; + if (odb_read_object_info(the_repository->objects, maybe_peeled, NULL) != OBJ_COMMIT) return 0; - commit = lookup_commit(the_repository, oid); + commit = lookup_commit(the_repository, maybe_peeled); if (!commit) return 0; if (repo_parse_commit(the_repository, commit) || diff --git a/builtin/name-rev.c b/builtin/name-rev.c index 74512e54a3..615f7d1aae 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -339,10 +339,9 @@ static int cmp_by_tag_and_age(const void *a_, const void *b_) return a->taggerdate != b->taggerdate; } -static int name_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int name_ref(const struct reference *ref, void *cb_data) { - struct object *o = parse_object(the_repository, oid); + struct object *o = parse_object(the_repository, ref->oid); struct name_ref_data *data = cb_data; int can_abbreviate_output = data->tags_only && data->name_only; int deref = 0; @@ -350,14 +349,14 @@ static int name_ref(const char *path, const char *referent UNUSED, const struct struct commit *commit = NULL; timestamp_t taggerdate = TIME_MAX; - if (data->tags_only && !starts_with(path, "refs/tags/")) + if (data->tags_only && !starts_with(ref->name, "refs/tags/")) return 0; if (data->exclude_filters.nr) { struct string_list_item *item; for_each_string_list_item(item, &data->exclude_filters) { - if (subpath_matches(path, item->string) >= 0) + if (subpath_matches(ref->name, item->string) >= 0) return 0; } } @@ -378,7 +377,7 @@ static int name_ref(const char *path, const char *referent UNUSED, const struct * shouldn't stop when seeing 'refs/tags/v1.4' matches * 'refs/tags/v*'. We should show it as 'v1.4'. */ - switch (subpath_matches(path, item->string)) { + switch (subpath_matches(ref->name, item->string)) { case -1: /* did not match */ break; case 0: /* matched fully */ @@ -406,13 +405,13 @@ static int name_ref(const char *path, const char *referent UNUSED, const struct } if (o && o->type == OBJ_COMMIT) { commit = (struct commit *)o; - from_tag = starts_with(path, "refs/tags/"); + from_tag = starts_with(ref->name, "refs/tags/"); if (taggerdate == TIME_MAX) taggerdate = commit->date; } - add_to_tip_table(oid, path, can_abbreviate_output, commit, taggerdate, - from_tag, deref); + add_to_tip_table(ref->oid, ref->name, can_abbreviate_output, + commit, taggerdate, from_tag, deref); return 0; } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5bdc44fb2d..39633a0158 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -831,15 +831,14 @@ static enum write_one_status write_one(struct hashfile *f, return WRITE_ONE_WRITTEN; } -static int mark_tagged(const char *path UNUSED, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int mark_tagged(const struct reference *ref, void *cb_data UNUSED) { struct object_id peeled; - struct object_entry *entry = packlist_find(&to_pack, oid); + struct object_entry *entry = packlist_find(&to_pack, ref->oid); if (entry) entry->tagged = 1; - if (!peel_iterated_oid(the_repository, oid, &peeled)) { + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) { entry = packlist_find(&to_pack, &peeled); if (entry) entry->tagged = 1; @@ -3306,13 +3305,12 @@ static void add_tag_chain(const struct object_id *oid) } } -static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int add_ref_tag(const struct reference *ref, void *cb_data UNUSED) { struct object_id peeled; - if (!peel_iterated_oid(the_repository, oid, &peeled) && obj_is_packed(&peeled)) - add_tag_chain(oid); + if (!peel_iterated_oid(the_repository, ref->oid, &peeled) && obj_is_packed(&peeled)) + add_tag_chain(ref->oid); return 0; } @@ -4533,19 +4531,16 @@ static void record_recent_commit(struct commit *commit, void *data UNUSED) oid_array_append(&recent_objects, &commit->object.oid); } -static int mark_bitmap_preferred_tip(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *data UNUSED) +static int mark_bitmap_preferred_tip(const struct reference *ref, void *data UNUSED) { + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct object *object; - if (!peel_iterated_oid(the_repository, oid, &peeled)) - oid = &peeled; + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + maybe_peeled = &peeled; - object = parse_object_or_die(the_repository, oid, refname); + object = parse_object_or_die(the_repository, maybe_peeled, ref->name); if (object->type == OBJ_COMMIT) object->flags |= NEEDS_BITMAP; diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index c9288a9c7e..e8ee0e7321 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -305,13 +305,12 @@ static void show_ref(const char *path, const struct object_id *oid) } } -static int show_ref_cb(const char *path_full, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *data) +static int show_ref_cb(const struct reference *ref, void *data) { struct oidset *seen = data; - const char *path = strip_namespace(path_full); + const char *path = strip_namespace(ref->name); - if (ref_is_hidden(path, path_full, &hidden_refs)) + if (ref_is_hidden(path, ref->name, &hidden_refs)) return 0; /* @@ -320,13 +319,13 @@ static int show_ref_cb(const char *path_full, const char *referent UNUSED, const * transfer but will otherwise ignore them. */ if (!path) { - if (oidset_insert(seen, oid)) + if (oidset_insert(seen, ref->oid)) return 0; path = ".have"; } else { - oidset_insert(seen, oid); + oidset_insert(seen, ref->oid); } - show_ref(path, oid); + show_ref(path, ref->oid); return 0; } diff --git a/builtin/remote.c b/builtin/remote.c index 8a7ed4299a..7ffc14ba15 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -570,17 +570,14 @@ struct branches_for_remote { struct known_remotes *keep; }; -static int add_branch_for_removal(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, void *cb_data) +static int add_branch_for_removal(const struct reference *ref, void *cb_data) { struct branches_for_remote *branches = cb_data; struct refspec_item refspec; struct known_remote *kr; memset(&refspec, 0, sizeof(refspec)); - refspec.dst = (char *)refname; + refspec.dst = (char *)ref->name; if (remote_find_tracking(branches->remote, &refspec)) return 0; free(refspec.src); @@ -588,7 +585,7 @@ static int add_branch_for_removal(const char *refname, /* don't delete a branch if another remote also uses it */ for (kr = branches->keep->list; kr; kr = kr->next) { memset(&refspec, 0, sizeof(refspec)); - refspec.dst = (char *)refname; + refspec.dst = (char *)ref->name; if (!remote_find_tracking(kr->remote, &refspec)) { free(refspec.src); return 0; @@ -596,16 +593,16 @@ static int add_branch_for_removal(const char *refname, } /* don't delete non-remote-tracking refs */ - if (!starts_with(refname, "refs/remotes/")) { + if (!starts_with(ref->name, "refs/remotes/")) { /* advise user how to delete local branches */ - if (starts_with(refname, "refs/heads/")) + if (starts_with(ref->name, "refs/heads/")) string_list_append(branches->skipped, - abbrev_branch(refname)); + abbrev_branch(ref->name)); /* silently skip over other non-remote refs */ return 0; } - string_list_append(branches->branches, refname); + string_list_append(branches->branches, ref->name); return 0; } @@ -713,18 +710,18 @@ out: return error; } -static int rename_one_ref(const char *old_refname, const char *referent, - const struct object_id *oid, - int flags, void *cb_data) +static int rename_one_ref(const struct reference *ref, void *cb_data) { struct strbuf new_referent = STRBUF_INIT; struct strbuf new_refname = STRBUF_INIT; struct rename_info *rename = cb_data; + const struct object_id *oid = ref->oid; + const char *referent = ref->target; int error; - compute_renamed_ref(rename, old_refname, &new_refname); + compute_renamed_ref(rename, ref->name, &new_refname); - if (flags & REF_ISSYMREF) { + if (ref->flags & REF_ISSYMREF) { /* * Stupidly enough `referent` is not pointing to the immediate * target of a symref, but it's the recursively resolved value. @@ -732,25 +729,25 @@ static int rename_one_ref(const char *old_refname, const char *referent, * unborn symrefs don't have any value for the `referent` at all. */ referent = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), - old_refname, RESOLVE_REF_NO_RECURSE, + ref->name, RESOLVE_REF_NO_RECURSE, NULL, NULL); compute_renamed_ref(rename, referent, &new_referent); oid = NULL; } - error = ref_transaction_delete(rename->transaction, old_refname, + error = ref_transaction_delete(rename->transaction, ref->name, oid, referent, REF_NO_DEREF, NULL, rename->err); if (error < 0) goto out; error = ref_transaction_update(rename->transaction, new_refname.buf, oid, null_oid(the_hash_algo), - (flags & REF_ISSYMREF) ? new_referent.buf : NULL, NULL, + (ref->flags & REF_ISSYMREF) ? new_referent.buf : NULL, NULL, REF_SKIP_CREATE_REFLOG | REF_NO_DEREF | REF_SKIP_OID_VERIFICATION, NULL, rename->err); if (error < 0) goto out; - error = rename_one_reflog(old_refname, oid, rename); + error = rename_one_reflog(ref->name, oid, rename); if (error < 0) goto out; @@ -1125,19 +1122,16 @@ static void free_remote_ref_states(struct ref_states *states) string_list_clear_func(&states->push, clear_push_info); } -static int append_ref_to_tracked_list(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags, void *cb_data) +static int append_ref_to_tracked_list(const struct reference *ref, void *cb_data) { struct ref_states *states = cb_data; struct refspec_item refspec; - if (flags & REF_ISSYMREF) + if (ref->flags & REF_ISSYMREF) return 0; memset(&refspec, 0, sizeof(refspec)); - refspec.dst = (char *)refname; + refspec.dst = (char *)ref->name; if (!remote_find_tracking(states->remote, &refspec)) { string_list_append(&states->tracked, abbrev_branch(refspec.src)); free(refspec.src); diff --git a/builtin/replace.c b/builtin/replace.c index 900b560a77..4c62c5ab58 100644 --- a/builtin/replace.c +++ b/builtin/replace.c @@ -47,30 +47,27 @@ struct show_data { enum replace_format format; }; -static int show_reference(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int show_reference(const struct reference *ref, void *cb_data) { struct show_data *data = cb_data; - if (!wildmatch(data->pattern, refname, 0)) { + if (!wildmatch(data->pattern, ref->name, 0)) { if (data->format == REPLACE_FORMAT_SHORT) - printf("%s\n", refname); + printf("%s\n", ref->name); else if (data->format == REPLACE_FORMAT_MEDIUM) - printf("%s -> %s\n", refname, oid_to_hex(oid)); + printf("%s -> %s\n", ref->name, oid_to_hex(ref->oid)); else { /* data->format == REPLACE_FORMAT_LONG */ struct object_id object; enum object_type obj_type, repl_type; - if (repo_get_oid(data->repo, refname, &object)) - return error(_("failed to resolve '%s' as a valid ref"), refname); + if (repo_get_oid(data->repo, ref->name, &object)) + return error(_("failed to resolve '%s' as a valid ref"), ref->name); obj_type = odb_read_object_info(data->repo->objects, &object, NULL); - repl_type = odb_read_object_info(data->repo->objects, oid, NULL); + repl_type = odb_read_object_info(data->repo->objects, ref->oid, NULL); - printf("%s (%s) -> %s (%s)\n", refname, type_name(obj_type), - oid_to_hex(oid), type_name(repl_type)); + printf("%s (%s) -> %s (%s)\n", ref->name, type_name(obj_type), + oid_to_hex(ref->oid), type_name(repl_type)); } } diff --git a/builtin/repo.c b/builtin/repo.c index 9d4749f79b..f26640bd6e 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -366,16 +366,13 @@ struct count_references_data { struct progress *progress; }; -static int count_references(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int count_references(const struct reference *ref, void *cb_data) { struct count_references_data *data = cb_data; struct ref_stats *stats = data->stats; size_t ref_count; - switch (ref_kind_from_refname(refname)) { + switch (ref_kind_from_refname(ref->name)) { case FILTER_REFS_BRANCHES: stats->branches++; break; @@ -396,7 +393,7 @@ static int count_references(const char *refname, * While iterating through references for counting, also add OIDs in * preparation for the path walk. */ - add_pending_oid(data->revs, NULL, oid, 0); + add_pending_oid(data->revs, NULL, ref->oid, 0); ref_count = get_total_reference_count(stats); display_progress(data->progress, ref_count); diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 9da92b990d..3578591b4f 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -217,19 +217,17 @@ static int show_default(void) return 0; } -static int show_reference(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int show_reference(const struct reference *ref, void *cb_data UNUSED) { - if (ref_excluded(&ref_excludes, refname)) + if (ref_excluded(&ref_excludes, ref->name)) return 0; - show_rev(NORMAL, oid, refname); + show_rev(NORMAL, ref->oid, ref->name); return 0; } -static int anti_reference(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int anti_reference(const struct reference *ref, void *cb_data UNUSED) { - show_rev(REVERSED, oid, refname); + show_rev(REVERSED, ref->oid, ref->name); return 0; } diff --git a/builtin/show-branch.c b/builtin/show-branch.c index 441babf2e3..10475a6b5e 100644 --- a/builtin/show-branch.c +++ b/builtin/show-branch.c @@ -413,34 +413,32 @@ static int append_ref(const char *refname, const struct object_id *oid, return 0; } -static int append_head_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int append_head_ref(const struct reference *ref, void *cb_data UNUSED) { struct object_id tmp; int ofs = 11; - if (!starts_with(refname, "refs/heads/")) + if (!starts_with(ref->name, "refs/heads/")) return 0; /* If both heads/foo and tags/foo exists, get_sha1 would * get confused. */ - if (repo_get_oid(the_repository, refname + ofs, &tmp) || !oideq(&tmp, oid)) + if (repo_get_oid(the_repository, ref->name + ofs, &tmp) || !oideq(&tmp, ref->oid)) ofs = 5; - return append_ref(refname + ofs, oid, 0); + return append_ref(ref->name + ofs, ref->oid, 0); } -static int append_remote_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int append_remote_ref(const struct reference *ref, void *cb_data UNUSED) { struct object_id tmp; int ofs = 13; - if (!starts_with(refname, "refs/remotes/")) + if (!starts_with(ref->name, "refs/remotes/")) return 0; /* If both heads/foo and tags/foo exists, get_sha1 would * get confused. */ - if (repo_get_oid(the_repository, refname + ofs, &tmp) || !oideq(&tmp, oid)) + if (repo_get_oid(the_repository, ref->name + ofs, &tmp) || !oideq(&tmp, ref->oid)) ofs = 5; - return append_ref(refname + ofs, oid, 0); + return append_ref(ref->name + ofs, ref->oid, 0); } static int append_tag_ref(const char *refname, const struct object_id *oid, @@ -454,27 +452,26 @@ static int append_tag_ref(const char *refname, const struct object_id *oid, static const char *match_ref_pattern = NULL; static int match_ref_slash = 0; -static int append_matching_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data) +static int append_matching_ref(const struct reference *ref, void *cb_data) { /* we want to allow pattern hold/ to show all * branches under refs/heads/hold/, and v0.99.9? to show * refs/tags/v0.99.9a and friends. */ const char *tail; - int slash = count_slashes(refname); - for (tail = refname; *tail && match_ref_slash < slash; ) + int slash = count_slashes(ref->name); + for (tail = ref->name; *tail && match_ref_slash < slash; ) if (*tail++ == '/') slash--; if (!*tail) return 0; if (wildmatch(match_ref_pattern, tail, 0)) return 0; - if (starts_with(refname, "refs/heads/")) - return append_head_ref(refname, NULL, oid, flag, cb_data); - if (starts_with(refname, "refs/tags/")) - return append_tag_ref(refname, oid, flag, cb_data); - return append_ref(refname, oid, 0); + if (starts_with(ref->name, "refs/heads/")) + return append_head_ref(ref, cb_data); + if (starts_with(ref->name, "refs/tags/")) + return append_tag_ref(ref->name, ref->oid, ref->flags, cb_data); + return append_ref(ref->name, ref->oid, 0); } static void snarf_refs(int head, int remotes) diff --git a/builtin/show-ref.c b/builtin/show-ref.c index 0b6f9edf86..4803b5e598 100644 --- a/builtin/show-ref.c +++ b/builtin/show-ref.c @@ -66,26 +66,25 @@ struct show_ref_data { int show_head; }; -static int show_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cbdata) +static int show_ref(const struct reference *ref, void *cbdata) { struct show_ref_data *data = cbdata; - if (data->show_head && !strcmp(refname, "HEAD")) + if (data->show_head && !strcmp(ref->name, "HEAD")) goto match; if (data->patterns) { - int reflen = strlen(refname); + int reflen = strlen(ref->name); const char **p = data->patterns, *m; while ((m = *p++) != NULL) { int len = strlen(m); if (len > reflen) continue; - if (memcmp(m, refname + reflen - len, len)) + if (memcmp(m, ref->name + reflen - len, len)) continue; if (len == reflen) goto match; - if (refname[reflen - len - 1] == '/') + if (ref->name[reflen - len - 1] == '/') goto match; } return 0; @@ -94,18 +93,15 @@ static int show_ref(const char *refname, const char *referent UNUSED, const stru match: data->found_match++; - show_one(data->show_one_opts, refname, oid); + show_one(data->show_one_opts, ref->name, ref->oid); return 0; } -static int add_existing(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag UNUSED, void *cbdata) +static int add_existing(const struct reference *ref, void *cbdata) { struct string_list *list = (struct string_list *)cbdata; - string_list_insert(list, refname); + string_list_insert(list, ref->name); return 0; } diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index fcd73abe53..35f6cf735e 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -593,16 +593,12 @@ static void print_status(unsigned int flags, char state, const char *path, printf("\n"); } -static int handle_submodule_head_ref(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *cb_data) +static int handle_submodule_head_ref(const struct reference *ref, void *cb_data) { struct object_id *output = cb_data; - if (oid) - oidcpy(output, oid); + if (ref->oid) + oidcpy(output, ref->oid); return 0; } diff --git a/builtin/worktree.c b/builtin/worktree.c index 812774a5ca..b7f323b5e4 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -635,11 +635,7 @@ static void print_preparing_worktree_line(int detach, * * Returns 0 on failure and non-zero on success. */ -static int first_valid_ref(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, - void *cb_data UNUSED) +static int first_valid_ref(const struct reference *ref UNUSED, void *cb_data UNUSED) { return 1; } diff --git a/commit-graph.c b/commit-graph.c index 474454db73..f91af41625 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1851,18 +1851,16 @@ struct refs_cb_data { struct progress *progress; }; -static int add_ref_to_set(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int add_ref_to_set(const struct reference *ref, void *cb_data) { + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct refs_cb_data *data = (struct refs_cb_data *)cb_data; - if (!peel_iterated_oid(data->repo, oid, &peeled)) - oid = &peeled; - if (odb_read_object_info(data->repo->objects, oid, NULL) == OBJ_COMMIT) - oidset_insert(data->commits, oid); + if (!peel_iterated_oid(data->repo, ref->oid, &peeled)) + maybe_peeled = &peeled; + if (odb_read_object_info(data->repo->objects, maybe_peeled, NULL) == OBJ_COMMIT) + oidset_insert(data->commits, maybe_peeled); display_progress(data->progress, oidset_size(data->commits)); diff --git a/delta-islands.c b/delta-islands.c index 36c94799d6..7cfebc4162 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -390,8 +390,7 @@ static void add_ref_to_island(kh_str_t *remote_islands, const char *island_name, rl->hash += sha_core; } -static int find_island_for_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, void *cb) +static int find_island_for_ref(const struct reference *ref, void *cb) { struct island_load_data *ild = cb; @@ -406,7 +405,7 @@ static int find_island_for_ref(const char *refname, const char *referent UNUSED, /* walk backwards to get last-one-wins ordering */ for (i = ild->nr - 1; i >= 0; i--) { - if (!regexec(&ild->rx[i], refname, + if (!regexec(&ild->rx[i], ref->name, ARRAY_SIZE(matches), matches, 0)) break; } @@ -428,10 +427,10 @@ static int find_island_for_ref(const char *refname, const char *referent UNUSED, if (island_name.len) strbuf_addch(&island_name, '-'); - strbuf_add(&island_name, refname + match->rm_so, match->rm_eo - match->rm_so); + strbuf_add(&island_name, ref->name + match->rm_so, match->rm_eo - match->rm_so); } - add_ref_to_island(ild->remote_islands, island_name.buf, oid); + add_ref_to_island(ild->remote_islands, island_name.buf, ref->oid); strbuf_release(&island_name); return 0; } diff --git a/fetch-pack.c b/fetch-pack.c index fe7a84bf2f..78c45d4a15 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -188,13 +188,9 @@ static int rev_list_insert_ref(struct fetch_negotiator *negotiator, return 0; } -static int rev_list_insert_ref_oid(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int rev_list_insert_ref_oid(const struct reference *ref, void *cb_data) { - return rev_list_insert_ref(cb_data, oid); + return rev_list_insert_ref(cb_data, ref->oid); } enum ack_type { @@ -616,13 +612,9 @@ static int mark_complete(const struct object_id *oid) return 0; } -static int mark_complete_oid(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, - void *cb_data UNUSED) +static int mark_complete_oid(const struct reference *ref, void *cb_data UNUSED) { - return mark_complete(oid); + return mark_complete(ref->oid); } static void mark_recent_complete_commits(struct fetch_pack_args *args, diff --git a/help.c b/help.c index 5854dd4a7e..20e114432d 100644 --- a/help.c +++ b/help.c @@ -851,18 +851,16 @@ struct similar_ref_cb { struct string_list *similar_refs; }; -static int append_similar_ref(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, void *cb_data) +static int append_similar_ref(const struct reference *ref, void *cb_data) { struct similar_ref_cb *cb = (struct similar_ref_cb *)(cb_data); - char *branch = strrchr(refname, '/') + 1; + char *branch = strrchr(ref->name, '/') + 1; /* A remote branch of the same name is deemed similar */ - if (starts_with(refname, "refs/remotes/") && + if (starts_with(ref->name, "refs/remotes/") && !strcmp(branch, cb->base_ref)) string_list_append_nodup(cb->similar_refs, - refs_shorten_unambiguous_ref(get_main_ref_store(the_repository), refname, 1)); + refs_shorten_unambiguous_ref(get_main_ref_store(the_repository), ref->name, 1)); return 0; } diff --git a/http-backend.c b/http-backend.c index 9084058f1e..92e1733f14 100644 --- a/http-backend.c +++ b/http-backend.c @@ -513,18 +513,17 @@ static void run_service(const char **argv, int buffer_input) exit(1); } -static int show_text_ref(const char *name, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int show_text_ref(const struct reference *ref, void *cb_data) { - const char *name_nons = strip_namespace(name); + const char *name_nons = strip_namespace(ref->name); struct strbuf *buf = cb_data; - struct object *o = parse_object(the_repository, oid); + struct object *o = parse_object(the_repository, ref->oid); if (!o) return 0; - strbuf_addf(buf, "%s\t%s\n", oid_to_hex(oid), name_nons); + strbuf_addf(buf, "%s\t%s\n", oid_to_hex(ref->oid), name_nons); if (o->type == OBJ_TAG) { - o = deref_tag(the_repository, o, name, 0); + o = deref_tag(the_repository, o, ref->name, 0); if (!o) return 0; strbuf_addf(buf, "%s\t%s^{}\n", oid_to_hex(&o->oid), @@ -569,21 +568,20 @@ static void get_info_refs(struct strbuf *hdr, char *arg UNUSED) strbuf_release(&buf); } -static int show_head_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data) +static int show_head_ref(const struct reference *ref, void *cb_data) { struct strbuf *buf = cb_data; - if (flag & REF_ISSYMREF) { + if (ref->flags & REF_ISSYMREF) { const char *target = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), - refname, + ref->name, RESOLVE_REF_READING, NULL, NULL); if (target) strbuf_addf(buf, "ref: %s\n", strip_namespace(target)); } else { - strbuf_addf(buf, "%s\n", oid_to_hex(oid)); + strbuf_addf(buf, "%s\n", oid_to_hex(ref->oid)); } return 0; diff --git a/log-tree.c b/log-tree.c index 7d917f2a83..1729b0c201 100644 --- a/log-tree.c +++ b/log-tree.c @@ -147,9 +147,7 @@ static int ref_filter_match(const char *refname, return 1; } -static int add_ref_decoration(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, - void *cb_data) +static int add_ref_decoration(const struct reference *ref, void *cb_data) { int i; struct object *obj; @@ -158,16 +156,16 @@ static int add_ref_decoration(const char *refname, const char *referent UNUSED, struct decoration_filter *filter = (struct decoration_filter *)cb_data; const char *git_replace_ref_base = ref_namespace[NAMESPACE_REPLACE].ref; - if (filter && !ref_filter_match(refname, filter)) + if (filter && !ref_filter_match(ref->name, filter)) return 0; - if (starts_with(refname, git_replace_ref_base)) { + if (starts_with(ref->name, git_replace_ref_base)) { struct object_id original_oid; if (!replace_refs_enabled(the_repository)) return 0; - if (get_oid_hex(refname + strlen(git_replace_ref_base), + if (get_oid_hex(ref->name + strlen(git_replace_ref_base), &original_oid)) { - warning("invalid replace ref %s", refname); + warning("invalid replace ref %s", ref->name); return 0; } obj = parse_object(the_repository, &original_oid); @@ -176,10 +174,10 @@ static int add_ref_decoration(const char *refname, const char *referent UNUSED, return 0; } - objtype = odb_read_object_info(the_repository->objects, oid, NULL); + objtype = odb_read_object_info(the_repository->objects, ref->oid, NULL); if (objtype < 0) return 0; - obj = lookup_object_by_type(the_repository, oid, objtype); + obj = lookup_object_by_type(the_repository, ref->oid, objtype); for (i = 0; i < ARRAY_SIZE(ref_namespace); i++) { struct ref_namespace_info *info = &ref_namespace[i]; @@ -187,24 +185,24 @@ static int add_ref_decoration(const char *refname, const char *referent UNUSED, if (!info->decoration) continue; if (info->exact) { - if (!strcmp(refname, info->ref)) { + if (!strcmp(ref->name, info->ref)) { deco_type = info->decoration; break; } - } else if (starts_with(refname, info->ref)) { + } else if (starts_with(ref->name, info->ref)) { deco_type = info->decoration; break; } } - add_name_decoration(deco_type, refname, obj); + add_name_decoration(deco_type, ref->name, obj); while (obj->type == OBJ_TAG) { if (!obj->parsed) parse_object(the_repository, &obj->oid); obj = ((struct tag *)obj)->tagged; if (!obj) break; - add_name_decoration(DECORATION_REF_TAG, refname, obj); + add_name_decoration(DECORATION_REF_TAG, ref->name, obj); } return 0; } diff --git a/ls-refs.c b/ls-refs.c index c47acde07f..64d0272369 100644 --- a/ls-refs.c +++ b/ls-refs.c @@ -75,42 +75,42 @@ struct ls_refs_data { unsigned unborn : 1; }; -static int send_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data) +static int send_ref(const struct reference *ref, void *cb_data) { struct ls_refs_data *data = cb_data; - const char *refname_nons = strip_namespace(refname); + const char *refname_nons = strip_namespace(ref->name); strbuf_reset(&data->buf); - if (ref_is_hidden(refname_nons, refname, &data->hidden_refs)) + if (ref_is_hidden(refname_nons, ref->name, &data->hidden_refs)) return 0; if (!ref_match(&data->prefixes, refname_nons)) return 0; - if (oid) - strbuf_addf(&data->buf, "%s %s", oid_to_hex(oid), refname_nons); + if (ref->oid) + strbuf_addf(&data->buf, "%s %s", oid_to_hex(ref->oid), refname_nons); else strbuf_addf(&data->buf, "unborn %s", refname_nons); - if (data->symrefs && flag & REF_ISSYMREF) { + if (data->symrefs && ref->flags & REF_ISSYMREF) { + int unused_flag; struct object_id unused; const char *symref_target = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), - refname, + ref->name, 0, &unused, - &flag); + &unused_flag); if (!symref_target) - die("'%s' is a symref but it is not?", refname); + die("'%s' is a symref but it is not?", ref->name); strbuf_addf(&data->buf, " symref-target:%s", strip_namespace(symref_target)); } - if (data->peel && oid) { + if (data->peel && ref->oid) { struct object_id peeled; - if (!peel_iterated_oid(the_repository, oid, &peeled)) + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) strbuf_addf(&data->buf, " peeled:%s", oid_to_hex(&peeled)); } @@ -131,9 +131,17 @@ static void send_possibly_unborn_head(struct ls_refs_data *data) if (!refs_resolve_ref_unsafe(get_main_ref_store(the_repository), namespaced.buf, 0, &oid, &flag)) return; /* bad ref */ oid_is_null = is_null_oid(&oid); + if (!oid_is_null || - (data->unborn && data->symrefs && (flag & REF_ISSYMREF))) - send_ref(namespaced.buf, NULL, oid_is_null ? NULL : &oid, flag, data); + (data->unborn && data->symrefs && (flag & REF_ISSYMREF))) { + struct reference ref = { + .name = namespaced.buf, + .oid = oid_is_null ? NULL : &oid, + .flags = flag, + }; + + send_ref(&ref, data); + } strbuf_release(&namespaced); } diff --git a/midx-write.c b/midx-write.c index c73010df6d..f4dd875747 100644 --- a/midx-write.c +++ b/midx-write.c @@ -697,28 +697,27 @@ static void prepare_midx_packing_data(struct packing_data *pdata, trace2_region_leave("midx", "prepare_midx_packing_data", ctx->repo); } -static int add_ref_to_pending(const char *refname, const char *referent UNUSED, - const struct object_id *oid, - int flag, void *cb_data) +static int add_ref_to_pending(const struct reference *ref, void *cb_data) { struct rev_info *revs = (struct rev_info*)cb_data; + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct object *object; - if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) { - warning("symbolic ref is dangling: %s", refname); + if ((ref->flags & REF_ISSYMREF) && (ref->flags & REF_ISBROKEN)) { + warning("symbolic ref is dangling: %s", ref->name); return 0; } - if (!peel_iterated_oid(revs->repo, oid, &peeled)) - oid = &peeled; + if (!peel_iterated_oid(revs->repo, ref->oid, &peeled)) + maybe_peeled = &peeled; - object = parse_object_or_die(revs->repo, oid, refname); + object = parse_object_or_die(revs->repo, maybe_peeled, ref->name); if (object->type != OBJ_COMMIT) return 0; add_pending_object(revs, object, ""); - if (bitmap_is_preferred_refname(revs->repo, refname)) + if (bitmap_is_preferred_refname(revs->repo, ref->name)) object->flags |= NEEDS_BITMAP; return 0; } diff --git a/negotiator/default.c b/negotiator/default.c index c479da9b09..116dedcf83 100644 --- a/negotiator/default.c +++ b/negotiator/default.c @@ -38,11 +38,10 @@ static void rev_list_push(struct negotiation_state *ns, } } -static int clear_marks(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data UNUSED) +static int clear_marks(const struct reference *ref, void *cb_data UNUSED) { - struct object *o = deref_tag(the_repository, parse_object(the_repository, oid), refname, 0); + struct object *o = deref_tag(the_repository, parse_object(the_repository, ref->oid), + ref->name, 0); if (o && o->type == OBJ_COMMIT) clear_commit_marks((struct commit *)o, diff --git a/negotiator/skipping.c b/negotiator/skipping.c index 616df6bf3a..0a272130fb 100644 --- a/negotiator/skipping.c +++ b/negotiator/skipping.c @@ -75,11 +75,10 @@ static struct entry *rev_list_push(struct data *data, struct commit *commit, int return entry; } -static int clear_marks(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data UNUSED) +static int clear_marks(const struct reference *ref, void *cb_data UNUSED) { - struct object *o = deref_tag(the_repository, parse_object(the_repository, oid), refname, 0); + struct object *o = deref_tag(the_repository, parse_object(the_repository, ref->oid), + ref->name, 0); if (o && o->type == OBJ_COMMIT) clear_commit_marks((struct commit *)o, diff --git a/notes.c b/notes.c index 9a2e9181fe..8e00fd8c47 100644 --- a/notes.c +++ b/notes.c @@ -938,13 +938,11 @@ out: return ret; } -static int string_list_add_one_ref(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag UNUSED, void *cb) +static int string_list_add_one_ref(const struct reference *ref, void *cb) { struct string_list *refs = cb; - if (!unsorted_string_list_has_string(refs, refname)) - string_list_append(refs, refname); + if (!unsorted_string_list_has_string(refs, ref->name)) + string_list_append(refs, ref->name); return 0; } diff --git a/object-name.c b/object-name.c index f6902e140d..7e8109f25f 100644 --- a/object-name.c +++ b/object-name.c @@ -1444,18 +1444,16 @@ struct handle_one_ref_cb { struct commit_list **list; }; -static int handle_one_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int handle_one_ref(const struct reference *ref, void *cb_data) { struct handle_one_ref_cb *cb = cb_data; struct commit_list **list = cb->list; - struct object *object = parse_object(cb->repo, oid); + struct object *object = parse_object(cb->repo, ref->oid); if (!object) return 0; if (object->type == OBJ_TAG) { - object = deref_tag(cb->repo, object, path, - strlen(path)); + object = deref_tag(cb->repo, object, ref->name, + strlen(ref->name)); if (!object) return 0; } diff --git a/pseudo-merge.c b/pseudo-merge.c index 893b763fe4..0abd51b42c 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -221,28 +221,25 @@ void load_pseudo_merges_from_config(struct repository *r, } } -static int find_pseudo_merge_group_for_ref(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *_data) +static int find_pseudo_merge_group_for_ref(const struct reference *ref, void *_data) { struct bitmap_writer *writer = _data; + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct commit *c; uint32_t i; int has_bitmap; - if (!peel_iterated_oid(the_repository, oid, &peeled)) - oid = &peeled; + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + maybe_peeled = &peeled; - c = lookup_commit(the_repository, oid); + c = lookup_commit(the_repository, maybe_peeled); if (!c) return 0; - if (!packlist_find(writer->to_pack, oid)) + if (!packlist_find(writer->to_pack, maybe_peeled)) return 0; - has_bitmap = bitmap_writer_has_bitmapped_object_id(writer, oid); + has_bitmap = bitmap_writer_has_bitmapped_object_id(writer, maybe_peeled); for (i = 0; i < writer->pseudo_merge_groups.nr; i++) { struct pseudo_merge_group *group; @@ -252,7 +249,7 @@ static int find_pseudo_merge_group_for_ref(const char *refname, size_t j; group = writer->pseudo_merge_groups.items[i].util; - if (regexec(group->pattern, refname, ARRAY_SIZE(captures), + if (regexec(group->pattern, ref->name, ARRAY_SIZE(captures), captures, 0)) continue; @@ -269,7 +266,7 @@ static int find_pseudo_merge_group_for_ref(const char *refname, if (group_name.len) strbuf_addch(&group_name, '-'); - strbuf_add(&group_name, refname + match->rm_so, + strbuf_add(&group_name, ref->name + match->rm_so, match->rm_eo - match->rm_so); } diff --git a/reachable.c b/reachable.c index 22266db523..b753c39553 100644 --- a/reachable.c +++ b/reachable.c @@ -83,18 +83,17 @@ static void add_rebase_files(struct rev_info *revs) free_worktrees(worktrees); } -static int add_one_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data) +static int add_one_ref(const struct reference *ref, void *cb_data) { struct rev_info *revs = (struct rev_info *)cb_data; struct object *object; - if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) { - warning("symbolic ref is dangling: %s", path); + if ((ref->flags & REF_ISSYMREF) && (ref->flags & REF_ISBROKEN)) { + warning("symbolic ref is dangling: %s", ref->name); return 0; } - object = parse_object_or_die(the_repository, oid, path); + object = parse_object_or_die(the_repository, ref->oid, ref->name); add_pending_object(revs, object, ""); return 0; diff --git a/ref-filter.c b/ref-filter.c index 30cc488d8a..6837fa60a9 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2954,14 +2954,15 @@ struct ref_filter_cbdata { * A call-back given to for_each_ref(). Filter refs and keep them for * later object processing. */ -static int filter_one(const char *refname, const char *referent, const struct object_id *oid, int flag, void *cb_data) +static int filter_one(const struct reference *ref, void *cb_data) { struct ref_filter_cbdata *ref_cbdata = cb_data; - struct ref_array_item *ref; + struct ref_array_item *item; - ref = apply_ref_filter(refname, referent, oid, flag, ref_cbdata->filter); - if (ref) - ref_array_append(ref_cbdata->array, ref); + item = apply_ref_filter(ref->name, ref->target, ref->oid, + ref->flags, ref_cbdata->filter); + if (item) + ref_array_append(ref_cbdata->array, item); return 0; } @@ -2990,17 +2991,18 @@ struct ref_filter_and_format_cbdata { } internal; }; -static int filter_and_format_one(const char *refname, const char *referent, const struct object_id *oid, int flag, void *cb_data) +static int filter_and_format_one(const struct reference *ref, void *cb_data) { struct ref_filter_and_format_cbdata *ref_cbdata = cb_data; - struct ref_array_item *ref; + struct ref_array_item *item; struct strbuf output = STRBUF_INIT, err = STRBUF_INIT; - ref = apply_ref_filter(refname, referent, oid, flag, ref_cbdata->filter); - if (!ref) + item = apply_ref_filter(ref->name, ref->target, ref->oid, + ref->flags, ref_cbdata->filter); + if (!item) return 0; - if (format_ref_array_item(ref, ref_cbdata->format, &output, &err)) + if (format_ref_array_item(item, ref_cbdata->format, &output, &err)) die("%s", err.buf); if (output.len || !ref_cbdata->format->array_opts.omit_empty) { @@ -3010,7 +3012,7 @@ static int filter_and_format_one(const char *refname, const char *referent, cons strbuf_release(&output); strbuf_release(&err); - free_array_item(ref); + free_array_item(item); /* * Increment the running count of refs that match the filter. If diff --git a/reflog.c b/reflog.c index 65ef259b4f..ac87e20c4f 100644 --- a/reflog.c +++ b/reflog.c @@ -423,16 +423,13 @@ int should_expire_reflog_ent_verbose(struct object_id *ooid, return expire; } -static int push_tip_to_list(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags, void *cb_data) +static int push_tip_to_list(const struct reference *ref, void *cb_data) { struct commit_list **list = cb_data; struct commit *tip_commit; - if (flags & REF_ISSYMREF) + if (ref->flags & REF_ISSYMREF) return 0; - tip_commit = lookup_commit_reference_gently(the_repository, oid, 1); + tip_commit = lookup_commit_reference_gently(the_repository, ref->oid, 1); if (!tip_commit) return 0; commit_list_insert(tip_commit, list); diff --git a/refs.c b/refs.c index 965381367e..25f0579d61 100644 --- a/refs.c +++ b/refs.c @@ -426,17 +426,19 @@ int refs_ref_exists(struct ref_store *refs, const char *refname) NULL, NULL); } -static int for_each_filter_refs(const char *refname, const char *referent, - const struct object_id *oid, - int flags, void *data) +static int for_each_filter_refs(const struct reference *ref, void *data) { struct for_each_ref_filter *filter = data; - if (wildmatch(filter->pattern, refname, 0)) + if (wildmatch(filter->pattern, ref->name, 0)) return 0; - if (filter->prefix) - skip_prefix(refname, filter->prefix, &refname); - return filter->fn(refname, referent, oid, flags, filter->cb_data); + if (filter->prefix) { + struct reference skipped = *ref; + skip_prefix(skipped.name, filter->prefix, &skipped.name); + return filter->fn(&skipped, filter->cb_data); + } else { + return filter->fn(ref, filter->cb_data); + } } struct warn_if_dangling_data { @@ -447,17 +449,15 @@ struct warn_if_dangling_data { int dry_run; }; -static int warn_if_dangling_symref(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags, void *cb_data) +static int warn_if_dangling_symref(const struct reference *ref, void *cb_data) { struct warn_if_dangling_data *d = cb_data; const char *resolves_to, *msg; - if (!(flags & REF_ISSYMREF)) + if (!(ref->flags & REF_ISSYMREF)) return 0; - resolves_to = refs_resolve_ref_unsafe(d->refs, refname, 0, NULL, NULL); + resolves_to = refs_resolve_ref_unsafe(d->refs, ref->name, 0, NULL, NULL); if (!resolves_to || !string_list_has_string(d->refnames, resolves_to)) { return 0; @@ -466,7 +466,7 @@ static int warn_if_dangling_symref(const char *refname, const char *referent UNU msg = d->dry_run ? _("%s%s will become dangling after %s is deleted\n") : _("%s%s has become dangling after %s was deleted\n"); - fprintf(d->fp, msg, d->indent, refname, resolves_to); + fprintf(d->fp, msg, d->indent, ref->name, resolves_to); return 0; } @@ -507,8 +507,15 @@ int refs_head_ref_namespaced(struct ref_store *refs, each_ref_fn fn, void *cb_da int flag; strbuf_addf(&buf, "%sHEAD", get_git_namespace()); - if (!refs_read_ref_full(refs, buf.buf, RESOLVE_REF_READING, &oid, &flag)) - ret = fn(buf.buf, NULL, &oid, flag, cb_data); + if (!refs_read_ref_full(refs, buf.buf, RESOLVE_REF_READING, &oid, &flag)) { + struct reference ref = { + .name = buf.buf, + .oid = &oid, + .flags = flag, + }; + + ret = fn(&ref, cb_data); + } strbuf_release(&buf); return ret; @@ -1741,8 +1748,15 @@ int refs_head_ref(struct ref_store *refs, each_ref_fn fn, void *cb_data) int flag; if (refs_resolve_ref_unsafe(refs, "HEAD", RESOLVE_REF_READING, - &oid, &flag)) - return fn("HEAD", NULL, &oid, flag, cb_data); + &oid, &flag)) { + struct reference ref = { + .name = "HEAD", + .oid = &oid, + .flags = flag, + }; + + return fn(&ref, cb_data); + } return 0; } @@ -2753,14 +2767,10 @@ struct do_for_each_reflog_help { void *cb_data; }; -static int do_for_each_reflog_helper(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, - void *cb_data) +static int do_for_each_reflog_helper(const struct reference *ref, void *cb_data) { struct do_for_each_reflog_help *hp = cb_data; - return hp->fn(refname, hp->cb_data); + return hp->fn(ref->name, hp->cb_data); } int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_data) @@ -2976,25 +2986,24 @@ struct migration_data { uint64_t index; }; -static int migrate_one_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags, void *cb_data) +static int migrate_one_ref(const struct reference *ref, void *cb_data) { struct migration_data *data = cb_data; struct strbuf symref_target = STRBUF_INIT; int ret; - if (flags & REF_ISSYMREF) { - ret = refs_read_symbolic_ref(data->old_refs, refname, &symref_target); + if (ref->flags & REF_ISSYMREF) { + ret = refs_read_symbolic_ref(data->old_refs, ref->name, &symref_target); if (ret < 0) goto done; - ret = ref_transaction_update(data->transaction, refname, NULL, null_oid(the_hash_algo), + ret = ref_transaction_update(data->transaction, ref->name, NULL, null_oid(the_hash_algo), symref_target.buf, NULL, REF_SKIP_CREATE_REFLOG | REF_NO_DEREF, NULL, data->errbuf); if (ret < 0) goto done; } else { - ret = ref_transaction_create(data->transaction, refname, oid, NULL, + ret = ref_transaction_create(data->transaction, ref->name, ref->oid, NULL, REF_SKIP_CREATE_REFLOG | REF_SKIP_OID_VERIFICATION, NULL, data->errbuf); if (ret < 0) diff --git a/refs.h b/refs.h index 4e6bd63aa8..68d235438c 100644 --- a/refs.h +++ b/refs.h @@ -355,14 +355,32 @@ struct ref_transaction; */ #define REF_BAD_NAME 0x08 +/* A reference passed to `for_each_ref()`-style callbacks. */ +struct reference { + /* The fully-qualified name of the reference. */ + const char *name; + + /* The target of a symbolic ref. `NULL` for direct references. */ + const char *target; + + /* + * The object ID of a reference. Either the direct object ID or the + * resolved object ID in the case of a symbolic ref. May be the zero + * object ID in case the symbolic ref cannot be resolved. + */ + const struct object_id *oid; + + /* A bitfield of `REF_` flags. */ + int flags; +}; + /* * The signature for the callback function for the for_each_*() - * functions below. The memory pointed to by the refname and oid - * arguments is only guaranteed to be valid for the duration of a + * functions below. The memory pointed to by the `struct reference` + * argument is only guaranteed to be valid for the duration of a * single callback invocation. */ -typedef int each_ref_fn(const char *refname, const char *referent, - const struct object_id *oid, int flags, void *cb_data); +typedef int each_ref_fn(const struct reference *ref, void *cb_data); /* * The following functions invoke the specified callback function for diff --git a/refs/files-backend.c b/refs/files-backend.c index 8d7007f4aa..eb3142f8f2 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3150,14 +3150,11 @@ static int parse_and_write_reflog(struct files_ref_store *refs, return 0; } -static int ref_present(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, - void *cb_data) +static int ref_present(const struct reference *ref, void *cb_data) { struct string_list *affected_refnames = cb_data; - return string_list_has_string(affected_refnames, refname); + return string_list_has_string(affected_refnames, ref->name); } static int files_transaction_finish_initial(struct files_ref_store *refs, diff --git a/refs/iterator.c b/refs/iterator.c index 17ef841d8a..7f2e718f1c 100644 --- a/refs/iterator.c +++ b/refs/iterator.c @@ -476,7 +476,14 @@ int do_for_each_ref_iterator(struct ref_iterator *iter, current_ref_iter = iter; while ((ok = ref_iterator_advance(iter)) == ITER_OK) { - retval = fn(iter->refname, iter->referent, iter->oid, iter->flags, cb_data); + struct reference ref = { + .name = iter->refname, + .target = iter->referent, + .oid = iter->oid, + .flags = iter->flags, + }; + + retval = fn(&ref, cb_data); if (retval) goto out; } diff --git a/remote.c b/remote.c index df9675cd33..59b3715120 100644 --- a/remote.c +++ b/remote.c @@ -2315,21 +2315,19 @@ int format_tracking_info(struct branch *branch, struct strbuf *sb, return 1; } -static int one_local_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int one_local_ref(const struct reference *ref, void *cb_data) { struct ref ***local_tail = cb_data; - struct ref *ref; + struct ref *local_ref; /* we already know it starts with refs/ to get here */ - if (check_refname_format(refname + 5, 0)) + if (check_refname_format(ref->name + 5, 0)) return 0; - ref = alloc_ref(refname); - oidcpy(&ref->new_oid, oid); - **local_tail = ref; - *local_tail = &ref->next; + local_ref = alloc_ref(ref->name); + oidcpy(&local_ref->new_oid, ref->oid); + **local_tail = local_ref; + *local_tail = &local_ref->next; return 0; } @@ -2402,15 +2400,14 @@ struct stale_heads_info { struct refspec *rs; }; -static int get_stale_heads_cb(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags, void *cb_data) +static int get_stale_heads_cb(const struct reference *ref, void *cb_data) { struct stale_heads_info *info = cb_data; struct string_list matches = STRING_LIST_INIT_DUP; struct refspec_item query; int i, stale = 1; memset(&query, 0, sizeof(struct refspec_item)); - query.dst = (char *)refname; + query.dst = (char *)ref->name; refspec_find_all_matches(info->rs, &query, &matches); if (matches.nr == 0) @@ -2423,7 +2420,7 @@ static int get_stale_heads_cb(const char *refname, const char *referent UNUSED, * overlapping refspecs, we need to go over all of the * matching refs. */ - if (flags & REF_ISSYMREF) + if (ref->flags & REF_ISSYMREF) goto clean_exit; for (i = 0; stale && i < matches.nr; i++) @@ -2431,8 +2428,8 @@ static int get_stale_heads_cb(const char *refname, const char *referent UNUSED, stale = 0; if (stale) { - struct ref *ref = make_linked_ref(refname, &info->stale_refs_tail); - oidcpy(&ref->new_oid, oid); + struct ref *linked_ref = make_linked_ref(ref->name, &info->stale_refs_tail); + oidcpy(&linked_ref->new_oid, ref->oid); } clean_exit: diff --git a/repack-midx.c b/repack-midx.c index 6f6202c5bc..349f7e20b5 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -16,25 +16,23 @@ struct midx_snapshot_ref_data { int preferred; }; -static int midx_snapshot_ref_one(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *_data) +static int midx_snapshot_ref_one(const struct reference *ref, void *_data) { struct midx_snapshot_ref_data *data = _data; + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; - if (!peel_iterated_oid(data->repo, oid, &peeled)) - oid = &peeled; + if (!peel_iterated_oid(data->repo, ref->oid, &peeled)) + maybe_peeled = &peeled; - if (oidset_insert(&data->seen, oid)) + if (oidset_insert(&data->seen, maybe_peeled)) return 0; /* already seen */ - if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) + if (odb_read_object_info(data->repo->objects, maybe_peeled, NULL) != OBJ_COMMIT) return 0; fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", - oid_to_hex(oid)); + oid_to_hex(maybe_peeled)); return 0; } diff --git a/replace-object.c b/replace-object.c index 3eae051074..03d0f1f083 100644 --- a/replace-object.c +++ b/replace-object.c @@ -8,31 +8,27 @@ #include "repository.h" #include "commit.h" -static int register_replace_ref(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int register_replace_ref(const struct reference *ref, void *cb_data) { struct repository *r = cb_data; /* Get sha1 from refname */ - const char *slash = strrchr(refname, '/'); - const char *hash = slash ? slash + 1 : refname; + const char *slash = strrchr(ref->name, '/'); + const char *hash = slash ? slash + 1 : ref->name; struct replace_object *repl_obj = xmalloc(sizeof(*repl_obj)); if (get_oid_hex_algop(hash, &repl_obj->original.oid, r->hash_algo)) { free(repl_obj); - warning(_("bad replace ref name: %s"), refname); + warning(_("bad replace ref name: %s"), ref->name); return 0; } /* Copy sha1 from the read ref */ - oidcpy(&repl_obj->replacement, oid); + oidcpy(&repl_obj->replacement, ref->oid); /* Register new object */ if (oidmap_put(&r->objects->replace_map, repl_obj)) - die(_("duplicate replace ref: %s"), refname); + die(_("duplicate replace ref: %s"), ref->name); return 0; } diff --git a/revision.c b/revision.c index cf5e6c1ec9..5f0850ae5c 100644 --- a/revision.c +++ b/revision.c @@ -1644,19 +1644,17 @@ struct all_refs_cb { struct worktree *wt; }; -static int handle_one_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int handle_one_ref(const struct reference *ref, void *cb_data) { struct all_refs_cb *cb = cb_data; struct object *object; - if (ref_excluded(&cb->all_revs->ref_excludes, path)) + if (ref_excluded(&cb->all_revs->ref_excludes, ref->name)) return 0; - object = get_reference(cb->all_revs, path, oid, cb->all_flags); - add_rev_cmdline(cb->all_revs, object, path, REV_CMD_REF, cb->all_flags); - add_pending_object(cb->all_revs, object, path); + object = get_reference(cb->all_revs, ref->name, ref->oid, cb->all_flags); + add_rev_cmdline(cb->all_revs, object, ref->name, REV_CMD_REF, cb->all_flags); + add_pending_object(cb->all_revs, object, ref->name); return 0; } diff --git a/server-info.c b/server-info.c index 1d33de821e..0a07c722e8 100644 --- a/server-info.c +++ b/server-info.c @@ -148,23 +148,21 @@ out: return ret; } -static int add_info_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int add_info_ref(const struct reference *ref, void *cb_data) { struct update_info_ctx *uic = cb_data; - struct object *o = parse_object(uic->repo, oid); + struct object *o = parse_object(uic->repo, ref->oid); if (!o) return -1; - if (uic_printf(uic, "%s %s\n", oid_to_hex(oid), path) < 0) + if (uic_printf(uic, "%s %s\n", oid_to_hex(ref->oid), ref->name) < 0) return -1; if (o->type == OBJ_TAG) { - o = deref_tag(uic->repo, o, path, 0); + o = deref_tag(uic->repo, o, ref->name, 0); if (o) if (uic_printf(uic, "%s %s^{}\n", - oid_to_hex(&o->oid), path) < 0) + oid_to_hex(&o->oid), ref->name) < 0) return -1; } return 0; diff --git a/shallow.c b/shallow.c index d9cd4e219c..55b9cd9d3f 100644 --- a/shallow.c +++ b/shallow.c @@ -626,14 +626,10 @@ static void paint_down(struct paint_info *info, const struct object_id *oid, free(tmp); } -static int mark_uninteresting(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *cb_data UNUSED) +static int mark_uninteresting(const struct reference *ref, void *cb_data UNUSED) { struct commit *commit = lookup_commit_reference_gently(the_repository, - oid, 1); + ref->oid, 1); if (!commit) return 0; commit->object.flags |= UNINTERESTING; @@ -742,16 +738,12 @@ struct commit_array { size_t nr, alloc; }; -static int add_ref(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *cb_data) +static int add_ref(const struct reference *ref, void *cb_data) { struct commit_array *ca = cb_data; ALLOC_GROW(ca->commits, ca->nr + 1, ca->alloc); ca->commits[ca->nr] = lookup_commit_reference_gently(the_repository, - oid, 1); + ref->oid, 1); if (ca->commits[ca->nr]) ca->nr++; return 0; diff --git a/submodule.c b/submodule.c index 35c55155f7..40a5c6fb9d 100644 --- a/submodule.c +++ b/submodule.c @@ -934,10 +934,7 @@ static void free_submodules_data(struct string_list *submodules) string_list_clear(submodules, 1); } -static int has_remote(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, void *cb_data UNUSED) +static int has_remote(const struct reference *ref UNUSED, void *cb_data UNUSED) { return 1; } @@ -1255,13 +1252,10 @@ int push_unpushed_submodules(struct repository *r, return ret; } -static int append_oid_to_array(const char *ref UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *data) +static int append_oid_to_array(const struct reference *ref, void *data) { struct oid_array *array = data; - oid_array_append(array, oid); + oid_array_append(array, ref->oid); return 0; } diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c index 83b06d39a3..b1215947c5 100644 --- a/t/helper/test-ref-store.c +++ b/t/helper/test-ref-store.c @@ -154,10 +154,9 @@ static int cmd_rename_ref(struct ref_store *refs, const char **argv) return refs_rename_ref(refs, oldref, newref, logmsg); } -static int each_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags, void *cb_data UNUSED) +static int each_ref(const struct reference *ref, void *cb_data UNUSED) { - printf("%s %s 0x%x\n", oid_to_hex(oid), refname, flags); + printf("%s %s 0x%x\n", oid_to_hex(ref->oid), ref->name, ref->flags); return 0; } diff --git a/upload-pack.c b/upload-pack.c index 1e87ae9559..0d563ae74e 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -870,8 +870,8 @@ static void send_unshallow(struct upload_pack_data *data) } } -static int check_ref(const char *refname_full, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data); +static int check_ref(const struct reference *ref, void *cb_data); + static void deepen(struct upload_pack_data *data, int depth) { if (depth == INFINITE_DEPTH && !is_repository_shallow(the_repository)) { @@ -1224,13 +1224,12 @@ static int mark_our_ref(const char *refname, const char *refname_full, return 0; } -static int check_ref(const char *refname_full, const char *referent UNUSED,const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int check_ref(const struct reference *ref, void *cb_data) { - const char *refname = strip_namespace(refname_full); + const char *refname = strip_namespace(ref->name); struct upload_pack_data *data = cb_data; - mark_our_ref(refname, refname_full, oid, &data->hidden_refs); + mark_our_ref(refname, ref->name, ref->oid, &data->hidden_refs); return 0; } @@ -1292,27 +1291,25 @@ static void write_v0_ref(struct upload_pack_data *data, return; } -static int send_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int send_ref(const struct reference *ref, void *cb_data) { - write_v0_ref(cb_data, refname, strip_namespace(refname), oid); + write_v0_ref(cb_data, ref->name, strip_namespace(ref->name), ref->oid); return 0; } -static int find_symref(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag, void *cb_data) +static int find_symref(const struct reference *ref, void *cb_data) { const char *symref_target; struct string_list_item *item; + int flag; - if ((flag & REF_ISSYMREF) == 0) + if ((ref->flags & REF_ISSYMREF) == 0) return 0; symref_target = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), - refname, 0, NULL, &flag); + ref->name, 0, NULL, &flag); if (!symref_target || (flag & REF_ISSYMREF) == 0) - die("'%s' is a symref but it is not?", refname); - item = string_list_append(cb_data, strip_namespace(refname)); + die("'%s' is a symref but it is not?", ref->name); + item = string_list_append(cb_data, strip_namespace(ref->name)); item->util = xstrdup(strip_namespace(symref_target)); return 0; } diff --git a/walker.c b/walker.c index 8073754517..409b646578 100644 --- a/walker.c +++ b/walker.c @@ -226,14 +226,10 @@ static int interpret_target(struct walker *walker, char *target, struct object_i return -1; } -static int mark_complete(const char *path UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, - void *cb_data UNUSED) +static int mark_complete(const struct reference *ref, void *cb_data UNUSED) { struct commit *commit = lookup_commit_reference_gently(the_repository, - oid, 1); + ref->oid, 1); if (commit) { commit->object.flags |= COMPLETE; diff --git a/worktree.c b/worktree.c index a2a5f51f29..9308389cb6 100644 --- a/worktree.c +++ b/worktree.c @@ -595,8 +595,15 @@ int other_head_refs(each_ref_fn fn, void *cb_data) if (refs_resolve_ref_unsafe(get_main_ref_store(the_repository), refname.buf, RESOLVE_REF_READING, - &oid, &flag)) - ret = fn(refname.buf, NULL, &oid, flag, cb_data); + &oid, &flag)) { + struct reference ref = { + .name = refname.buf, + .oid = &oid, + .flags = flag, + }; + + ret = fn(&ref, cb_data); + } if (ret) break; } From 89baa52da612dde6da031acfa2cb957d4297d544 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:11 +0200 Subject: [PATCH 78/92] refs: introduce `.ref` field for the base iterator The base iterator has a couple of fields that tracks the name, target, object ID and flags for the current reference. Due to this design we have to create a new `struct reference` whenever we want to hand over that reference to the callback function, which is tedious and not very efficient. Convert the structure to instead contain a `struct reference` as member. This member is expected to be populated by the implementations of the iterator and is handed over to the callback directly. While at it, simplify `should_pack_ref()` to take a `struct reference` directly instead of passing its respective fields. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 8 +++---- refs/debug.c | 8 +++---- refs/files-backend.c | 47 ++++++++++++++++++----------------------- refs/iterator.c | 39 +++++++++++----------------------- refs/packed-backend.c | 46 ++++++++++++++++++++-------------------- refs/ref-cache.c | 10 ++++----- refs/refs-internal.h | 5 +---- refs/reftable-backend.c | 12 +++++------ 8 files changed, 75 insertions(+), 100 deletions(-) diff --git a/refs.c b/refs.c index 25f0579d61..f96cf43b12 100644 --- a/refs.c +++ b/refs.c @@ -2327,8 +2327,8 @@ int refs_optimize(struct ref_store *refs, struct pack_refs_opts *opts) int peel_iterated_oid(struct repository *r, const struct object_id *base, struct object_id *peeled) { if (current_ref_iter && - (current_ref_iter->oid == base || - oideq(current_ref_iter->oid, base))) + (current_ref_iter->ref.oid == base || + oideq(current_ref_iter->ref.oid, base))) return ref_iterator_peel(current_ref_iter, peeled); return peel_object(r, base, peeled) ? -1 : 0; @@ -2703,7 +2703,7 @@ enum ref_transaction_error refs_verify_refnames_available(struct ref_store *refs while ((ok = ref_iterator_advance(iter)) == ITER_OK) { if (skip && - string_list_has_string(skip, iter->refname)) + string_list_has_string(skip, iter->ref.name)) continue; if (transaction && ref_transaction_maybe_set_rejected( @@ -2712,7 +2712,7 @@ enum ref_transaction_error refs_verify_refnames_available(struct ref_store *refs continue; strbuf_addf(err, _("'%s' exists; cannot create '%s'"), - iter->refname, refname); + iter->ref.name, refname); goto cleanup; } diff --git a/refs/debug.c b/refs/debug.c index 697adbd0dc..67718bd1f4 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -160,11 +160,9 @@ static int debug_ref_iterator_advance(struct ref_iterator *ref_iterator) trace_printf_key(&trace_refs, "iterator_advance: (%d)\n", res); else trace_printf_key(&trace_refs, "iterator_advance: %s (0)\n", - diter->iter->refname); + diter->iter->ref.name); - diter->base.refname = diter->iter->refname; - diter->base.oid = diter->iter->oid; - diter->base.flags = diter->iter->flags; + diter->base.ref = diter->iter->ref; return res; } @@ -185,7 +183,7 @@ static int debug_ref_iterator_peel(struct ref_iterator *ref_iterator, struct debug_ref_iterator *diter = (struct debug_ref_iterator *)ref_iterator; int res = diter->iter->vtable->peel(diter->iter, peeled); - trace_printf_key(&trace_refs, "iterator_peel: %s: %d\n", diter->iter->refname, res); + trace_printf_key(&trace_refs, "iterator_peel: %s: %d\n", diter->iter->ref.name, res); return res; } diff --git a/refs/files-backend.c b/refs/files-backend.c index eb3142f8f2..fac53fa052 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -961,26 +961,23 @@ static int files_ref_iterator_advance(struct ref_iterator *ref_iterator) while ((ok = ref_iterator_advance(iter->iter0)) == ITER_OK) { if (iter->flags & DO_FOR_EACH_PER_WORKTREE_ONLY && - parse_worktree_ref(iter->iter0->refname, NULL, NULL, + parse_worktree_ref(iter->iter0->ref.name, NULL, NULL, NULL) != REF_WORKTREE_CURRENT) continue; if ((iter->flags & DO_FOR_EACH_OMIT_DANGLING_SYMREFS) && - (iter->iter0->flags & REF_ISSYMREF) && - (iter->iter0->flags & REF_ISBROKEN)) + (iter->iter0->ref.flags & REF_ISSYMREF) && + (iter->iter0->ref.flags & REF_ISBROKEN)) continue; if (!(iter->flags & DO_FOR_EACH_INCLUDE_BROKEN) && - !ref_resolves_to_object(iter->iter0->refname, + !ref_resolves_to_object(iter->iter0->ref.name, iter->repo, - iter->iter0->oid, - iter->iter0->flags)) + iter->iter0->ref.oid, + iter->iter0->ref.flags)) continue; - iter->base.refname = iter->iter0->refname; - iter->base.oid = iter->iter0->oid; - iter->base.flags = iter->iter0->flags; - iter->base.referent = iter->iter0->referent; + iter->base.ref = iter->iter0->ref; return ITER_OK; } @@ -1367,30 +1364,29 @@ static void prune_refs(struct files_ref_store *refs, struct ref_to_prune **refs_ * Return true if the specified reference should be packed. */ static int should_pack_ref(struct files_ref_store *refs, - const char *refname, - const struct object_id *oid, unsigned int ref_flags, + const struct reference *ref, struct pack_refs_opts *opts) { struct string_list_item *item; /* Do not pack per-worktree refs: */ - if (parse_worktree_ref(refname, NULL, NULL, NULL) != + if (parse_worktree_ref(ref->name, NULL, NULL, NULL) != REF_WORKTREE_SHARED) return 0; /* Do not pack symbolic refs: */ - if (ref_flags & REF_ISSYMREF) + if (ref->flags & REF_ISSYMREF) return 0; /* Do not pack broken refs: */ - if (!ref_resolves_to_object(refname, refs->base.repo, oid, ref_flags)) + if (!ref_resolves_to_object(ref->name, refs->base.repo, ref->oid, ref->flags)) return 0; - if (ref_excluded(opts->exclusions, refname)) + if (ref_excluded(opts->exclusions, ref->name)) return 0; for_each_string_list_item(item, opts->includes) - if (!wildmatch(item->string, refname, 0)) + if (!wildmatch(item->string, ref->name, 0)) return 1; return 0; @@ -1443,8 +1439,7 @@ static int should_pack_refs(struct files_ref_store *refs, iter = cache_ref_iterator_begin(get_loose_ref_cache(refs, 0), NULL, refs->base.repo, 0); while ((ret = ref_iterator_advance(iter)) == ITER_OK) { - if (should_pack_ref(refs, iter->refname, iter->oid, - iter->flags, opts)) + if (should_pack_ref(refs, &iter->ref, opts)) refcount++; if (refcount >= limit) { ref_iterator_free(iter); @@ -1489,24 +1484,24 @@ static int files_pack_refs(struct ref_store *ref_store, * in the packed ref cache. If the reference should be * pruned, also add it to refs_to_prune. */ - if (!should_pack_ref(refs, iter->refname, iter->oid, iter->flags, opts)) + if (!should_pack_ref(refs, &iter->ref, opts)) continue; /* * Add a reference creation for this reference to the * packed-refs transaction: */ - if (ref_transaction_update(transaction, iter->refname, - iter->oid, NULL, NULL, NULL, + if (ref_transaction_update(transaction, iter->ref.name, + iter->ref.oid, NULL, NULL, NULL, REF_NO_DEREF, NULL, &err)) die("failure preparing to create packed reference %s: %s", - iter->refname, err.buf); + iter->ref.name, err.buf); /* Schedule the loose reference for pruning if requested. */ if ((opts->flags & PACK_REFS_PRUNE)) { struct ref_to_prune *n; - FLEX_ALLOC_STR(n, name, iter->refname); - oidcpy(&n->oid, iter->oid); + FLEX_ALLOC_STR(n, name, iter->ref.name); + oidcpy(&n->oid, iter->ref.oid); n->next = refs_to_prune; refs_to_prune = n; } @@ -2379,7 +2374,7 @@ static int files_reflog_iterator_advance(struct ref_iterator *ref_iterator) REFNAME_ALLOW_ONELEVEL)) continue; - iter->base.refname = diter->relative_path; + iter->base.ref.name = diter->relative_path; return ITER_OK; } diff --git a/refs/iterator.c b/refs/iterator.c index 7f2e718f1c..fe5980e1b6 100644 --- a/refs/iterator.c +++ b/refs/iterator.c @@ -41,10 +41,7 @@ void base_ref_iterator_init(struct ref_iterator *iter, struct ref_iterator_vtable *vtable) { iter->vtable = vtable; - iter->refname = NULL; - iter->referent = NULL; - iter->oid = NULL; - iter->flags = 0; + memset(&iter->ref, 0, sizeof(iter->ref)); } struct empty_ref_iterator { @@ -127,8 +124,8 @@ enum iterator_selection ref_iterator_select(struct ref_iterator *iter_worktree, * latter. */ if (iter_worktree) { - int cmp = strcmp(iter_worktree->refname, - iter_common->refname); + int cmp = strcmp(iter_worktree->ref.name, + iter_common->ref.name); if (cmp < 0) return ITER_SELECT_0; else if (!cmp) @@ -139,7 +136,7 @@ enum iterator_selection ref_iterator_select(struct ref_iterator *iter_worktree, * We now know that the lexicographically-next ref is a common * ref. When the common ref is a shared one we return it. */ - if (parse_worktree_ref(iter_common->refname, NULL, NULL, + if (parse_worktree_ref(iter_common->ref.name, NULL, NULL, NULL) == REF_WORKTREE_SHARED) return ITER_SELECT_1; @@ -212,10 +209,7 @@ static int merge_ref_iterator_advance(struct ref_iterator *ref_iterator) } if (selection & ITER_YIELD_CURRENT) { - iter->base.referent = (*iter->current)->referent; - iter->base.refname = (*iter->current)->refname; - iter->base.oid = (*iter->current)->oid; - iter->base.flags = (*iter->current)->flags; + iter->base.ref = (*iter->current)->ref; return ITER_OK; } } @@ -313,7 +307,7 @@ static enum iterator_selection overlay_iterator_select( else if (!front) return ITER_SELECT_1; - cmp = strcmp(front->refname, back->refname); + cmp = strcmp(front->ref.name, back->ref.name); if (cmp < 0) return ITER_SELECT_0; @@ -371,7 +365,7 @@ static int prefix_ref_iterator_advance(struct ref_iterator *ref_iterator) int ok; while ((ok = ref_iterator_advance(iter->iter0)) == ITER_OK) { - int cmp = compare_prefix(iter->iter0->refname, iter->prefix); + int cmp = compare_prefix(iter->iter0->ref.name, iter->prefix); if (cmp < 0) continue; /* @@ -382,6 +376,8 @@ static int prefix_ref_iterator_advance(struct ref_iterator *ref_iterator) if (cmp > 0) return ITER_DONE; + iter->base.ref = iter->iter0->ref; + if (iter->trim) { /* * It is nonsense to trim off characters that @@ -392,15 +388,11 @@ static int prefix_ref_iterator_advance(struct ref_iterator *ref_iterator) * one character left in the refname after * trimming, report it as a bug: */ - if (strlen(iter->iter0->refname) <= iter->trim) + if (strlen(iter->base.ref.name) <= iter->trim) BUG("attempt to trim too many characters"); - iter->base.refname = iter->iter0->refname + iter->trim; - } else { - iter->base.refname = iter->iter0->refname; + iter->base.ref.name += iter->trim; } - iter->base.oid = iter->iter0->oid; - iter->base.flags = iter->iter0->flags; return ITER_OK; } @@ -476,14 +468,7 @@ int do_for_each_ref_iterator(struct ref_iterator *iter, current_ref_iter = iter; while ((ok = ref_iterator_advance(iter)) == ITER_OK) { - struct reference ref = { - .name = iter->refname, - .target = iter->referent, - .oid = iter->oid, - .flags = iter->flags, - }; - - retval = fn(&ref, cb_data); + retval = fn(&iter->ref, cb_data); if (retval) goto out; } diff --git a/refs/packed-backend.c b/refs/packed-backend.c index a8c22a0a7f..7987acdc96 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -908,7 +908,7 @@ static int next_record(struct packed_ref_iterator *iter) if (iter->pos == iter->eof) return ITER_DONE; - iter->base.flags = REF_ISPACKED; + iter->base.ref.flags = REF_ISPACKED; p = iter->pos; if (iter->eof - p < snapshot_hexsz(iter->snapshot) + 2 || @@ -923,22 +923,22 @@ static int next_record(struct packed_ref_iterator *iter) iter->pos, iter->eof - iter->pos); strbuf_add(&iter->refname_buf, p, eol - p); - iter->base.refname = iter->refname_buf.buf; + iter->base.ref.name = iter->refname_buf.buf; if (refname_contains_nul(&iter->refname_buf)) - die("packed refname contains embedded NULL: %s", iter->base.refname); + die("packed refname contains embedded NULL: %s", iter->base.ref.name); - if (check_refname_format(iter->base.refname, REFNAME_ALLOW_ONELEVEL)) { - if (!refname_is_safe(iter->base.refname)) + if (check_refname_format(iter->base.ref.name, REFNAME_ALLOW_ONELEVEL)) { + if (!refname_is_safe(iter->base.ref.name)) die("packed refname is dangerous: %s", - iter->base.refname); + iter->base.ref.name); oidclr(&iter->oid, iter->repo->hash_algo); - iter->base.flags |= REF_BAD_NAME | REF_ISBROKEN; + iter->base.ref.flags |= REF_BAD_NAME | REF_ISBROKEN; } if (iter->snapshot->peeled == PEELED_FULLY || (iter->snapshot->peeled == PEELED_TAGS && - starts_with(iter->base.refname, "refs/tags/"))) - iter->base.flags |= REF_KNOWS_PEELED; + starts_with(iter->base.ref.name, "refs/tags/"))) + iter->base.ref.flags |= REF_KNOWS_PEELED; iter->pos = eol + 1; @@ -956,11 +956,11 @@ static int next_record(struct packed_ref_iterator *iter) * definitely know the value of *this* reference. But * we suppress it if the reference is broken: */ - if ((iter->base.flags & REF_ISBROKEN)) { + if ((iter->base.ref.flags & REF_ISBROKEN)) { oidclr(&iter->peeled, iter->repo->hash_algo); - iter->base.flags &= ~REF_KNOWS_PEELED; + iter->base.ref.flags &= ~REF_KNOWS_PEELED; } else { - iter->base.flags |= REF_KNOWS_PEELED; + iter->base.ref.flags |= REF_KNOWS_PEELED; } } else { oidclr(&iter->peeled, iter->repo->hash_algo); @@ -976,15 +976,15 @@ static int packed_ref_iterator_advance(struct ref_iterator *ref_iterator) int ok; while ((ok = next_record(iter)) == ITER_OK) { - const char *refname = iter->base.refname; + const char *refname = iter->base.ref.name; const char *prefix = iter->prefix; if (iter->flags & DO_FOR_EACH_PER_WORKTREE_ONLY && - !is_per_worktree_ref(iter->base.refname)) + !is_per_worktree_ref(iter->base.ref.name)) continue; if (!(iter->flags & DO_FOR_EACH_INCLUDE_BROKEN) && - !ref_resolves_to_object(iter->base.refname, iter->repo, + !ref_resolves_to_object(iter->base.ref.name, iter->repo, &iter->oid, iter->flags)) continue; @@ -1033,10 +1033,10 @@ static int packed_ref_iterator_peel(struct ref_iterator *ref_iterator, struct packed_ref_iterator *iter = (struct packed_ref_iterator *)ref_iterator; - if ((iter->base.flags & REF_KNOWS_PEELED)) { + if ((iter->base.ref.flags & REF_KNOWS_PEELED)) { oidcpy(peeled, &iter->peeled); return is_null_oid(&iter->peeled) ? -1 : 0; - } else if ((iter->base.flags & (REF_ISBROKEN | REF_ISSYMREF))) { + } else if ((iter->base.ref.flags & (REF_ISBROKEN | REF_ISSYMREF))) { return -1; } else { return peel_object(iter->repo, &iter->oid, peeled) ? -1 : 0; @@ -1194,7 +1194,7 @@ static struct ref_iterator *packed_ref_iterator_begin( iter->snapshot = snapshot; acquire_snapshot(snapshot); strbuf_init(&iter->refname_buf, 0); - iter->base.oid = &iter->oid; + iter->base.ref.oid = &iter->oid; iter->repo = ref_store->repo; iter->flags = flags; @@ -1436,7 +1436,7 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re if (!iter) cmp = +1; else - cmp = strcmp(iter->refname, update->refname); + cmp = strcmp(iter->ref.name, update->refname); } if (!cmp) { @@ -1459,11 +1459,11 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re } goto error; - } else if (!oideq(&update->old_oid, iter->oid)) { + } else if (!oideq(&update->old_oid, iter->ref.oid)) { strbuf_addf(err, "cannot update ref '%s': " "is at %s but expected %s", update->refname, - oid_to_hex(iter->oid), + oid_to_hex(iter->ref.oid), oid_to_hex(&update->old_oid)); ret = REF_TRANSACTION_ERROR_INCORRECT_OLD_VALUE; @@ -1527,8 +1527,8 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re struct object_id peeled; int peel_error = ref_iterator_peel(iter, &peeled); - if (write_packed_entry(out, iter->refname, - iter->oid, + if (write_packed_entry(out, iter->ref.name, + iter->ref.oid, peel_error ? NULL : &peeled)) goto write_error; diff --git a/refs/ref-cache.c b/refs/ref-cache.c index e5e5df16d8..f1abc39624 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -425,10 +425,10 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) level->prefix_state = entry_prefix_state; level->index = -1; } else { - iter->base.refname = entry->name; - iter->base.referent = entry->u.value.referent; - iter->base.oid = &entry->u.value.oid; - iter->base.flags = entry->flag; + iter->base.ref.name = entry->name; + iter->base.ref.target = entry->u.value.referent; + iter->base.ref.oid = &entry->u.value.oid; + iter->base.ref.flags = entry->flag; return ITER_OK; } } @@ -550,7 +550,7 @@ static int cache_ref_iterator_peel(struct ref_iterator *ref_iterator, { struct cache_ref_iterator *iter = (struct cache_ref_iterator *)ref_iterator; - return peel_object(iter->repo, ref_iterator->oid, peeled) ? -1 : 0; + return peel_object(iter->repo, ref_iterator->ref.oid, peeled) ? -1 : 0; } static void cache_ref_iterator_release(struct ref_iterator *ref_iterator) diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 4ef3bd75c6..ed749d1657 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -249,10 +249,7 @@ const char *find_descendant_ref(const char *dirname, */ struct ref_iterator { struct ref_iterator_vtable *vtable; - const char *refname; - const char *referent; - const struct object_id *oid; - unsigned int flags; + struct reference ref; }; /* diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index d4b7928620..0e47986cb5 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -704,10 +704,10 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) &iter->oid, flags)) continue; - iter->base.refname = iter->ref.refname; - iter->base.referent = referent; - iter->base.oid = &iter->oid; - iter->base.flags = flags; + iter->base.ref.name = iter->ref.refname; + iter->base.ref.target = referent; + iter->base.ref.oid = &iter->oid; + iter->base.ref.flags = flags; break; } @@ -828,7 +828,7 @@ static struct reftable_ref_iterator *ref_iterator_for_stack(struct reftable_ref_ iter = xcalloc(1, sizeof(*iter)); base_ref_iterator_init(&iter->base, &reftable_ref_iterator_vtable); - iter->base.oid = &iter->oid; + iter->base.ref.oid = &iter->oid; iter->flags = flags; iter->refs = refs; iter->exclude_patterns = filter_exclude_patterns(exclude_patterns); @@ -2072,7 +2072,7 @@ static int reftable_reflog_iterator_advance(struct ref_iterator *ref_iterator) strbuf_reset(&iter->last_name); strbuf_addstr(&iter->last_name, iter->log.refname); - iter->base.refname = iter->log.refname; + iter->base.ref.name = iter->log.refname; break; } From 4cea0422879f6a64c0f7ad0ddac6d43897a53e94 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:12 +0200 Subject: [PATCH 79/92] refs: fully reset `struct ref_iterator::ref` on iteration With the introduction of the `struct ref_iterator::ref` field it now is a whole lot easier to introduce new fields that become accessible to the caller without having to adapt every single callsite. But there's a downside: when a new field is introduced we always have to adapt all backends to set that field. This isn't something we can avoid in the general case: when the new field is expected to be populated by all backends we of course cannot avoid doing so. But new fields may be entirely optional, in which case we'd still have such churn. And furthermore, it is very easy right now to leak state from a previous iteration into the next iteration. Address this issue by ensuring that the reference backends all fully reset the field on every single iteration. This ensures that no state from previous iterations can leak into the next one. And it ensures that any newly introduced fields will be zeroed out by default. Note that we don't have to explicitly adapt the "files" backend, as it uses the `cache_ref_iterator` internally. Furthermore, other "wrapping" iterators like for example the `prefix_ref_iterator` copy around the whole reference, so these don't need to be adapted either. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 3 ++- refs/ref-cache.c | 1 + refs/reftable-backend.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 7987acdc96..711e07f832 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -882,6 +882,7 @@ static int next_record(struct packed_ref_iterator *iter) { const char *p, *eol; + memset(&iter->base.ref, 0, sizeof(iter->base.ref)); strbuf_reset(&iter->refname_buf); /* @@ -916,6 +917,7 @@ static int next_record(struct packed_ref_iterator *iter) !isspace(*p++)) die_invalid_line(iter->snapshot->refs->path, iter->pos, iter->eof - iter->pos); + iter->base.ref.oid = &iter->oid; eol = memchr(p, '\n', iter->eof - p); if (!eol) @@ -1194,7 +1196,6 @@ static struct ref_iterator *packed_ref_iterator_begin( iter->snapshot = snapshot; acquire_snapshot(snapshot); strbuf_init(&iter->refname_buf, 0); - iter->base.ref.oid = &iter->oid; iter->repo = ref_store->repo; iter->flags = flags; diff --git a/refs/ref-cache.c b/refs/ref-cache.c index f1abc39624..e427848879 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -425,6 +425,7 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) level->prefix_state = entry_prefix_state; level->index = -1; } else { + memset(&iter->base.ref, 0, sizeof(iter->base.ref)); iter->base.ref.name = entry->name; iter->base.ref.target = entry->u.value.referent; iter->base.ref.oid = &entry->u.value.oid; diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 0e47986cb5..728886eafd 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -704,6 +704,7 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) &iter->oid, flags)) continue; + memset(&iter->base.ref, 0, sizeof(iter->base.ref)); iter->base.ref.name = iter->ref.refname; iter->base.ref.target = referent; iter->base.ref.oid = &iter->oid; From eb2934d94b43388642ce4840994800310a6d3456 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:13 +0200 Subject: [PATCH 80/92] refs: refactor reference status flags The reference flags encode information like whether or not a reference is a symbolic reference or whether it may be broken. This information is stored in a `int flags` bitfield, which is in conflict with our modern best practices; we tend to use an unsigned integer to store flags. Change the type of the field to be `unsigned`. While at it, refactor the individual flags to be part of an `enum` instead of using preprocessor defines. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.h | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/refs.h b/refs.h index 68d235438c..4f0a685714 100644 --- a/refs.h +++ b/refs.h @@ -333,27 +333,28 @@ struct ref_transaction; * stored in ref_iterator::flags. Other bits are for internal use * only: */ +enum reference_status { + /* Reference is a symbolic reference. */ + REF_ISSYMREF = (1 << 0), -/* Reference is a symbolic reference. */ -#define REF_ISSYMREF 0x01 + /* Reference is a packed reference. */ + REF_ISPACKED = (1 << 1), -/* Reference is a packed reference. */ -#define REF_ISPACKED 0x02 + /* + * Reference cannot be resolved to an object name: dangling symbolic + * reference (directly or indirectly), corrupt reference file, + * reference exists but name is bad, or symbolic reference refers to + * ill-formatted reference name. + */ + REF_ISBROKEN = (1 << 2), -/* - * Reference cannot be resolved to an object name: dangling symbolic - * reference (directly or indirectly), corrupt reference file, - * reference exists but name is bad, or symbolic reference refers to - * ill-formatted reference name. - */ -#define REF_ISBROKEN 0x04 - -/* - * Reference name is not well formed. - * - * See git-check-ref-format(1) for the definition of well formed ref names. - */ -#define REF_BAD_NAME 0x08 + /* + * Reference name is not well formed. + * + * See git-check-ref-format(1) for the definition of well formed ref names. + */ + REF_BAD_NAME = (1 << 3), +}; /* A reference passed to `for_each_ref()`-style callbacks. */ struct reference { @@ -370,8 +371,8 @@ struct reference { */ const struct object_id *oid; - /* A bitfield of `REF_` flags. */ - int flags; + /* A bitfield of `enum reference_status` flags. */ + unsigned flags; }; /* From f89866163704528f1a6570e134853dbb99120e7c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:14 +0200 Subject: [PATCH 81/92] refs: expose peeled object ID via the iterator Both the "files" and "reftable" backend are able to store peeled values for tags in the respective formats. This allows for a more efficient lookup of the target object of such a tag without having to manually peel via the object database. The infrastructure to access these peeled object IDs is somewhat funky though. When iterating through objects, we store a pointer reference to the current iterator in a global variable. The callbacks invoked by that iterator are then expected to call `peel_iterated_oid()`, which checks whether the globally-stored iterator's current reference refers to the one handed into that function. If so, we ask the iterator to peel the object, otherwise we manually peel the object via the object database. Depending on global state like this is somewhat weird and also quite fragile. Introduce a new `struct reference::peeled_oid` field that can be populated by the reference backends. This field can be accessed via a new function `reference_get_peeled_oid()` that either uses that value, if set, or alternatively peels via the ODB. With this change we don't have to rely on global state anymore, but make the peeled object ID available to the callback functions directly. Adjust trivial callers that already have a `struct reference` available. Remaining callers will be adjusted in subsequent commits. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/describe.c | 2 +- builtin/gc.c | 2 +- builtin/pack-objects.c | 7 ++++--- commit-graph.c | 2 +- ls-refs.c | 2 +- midx-write.c | 2 +- pseudo-merge.c | 2 +- refs.c | 12 ++++++++++++ refs.h | 19 +++++++++++++++++++ refs/packed-backend.c | 1 + refs/reftable-backend.c | 5 +++++ repack-midx.c | 2 +- 12 files changed, 48 insertions(+), 10 deletions(-) diff --git a/builtin/describe.c b/builtin/describe.c index 7954535044..443546aaac 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -208,7 +208,7 @@ static int get_name(const struct reference *ref, void *cb_data UNUSED) } /* Is it annotated? */ - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) { + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) { is_annotated = !oideq(ref->oid, &peeled); } else { oidcpy(&peeled, ref->oid); diff --git a/builtin/gc.c b/builtin/gc.c index 9de5de175f..f0cf20d423 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1109,7 +1109,7 @@ static int dfs_on_ref(const struct reference *ref, void *cb_data) struct commit_list *stack = NULL; struct commit *commit; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) maybe_peeled = &peeled; if (odb_read_object_info(the_repository->objects, maybe_peeled, NULL) != OBJ_COMMIT) return 0; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 39633a0158..1613fecb66 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -838,7 +838,7 @@ static int mark_tagged(const struct reference *ref, void *cb_data UNUSED) if (entry) entry->tagged = 1; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) { + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) { entry = packlist_find(&to_pack, &peeled); if (entry) entry->tagged = 1; @@ -3309,7 +3309,8 @@ static int add_ref_tag(const struct reference *ref, void *cb_data UNUSED) { struct object_id peeled; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled) && obj_is_packed(&peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled) && + obj_is_packed(&peeled)) add_tag_chain(ref->oid); return 0; } @@ -4537,7 +4538,7 @@ static int mark_bitmap_preferred_tip(const struct reference *ref, void *data UNU struct object_id peeled; struct object *object; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) maybe_peeled = &peeled; object = parse_object_or_die(the_repository, maybe_peeled, ref->name); diff --git a/commit-graph.c b/commit-graph.c index f91af41625..80be2ff2c3 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1857,7 +1857,7 @@ static int add_ref_to_set(const struct reference *ref, void *cb_data) struct object_id peeled; struct refs_cb_data *data = (struct refs_cb_data *)cb_data; - if (!peel_iterated_oid(data->repo, ref->oid, &peeled)) + if (!reference_get_peeled_oid(data->repo, ref, &peeled)) maybe_peeled = &peeled; if (odb_read_object_info(data->repo->objects, maybe_peeled, NULL) == OBJ_COMMIT) oidset_insert(data->commits, maybe_peeled); diff --git a/ls-refs.c b/ls-refs.c index 64d0272369..8641281b86 100644 --- a/ls-refs.c +++ b/ls-refs.c @@ -110,7 +110,7 @@ static int send_ref(const struct reference *ref, void *cb_data) if (data->peel && ref->oid) { struct object_id peeled; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) strbuf_addf(&data->buf, " peeled:%s", oid_to_hex(&peeled)); } diff --git a/midx-write.c b/midx-write.c index f4dd875747..23e61cb000 100644 --- a/midx-write.c +++ b/midx-write.c @@ -709,7 +709,7 @@ static int add_ref_to_pending(const struct reference *ref, void *cb_data) return 0; } - if (!peel_iterated_oid(revs->repo, ref->oid, &peeled)) + if (!reference_get_peeled_oid(revs->repo, ref, &peeled)) maybe_peeled = &peeled; object = parse_object_or_die(revs->repo, maybe_peeled, ref->name); diff --git a/pseudo-merge.c b/pseudo-merge.c index 0abd51b42c..a2d5bd85f9 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -230,7 +230,7 @@ static int find_pseudo_merge_group_for_ref(const struct reference *ref, void *_d uint32_t i; int has_bitmap; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) maybe_peeled = &peeled; c = lookup_commit(the_repository, maybe_peeled); diff --git a/refs.c b/refs.c index f96cf43b12..1b1551f981 100644 --- a/refs.c +++ b/refs.c @@ -2334,6 +2334,18 @@ int peel_iterated_oid(struct repository *r, const struct object_id *base, struct return peel_object(r, base, peeled) ? -1 : 0; } +int reference_get_peeled_oid(struct repository *repo, + const struct reference *ref, + struct object_id *peeled_oid) +{ + if (ref->peeled_oid) { + oidcpy(peeled_oid, ref->peeled_oid); + return 0; + } + + return peel_object(repo, ref->oid, peeled_oid) ? -1 : 0; +} + int refs_update_symref(struct ref_store *refs, const char *ref, const char *target, const char *logmsg) { diff --git a/refs.h b/refs.h index 4f0a685714..886ed2c0f4 100644 --- a/refs.h +++ b/refs.h @@ -371,10 +371,29 @@ struct reference { */ const struct object_id *oid; + /* + * An optional peeled object ID. This field _may_ be set for tags in + * case the peeled value is present in the backend. Please refer to + * `reference_get_peeled_oid()`. + */ + const struct object_id *peeled_oid; + /* A bitfield of `enum reference_status` flags. */ unsigned flags; }; +/* + * Peel the tag to a non-tag commit. If present, this uses the peeled object ID + * exposed by the reference backend. Otherwise, the object is peeled via the + * object database, which is less efficient. + * + * Return `0` if the reference could be peeled, a negative error code + * otherwise. + */ +int reference_get_peeled_oid(struct repository *repo, + const struct reference *ref, + struct object_id *peeled_oid); + /* * The signature for the callback function for the for_each_*() * functions below. The memory pointed to by the `struct reference` diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 711e07f832..1fefefd54e 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -963,6 +963,7 @@ static int next_record(struct packed_ref_iterator *iter) iter->base.ref.flags &= ~REF_KNOWS_PEELED; } else { iter->base.ref.flags |= REF_KNOWS_PEELED; + iter->base.ref.peeled_oid = &iter->peeled; } } else { oidclr(&iter->peeled, iter->repo->hash_algo); diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 728886eafd..e214e120d7 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -547,6 +547,7 @@ struct reftable_ref_iterator { struct reftable_iterator iter; struct reftable_ref_record ref; struct object_id oid; + struct object_id peeled_oid; char *prefix; size_t prefix_len; @@ -671,6 +672,8 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) case REFTABLE_REF_VAL2: oidread(&iter->oid, iter->ref.value.val2.value, refs->base.repo->hash_algo); + oidread(&iter->peeled_oid, iter->ref.value.val2.target_value, + refs->base.repo->hash_algo); break; case REFTABLE_REF_SYMREF: referent = refs_resolve_ref_unsafe(&iter->refs->base, @@ -708,6 +711,8 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) iter->base.ref.name = iter->ref.refname; iter->base.ref.target = referent; iter->base.ref.oid = &iter->oid; + if (iter->ref.value_type == REFTABLE_REF_VAL2) + iter->base.ref.peeled_oid = &iter->peeled_oid; iter->base.ref.flags = flags; break; diff --git a/repack-midx.c b/repack-midx.c index 349f7e20b5..74bdfa3a6e 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -22,7 +22,7 @@ static int midx_snapshot_ref_one(const struct reference *ref, void *_data) const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; - if (!peel_iterated_oid(data->repo, ref->oid, &peeled)) + if (!reference_get_peeled_oid(data->repo, ref, &peeled)) maybe_peeled = &peeled; if (oidset_insert(&data->seen, maybe_peeled)) From adecd5f0b6fdd40219d5503fdaf46aa8d36a4ff7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:15 +0200 Subject: [PATCH 82/92] upload-pack: convert to use `reference_get_peeled_oid()` The `write_v0_ref()` callback is invoked from two callsites: - Once via `send_ref()` which is a callback passed to `for_each_namespaced_ref_1()` and `refs_head_ref_namespaced()`. - Once manually to announce capabilities. When sending references to the client we also send the peeled value of tags. As we don't have a `struct reference` available in the second case, we cannot easily peel by calling `reference_get_peeled_oid()`, but we instead have to depend on on global state via `peel_iterated_oid()`. We do have a reference available though in the first case, it's only the second case that keeps us from using `reference_get_peeled_oid()`. But that second case only announces capabilities anyway, so we're not really handling a reference at all here. Adapt that case to construct a reference manually and pass that to `write_v0_ref()`. Start to use `reference_get_peeled_oid()` now that we always have a `struct reference` available. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- upload-pack.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 0d563ae74e..2d2b70cbf2 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1249,15 +1249,15 @@ static void format_session_id(struct strbuf *buf, struct upload_pack_data *d) { } static void write_v0_ref(struct upload_pack_data *data, - const char *refname, const char *refname_nons, - const struct object_id *oid) + const struct reference *ref, + const char *refname_nons) { static const char *capabilities = "multi_ack thin-pack side-band" " side-band-64k ofs-delta shallow deepen-since deepen-not" " deepen-relative no-progress include-tag multi_ack_detailed"; struct object_id peeled; - if (mark_our_ref(refname_nons, refname, oid, &data->hidden_refs)) + if (mark_our_ref(refname_nons, ref->name, ref->oid, &data->hidden_refs)) return; if (capabilities) { @@ -1267,7 +1267,7 @@ static void write_v0_ref(struct upload_pack_data *data, format_symref_info(&symref_info, &data->symref); format_session_id(&session_id, data); packet_fwrite_fmt(stdout, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n", - oid_to_hex(oid), refname_nons, + oid_to_hex(ref->oid), refname_nons, 0, capabilities, (data->allow_uor & ALLOW_TIP_SHA1) ? " allow-tip-sha1-in-want" : "", @@ -1283,17 +1283,17 @@ static void write_v0_ref(struct upload_pack_data *data, strbuf_release(&session_id); data->sent_capabilities = 1; } else { - packet_fwrite_fmt(stdout, "%s %s\n", oid_to_hex(oid), refname_nons); + packet_fwrite_fmt(stdout, "%s %s\n", oid_to_hex(ref->oid), refname_nons); } capabilities = NULL; - if (!peel_iterated_oid(the_repository, oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) packet_fwrite_fmt(stdout, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons); return; } static int send_ref(const struct reference *ref, void *cb_data) { - write_v0_ref(cb_data, ref->name, strip_namespace(ref->name), ref->oid); + write_v0_ref(cb_data, ref, strip_namespace(ref->name)); return 0; } @@ -1442,8 +1442,12 @@ void upload_pack(const int advertise_refs, const int stateless_rpc, send_ref, &data); for_each_namespaced_ref_1(send_ref, &data); if (!data.sent_capabilities) { - const char *refname = "capabilities^{}"; - write_v0_ref(&data, refname, refname, null_oid(the_hash_algo)); + struct reference ref = { + .name = "capabilities^{}", + .oid = null_oid(the_hash_algo), + }; + + write_v0_ref(&data, &ref, ref.name); } /* * fflush stdout before calling advertise_shallow_grafts because send_ref From 70b783c3a194746d8b747677615f33b94454146f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:16 +0200 Subject: [PATCH 83/92] ref-filter: propagate peeled object ID When queueing a reference in the "ref-filter" subsystem we end up creating a new ref array item that contains the reference's info. One bit of info that we always discard though is the peeled object ID, and because of that we are forced to use `peel_iterated_oid()`. Refactor the code to propagate the peeled object ID via the ref array, if available. This allows us to manually peel tags without having to go through the object database. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/ls-remote.c | 2 +- builtin/tag.c | 2 +- builtin/verify-tag.c | 2 +- ref-filter.c | 66 +++++++++++++++++++++++++------------------- ref-filter.h | 5 +++- 5 files changed, 45 insertions(+), 32 deletions(-) diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index df09000b30..fe77829557 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -156,7 +156,7 @@ int cmd_ls_remote(int argc, continue; if (!tail_match(&pattern, ref->name)) continue; - item = ref_array_push(&ref_array, ref->name, &ref->old_oid); + item = ref_array_push(&ref_array, ref->name, &ref->old_oid, NULL); item->symref = xstrdup_or_null(ref->symref); } diff --git a/builtin/tag.c b/builtin/tag.c index f0665af3ac..01eba90c5c 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -153,7 +153,7 @@ static int verify_tag(const char *name, const char *ref UNUSED, return -1; if (format->format) - pretty_print_ref(name, oid, format); + pretty_print_ref(name, oid, NULL, format); return 0; } diff --git a/builtin/verify-tag.c b/builtin/verify-tag.c index cd6bc11095..558121eaa1 100644 --- a/builtin/verify-tag.c +++ b/builtin/verify-tag.c @@ -67,7 +67,7 @@ int cmd_verify_tag(int argc, } if (format.format) - pretty_print_ref(name, &oid, &format); + pretty_print_ref(name, &oid, NULL, &format); } return had_error; } diff --git a/ref-filter.c b/ref-filter.c index 6837fa60a9..7fd8babec8 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2578,8 +2578,15 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) * If it is a tag object, see if we use the peeled value. If we do, * grab the peeled OID. */ - if (need_tagged && peel_iterated_oid(the_repository, &obj->oid, &oi_deref.oid)) - die("bad tag"); + if (need_tagged) { + if (!is_null_oid(&ref->peeled_oid)) { + oidcpy(&oi_deref.oid, &ref->peeled_oid); + } else if (!peel_object(the_repository, &obj->oid, &oi_deref.oid)) { + /* We managed to peel the object ourselves. */ + } else { + die("bad tag"); + } + } return get_object(ref, 1, &obj, &oi_deref, err); } @@ -2807,12 +2814,15 @@ static int match_points_at(struct oid_array *points_at, * Callers can then fill in other struct members at their leisure. */ static struct ref_array_item *new_ref_array_item(const char *refname, - const struct object_id *oid) + const struct object_id *oid, + const struct object_id *peeled_oid) { struct ref_array_item *ref; FLEX_ALLOC_STR(ref, refname, refname); oidcpy(&ref->objectname, oid); + if (peeled_oid) + oidcpy(&ref->peeled_oid, peeled_oid); ref->rest = NULL; return ref; @@ -2826,9 +2836,10 @@ static void ref_array_append(struct ref_array *array, struct ref_array_item *ref struct ref_array_item *ref_array_push(struct ref_array *array, const char *refname, - const struct object_id *oid) + const struct object_id *oid, + const struct object_id *peeled_oid) { - struct ref_array_item *ref = new_ref_array_item(refname, oid); + struct ref_array_item *ref = new_ref_array_item(refname, oid, peeled_oid); ref_array_append(array, ref); return ref; } @@ -2871,25 +2882,25 @@ static int filter_ref_kind(struct ref_filter *filter, const char *refname) return ref_kind_from_refname(refname); } -static struct ref_array_item *apply_ref_filter(const char *refname, const char *referent, const struct object_id *oid, - int flag, struct ref_filter *filter) +static struct ref_array_item *apply_ref_filter(const struct reference *ref, + struct ref_filter *filter) { - struct ref_array_item *ref; + struct ref_array_item *item; struct commit *commit = NULL; unsigned int kind; - if (flag & REF_BAD_NAME) { - warning(_("ignoring ref with broken name %s"), refname); + if (ref->flags & REF_BAD_NAME) { + warning(_("ignoring ref with broken name %s"), ref->name); return NULL; } - if (flag & REF_ISBROKEN) { - warning(_("ignoring broken ref %s"), refname); + if (ref->flags & REF_ISBROKEN) { + warning(_("ignoring broken ref %s"), ref->name); return NULL; } /* Obtain the current ref kind from filter_ref_kind() and ignore unwanted refs. */ - kind = filter_ref_kind(filter, refname); + kind = filter_ref_kind(filter, ref->name); /* * Generally HEAD refs are printed with special description denoting a rebase, @@ -2902,13 +2913,13 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const char * else if (!(kind & filter->kind)) return NULL; - if (!filter_pattern_match(filter, refname)) + if (!filter_pattern_match(filter, ref->name)) return NULL; - if (filter_exclude_match(filter, refname)) + if (filter_exclude_match(filter, ref->name)) return NULL; - if (filter->points_at.nr && !match_points_at(&filter->points_at, oid, refname)) + if (filter->points_at.nr && !match_points_at(&filter->points_at, ref->oid, ref->name)) return NULL; /* @@ -2918,7 +2929,7 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const char * */ if (filter->reachable_from || filter->unreachable_from || filter->with_commit || filter->no_commit || filter->verbose) { - commit = lookup_commit_reference_gently(the_repository, oid, 1); + commit = lookup_commit_reference_gently(the_repository, ref->oid, 1); if (!commit) return NULL; /* We perform the filtering for the '--contains' option... */ @@ -2936,13 +2947,13 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const char * * to do its job and the resulting list may yet to be pruned * by maxcount logic. */ - ref = new_ref_array_item(refname, oid); - ref->commit = commit; - ref->flag = flag; - ref->kind = kind; - ref->symref = xstrdup_or_null(referent); + item = new_ref_array_item(ref->name, ref->oid, ref->peeled_oid); + item->commit = commit; + item->flag = ref->flags; + item->kind = kind; + item->symref = xstrdup_or_null(ref->target); - return ref; + return item; } struct ref_filter_cbdata { @@ -2959,8 +2970,7 @@ static int filter_one(const struct reference *ref, void *cb_data) struct ref_filter_cbdata *ref_cbdata = cb_data; struct ref_array_item *item; - item = apply_ref_filter(ref->name, ref->target, ref->oid, - ref->flags, ref_cbdata->filter); + item = apply_ref_filter(ref, ref_cbdata->filter); if (item) ref_array_append(ref_cbdata->array, item); @@ -2997,8 +3007,7 @@ static int filter_and_format_one(const struct reference *ref, void *cb_data) struct ref_array_item *item; struct strbuf output = STRBUF_INIT, err = STRBUF_INIT; - item = apply_ref_filter(ref->name, ref->target, ref->oid, - ref->flags, ref_cbdata->filter); + item = apply_ref_filter(ref, ref_cbdata->filter); if (!item) return 0; @@ -3585,13 +3594,14 @@ void print_formatted_ref_array(struct ref_array *array, struct ref_format *forma } void pretty_print_ref(const char *name, const struct object_id *oid, + const struct object_id *peeled_oid, struct ref_format *format) { struct ref_array_item *ref_item; struct strbuf output = STRBUF_INIT; struct strbuf err = STRBUF_INIT; - ref_item = new_ref_array_item(name, oid); + ref_item = new_ref_array_item(name, oid, peeled_oid); ref_item->kind = ref_kind_from_refname(name); if (format_ref_array_item(ref_item, format, &output, &err)) die("%s", err.buf); diff --git a/ref-filter.h b/ref-filter.h index 235c60f79c..120221b47f 100644 --- a/ref-filter.h +++ b/ref-filter.h @@ -41,6 +41,7 @@ enum ref_sorting_order { struct ref_array_item { struct object_id objectname; + struct object_id peeled_oid; const char *rest; int flag; unsigned int kind; @@ -187,6 +188,7 @@ void print_formatted_ref_array(struct ref_array *array, struct ref_format *forma * name must be a fully qualified refname. */ void pretty_print_ref(const char *name, const struct object_id *oid, + const struct object_id *peeled_oid, struct ref_format *format); /* @@ -195,7 +197,8 @@ void pretty_print_ref(const char *name, const struct object_id *oid, */ struct ref_array_item *ref_array_push(struct ref_array *array, const char *refname, - const struct object_id *oid); + const struct object_id *oid, + const struct object_id *peeled_oid); /* * If the provided format includes ahead-behind atoms, then compute the From feaaea4c123e6b94ebbdc2135278946ee9cc8eed Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:17 +0200 Subject: [PATCH 84/92] builtin/show-ref: convert to use `reference_get_peeled_oid()` The git-show-ref(1) command has multiple different modes: - It knows to show all references matching a pattern. - It knows to list all references that are an exact match to whatever the user has provided. - It knows to check for reference existence. The first two commands use mostly the same infrastructure to print the references via `show_one()`. But while the former mode uses a proper iterator and thus has a `struct reference` available in its context, the latter calls `refs_read_ref()` and thus doesn't. Consequently, we cannot easily use `reference_get_peeled_oid()` to print the peeled value. Adapt the code so that we manually construct a `struct reference` when verifying refs. We wouldn't ever have the peeled value available anyway as we're not using an iterator here, so we can simply plug in the values we _do_ have. With this change we now have a `struct reference` available at both callsites of `show_one()` and can thus pass it, which allows us to use `reference_get_peeled_oid()` instead of `peel_iterated_oid()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/show-ref.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/builtin/show-ref.c b/builtin/show-ref.c index 4803b5e598..4d4984e4e0 100644 --- a/builtin/show-ref.c +++ b/builtin/show-ref.c @@ -31,31 +31,31 @@ struct show_one_options { }; static void show_one(const struct show_one_options *opts, - const char *refname, const struct object_id *oid) + const struct reference *ref) { const char *hex; struct object_id peeled; - if (!odb_has_object(the_repository->objects, oid, + if (!odb_has_object(the_repository->objects, ref->oid, HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) - die("git show-ref: bad ref %s (%s)", refname, - oid_to_hex(oid)); + die("git show-ref: bad ref %s (%s)", ref->name, + oid_to_hex(ref->oid)); if (opts->quiet) return; - hex = repo_find_unique_abbrev(the_repository, oid, opts->abbrev); + hex = repo_find_unique_abbrev(the_repository, ref->oid, opts->abbrev); if (opts->hash_only) printf("%s\n", hex); else - printf("%s %s\n", hex, refname); + printf("%s %s\n", hex, ref->name); if (!opts->deref_tags) return; - if (!peel_iterated_oid(the_repository, oid, &peeled)) { + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) { hex = repo_find_unique_abbrev(the_repository, &peeled, opts->abbrev); - printf("%s %s^{}\n", hex, refname); + printf("%s %s^{}\n", hex, ref->name); } } @@ -93,7 +93,7 @@ static int show_ref(const struct reference *ref, void *cbdata) match: data->found_match++; - show_one(data->show_one_opts, ref->name, ref->oid); + show_one(data->show_one_opts, ref); return 0; } @@ -175,12 +175,18 @@ static int cmd_show_ref__verify(const struct show_one_options *show_one_opts, if ((starts_with(*refs, "refs/") || refname_is_safe(*refs)) && !refs_read_ref(get_main_ref_store(the_repository), *refs, &oid)) { - show_one(show_one_opts, *refs, &oid); - } - else if (!show_one_opts->quiet) + struct reference ref = { + .name = *refs, + .oid = &oid, + }; + + show_one(show_one_opts, &ref); + } else if (!show_one_opts->quiet) { die("'%s' - not a valid ref", *refs); - else + } else { return 1; + } + refs++; } From 5a5c7359f77ecd1bc4b0e172563161d602f131d3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:18 +0200 Subject: [PATCH 85/92] refs: drop `current_ref_iter` hack In preceding commits we have refactored all callers of `peel_iterated_oid()` to instead use `reference_get_peeled_oid()`. This allows us to thus get rid of the former function. Getting rid of that function is nice, but even nicer is that this also allows us to get rid of the `current_ref_iter` hack. This global variable tracked the currently-active ref iterator so that we can use it to peel an object ID. Now that the peeled object ID is propagated via `struct reference` though we don't have to depend on this hack anymore, which makes for a more robust and easier-to-understand infrastructure. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 10 ---------- refs/iterator.c | 5 ----- refs/refs-internal.h | 13 ------------- 3 files changed, 28 deletions(-) diff --git a/refs.c b/refs.c index 1b1551f981..9d8f0a9ca4 100644 --- a/refs.c +++ b/refs.c @@ -2324,16 +2324,6 @@ int refs_optimize(struct ref_store *refs, struct pack_refs_opts *opts) return refs->be->optimize(refs, opts); } -int peel_iterated_oid(struct repository *r, const struct object_id *base, struct object_id *peeled) -{ - if (current_ref_iter && - (current_ref_iter->ref.oid == base || - oideq(current_ref_iter->ref.oid, base))) - return ref_iterator_peel(current_ref_iter, peeled); - - return peel_object(r, base, peeled) ? -1 : 0; -} - int reference_get_peeled_oid(struct repository *repo, const struct reference *ref, struct object_id *peeled_oid) diff --git a/refs/iterator.c b/refs/iterator.c index fe5980e1b6..072c6aacdb 100644 --- a/refs/iterator.c +++ b/refs/iterator.c @@ -458,15 +458,11 @@ struct ref_iterator *prefix_ref_iterator_begin(struct ref_iterator *iter0, return ref_iterator; } -struct ref_iterator *current_ref_iter = NULL; - int do_for_each_ref_iterator(struct ref_iterator *iter, each_ref_fn fn, void *cb_data) { int retval = 0, ok; - struct ref_iterator *old_ref_iter = current_ref_iter; - current_ref_iter = iter; while ((ok = ref_iterator_advance(iter)) == ITER_OK) { retval = fn(&iter->ref, cb_data); if (retval) @@ -474,7 +470,6 @@ int do_for_each_ref_iterator(struct ref_iterator *iter, } out: - current_ref_iter = old_ref_iter; if (ok == ITER_ERROR) retval = -1; ref_iterator_free(iter); diff --git a/refs/refs-internal.h b/refs/refs-internal.h index ed749d1657..f4f845bbea 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -376,19 +376,6 @@ struct ref_iterator_vtable { ref_iterator_release_fn *release; }; -/* - * current_ref_iter is a performance hack: when iterating over - * references using the for_each_ref*() functions, current_ref_iter is - * set to the reference iterator before calling the callback function. - * If the callback function calls peel_ref(), then peel_ref() first - * checks whether the reference to be peeled is the one referred to by - * the iterator (it usually is) and if so, asks the iterator for the - * peeled version of the reference if it is available. This avoids a - * refname lookup in a common case. current_ref_iter is set to NULL - * when the iteration is over. - */ -extern struct ref_iterator *current_ref_iter; - struct ref_store; /* refs backends */ From 705114772e0a0741c3288329bd9ac4e11e38db9a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:19 +0200 Subject: [PATCH 86/92] refs: drop infrastructure to peel via iterators Now that the peeled object ID gets propagated via the `struct reference` there is no need anymore to call into the reference iterator itself to dereference an object. Remove this infrastructure. Most of the changes are straight-forward deletions of code. There is one exception though in `refs/packed-backend.c::write_with_updates()`. Here we stop peeling the iterator and instead just pass the peeled object ID of that iterator directly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.h | 14 -------------- refs/debug.c | 11 ----------- refs/files-backend.c | 17 ----------------- refs/iterator.c | 36 ------------------------------------ refs/packed-backend.c | 24 +----------------------- refs/ref-cache.c | 9 --------- refs/refs-internal.h | 7 ------- refs/reftable-backend.c | 24 ------------------------ 8 files changed, 1 insertion(+), 141 deletions(-) diff --git a/refs.h b/refs.h index 886ed2c0f4..2dd7ac1a16 100644 --- a/refs.h +++ b/refs.h @@ -1289,10 +1289,6 @@ int repo_migrate_ref_storage_format(struct repository *repo, * to the next entry, ref_iterator_advance() aborts the iteration, * frees the ref_iterator, and returns ITER_ERROR. * - * The reference currently being looked at can be peeled by calling - * ref_iterator_peel(). This function is often faster than peel_ref(), - * so it should be preferred when iterating over references. - * * Putting it all together, a typical iteration looks like this: * * int ok; @@ -1307,9 +1303,6 @@ int repo_migrate_ref_storage_format(struct repository *repo, * // Access information about the current reference: * if (!(iter->flags & REF_ISSYMREF)) * printf("%s is %s\n", iter->refname, oid_to_hex(iter->oid)); - * - * // If you need to peel the reference: - * ref_iterator_peel(iter, &oid); * } * * if (ok != ITER_DONE) @@ -1400,13 +1393,6 @@ enum ref_iterator_seek_flag { int ref_iterator_seek(struct ref_iterator *ref_iterator, const char *refname, unsigned int flags); -/* - * If possible, peel the reference currently being viewed by the - * iterator. Return 0 on success. - */ -int ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled); - /* Free the reference iterator and any associated resources. */ void ref_iterator_free(struct ref_iterator *ref_iterator); diff --git a/refs/debug.c b/refs/debug.c index 67718bd1f4..01499b9033 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -177,16 +177,6 @@ static int debug_ref_iterator_seek(struct ref_iterator *ref_iterator, return res; } -static int debug_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct debug_ref_iterator *diter = - (struct debug_ref_iterator *)ref_iterator; - int res = diter->iter->vtable->peel(diter->iter, peeled); - trace_printf_key(&trace_refs, "iterator_peel: %s: %d\n", diter->iter->ref.name, res); - return res; -} - static void debug_ref_iterator_release(struct ref_iterator *ref_iterator) { struct debug_ref_iterator *diter = @@ -198,7 +188,6 @@ static void debug_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable debug_ref_iterator_vtable = { .advance = debug_ref_iterator_advance, .seek = debug_ref_iterator_seek, - .peel = debug_ref_iterator_peel, .release = debug_ref_iterator_release, }; diff --git a/refs/files-backend.c b/refs/files-backend.c index fac53fa052..5aeb454fb4 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -993,15 +993,6 @@ static int files_ref_iterator_seek(struct ref_iterator *ref_iterator, return ref_iterator_seek(iter->iter0, refname, flags); } -static int files_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct files_ref_iterator *iter = - (struct files_ref_iterator *)ref_iterator; - - return ref_iterator_peel(iter->iter0, peeled); -} - static void files_ref_iterator_release(struct ref_iterator *ref_iterator) { struct files_ref_iterator *iter = @@ -1012,7 +1003,6 @@ static void files_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable files_ref_iterator_vtable = { .advance = files_ref_iterator_advance, .seek = files_ref_iterator_seek, - .peel = files_ref_iterator_peel, .release = files_ref_iterator_release, }; @@ -2388,12 +2378,6 @@ static int files_reflog_iterator_seek(struct ref_iterator *ref_iterator UNUSED, BUG("ref_iterator_seek() called for reflog_iterator"); } -static int files_reflog_iterator_peel(struct ref_iterator *ref_iterator UNUSED, - struct object_id *peeled UNUSED) -{ - BUG("ref_iterator_peel() called for reflog_iterator"); -} - static void files_reflog_iterator_release(struct ref_iterator *ref_iterator) { struct files_reflog_iterator *iter = @@ -2404,7 +2388,6 @@ static void files_reflog_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable files_reflog_iterator_vtable = { .advance = files_reflog_iterator_advance, .seek = files_reflog_iterator_seek, - .peel = files_reflog_iterator_peel, .release = files_reflog_iterator_release, }; diff --git a/refs/iterator.c b/refs/iterator.c index 072c6aacdb..d79aa5ec82 100644 --- a/refs/iterator.c +++ b/refs/iterator.c @@ -21,12 +21,6 @@ int ref_iterator_seek(struct ref_iterator *ref_iterator, const char *refname, return ref_iterator->vtable->seek(ref_iterator, refname, flags); } -int ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - return ref_iterator->vtable->peel(ref_iterator, peeled); -} - void ref_iterator_free(struct ref_iterator *ref_iterator) { if (ref_iterator) { @@ -60,12 +54,6 @@ static int empty_ref_iterator_seek(struct ref_iterator *ref_iterator UNUSED, return 0; } -static int empty_ref_iterator_peel(struct ref_iterator *ref_iterator UNUSED, - struct object_id *peeled UNUSED) -{ - BUG("peel called for empty iterator"); -} - static void empty_ref_iterator_release(struct ref_iterator *ref_iterator UNUSED) { } @@ -73,7 +61,6 @@ static void empty_ref_iterator_release(struct ref_iterator *ref_iterator UNUSED) static struct ref_iterator_vtable empty_ref_iterator_vtable = { .advance = empty_ref_iterator_advance, .seek = empty_ref_iterator_seek, - .peel = empty_ref_iterator_peel, .release = empty_ref_iterator_release, }; @@ -240,18 +227,6 @@ static int merge_ref_iterator_seek(struct ref_iterator *ref_iterator, return 0; } -static int merge_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct merge_ref_iterator *iter = - (struct merge_ref_iterator *)ref_iterator; - - if (!iter->current) { - BUG("peel called before advance for merge iterator"); - } - return ref_iterator_peel(*iter->current, peeled); -} - static void merge_ref_iterator_release(struct ref_iterator *ref_iterator) { struct merge_ref_iterator *iter = @@ -263,7 +238,6 @@ static void merge_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable merge_ref_iterator_vtable = { .advance = merge_ref_iterator_advance, .seek = merge_ref_iterator_seek, - .peel = merge_ref_iterator_peel, .release = merge_ref_iterator_release, }; @@ -412,15 +386,6 @@ static int prefix_ref_iterator_seek(struct ref_iterator *ref_iterator, return ref_iterator_seek(iter->iter0, refname, flags); } -static int prefix_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct prefix_ref_iterator *iter = - (struct prefix_ref_iterator *)ref_iterator; - - return ref_iterator_peel(iter->iter0, peeled); -} - static void prefix_ref_iterator_release(struct ref_iterator *ref_iterator) { struct prefix_ref_iterator *iter = @@ -432,7 +397,6 @@ static void prefix_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable prefix_ref_iterator_vtable = { .advance = prefix_ref_iterator_advance, .seek = prefix_ref_iterator_seek, - .peel = prefix_ref_iterator_peel, .release = prefix_ref_iterator_release, }; diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 1fefefd54e..6fa229edd0 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1030,22 +1030,6 @@ static int packed_ref_iterator_seek(struct ref_iterator *ref_iterator, return 0; } -static int packed_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct packed_ref_iterator *iter = - (struct packed_ref_iterator *)ref_iterator; - - if ((iter->base.ref.flags & REF_KNOWS_PEELED)) { - oidcpy(peeled, &iter->peeled); - return is_null_oid(&iter->peeled) ? -1 : 0; - } else if ((iter->base.ref.flags & (REF_ISBROKEN | REF_ISSYMREF))) { - return -1; - } else { - return peel_object(iter->repo, &iter->oid, peeled) ? -1 : 0; - } -} - static void packed_ref_iterator_release(struct ref_iterator *ref_iterator) { struct packed_ref_iterator *iter = @@ -1059,7 +1043,6 @@ static void packed_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable packed_ref_iterator_vtable = { .advance = packed_ref_iterator_advance, .seek = packed_ref_iterator_seek, - .peel = packed_ref_iterator_peel, .release = packed_ref_iterator_release, }; @@ -1525,13 +1508,8 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re if (cmp < 0) { /* Pass the old reference through. */ - - struct object_id peeled; - int peel_error = ref_iterator_peel(iter, &peeled); - if (write_packed_entry(out, iter->ref.name, - iter->ref.oid, - peel_error ? NULL : &peeled)) + iter->ref.oid, iter->ref.peeled_oid)) goto write_error; if ((ok = ref_iterator_advance(iter)) != ITER_OK) { diff --git a/refs/ref-cache.c b/refs/ref-cache.c index e427848879..ffef01a597 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -546,14 +546,6 @@ static int cache_ref_iterator_seek(struct ref_iterator *ref_iterator, return 0; } -static int cache_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct cache_ref_iterator *iter = - (struct cache_ref_iterator *)ref_iterator; - return peel_object(iter->repo, ref_iterator->ref.oid, peeled) ? -1 : 0; -} - static void cache_ref_iterator_release(struct ref_iterator *ref_iterator) { struct cache_ref_iterator *iter = @@ -565,7 +557,6 @@ static void cache_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable cache_ref_iterator_vtable = { .advance = cache_ref_iterator_advance, .seek = cache_ref_iterator_seek, - .peel = cache_ref_iterator_peel, .release = cache_ref_iterator_release, }; diff --git a/refs/refs-internal.h b/refs/refs-internal.h index f4f845bbea..4671517dad 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -357,12 +357,6 @@ typedef int ref_iterator_advance_fn(struct ref_iterator *ref_iterator); typedef int ref_iterator_seek_fn(struct ref_iterator *ref_iterator, const char *refname, unsigned int flags); -/* - * Peels the current ref, returning 0 for success or -1 for failure. - */ -typedef int ref_iterator_peel_fn(struct ref_iterator *ref_iterator, - struct object_id *peeled); - /* * Implementations of this function should free any resources specific * to the derived class. @@ -372,7 +366,6 @@ typedef void ref_iterator_release_fn(struct ref_iterator *ref_iterator); struct ref_iterator_vtable { ref_iterator_advance_fn *advance; ref_iterator_seek_fn *seek; - ref_iterator_peel_fn *peel; ref_iterator_release_fn *release; }; diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index e214e120d7..e329d4a423 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -744,21 +744,6 @@ static int reftable_ref_iterator_seek(struct ref_iterator *ref_iterator, return iter->err; } -static int reftable_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct reftable_ref_iterator *iter = - (struct reftable_ref_iterator *)ref_iterator; - - if (iter->ref.value_type == REFTABLE_REF_VAL2) { - oidread(peeled, iter->ref.value.val2.target_value, - iter->refs->base.repo->hash_algo); - return 0; - } - - return -1; -} - static void reftable_ref_iterator_release(struct ref_iterator *ref_iterator) { struct reftable_ref_iterator *iter = @@ -776,7 +761,6 @@ static void reftable_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable reftable_ref_iterator_vtable = { .advance = reftable_ref_iterator_advance, .seek = reftable_ref_iterator_seek, - .peel = reftable_ref_iterator_peel, .release = reftable_ref_iterator_release, }; @@ -2098,13 +2082,6 @@ static int reftable_reflog_iterator_seek(struct ref_iterator *ref_iterator UNUSE return -1; } -static int reftable_reflog_iterator_peel(struct ref_iterator *ref_iterator UNUSED, - struct object_id *peeled UNUSED) -{ - BUG("reftable reflog iterator cannot be peeled"); - return -1; -} - static void reftable_reflog_iterator_release(struct ref_iterator *ref_iterator) { struct reftable_reflog_iterator *iter = @@ -2117,7 +2094,6 @@ static void reftable_reflog_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable reftable_reflog_iterator_vtable = { .advance = reftable_reflog_iterator_advance, .seek = reftable_reflog_iterator_seek, - .peel = reftable_reflog_iterator_peel, .release = reftable_reflog_iterator_release, }; From 7ec85185b197ce1cd28721a6f4415fb9db5cd42f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:20 +0200 Subject: [PATCH 87/92] object: add flag to `peel_object()` to verify object type When peeling a tag to a non-tag object we repeatedly call `parse_object()` on the tagged object until we find the first object that isn't a tag. While this feels sensible at first, there is a big catch here: `parse_object()` doesn't actually verify the type of the tagged object. The relevant code path here eventually ends up in `parse_tag_buffer()`. Here, we parse the various fields of the tag, including the "type". Once we've figured out the type and the tagged object ID, we call one of the `lookup_${type}()` functions for whatever type we have found. There is two possible outcomes in the successful case: 1. The object is already part of our cached objects. In that case we double-check whether the type we're trying to look up matches the type that was cached. 2. The object is _not_ part of our cached objects. In that case, we simply create a new object with the expected type, but we don't parse that object. In the first case we might notice type mismatches, but only in the case where our cache has the object with the correct type. In the second case, we'll blindly assume that the type is correct and then go with it. We'll only notice that the type might be wrong when we try to parse the object at a later point. Now arguably, we could change `parse_tag_buffer()` to verify the tagged object's type for us. But that would have the effect that such a tag cannot be parsed at all anymore, and we have a small bunch of tests for exactly this case that assert we still can open such tags. So this change does not feel like something we can retroactively tighten, even though one shouldn't ever hit such corrupted tags. Instead, add a new `flags` field to `peel_object()` that allows the caller to opt in to strict object verification. This will be wired up at a subset of callsites over the next few commits. Note that this change also inlines `deref_tag_noverify()`. There's only been two callsites of that function, the one we're changing and one in our test helpers. The latter callsite can trivially use `deref_tag()` instead, so by inlining the function we avoid having to pass down the flag. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object.c | 20 +++++++++++++++++--- object.h | 15 ++++++++++++++- ref-filter.c | 2 +- refs.c | 2 +- refs/packed-backend.c | 5 ++--- refs/reftable-backend.c | 4 ++-- t/helper/test-reach.c | 2 +- tag.c | 12 ------------ tag.h | 1 - 9 files changed, 38 insertions(+), 25 deletions(-) diff --git a/object.c b/object.c index 986114a6db..e72b0ed436 100644 --- a/object.c +++ b/object.c @@ -209,11 +209,12 @@ struct object *lookup_object_by_type(struct repository *r, enum peel_status peel_object(struct repository *r, const struct object_id *name, - struct object_id *oid) + struct object_id *oid, + unsigned flags) { struct object *o = lookup_unknown_object(r, name); - if (o->type == OBJ_NONE) { + if (o->type == OBJ_NONE || flags & PEEL_OBJECT_VERIFY_OBJECT_TYPE) { int type = odb_read_object_info(r->objects, name, NULL); if (type < 0 || !object_as_type(o, type, 0)) return PEEL_INVALID; @@ -222,7 +223,20 @@ enum peel_status peel_object(struct repository *r, if (o->type != OBJ_TAG) return PEEL_NON_TAG; - o = deref_tag_noverify(r, o); + while (o && o->type == OBJ_TAG) { + o = parse_object(r, &o->oid); + if (o && o->type == OBJ_TAG && ((struct tag *)o)->tagged) { + o = ((struct tag *)o)->tagged; + + if (flags & PEEL_OBJECT_VERIFY_OBJECT_TYPE) { + int type = odb_read_object_info(r->objects, &o->oid, NULL); + if (type < 0 || !object_as_type(o, type, 0)) + return PEEL_INVALID; + } + } else { + o = NULL; + } + } if (!o) return PEEL_INVALID; diff --git a/object.h b/object.h index 8c3c1c46e1..1499f63d50 100644 --- a/object.h +++ b/object.h @@ -287,6 +287,17 @@ enum peel_status { PEEL_BROKEN = -4 }; +enum peel_object_flags { + /* + * Always verify the object type, even in the case where the looked-up + * object already has an object type. This can be useful when the + * stored object type may be invalid. One such case is when looking up + * objects via tags, where we blindly trust the object type declared by + * the tag. + */ + PEEL_OBJECT_VERIFY_OBJECT_TYPE = (1 << 0), +}; + /* * Peel the named object; i.e., if the object is a tag, resolve the * tag recursively until a non-tag is found. If successful, store the @@ -295,7 +306,9 @@ enum peel_status { * and leave oid unchanged. */ enum peel_status peel_object(struct repository *r, - const struct object_id *name, struct object_id *oid); + const struct object_id *name, + struct object_id *oid, + unsigned flags); struct object_list *object_list_insert(struct object *item, struct object_list **list_p); diff --git a/ref-filter.c b/ref-filter.c index 7fd8babec8..9a8ed8c8fc 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2581,7 +2581,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) if (need_tagged) { if (!is_null_oid(&ref->peeled_oid)) { oidcpy(&oi_deref.oid, &ref->peeled_oid); - } else if (!peel_object(the_repository, &obj->oid, &oi_deref.oid)) { + } else if (!peel_object(the_repository, &oi.oid, &oi_deref.oid, 0)) { /* We managed to peel the object ourselves. */ } else { die("bad tag"); diff --git a/refs.c b/refs.c index 9d8f0a9ca4..a41a94ae55 100644 --- a/refs.c +++ b/refs.c @@ -2333,7 +2333,7 @@ int reference_get_peeled_oid(struct repository *repo, return 0; } - return peel_object(repo, ref->oid, peeled_oid) ? -1 : 0; + return peel_object(repo, ref->oid, peeled_oid, 0) ? -1 : 0; } int refs_update_symref(struct ref_store *refs, const char *ref, diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 6fa229edd0..4752d3f398 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1527,9 +1527,8 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re i++; } else { struct object_id peeled; - int peel_error = peel_object(refs->base.repo, - &update->new_oid, - &peeled); + int peel_error = peel_object(refs->base.repo, &update->new_oid, + &peeled, 0); if (write_packed_entry(out, update->refname, &update->new_oid, diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index e329d4a423..9febb2322c 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1632,7 +1632,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data ref.refname = (char *)u->refname; ref.update_index = ts; - peel_error = peel_object(arg->refs->base.repo, &u->new_oid, &peeled); + peel_error = peel_object(arg->refs->base.repo, &u->new_oid, &peeled, 0); if (!peel_error) { ref.value_type = REFTABLE_REF_VAL2; memcpy(ref.value.val2.target_value, peeled.hash, GIT_MAX_RAWSZ); @@ -2497,7 +2497,7 @@ static int write_reflog_expiry_table(struct reftable_writer *writer, void *cb_da ref.refname = (char *)arg->refname; ref.update_index = ts; - if (!peel_object(arg->refs->base.repo, &arg->update_oid, &peeled)) { + if (!peel_object(arg->refs->base.repo, &arg->update_oid, &peeled, 0)) { ref.value_type = REFTABLE_REF_VAL2; memcpy(ref.value.val2.target_value, peeled.hash, GIT_MAX_RAWSZ); memcpy(ref.value.val2.value, arg->update_oid.hash, GIT_MAX_RAWSZ); diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index 028ec00306..c58c93800f 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -63,7 +63,7 @@ int cmd__reach(int ac, const char **av) die("failed to resolve %s", buf.buf + 2); orig = parse_object(r, &oid); - peeled = deref_tag_noverify(the_repository, orig); + peeled = deref_tag(the_repository, orig, NULL, 0); if (!peeled) die("failed to load commit for input %s resulting in oid %s", diff --git a/tag.c b/tag.c index 1d52686ee1..f5c232d2f1 100644 --- a/tag.c +++ b/tag.c @@ -94,18 +94,6 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war return o; } -struct object *deref_tag_noverify(struct repository *r, struct object *o) -{ - while (o && o->type == OBJ_TAG) { - o = parse_object(r, &o->oid); - if (o && o->type == OBJ_TAG && ((struct tag *)o)->tagged) - o = ((struct tag *)o)->tagged; - else - o = NULL; - } - return o; -} - struct tag *lookup_tag(struct repository *r, const struct object_id *oid) { struct object *obj = lookup_object(r, oid); diff --git a/tag.h b/tag.h index c49d7c19ad..ef12a61037 100644 --- a/tag.h +++ b/tag.h @@ -16,7 +16,6 @@ int parse_tag_buffer(struct repository *r, struct tag *item, const void *data, u int parse_tag(struct tag *item); void release_tag_memory(struct tag *t); struct object *deref_tag(struct repository *r, struct object *, const char *, int); -struct object *deref_tag_noverify(struct repository *r, struct object *); int gpg_verify_tag(const struct object_id *oid, const char *name_to_report, unsigned flags); struct object_id *get_tagged_oid(struct tag *tag); From 6ec4c0b45ba916770c6fbc03df94018ca06fb77e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:21 +0200 Subject: [PATCH 88/92] refs: don't store peeled object IDs for invalid tags Both the "files" and "reftable" backend store peeled object IDs for references that point to tags: - The "files" backend stores the value when packing refs, where each peeled object ID is prefixed with "^". - The "reftable" backend stores the value whenever writing a new reference that points to a tag via a special ref record type. Both of these backends use `peel_object()` to find the peeled object ID. But as explained in the preceding commit, that function does not detect the case where the tag's tagged object and its claimed type mismatch. The consequence of storing these bogus peeled object IDs is that we're less likely to detect such corruption in other parts of Git. git-for-each-ref(1) for example does not notice anymore that the tag is broken when using "--format=%(*objectname)" to dereference tags. One could claim that this is good, because it still allows us to mostly use the tag as intended. But the biggest problem here is that we now have different behaviour for such a broken tag depending on whether or not we have its peeled value in the refdb. Fix the issue by verifying the object type when peeling the object. If that verification fails we simply skip storing the peeled value in either of the reference formats. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 2 +- refs/reftable-backend.c | 3 ++- t/pack-refs-tests.sh | 32 ++++++++++++++++++++++++++++++++ t/t0610-reftable-basics.sh | 28 ++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 4752d3f398..1ab0c50393 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1528,7 +1528,7 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re } else { struct object_id peeled; int peel_error = peel_object(refs->base.repo, &update->new_oid, - &peeled, 0); + &peeled, PEEL_OBJECT_VERIFY_OBJECT_TYPE); if (write_packed_entry(out, update->refname, &update->new_oid, diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 9febb2322c..6bbfd5618d 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1632,7 +1632,8 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data ref.refname = (char *)u->refname; ref.update_index = ts; - peel_error = peel_object(arg->refs->base.repo, &u->new_oid, &peeled, 0); + peel_error = peel_object(arg->refs->base.repo, &u->new_oid, &peeled, + PEEL_OBJECT_VERIFY_OBJECT_TYPE); if (!peel_error) { ref.value_type = REFTABLE_REF_VAL2; memcpy(ref.value.val2.target_value, peeled.hash, GIT_MAX_RAWSZ); diff --git a/t/pack-refs-tests.sh b/t/pack-refs-tests.sh index 3dbcc01718..095823d915 100644 --- a/t/pack-refs-tests.sh +++ b/t/pack-refs-tests.sh @@ -428,4 +428,36 @@ do ' done +test_expect_success 'pack-refs does not store invalid peeled tag value' ' + test_when_finished rm -rf repo && + git init repo && + ( + cd repo && + git commit --allow-empty --message initial && + + echo garbage >blob-content && + blob_id=$(git hash-object -w -t blob blob-content) && + + # Write an invalid tag into the object database. The tag itself + # is well-formed, but the tagged object is a blob while we + # claim that it is a commit. + cat >tag-content <<-EOF && + object $blob_id + type commit + tag bad-tag + tagger C O Mitter 1112354055 +0200 + + annotated + EOF + tag_id=$(git hash-object -w -t tag tag-content) && + git update-ref refs/tags/bad-tag "$tag_id" && + + # The packed-refs file should not contain the peeled object ID. + # If it did this would cause commands that use the peeled value + # to not notice this corrupted tag. + git pack-refs --all && + test_grep ! "^\^" .git/packed-refs + ) +' + test_done diff --git a/t/t0610-reftable-basics.sh b/t/t0610-reftable-basics.sh index 3ea5d51532..6575528f21 100755 --- a/t/t0610-reftable-basics.sh +++ b/t/t0610-reftable-basics.sh @@ -1135,4 +1135,32 @@ test_expect_success 'fetch: accessing FETCH_HEAD special ref works' ' test_cmp expect actual ' +test_expect_success 'writes do not persist peeled value for invalid tags' ' + test_when_finished rm -rf repo && + git init repo && + ( + cd repo && + git commit --allow-empty --message initial && + + # We cannot easily verify that the peeled value is not stored + # in the tables. Instead, we test this indirectly: we create + # two tags that both point to the same object, but they claim + # different object types. If we parse both tags we notice that + # the parsed tagged object has a mismatch between the two tags + # and bail out. + # + # If we instead use the persisted peeled value we would not + # even parse the tags. As such, we would not notice the + # discrepancy either and thus listing these tags would succeed. + git tag tag-1 -m "tag 1" && + git cat-file tag tag-1 >raw-tag && + sed "s/^type commit$/type blob/" broken-tag && + broken_tag_id=$(git hash-object -w -t tag broken-tag) && + git update-ref refs/tags/tag-2 $broken_tag_id && + + test_must_fail git for-each-ref --format="%(*objectname)" refs/tags/ 2>err && + test_grep "bad tag pointer" err + ) +' + test_done From e66077ae45813a5ca269a7d676310a243bdcc1c2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:22 +0200 Subject: [PATCH 89/92] ref-filter: detect broken tags when dereferencing them Users can ask git-for-each-ref(1) to peel tags and return information of the tagged object by adding an asterisk to the format, like for example "%(*$objectname)". If so, git-for-each-ref(1) peels that object to the first non-tag object and then returns its values. As mentioned in preceding commits, it can happen that the tagged object type and the claimed object type differ, effectively resulting in a corrupt tag. git-for-each-ref(1) would notice this mismatch, print an error and then bail out when trying to peel the tag. But we only notice this corruption in some very specific edge cases! While we have a test in "t/for-each-ref-tests.sh" that verifies the above scenario, this test is specifically crafted to detect the issue at hand. Namely, we create two tags: - One tag points to a specific object with the correct type. - The other tag points to the *same* object with a different type. The fact that both tags point to the same object is important here: `peel_object()` wouldn't notice the corruption if the tagged objects were different. The root cause is that `peel_object()` calls `lookup_${type}()` eventually, where the type is the same type declared in the tag object. Consequently, when we have two tags pointing to the same object but with different declared types we'll call two different lookup functions. The first lookup will store the object with an unverified type A, whereas the second lookup will try to look up the object with a different unverified type B. And it is only now that we notice the discrepancy in object types, even though type A could've already been the wrong type. Fix the issue by verifying the object type in `populate_value()`. With this change we'll also notice type mismatches when only dereferencing a tag once. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ref-filter.c | 3 ++- t/for-each-ref-tests.sh | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index 9a8ed8c8fc..c54025d6b4 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2581,7 +2581,8 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) if (need_tagged) { if (!is_null_oid(&ref->peeled_oid)) { oidcpy(&oi_deref.oid, &ref->peeled_oid); - } else if (!peel_object(the_repository, &oi.oid, &oi_deref.oid, 0)) { + } else if (!peel_object(the_repository, &oi.oid, &oi_deref.oid, + PEEL_OBJECT_VERIFY_OBJECT_TYPE)) { /* We managed to peel the object ourselves. */ } else { die("bad tag"); diff --git a/t/for-each-ref-tests.sh b/t/for-each-ref-tests.sh index e3ad19298a..4593be5fd5 100644 --- a/t/for-each-ref-tests.sh +++ b/t/for-each-ref-tests.sh @@ -1809,7 +1809,9 @@ test_expect_success "${git_for_each_ref} reports broken tags" ' bad=$(git hash-object -w -t tag bad) && git update-ref refs/tags/broken-tag-bad $bad && test_must_fail ${git_for_each_ref} --format="%(*objectname)" \ - refs/tags/broken-tag-* + refs/tags/broken-tag-* && + test_must_fail ${git_for_each_ref} --format="%(*objectname)" \ + refs/tags/broken-tag-bad ' test_expect_success 'set up tag with signature and no blank lines' ' From a29e2e8fe7e3935e23d2a03dc429cc9c2e68bfbe Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:23 +0200 Subject: [PATCH 90/92] ref-filter: parse objects on demand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When formatting an arbitrary object we parse that object regardless of whether or not we actually need any parsed data. In fact, many of the atoms we have don't require any. Refactor the code so that we parse the data on demand when we see an atom that wants to access the objects. This leads to a small speedup, for example in the Chromium repository with around 40000 refs: Benchmark 1: for-each-ref --format='%(raw)' (HEAD~) Time (mean ± σ): 388.7 ms ± 1.1 ms [User: 322.2 ms, System: 65.0 ms] Range (min … max): 387.3 ms … 390.8 ms 10 runs Benchmark 2: for-each-ref --format='%(raw)' (HEAD) Time (mean ± σ): 344.7 ms ± 0.7 ms [User: 287.8 ms, System: 55.1 ms] Range (min … max): 343.9 ms … 345.7 ms 10 runs Summary for-each-ref --format='%(raw)' (HEAD) ran 1.13 ± 0.00 times faster than for-each-ref --format='%(raw)' (HEAD~) With this change, we now spend ~90% of the time decompressing objects, which is almost as good as it gets regarding git-for-each-ref(1)'s own infrastructure. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ref-filter.c | 144 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 107 insertions(+), 37 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index c54025d6b4..7cfcd5c355 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -91,6 +91,7 @@ static struct expand_data { struct object_id delta_base_oid; void *content; + struct object *maybe_object; struct object_info info; } oi, oi_deref; @@ -1475,11 +1476,29 @@ static void grab_common_values(struct atom_value *val, int deref, struct expand_ } } -/* See grab_values */ -static void grab_tag_values(struct atom_value *val, int deref, struct object *obj) +static struct object *get_or_parse_object(struct expand_data *data, const char *refname, + struct strbuf *err, int *eaten) { + if (!data->maybe_object) { + data->maybe_object = parse_object_buffer(the_repository, &data->oid, data->type, + data->size, data->content, eaten); + if (!data->maybe_object) { + strbuf_addf(err, _("parse_object_buffer failed on %s for %s"), + oid_to_hex(&data->oid), refname); + return NULL; + } + } + + return data->maybe_object; +} + +/* See grab_values */ +static int grab_tag_values(struct atom_value *val, int deref, + struct expand_data *data, const char *refname, + struct strbuf *err, int *eaten) +{ + struct tag *tag = NULL; int i; - struct tag *tag = (struct tag *) obj; for (i = 0; i < used_atom_cnt; i++) { const char *name = used_atom[i].name; @@ -1487,6 +1506,14 @@ static void grab_tag_values(struct atom_value *val, int deref, struct object *ob struct atom_value *v = &val[i]; if (!!deref != (*name == '*')) continue; + + if (!tag) { + tag = (struct tag *) get_or_parse_object(data, refname, + err, eaten); + if (!tag) + return -1; + } + if (deref) name++; if (atom_type == ATOM_TAG) @@ -1496,22 +1523,35 @@ static void grab_tag_values(struct atom_value *val, int deref, struct object *ob else if (atom_type == ATOM_OBJECT && tag->tagged) v->s = xstrdup(oid_to_hex(&tag->tagged->oid)); } + + return 0; } /* See grab_values */ -static void grab_commit_values(struct atom_value *val, int deref, struct object *obj) +static int grab_commit_values(struct atom_value *val, int deref, + struct expand_data *data, const char *refname, + struct strbuf *err, int *eaten) { int i; - struct commit *commit = (struct commit *) obj; + struct commit *commit = NULL; for (i = 0; i < used_atom_cnt; i++) { const char *name = used_atom[i].name; enum atom_type atom_type = used_atom[i].atom_type; struct atom_value *v = &val[i]; + if (!!deref != (*name == '*')) continue; if (deref) name++; + + if (!commit) { + commit = (struct commit *) get_or_parse_object(data, refname, + err, eaten); + if (!commit) + return -1; + } + if (atom_type == ATOM_TREE && grab_oid(name, "tree", get_commit_tree_oid(commit), v, &used_atom[i])) continue; @@ -1531,6 +1571,8 @@ static void grab_commit_values(struct atom_value *val, int deref, struct object v->s = strbuf_detach(&s, NULL); } } + + return 0; } static const char *find_wholine(const char *who, int wholen, const char *buf) @@ -1759,10 +1801,12 @@ static void grab_person(const char *who, struct atom_value *val, int deref, void } } -static void grab_signature(struct atom_value *val, int deref, struct object *obj) +static int grab_signature(struct atom_value *val, int deref, + struct expand_data *data, const char *refname, + struct strbuf *err, int *eaten) { int i; - struct commit *commit = (struct commit *) obj; + struct commit *commit = NULL; struct signature_check sigc = { 0 }; int signature_checked = 0; @@ -1790,6 +1834,13 @@ static void grab_signature(struct atom_value *val, int deref, struct object *obj continue; if (!signature_checked) { + if (!commit) { + commit = (struct commit *) get_or_parse_object(data, refname, + err, eaten); + if (!commit) + return -1; + } + check_commit_signature(commit, &sigc); signature_checked = 1; } @@ -1843,6 +1894,8 @@ static void grab_signature(struct atom_value *val, int deref, struct object *obj if (signature_checked) signature_check_clear(&sigc); + + return 0; } static void find_subpos(const char *buf, @@ -1920,9 +1973,8 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size } static void grab_describe_values(struct atom_value *val, int deref, - struct object *obj) + struct expand_data *data) { - struct commit *commit = (struct commit *)obj; int i; for (i = 0; i < used_atom_cnt; i++) { @@ -1944,7 +1996,7 @@ static void grab_describe_values(struct atom_value *val, int deref, cmd.git_cmd = 1; strvec_push(&cmd.args, "describe"); strvec_pushv(&cmd.args, atom->u.describe_args.v); - strvec_push(&cmd.args, oid_to_hex(&commit->object.oid)); + strvec_push(&cmd.args, oid_to_hex(&data->oid)); if (pipe_command(&cmd, NULL, 0, &out, 0, &err, 0) < 0) { error(_("failed to run 'describe'")); v->s = xstrdup(""); @@ -2066,24 +2118,36 @@ static void fill_missing_values(struct atom_value *val) * pointed at by the ref itself; otherwise it is the object the * ref (which is a tag) refers to. */ -static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data) +static int grab_values(struct atom_value *val, int deref, struct expand_data *data, + const char *refname, struct strbuf *err, int *eaten) { void *buf = data->content; + int ret; - switch (obj->type) { + switch (data->type) { case OBJ_TAG: - grab_tag_values(val, deref, obj); + ret = grab_tag_values(val, deref, data, refname, err, eaten); + if (ret < 0) + goto out; + grab_sub_body_contents(val, deref, data); grab_person("tagger", val, deref, buf); - grab_describe_values(val, deref, obj); + grab_describe_values(val, deref, data); break; case OBJ_COMMIT: - grab_commit_values(val, deref, obj); + ret = grab_commit_values(val, deref, data, refname, err, eaten); + if (ret < 0) + goto out; + grab_sub_body_contents(val, deref, data); grab_person("author", val, deref, buf); grab_person("committer", val, deref, buf); - grab_signature(val, deref, obj); - grab_describe_values(val, deref, obj); + + ret = grab_signature(val, deref, data, refname, err, eaten); + if (ret < 0) + goto out; + + grab_describe_values(val, deref, data); break; case OBJ_TREE: /* grab_tree_values(val, deref, obj, buf, sz); */ @@ -2094,8 +2158,12 @@ static void grab_values(struct atom_value *val, int deref, struct object *obj, s grab_sub_body_contents(val, deref, data); break; default: - die("Eh? Object of type %d?", obj->type); + die("Eh? Object of type %d?", data->type); } + + ret = 0; +out: + return ret; } static inline char *copy_advance(char *dst, const char *src) @@ -2292,38 +2360,41 @@ static const char *get_refname(struct used_atom *atom, struct ref_array_item *re return show_ref(&atom->u.refname, ref->refname); } -static int get_object(struct ref_array_item *ref, int deref, struct object **obj, +static int get_object(struct ref_array_item *ref, int deref, struct expand_data *oi, struct strbuf *err) { - /* parse_object_buffer() will set eaten to 0 if free() will be needed */ - int eaten = 1; + /* parse_object_buffer() will set eaten to 1 if free() will be needed */ + int eaten = 0; + int ret; + if (oi->info.contentp) { /* We need to know that to use parse_object_buffer properly */ oi->info.sizep = &oi->size; oi->info.typep = &oi->type; } + if (odb_read_object_info_extended(the_repository->objects, &oi->oid, &oi->info, - OBJECT_INFO_LOOKUP_REPLACE)) - return strbuf_addf_ret(err, -1, _("missing object %s for %s"), - oid_to_hex(&oi->oid), ref->refname); + OBJECT_INFO_LOOKUP_REPLACE)) { + ret = strbuf_addf_ret(err, -1, _("missing object %s for %s"), + oid_to_hex(&oi->oid), ref->refname); + goto out; + } if (oi->info.disk_sizep && oi->disk_size < 0) BUG("Object size is less than zero."); if (oi->info.contentp) { - *obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten); - if (!*obj) { - if (!eaten) - free(oi->content); - return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"), - oid_to_hex(&oi->oid), ref->refname); - } - grab_values(ref->value, deref, *obj, oi); + ret = grab_values(ref->value, deref, oi, ref->refname, err, &eaten); + if (ret < 0) + goto out; } grab_common_values(ref->value, deref, oi); + ret = 0; + +out: if (!eaten) free(oi->content); - return 0; + return ret; } static void populate_worktree_map(struct hashmap *map, struct worktree **worktrees) @@ -2376,7 +2447,6 @@ static char *get_worktree_path(const struct ref_array_item *ref) */ static int populate_value(struct ref_array_item *ref, struct strbuf *err) { - struct object *obj; int i; struct object_info empty = OBJECT_INFO_INIT; int ahead_behind_atoms = 0; @@ -2564,14 +2634,14 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) oi.oid = ref->objectname; - if (get_object(ref, 0, &obj, &oi, err)) + if (get_object(ref, 0, &oi, err)) return -1; /* * If there is no atom that wants to know about tagged * object, we are done. */ - if (!need_tagged || (obj->type != OBJ_TAG)) + if (!need_tagged || (oi.type != OBJ_TAG)) return 0; /* @@ -2589,7 +2659,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) } } - return get_object(ref, 1, &obj, &oi_deref, err); + return get_object(ref, 1, &oi_deref, err); } /* From bea37f1d647c6b17896eb3f0c210ac8dfc27b6d7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 4 Nov 2025 15:36:13 +0100 Subject: [PATCH 91/92] ref-filter: fix stale parsed objects In 054f5f457e (ref-filter: parse objects on demand, 2025-10-23) we have started to skip parsing some objects in case we don't need to access their values in the first place. This was done by introducing a new member `struct expand_data::maybe_object` that gets populated on demand via `get_or_parse_object()`. This has led to a regression though where the object now gets reused because we don't reset it properly. The `oi` structure is declared in global scope, and there is no single place where we reset it before invoking `get_object()`. The consequence is that the `maybe_object` member doesn't get reset across calls, so subsequent calls will end up reusing the same object. This is only an issue for a subset of retrieved values, as not all of the infrastructure ends up calling `get_or_parse_object()`. So the effect is limited, which is probably why the issue wasn't detected earlier. Fix the issue by resetting `maybe_object` in `get_object()`. Reported-by: Junio C Hamano Based-on-patch-by: Jeff King Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ref-filter.c | 2 ++ t/t7004-tag.sh | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/ref-filter.c b/ref-filter.c index 7cfcd5c355..d8667c569a 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2367,6 +2367,8 @@ static int get_object(struct ref_array_item *ref, int deref, int eaten = 0; int ret; + oi->maybe_object = NULL; + if (oi->info.contentp) { /* We need to know that to use parse_object_buffer properly */ oi->info.sizep = &oi->size; diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh index 10835631ca..d1388cfdf4 100755 --- a/t/t7004-tag.sh +++ b/t/t7004-tag.sh @@ -2332,4 +2332,24 @@ test_expect_success 'If tag cannot be created then tag message file is not unlin test_path_exists .git/TAG_EDITMSG ' +test_expect_success 'annotated tag version sort' ' + git tag -a -m "sample 1.0" vsample-1.0 && + git tag -a -m "sample 2.0" vsample-2.0 && + git tag -a -m "sample 10.0" vsample-10.0 && + cat >expect <<-EOF && + vsample-1.0 + vsample-2.0 + vsample-10.0 + EOF + + git tag --list --sort=version:tag vsample-\* >actual && + test_cmp expect actual && + + # Ensure that we also handle this case alright in the case we have the + # peeled values cached e.g. via the packed-refs file. + git pack-refs --all && + git tag --list --sort=version:tag vsample-\* && + test_cmp expect actual +' + test_done From 61ac8ba0f034b61d7353a799fecae9fe45137a72 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 4 Nov 2025 07:28:59 -0800 Subject: [PATCH 92/92] t7004: do not chdir around in the main process Move down to no-contains subdirectory inside a subshell, just like the previous step that created and used it does. Signed-off-by: Junio C Hamano --- t/t7004-tag.sh | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh index d1388cfdf4..ce2ff2a28a 100755 --- a/t/t7004-tag.sh +++ b/t/t7004-tag.sh @@ -2293,24 +2293,26 @@ test_expect_success '--contains combined with --no-contains' ' # don't recurse down to tags for trees or blobs pointed to by *those* # commits. test_expect_success 'Does --[no-]contains stop at commits? Yes!' ' - cd no-contains && - blob=$(git rev-parse v0.3:v0.3.t) && - tree=$(git rev-parse v0.3^{tree}) && - git tag tag-blob $blob && - git tag tag-tree $tree && - git tag --contains v0.3 >actual && - cat >expected <<-\EOF && - v0.3 - v0.4 - v0.5 - EOF - test_cmp expected actual && - git tag --no-contains v0.3 >actual && - cat >expected <<-\EOF && - v0.1 - v0.2 - EOF - test_cmp expected actual + ( + cd no-contains && + blob=$(git rev-parse v0.3:v0.3.t) && + tree=$(git rev-parse v0.3^{tree}) && + git tag tag-blob $blob && + git tag tag-tree $tree && + git tag --contains v0.3 >actual && + cat >expected <<-\EOF && + v0.3 + v0.4 + v0.5 + EOF + test_cmp expected actual && + git tag --no-contains v0.3 >actual && + cat >expected <<-\EOF && + v0.1 + v0.2 + EOF + test_cmp expected actual + ) ' test_expect_success 'If tag is created then tag message file is unlinked' '