From 0cd306ebc8c7a9faeced0a2fa7bddeea434bf285 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 5 Jan 2026 14:16:41 +0100 Subject: [PATCH 1/5] builtin/pack-objects: exclude promisor objects with "--stdin-packs" It is currently not possible to combine "--exclude-promisor-objects" with "--stdin-packs" because both flags want to set up a revision walk to enumerate the objects to pack. In a subsequent commit though we want to extend geometric repacks to support promisor objects, and for that we need to handle the combination of both flags. There are two cases we have to think about here: - "--stdin-packs" asks us to pack exactly the objects part of the specified packfiles. It is somewhat questionable what to do in the case where the user asks us to exclude promisor objects, but at the same time explicitly passes a promisor pack to us. For now, we simply abort the request as it is self-contradicting. As we have also been dying before this commit there is no regression here. - "--stdin-packs=follow" does the same as the first flag, but it also asks us to include all objects transitively reachable from any object in the packs we are about to repack. This is done by doing the revision walk mentioned further up. Luckily, fixing this case is trivial: we only need to modify the revision walk to also set the `exclude_promisor_objects` field. Note that we do not support the "--exclude-promisor-objects-best-effort" flag for now as we don't need it to support geometric repacking with promisor objects. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 14 ++++++++++--- t/t5331-pack-objects-stdin.sh | 39 +++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index ca44b7894f..c53e0c91b9 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3857,8 +3857,11 @@ static void read_packs_list_from_stdin(struct rev_info *revs) repo_for_each_pack(the_repository, p) { const char *pack_name = pack_basename(p); - if ((item = string_list_lookup(&include_packs, pack_name))) + if ((item = string_list_lookup(&include_packs, pack_name))) { + if (exclude_promisor_objects && p->pack_promisor) + die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name); item->util = p; + } if ((item = string_list_lookup(&exclude_packs, pack_name))) item->util = p; } @@ -3936,6 +3939,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) revs.tree_objects = 1; revs.tag_objects = 1; revs.ignore_missing_links = 1; + revs.exclude_promisor_objects = exclude_promisor_objects; /* avoids adding objects in excluded packs */ ignore_packed_keep_in_core = 1; @@ -5092,9 +5096,13 @@ int cmd_pack_objects(int argc, exclude_promisor_objects_best_effort, "--exclude-promisor-objects-best-effort"); if (exclude_promisor_objects) { - use_internal_rev_list = 1; fetch_if_missing = 0; - strvec_push(&rp, "--exclude-promisor-objects"); + + /* --stdin-packs handles promisor objects separately. */ + if (!stdin_packs) { + use_internal_rev_list = 1; + strvec_push(&rp, "--exclude-promisor-objects"); + } } else if (exclude_promisor_objects_best_effort) { use_internal_rev_list = 1; fetch_if_missing = 0; diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index 4a8df5a389..cd949025b9 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -319,6 +319,45 @@ test_expect_success '--stdin-packs=follow walks into unknown packs' ' ) ' +test_expect_success '--stdin-packs with promisors' ' + test_when_finished "rm -fr repo" && + git init repo && + ( + cd repo && + git config set maintenance.auto false && + git remote add promisor garbage && + git config set remote.promisor.promisor true && + + for c in A B C D + do + echo "$c" >file && + git add file && + git commit --message "$c" && + git tag "$c" || return 1 + done && + + A="$(echo A | git pack-objects --revs $packdir/pack)" && + B="$(echo A..B | git pack-objects --revs $packdir/pack --filter=blob:none)" && + C="$(echo B..C | git pack-objects --revs $packdir/pack)" && + D="$(echo C..D | git pack-objects --revs $packdir/pack)" && + touch $packdir/pack-$B.promisor && + + test_must_fail git pack-objects --stdin-packs --exclude-promisor-objects pack- 2>err <<-EOF && + pack-$B.pack + EOF + test_grep "is a promisor but --exclude-promisor-objects was given" err && + + PACK=$(git pack-objects --stdin-packs=follow --exclude-promisor-objects $packdir/pack <<-EOF + pack-$D.pack + EOF + ) && + objects_in_packs $C $D >expect && + objects_in_packs $PACK >actual && + test_cmp expect actual && + rm -f $packdir/pack-$PACK.* + ) +' + stdin_packs__follow_with_only () { rm -fr stdin_packs__follow_with_only && git init stdin_packs__follow_with_only && From 861248b946b68822375d2fe3cdffa174bf73104c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 5 Jan 2026 14:16:42 +0100 Subject: [PATCH 2/5] repack-geometry: extract function to compute repacking split We're about to add a second caller that wants to compute the repacking split for a set of packfiles. Split out the function that computes this split to prepare for that. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- repack-geometry.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/repack-geometry.c b/repack-geometry.c index b3e32cd07e..17e6652a91 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -78,33 +78,32 @@ void pack_geometry_init(struct pack_geometry *geometry, strbuf_release(&buf); } -void pack_geometry_split(struct pack_geometry *geometry) +static uint32_t compute_pack_geometry_split(struct packed_git **pack, size_t pack_nr, + int split_factor) { uint32_t i; uint32_t split; off_t total_size = 0; - if (!geometry->pack_nr) { - geometry->split = geometry->pack_nr; - return; - } + if (!pack_nr) + return 0; /* * First, count the number of packs (in descending order of size) which * already form a geometric progression. */ - for (i = geometry->pack_nr - 1; i > 0; i--) { - struct packed_git *ours = geometry->pack[i]; - struct packed_git *prev = geometry->pack[i - 1]; + for (i = pack_nr - 1; i > 0; i--) { + struct packed_git *ours = pack[i]; + struct packed_git *prev = pack[i - 1]; - if (unsigned_mult_overflows(geometry->split_factor, + if (unsigned_mult_overflows(split_factor, pack_geometry_weight(prev))) die(_("pack %s too large to consider in geometric " "progression"), prev->pack_name); if (pack_geometry_weight(ours) < - geometry->split_factor * pack_geometry_weight(prev)) + split_factor * pack_geometry_weight(prev)) break; } @@ -130,21 +129,19 @@ void pack_geometry_split(struct pack_geometry *geometry) * the geometric progression. */ for (i = 0; i < split; i++) { - struct packed_git *p = geometry->pack[i]; + struct packed_git *p = pack[i]; if (unsigned_add_overflows(total_size, pack_geometry_weight(p))) die(_("pack %s too large to roll up"), p->pack_name); total_size += pack_geometry_weight(p); } - for (i = split; i < geometry->pack_nr; i++) { - struct packed_git *ours = geometry->pack[i]; + for (i = split; i < pack_nr; i++) { + struct packed_git *ours = pack[i]; - if (unsigned_mult_overflows(geometry->split_factor, - total_size)) + if (unsigned_mult_overflows(split_factor, total_size)) die(_("pack %s too large to roll up"), ours->pack_name); - if (pack_geometry_weight(ours) < - geometry->split_factor * total_size) { + if (pack_geometry_weight(ours) < split_factor * total_size) { if (unsigned_add_overflows(total_size, pack_geometry_weight(ours))) die(_("pack %s too large to roll up"), @@ -156,7 +153,13 @@ void pack_geometry_split(struct pack_geometry *geometry) break; } - geometry->split = split; + return split; +} + +void pack_geometry_split(struct pack_geometry *geometry) +{ + geometry->split = compute_pack_geometry_split(geometry->pack, geometry->pack_nr, + geometry->split_factor); } struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) From dd8c4e12c2da850d67849ccfc221ee1dbf87ce55 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 5 Jan 2026 14:16:43 +0100 Subject: [PATCH 3/5] repack-promisor: extract function to finalize repacking We're about to add a second caller that wants to finalize repacking of promisor objects. Split out the function which does this to prepare for that. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- repack-promisor.c | 69 ++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/repack-promisor.c b/repack-promisor.c index ee6e0669f6..125038d92e 100644 --- a/repack-promisor.c +++ b/repack-promisor.c @@ -34,39 +34,17 @@ static int write_oid(const struct object_id *oid, return 0; } -void repack_promisor_objects(struct repository *repo, - const struct pack_objects_args *args, - struct string_list *names, const char *packtmp) +static void finish_repacking_promisor_objects(struct repository *repo, + struct child_process *cmd, + struct string_list *names, + const char *packtmp) { - struct write_oid_context ctx; - struct child_process cmd = CHILD_PROCESS_INIT; - FILE *out; struct strbuf line = STRBUF_INIT; + FILE *out; - prepare_pack_objects(&cmd, args, packtmp); - cmd.in = -1; + close(cmd->in); - /* - * NEEDSWORK: Giving pack-objects only the OIDs without any ordering - * hints may result in suboptimal deltas in the resulting pack. See if - * the OIDs can be sent with fake paths such that pack-objects can use a - * {type -> existing pack order} ordering when computing deltas instead - * of a {type -> size} ordering, which may produce better deltas. - */ - ctx.cmd = &cmd; - ctx.algop = repo->hash_algo; - for_each_packed_object(repo, write_oid, &ctx, - FOR_EACH_OBJECT_PROMISOR_ONLY); - - if (cmd.in == -1) { - /* No packed objects; cmd was never started */ - child_process_clear(&cmd); - return; - } - - close(cmd.in); - - out = xfdopen(cmd.out, "r"); + out = xfdopen(cmd->out, "r"); while (strbuf_getline_lf(&line, out) != EOF) { struct string_list_item *item; char *promisor_name; @@ -96,7 +74,38 @@ void repack_promisor_objects(struct repository *repo, } fclose(out); - if (finish_command(&cmd)) + if (finish_command(cmd)) die(_("could not finish pack-objects to repack promisor objects")); strbuf_release(&line); } + +void repack_promisor_objects(struct repository *repo, + const struct pack_objects_args *args, + struct string_list *names, const char *packtmp) +{ + struct write_oid_context ctx; + struct child_process cmd = CHILD_PROCESS_INIT; + + prepare_pack_objects(&cmd, args, packtmp); + cmd.in = -1; + + /* + * NEEDSWORK: Giving pack-objects only the OIDs without any ordering + * hints may result in suboptimal deltas in the resulting pack. See if + * the OIDs can be sent with fake paths such that pack-objects can use a + * {type -> existing pack order} ordering when computing deltas instead + * of a {type -> size} ordering, which may produce better deltas. + */ + ctx.cmd = &cmd; + ctx.algop = repo->hash_algo; + for_each_packed_object(repo, write_oid, &ctx, + FOR_EACH_OBJECT_PROMISOR_ONLY); + + if (cmd.in == -1) { + /* No packed objects; cmd was never started */ + child_process_clear(&cmd); + return; + } + + finish_repacking_promisor_objects(repo, &cmd, names, packtmp); +} From fa7b91247b42bc9ffab423d42f0e9e12e86769fa Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 5 Jan 2026 14:16:44 +0100 Subject: [PATCH 4/5] repack-promisor: extract function to remove redundant packs We're about to add a second caller that wants to remove redundant packs after a geometric repack. Split out the function which does this to prepare for that. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- repack-geometry.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/repack-geometry.c b/repack-geometry.c index 17e6652a91..0daf545a81 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -197,17 +197,18 @@ struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) return NULL; } -void pack_geometry_remove_redundant(struct pack_geometry *geometry, - struct string_list *names, - struct existing_packs *existing, - const char *packdir) +static void remove_redundant_packs(struct packed_git **pack, + uint32_t pack_nr, + struct string_list *names, + struct existing_packs *existing, + const char *packdir) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; uint32_t i; - for (i = 0; i < geometry->split; i++) { - struct packed_git *p = geometry->pack[i]; + for (i = 0; i < pack_nr; i++) { + struct packed_git *p = pack[i]; if (string_list_has_string(names, hash_to_hex_algop(p->hash, algop))) continue; @@ -226,6 +227,15 @@ void pack_geometry_remove_redundant(struct pack_geometry *geometry, strbuf_release(&buf); } +void pack_geometry_remove_redundant(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing, + const char *packdir) +{ + remove_redundant_packs(geometry->pack, geometry->split, + names, existing, packdir); +} + void pack_geometry_release(struct pack_geometry *geometry) { if (!geometry) From dcc9c7ef47d8755f1448116cdf0a421129999cd4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 5 Jan 2026 14:16:45 +0100 Subject: [PATCH 5/5] builtin/repack: handle promisor packs with geometric repacking When performing a fetch with an object filter, we mark the resulting packfile as a promisor pack. An object part of such a pack may miss any of its referenced objects, and Git knows to handle this case by fetching any such missing objects from the promisor remote. The "promisor" property needs to be retained going forward. So every time we pack a promisor object, the resulting pack must be marked as a promisor pack. git-repack(1) does this already: when a repository has a promisor remote, it knows to pass "--exclude-promisor-objects" to the git-pack-objects(1) child process. Promisor packs are written separately when doing an all-into-one repack via `repack_promisor_objects()`. But we don't support promisor objects when doing a geometric repack yet. Promisor packs do not get any special treatment there, as we simply merge promisor and non-promisor packs. The resulting pack is not even marked as a promisor pack, which essentially corrupts the repository. This corruption couldn't happen in the real world though: we pass both "--exclude-promisor-objects" and "--stdin-packs" to git-pack-objects(1) if a repository has a promisor remote, but as those options are mutually exclusive we always end up dying. And while we made those flags compatible with one another in a preceding commit, we still end up dying in case git-pack-objects(1) is asked to repack a promisor pack. There's multiple ways to fix this: - We can exclude promisor packs from the geometric progression altogether. This would have the consequence that we never repack promisor packs at all. But in a partial clone it is quite likely that the user generates a bunch of promisor packs over time, as every backfill fetch would create another one. So this doesn't really feel like a sensible option. - We can adapt git-pack-objects(1) to support repacking promisor packs and include them in the normal geometric progression. But this would mean that the set of promisor objects expands over time as the packs are merged with normal packs. - We can use a separate geometric progression to repack promisor packs. The first two options both have significant downsides, so they aren't really feasible. But the third option fixes both of these downsides: we make sure that promisor packs get merged, and at the same time we never expand the set of promisor objects beyond the set of objects that are already marked as promisor objects. Implement this strategy so that geometric repacking works in partial clones. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/repack.c | 3 ++ repack-geometry.c | 26 +++++++++++++--- repack-promisor.c | 28 +++++++++++++++++ repack.h | 10 ++++++ t/t7703-repack-geometric.sh | 61 +++++++++++++++++++++++++++++++++++++ 5 files changed, 123 insertions(+), 5 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index d9012141f6..f6bb04bef7 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -332,6 +332,9 @@ int cmd_repack(int argc, !(pack_everything & PACK_CRUFT)) strvec_push(&cmd.args, "--pack-loose-unreachable"); } else if (geometry.split_factor) { + pack_geometry_repack_promisors(repo, &po_args, &geometry, + &names, packtmp); + if (midx_must_contain_cruft) strvec_push(&cmd.args, "--stdin-packs"); else diff --git a/repack-geometry.c b/repack-geometry.c index 0daf545a81..7cebd0cb45 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -66,15 +66,25 @@ void pack_geometry_init(struct pack_geometry *geometry, if (p->is_cruft) continue; - ALLOC_GROW(geometry->pack, - geometry->pack_nr + 1, - geometry->pack_alloc); + if (p->pack_promisor) { + ALLOC_GROW(geometry->promisor_pack, + geometry->promisor_pack_nr + 1, + geometry->promisor_pack_alloc); - geometry->pack[geometry->pack_nr] = p; - geometry->pack_nr++; + geometry->promisor_pack[geometry->promisor_pack_nr] = p; + geometry->promisor_pack_nr++; + } else { + ALLOC_GROW(geometry->pack, + geometry->pack_nr + 1, + geometry->pack_alloc); + + geometry->pack[geometry->pack_nr] = p; + geometry->pack_nr++; + } } QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp); + QSORT(geometry->promisor_pack, geometry->promisor_pack_nr, pack_geometry_cmp); strbuf_release(&buf); } @@ -160,6 +170,9 @@ void pack_geometry_split(struct pack_geometry *geometry) { geometry->split = compute_pack_geometry_split(geometry->pack, geometry->pack_nr, geometry->split_factor); + geometry->promisor_split = compute_pack_geometry_split(geometry->promisor_pack, + geometry->promisor_pack_nr, + geometry->split_factor); } struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) @@ -234,6 +247,8 @@ void pack_geometry_remove_redundant(struct pack_geometry *geometry, { remove_redundant_packs(geometry->pack, geometry->split, names, existing, packdir); + remove_redundant_packs(geometry->promisor_pack, geometry->promisor_split, + names, existing, packdir); } void pack_geometry_release(struct pack_geometry *geometry) @@ -242,4 +257,5 @@ void pack_geometry_release(struct pack_geometry *geometry) return; free(geometry->pack); + free(geometry->promisor_pack); } diff --git a/repack-promisor.c b/repack-promisor.c index 125038d92e..73af57bce3 100644 --- a/repack-promisor.c +++ b/repack-promisor.c @@ -109,3 +109,31 @@ void repack_promisor_objects(struct repository *repo, finish_repacking_promisor_objects(repo, &cmd, names, packtmp); } + +void pack_geometry_repack_promisors(struct repository *repo, + const struct pack_objects_args *args, + const struct pack_geometry *geometry, + struct string_list *names, + const char *packtmp) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + FILE *in; + + if (!geometry->promisor_split) + return; + + prepare_pack_objects(&cmd, args, packtmp); + strvec_push(&cmd.args, "--stdin-packs"); + cmd.in = -1; + if (start_command(&cmd)) + die(_("could not start pack-objects to repack promisor packs")); + + in = xfdopen(cmd.in, "w"); + for (size_t i = 0; i < geometry->promisor_split; i++) + fprintf(in, "%s\n", pack_basename(geometry->promisor_pack[i])); + for (size_t i = geometry->promisor_split; i < geometry->promisor_pack_nr; i++) + fprintf(in, "^%s\n", pack_basename(geometry->promisor_pack[i])); + fclose(in); + + finish_repacking_promisor_objects(repo, &cmd, names, packtmp); +} diff --git a/repack.h b/repack.h index 3a688a12ee..bc9f2e1a5d 100644 --- a/repack.h +++ b/repack.h @@ -103,9 +103,19 @@ struct pack_geometry { uint32_t pack_nr, pack_alloc; uint32_t split; + struct packed_git **promisor_pack; + uint32_t promisor_pack_nr, promisor_pack_alloc; + uint32_t promisor_split; + int split_factor; }; +void pack_geometry_repack_promisors(struct repository *repo, + const struct pack_objects_args *args, + const struct pack_geometry *geometry, + struct string_list *names, + const char *packtmp); + void pack_geometry_init(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args); diff --git a/t/t7703-repack-geometric.sh b/t/t7703-repack-geometric.sh index 98806cdb6f..04d5d8fc33 100755 --- a/t/t7703-repack-geometric.sh +++ b/t/t7703-repack-geometric.sh @@ -480,4 +480,65 @@ test_expect_success '--geometric -l disables writing bitmaps with non-local pack test_path_is_file member/.git/objects/pack/multi-pack-index-*.bitmap ' +write_packfile () { + NR="$1" + PREFIX="$2" + + printf "blob\ndata <objects-expect && + + ls .git/objects/pack/*.pack >packs-before && + test_line_count = 8 packs-before && + git repack --geometric=2 -d && + ls .git/objects/pack/*.pack >packs-after && + test_line_count = 5 packs-after && + test_grep ! "$NORMAL_A" packs-after && + test_grep ! "$NORMAL_B" packs-after && + test_grep "$NORMAL_C" packs-after && + test_grep ! "$PROMISOR_A" packs-after && + test_grep ! "$PROMISOR_B" packs-after && + test_grep ! "$PROMISOR_C" packs-after && + test_grep "$PROMISOR_D" packs-after && + test_grep "$PROMISOR_E" packs-after && + + ls .git/objects/pack/*.promisor >promisors && + test_line_count = 3 promisors && + + git cat-file --batch-all-objects --batch-check="%(objectname)" >objects-actual && + test_cmp objects-expect objects-actual + ) +' + test_done