Merge branch 'ps/geometric-repacking-with-promisor-remotes' into jch

"git repack --geometric" did not work with promisor packs, which
has been corrected.

* ps/geometric-repacking-with-promisor-remotes:
  builtin/repack: handle promisor packs with geometric repacking
  repack-promisor: extract function to remove redundant packs
  repack-promisor: extract function to finalize repacking
  repack-geometry: extract function to compute repacking split
  builtin/pack-objects: exclude promisor objects with "--stdin-packs"
This commit is contained in:
Junio C Hamano 2026-01-19 16:44:16 -08:00
commit 6a8a856e45
7 changed files with 249 additions and 62 deletions

View File

@ -3863,8 +3863,11 @@ static void read_packs_list_from_stdin(struct rev_info *revs)
repo_for_each_pack(the_repository, p) {
const char *pack_name = pack_basename(p);
if ((item = string_list_lookup(&include_packs, pack_name)))
if ((item = string_list_lookup(&include_packs, pack_name))) {
if (exclude_promisor_objects && p->pack_promisor)
die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name);
item->util = p;
}
if ((item = string_list_lookup(&exclude_packs, pack_name)))
item->util = p;
}
@ -3942,6 +3945,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
revs.tree_objects = 1;
revs.tag_objects = 1;
revs.ignore_missing_links = 1;
revs.exclude_promisor_objects = exclude_promisor_objects;
/* avoids adding objects in excluded packs */
ignore_packed_keep_in_core = 1;
@ -5098,9 +5102,13 @@ int cmd_pack_objects(int argc,
exclude_promisor_objects_best_effort,
"--exclude-promisor-objects-best-effort");
if (exclude_promisor_objects) {
use_internal_rev_list = 1;
fetch_if_missing = 0;
strvec_push(&rp, "--exclude-promisor-objects");
/* --stdin-packs handles promisor objects separately. */
if (!stdin_packs) {
use_internal_rev_list = 1;
strvec_push(&rp, "--exclude-promisor-objects");
}
} else if (exclude_promisor_objects_best_effort) {
use_internal_rev_list = 1;
fetch_if_missing = 0;

View File

@ -332,6 +332,9 @@ int cmd_repack(int argc,
!(pack_everything & PACK_CRUFT))
strvec_push(&cmd.args, "--pack-loose-unreachable");
} else if (geometry.split_factor) {
pack_geometry_repack_promisors(repo, &po_args, &geometry,
&names, packtmp);
if (midx_must_contain_cruft)
strvec_push(&cmd.args, "--stdin-packs");
else

View File

@ -66,45 +66,54 @@ void pack_geometry_init(struct pack_geometry *geometry,
if (p->is_cruft)
continue;
ALLOC_GROW(geometry->pack,
geometry->pack_nr + 1,
geometry->pack_alloc);
if (p->pack_promisor) {
ALLOC_GROW(geometry->promisor_pack,
geometry->promisor_pack_nr + 1,
geometry->promisor_pack_alloc);
geometry->pack[geometry->pack_nr] = p;
geometry->pack_nr++;
geometry->promisor_pack[geometry->promisor_pack_nr] = p;
geometry->promisor_pack_nr++;
} else {
ALLOC_GROW(geometry->pack,
geometry->pack_nr + 1,
geometry->pack_alloc);
geometry->pack[geometry->pack_nr] = p;
geometry->pack_nr++;
}
}
QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp);
QSORT(geometry->promisor_pack, geometry->promisor_pack_nr, pack_geometry_cmp);
strbuf_release(&buf);
}
void pack_geometry_split(struct pack_geometry *geometry)
static uint32_t compute_pack_geometry_split(struct packed_git **pack, size_t pack_nr,
int split_factor)
{
uint32_t i;
uint32_t split;
off_t total_size = 0;
if (!geometry->pack_nr) {
geometry->split = geometry->pack_nr;
return;
}
if (!pack_nr)
return 0;
/*
* First, count the number of packs (in descending order of size) which
* already form a geometric progression.
*/
for (i = geometry->pack_nr - 1; i > 0; i--) {
struct packed_git *ours = geometry->pack[i];
struct packed_git *prev = geometry->pack[i - 1];
for (i = pack_nr - 1; i > 0; i--) {
struct packed_git *ours = pack[i];
struct packed_git *prev = pack[i - 1];
if (unsigned_mult_overflows(geometry->split_factor,
if (unsigned_mult_overflows(split_factor,
pack_geometry_weight(prev)))
die(_("pack %s too large to consider in geometric "
"progression"),
prev->pack_name);
if (pack_geometry_weight(ours) <
geometry->split_factor * pack_geometry_weight(prev))
split_factor * pack_geometry_weight(prev))
break;
}
@ -130,21 +139,19 @@ void pack_geometry_split(struct pack_geometry *geometry)
* the geometric progression.
*/
for (i = 0; i < split; i++) {
struct packed_git *p = geometry->pack[i];
struct packed_git *p = pack[i];
if (unsigned_add_overflows(total_size, pack_geometry_weight(p)))
die(_("pack %s too large to roll up"), p->pack_name);
total_size += pack_geometry_weight(p);
}
for (i = split; i < geometry->pack_nr; i++) {
struct packed_git *ours = geometry->pack[i];
for (i = split; i < pack_nr; i++) {
struct packed_git *ours = pack[i];
if (unsigned_mult_overflows(geometry->split_factor,
total_size))
if (unsigned_mult_overflows(split_factor, total_size))
die(_("pack %s too large to roll up"), ours->pack_name);
if (pack_geometry_weight(ours) <
geometry->split_factor * total_size) {
if (pack_geometry_weight(ours) < split_factor * total_size) {
if (unsigned_add_overflows(total_size,
pack_geometry_weight(ours)))
die(_("pack %s too large to roll up"),
@ -156,7 +163,16 @@ void pack_geometry_split(struct pack_geometry *geometry)
break;
}
geometry->split = split;
return split;
}
void pack_geometry_split(struct pack_geometry *geometry)
{
geometry->split = compute_pack_geometry_split(geometry->pack, geometry->pack_nr,
geometry->split_factor);
geometry->promisor_split = compute_pack_geometry_split(geometry->promisor_pack,
geometry->promisor_pack_nr,
geometry->split_factor);
}
struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry)
@ -194,17 +210,18 @@ struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry)
return NULL;
}
void pack_geometry_remove_redundant(struct pack_geometry *geometry,
struct string_list *names,
struct existing_packs *existing,
const char *packdir)
static void remove_redundant_packs(struct packed_git **pack,
uint32_t pack_nr,
struct string_list *names,
struct existing_packs *existing,
const char *packdir)
{
const struct git_hash_algo *algop = existing->repo->hash_algo;
struct strbuf buf = STRBUF_INIT;
uint32_t i;
for (i = 0; i < geometry->split; i++) {
struct packed_git *p = geometry->pack[i];
for (i = 0; i < pack_nr; i++) {
struct packed_git *p = pack[i];
if (string_list_has_string(names, hash_to_hex_algop(p->hash,
algop)))
continue;
@ -223,10 +240,22 @@ void pack_geometry_remove_redundant(struct pack_geometry *geometry,
strbuf_release(&buf);
}
void pack_geometry_remove_redundant(struct pack_geometry *geometry,
struct string_list *names,
struct existing_packs *existing,
const char *packdir)
{
remove_redundant_packs(geometry->pack, geometry->split,
names, existing, packdir);
remove_redundant_packs(geometry->promisor_pack, geometry->promisor_split,
names, existing, packdir);
}
void pack_geometry_release(struct pack_geometry *geometry)
{
if (!geometry)
return;
free(geometry->pack);
free(geometry->promisor_pack);
}

View File

@ -34,39 +34,17 @@ static int write_oid(const struct object_id *oid,
return 0;
}
void repack_promisor_objects(struct repository *repo,
const struct pack_objects_args *args,
struct string_list *names, const char *packtmp)
static void finish_repacking_promisor_objects(struct repository *repo,
struct child_process *cmd,
struct string_list *names,
const char *packtmp)
{
struct write_oid_context ctx;
struct child_process cmd = CHILD_PROCESS_INIT;
FILE *out;
struct strbuf line = STRBUF_INIT;
FILE *out;
prepare_pack_objects(&cmd, args, packtmp);
cmd.in = -1;
close(cmd->in);
/*
* NEEDSWORK: Giving pack-objects only the OIDs without any ordering
* hints may result in suboptimal deltas in the resulting pack. See if
* the OIDs can be sent with fake paths such that pack-objects can use a
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
ctx.cmd = &cmd;
ctx.algop = repo->hash_algo;
for_each_packed_object(repo, write_oid, &ctx,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
/* No packed objects; cmd was never started */
child_process_clear(&cmd);
return;
}
close(cmd.in);
out = xfdopen(cmd.out, "r");
out = xfdopen(cmd->out, "r");
while (strbuf_getline_lf(&line, out) != EOF) {
struct string_list_item *item;
char *promisor_name;
@ -96,7 +74,66 @@ void repack_promisor_objects(struct repository *repo,
}
fclose(out);
if (finish_command(&cmd))
if (finish_command(cmd))
die(_("could not finish pack-objects to repack promisor objects"));
strbuf_release(&line);
}
void repack_promisor_objects(struct repository *repo,
const struct pack_objects_args *args,
struct string_list *names, const char *packtmp)
{
struct write_oid_context ctx;
struct child_process cmd = CHILD_PROCESS_INIT;
prepare_pack_objects(&cmd, args, packtmp);
cmd.in = -1;
/*
* NEEDSWORK: Giving pack-objects only the OIDs without any ordering
* hints may result in suboptimal deltas in the resulting pack. See if
* the OIDs can be sent with fake paths such that pack-objects can use a
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
ctx.cmd = &cmd;
ctx.algop = repo->hash_algo;
for_each_packed_object(repo, write_oid, &ctx,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
/* No packed objects; cmd was never started */
child_process_clear(&cmd);
return;
}
finish_repacking_promisor_objects(repo, &cmd, names, packtmp);
}
void pack_geometry_repack_promisors(struct repository *repo,
const struct pack_objects_args *args,
const struct pack_geometry *geometry,
struct string_list *names,
const char *packtmp)
{
struct child_process cmd = CHILD_PROCESS_INIT;
FILE *in;
if (!geometry->promisor_split)
return;
prepare_pack_objects(&cmd, args, packtmp);
strvec_push(&cmd.args, "--stdin-packs");
cmd.in = -1;
if (start_command(&cmd))
die(_("could not start pack-objects to repack promisor packs"));
in = xfdopen(cmd.in, "w");
for (size_t i = 0; i < geometry->promisor_split; i++)
fprintf(in, "%s\n", pack_basename(geometry->promisor_pack[i]));
for (size_t i = geometry->promisor_split; i < geometry->promisor_pack_nr; i++)
fprintf(in, "^%s\n", pack_basename(geometry->promisor_pack[i]));
fclose(in);
finish_repacking_promisor_objects(repo, &cmd, names, packtmp);
}

View File

@ -103,9 +103,19 @@ struct pack_geometry {
uint32_t pack_nr, pack_alloc;
uint32_t split;
struct packed_git **promisor_pack;
uint32_t promisor_pack_nr, promisor_pack_alloc;
uint32_t promisor_split;
int split_factor;
};
void pack_geometry_repack_promisors(struct repository *repo,
const struct pack_objects_args *args,
const struct pack_geometry *geometry,
struct string_list *names,
const char *packtmp);
void pack_geometry_init(struct pack_geometry *geometry,
struct existing_packs *existing,
const struct pack_objects_args *args);

View File

@ -319,6 +319,45 @@ test_expect_success '--stdin-packs=follow walks into unknown packs' '
)
'
test_expect_success '--stdin-packs with promisors' '
test_when_finished "rm -fr repo" &&
git init repo &&
(
cd repo &&
git config set maintenance.auto false &&
git remote add promisor garbage &&
git config set remote.promisor.promisor true &&
for c in A B C D
do
echo "$c" >file &&
git add file &&
git commit --message "$c" &&
git tag "$c" || return 1
done &&
A="$(echo A | git pack-objects --revs $packdir/pack)" &&
B="$(echo A..B | git pack-objects --revs $packdir/pack --filter=blob:none)" &&
C="$(echo B..C | git pack-objects --revs $packdir/pack)" &&
D="$(echo C..D | git pack-objects --revs $packdir/pack)" &&
touch $packdir/pack-$B.promisor &&
test_must_fail git pack-objects --stdin-packs --exclude-promisor-objects pack- 2>err <<-EOF &&
pack-$B.pack
EOF
test_grep "is a promisor but --exclude-promisor-objects was given" err &&
PACK=$(git pack-objects --stdin-packs=follow --exclude-promisor-objects $packdir/pack <<-EOF
pack-$D.pack
EOF
) &&
objects_in_packs $C $D >expect &&
objects_in_packs $PACK >actual &&
test_cmp expect actual &&
rm -f $packdir/pack-$PACK.*
)
'
stdin_packs__follow_with_only () {
rm -fr stdin_packs__follow_with_only &&
git init stdin_packs__follow_with_only &&

View File

@ -480,4 +480,65 @@ test_expect_success '--geometric -l disables writing bitmaps with non-local pack
test_path_is_file member/.git/objects/pack/multi-pack-index-*.bitmap
'
write_packfile () {
NR="$1"
PREFIX="$2"
printf "blob\ndata <<EOB\n$PREFIX %s\nEOB\n" $(test_seq $NR) |
git fast-import &&
git pack-objects --pack-loose-unreachable .git/objects/pack/pack &&
git prune-packed
}
write_promisor_packfile () {
PACKFILE=$(write_packfile "$@") &&
touch .git/objects/pack/pack-$PACKFILE.promisor &&
echo "$PACKFILE"
}
test_expect_success 'geometric repack works with promisor packs' '
test_when_finished "rm -fr repo" &&
git init repo &&
(
cd repo &&
git config set maintenance.auto false &&
git remote add promisor garbage &&
git config set remote.promisor.promisor true &&
# Packs A and B need to be merged.
NORMAL_A=$(write_packfile 2 normal-a) &&
NORMAL_B=$(write_packfile 2 normal-b) &&
NORMAL_C=$(write_packfile 14 normal-c) &&
# Packs A, B and C need to be merged.
PROMISOR_A=$(write_promisor_packfile 1 promisor-a) &&
PROMISOR_B=$(write_promisor_packfile 3 promisor-b) &&
PROMISOR_C=$(write_promisor_packfile 3 promisor-c) &&
PROMISOR_D=$(write_promisor_packfile 20 promisor-d) &&
PROMISOR_E=$(write_promisor_packfile 40 promisor-e) &&
git cat-file --batch-all-objects --batch-check="%(objectname)" >objects-expect &&
ls .git/objects/pack/*.pack >packs-before &&
test_line_count = 8 packs-before &&
git repack --geometric=2 -d &&
ls .git/objects/pack/*.pack >packs-after &&
test_line_count = 5 packs-after &&
test_grep ! "$NORMAL_A" packs-after &&
test_grep ! "$NORMAL_B" packs-after &&
test_grep "$NORMAL_C" packs-after &&
test_grep ! "$PROMISOR_A" packs-after &&
test_grep ! "$PROMISOR_B" packs-after &&
test_grep ! "$PROMISOR_C" packs-after &&
test_grep "$PROMISOR_D" packs-after &&
test_grep "$PROMISOR_E" packs-after &&
ls .git/objects/pack/*.promisor >promisors &&
test_line_count = 3 promisors &&
git cat-file --batch-all-objects --batch-check="%(objectname)" >objects-actual &&
test_cmp objects-expect objects-actual
)
'
test_done