midx-write.c: introduce midx_pack_perm() helper

The `ctx->pack_perm` array can be considered as a permutation between
the original `pack_int_id` of some given pack to its position in the
`ctx->info` array containing all packs.

Today we can always index into this array with any known `pack_int_id`,
since there is never a `pack_int_id` which is greater than or equal to
the value `ctx->nr`.

That is not necessarily the case with MIDX compaction. For example,
suppose we have a MIDX chain with three layers, each containing three
packs. The base of the MIDX chain will have packs with IDs 0, 1, and 2,
the next layer 3, 4, and 5, and so on. If we are compacting the topmost
two layers, we'll have input `pack_int_id` values between [3, 8], but
`ctx->nr` will only be 6.

In that example, if we want to know where the pack whose original
`pack_int_id` value was, say, 7, we would compute `ctx->pack_perm[7]`,
leading to an uninitialized read, since there are only 6 entries
allocated in that array.

To address this, there are a couple of options:

 - We could allocate enough entries in `ctx->pack_perm` to accommodate
   the largest `orig_pack_int_id` value.

 - Or, we could internally shift the input values by the number of packs
   in the base layer of the lower end of the MIDX compaction range.

This patch prepare us to take the latter approach, since it does not
allocate more memory than strictly necessary. (In our above example, the
base of the lower end of the compaction range is the first MIDX layer
(having three packs), so we would end up indexing `ctx->pack_perm[7-3]`,
which is a valid read.)

Note that this patch does not actually implement that approach yet, but
merely performs a behavior-preserving refactoring which will make the
change easier to carry out in the future.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Taylor Blau 2025-12-06 15:31:31 -05:00 committed by Junio C Hamano
parent 35364d7aba
commit 363485a311

View File

@ -114,6 +114,12 @@ struct write_midx_context {
struct odb_source *source;
};
static uint32_t midx_pack_perm(struct write_midx_context *ctx,
uint32_t orig_pack_int_id)
{
return ctx->pack_perm[orig_pack_int_id];
}
static int should_include_pack(const struct write_midx_context *ctx,
const char *file_name)
{
@ -509,12 +515,12 @@ static int write_midx_object_offsets(struct hashfile *f,
for (i = 0; i < ctx->entries_nr; i++) {
struct pack_midx_entry *obj = list++;
if (ctx->pack_perm[obj->pack_int_id] == PACK_EXPIRED)
if (midx_pack_perm(ctx, obj->pack_int_id) == PACK_EXPIRED)
BUG("object %s is in an expired pack with int-id %d",
oid_to_hex(&obj->oid),
obj->pack_int_id);
hashwrite_be32(f, ctx->pack_perm[obj->pack_int_id]);
hashwrite_be32(f, midx_pack_perm(ctx, obj->pack_int_id));
if (ctx->large_offsets_needed && obj->offset >> 31)
hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++);
@ -615,7 +621,7 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx)
for (i = 0; i < ctx->entries_nr; i++) {
struct pack_midx_entry *e = &ctx->entries[i];
data[i].nr = i;
data[i].pack = ctx->pack_perm[e->pack_int_id];
data[i].pack = midx_pack_perm(ctx, e->pack_int_id);
if (!e->preferred)
data[i].pack |= (1U << 31);
data[i].offset = e->offset;
@ -625,7 +631,7 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx)
for (i = 0; i < ctx->entries_nr; i++) {
struct pack_midx_entry *e = &ctx->entries[data[i].nr];
struct pack_info *pack = &ctx->info[ctx->pack_perm[e->pack_int_id]];
struct pack_info *pack = &ctx->info[midx_pack_perm(ctx, e->pack_int_id)];
if (pack->bitmap_pos == BITMAP_POS_UNKNOWN)
pack->bitmap_pos = i + base_objects;
pack->bitmap_nr++;
@ -686,7 +692,7 @@ static void prepare_midx_packing_data(struct packing_data *pdata,
struct object_entry *to = packlist_alloc(pdata, &from->oid);
oe_set_in_pack(pdata, to,
ctx->info[ctx->pack_perm[from->pack_int_id]].p);
ctx->info[midx_pack_perm(ctx, from->pack_int_id)].p);
}
trace2_region_leave("midx", "prepare_midx_packing_data", ctx->repo);
@ -1285,7 +1291,7 @@ static int write_midx_internal(struct write_midx_opts *opts)
sizeof(*ctx.info),
idx_or_pack_name_cmp);
if (preferred) {
uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id];
uint32_t perm = midx_pack_perm(&ctx, preferred->orig_pack_int_id);
if (perm == PACK_EXPIRED)
warning(_("preferred pack '%s' is expired"),
opts->preferred_pack_name);