From 34c19c7a2b2e2575d5ec6248cfb60d0298900c38 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 18 Dec 2025 07:55:20 +0100 Subject: [PATCH 01/10] packfile: create store via its owning source In subsequent patches we're about to move the packfile store from the object database layer into the object database source layer. Once done, we'll have one packfile store per source, where the source is owning the store. Prepare for this future and refactor `packfile_store_new()` to be initialized via an object database source instead of via the object database itself. This refactoring leads to a weird in-between state where the store is owned by the object database but created via the source. But this makes subsequent refactorings easier because we can now start to access the owning source of a given store. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 2 +- packfile.c | 20 ++++++++++---------- packfile.h | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/odb.c b/odb.c index 45b6600800..94144a69f5 100644 --- a/odb.c +++ b/odb.c @@ -1056,7 +1056,6 @@ struct object_database *odb_new(struct repository *repo, memset(o, 0, sizeof(*o)); o->repo = repo; - o->packfiles = packfile_store_new(o); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); @@ -1065,6 +1064,7 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); + o->packfiles = packfile_store_new(o->sources); free(to_free); diff --git a/packfile.c b/packfile.c index c88bd92619..0a05a10daa 100644 --- a/packfile.c +++ b/packfile.c @@ -876,7 +876,7 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, p = strmap_get(&store->packs_by_path, key.buf); if (!p) { - p = add_packed_git(store->odb->repo, idx_path, + p = add_packed_git(store->source->odb->repo, idx_path, strlen(idx_path), local); if (p) packfile_store_add_pack(store, p); @@ -1068,8 +1068,8 @@ void packfile_store_prepare(struct packfile_store *store) if (store->initialized) return; - odb_prepare_alternates(store->odb); - for (source = store->odb->sources; source; source = source->next) { + odb_prepare_alternates(store->source->odb); + for (source = store->source->odb->sources; source; source = source->next) { prepare_multi_pack_index_one(source); prepare_packed_git_one(source); } @@ -1092,7 +1092,7 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - for (struct odb_source *source = store->odb->sources; source; source = source->next) { + for (struct odb_source *source = store->source->odb->sources; source; source = source->next) { struct multi_pack_index *m = source->midx; if (!m) continue; @@ -2121,7 +2121,7 @@ int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid) { struct pack_entry e; - if (!find_pack_entry(store->odb->repo, oid, &e)) + if (!find_pack_entry(store->source->odb->repo, oid, &e)) return 0; if (e.p->is_cruft) return 0; @@ -2142,7 +2142,7 @@ int packfile_store_read_object_info(struct packfile_store *store, struct pack_entry e; int rtype; - if (!find_pack_entry(store->odb->repo, oid, &e)) + if (!find_pack_entry(store->source->odb->repo, oid, &e)) return 1; /* @@ -2152,7 +2152,7 @@ int packfile_store_read_object_info(struct packfile_store *store, if (oi == &blank_oi) return 0; - rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi); + rtype = packed_object_info(store->source->odb->repo, e.p, e.offset, oi); if (rtype < 0) { mark_bad_packed_object(e.p, oid); return -1; @@ -2411,11 +2411,11 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l return 0; } -struct packfile_store *packfile_store_new(struct object_database *odb) +struct packfile_store *packfile_store_new(struct odb_source *source) { struct packfile_store *store; CALLOC_ARRAY(store, 1); - store->odb = odb; + store->source = source; strmap_init(&store->packs_by_path); return store; } @@ -2534,7 +2534,7 @@ int packfile_store_read_object_stream(struct odb_read_stream **out, if (packfile_store_read_object_info(store, oid, &oi, 0) || oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(store->odb->repo) >= size) + repo_settings_get_big_file_threshold(store->source->odb->repo) >= size) return -1; in_pack_type = unpack_object_header(oi.u.packed.pack, diff --git a/packfile.h b/packfile.h index 59d162a3f4..33cc1c1654 100644 --- a/packfile.h +++ b/packfile.h @@ -77,7 +77,7 @@ struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, * A store that manages packfiles for a given object database. */ struct packfile_store { - struct object_database *odb; + struct odb_source *source; /* * The list of packfiles in the order in which they have been most @@ -129,9 +129,9 @@ struct packfile_store { /* * Allocate and initialize a new empty packfile store for the given object - * database. + * database source. */ -struct packfile_store *packfile_store_new(struct object_database *odb); +struct packfile_store *packfile_store_new(struct odb_source *source); /* * Free the packfile store and all its associated state. All packfiles From 0e9a46149c5b61d55607edc1b8848aa9996afbb1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 18 Dec 2025 07:55:21 +0100 Subject: [PATCH 02/10] packfile: pass source to `prepare_pack()` When preparing a packfile we pass various pieces attached to the pack's object database source via the `struct prepare_pack_data`. Refactor this code to instead pass in the source directly. This reduces the number of variables we need to pass and allows for a subsequent refactoring where we start to prepare the pack via the source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/packfile.c b/packfile.c index 0a05a10daa..ab86afa01d 100644 --- a/packfile.c +++ b/packfile.c @@ -975,10 +975,8 @@ void for_each_file_in_pack_dir(const char *objdir, } struct prepare_pack_data { - struct repository *r; + struct odb_source *source; struct string_list *garbage; - int local; - struct multi_pack_index *m; }; static void prepare_pack(const char *full_name, size_t full_name_len, @@ -988,10 +986,10 @@ static void prepare_pack(const char *full_name, size_t full_name_len, size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && - !(data->m && midx_contains_pack(data->m, file_name))) { + !(data->source->midx && midx_contains_pack(data->source->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); - packfile_store_load_pack(data->r->objects->packfiles, - trimmed_path, data->local); + packfile_store_load_pack(data->source->odb->packfiles, + trimmed_path, data->source->local); free(trimmed_path); } @@ -1020,10 +1018,8 @@ static void prepare_packed_git_one(struct odb_source *source) { struct string_list garbage = STRING_LIST_INIT_DUP; struct prepare_pack_data data = { - .m = source->midx, - .r = source->odb->repo, + .source = source, .garbage = &garbage, - .local = source->local, }; for_each_file_in_pack_dir(source->path, prepare_pack, &data); From 4dfe8e0ed6f817dba3e09e78cda69a4b32b30c0c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 18 Dec 2025 07:55:22 +0100 Subject: [PATCH 03/10] packfile: refactor kept-pack cache to work with packfile stores The kept pack cache is a cache of packfiles that are marked as kept either via an accompanying ".kept" file or via an in-memory flag. The cache can be retrieved via `kept_pack_cache()`, where one needs to pass in a repository. Ultimately though the kept-pack cache is a property of the packfile store, and this causes problems in a subsequent commit where we want to move down the packfile store to be a per-object-source entity. Prepare for this and refactor the kept-pack cache to work on top of a packfile store instead. While at it, rename both the function and flags specific to the kept-pack cache so that they can be properly attributed to the respective subsystems. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 12 ++++++------ packfile.c | 37 ++++++++++++++++++++----------------- packfile.h | 25 +++++++++++++++++-------- reachable.c | 2 +- revision.c | 8 ++++---- 5 files changed, 48 insertions(+), 36 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1ce8d6ee21..e86b8f387a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1529,9 +1529,9 @@ static int want_cruft_object_mtime(struct repository *r, const struct object_id *oid, unsigned flags, uint32_t mtime) { - struct packed_git **cache; + struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); - for (cache = kept_pack_cache(r, flags); *cache; cache++) { + for (; *cache; cache++) { struct packed_git *p = *cache; off_t ofs; uint32_t candidate_mtime; @@ -1624,9 +1624,9 @@ static int want_found_object(const struct object_id *oid, int exclude, */ unsigned flags = 0; if (ignore_packed_keep_on_disk) - flags |= ON_DISK_KEEP_PACKS; + flags |= KEPT_PACK_ON_DISK; if (ignore_packed_keep_in_core) - flags |= IN_CORE_KEEP_PACKS; + flags |= KEPT_PACK_IN_CORE; /* * If the object is in a pack that we want to ignore, *and* we @@ -3931,7 +3931,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) * an optimization during delta selection. */ revs.no_kept_objects = 1; - revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs.keep_pack_cache_flags |= KEPT_PACK_IN_CORE; revs.blob_objects = 1; revs.tree_objects = 1; revs.tag_objects = 1; @@ -4030,7 +4030,7 @@ static void show_cruft_commit(struct commit *commit, void *data) static int cruft_include_check_obj(struct object *obj, void *data UNUSED) { - return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS); + return !has_object_kept_pack(to_pack.repo, &obj->oid, KEPT_PACK_IN_CORE); } static int cruft_include_check(struct commit *commit, void *data) diff --git a/packfile.c b/packfile.c index ab86afa01d..191344eb1c 100644 --- a/packfile.c +++ b/packfile.c @@ -2164,25 +2164,26 @@ int packfile_store_read_object_info(struct packfile_store *store, return 0; } -static void maybe_invalidate_kept_pack_cache(struct repository *r, +static void maybe_invalidate_kept_pack_cache(struct packfile_store *store, unsigned flags) { - if (!r->objects->packfiles->kept_cache.packs) + if (!store->kept_cache.packs) return; - if (r->objects->packfiles->kept_cache.flags == flags) + if (store->kept_cache.flags == flags) return; - FREE_AND_NULL(r->objects->packfiles->kept_cache.packs); - r->objects->packfiles->kept_cache.flags = 0; + FREE_AND_NULL(store->kept_cache.packs); + store->kept_cache.flags = 0; } -struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) +struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store, + unsigned flags) { - maybe_invalidate_kept_pack_cache(r, flags); + maybe_invalidate_kept_pack_cache(store, flags); - if (!r->objects->packfiles->kept_cache.packs) { + if (!store->kept_cache.packs) { struct packed_git **packs = NULL; + struct packfile_list_entry *e; size_t nr = 0, alloc = 0; - struct packed_git *p; /* * We want "all" packs here, because we need to cover ones that @@ -2192,9 +2193,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) * covers, one kept and one not kept, but the midx returns only * the non-kept version. */ - repo_for_each_pack(r, p) { - if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || - (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((p->pack_keep && (flags & KEPT_PACK_ON_DISK)) || + (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE))) { ALLOC_GROW(packs, nr + 1, alloc); packs[nr++] = p; } @@ -2202,11 +2205,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) ALLOC_GROW(packs, nr + 1, alloc); packs[nr] = NULL; - r->objects->packfiles->kept_cache.packs = packs; - r->objects->packfiles->kept_cache.flags = flags; + store->kept_cache.packs = packs; + store->kept_cache.flags = flags; } - return r->objects->packfiles->kept_cache.packs; + return store->kept_cache.packs; } int find_kept_pack_entry(struct repository *r, @@ -2214,9 +2217,9 @@ int find_kept_pack_entry(struct repository *r, unsigned flags, struct pack_entry *e) { - struct packed_git **cache; + struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); - for (cache = kept_pack_cache(r, flags); *cache; cache++) { + for (; *cache; cache++) { struct packed_git *p = *cache; if (fill_pack_entry(oid, e, p)) return 1; diff --git a/packfile.h b/packfile.h index 33cc1c1654..410f85f03d 100644 --- a/packfile.h +++ b/packfile.h @@ -90,9 +90,10 @@ struct packfile_store { * is an on-disk ".keep" file or because they are marked as "kept" in * memory. * - * Should not be accessed directly, but via `kept_pack_cache()`. The - * list of packs gets invalidated when the stored flags and the flags - * passed to `kept_pack_cache()` mismatch. + * Should not be accessed directly, but via + * `packfile_store_get_kept_pack_cache()`. The list of packs gets + * invalidated when the stored flags and the flags passed to + * `packfile_store_get_kept_pack_cache()` mismatch. */ struct { struct packed_git **packs; @@ -210,6 +211,19 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid); +enum kept_pack_type { + KEPT_PACK_ON_DISK = (1 << 0), + KEPT_PACK_IN_CORE = (1 << 1), +}; + +/* + * Retrieve the cache of kept packs from the given packfile store. Accepts a + * combination of `kept_pack_type` flags. The cache is computed on demand and + * will be recomputed whenever the flags change. + */ +struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store, + unsigned flags); + struct pack_window { struct pack_window *next; unsigned char *base; @@ -385,9 +399,6 @@ int packed_object_info(struct repository *r, void mark_bad_packed_object(struct packed_git *, const struct object_id *); const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *); -#define ON_DISK_KEEP_PACKS 1 -#define IN_CORE_KEEP_PACKS 2 - /* * Iff a pack file in the given repository contains the object named by sha1, * return true and store its location to e. @@ -398,8 +409,6 @@ int has_object_pack(struct repository *r, const struct object_id *oid); int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags); -struct packed_git **kept_pack_cache(struct repository *r, unsigned flags); - /* * Return 1 if an object in a promisor packfile is or refers to the given * object, 0 otherwise. diff --git a/reachable.c b/reachable.c index b753c39553..4b532039d5 100644 --- a/reachable.c +++ b/reachable.c @@ -242,7 +242,7 @@ static int want_recent_object(struct recent_data *data, const struct object_id *oid) { if (data->ignore_in_core_kept_packs && - has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS)) + has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) return 0; return 1; } diff --git a/revision.c b/revision.c index 5f0850ae5c..64d223a7c6 100644 --- a/revision.c +++ b/revision.c @@ -2541,14 +2541,14 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg die(_("--unpacked= no longer supported")); } else if (!strcmp(arg, "--no-kept-objects")) { revs->no_kept_objects = 1; - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE; + revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK; } else if (skip_prefix(arg, "--no-kept-objects=", &optarg)) { revs->no_kept_objects = 1; if (!strcmp(optarg, "in-core")) - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE; if (!strcmp(optarg, "on-disk")) - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK; } else if (!strcmp(arg, "-r")) { revs->diff = 1; revs->diffopt.flags.recursive = 1; From 4c442d71f63f9ee5138240ff92a0bf5a85d5a101 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 18 Dec 2025 07:55:23 +0100 Subject: [PATCH 04/10] packfile: refactor misleading code when unusing pack windows The function `unuse_one_window()` is responsible for unmapping one of the packfile windows, which is done when we have exceeded the allowed number of window. The function receives a `struct packed_git` as input, which serves as an additional packfile that should be considered to be closed. If not given, we seemingly skip that and instead go through all of the repository's packfiles. The conditional that checks whether we have a packfile though does not make much sense anymore, as we dereference the packfile regardless of whether or not it is a `NULL` pointer to derive the repository's packfile store. The function was originally introduced via f0e17e86e1 (pack: move release_pack_memory(), 2017-08-18), and here we indeed had a caller that passed a `NULL` pointer. That caller was later removed via 9827d4c185 (packfile: drop release_pack_memory(), 2019-08-12), so starting with that commit we always pass a `struct packed_git`. In 9c5ce06d74 (packfile: use `repository` from `packed_git` directly, 2024-12-03) we then inadvertently started to rely on the fact that the pointer is never `NULL` because we use it now to identify the repository. Arguably, it didn't really make sense in the first place that the caller provides a packfile, as the selected window would have been overridden anyway by the subsequent loop over all packfiles if there was an older window. So the overall logic is quite misleading overall. The only case where it _could_ make a difference is when there were two packfiles with the same `last_used` value, but that case doesn't ever happen because the `pack_used_ctr` is strictly increasing. Refactor the code so that we instead pass in the object database to help make the code less misleading. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/packfile.c b/packfile.c index 191344eb1c..3700612465 100644 --- a/packfile.c +++ b/packfile.c @@ -355,16 +355,15 @@ static void scan_windows(struct packed_git *p, } } -static int unuse_one_window(struct packed_git *current) +static int unuse_one_window(struct object_database *odb) { struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; - if (current) - scan_windows(current, &lru_p, &lru_w, &lru_l); - for (e = current->repo->objects->packfiles->packs.head; e; e = e->next) + for (e = odb->packfiles->packs.head; e; e = e->next) scan_windows(e->pack, &lru_p, &lru_w, &lru_l); + if (lru_p) { munmap(lru_w->base, lru_w->len); pack_mapped -= lru_w->len; @@ -740,8 +739,8 @@ unsigned char *use_pack(struct packed_git *p, win->len = (size_t)len; pack_mapped += win->len; - while (settings->packed_git_limit < pack_mapped - && unuse_one_window(p)) + while (settings->packed_git_limit < pack_mapped && + unuse_one_window(p->repo->objects)) ; /* nothing */ win->base = xmmap_gently(NULL, win->len, PROT_READ, MAP_PRIVATE, From a6614da81fe2866cb1b817c5c0e6a56a6f371020 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 18 Dec 2025 07:55:24 +0100 Subject: [PATCH 05/10] packfile: move packfile store into object source The packfile store is a member of `struct object_database`, which means that we have a single store per database. This doesn't really make much sense though: each source connected to the database has its own set of packfiles, so there is a conceptual mismatch here. This hasn't really caused much of a problem in the past, but with the advent of pluggable object databases this is becoming more of a problem because some of the sources may not even use packfiles in the first place. Move the packfile store down by one level from the object database into the object database source. This ensures that each source now has its own packfile store, and we can eventually start to abstract it away entirely so that the caller doesn't even know what kind of store it uses. Note that we only need to adjust a relatively small number of callers, way less than one might expect. This is because most callers are using `repo_for_each_pack()`, which handles enumeration of all packfiles that exist in the repository. So for now, none of these callers need to be adapted. The remaining callers that iterate through the packfiles directly and that need adjustment are those that are a bit more tangled with packfiles. These will be adjusted over time. Note that this patch only moves the packfile store, and there is still a bunch of functions that seemingly operate on a packfile store but that end up iterating over all sources. These will be adjusted in subsequent commits. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 37 +++++++----- builtin/grep.c | 6 +- builtin/index-pack.c | 2 +- builtin/pack-objects.c | 96 ++++++++++++++++--------------- http.c | 2 +- midx.c | 5 +- odb.c | 36 ++++++------ odb.h | 6 +- odb/streaming.c | 9 ++- packfile.c | 127 ++++++++++++++++++++++++++--------------- packfile.h | 62 +++++++++++++++++--- 11 files changed, 243 insertions(+), 145 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 7849005ccb..b8a7757cfd 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -900,7 +900,7 @@ static void end_packfile(void) idx_name = keep_pack(create_index()); /* Register the packfile with core git's machinery. */ - new_p = packfile_store_load_pack(pack_data->repo->objects->packfiles, + new_p = packfile_store_load_pack(pack_data->repo->objects->sources->packfiles, idx_name, 1); if (!new_p) die(_("core Git rejected index %s"), idx_name); @@ -955,7 +955,7 @@ static int store_object( struct object_id *oidout, uintmax_t mark) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct odb_source *source; void *out, *delta; struct object_entry *e; unsigned char hdr[96]; @@ -979,7 +979,11 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { + } + + for (source = the_repository->objects->sources; source; source = source->next) { + if (!packfile_list_find_oid(packfile_store_get_packs(source->packfiles), &oid)) + continue; e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1096,10 +1100,10 @@ static void truncate_pack(struct hashfile_checkpoint *checkpoint) static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) { - struct packfile_store *packs = the_repository->objects->packfiles; size_t in_sz = 64 * 1024, out_sz = 64 * 1024; unsigned char *in_buf = xmalloc(in_sz); unsigned char *out_buf = xmalloc(out_sz); + struct odb_source *source; struct object_entry *e; struct object_id oid; unsigned long hdrlen; @@ -1179,24 +1183,29 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) if (e->idx.offset) { duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); + goto out; + } - } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { + for (source = the_repository->objects->sources; source; source = source->next) { + if (!packfile_list_find_oid(packfile_store_get_packs(source->packfiles), &oid)) + continue; e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - - } else { - e->depth = 0; - e->type = OBJ_BLOB; - e->pack_id = pack_id; - e->idx.offset = offset; - e->idx.crc32 = crc32_end(pack_file); - object_count++; - object_count_by_type[OBJ_BLOB]++; + goto out; } + e->depth = 0; + e->type = OBJ_BLOB; + e->pack_id = pack_id; + e->idx.offset = offset; + e->idx.crc32 = crc32_end(pack_file); + object_count++; + object_count_by_type[OBJ_BLOB]++; + +out: free(in_buf); free(out_buf); } diff --git a/builtin/grep.c b/builtin/grep.c index 53cccf2d25..4855b871dd 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1213,8 +1213,12 @@ int cmd_grep(int argc, */ if (recurse_submodules) repo_read_gitmodules(the_repository, 1); + /* + * Note: `packfile_store_prepare()` prepares stores from all + * sources. This will be fixed in a subsequent commit. + */ if (startup_info->have_repository) - packfile_store_prepare(the_repository->objects->packfiles); + packfile_store_prepare(the_repository->objects->sources->packfiles); start_threads(&opt); } else { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index a7e901e49c..b67fb0256c 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1638,7 +1638,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, hash, "idx", 1); if (do_fsck_object && startup_info->have_repository) - packfile_store_load_pack(the_repository->objects->packfiles, + packfile_store_load_pack(the_repository->objects->sources->packfiles, final_index_name, 0); if (!from_stdin) { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index e86b8f387a..7fd90a9996 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1529,49 +1529,53 @@ static int want_cruft_object_mtime(struct repository *r, const struct object_id *oid, unsigned flags, uint32_t mtime) { - struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); + struct odb_source *source; - for (; *cache; cache++) { - struct packed_git *p = *cache; - off_t ofs; - uint32_t candidate_mtime; + for (source = r->objects->sources; source; source = source->next) { + struct packed_git **cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); - ofs = find_pack_entry_one(oid, p); - if (!ofs) - continue; + for (; *cache; cache++) { + struct packed_git *p = *cache; + off_t ofs; + uint32_t candidate_mtime; - /* - * We have a copy of the object 'oid' in a non-cruft - * pack. We can avoid packing an additional copy - * regardless of what the existing copy's mtime is since - * it is outside of a cruft pack. - */ - if (!p->is_cruft) - return 0; - - /* - * If we have a copy of the object 'oid' in a cruft - * pack, then either read the cruft pack's mtime for - * that object, or, if that can't be loaded, assume the - * pack's mtime itself. - */ - if (!load_pack_mtimes(p)) { - uint32_t pos; - if (offset_to_pack_pos(p, ofs, &pos) < 0) + ofs = find_pack_entry_one(oid, p); + if (!ofs) continue; - candidate_mtime = nth_packed_mtime(p, pos); - } else { - candidate_mtime = p->mtime; - } - /* - * We have a surviving copy of the object in a cruft - * pack whose mtime is greater than or equal to the one - * we are considering. We can thus avoid packing an - * additional copy of that object. - */ - if (mtime <= candidate_mtime) - return 0; + /* + * We have a copy of the object 'oid' in a non-cruft + * pack. We can avoid packing an additional copy + * regardless of what the existing copy's mtime is since + * it is outside of a cruft pack. + */ + if (!p->is_cruft) + return 0; + + /* + * If we have a copy of the object 'oid' in a cruft + * pack, then either read the cruft pack's mtime for + * that object, or, if that can't be loaded, assume the + * pack's mtime itself. + */ + if (!load_pack_mtimes(p)) { + uint32_t pos; + if (offset_to_pack_pos(p, ofs, &pos) < 0) + continue; + candidate_mtime = nth_packed_mtime(p, pos); + } else { + candidate_mtime = p->mtime; + } + + /* + * We have a surviving copy of the object in a cruft + * pack whose mtime is greater than or equal to the one + * we are considering. We can thus avoid packing an + * additional copy of that object. + */ + if (mtime <= candidate_mtime) + return 0; + } } return -1; @@ -1749,13 +1753,15 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) { - struct packed_git *p = e->pack; - want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); - if (!exclude && want > 0) - packfile_list_prepend(&the_repository->objects->packfiles->packs, p); - if (want != -1) - return want; + for (source = the_repository->objects->sources; source; source = source->next) { + for (e = source->packfiles->packs.head; e; e = e->next) { + struct packed_git *p = e->pack; + want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); + if (!exclude && want > 0) + packfile_list_prepend(&source->packfiles->packs, p); + if (want != -1) + return want; + } } if (uri_protocols.nr) { diff --git a/http.c b/http.c index 41f850db16..7815f144de 100644 --- a/http.c +++ b/http.c @@ -2544,7 +2544,7 @@ void http_install_packfile(struct packed_git *p, struct packfile_list *list_to_remove_from) { packfile_list_remove(list_to_remove_from, p); - packfile_store_add_pack(the_repository->objects->packfiles, p); + packfile_store_add_pack(the_repository->objects->sources->packfiles, p); } struct http_pack_request *new_http_pack_request( diff --git a/midx.c b/midx.c index 24e1e72175..dbb2aa68ba 100644 --- a/midx.c +++ b/midx.c @@ -95,7 +95,7 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { - packfile_store_prepare(source->odb->packfiles); + packfile_store_prepare(source->packfiles); return source->midx; } @@ -447,7 +447,6 @@ static uint32_t midx_for_pack(struct multi_pack_index **_m, int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) { - struct repository *r = m->source->odb->repo; struct strbuf pack_name = STRBUF_INIT; struct packed_git *p; @@ -460,7 +459,7 @@ int prepare_midx_pack(struct multi_pack_index *m, strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, m->pack_names[pack_int_id]); - p = packfile_store_load_pack(r->objects->packfiles, + p = packfile_store_load_pack(m->source->packfiles, pack_name.buf, m->source->local); strbuf_release(&pack_name); diff --git a/odb.c b/odb.c index 94144a69f5..f159fbdd99 100644 --- a/odb.c +++ b/odb.c @@ -155,6 +155,7 @@ static struct odb_source *odb_source_new(struct object_database *odb, source->local = local; source->path = xstrdup(path); source->loose = odb_source_loose_new(source); + source->packfiles = packfile_store_new(source); return source; } @@ -373,6 +374,7 @@ static void odb_source_free(struct odb_source *source) { free(source->path); odb_source_loose_free(source->loose); + packfile_store_free(source->packfiles); free(source); } @@ -704,19 +706,19 @@ static int do_oid_object_info_extended(struct object_database *odb, while (1) { struct odb_source *source; - if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) - return 0; - /* Most likely it's a loose object. */ - for (source = odb->sources; source; source = source->next) - if (!odb_source_loose_read_object_info(source, real, oi, flags)) + for (source = odb->sources; source; source = source->next) { + if (!packfile_store_read_object_info(source->packfiles, real, oi, flags) || + !odb_source_loose_read_object_info(source, real, oi, flags)) return 0; + } /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) - return 0; + for (source = odb->sources; source; source = source->next) + if (!packfile_store_read_object_info(source->packfiles, real, oi, flags)) + return 0; } /* @@ -975,13 +977,14 @@ int odb_freshen_object(struct object_database *odb, { struct odb_source *source; - if (packfile_store_freshen_object(odb->packfiles, oid)) - return 1; - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) + for (source = odb->sources; source; source = source->next) { + if (packfile_store_freshen_object(source->packfiles, oid)) + return 1; + if (odb_source_loose_freshen_object(source, oid)) return 1; + } return 0; } @@ -1064,7 +1067,6 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); - o->packfiles = packfile_store_new(o->sources); free(to_free); @@ -1077,9 +1079,8 @@ void odb_close(struct object_database *o) { struct odb_source *source; - packfile_store_close(o->packfiles); - for (source = o->sources; source; source = source->next) { + packfile_store_close(source->packfiles); if (source->midx) close_midx(source->midx); source->midx = NULL; @@ -1118,7 +1119,6 @@ void odb_free(struct object_database *o) free((char *) o->cached_objects[i].value.buf); free(o->cached_objects); - packfile_store_free(o->packfiles); string_list_clear(&o->submodule_source_paths, 0); chdir_notify_unregister(NULL, odb_update_commondir, o); @@ -1141,13 +1141,13 @@ void odb_reprepare(struct object_database *o) o->loaded_alternates = 0; odb_prepare_alternates(o); - for (source = o->sources; source; source = source->next) + for (source = o->sources; source; source = source->next) { odb_source_loose_reprepare(source); + packfile_store_reprepare(source->packfiles); + } o->approximate_object_count_valid = 0; - packfile_store_reprepare(o->packfiles); - obj_read_unlock(); } diff --git a/odb.h b/odb.h index 014cd9585a..c97b41c58c 100644 --- a/odb.h +++ b/odb.h @@ -51,6 +51,9 @@ struct odb_source { /* Private state for loose objects. */ struct odb_source_loose *loose; + /* Should only be accessed directly by packfile.c and midx.c. */ + struct packfile_store *packfiles; + /* * private data * @@ -128,9 +131,6 @@ struct object_database { struct commit_graph *commit_graph; unsigned commit_graph_attempted : 1; /* if loading has been attempted */ - /* Should only be accessed directly by packfile.c and midx.c. */ - struct packfile_store *packfiles; - /* * This is meant to hold a *small* number of objects that you would * want odb_read_object() to be able to return, but yet you do not want diff --git a/odb/streaming.c b/odb/streaming.c index 745cd486fb..4a4474f891 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -185,13 +185,12 @@ static int istream_source(struct odb_read_stream **out, { struct odb_source *source; - if (!packfile_store_read_object_stream(out, odb->packfiles, oid)) - return 0; - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) - if (!odb_source_loose_read_object_stream(out, source, oid)) + for (source = odb->sources; source; source = source->next) { + if (!packfile_store_read_object_stream(out, source->packfiles, oid) || + !odb_source_loose_read_object_stream(out, source, oid)) return 0; + } return open_istream_incore(out, odb, oid); } diff --git a/packfile.c b/packfile.c index 3700612465..a0225cb2cb 100644 --- a/packfile.c +++ b/packfile.c @@ -357,12 +357,14 @@ static void scan_windows(struct packed_git *p, static int unuse_one_window(struct object_database *odb) { + struct odb_source *source; struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; - for (e = odb->packfiles->packs.head; e; e = e->next) - scan_windows(e->pack, &lru_p, &lru_w, &lru_l); + for (source = odb->sources; source; source = source->next) + for (e = source->packfiles->packs.head; e; e = e->next) + scan_windows(e->pack, &lru_p, &lru_w, &lru_l); if (lru_p) { munmap(lru_w->base, lru_w->len); @@ -528,15 +530,18 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc static int close_one_pack(struct repository *r) { + struct odb_source *source; struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (e = r->objects->packfiles->packs.head; e; e = e->next) { - if (e->pack->pack_fd == -1) - continue; - find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); + for (source = r->objects->sources; source; source = source->next) { + for (e = source->packfiles->packs.head; e; e = e->next) { + if (e->pack->pack_fd == -1) + continue; + find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); + } } if (lru_p) @@ -987,7 +992,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len, if (strip_suffix_mem(full_name, &base_len, ".idx") && !(data->source->midx && midx_contains_pack(data->source->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); - packfile_store_load_pack(data->source->odb->packfiles, + packfile_store_load_pack(data->source->packfiles, trimmed_path, data->source->local); free(trimmed_path); } @@ -1245,11 +1250,15 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) const struct packed_git *has_packed_and_bad(struct repository *r, const struct object_id *oid) { - struct packfile_list_entry *e; + struct odb_source *source; + + for (source = r->objects->sources; source; source = source->next) { + struct packfile_list_entry *e; + for (e = source->packfiles->packs.head; e; e = e->next) + if (oidset_contains(&e->pack->bad_objects, oid)) + return e->pack; + } - for (e = r->objects->packfiles->packs.head; e; e = e->next) - if (oidset_contains(&e->pack->bad_objects, oid)) - return e->pack; return NULL; } @@ -2089,26 +2098,32 @@ static int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) { - struct packfile_list_entry *l; + struct odb_source *source; - packfile_store_prepare(r->objects->packfiles); + /* + * Note: `packfile_store_prepare()` prepares stores from all sources. + * This will be fixed in a subsequent commit. + */ + packfile_store_prepare(r->objects->sources->packfiles); - for (struct odb_source *source = r->objects->sources; source; source = source->next) + for (source = r->objects->sources; source; source = source->next) if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; - if (!r->objects->packfiles->packs.head) - return 0; + for (source = r->objects->sources; source; source = source->next) { + struct packfile_list_entry *l; - for (l = r->objects->packfiles->packs.head; l; l = l->next) { - struct packed_git *p = l->pack; + for (l = source->packfiles->packs.head; l; l = l->next) { + struct packed_git *p = l->pack; - if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - if (!r->objects->packfiles->skip_mru_updates) - packfile_list_prepend(&r->objects->packfiles->packs, p); - return 1; + if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { + if (!source->packfiles->skip_mru_updates) + packfile_list_prepend(&source->packfiles->packs, p); + return 1; + } } } + return 0; } @@ -2216,12 +2231,18 @@ int find_kept_pack_entry(struct repository *r, unsigned flags, struct pack_entry *e) { - struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); + struct odb_source *source; - for (; *cache; cache++) { - struct packed_git *p = *cache; - if (fill_pack_entry(oid, e, p)) - return 1; + for (source = r->objects->sources; source; source = source->next) { + struct packed_git **cache; + + cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); + + for (; *cache; cache++) { + struct packed_git *p = *cache; + if (fill_pack_entry(oid, e, p)) + return 1; + } } return 0; @@ -2287,32 +2308,46 @@ int for_each_object_in_pack(struct packed_git *p, int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, void *data, enum for_each_object_flags flags) { - struct packed_git *p; + struct odb_source *source; int r = 0; int pack_errors = 0; - repo->objects->packfiles->skip_mru_updates = true; - repo_for_each_pack(repo, p) { - if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) - continue; - if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && - !p->pack_promisor) - continue; - if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && - p->pack_keep_in_core) - continue; - if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && - p->pack_keep) - continue; - if (open_pack_index(p)) { - pack_errors = 1; - continue; + odb_prepare_alternates(repo->objects); + + for (source = repo->objects->sources; source; source = source->next) { + struct packfile_list_entry *e; + + source->packfiles->skip_mru_updates = true; + + for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + continue; + if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; + if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + p->pack_keep_in_core) + continue; + if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + p->pack_keep) + continue; + if (open_pack_index(p)) { + pack_errors = 1; + continue; + } + + r = for_each_object_in_pack(p, cb, data, flags); + if (r) + break; } - r = for_each_object_in_pack(p, cb, data, flags); + + source->packfiles->skip_mru_updates = false; + if (r) break; } - repo->objects->packfiles->skip_mru_updates = false; return r ? r : pack_errors; } diff --git a/packfile.h b/packfile.h index 410f85f03d..07f7cdbad1 100644 --- a/packfile.h +++ b/packfile.h @@ -5,6 +5,7 @@ #include "object.h" #include "odb.h" #include "oidset.h" +#include "repository.h" #include "strmap.h" /* in odb.h */ @@ -170,14 +171,65 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); +/* + * Get all packs managed by the given store, including packfiles that are + * referenced by multi-pack indices. + */ +struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); + +struct repo_for_each_pack_data { + struct odb_source *source; + struct packfile_list_entry *entry; +}; + +static inline struct repo_for_each_pack_data repo_for_eack_pack_data_init(struct repository *repo) +{ + struct repo_for_each_pack_data data = { 0 }; + + odb_prepare_alternates(repo->objects); + + for (struct odb_source *source = repo->objects->sources; source; source = source->next) { + struct packfile_list_entry *entry = packfile_store_get_packs(source->packfiles); + if (!entry) + continue; + data.source = source; + data.entry = entry; + break; + } + + return data; +} + +static inline void repo_for_each_pack_data_next(struct repo_for_each_pack_data *data) +{ + struct odb_source *source; + + data->entry = data->entry->next; + if (data->entry) + return; + + for (source = data->source->next; source; source = source->next) { + struct packfile_list_entry *entry = packfile_store_get_packs(source->packfiles); + if (!entry) + continue; + data->source = source; + data->entry = entry; + return; + } + + data->source = NULL; + data->entry = NULL; +} + /* * Load and iterate through all packs of the given repository. This helper * function will yield packfiles from all object sources connected to the * repository. */ #define repo_for_each_pack(repo, p) \ - for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \ - ((p) = (e ? e->pack : NULL)); e = e->next) + for (struct repo_for_each_pack_data eack_pack_data = repo_for_eack_pack_data_init(repo); \ + ((p) = (eack_pack_data.entry ? eack_pack_data.entry->pack : NULL)); \ + repo_for_each_pack_data_next(&eack_pack_data)) int packfile_store_read_object_stream(struct odb_read_stream **out, struct packfile_store *store, @@ -194,12 +246,6 @@ int packfile_store_read_object_info(struct packfile_store *store, struct object_info *oi, unsigned flags); -/* - * Get all packs managed by the given store, including packfiles that are - * referenced by multi-pack indices. - */ -struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); - /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a From 156bc34f9b5d4eaec7ec42ee9d836f180010ee55 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 18 Dec 2025 07:55:25 +0100 Subject: [PATCH 06/10] packfile: only prepare owning store in `packfile_store_get_packs()` When calling `packfile_store_get_packs()` we prepare not only the provided packfile store, but also all those of all other sources part of the same object database. This was required when the store was still sitting on the object database level. But now that it sits on the source level it's not anymore. Adapt the code so that we only prepare the MIDX of the provided store. All callers only work in the context of a single store or call the function in a loop over all sources, so this change shouldn't have any practical effects. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/packfile.c b/packfile.c index a0225cb2cb..c46d53b75d 100644 --- a/packfile.c +++ b/packfile.c @@ -1092,10 +1092,8 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - for (struct odb_source *source = store->source->odb->sources; source; source = source->next) { - struct multi_pack_index *m = source->midx; - if (!m) - continue; + if (store->source->midx) { + struct multi_pack_index *m = store->source->midx; for (uint32_t i = 0; i < m->num_packs + m->num_packs_in_base; i++) prepare_midx_pack(m, i); } From 7060cb98e9cb3a1aef7c07d6a556197970cc35a8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 18 Dec 2025 07:55:26 +0100 Subject: [PATCH 07/10] packfile: only prepare owning store in `packfile_store_prepare()` When calling `packfile_store_prepare()` we prepare not only the provided packfile store, but also all those of all other sources part of the same object database. This was required when the store was still sitting on the object database level. But now that it sits on the source level it's not anymore. Refactor the code so that we only prepare the single packfile store passed by the caller. Adapt callers accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/grep.c | 14 ++++++++------ packfile.c | 19 +++++-------------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 4855b871dd..5b8b87b1ac 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1213,12 +1213,14 @@ int cmd_grep(int argc, */ if (recurse_submodules) repo_read_gitmodules(the_repository, 1); - /* - * Note: `packfile_store_prepare()` prepares stores from all - * sources. This will be fixed in a subsequent commit. - */ - if (startup_info->have_repository) - packfile_store_prepare(the_repository->objects->sources->packfiles); + + if (startup_info->have_repository) { + struct odb_source *source; + + odb_prepare_alternates(the_repository->objects); + for (source = the_repository->objects->sources; source; source = source->next) + packfile_store_prepare(source->packfiles); + } start_threads(&opt); } else { diff --git a/packfile.c b/packfile.c index c46d53b75d..23d8f7cb93 100644 --- a/packfile.c +++ b/packfile.c @@ -1063,16 +1063,11 @@ static int sort_pack(const struct packfile_list_entry *a, void packfile_store_prepare(struct packfile_store *store) { - struct odb_source *source; - if (store->initialized) return; - odb_prepare_alternates(store->source->odb); - for (source = store->source->odb->sources; source; source = source->next) { - prepare_multi_pack_index_one(source); - prepare_packed_git_one(source); - } + prepare_multi_pack_index_one(store->source); + prepare_packed_git_one(store->source); sort_packs(&store->packs.head, sort_pack); for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) @@ -2098,15 +2093,11 @@ static int find_pack_entry(struct repository *r, { struct odb_source *source; - /* - * Note: `packfile_store_prepare()` prepares stores from all sources. - * This will be fixed in a subsequent commit. - */ - packfile_store_prepare(r->objects->sources->packfiles); - - for (source = r->objects->sources; source; source = source->next) + for (source = r->objects->sources; source; source = source->next) { + packfile_store_prepare(r->objects->sources->packfiles); if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; + } for (source = r->objects->sources; source; source = source->next) { struct packfile_list_entry *l; From b0f97d5b7d24db27722b7d521e07a6c2278cc1ba Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 18 Dec 2025 07:55:27 +0100 Subject: [PATCH 08/10] packfile: inline `find_kept_pack_entry()` The `find_kept_pack_entry()` function is only used in `has_oject_kept_pack()`, which is only a trivial wrapper itself. Inline the latter into the former. Furthermore, reorder the code so that we can drop the declaration of the function in "packfile.h". This allows us to make the function file-local. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 38 +++++++++++++++----------------------- packfile.h | 6 ------ 2 files changed, 15 insertions(+), 29 deletions(-) diff --git a/packfile.c b/packfile.c index 23d8f7cb93..3bce1b150d 100644 --- a/packfile.c +++ b/packfile.c @@ -2215,28 +2215,6 @@ struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *st return store->kept_cache.packs; } -int find_kept_pack_entry(struct repository *r, - const struct object_id *oid, - unsigned flags, - struct pack_entry *e) -{ - struct odb_source *source; - - for (source = r->objects->sources; source; source = source->next) { - struct packed_git **cache; - - cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); - - for (; *cache; cache++) { - struct packed_git *p = *cache; - if (fill_pack_entry(oid, e, p)) - return 1; - } - } - - return 0; -} - int has_object_pack(struct repository *r, const struct object_id *oid) { struct pack_entry e; @@ -2246,8 +2224,22 @@ int has_object_pack(struct repository *r, const struct object_id *oid) int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags) { + struct odb_source *source; struct pack_entry e; - return find_kept_pack_entry(r, oid, flags, &e); + + for (source = r->objects->sources; source; source = source->next) { + struct packed_git **cache; + + cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); + + for (; *cache; cache++) { + struct packed_git *p = *cache; + if (fill_pack_entry(oid, &e, p)) + return 1; + } + } + + return 0; } int for_each_object_in_pack(struct packed_git *p, diff --git a/packfile.h b/packfile.h index 07f7cdbad1..08a666d538 100644 --- a/packfile.h +++ b/packfile.h @@ -445,12 +445,6 @@ int packed_object_info(struct repository *r, void mark_bad_packed_object(struct packed_git *, const struct object_id *); const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *); -/* - * Iff a pack file in the given repository contains the object named by sha1, - * return true and store its location to e. - */ -int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e); - int has_object_pack(struct repository *r, const struct object_id *oid); int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags); From 4cd60cf483aa01c39b50a223669a8a8f92e9a6c5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 18 Dec 2025 07:55:28 +0100 Subject: [PATCH 09/10] packfile: refactor `find_pack_entry()` to work on the packfile store The function `find_pack_entry()` doesn't work on a specific packfile store, but instead works on the whole repository. This causes a bit of a conceptual mismatch in its callers: - `packfile_store_freshen_object()` supposedly acts on a store, and its callers know to iterate through all sources already. - `packfile_store_read_object_info()` behaves likewise. The only exception that doesn't know to handle iteration through sources is `has_object_pack()`, but that function is trivial to adapt. Refactor the code so that `find_pack_entry()` works on the packfile store level instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/packfile.c b/packfile.c index 3bce1b150d..0e4c63e11d 100644 --- a/packfile.c +++ b/packfile.c @@ -2087,29 +2087,23 @@ static int fill_pack_entry(const struct object_id *oid, return 1; } -static int find_pack_entry(struct repository *r, +static int find_pack_entry(struct packfile_store *store, const struct object_id *oid, struct pack_entry *e) { - struct odb_source *source; + struct packfile_list_entry *l; - for (source = r->objects->sources; source; source = source->next) { - packfile_store_prepare(r->objects->sources->packfiles); - if (source->midx && fill_midx_entry(source->midx, oid, e)) + packfile_store_prepare(store); + if (store->source->midx && fill_midx_entry(store->source->midx, oid, e)) + return 1; + + for (l = store->packs.head; l; l = l->next) { + struct packed_git *p = l->pack; + + if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { + if (!store->skip_mru_updates) + packfile_list_prepend(&store->packs, p); return 1; - } - - for (source = r->objects->sources; source; source = source->next) { - struct packfile_list_entry *l; - - for (l = source->packfiles->packs.head; l; l = l->next) { - struct packed_git *p = l->pack; - - if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - if (!source->packfiles->skip_mru_updates) - packfile_list_prepend(&source->packfiles->packs, p); - return 1; - } } } @@ -2120,7 +2114,7 @@ int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid) { struct pack_entry e; - if (!find_pack_entry(store->source->odb->repo, oid, &e)) + if (!find_pack_entry(store, oid, &e)) return 0; if (e.p->is_cruft) return 0; @@ -2141,7 +2135,7 @@ int packfile_store_read_object_info(struct packfile_store *store, struct pack_entry e; int rtype; - if (!find_pack_entry(store->source->odb->repo, oid, &e)) + if (!find_pack_entry(store, oid, &e)) return 1; /* @@ -2217,8 +2211,17 @@ struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *st int has_object_pack(struct repository *r, const struct object_id *oid) { + struct odb_source *source; struct pack_entry e; - return find_pack_entry(r, oid, &e); + + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) { + int ret = find_pack_entry(source->packfiles, oid, &e); + if (ret) + return ret; + } + + return 0; } int has_object_kept_pack(struct repository *r, const struct object_id *oid, From 65405829a8f7e486daa2cd26a3680324129ec54b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 18 Dec 2025 07:55:29 +0100 Subject: [PATCH 10/10] packfile: move MIDX into packfile store The multi-pack index still is tracked as a member of the object database source, but ultimately the MIDX is always tied to one specific packfile store. Move the structure into `struct packfile_store` accordingly. This ensures that the packfile store now keeps track of all data related to packfiles. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- midx.c | 14 +++++++------- odb.c | 8 +------- odb.h | 7 ------- packfile.c | 12 ++++++++---- packfile.h | 3 +++ 5 files changed, 19 insertions(+), 25 deletions(-) diff --git a/midx.c b/midx.c index dbb2aa68ba..fa7a7e5d13 100644 --- a/midx.c +++ b/midx.c @@ -96,7 +96,7 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { packfile_store_prepare(source->packfiles); - return source->midx; + return source->packfiles->midx; } static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, @@ -709,12 +709,12 @@ int prepare_multi_pack_index_one(struct odb_source *source) if (!r->settings.core_multi_pack_index) return 0; - if (source->midx) + if (source->packfiles->midx) return 1; - source->midx = load_multi_pack_index(source); + source->packfiles->midx = load_multi_pack_index(source); - return !!source->midx; + return !!source->packfiles->midx; } int midx_checksum_valid(struct multi_pack_index *m) @@ -803,9 +803,9 @@ void clear_midx_file(struct repository *r) struct odb_source *source; for (source = r->objects->sources; source; source = source->next) { - if (source->midx) - close_midx(source->midx); - source->midx = NULL; + if (source->packfiles->midx) + close_midx(source->packfiles->midx); + source->packfiles->midx = NULL; } } diff --git a/odb.c b/odb.c index f159fbdd99..902251f9ed 100644 --- a/odb.c +++ b/odb.c @@ -1078,14 +1078,8 @@ struct object_database *odb_new(struct repository *repo, void odb_close(struct object_database *o) { struct odb_source *source; - - for (source = o->sources; source; source = source->next) { + for (source = o->sources; source; source = source->next) packfile_store_close(source->packfiles); - if (source->midx) - close_midx(source->midx); - source->midx = NULL; - } - close_commit_graph(o); } diff --git a/odb.h b/odb.h index c97b41c58c..300c3c0c46 100644 --- a/odb.h +++ b/odb.h @@ -54,13 +54,6 @@ struct odb_source { /* Should only be accessed directly by packfile.c and midx.c. */ struct packfile_store *packfiles; - /* - * private data - * - * should only be accessed directly by packfile.c and midx.c - */ - struct multi_pack_index *midx; - /* * Figure out whether this is the local source of the owning * repository, which would typically be its ".git/objects" directory. diff --git a/packfile.c b/packfile.c index 0e4c63e11d..097dd8d85d 100644 --- a/packfile.c +++ b/packfile.c @@ -990,7 +990,8 @@ static void prepare_pack(const char *full_name, size_t full_name_len, size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && - !(data->source->midx && midx_contains_pack(data->source->midx, file_name))) { + !(data->source->packfiles->midx && + midx_contains_pack(data->source->packfiles->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); packfile_store_load_pack(data->source->packfiles, trimmed_path, data->source->local); @@ -1087,8 +1088,8 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - if (store->source->midx) { - struct multi_pack_index *m = store->source->midx; + if (store->midx) { + struct multi_pack_index *m = store->midx; for (uint32_t i = 0; i < m->num_packs + m->num_packs_in_base; i++) prepare_midx_pack(m, i); } @@ -2094,7 +2095,7 @@ static int find_pack_entry(struct packfile_store *store, struct packfile_list_entry *l; packfile_store_prepare(store); - if (store->source->midx && fill_midx_entry(store->source->midx, oid, e)) + if (store->midx && fill_midx_entry(store->midx, oid, e)) return 1; for (l = store->packs.head; l; l = l->next) { @@ -2454,6 +2455,9 @@ void packfile_store_close(struct packfile_store *store) BUG("want to close pack marked 'do-not-close'"); close_pack(e->pack); } + if (store->midx) + close_midx(store->midx); + store->midx = NULL; } struct odb_packed_read_stream { diff --git a/packfile.h b/packfile.h index 08a666d538..92baf8ee88 100644 --- a/packfile.h +++ b/packfile.h @@ -101,6 +101,9 @@ struct packfile_store { unsigned flags; } kept_cache; + /* The multi-pack index that belongs to this specific packfile store. */ + struct multi_pack_index *midx; + /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already.