From fa087f57c79b28a7acff9971f11b079270e17add Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 12:04:30 +0100 Subject: [PATCH 01/14] odb: rename `FOR_EACH_OBJECT_*` flags Rename the `FOR_EACH_OBJECT_*` flags to have an `ODB_` prefix. This prepares us for a new upcoming `odb_for_each_object()` function and ensures that both the function and its flags have the same prefix. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- builtin/pack-objects.c | 10 +++++----- commit-graph.c | 4 ++-- object-file.c | 4 ++-- object-file.h | 2 +- odb.h | 13 +++++++------ packfile.c | 20 ++++++++++---------- packfile.h | 4 ++-- reachable.c | 8 ++++---- repack-promisor.c | 2 +- revision.c | 2 +- 11 files changed, 36 insertions(+), 35 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 2ad712e9f8..6964a5a52c 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -922,7 +922,7 @@ static int batch_objects(struct batch_options *opt) cb.seen = &seen; batch_each_object(opt, batch_unordered_object, - FOR_EACH_OBJECT_PACK_ORDER, &cb); + ODB_FOR_EACH_OBJECT_PACK_ORDER, &cb); oidset_clear(&seen); } else { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 6ee31d48c9..74317051fd 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3912,7 +3912,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs) for_each_object_in_pack(p, add_object_entry_from_pack, revs, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); } strbuf_release(&buf); @@ -4344,10 +4344,10 @@ static void add_objects_in_unpacked_packs(void) if (for_each_packed_object(to_pack.repo, add_object_in_unpacked_pack, NULL, - FOR_EACH_OBJECT_PACK_ORDER | - FOR_EACH_OBJECT_LOCAL_ONLY | - FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_LOCAL_ONLY | + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + 
ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) die(_("cannot open pack index")); } diff --git a/commit-graph.c b/commit-graph.c index 6b1f02e179..7f1145a082 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1927,7 +1927,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx, goto cleanup; } for_each_object_in_pack(p, add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); close_pack(p); free(p); } @@ -1965,7 +1965,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) _("Finding commits for commit graph among packed objects"), ctx->approx_nr_objects); for_each_packed_object(ctx->r, add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); if (ctx->progress_done < ctx->approx_nr_objects) display_progress(ctx->progress, ctx->approx_nr_objects); stop_progress(&ctx->progress); diff --git a/object-file.c b/object-file.c index e7e4c3348f..64e9e239dc 100644 --- a/object-file.c +++ b/object-file.c @@ -1789,7 +1789,7 @@ int for_each_loose_file_in_source(struct odb_source *source, int for_each_loose_object(struct object_database *odb, each_loose_object_fn cb, void *data, - enum for_each_object_flags flags) + enum odb_for_each_object_flags flags) { struct odb_source *source; @@ -1800,7 +1800,7 @@ int for_each_loose_object(struct object_database *odb, if (r) return r; - if (flags & FOR_EACH_OBJECT_LOCAL_ONLY) + if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) break; } diff --git a/object-file.h b/object-file.h index 1229d5f675..42bb50e10c 100644 --- a/object-file.h +++ b/object-file.h @@ -134,7 +134,7 @@ int for_each_loose_file_in_source(struct odb_source *source, */ int for_each_loose_object(struct object_database *odb, each_loose_object_fn, void *, - enum for_each_object_flags flags); + enum odb_for_each_object_flags flags); /** diff --git a/odb.h b/odb.h index bab07755f4..74503addf1 100644 --- a/odb.h +++ b/odb.h @@ -442,24 +442,25 @@ static inline void 
obj_read_unlock(void) if(obj_read_use_lock) pthread_mutex_unlock(&obj_read_mutex); } + /* Flags for for_each_*_object(). */ -enum for_each_object_flags { +enum odb_for_each_object_flags { /* Iterate only over local objects, not alternates. */ - FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), + ODB_FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), /* Only iterate over packs obtained from the promisor remote. */ - FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1), + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1), /* * Visit objects within a pack in packfile order rather than .idx order */ - FOR_EACH_OBJECT_PACK_ORDER = (1<<2), + ODB_FOR_EACH_OBJECT_PACK_ORDER = (1<<2), /* Only iterate over packs that are not marked as kept in-core. */ - FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3), + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3), /* Only iterate over packs that do not have .keep files. */ - FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; enum { diff --git a/packfile.c b/packfile.c index 402c3b5dc7..b65f0b43f1 100644 --- a/packfile.c +++ b/packfile.c @@ -2259,12 +2259,12 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid, int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data, - enum for_each_object_flags flags) + enum odb_for_each_object_flags flags) { uint32_t i; int r = 0; - if (flags & FOR_EACH_OBJECT_PACK_ORDER) { + if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) { if (load_pack_revindex(p->repo, p)) return -1; } @@ -2285,7 +2285,7 @@ int for_each_object_in_pack(struct packed_git *p, * - in pack-order, it is pack position, which we must * convert to an index position in order to get the oid. 
*/ - if (flags & FOR_EACH_OBJECT_PACK_ORDER) + if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) index_pos = pack_pos_to_index(p, i); else index_pos = i; @@ -2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p, } int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum for_each_object_flags flags) + void *data, enum odb_for_each_object_flags flags) { struct odb_source *source; int r = 0; @@ -2318,15 +2318,15 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) { struct packed_git *p = e->pack; - if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; - if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && !p->pack_promisor) continue; - if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && p->pack_keep_in_core) continue; - if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && p->pack_keep) continue; if (open_pack_index(p)) { @@ -2413,8 +2413,8 @@ int is_promisor_object(struct repository *r, const struct object_id *oid) if (repo_has_promisor_remote(r)) { for_each_packed_object(r, add_promisor_object, &promisor_objects, - FOR_EACH_OBJECT_PROMISOR_ONLY | - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | + ODB_FOR_EACH_OBJECT_PACK_ORDER); } promisor_objects_prepared = 1; } diff --git a/packfile.h b/packfile.h index acc5c55ad5..15551258bd 100644 --- a/packfile.h +++ b/packfile.h @@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid, void *data); int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, - enum for_each_object_flags flags); + enum odb_for_each_object_flags flags); int 
for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum for_each_object_flags flags); + void *data, enum odb_for_each_object_flags flags); /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 diff --git a/reachable.c b/reachable.c index 4b532039d5..82676b2668 100644 --- a/reachable.c +++ b/reachable.c @@ -307,7 +307,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, int ignore_in_core_kept_packs) { struct recent_data data; - enum for_each_object_flags flags; + enum odb_for_each_object_flags flags; int r; data.revs = revs; @@ -319,13 +319,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, data.extra_recent_oids_loaded = 0; r = for_each_loose_object(the_repository->objects, add_recent_loose, &data, - FOR_EACH_OBJECT_LOCAL_ONLY); + ODB_FOR_EACH_OBJECT_LOCAL_ONLY); if (r) goto done; - flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER; + flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER; if (ignore_in_core_kept_packs) - flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; + flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags); diff --git a/repack-promisor.c b/repack-promisor.c index ee6e0669f6..45c330b9a5 100644 --- a/repack-promisor.c +++ b/repack-promisor.c @@ -56,7 +56,7 @@ void repack_promisor_objects(struct repository *repo, ctx.cmd = &cmd; ctx.algop = repo->hash_algo; for_each_packed_object(repo, write_oid, &ctx, - FOR_EACH_OBJECT_PROMISOR_ONLY); + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { /* No packed objects; cmd was never started */ diff --git a/revision.c b/revision.c index b65a763770..5aadf46dac 100644 --- a/revision.c +++ b/revision.c @@ -3938,7 +3938,7 @@ int prepare_revision_walk(struct rev_info *revs) if (revs->exclude_promisor_objects) { for_each_packed_object(revs->repo, mark_uninteresting, revs, - FOR_EACH_OBJECT_PROMISOR_ONLY); 
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); } if (!revs->reflog_info) From 710c9c431e5c6577e64a7daf6be6144275bdb0bc Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 12:04:31 +0100 Subject: [PATCH 02/14] odb: fix flags parameter to be unsigned The `flags` parameter accepted by various `for_each_object()` functions is a bitfield of multiple flags. Such parameters are typically unsigned in the Git codebase, but we use `enum odb_for_each_object_flags` in some places. Adapt these function signatures to use the correct type. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 3 ++- object-file.h | 3 ++- packfile.c | 4 ++-- packfile.h | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/object-file.c b/object-file.c index 64e9e239dc..8fa461dd59 100644 --- a/object-file.c +++ b/object-file.c @@ -414,7 +414,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi) int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, - struct object_info *oi, int flags) + struct object_info *oi, + unsigned flags) { int ret; int fd; diff --git a/object-file.h b/object-file.h index 42bb50e10c..2acf19fb91 100644 --- a/object-file.h +++ b/object-file.h @@ -47,7 +47,8 @@ void odb_source_loose_reprepare(struct odb_source *source); int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, - struct object_info *oi, int flags); + struct object_info *oi, + unsigned flags); int odb_source_loose_read_object_stream(struct odb_read_stream **out, struct odb_source *source, diff --git a/packfile.c b/packfile.c index b65f0b43f1..79fe64a25b 100644 --- a/packfile.c +++ b/packfile.c @@ -2259,7 +2259,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid, int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data, - enum odb_for_each_object_flags flags) + unsigned flags) { uint32_t i; int r = 0; @@ 
-2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p, } int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum odb_for_each_object_flags flags) + void *data, unsigned flags) { struct odb_source *source; int r = 0; diff --git a/packfile.h b/packfile.h index 15551258bd..447c44c4a7 100644 --- a/packfile.h +++ b/packfile.h @@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid, void *data); int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, - enum odb_for_each_object_flags flags); + unsigned flags); int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum odb_for_each_object_flags flags); + void *data, unsigned flags); /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 From 46732b8ee67a29d334d9654b1707b47c1a20e565 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 12:04:32 +0100 Subject: [PATCH 03/14] object-file: extract function to read object info from path Extract a new function that allows us to read object info for a specific loose object via a user-supplied path. This function will be used in a subsequent commit. Note that this also allows us to drop `stat_loose_object()`, which is a simple wrapper around `odb_loose_path()` plus lstat(3p). Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/object-file.c b/object-file.c index 8fa461dd59..a651129426 100644 --- a/object-file.c +++ b/object-file.c @@ -165,30 +165,13 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) } /* - * Find "oid" as a loose object in given source. - * Returns 0 on success, negative on failure. + * Find "oid" as a loose object in given source, open the object and return its + * file descriptor. 
Returns the file descriptor on success, negative on failure. * * The "path" out-parameter will give the path of the object we found (if any). * Note that it may point to static storage and is only valid until another * call to stat_loose_object(). */ -static int stat_loose_object(struct odb_source_loose *loose, - const struct object_id *oid, - struct stat *st, const char **path) -{ - static struct strbuf buf = STRBUF_INIT; - - *path = odb_loose_path(loose->source, &buf, oid); - if (!lstat(*path, st)) - return 0; - - return -1; -} - -/* - * Like stat_loose_object(), but actually open the object and return the - * descriptor. See the caveats on the "path" parameter above. - */ static int open_loose_object(struct odb_source_loose *loose, const struct object_id *oid, const char **path) { @@ -412,7 +395,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi) return 0; } -int odb_source_loose_read_object_info(struct odb_source *source, +static int read_object_info_from_path(struct odb_source *source, + const char *path, const struct object_id *oid, struct object_info *oi, unsigned flags) @@ -420,7 +404,6 @@ int odb_source_loose_read_object_info(struct odb_source *source, int ret; int fd; unsigned long mapsize; - const char *path; void *map = NULL; git_zstream stream, *stream_to_end = NULL; char hdr[MAX_HEADER_LEN]; @@ -443,7 +426,7 @@ int odb_source_loose_read_object_info(struct odb_source *source, goto out; } - if (stat_loose_object(source->loose, oid, &st, &path) < 0) { + if (lstat(path, &st) < 0) { ret = -1; goto out; } @@ -455,7 +438,7 @@ int odb_source_loose_read_object_info(struct odb_source *source, goto out; } - fd = open_loose_object(source->loose, oid, &path); + fd = git_open(path); if (fd < 0) { if (errno != ENOENT) error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); @@ -534,6 +517,16 @@ out: return ret; } +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info 
*oi, + unsigned flags) +{ + static struct strbuf buf = STRBUF_INIT; + odb_loose_path(source, &buf, oid); + return read_object_info_from_path(source, buf.buf, oid, oi, flags); +} + static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c, const void *buf, unsigned long len, struct object_id *oid, From b8578bfc57e8f097b1df0e4a9b17b92429af10e4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 12:04:33 +0100 Subject: [PATCH 04/14] object-file: introduce function to iterate through objects We have multiple divergent interfaces to iterate through objects of a specific backend: - `for_each_loose_object()` yields all loose objects. - `for_each_packed_object()` (somewhat obviously) yields all packed objects. These functions have different function signatures, which makes it hard to create a common abstraction layer that covers both of these. Introduce a new function `odb_source_loose_for_each_object()` to plug this gap. This function doesn't take any data specific to loose objects, but instead it accepts a `struct object_info` that will be populated the exact same as if `odb_source_loose_read_object_info()` was called. The benefit of this new interface is that we can continue to pass backend-specific data, as `struct object_info` contains a union for these exact use cases. This will allow us to unify how we iterate through objects across both loose and packed objects in a subsequent commit. The `for_each_loose_object()` function continues to exist for now, but it will be removed at the end of this patch series. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 41 +++++++++++++++++++++++++++++++++++++++++ object-file.h | 11 +++++++++++ odb.h | 12 ++++++++++++ 3 files changed, 64 insertions(+) diff --git a/object-file.c b/object-file.c index a651129426..65e730684b 100644 --- a/object-file.c +++ b/object-file.c @@ -1801,6 +1801,47 @@ int for_each_loose_object(struct object_database *odb, return 0; } +struct for_each_object_wrapper_data { + struct odb_source *source; + struct object_info *oi; + odb_for_each_object_cb cb; + void *cb_data; +}; + +static int for_each_object_wrapper_cb(const struct object_id *oid, + const char *path, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + if (data->oi && + read_object_info_from_path(data->source, path, oid, data->oi, 0) < 0) + return -1; + return data->cb(oid, data->oi, data->cb_data); +} + +int odb_source_loose_for_each_object(struct odb_source *source, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct for_each_object_wrapper_data data = { + .source = source, + .oi = oi, + .cb = cb, + .cb_data = cb_data, + }; + + /* There are no loose promisor objects, so we can return immediately. */ + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) + return 0; + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) + return 0; + + return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, + NULL, NULL, &data); +} + static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) diff --git a/object-file.h b/object-file.h index 2acf19fb91..048b778531 100644 --- a/object-file.h +++ b/object-file.h @@ -137,6 +137,17 @@ int for_each_loose_object(struct object_database *odb, each_loose_object_fn, void *, enum odb_for_each_object_flags flags); +/* + * Iterate through all loose objects in the given object database source and + * invoke the callback function for each of them. 
If given, the object info + * will be populated with the object's data as if you had called + * `odb_source_loose_read_object_info()` on the object. + */ +int odb_source_loose_for_each_object(struct odb_source *source, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); /** * format_object_header() is a thin wrapper around s xsnprintf() that diff --git a/odb.h b/odb.h index 74503addf1..f97f249580 100644 --- a/odb.h +++ b/odb.h @@ -463,6 +463,18 @@ enum odb_for_each_object_flags { ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; +/* + * A callback function that can be used to iterate through objects. If given, + * the optional `oi` parameter will be populated the same as if you would call + * `odb_read_object_info()`. + * + * Returning a non-zero error code will cause iteration to abort. The error + * code will be propagated. + */ +typedef int (*odb_for_each_object_cb)(const struct object_id *oid, + struct object_info *oi, + void *cb_data); + enum { /* * By default, `odb_write_object()` does not actually write anything From 900d89743667e28db4de002427e950a270bfb734 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 12:04:34 +0100 Subject: [PATCH 05/14] packfile: extract function to iterate through objects of a store In the next commit we're about to introduce a new function that knows to iterate through objects of a given packfile store. Same as with the equivalent function for loose objects, this new function will also be agnostic of backends by using a `struct object_info`. Prepare for this by extracting a new shared function to iterate through a single packfile store. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 78 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/packfile.c b/packfile.c index 79fe64a25b..d15a2ce12b 100644 --- a/packfile.c +++ b/packfile.c @@ -2301,51 +2301,63 @@ int for_each_object_in_pack(struct packed_git *p, return r; } +static int packfile_store_for_each_object_internal(struct packfile_store *store, + each_packed_object_fn cb, + void *data, + unsigned flags, + int *pack_errors) +{ + struct packfile_list_entry *e; + int ret = 0; + + store->skip_mru_updates = true; + + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + p->pack_keep_in_core) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + p->pack_keep) + continue; + if (open_pack_index(p)) { + *pack_errors = 1; + continue; + } + + ret = for_each_object_in_pack(p, cb, data, flags); + if (ret) + break; + } + + store->skip_mru_updates = false; + + return ret; +} + int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, void *data, unsigned flags) { struct odb_source *source; - int r = 0; int pack_errors = 0; + int ret = 0; odb_prepare_alternates(repo->objects); for (source = repo->objects->sources; source; source = source->next) { - struct packfile_list_entry *e; - - source->packfiles->skip_mru_updates = true; - - for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) { - struct packed_git *p = e->pack; - - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && - !p->pack_promisor) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && 
- p->pack_keep_in_core) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && - p->pack_keep) - continue; - if (open_pack_index(p)) { - pack_errors = 1; - continue; - } - - r = for_each_object_in_pack(p, cb, data, flags); - if (r) - break; - } - - source->packfiles->skip_mru_updates = false; - - if (r) + ret = packfile_store_for_each_object_internal(source->packfiles, cb, data, + flags, &pack_errors); + if (ret) break; } - return r ? r : pack_errors; + return ret ? ret : pack_errors; } static int add_promisor_object(const struct object_id *oid, From b2aa629132b9dea035b1636557096611369bde5d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 12:04:35 +0100 Subject: [PATCH 06/14] packfile: introduce function to iterate through objects Introduce a new function `packfile_store_for_each_object()`. This function is equivalent to `odb_source_loose_for_each_object()` in that it: - Works on a single packfile store and thus per object source. - Passes a `struct object_info` to the callback function. As such, it provides the same callback interface as we already provide for loose objects now. These functions will be used in a subsequent step to implement `odb_for_each_object()`. The `for_each_packed_object()` function continues to exist for now, but it will be removed at the end of this patch series. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ packfile.h | 14 ++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/packfile.c b/packfile.c index d15a2ce12b..cd45c6f21c 100644 --- a/packfile.c +++ b/packfile.c @@ -2360,6 +2360,54 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, return ret ? 
ret : pack_errors; } +struct packfile_store_for_each_object_wrapper_data { + struct packfile_store *store; + struct object_info *oi; + odb_for_each_object_cb cb; + void *cb_data; +}; + +static int packfile_store_for_each_object_wrapper(const struct object_id *oid, + struct packed_git *pack, + uint32_t index_pos, + void *cb_data) +{ + struct packfile_store_for_each_object_wrapper_data *data = cb_data; + + if (data->oi) { + off_t offset = nth_packed_object_offset(pack, index_pos); + + if (packed_object_info(pack, offset, data->oi) < 0) { + mark_bad_packed_object(pack, oid); + return -1; + } + } + + return data->cb(oid, data->oi, data->cb_data); +} + +int packfile_store_for_each_object(struct packfile_store *store, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct packfile_store_for_each_object_wrapper_data data = { + .store = store, + .oi = oi, + .cb = cb, + .cb_data = cb_data, + }; + int pack_errors = 0, ret; + + ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper, + &data, flags, &pack_errors); + if (ret) + return ret; + + return pack_errors ? -1 : 0; +} + static int add_promisor_object(const struct object_id *oid, struct packed_git *pack, uint32_t pos UNUSED, diff --git a/packfile.h b/packfile.h index 447c44c4a7..ab0637fbe9 100644 --- a/packfile.h +++ b/packfile.h @@ -343,6 +343,20 @@ int for_each_object_in_pack(struct packed_git *p, int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, void *data, unsigned flags); +/* + * Iterate through all packed objects in the given packfile store and invoke + * the callback function for each of them. If given, the object info will be + * populated with the object's data as if you had called + * `packfile_store_read_object_info()` on the object. + * + * The flags parameter is a combination of `odb_for_each_object_flags`. 
+ */ +int packfile_store_for_each_object(struct packfile_store *store, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); + /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 #define PACKDIR_FILE_IDX 2 From 85f8f1c2fa7552dfb19e33793516c33e3961cd8f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 12:04:36 +0100 Subject: [PATCH 07/14] odb: introduce `odb_for_each_object()` Introduce a new function `odb_for_each_object()` that knows to iterate through all objects part of a given object database. This function is essentially a simple wrapper around the object database sources. Subsequent commits will adapt callers to use this new function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 27 +++++++++++++++++++++++++++ odb.h | 17 +++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/odb.c b/odb.c index ac70b6a099..65f0447aa5 100644 --- a/odb.c +++ b/odb.c @@ -995,6 +995,33 @@ int odb_freshen_object(struct object_database *odb, return 0; } +int odb_for_each_object(struct object_database *odb, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + int ret; + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) + continue; + + if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { + ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags); + if (ret) + return ret; + } + + ret = packfile_store_for_each_object(source->packfiles, oi, cb, cb_data, flags); + if (ret) + return ret; + } + + return 0; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { diff --git a/odb.h b/odb.h index f97f249580..8f6d95aee5 100644 --- a/odb.h +++ b/odb.h @@ -475,6 +475,23 @@ typedef int (*odb_for_each_object_cb)(const struct 
object_id *oid, struct object_info *oi, void *cb_data); +/* + * Iterate through all objects contained in the object database. Note that + * objects may be iterated over multiple times in case they are either stored + * in different backends or in case they are stored in multiple sources. + * + * Returning a non-zero error code will cause iteration to abort. The error + * code will be propagated. + * + * Returns 0 on success, a negative error code in case a failure occurred, or + * an arbitrary non-zero error code returned by the callback itself. + */ +int odb_for_each_object(struct object_database *odb, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); + enum { /* * By default, `odb_write_object()` does not actually write anything From 7305e6d60ed2a936b0c7f377e42a31cd28756043 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 12:04:37 +0100 Subject: [PATCH 08/14] builtin/fsck: refactor to use `odb_for_each_object()` In git-fsck(1) we have two callsites where we iterate over all objects via `for_each_loose_object()` and `for_each_packed_object()`. Both of these are trivially convertible with `odb_for_each_object()`. Refactor these callsites accordingly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 57 +++++++++++--------------------------------------- 1 file changed, 12 insertions(+), 45 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 4979bc795e..96107695ae 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -218,15 +218,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED, return 0; } -static void mark_unreachable_referents(const struct object_id *oid) +static int mark_unreachable_referents(const struct object_id *oid, + struct object_info *io UNUSED, + void *data UNUSED) { struct fsck_options options = FSCK_OPTIONS_DEFAULT; struct object *obj = lookup_object(the_repository, oid); if (!obj || !(obj->flags & HAS_OBJ)) - return; /* not part of our original set */ + return 0; /* not part of our original set */ if (obj->flags & REACHABLE) - return; /* reachable objects already traversed */ + return 0; /* reachable objects already traversed */ /* * Avoid passing OBJ_NONE to fsck_walk, which will parse the object @@ -243,22 +245,7 @@ static void mark_unreachable_referents(const struct object_id *oid) fsck_walk(obj, NULL, &options); if (obj->type == OBJ_TREE) free_tree_buffer((struct tree *)obj); -} -static int mark_loose_unreachable_referents(const struct object_id *oid, - const char *path UNUSED, - void *data UNUSED) -{ - mark_unreachable_referents(oid); - return 0; -} - -static int mark_packed_unreachable_referents(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, - void *data UNUSED) -{ - mark_unreachable_referents(oid); return 0; } @@ -394,12 +381,8 @@ static void check_connectivity(void) * and ignore any that weren't present in our earlier * traversal. 
*/ - for_each_loose_object(the_repository->objects, - mark_loose_unreachable_referents, NULL, 0); - for_each_packed_object(the_repository, - mark_packed_unreachable_referents, - NULL, - 0); + odb_for_each_object(the_repository->objects, NULL, + mark_unreachable_referents, NULL, 0); } /* Look up all the requirements, warn about missing objects.. */ @@ -848,26 +831,12 @@ static void fsck_index(struct index_state *istate, const char *index_path, fsck_resolve_undo(istate, index_path); } -static void mark_object_for_connectivity(const struct object_id *oid) +static int mark_object_for_connectivity(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data UNUSED) { struct object *obj = lookup_unknown_object(the_repository, oid); obj->flags |= HAS_OBJ; -} - -static int mark_loose_for_connectivity(const struct object_id *oid, - const char *path UNUSED, - void *data UNUSED) -{ - mark_object_for_connectivity(oid); - return 0; -} - -static int mark_packed_for_connectivity(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, - void *data UNUSED) -{ - mark_object_for_connectivity(oid); return 0; } @@ -1001,10 +970,8 @@ int cmd_fsck(int argc, fsck_refs(the_repository); if (connectivity_only) { - for_each_loose_object(the_repository->objects, - mark_loose_for_connectivity, NULL, 0); - for_each_packed_object(the_repository, - mark_packed_for_connectivity, NULL, 0); + odb_for_each_object(the_repository->objects, NULL, + mark_object_for_connectivity, NULL, 0); } else { odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) From 98f6927c6052b73a70d80780e17c86dc73c149a7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 12:04:38 +0100 Subject: [PATCH 09/14] treewide: enumerate promisor objects via `odb_for_each_object()` We have multiple callsites where we enumerate all promisor objects in the object database via 
`for_each_packed_object()`. This is done by passing the `ODB_FOR_EACH_OBJECT_PROMISOR_ONLY` flag, which causes us to skip over all non-promisor objects. These callsites can be trivially converted to `odb_for_each_object()` as we know to skip enumeration of loose objects in case the `PROMISOR_ONLY` flag was passed by the caller. Refactor the sites accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 37 ++++++++++++++++++++++--------------- repack-promisor.c | 8 ++++---- revision.c | 10 ++++------ 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/packfile.c b/packfile.c index cd45c6f21c..4f84bc19d9 100644 --- a/packfile.c +++ b/packfile.c @@ -2408,28 +2408,32 @@ int packfile_store_for_each_object(struct packfile_store *store, return pack_errors ? -1 : 0; } +struct add_promisor_object_data { + struct repository *repo; + struct oidset *set; +}; + static int add_promisor_object(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos UNUSED, - void *set_) + struct object_info *oi UNUSED, + void *cb_data) { - struct oidset *set = set_; + struct add_promisor_object_data *data = cb_data; struct object *obj; int we_parsed_object; - obj = lookup_object(pack->repo, oid); + obj = lookup_object(data->repo, oid); if (obj && obj->parsed) { we_parsed_object = 0; } else { we_parsed_object = 1; - obj = parse_object_with_flags(pack->repo, oid, + obj = parse_object_with_flags(data->repo, oid, PARSE_OBJECT_SKIP_HASH_CHECK); } if (!obj) return 1; - oidset_insert(set, oid); + oidset_insert(data->set, oid); /* * If this is a tree, commit, or tag, the objects it refers @@ -2447,19 +2451,19 @@ static int add_promisor_object(const struct object_id *oid, */ return 0; while (tree_entry_gently(&desc, &entry)) - oidset_insert(set, &entry.oid); + oidset_insert(data->set, &entry.oid); if (we_parsed_object) free_tree_buffer(tree); } else if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; struct 
commit_list *parents = commit->parents; - oidset_insert(set, get_commit_tree_oid(commit)); + oidset_insert(data->set, get_commit_tree_oid(commit)); for (; parents; parents = parents->next) - oidset_insert(set, &parents->item->object.oid); + oidset_insert(data->set, &parents->item->object.oid); } else if (obj->type == OBJ_TAG) { struct tag *tag = (struct tag *) obj; - oidset_insert(set, get_tagged_oid(tag)); + oidset_insert(data->set, get_tagged_oid(tag)); } return 0; } @@ -2471,10 +2475,13 @@ int is_promisor_object(struct repository *r, const struct object_id *oid) if (!promisor_objects_prepared) { if (repo_has_promisor_remote(r)) { - for_each_packed_object(r, add_promisor_object, - &promisor_objects, - ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | - ODB_FOR_EACH_OBJECT_PACK_ORDER); + struct add_promisor_object_data data = { + .repo = r, + .set = &promisor_objects, + }; + + odb_for_each_object(r->objects, NULL, add_promisor_object, &data, + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER); } promisor_objects_prepared = 1; } diff --git a/repack-promisor.c b/repack-promisor.c index 45c330b9a5..35c4073632 100644 --- a/repack-promisor.c +++ b/repack-promisor.c @@ -17,8 +17,8 @@ struct write_oid_context { * necessary. 
*/ static int write_oid(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, void *data) + struct object_info *oi UNUSED, + void *data) { struct write_oid_context *ctx = data; struct child_process *cmd = ctx->cmd; @@ -55,8 +55,8 @@ void repack_promisor_objects(struct repository *repo, */ ctx.cmd = &cmd; ctx.algop = repo->hash_algo; - for_each_packed_object(repo, write_oid, &ctx, - ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); + odb_for_each_object(repo->objects, NULL, write_oid, &ctx, + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { /* No packed objects; cmd was never started */ diff --git a/revision.c b/revision.c index 5aadf46dac..e34bcd8e88 100644 --- a/revision.c +++ b/revision.c @@ -3626,8 +3626,7 @@ void reset_revision_walk(void) } static int mark_uninteresting(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, + struct object_info *oi UNUSED, void *cb) { struct rev_info *revs = cb; @@ -3936,10 +3935,9 @@ int prepare_revision_walk(struct rev_info *revs) (revs->limited && limiting_can_increase_treesame(revs))) revs->treesame.name = "treesame"; - if (revs->exclude_promisor_objects) { - for_each_packed_object(revs->repo, mark_uninteresting, revs, - ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); - } + if (revs->exclude_promisor_objects) + odb_for_each_object(revs->repo->objects, NULL, mark_uninteresting, + revs, ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (!revs->reflog_info) prepare_to_use_bloom_filter(revs); From fe718f8e9804e623e91f54382f0ae1599e0b3825 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 12:04:39 +0100 Subject: [PATCH 10/14] treewide: drop uses of `for_each_{loose,packed}_object()` We're using `for_each_loose_object()` and `for_each_packed_object()` at a couple of callsites to enumerate all loose and packed objects, respectively. 
These functions will be removed in a subsequent commit in favor of the newly introduced `odb_source_loose_for_each_object()` and `packfile_store_for_each_object()` replacements. Prepare for this by refactoring the sites accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 28 ++++++++++++++++++++++------ commit-graph.c | 46 ++++++++++++++++++++++++++++++++-------------- 2 files changed, 54 insertions(+), 20 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 6964a5a52c..7d16fbc1b8 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -806,11 +806,14 @@ struct for_each_object_payload { void *payload; }; -static int batch_one_object_loose(const struct object_id *oid, - const char *path UNUSED, - void *_payload) +static int batch_one_object_oi(const struct object_id *oid, + struct object_info *oi, + void *_payload) { struct for_each_object_payload *payload = _payload; + if (oi && oi->whence == OI_PACKED) + return payload->callback(oid, oi->u.packed.pack, oi->u.packed.offset, + payload->payload); return payload->callback(oid, NULL, 0, payload->payload); } @@ -846,8 +849,15 @@ static void batch_each_object(struct batch_options *opt, .payload = _payload, }; struct bitmap_index *bitmap = prepare_bitmap_git(the_repository); + struct odb_source *source; - for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0); + odb_prepare_alternates(the_repository->objects); + for (source = the_repository->objects->sources; source; source = source->next) { + int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi, + &payload, flags); + if (ret) + break; + } if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, batch_one_object_bitmapped, &payload)) { @@ -861,8 +871,14 @@ static void batch_each_object(struct batch_options *opt, &payload, flags); } } else { - for_each_packed_object(the_repository, batch_one_object_packed, - &payload, flags); + struct 
object_info oi = { 0 }; + + for (source = the_repository->objects->sources; source; source = source->next) { + int ret = packfile_store_for_each_object(source->packfiles, &oi, + batch_one_object_oi, &payload, flags); + if (ret) + break; + } } free_bitmap_index(bitmap); diff --git a/commit-graph.c b/commit-graph.c index 7f1145a082..a3087d7883 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1479,24 +1479,16 @@ static int write_graph_chunk_bloom_data(struct hashfile *f, return 0; } -static int add_packed_commits(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, - void *data) +static int add_packed_commits_oi(const struct object_id *oid, + struct object_info *oi, + void *data) { struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data; - enum object_type type; - off_t offset = nth_packed_object_offset(pack, pos); - struct object_info oi = OBJECT_INFO_INIT; if (ctx->progress) display_progress(ctx->progress, ++ctx->progress_done); - oi.typep = &type; - if (packed_object_info(pack, offset, &oi) < 0) - die(_("unable to get type of object %s"), oid_to_hex(oid)); - - if (type != OBJ_COMMIT) + if (*oi->typep != OBJ_COMMIT) return 0; oid_array_append(&ctx->oids, oid); @@ -1505,6 +1497,22 @@ static int add_packed_commits(const struct object_id *oid, return 0; } +static int add_packed_commits(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *data) +{ + enum object_type type; + off_t offset = nth_packed_object_offset(pack, pos); + struct object_info oi = OBJECT_INFO_INIT; + + oi.typep = &type; + if (packed_object_info(pack, offset, &oi) < 0) + die(_("unable to get type of object %s"), oid_to_hex(oid)); + + return add_packed_commits_oi(oid, &oi, data); +} + static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit) { struct commit_list *parent; @@ -1959,13 +1967,23 @@ static int fill_oids_from_commits(struct write_commit_graph_context *ctx, static void 
fill_oids_from_all_packs(struct write_commit_graph_context *ctx) { + struct odb_source *source; + enum object_type type; + struct object_info oi = { + .typep = &type, + }; + if (ctx->report_progress) ctx->progress = start_delayed_progress( ctx->r, _("Finding commits for commit graph among packed objects"), ctx->approx_nr_objects); - for_each_packed_object(ctx->r, add_packed_commits, ctx, - ODB_FOR_EACH_OBJECT_PACK_ORDER); + + odb_prepare_alternates(ctx->r->objects); + for (source = ctx->r->objects->sources; source; source = source->next) + packfile_store_for_each_object(source->packfiles, &oi, add_packed_commits_oi, + ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER); + if (ctx->progress_done < ctx->approx_nr_objects) display_progress(ctx->progress, ctx->approx_nr_objects); stop_progress(&ctx->progress); From 16a2043a679578145672bfb311b24ca2db98ec31 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 12:04:40 +0100 Subject: [PATCH 11/14] odb: introduce mtime fields for object info requests There are some use cases where we need to figure out the mtime for objects. Most importantly, this is the case when we want to prune unreachable objects. But getting at that data requires users to manually derive the info either via the loose object's mtime, the packfiles' mtime or via the ".mtimes" file. Introduce a new `struct object_info::mtimep` pointer that allows callers to request an object's mtime. This new field will be used in a subsequent commit. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 29 +++++++++++++++++++++++++---- odb.c | 2 ++ odb.h | 1 + packfile.c | 40 +++++++++++++++++++++++++++++++++------- 4 files changed, 61 insertions(+), 11 deletions(-) diff --git a/object-file.c b/object-file.c index 65e730684b..c0f896673b 100644 --- a/object-file.c +++ b/object-file.c @@ -409,6 +409,7 @@ static int read_object_info_from_path(struct odb_source *source, char hdr[MAX_HEADER_LEN]; unsigned long size_scratch; enum object_type type_scratch; + struct stat st; /* * If we don't care about type or size, then we don't @@ -421,7 +422,7 @@ static int read_object_info_from_path(struct odb_source *source, if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) { struct stat st; - if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK)) { + if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) { ret = quick_has_loose(source->loose, oid) ? 0 : -1; goto out; } @@ -431,8 +432,12 @@ static int read_object_info_from_path(struct odb_source *source, goto out; } - if (oi && oi->disk_sizep) - *oi->disk_sizep = st.st_size; + if (oi) { + if (oi->disk_sizep) + *oi->disk_sizep = st.st_size; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; + } ret = 0; goto out; @@ -446,7 +451,21 @@ static int read_object_info_from_path(struct odb_source *source, goto out; } - map = map_fd(fd, path, &mapsize); + if (fstat(fd, &st)) { + close(fd); + ret = -1; + goto out; + } + + mapsize = xsize_t(st.st_size); + if (!mapsize) { + close(fd); + ret = error(_("object file %s is empty"), path); + goto out; + } + + map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); if (!map) { ret = -1; goto out; @@ -454,6 +473,8 @@ static int read_object_info_from_path(struct odb_source *source, if (oi->disk_sizep) *oi->disk_sizep = mapsize; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; stream_to_end = &stream; diff --git a/odb.c b/odb.c index 65f0447aa5..67decd3908 100644 --- a/odb.c +++ 
b/odb.c @@ -702,6 +702,8 @@ static int do_oid_object_info_extended(struct object_database *odb, oidclr(oi->delta_base_oid, odb->repo->hash_algo); if (oi->contentp) *oi->contentp = xmemdupz(co->buf, co->size); + if (oi->mtimep) + *oi->mtimep = 0; oi->whence = OI_CACHED; } return 0; diff --git a/odb.h b/odb.h index 8f6d95aee5..9e22f79172 100644 --- a/odb.h +++ b/odb.h @@ -317,6 +317,7 @@ struct object_info { off_t *disk_sizep; struct object_id *delta_base_oid; void **contentp; + time_t *mtimep; /* Response */ enum { diff --git a/packfile.c b/packfile.c index 4f84bc19d9..c96ec21f86 100644 --- a/packfile.c +++ b/packfile.c @@ -1578,13 +1578,14 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, hashmap_add(&delta_base_cache, &ent->ent); } -int packed_object_info(struct packed_git *p, - off_t obj_offset, struct object_info *oi) +static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_offset, + uint32_t *maybe_index_pos, struct object_info *oi) { struct pack_window *w_curs = NULL; unsigned long size; off_t curpos = obj_offset; enum object_type type = OBJ_NONE; + uint32_t pack_pos; int ret; /* @@ -1619,16 +1620,34 @@ int packed_object_info(struct packed_git *p, } } - if (oi->disk_sizep) { - uint32_t pos; - if (offset_to_pack_pos(p, obj_offset, &pos) < 0) { + if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) { + if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) { error("could not find object at offset %"PRIuMAX" " "in pack %s", (uintmax_t)obj_offset, p->pack_name); ret = -1; goto out; } + } - *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset; + if (oi->disk_sizep) + *oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset; + + if (oi->mtimep) { + if (p->is_cruft) { + uint32_t index_pos; + + if (load_pack_mtimes(p) < 0) + die(_("could not load cruft pack .mtimes")); + + if (maybe_index_pos) + index_pos = *maybe_index_pos; + else + index_pos = pack_pos_to_index(p, pack_pos); + + *oi->mtimep = 
nth_packed_mtime(p, index_pos); + } else { + *oi->mtimep = p->mtime; + } } if (oi->typep) { @@ -1681,6 +1700,12 @@ out: return ret; } +int packed_object_info(struct packed_git *p, off_t obj_offset, + struct object_info *oi) +{ + return packed_object_info_with_index_pos(p, obj_offset, NULL, oi); +} + static void *unpack_compressed_entry(struct packed_git *p, struct pack_window **w_curs, off_t curpos, @@ -2377,7 +2402,8 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid, if (data->oi) { off_t offset = nth_packed_object_offset(pack, index_pos); - if (packed_object_info(pack, offset, data->oi) < 0) { + if (packed_object_info_with_index_pos(pack, offset, + &index_pos, data->oi) < 0) { mark_bad_packed_object(pack, oid); return -1; } From b591d25a5dd9487c228007e8440266030d51c97f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 12:04:41 +0100 Subject: [PATCH 12/14] builtin/pack-objects: use `packfile_store_for_each_object()` When enumerating objects that are supposed to be stored in a new cruft pack we use `for_each_packed_object()` and then derive each object's mtime individually. Refactor this logic to instead use the new `packfile_store_for_each_object()` function with an object info request that asks for the respective mtimes. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 45 ++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 74317051fd..223ec3b49e 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4314,25 +4314,12 @@ static void show_edge(struct commit *commit) } static int add_object_in_unpacked_pack(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, + struct object_info *oi, void *data UNUSED) { if (cruft) { - off_t offset; - time_t mtime; - - if (pack->is_cruft) { - if (load_pack_mtimes(pack) < 0) - die(_("could not load cruft pack .mtimes")); - mtime = nth_packed_mtime(pack, pos); - } else { - mtime = pack->mtime; - } - offset = nth_packed_object_offset(pack, pos); - - add_cruft_object_entry(oid, OBJ_NONE, pack, offset, - NULL, mtime); + add_cruft_object_entry(oid, OBJ_NONE, oi->u.packed.pack, + oi->u.packed.offset, NULL, *oi->mtimep); } else { add_object_entry(oid, OBJ_NONE, "", 0); } @@ -4341,14 +4328,24 @@ static int add_object_in_unpacked_pack(const struct object_id *oid, static void add_objects_in_unpacked_packs(void) { - if (for_each_packed_object(to_pack.repo, - add_object_in_unpacked_pack, - NULL, - ODB_FOR_EACH_OBJECT_PACK_ORDER | - ODB_FOR_EACH_OBJECT_LOCAL_ONLY | - ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) - die(_("cannot open pack index")); + struct odb_source *source; + time_t mtime; + struct object_info oi = { + .mtimep = &mtime, + }; + + odb_prepare_alternates(to_pack.repo->objects); + for (source = to_pack.repo->objects->sources; source; source = source->next) { + if (!source->local) + continue; + + if (packfile_store_for_each_object(source->packfiles, &oi, + add_object_in_unpacked_pack, NULL, + ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + 
die(_("cannot open pack index")); + } } static int add_loose_object(const struct object_id *oid, const char *path, From 9a9a207cb3e62b199cb2b53c288846b5ce79115c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 15 Jan 2026 12:04:42 +0100 Subject: [PATCH 13/14] reachable: convert to use `odb_for_each_object()` To figure out which objects have expired we enumerate all loose and packed objects individually so that we can figure out their respective mtimes. Refactor the code to instead use `odb_for_each_object()` with a request that asks for the object mtime. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reachable.c | 125 +++++++++++++++------------------------------------- 1 file changed, 35 insertions(+), 90 deletions(-) diff --git a/reachable.c b/reachable.c index 82676b2668..101cfc2727 100644 --- a/reachable.c +++ b/reachable.c @@ -191,30 +191,27 @@ static int obj_is_recent(const struct object_id *oid, timestamp_t mtime, return oidset_contains(&data->extra_recent_oids, oid); } -static void add_recent_object(const struct object_id *oid, - struct packed_git *pack, - off_t offset, - timestamp_t mtime, - struct recent_data *data) +static int want_recent_object(struct recent_data *data, + const struct object_id *oid) { + if (data->ignore_in_core_kept_packs && + has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) + return 0; + return 1; +} + +static int add_recent_object(const struct object_id *oid, + struct object_info *oi, + void *cb_data) +{ + struct recent_data *data = cb_data; struct object *obj; - enum object_type type; - if (!obj_is_recent(oid, mtime, data)) - return; + if (!want_recent_object(data, oid) || + !obj_is_recent(oid, *oi->mtimep, data)) + return 0; - /* - * We do not want to call parse_object here, because - * inflating blobs and trees could be very expensive.
- * However, we do need to know the correct type for - * later processing, and the revision machinery expects - * commits and tags to have been parsed. - */ - type = odb_read_object_info(the_repository->objects, oid, NULL); - if (type < 0) - die("unable to get object info for %s", oid_to_hex(oid)); - - switch (type) { + switch (*oi->typep) { case OBJ_TAG: case OBJ_COMMIT: obj = parse_object_or_die(the_repository, oid, NULL); @@ -227,77 +224,22 @@ static void add_recent_object(const struct object_id *oid, break; default: die("unknown object type for %s: %s", - oid_to_hex(oid), type_name(type)); + oid_to_hex(oid), type_name(*oi->typep)); } if (!obj) die("unable to lookup %s", oid_to_hex(oid)); + if (obj->flags & SEEN) + return 0; add_pending_object(data->revs, obj, ""); - if (data->cb) - data->cb(obj, pack, offset, mtime); -} - -static int want_recent_object(struct recent_data *data, - const struct object_id *oid) -{ - if (data->ignore_in_core_kept_packs && - has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) - return 0; - return 1; -} - -static int add_recent_loose(const struct object_id *oid, - const char *path, void *data) -{ - struct stat st; - struct object *obj; - - if (!want_recent_object(data, oid)) - return 0; - - obj = lookup_object(the_repository, oid); - - if (obj && obj->flags & SEEN) - return 0; - - if (stat(path, &st) < 0) { - /* - * It's OK if an object went away during our iteration; this - * could be due to a simultaneous repack. But anything else - * we should abort, since we might then fail to mark objects - * which should not be pruned. 
- */ - if (errno == ENOENT) - return 0; - return error_errno("unable to stat %s", oid_to_hex(oid)); + if (data->cb) { + if (oi->whence == OI_PACKED) + data->cb(obj, oi->u.packed.pack, oi->u.packed.offset, *oi->mtimep); + else + data->cb(obj, NULL, 0, *oi->mtimep); } - add_recent_object(oid, NULL, 0, st.st_mtime, data); - return 0; -} - -static int add_recent_packed(const struct object_id *oid, - struct packed_git *p, - uint32_t pos, - void *data) -{ - struct object *obj; - timestamp_t mtime = p->mtime; - - if (!want_recent_object(data, oid)) - return 0; - - obj = lookup_object(the_repository, oid); - - if (obj && obj->flags & SEEN) - return 0; - if (p->is_cruft) { - if (load_pack_mtimes(p) < 0) - die(_("could not load cruft pack .mtimes")); - mtime = nth_packed_mtime(p, pos); - } - add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data); return 0; } @@ -307,7 +249,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, int ignore_in_core_kept_packs) { struct recent_data data; - enum odb_for_each_object_flags flags; + unsigned flags; + enum object_type type; + time_t mtime; + struct object_info oi = { + .mtimep = &mtime, + .typep = &type, + }; int r; data.revs = revs; @@ -318,16 +266,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, oidset_init(&data.extra_recent_oids, 0); data.extra_recent_oids_loaded = 0; - r = for_each_loose_object(the_repository->objects, add_recent_loose, &data, - ODB_FOR_EACH_OBJECT_LOCAL_ONLY); - if (r) - goto done; - flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER; if (ignore_in_core_kept_packs) flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; - r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags); + r = odb_for_each_object(revs->repo->objects, &oi, add_recent_object, &data, flags); + if (r) + goto done; done: oidset_clear(&data.extra_recent_oids); From e5901df2f3625d475f3b0d3e89f34c3ef2cf91fc Mon Sep 17 00:00:00 2001 From: Patrick 
Steinhardt Date: Thu, 15 Jan 2026 12:04:43 +0100 Subject: [PATCH 14/14] odb: drop unused `for_each_{loose,packed}_object()` functions We have converted all callers of `for_each_loose_object()` and `for_each_packed_object()` to use their new replacement functions instead. We can thus remove them now. Do so and inline `packfile_store_for_each_object_internal()` now that it only has a single callsite again. This makes it a bit easier to follow the callback indirection that is happening there. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 20 ----------- object-file.h | 11 ------ packfile.c | 92 ++++++++++++++++----------------------------------- packfile.h | 2 -- 4 files changed, 29 insertions(+), 96 deletions(-) diff --git a/object-file.c b/object-file.c index c0f896673b..bc5209f2fe 100644 --- a/object-file.c +++ b/object-file.c @@ -1802,26 +1802,6 @@ int for_each_loose_file_in_source(struct odb_source *source, return r; } -int for_each_loose_object(struct object_database *odb, - each_loose_object_fn cb, void *data, - enum odb_for_each_object_flags flags) -{ - struct odb_source *source; - - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) { - int r = for_each_loose_file_in_source(source, cb, NULL, - NULL, data); - if (r) - return r; - - if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) - break; - } - - return 0; -} - struct for_each_object_wrapper_data { struct odb_source *source; struct object_info *oi; diff --git a/object-file.h b/object-file.h index 048b778531..af7f57d2a1 100644 --- a/object-file.h +++ b/object-file.h @@ -126,17 +126,6 @@ int for_each_loose_file_in_source(struct odb_source *source, each_loose_subdir_fn subdir_cb, void *data); -/* - * Iterate over all accessible loose objects without respect to - * reachability. By default, this includes both local and alternate objects. - * The order in which objects are visited is unspecified. - * - * Any flags specific to packs are ignored. 
- */ -int for_each_loose_object(struct object_database *odb, - each_loose_object_fn, void *, - enum odb_for_each_object_flags flags); - /* * Iterate through all loose objects in the given object database source and * invoke the callback function for each of them. If given, the object info diff --git a/packfile.c b/packfile.c index c96ec21f86..493d81fdca 100644 --- a/packfile.c +++ b/packfile.c @@ -2326,65 +2326,6 @@ int for_each_object_in_pack(struct packed_git *p, return r; } -static int packfile_store_for_each_object_internal(struct packfile_store *store, - each_packed_object_fn cb, - void *data, - unsigned flags, - int *pack_errors) -{ - struct packfile_list_entry *e; - int ret = 0; - - store->skip_mru_updates = true; - - for (e = packfile_store_get_packs(store); e; e = e->next) { - struct packed_git *p = e->pack; - - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && - !p->pack_promisor) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && - p->pack_keep_in_core) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && - p->pack_keep) - continue; - if (open_pack_index(p)) { - *pack_errors = 1; - continue; - } - - ret = for_each_object_in_pack(p, cb, data, flags); - if (ret) - break; - } - - store->skip_mru_updates = false; - - return ret; -} - -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, unsigned flags) -{ - struct odb_source *source; - int pack_errors = 0; - int ret = 0; - - odb_prepare_alternates(repo->objects); - - for (source = repo->objects->sources; source; source = source->next) { - ret = packfile_store_for_each_object_internal(source->packfiles, cb, data, - flags, &pack_errors); - if (ret) - break; - } - - return ret ? 
ret : pack_errors; -} - struct packfile_store_for_each_object_wrapper_data { struct packfile_store *store; struct object_info *oi; @@ -2424,12 +2365,37 @@ int packfile_store_for_each_object(struct packfile_store *store, .cb = cb, .cb_data = cb_data, }; + struct packfile_list_entry *e; int pack_errors = 0, ret; - ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper, - &data, flags, &pack_errors); - if (ret) - return ret; + store->skip_mru_updates = true; + + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + p->pack_keep_in_core) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + p->pack_keep) + continue; + if (open_pack_index(p)) { + pack_errors = 1; + continue; + } + + ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper, + &data, flags); + if (ret) + break; + } + + store->skip_mru_updates = false; return pack_errors ? -1 : 0; } diff --git a/packfile.h b/packfile.h index ab0637fbe9..8e0d2b7661 100644 --- a/packfile.h +++ b/packfile.h @@ -340,8 +340,6 @@ typedef int each_packed_object_fn(const struct object_id *oid, int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, unsigned flags); -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, unsigned flags); /* * Iterate through all packed objects in the given packfile store and invoke