From 3c7c41d6b7ee4c4576490a3b6cfefe4d59d24172 Mon Sep 17 00:00:00 2001
From: Aaron Plattner
Date: Mon, 8 Dec 2025 17:48:56 -0800
Subject: [PATCH 1/2] object: apply skip_hash and discard_tree optimizations to unknown blobs too

parse_object_with_flags() has an optimization to skip parsing blobs if
PARSE_OBJECT_SKIP_HASH_CHECK is set and the object hasn't been seen
before or might be a blob but hasn't been parsed yet. The latter can
happen, for example, if add_tree_entries() walks a path that references
a blob object that hasn't been seen before: lookup_blob() marks the
referenced oid as being a blob, but does not provide any additional
information about it until it is parsed.

It's possible for an object to be created without even a type, such as
when prepare_revision_walk() uses mark_uninteresting() to mark all
promisor objects as uninteresting. These objects have obj->parsed ==
false and obj->type == OBJ_NONE.

The skip_hash optimization does not consider this kind of object, so
parse_object_with_flags() proceeds to fully parse the object to
determine its type.

Improve the optimization by applying it to OBJ_NONE objects as well as
OBJ_BLOB ones. Apply a similar fix for trees.
Fixes: 8db2dad7a045 ("parse_object(): check on-disk type of suspected blob") Signed-off-by: Aaron Plattner Signed-off-by: Junio C Hamano --- object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/object.c b/object.c index b08fc7a163..4669b8d65e 100644 --- a/object.c +++ b/object.c @@ -328,7 +328,7 @@ struct object *parse_object_with_flags(struct repository *r, return &commit->object; } - if ((!obj || obj->type == OBJ_BLOB) && + if ((!obj || obj->type == OBJ_NONE || obj->type == OBJ_BLOB) && odb_read_object_info(r->objects, oid, NULL) == OBJ_BLOB) { if (!skip_hash && stream_object_signature(r, repl) < 0) { error(_("hash mismatch %s"), oid_to_hex(oid)); @@ -344,7 +344,7 @@ struct object *parse_object_with_flags(struct repository *r, * have the on-disk object with the correct type. */ if (skip_hash && discard_tree && - (!obj || obj->type == OBJ_TREE) && + (!obj || obj->type == OBJ_NONE || obj->type == OBJ_TREE) && odb_read_object_info(r->objects, oid, NULL) == OBJ_TREE) { return &lookup_tree(r, oid)->object; } From 3f5d1749e7eb8ab745b348aa138564b809957d3d Mon Sep 17 00:00:00 2001 From: Aaron Plattner Date: Mon, 8 Dec 2025 17:48:57 -0800 Subject: [PATCH 2/2] packfile: skip hash checks in add_promisor_object() When is_promisor_object() is called for the first time, it lazily initializes a set of all promisor objects by iterating through all objects in promisor packs. For each object, add_promisor_object() calls parse_object(), which decompresses and hashes the entire object. For repositories with large pack files, this can take an extremely long time. 
For example, on a production repository with a 176 GB promisor pack:

    $ time ~/git/git/git-rev-list --objects --all --exclude-promisor-objects --quiet
    ________________________________________________________
    Executed in   76.10 mins    fish           external
       usr time   72.10 mins    1.83 millis   72.10 mins
       sys time    3.56 mins    0.17 millis    3.56 mins

add_promisor_object() just wants to construct the set of all promisor
objects, so it doesn't really need to verify the hash of every object.
Set PARSE_OBJECT_SKIP_HASH_CHECK to skip the hash check. This has the
side effect of skipping decompression of blob objects completely, saving
a significant amount of time:

    $ time ~/git/git/git-rev-list --objects --all --exclude-promisor-objects --quiet
    ________________________________________________________
    Executed in  124.70 secs    fish           external
       usr time   46.94 secs    0.00 millis   46.94 secs
       sys time   43.11 secs    1.03 millis   43.11 secs

Signed-off-by: Aaron Plattner
Signed-off-by: Junio C Hamano
---
 packfile.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/packfile.c b/packfile.c index 9cc11b6dc5..01b992a4e1 100644 --- a/packfile.c +++ b/packfile.c @@ -2310,7 +2310,8 @@ static int add_promisor_object(const struct object_id *oid, we_parsed_object = 0; } else { we_parsed_object = 1; - obj = parse_object(pack->repo, oid); + obj = parse_object_with_flags(pack->repo, oid, + PARSE_OBJECT_SKIP_HASH_CHECK); } if (!obj)