From 0000d6543f1c2ceea017161a2807167cdfbf8c0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 6 Jan 2019 17:45:30 +0100 Subject: [PATCH 1/4] object-store: factor out odb_loose_cache() Add and use a function for loading the entries of a loose object subdirectory for a given object ID. It frees callers from deriving the fanout key; they can use the returned oid_array reference for lookups or forward range scans. Suggested-by: Jeff King Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- object-store.h | 7 +++++++ sha1-file.c | 12 +++++++++--- sha1-name.c | 10 +++++----- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/object-store.h b/object-store.h index 60758efad8..7236c571c0 100644 --- a/object-store.h +++ b/object-store.h @@ -54,6 +54,13 @@ void add_to_alternates_memory(const char *dir); */ void odb_load_loose_cache(struct object_directory *odb, int subdir_nr); +/* + * Populate and return the loose object cache array corresponding to the + * given object ID. 
+ */ +struct oid_array *odb_loose_cache(struct object_directory *odb, + const struct object_id *oid); + struct packed_git { struct packed_git *next; struct list_head mru; diff --git a/sha1-file.c b/sha1-file.c index 5a272f70de..cb8583b634 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -924,7 +924,6 @@ static int open_sha1_file(struct repository *r, static int quick_has_loose(struct repository *r, const unsigned char *sha1) { - int subdir_nr = sha1[0]; struct object_id oid; struct object_directory *odb; @@ -932,8 +931,7 @@ static int quick_has_loose(struct repository *r, prepare_alt_odb(r); for (odb = r->objects->odb; odb; odb = odb->next) { - odb_load_loose_cache(odb, subdir_nr); - if (oid_array_lookup(&odb->loose_objects_cache, &oid) >= 0) + if (oid_array_lookup(odb_loose_cache(odb, &oid), &oid) >= 0) return 1; } return 0; @@ -2152,6 +2150,14 @@ static int append_loose_object(const struct object_id *oid, const char *path, return 0; } +struct oid_array *odb_loose_cache(struct object_directory *odb, + const struct object_id *oid) +{ + int subdir_nr = oid->hash[0]; + odb_load_loose_cache(odb, subdir_nr); + return &odb->loose_objects_cache; +} + void odb_load_loose_cache(struct object_directory *odb, int subdir_nr) { struct strbuf buf = STRBUF_INIT; diff --git a/sha1-name.c b/sha1-name.c index b24502811b..a656481c6a 100644 --- a/sha1-name.c +++ b/sha1-name.c @@ -87,21 +87,21 @@ static int match_sha(unsigned, const unsigned char *, const unsigned char *); static void find_short_object_filename(struct disambiguate_state *ds) { - int subdir_nr = ds->bin_pfx.hash[0]; struct object_directory *odb; for (odb = the_repository->objects->odb; odb && !ds->ambiguous; odb = odb->next) { int pos; + struct oid_array *loose_objects; - odb_load_loose_cache(odb, subdir_nr); - pos = oid_array_lookup(&odb->loose_objects_cache, &ds->bin_pfx); + loose_objects = odb_loose_cache(odb, &ds->bin_pfx); + pos = oid_array_lookup(loose_objects, &ds->bin_pfx); if (pos < 0) pos = -1 - pos; - while 
(!ds->ambiguous && pos < odb->loose_objects_cache.nr) { + while (!ds->ambiguous && pos < loose_objects->nr) { const struct object_id *oid; - oid = odb->loose_objects_cache.oid + pos; + oid = loose_objects->oid + pos; if (!match_sha(ds->len, ds->bin_pfx.hash, oid->hash)) break; update_candidates(ds, oid); From d4e19e516325e211cedb070a487453ad2d1043be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 6 Jan 2019 17:45:39 +0100 Subject: [PATCH 2/4] object-store: factor out odb_clear_loose_cache() Add and use a function for emptying the loose object cache, so callers don't have to know any of its implementation details. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- object-store.h | 3 +++ object.c | 2 +- packfile.c | 7 ++----- sha1-file.c | 7 +++++++ 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/object-store.h b/object-store.h index 7236c571c0..709bf856b6 100644 --- a/object-store.h +++ b/object-store.h @@ -61,6 +61,9 @@ void odb_load_loose_cache(struct object_directory *odb, int subdir_nr); struct oid_array *odb_loose_cache(struct object_directory *odb, const struct object_id *oid); +/* Empty the loose object cache for the specified object directory. 
*/ +void odb_clear_loose_cache(struct object_directory *odb); + struct packed_git { struct packed_git *next; struct list_head mru; diff --git a/object.c b/object.c index 79d636091c..a5c5cf830f 100644 --- a/object.c +++ b/object.c @@ -485,7 +485,7 @@ struct raw_object_store *raw_object_store_new(void) static void free_object_directory(struct object_directory *odb) { free(odb->path); - oid_array_clear(&odb->loose_objects_cache); + odb_clear_loose_cache(odb); free(odb); } diff --git a/packfile.c b/packfile.c index 8c6b47cc77..0fe9c21bf1 100644 --- a/packfile.c +++ b/packfile.c @@ -994,11 +994,8 @@ void reprepare_packed_git(struct repository *r) { struct object_directory *odb; - for (odb = r->objects->odb; odb; odb = odb->next) { - oid_array_clear(&odb->loose_objects_cache); - memset(&odb->loose_objects_subdir_seen, 0, - sizeof(odb->loose_objects_subdir_seen)); - } + for (odb = r->objects->odb; odb; odb = odb->next) + odb_clear_loose_cache(odb); r->objects->approximate_object_count_valid = 0; r->objects->packed_git_initialized = 0; diff --git a/sha1-file.c b/sha1-file.c index cb8583b634..2f965b2688 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -2178,6 +2178,13 @@ void odb_load_loose_cache(struct object_directory *odb, int subdir_nr) strbuf_release(&buf); } +void odb_clear_loose_cache(struct object_directory *odb) +{ + oid_array_clear(&odb->loose_objects_cache); + memset(&odb->loose_objects_subdir_seen, 0, + sizeof(odb->loose_objects_subdir_seen)); +} + static int check_stream_sha1(git_zstream *stream, const char *hdr, unsigned long size, From 4cea1ce0f69d079819f2a189febcea215045dabf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 6 Jan 2019 17:45:52 +0100 Subject: [PATCH 3/4] object-store: use one oid_array per subdirectory for loose cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The loose objects cache is filled one subdirectory at a time as needed. 
It is stored in an oid_array, which has to be re-sorted after each add operation. So when querying a wide range of objects, the partially filled array needs to be re-sorted up to 255 times, which takes over 100 times longer than sorting once. Use one oid_array for each subdirectory. This ensures that entries only have to be sorted a single time. It also avoids eight binary search steps for each cache lookup as a small bonus. The cache is used for collision checks for the log placeholders %h, %t and %p, and we can see the change speeding them up in a repository with ca. 100 objects per subdirectory: $ git count-objects 26733 objects, 68808 kilobytes Test HEAD^ HEAD -------------------------------------------------------------------- 4205.1: log with %H 0.51(0.47+0.04) 0.51(0.49+0.02) +0.0% 4205.2: log with %h 0.84(0.82+0.02) 0.60(0.57+0.03) -28.6% 4205.3: log with %T 0.53(0.49+0.04) 0.52(0.48+0.03) -1.9% 4205.4: log with %t 0.84(0.80+0.04) 0.60(0.59+0.01) -28.6% 4205.5: log with %P 0.52(0.48+0.03) 0.51(0.50+0.01) -1.9% 4205.6: log with %p 0.85(0.78+0.06) 0.61(0.56+0.05) -28.2% 4205.7: log with %h-%h-%h 0.96(0.92+0.03) 0.69(0.64+0.04) -28.1% Reported-by: Ævar Arnfjörð Bjarmason Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- object-store.h | 2 +- sha1-file.c | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/object-store.h b/object-store.h index 709bf856b6..2fb6c0e4db 100644 --- a/object-store.h +++ b/object-store.h @@ -20,7 +20,7 @@ struct object_directory { * Be sure to call odb_load_loose_cache() before using. */ char loose_objects_subdir_seen[256]; - struct oid_array loose_objects_cache; + struct oid_array loose_objects_cache[256]; /* * Path to the alternative object store. 
If this is a relative path, diff --git a/sha1-file.c b/sha1-file.c index 2f965b2688..c3c6e50704 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -2155,7 +2155,7 @@ struct oid_array *odb_loose_cache(struct object_directory *odb, { int subdir_nr = oid->hash[0]; odb_load_loose_cache(odb, subdir_nr); - return &odb->loose_objects_cache; + return &odb->loose_objects_cache[subdir_nr]; } void odb_load_loose_cache(struct object_directory *odb, int subdir_nr) @@ -2173,14 +2173,17 @@ void odb_load_loose_cache(struct object_directory *odb, int subdir_nr) for_each_file_in_obj_subdir(subdir_nr, &buf, append_loose_object, NULL, NULL, - &odb->loose_objects_cache); + &odb->loose_objects_cache[subdir_nr]); odb->loose_objects_subdir_seen[subdir_nr] = 1; strbuf_release(&buf); } void odb_clear_loose_cache(struct object_directory *odb) { - oid_array_clear(&odb->loose_objects_cache); + int i; + + for (i = 0; i < ARRAY_SIZE(odb->loose_objects_cache); i++) + oid_array_clear(&odb->loose_objects_cache[i]); memset(&odb->loose_objects_subdir_seen, 0, sizeof(odb->loose_objects_subdir_seen)); } From 8be88dbcb1322df53c41c496bf718c5564c82bfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 7 Jan 2019 18:29:16 +0100 Subject: [PATCH 4/4] object-store: retire odb_load_loose_cache() Inline odb_load_loose_cache() into its only remaining caller, odb_loose_cache(). The latter offers a nicer interface for loading the cache, as it doesn't require callers to deal with fanout directory numbers directly. 
Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- object-store.h | 7 ------- sha1-file.c | 9 ++------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/object-store.h b/object-store.h index 2fb6c0e4db..e16aa38cae 100644 --- a/object-store.h +++ b/object-store.h @@ -47,13 +47,6 @@ void add_to_alternates_file(const char *dir); */ void add_to_alternates_memory(const char *dir); -/* - * Populate an odb's loose object cache for one particular subdirectory (i.e., - * the one that corresponds to the first byte of objects you're interested in, - * from 0 to 255 inclusive). - */ -void odb_load_loose_cache(struct object_directory *odb, int subdir_nr); - /* * Populate and return the loose object cache array corresponding to the * given object ID. diff --git a/sha1-file.c b/sha1-file.c index c3c6e50704..efcb2cbe74 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -2154,12 +2154,6 @@ struct oid_array *odb_loose_cache(struct object_directory *odb, const struct object_id *oid) { int subdir_nr = oid->hash[0]; - odb_load_loose_cache(odb, subdir_nr); - return &odb->loose_objects_cache[subdir_nr]; -} - -void odb_load_loose_cache(struct object_directory *odb, int subdir_nr) -{ struct strbuf buf = STRBUF_INIT; if (subdir_nr < 0 || @@ -2167,7 +2161,7 @@ void odb_load_loose_cache(struct object_directory *odb, int subdir_nr) BUG("subdir_nr out of range"); if (odb->loose_objects_subdir_seen[subdir_nr]) - return; + return &odb->loose_objects_cache[subdir_nr]; strbuf_addstr(&buf, odb->path); for_each_file_in_obj_subdir(subdir_nr, &buf, @@ -2176,6 +2170,7 @@ void odb_load_loose_cache(struct object_directory *odb, int subdir_nr) &odb->loose_objects_cache[subdir_nr]); odb->loose_objects_subdir_seen[subdir_nr] = 1; strbuf_release(&buf); + return &odb->loose_objects_cache[subdir_nr]; } void odb_clear_loose_cache(struct object_directory *odb)