From 5c11529c665fa2c206c00781ddf8710c4000cbda Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 4 Mar 2024 11:48:47 +0100 Subject: [PATCH 01/13] reftable/pq: use `size_t` to track iterator index The reftable priority queue is used by the merged iterator to yield records from its sub-iterators in the expected order. Each entry has a record corresponding to such a sub-iterator as well as an index that indicates which sub-iterator the record belongs to. But while the sub-iterators are tracked with a `size_t`, we store the index as an `int` in the entry. Fix this and use `size_t` consistently. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/pq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reftable/pq.h b/reftable/pq.h index e85bac9b52..9e25a43a36 100644 --- a/reftable/pq.h +++ b/reftable/pq.h @@ -12,7 +12,7 @@ https://developers.google.com/open-source/licenses/bsd #include "record.h" struct pq_entry { - int index; + size_t index; struct reftable_record rec; }; From 48929d2e479df7c3fcbbe9027e23099f206a701c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 4 Mar 2024 11:48:51 +0100 Subject: [PATCH 02/13] reftable/merged: make `merged_iter` structure private The `merged_iter` structure is not used anywhere outside of "merged.c", but is declared in its header. Move it into the code file so that it is clear that its implementation details are never exposed to anything. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 9 +++++++++ reftable/merged.h | 11 +---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/reftable/merged.c b/reftable/merged.c index 1aa6cd31b7..12ebd732e8 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -17,6 +17,15 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-error.h" #include "system.h" +struct merged_iter { + struct reftable_iterator *stack; + uint32_t hash_id; + size_t stack_len; + uint8_t typ; + int suppress_deletions; + struct merged_iter_pqueue pq; +}; + static int merged_iter_init(struct merged_iter *mi) { for (size_t i = 0; i < mi->stack_len; i++) { diff --git a/reftable/merged.h b/reftable/merged.h index 7d9f95d27e..a2571dbc99 100644 --- a/reftable/merged.h +++ b/reftable/merged.h @@ -9,7 +9,7 @@ https://developers.google.com/open-source/licenses/bsd #ifndef MERGED_H #define MERGED_H -#include "pq.h" +#include "system.h" struct reftable_merged_table { struct reftable_table *stack; @@ -24,15 +24,6 @@ struct reftable_merged_table { uint64_t max; }; -struct merged_iter { - struct reftable_iterator *stack; - uint32_t hash_id; - size_t stack_len; - uint8_t typ; - int suppress_deletions; - struct merged_iter_pqueue pq; -}; - void merged_table_release(struct reftable_merged_table *mt); #endif From aad8ad6fe1fe15d4b024425cd64389928838efd3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 4 Mar 2024 11:48:55 +0100 Subject: [PATCH 03/13] reftable/merged: advance subiter on subsequent iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When advancing the merged iterator, we pop the topmost entry from its priority queue and then advance the sub-iterator that the entry belongs to, adding the result as a new entry. This is quite sensible in the case where the merged iterator is used to actually iterate through records. 
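As a rough standalone sketch of the scheme described above (illustration only, not reftable code: plain sorted arrays stand in for the sub-iterators, a linear scan stands in for the priority queue, and all names are made up):

    #include <stdio.h>

    struct subiter {
            const int *vals;
            int len, pos;
    };

    static int next_min(struct subiter *s, int n, int *out)
    {
            int best = -1;

            for (int i = 0; i < n; i++) {
                    if (s[i].pos >= s[i].len)
                            continue;
                    if (best < 0 || s[i].vals[s[i].pos] < s[best].vals[s[best].pos])
                            best = i;
            }
            if (best < 0)
                    return 1; /* all sub-iterators are exhausted */

            *out = s[best].vals[s[best].pos];
            /*
             * Eager advancement: the winning sub-iterator moves on right away,
             * even if the caller never asks for another record. In reftable this
             * step decodes the next record, which is the work that this patch
             * defers via the new advance_index field.
             */
            s[best].pos++;
            return 0;
    }

    int main(void)
    {
            const int a[] = { 1, 4, 9 }, b[] = { 2, 3, 10 };
            struct subiter s[] = { { a, 3, 0 }, { b, 3, 0 } };
            int v;

            while (!next_min(s, 2, &v))
                    printf("%d\n", v); /* prints 1 2 3 4 9 10, one per line */
            return 0;
    }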
But the merged iterator is also used when we look up a single record, only, so advancing the sub-iterator is wasted effort because we would never even look at the result. Instead of immediately advancing the sub-iterator, we can also defer this to the next iteration of the merged iterator by storing the intent-to-advance. This results in a small speedup when reading many records. The following benchmark creates 10000 refs, which will also end up with many ref lookups: Benchmark 1: update-ref: create many refs (revision = HEAD~) Time (mean ± σ): 337.2 ms ± 7.3 ms [User: 200.1 ms, System: 136.9 ms] Range (min … max): 329.3 ms … 373.2 ms 100 runs Benchmark 2: update-ref: create many refs (revision = HEAD) Time (mean ± σ): 332.5 ms ± 5.9 ms [User: 197.2 ms, System: 135.1 ms] Range (min … max): 327.6 ms … 359.8 ms 100 runs Summary update-ref: create many refs (revision = HEAD) ran 1.01 ± 0.03 times faster than update-ref: create many refs (revision = HEAD~) While this speedup alone isn't really worth it, this refactoring will also allow two additional optimizations in subsequent patches. First, it will allow us to special-case when there is only a single sub-iter left to circumvent the priority queue altogether. And second, it makes it easier to avoid copying records to the caller. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/reftable/merged.c b/reftable/merged.c index 12ebd732e8..9b1ccfff00 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -19,11 +19,12 @@ https://developers.google.com/open-source/licenses/bsd struct merged_iter { struct reftable_iterator *stack; + struct merged_iter_pqueue pq; uint32_t hash_id; size_t stack_len; uint8_t typ; int suppress_deletions; - struct merged_iter_pqueue pq; + ssize_t advance_index; }; static int merged_iter_init(struct merged_iter *mi) @@ -96,13 +97,17 @@ static int merged_iter_next_entry(struct merged_iter *mi, struct pq_entry entry = { 0 }; int err = 0; + if (mi->advance_index >= 0) { + err = merged_iter_advance_subiter(mi, mi->advance_index); + if (err < 0) + return err; + mi->advance_index = -1; + } + if (merged_iter_pqueue_is_empty(mi->pq)) return 1; entry = merged_iter_pqueue_remove(&mi->pq); - err = merged_iter_advance_subiter(mi, entry.index); - if (err < 0) - return err; /* One can also use reftable as datacenter-local storage, where the ref @@ -116,14 +121,6 @@ static int merged_iter_next_entry(struct merged_iter *mi, struct pq_entry top = merged_iter_pqueue_top(mi->pq); int cmp; - /* - * When the next entry comes from the same queue as the current - * entry then it must by definition be larger. This avoids a - * comparison in the most common case. 
- */ - if (top.index == entry.index) - break; - cmp = reftable_record_cmp(&top.rec, &entry.rec); if (cmp > 0) break; @@ -137,6 +134,7 @@ static int merged_iter_next_entry(struct merged_iter *mi, reftable_record_release(rec); *rec = entry.rec; + mi->advance_index = entry.index; done: if (err) @@ -160,9 +158,8 @@ static int merged_iter_next(struct merged_iter *mi, struct reftable_record *rec) static int merged_iter_next_void(void *p, struct reftable_record *rec) { struct merged_iter *mi = p; - if (merged_iter_pqueue_is_empty(mi->pq)) + if (merged_iter_pqueue_is_empty(mi->pq) && mi->advance_index < 0) return 1; - return merged_iter_next(mi, rec); } @@ -255,6 +252,7 @@ static int merged_table_seek_record(struct reftable_merged_table *mt, .typ = reftable_record_type(rec), .hash_id = mt->hash_id, .suppress_deletions = mt->suppress_deletions, + .advance_index = -1, }; struct merged_iter *p; int err; From bb2d6be4c1010af3f92a7f4a6feaab957e0e906b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 4 Mar 2024 11:48:59 +0100 Subject: [PATCH 04/13] reftable/merged: make subiters own their records MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For each subiterator, the merged table needs to track their current record. This record is owned by the priority queue though instead of by the merged iterator. This is not optimal performance-wise. For one, we need to move around records whenever we add or remove a record from the priority queue. Thus, the bigger the entries the more bytes we need to copy around. And compared to pointers, a reftable record is rather on the bigger side. The other issue is that this makes it harder to reuse the records. Refactor the code so that the merged iterator tracks ownership of the records per-subiter. Instead of having records in the priority queue, we can now use mere pointers to the per-subiter records. This also allows us to swap records between the caller and the per-subiter record instead of doing an actual copy via `reftable_record_copy_from()`, which removes the need to release the caller-provided record. This results in a noticeable speedup when iterating through many refs. The following benchmark iterates through 1 million refs: Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 145.5 ms ± 4.5 ms [User: 142.5 ms, System: 2.8 ms] Range (min … max): 141.3 ms … 177.0 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 139.0 ms ± 4.7 ms [User: 136.1 ms, System: 2.8 ms] Range (min … max): 134.2 ms … 182.2 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.05 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~) This refactoring also allows a subsequent refactoring where we start reusing memory allocated by the reftable records because we do not need to release the caller-provided record anymore. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 54 ++++++++++++++++++++++++---------------------- reftable/pq.c | 8 ++----- reftable/pq.h | 2 +- reftable/pq_test.c | 41 ++++++++++++++++------------------- 4 files changed, 49 insertions(+), 56 deletions(-) diff --git a/reftable/merged.c b/reftable/merged.c index 9b1ccfff00..ae74234472 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -17,8 +17,13 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-error.h" #include "system.h" +struct merged_subiter { + struct reftable_iterator iter; + struct reftable_record rec; +}; + struct merged_iter { - struct reftable_iterator *stack; + struct merged_subiter *subiters; struct merged_iter_pqueue pq; uint32_t hash_id; size_t stack_len; @@ -32,16 +37,18 @@ static int merged_iter_init(struct merged_iter *mi) for (size_t i = 0; i < mi->stack_len; i++) { struct pq_entry e = { .index = i, + .rec = &mi->subiters[i].rec, }; int err; - reftable_record_init(&e.rec, mi->typ); - err = iterator_next(&mi->stack[i], &e.rec); + reftable_record_init(&mi->subiters[i].rec, mi->typ); + err = iterator_next(&mi->subiters[i].iter, + &mi->subiters[i].rec); if (err < 0) return err; if (err > 0) { - reftable_iterator_destroy(&mi->stack[i]); - reftable_record_release(&e.rec); + reftable_iterator_destroy(&mi->subiters[i].iter); + reftable_record_release(&mi->subiters[i].rec); continue; } @@ -56,9 +63,11 @@ static void merged_iter_close(void *p) struct merged_iter *mi = p; merged_iter_pqueue_release(&mi->pq); - for (size_t i = 0; i < mi->stack_len; i++) - reftable_iterator_destroy(&mi->stack[i]); - reftable_free(mi->stack); + for (size_t i = 0; i < mi->stack_len; i++) { + reftable_iterator_destroy(&mi->subiters[i].iter); + reftable_record_release(&mi->subiters[i].rec); + } + reftable_free(mi->subiters); } static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, @@ -66,17 +75,16 @@ static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, { struct pq_entry e = { .index = idx, + .rec = &mi->subiters[idx].rec, }; int err; - reftable_record_init(&e.rec, mi->typ); - err = iterator_next(&mi->stack[idx], &e.rec); + err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec); if (err < 0) return err; - if (err > 0) { - reftable_iterator_destroy(&mi->stack[idx]); - reftable_record_release(&e.rec); + reftable_iterator_destroy(&mi->subiters[idx].iter); + reftable_record_release(&mi->subiters[idx].rec); return 0; } @@ -86,7 +94,7 @@ static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) { - if (iterator_is_null(&mi->stack[idx])) + if (iterator_is_null(&mi->subiters[idx].iter)) return 0; return merged_iter_advance_nonnull_subiter(mi, idx); } @@ -121,25 +129,19 @@ static int merged_iter_next_entry(struct merged_iter *mi, struct pq_entry top = merged_iter_pqueue_top(mi->pq); int cmp; - cmp = reftable_record_cmp(&top.rec, &entry.rec); + cmp = reftable_record_cmp(top.rec, entry.rec); if (cmp > 0) break; merged_iter_pqueue_remove(&mi->pq); err = merged_iter_advance_subiter(mi, top.index); if (err < 0) - goto done; - reftable_record_release(&top.rec); + return err; } - reftable_record_release(rec); - *rec = entry.rec; mi->advance_index = entry.index; - -done: - if (err) - reftable_record_release(&entry.rec); - return err; + SWAP(*rec, *entry.rec); + return 0; } static int merged_iter_next(struct merged_iter *mi, struct reftable_record *rec) @@ -257,10 
+259,10 @@ static int merged_table_seek_record(struct reftable_merged_table *mt, struct merged_iter *p; int err; - REFTABLE_CALLOC_ARRAY(merged.stack, mt->stack_len); + REFTABLE_CALLOC_ARRAY(merged.subiters, mt->stack_len); for (size_t i = 0; i < mt->stack_len; i++) { err = reftable_table_seek_record(&mt->stack[i], - &merged.stack[merged.stack_len], rec); + &merged.subiters[merged.stack_len].iter, rec); if (err < 0) goto out; if (!err) diff --git a/reftable/pq.c b/reftable/pq.c index e0ccce2b97..0074d6bc43 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -14,7 +14,7 @@ https://developers.google.com/open-source/licenses/bsd int pq_less(struct pq_entry *a, struct pq_entry *b) { - int cmp = reftable_record_cmp(&a->rec, &b->rec); + int cmp = reftable_record_cmp(a->rec, b->rec); if (cmp == 0) return a->index > b->index; return cmp < 0; @@ -82,10 +82,6 @@ void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry void merged_iter_pqueue_release(struct merged_iter_pqueue *pq) { - int i = 0; - for (i = 0; i < pq->len; i++) { - reftable_record_release(&pq->heap[i].rec); - } FREE_AND_NULL(pq->heap); - pq->len = pq->cap = 0; + memset(pq, 0, sizeof(*pq)); } diff --git a/reftable/pq.h b/reftable/pq.h index 9e25a43a36..ce23972c16 100644 --- a/reftable/pq.h +++ b/reftable/pq.h @@ -13,7 +13,7 @@ https://developers.google.com/open-source/licenses/bsd struct pq_entry { size_t index; - struct reftable_record rec; + struct reftable_record *rec; }; struct merged_iter_pqueue { diff --git a/reftable/pq_test.c b/reftable/pq_test.c index c202eff848..b7d3c80cc7 100644 --- a/reftable/pq_test.c +++ b/reftable/pq_test.c @@ -27,48 +27,43 @@ void merged_iter_pqueue_check(struct merged_iter_pqueue pq) static void test_pq(void) { - char *names[54] = { NULL }; - int N = ARRAY_SIZE(names) - 1; - struct merged_iter_pqueue pq = { NULL }; + struct reftable_record recs[54]; + int N = ARRAY_SIZE(recs) - 1, i; char *last = NULL; - int i = 0; for (i = 0; i < N; i++) { - char name[100]; - snprintf(name, sizeof(name), "%02d", i); - names[i] = xstrdup(name); + struct strbuf refname = STRBUF_INIT; + strbuf_addf(&refname, "%02d", i); + + reftable_record_init(&recs[i], BLOCK_TYPE_REF); + recs[i].u.ref.refname = strbuf_detach(&refname, NULL); } i = 1; do { - struct pq_entry e = { .rec = { .type = BLOCK_TYPE_REF, - .u.ref = { - .refname = names[i], - } } }; + struct pq_entry e = { + .rec = &recs[i], + }; + merged_iter_pqueue_add(&pq, &e); merged_iter_pqueue_check(pq); + i = (i * 7) % N; } while (i != 1); while (!merged_iter_pqueue_is_empty(pq)) { struct pq_entry e = merged_iter_pqueue_remove(&pq); - struct reftable_record *rec = &e.rec; merged_iter_pqueue_check(pq); - EXPECT(reftable_record_type(rec) == BLOCK_TYPE_REF); - if (last) { - EXPECT(strcmp(last, rec->u.ref.refname) < 0); - } - /* this is names[i], so don't dealloc. 
*/ last = rec->u.ref.refname; rec->u.ref.refname = NULL; reftable_record_release(rec); - } - for (i = 0; i < N; i++) { - reftable_free(names[i]); + EXPECT(reftable_record_type(e.rec) == BLOCK_TYPE_REF); + if (last) + EXPECT(strcmp(last, e.rec->u.ref.refname) < 0); + last = e.rec->u.ref.refname; } + for (i = 0; i < N; i++) + reftable_record_release(&recs[i]); merged_iter_pqueue_release(&pq); }

From 2d71a1d4a23a42ac7dfc927d7263b8eef03fee2f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 4 Mar 2024 11:49:03 +0100 Subject: [PATCH 05/13] reftable/merged: remove unnecessary null check for subiters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

Whenever we advance a subiter we first call `iterator_is_null()`. This is not needed though because we only ever advance subiters which have entries in the priority queue, and we do not add entries to the priority queue when the subiter has been exhausted.

Drop the check as well as the now-unused function. This results in a surprisingly big speedup:

Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 138.1 ms ± 4.4 ms [User: 135.1 ms, System: 2.8 ms] Range (min … max): 133.4 ms … 167.3 ms 1000 runs

Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 134.4 ms ± 4.2 ms [User: 131.5 ms, System: 2.8 ms] Range (min … max): 130.0 ms … 164.0 ms 1000 runs

Summary show-ref: single matching ref (revision = HEAD) ran 1.03 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~)

Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/iter.c | 5 ----- reftable/iter.h | 4 ---- reftable/merged.c | 10 +--------- 3 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/reftable/iter.c b/reftable/iter.c index 8b5ebf6183..7aa30c4a51 100644 --- a/reftable/iter.c +++ b/reftable/iter.c @@ -16,11 +16,6 @@ https://developers.google.com/open-source/licenses/bsd #include "reader.h" #include "reftable-error.h" -int iterator_is_null(struct reftable_iterator *it) -{ - return !it->ops; -} - static void filtering_ref_iterator_close(void *iter_arg) { struct filtering_ref_iterator *fri = iter_arg; diff --git a/reftable/iter.h b/reftable/iter.h index 47d67d84df..537431baba 100644 --- a/reftable/iter.h +++ b/reftable/iter.h @@ -16,10 +16,6 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-iterator.h" #include "reftable-generic.h" -/* Returns true for a zeroed out iterator, such as the one returned from - * iterator_destroy.
*/ -int iterator_is_null(struct reftable_iterator *it); - /* iterator that produces only ref records that point to `oid` */ struct filtering_ref_iterator { int double_check;

diff --git a/reftable/merged.c b/reftable/merged.c index ae74234472..29ad09f3d8 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -70,8 +70,7 @@ static void merged_iter_close(void *p) reftable_free(mi->subiters); } -static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, - size_t idx) +static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) { struct pq_entry e = { .index = idx, @@ -92,13 +91,6 @@ static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, return 0; } -static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) -{ - if (iterator_is_null(&mi->subiters[idx].iter)) - return 0; - return merged_iter_advance_nonnull_subiter(mi, idx); -} - static int merged_iter_next_entry(struct merged_iter *mi, struct reftable_record *rec) {

From 3b6dd6ad1da614dec42576de416e819cb77b1592 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 4 Mar 2024 11:49:08 +0100 Subject: [PATCH 06/13] reftable/merged: handle subiter cleanup on close only

When advancing one of the subiters fails we immediately release resources associated with that subiter. This is not necessary though as we will release these resources when closing the merged iterator anyway.

Drop the logic and only release resources when the merged iterator is done. This is a mere cleanup that should help reduce the cognitive load when reading through the code.

Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/reftable/merged.c b/reftable/merged.c index 29ad09f3d8..d9ed4a19dd 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -46,11 +46,8 @@ static int merged_iter_init(struct merged_iter *mi) &mi->subiters[i].rec); if (err < 0) return err; - if (err > 0) { - reftable_iterator_destroy(&mi->subiters[i].iter); - reftable_record_release(&mi->subiters[i].rec); + if (err > 0) continue; - } merged_iter_pqueue_add(&mi->pq, &e); } @@ -79,13 +76,8 @@ static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) int err; err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec); - if (err < 0) + if (err) return err; - if (err > 0) { - reftable_iterator_destroy(&mi->subiters[idx].iter); - reftable_record_release(&mi->subiters[idx].rec); - return 0; - } merged_iter_pqueue_add(&mi->pq, &e); return 0; }

From f8c1a8e2e18f71b46549ba722fc5e4a62db0864b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 4 Mar 2024 11:49:13 +0100 Subject: [PATCH 07/13] reftable/merged: circumvent pqueue with single subiter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

The merged iterator uses a priority queue to order records so that we can yield them in the expected order. This priority queue of course comes with some overhead as we need to add, compare and remove entries in that priority queue.

In the general case, that overhead cannot really be avoided. But when we have a single subiter left then there is no need to use the priority queue anymore because the order is exactly the same as what that subiter would return.

While having a single subiter may sound like an edge case, it happens more frequently than one might think.
In the most common scenario, you can expect a repository to have a single large table that contains most of the records and then a set of smaller tables which contain later additions to the reftable stack. In this case it is quite likely that we exhaust subiters of those smaller stacks before exhausting the large table. Special-case this and return records directly from the remaining subiter. This results in a sizeable speedup when iterating over 1m refs in a repository with a single table: Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 135.4 ms ± 4.4 ms [User: 132.5 ms, System: 2.8 ms] Range (min … max): 131.0 ms … 166.3 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 126.3 ms ± 3.9 ms [User: 123.3 ms, System: 2.8 ms] Range (min … max): 122.7 ms … 157.0 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.07 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/reftable/merged.c b/reftable/merged.c index d9ed4a19dd..29161a32cf 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -87,16 +87,36 @@ static int merged_iter_next_entry(struct merged_iter *mi, struct reftable_record *rec) { struct pq_entry entry = { 0 }; - int err = 0; + int err = 0, empty; + + empty = merged_iter_pqueue_is_empty(mi->pq); if (mi->advance_index >= 0) { + /* + * When there are no pqueue entries then we only have a single + * subiter left. There is no need to use the pqueue in that + * case anymore as we know that the subiter will return entries + * in the correct order already. + * + * While this may sound like a very specific edge case, it may + * happen more frequently than you think. Most repositories + * will end up having a single large base table that contains + * most of the refs. It's thus likely that we exhaust all + * subiters but the one from that base ref. + */ + if (empty) + return iterator_next(&mi->subiters[mi->advance_index].iter, + rec); + err = merged_iter_advance_subiter(mi, mi->advance_index); if (err < 0) return err; + if (!err) + empty = 0; mi->advance_index = -1; } - if (merged_iter_pqueue_is_empty(mi->pq)) + if (empty) return 1; entry = merged_iter_pqueue_remove(&mi->pq); From 080f8c4565609dab2969587d1f8f11e960d0af34 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 4 Mar 2024 11:49:18 +0100 Subject: [PATCH 08/13] reftable/merged: avoid duplicate pqueue emptiness check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calling `merged_iter_next_void()` we first check whether the iter has been exhausted already. We already perform this check two levels down the stack in `merged_iter_next_entry()` though, which makes this check redundant. Now if this check was there to accelerate the common case it might have made sense to keep it. But the iterator being exhausted is rather the uncommon case because you can expect most reftable stacks to contain more than two refs. Simplify the code by removing the check. As `merged_iter_next_void()` is basically empty except for calling `merged_iter_next()` now, merge these two functions. 
This also results in a tiny speedup when iterating over many refs:

Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 125.6 ms ± 3.8 ms [User: 122.7 ms, System: 2.8 ms] Range (min … max): 122.4 ms … 153.4 ms 1000 runs

Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 124.0 ms ± 3.9 ms [User: 121.1 ms, System: 2.8 ms] Range (min … max): 120.1 ms … 156.4 ms 1000 runs

Summary show-ref: single matching ref (revision = HEAD) ran 1.01 ± 0.04 times faster than show-ref: single matching ref (revision = HEAD~)

Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/reftable/merged.c b/reftable/merged.c index 29161a32cf..f85a24c678 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -148,25 +148,17 @@ static int merged_iter_next_entry(struct merged_iter *mi, return 0; } -static int merged_iter_next(struct merged_iter *mi, struct reftable_record *rec) -{ - while (1) { - int err = merged_iter_next_entry(mi, rec); - if (err == 0 && mi->suppress_deletions && - reftable_record_is_deletion(rec)) { - continue; - } - - return err; - } -} - static int merged_iter_next_void(void *p, struct reftable_record *rec) { struct merged_iter *mi = p; - if (merged_iter_pqueue_is_empty(mi->pq) && mi->advance_index < 0) - return 1; - return merged_iter_next(mi, rec); + while (1) { + int err = merged_iter_next_entry(mi, rec); + if (err) + return err; + if (mi->suppress_deletions && reftable_record_is_deletion(rec)) + continue; + return 0; + } } static struct reftable_iterator_vtable merged_iter_vtable = {

From 71d9a2e99138ed401e2678b4295b72210a398346 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 4 Mar 2024 11:49:22 +0100 Subject: [PATCH 09/13] reftable/record: reuse refname when decoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

When decoding a reftable record we will first release the user-provided record and then decode the new record into it. This is quite inefficient as we basically need to reallocate at least the refname every time.

Refactor the function to start tracking the refname capacity. Like this, we can stow away the refname, release, restore and then grow the refname to the required number of bytes via `REFTABLE_ALLOC_GROW()`.

This refactoring is safe to do because all functions that assign to the refname will first call `reftable_ref_record_release()`, which will zero out the complete record after releasing memory.

This change results in a nice speedup when iterating over 1 million refs:

Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 124.0 ms ± 3.9 ms [User: 121.1 ms, System: 2.7 ms] Range (min … max): 120.4 ms … 152.7 ms 1000 runs

Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 114.4 ms ± 3.7 ms [User: 111.5 ms, System: 2.7 ms] Range (min … max): 111.0 ms … 152.1 ms 1000 runs

Summary show-ref: single matching ref (revision = HEAD) ran 1.08 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~)

Furthermore, with this change we now perform a mostly constant number of allocations when iterating.
Before this change: HEAP SUMMARY: in use at exit: 13,603 bytes in 125 blocks total heap usage: 1,006,620 allocs, 1,006,495 frees, 25,398,363 bytes allocated After this change: HEAP SUMMARY: in use at exit: 13,603 bytes in 125 blocks total heap usage: 6,623 allocs, 6,498 frees, 509,592 bytes allocated Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/record.c | 16 ++++++++++++---- reftable/reftable-record.h | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/reftable/record.c b/reftable/record.c index d6bb42e887..2b52e47c30 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -368,16 +368,24 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key, struct reftable_ref_record *r = rec; struct string_view start = in; uint64_t update_index = 0; - int n = get_var_int(&update_index, &in); + const char *refname = NULL; + size_t refname_cap = 0; + int n; + + assert(hash_size > 0); + + n = get_var_int(&update_index, &in); if (n < 0) return n; string_view_consume(&in, n); + SWAP(refname, r->refname); + SWAP(refname_cap, r->refname_cap); reftable_ref_record_release(r); + SWAP(r->refname, refname); + SWAP(r->refname_cap, refname_cap); - assert(hash_size > 0); - - r->refname = reftable_malloc(key.len + 1); + REFTABLE_ALLOC_GROW(r->refname, key.len + 1, r->refname_cap); memcpy(r->refname, key.buf, key.len); r->refname[key.len] = 0; diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h index bb6e99acd3..e657001d42 100644 --- a/reftable/reftable-record.h +++ b/reftable/reftable-record.h @@ -22,6 +22,7 @@ https://developers.google.com/open-source/licenses/bsd /* reftable_ref_record holds a ref database entry target_value */ struct reftable_ref_record { char *refname; /* Name of the ref, malloced. */ + size_t refname_cap; uint64_t update_index; /* Logical timestamp at which this value is * written */ From 6620f9134cc36189eecd88375a226a7bd7836a8a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 4 Mar 2024 11:49:26 +0100 Subject: [PATCH 10/13] reftable/record: reuse refname when copying MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do the same optimization as in the preceding commit, but this time for `reftable_record_copy()`. While not as noticeable, it still results in a small speedup when iterating over 1 million refs: Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 114.0 ms ± 3.8 ms [User: 111.1 ms, System: 2.7 ms] Range (min … max): 110.9 ms … 144.3 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 112.5 ms ± 3.7 ms [User: 109.5 ms, System: 2.8 ms] Range (min … max): 109.2 ms … 140.7 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.01 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/record.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/reftable/record.c b/reftable/record.c index 2b52e47c30..12a44f70e5 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -205,14 +205,26 @@ static void reftable_ref_record_copy_from(void *rec, const void *src_rec, { struct reftable_ref_record *ref = rec; const struct reftable_ref_record *src = src_rec; + char *refname = NULL; + size_t refname_cap = 0; + assert(hash_size > 0); - /* This is simple and correct, but we could probably reuse the hash - * fields. 
*/ reftable_ref_record_release(ref); + SWAP(refname, ref->refname); + SWAP(refname_cap, ref->refname_cap); reftable_ref_record_release(ref); + SWAP(ref->refname, refname); + SWAP(ref->refname_cap, refname_cap); + if (src->refname) { - ref->refname = xstrdup(src->refname); + size_t refname_len = strlen(src->refname); + + REFTABLE_ALLOC_GROW(ref->refname, refname_len + 1, + ref->refname_cap); + memcpy(ref->refname, src->refname, refname_len); + ref->refname[refname_len] = 0; } + ref->update_index = src->update_index; ref->value_type = src->value_type; switch (src->value_type) {

From daf4f43d0d84234c2308c95ba63e54bdb8846859 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 4 Mar 2024 11:49:31 +0100 Subject: [PATCH 11/13] reftable/record: decode keys in place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

When reading a record from a block, we need to decode the record's key. As reftable keys are prefix-compressed, meaning they reuse a prefix from the preceding record's key, this is a bit more involved than just having to copy the relevant bytes: we need to figure out the prefix and suffix lengths, copy the prefix from the preceding record and finally copy the suffix from the current record.

This is done by passing three buffers to `reftable_decode_key()`: one buffer that holds the result, one buffer that holds the last key, and one buffer that points to the current record. The final key is then assembled by calling `strbuf_add()` twice to copy over the prefix and suffix.

Performing two memory copies is inefficient though. And we can indeed do better by decoding keys in place. Instead of providing two buffers, the caller need only pass a single buffer that is already pre-populated with the last key. Like this, we only have to call `strbuf_setlen()` to trim the key to its prefix and then `strbuf_add()` to add the suffix.
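To make the prefix compression concrete, here is a tiny standalone illustration (made-up values; a plain char array stands in for the strbuf):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* the buffer already holds the previously decoded key */
            char key[64] = "refs/heads/main";
            size_t prefix_len = 11;       /* reuse "refs/heads/" from the last key */
            const char *suffix = "topic"; /* suffix stored in the current record */

            /*
             * In-place decode: trim the buffer to the prefix (cf. strbuf_setlen())
             * and append the suffix (cf. strbuf_add()). Only the suffix bytes are
             * copied; the prefix stays where it already is.
             */
            key[prefix_len] = '\0';
            memcpy(key + prefix_len, suffix, strlen(suffix) + 1);

            printf("%s\n", key); /* refs/heads/topic */
            return 0;
    }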
This refactoring leads to a noticeable performance bump when iterating over 1 million refs: Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 112.2 ms ± 3.9 ms [User: 109.3 ms, System: 2.8 ms] Range (min … max): 109.2 ms … 149.6 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 106.0 ms ± 3.5 ms [User: 103.2 ms, System: 2.7 ms] Range (min … max): 103.2 ms … 133.7 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.06 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 25 +++++++++++-------------- reftable/block.h | 2 -- reftable/record.c | 19 +++++++++---------- reftable/record.h | 9 ++++++--- reftable/record_test.c | 3 ++- 5 files changed, 28 insertions(+), 30 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index 72eb73b380..ad9074dba6 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -291,9 +291,8 @@ static int restart_key_less(size_t idx, void *args) /* the restart key is verbatim in the block, so this could avoid the alloc for decoding the key */ struct strbuf rkey = STRBUF_INIT; - struct strbuf last_key = STRBUF_INIT; uint8_t unused_extra; - int n = reftable_decode_key(&rkey, &unused_extra, last_key, in); + int n = reftable_decode_key(&rkey, &unused_extra, in); int result; if (n < 0) { a->error = 1; @@ -326,35 +325,34 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec) if (it->next_off >= it->br->block_len) return 1; - n = reftable_decode_key(&it->key, &extra, it->last_key, in); + n = reftable_decode_key(&it->last_key, &extra, in); if (n < 0) return -1; - - if (!it->key.len) + if (!it->last_key.len) return REFTABLE_FORMAT_ERROR; string_view_consume(&in, n); - n = reftable_record_decode(rec, it->key, extra, in, it->br->hash_size); + n = reftable_record_decode(rec, it->last_key, extra, in, it->br->hash_size); if (n < 0) return -1; string_view_consume(&in, n); - strbuf_swap(&it->last_key, &it->key); it->next_off += start.len - in.len; return 0; } int block_reader_first_key(struct block_reader *br, struct strbuf *key) { - struct strbuf empty = STRBUF_INIT; - int off = br->header_off + 4; + int off = br->header_off + 4, n; struct string_view in = { .buf = br->block.data + off, .len = br->block_len - off, }; - uint8_t extra = 0; - int n = reftable_decode_key(key, &extra, empty, in); + + strbuf_reset(key); + + n = reftable_decode_key(key, &extra, in); if (n < 0) return n; if (!key->len) @@ -371,7 +369,6 @@ int block_iter_seek(struct block_iter *it, struct strbuf *want) void block_iter_close(struct block_iter *it) { strbuf_release(&it->last_key); - strbuf_release(&it->key); } int block_reader_seek(struct block_reader *br, struct block_iter *it, @@ -408,8 +405,8 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it, if (err < 0) goto done; - reftable_record_key(&rec, &it->key); - if (err > 0 || strbuf_cmp(&it->key, want) >= 0) { + reftable_record_key(&rec, &it->last_key); + if (err > 0 || strbuf_cmp(&it->last_key, want) >= 0) { err = 0; goto done; } diff --git a/reftable/block.h b/reftable/block.h index 17481e6331..51699af233 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -84,12 +84,10 @@ struct block_iter { /* key for last entry we read. */ struct strbuf last_key; - struct strbuf key; }; #define BLOCK_ITER_INIT { \ .last_key = STRBUF_INIT, \ - .key = STRBUF_INIT, \ } /* initializes a block reader. 
*/ diff --git a/reftable/record.c b/reftable/record.c index 12a44f70e5..b9c6eee88a 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -159,20 +159,19 @@ int reftable_encode_key(int *restart, struct string_view dest, return start.len - dest.len; } -int reftable_decode_key(struct strbuf *key, uint8_t *extra, - struct strbuf last_key, struct string_view in) +int reftable_decode_key(struct strbuf *last_key, uint8_t *extra, + struct string_view in) { int start_len = in.len; uint64_t prefix_len = 0; uint64_t suffix_len = 0; - int n = get_var_int(&prefix_len, &in); + int n; + + n = get_var_int(&prefix_len, &in); if (n < 0) return -1; string_view_consume(&in, n); - if (prefix_len > last_key.len) - return -1; - n = get_var_int(&suffix_len, &in); if (n <= 0) return -1; @@ -181,12 +180,12 @@ int reftable_decode_key(struct strbuf *key, uint8_t *extra, *extra = (uint8_t)(suffix_len & 0x7); suffix_len >>= 3; - if (in.len < suffix_len) + if (in.len < suffix_len || + prefix_len > last_key->len) return -1; - strbuf_reset(key); - strbuf_add(key, last_key.buf, prefix_len); - strbuf_add(key, in.buf, suffix_len); + strbuf_setlen(last_key, prefix_len); + strbuf_add(last_key, in.buf, suffix_len); string_view_consume(&in, suffix_len); return start_len - in.len; diff --git a/reftable/record.h b/reftable/record.h index a05e2be179..91c9c6ebfd 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -81,9 +81,12 @@ int reftable_encode_key(int *is_restart, struct string_view dest, struct strbuf prev_key, struct strbuf key, uint8_t extra); -/* Decode into `key` and `extra` from `in` */ -int reftable_decode_key(struct strbuf *key, uint8_t *extra, - struct strbuf last_key, struct string_view in); +/* + * Decode into `last_key` and `extra` from `in`. `last_key` is expected to + * contain the decoded key of the preceding record, if any. + */ +int reftable_decode_key(struct strbuf *last_key, uint8_t *extra, + struct string_view in); /* reftable_index_record are used internally to speed up lookups. */ struct reftable_index_record { diff --git a/reftable/record_test.c b/reftable/record_test.c index a86cff5526..89209894d8 100644 --- a/reftable/record_test.c +++ b/reftable/record_test.c @@ -295,7 +295,8 @@ static void test_key_roundtrip(void) EXPECT(!restart); EXPECT(n > 0); - m = reftable_decode_key(&roundtrip, &rt_extra, last_key, dest); + strbuf_addstr(&roundtrip, "refs/heads/master"); + m = reftable_decode_key(&roundtrip, &rt_extra, dest); EXPECT(n == m); EXPECT(0 == strbuf_cmp(&key, &roundtrip)); EXPECT(rt_extra == extra); From f1bf54aee30c879b27602aa159264d5e9803c6b7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 4 Mar 2024 11:49:35 +0100 Subject: [PATCH 12/13] reftable: allow inlining of a few functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have a few functions which are basically just accessors to structures. As those functions are executed inside the hot loop when iterating through many refs, the fact that they cannot be inlined is costing us some performance. Move the function definitions into their respective headers so that they can be inlined. 
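The pattern, sketched with one of the affected accessors (the complete change is in the diff below):

    /*
     * Before: only the declaration is visible to callers; the definition lives
     * in pq.c, so calls from other translation units cannot be inlined without
     * link-time optimization.
     */
    int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq);

    /*
     * After: the definition sits in pq.h as `static inline`, so every file that
     * includes the header can inline the call.
     */
    static inline int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq)
    {
            return pq.len == 0;
    }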
This results in a performance improvement when iterating over 1 million refs: Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 105.9 ms ± 3.6 ms [User: 103.0 ms, System: 2.8 ms] Range (min … max): 103.1 ms … 133.4 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 100.7 ms ± 3.4 ms [User: 97.8 ms, System: 2.8 ms] Range (min … max): 97.8 ms … 124.0 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.05 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/pq.c | 10 ---------- reftable/pq.h | 12 ++++++++++-- reftable/record.c | 11 ----------- reftable/record.h | 12 ++++++++++-- 4 files changed, 20 insertions(+), 25 deletions(-) diff --git a/reftable/pq.c b/reftable/pq.c index 0074d6bc43..7fb45d8c60 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -20,16 +20,6 @@ int pq_less(struct pq_entry *a, struct pq_entry *b) return cmp < 0; } -struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq) -{ - return pq.heap[0]; -} - -int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq) -{ - return pq.len == 0; -} - struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq) { int i = 0; diff --git a/reftable/pq.h b/reftable/pq.h index ce23972c16..f796c23179 100644 --- a/reftable/pq.h +++ b/reftable/pq.h @@ -22,12 +22,20 @@ struct merged_iter_pqueue { size_t cap; }; -struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq); -int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq); void merged_iter_pqueue_check(struct merged_iter_pqueue pq); struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq); void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e); void merged_iter_pqueue_release(struct merged_iter_pqueue *pq); int pq_less(struct pq_entry *a, struct pq_entry *b); +static inline struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq) +{ + return pq.heap[0]; +} + +static inline int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq) +{ + return pq.len == 0; +} + #endif diff --git a/reftable/record.c b/reftable/record.c index b9c6eee88a..367de04600 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -1176,11 +1176,6 @@ void reftable_record_key(struct reftable_record *rec, struct strbuf *dest) reftable_record_vtable(rec)->key(reftable_record_data(rec), dest); } -uint8_t reftable_record_type(struct reftable_record *rec) -{ - return rec->type; -} - int reftable_record_encode(struct reftable_record *rec, struct string_view dest, int hash_size) { @@ -1302,12 +1297,6 @@ int reftable_log_record_is_deletion(const struct reftable_log_record *log) return (log->value_type == REFTABLE_LOG_DELETION); } -void string_view_consume(struct string_view *s, int n) -{ - s->buf += n; - s->len -= n; -} - static void *reftable_record_data(struct reftable_record *rec) { switch (rec->type) { diff --git a/reftable/record.h b/reftable/record.h index 91c9c6ebfd..5e8304e052 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -25,7 +25,11 @@ struct string_view { }; /* Advance `s.buf` by `n`, and decrease length. 
*/ -void string_view_consume(struct string_view *s, int n); +static inline void string_view_consume(struct string_view *s, int n) +{ + s->buf += n; + s->len -= n; +} /* utilities for de/encoding varints */ @@ -127,7 +131,6 @@ int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b); int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size); void reftable_record_print(struct reftable_record *rec, int hash_size); void reftable_record_key(struct reftable_record *rec, struct strbuf *dest); -uint8_t reftable_record_type(struct reftable_record *rec); void reftable_record_copy_from(struct reftable_record *rec, struct reftable_record *src, int hash_size); uint8_t reftable_record_val_type(struct reftable_record *rec); @@ -138,6 +141,11 @@ int reftable_record_decode(struct reftable_record *rec, struct strbuf key, int hash_size); int reftable_record_is_deletion(struct reftable_record *rec); +static inline uint8_t reftable_record_type(struct reftable_record *rec) +{ + return rec->type; +} + /* frees and zeroes out the embedded record */ void reftable_record_release(struct reftable_record *rec); From 43f70eaea0e3fa9d98c895e9341674a67262b657 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 4 Mar 2024 11:49:40 +0100 Subject: [PATCH 13/13] refs/reftable: precompute prefix length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're recomputing the prefix length on every iteration of the ref iterator. Precompute it for another speedup when iterating over 1 million refs: Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 100.3 ms ± 3.7 ms [User: 97.3 ms, System: 2.8 ms] Range (min … max): 97.5 ms … 139.7 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 95.8 ms ± 3.4 ms [User: 92.9 ms, System: 2.8 ms] Range (min … max): 93.0 ms … 121.9 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.05 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index a14f2ad7f4..4d27fdde54 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -346,6 +346,7 @@ struct reftable_ref_iterator { struct object_id oid; const char *prefix; + size_t prefix_len; unsigned int flags; int err; }; @@ -371,8 +372,8 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) if (!starts_with(iter->ref.refname, "refs/")) continue; - if (iter->prefix && - strncmp(iter->prefix, iter->ref.refname, strlen(iter->prefix))) { + if (iter->prefix_len && + strncmp(iter->prefix, iter->ref.refname, iter->prefix_len)) { iter->err = 1; break; } @@ -481,6 +482,7 @@ static struct reftable_ref_iterator *ref_iterator_for_stack(struct reftable_ref_ iter = xcalloc(1, sizeof(*iter)); base_ref_iterator_init(&iter->base, &reftable_ref_iterator_vtable, 1); iter->prefix = prefix; + iter->prefix_len = prefix ? strlen(prefix) : 0; iter->base.oid = &iter->oid; iter->flags = flags; iter->refs = refs;