From 41d97837ab1e5a35fdcfd7f6af9b5d56af62e92a Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Mon, 28 Jul 2025 22:05:19 +0300 Subject: [PATCH 1/2] xdiff: refactor xdl_hash_record() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inline the check for whitespace flags so that the compiler can hoist it out of the loop in xdl_prepare_ctx(). This improves the performance by 8%. $ hyperfine --warmup=1 -L rev HEAD,HEAD^ --setup='git checkout {rev} -- :/ && make git' ': {rev}; GIT_CONFIG_GLOBAL=/dev/null ./git log --oneline --shortstat v2.0.0..v2.5.0' Benchmark 1: : HEAD; GIT_CONFIG_GLOBAL=/dev/null ./git log --oneline --shortstat v2.0.0..v2.5.0 Time (mean ± σ): 1.670 s ± 0.044 s [User: 1.473 s, System: 0.196 s] Range (min … max): 1.619 s … 1.754 s 10 runs Benchmark 2: : HEAD^; GIT_CONFIG_GLOBAL=/dev/null ./git log --oneline --shortstat v2.0.0..v2.5.0 Time (mean ± σ): 1.801 s ± 0.021 s [User: 1.605 s, System: 0.192 s] Range (min … max): 1.766 s … 1.831 s 10 runs Summary ': HEAD^; GIT_CONFIG_GLOBAL=/dev/null ./git log --oneline --shortstat v2.0.0..v2.5.0' ran 1.08 ± 0.03 times faster than ': HEAD^^; GIT_CONFIG_GLOBAL=/dev/null ./git log --oneline --shortstat v2.0.0..v2.5.0' Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xutils.c | 7 ++----- xdiff/xutils.h | 10 +++++++++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/xdiff/xutils.c b/xdiff/xutils.c index 444a108f87..e070ed649f 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -249,7 +249,7 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags) return 1; } -static unsigned long xdl_hash_record_with_whitespace(char const **data, +unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags) { unsigned long ha = 5381; char const *ptr = *data; @@ -294,13 +294,10 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data, return ha; } -unsigned long xdl_hash_record(char const **data, char const *top, long flags) { +unsigned long xdl_hash_record_verbatim(char const **data, char const *top) { unsigned long ha = 5381; char const *ptr = *data; - if (flags & XDF_WHITESPACE_FLAGS) - return xdl_hash_record_with_whitespace(data, top, flags); - for (; ptr < top && *ptr != '\n'; ptr++) { ha += (ha << 5); ha ^= (unsigned long) *ptr; diff --git a/xdiff/xutils.h b/xdiff/xutils.h index fd0bba94e8..13f6831047 100644 --- a/xdiff/xutils.h +++ b/xdiff/xutils.h @@ -34,7 +34,15 @@ void *xdl_cha_alloc(chastore_t *cha); long xdl_guess_lines(mmfile_t *mf, long sample); int xdl_blankline(const char *line, long size, long flags); int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags); -unsigned long xdl_hash_record(char const **data, char const *top, long flags); +unsigned long xdl_hash_record_verbatim(char const **data, char const *top); +unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags); +static inline unsigned long xdl_hash_record(char const **data, char const *top, long flags) +{ + if (flags & XDF_WHITESPACE_FLAGS) + return xdl_hash_record_with_whitespace(data, top, flags); + else + return xdl_hash_record_verbatim(data, top); +} unsigned int xdl_hashbits(unsigned int size); int xdl_num_out(char *out, long val); int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, From a4bbe8af0b48f9c80ccc2c4619309c4a81c1460a Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Mon, 28 Jul 2025 22:05:20 +0300 Subject: [PATCH 2/2] xdiff: optimize xdl_hash_record_verbatim xdl_hash_record_verbatim uses modified djb2 hash with XOR instead of ADD for combining. The ADD-based variant is used as the basis of the modern ("GNU") symbol lookup scheme in ELF. Glibc dynamic loader received an optimized version of this hash function thanks to Noah Goldstein [1]. Switch xdl_hash_record_verbatim to additive hashing and implement an optimized loop following the scheme suggested by Noah. Timing 'git log --oneline --shortstat v2.0.0..v2.5.0' under perf, I got version | cycles, bn | instructions, bn --------------------------------------- A 6.38 11.3 B 6.21 10.89 C 5.80 9.95 D 5.83 8.74 --------------------------------------- A: baseline (git master at e4ef0485fd78) B: plus 'xdiff: refactor xdl_hash_record()' C: and plus this patch D: with 'xdiff: use xxhash' by Phillip Wood The resulting speedup for xdl_hash_record_verbatim itself is about 1.5x. [1] https://inbox.sourceware.org/libc-alpha/20220519221803.57957-6-goldstein.w.n@gmail.com/ Signed-off-by: Alexander Monakov Signed-off-by: Junio C Hamano --- xdiff/xutils.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/xdiff/xutils.c b/xdiff/xutils.c index e070ed649f..78d1cf74b1 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -294,16 +294,67 @@ unsigned long xdl_hash_record_with_whitespace(char const **data, return ha; } -unsigned long xdl_hash_record_verbatim(char const **data, char const *top) { - unsigned long ha = 5381; - char const *ptr = *data; +/* + * Compiler reassociation barrier: pretend to modify X and Y to disallow + * changing evaluation order with respect to following uses of X and Y. + */ +#ifdef __GNUC__ +#define REASSOC_FENCE(x, y) __asm__("" : "+r"(x), "+r"(y)) +#else +#define REASSOC_FENCE(x, y) +#endif +unsigned long xdl_hash_record_verbatim(char const **data, char const *top) { + unsigned long ha = 5381, c0, c1; + char const *ptr = *data; +#if 0 + /* + * The baseline form of the optimized loop below. This is the djb2 + * hash (the above function uses a variant with XOR instead of ADD). + */ for (; ptr < top && *ptr != '\n'; ptr++) { ha += (ha << 5); - ha ^= (unsigned long) *ptr; + ha += (unsigned long) *ptr; } *data = ptr < top ? ptr + 1: ptr; +#else + /* Process two characters per iteration. */ + if (top - ptr >= 2) do { + if ((c0 = ptr[0]) == '\n') { + *data = ptr + 1; + return ha; + } + if ((c1 = ptr[1]) == '\n') { + *data = ptr + 2; + c0 += ha; + REASSOC_FENCE(c0, ha); + ha = ha * 32 + c0; + return ha; + } + /* + * Combine characters C0 and C1 into the hash HA. We have + * HA = (HA * 33 + C0) * 33 + C1, and we want to ensure + * that dependency chain over HA is just one multiplication + * and one addition, i.e. we want to evaluate this as + * HA = HA * 33 * 33 + (C0 * 33 + C1), and likewise prefer + * (C0 * 32 + (C0 + C1)) for the expression in parenthesis. + */ + ha *= 33 * 33; + c1 += c0; + REASSOC_FENCE(c1, c0); + c1 += c0 * 32; + REASSOC_FENCE(c1, ha); + ha += c1; + ptr += 2; + } while (ptr < top - 1); + *data = top; + if (ptr < top && (c0 = ptr[0]) != '\n') { + c0 += ha; + REASSOC_FENCE(c0, ha); + ha = ha * 32 + c0; + } +#endif return ha; }