From f1b85243034a690691d34af0576e6d3cb1a08743 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 15 Jul 2025 21:12:26 +0200 Subject: [PATCH 1/5] bswap.h: add support for __BYTE_ORDER__ The __BYTE_ORDER__ define is provided by gcc (since ~v4.6), clang (since ~v3.2) and icc (since ~16.0.3). The __BYTE_ORDER and BYTE_ORDER macros are libc specific and are not available on all supported platforms such as mingw. Add support for the __BYTE_ORDER__ macro as a fallback. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Junio C Hamano --- compat/bswap.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compat/bswap.h b/compat/bswap.h index b34054f2bd..0a457542dd 100644 --- a/compat/bswap.h +++ b/compat/bswap.h @@ -116,6 +116,12 @@ static inline uint64_t git_bswap64(uint64_t x) # define GIT_LITTLE_ENDIAN LITTLE_ENDIAN # define GIT_BIG_ENDIAN BIG_ENDIAN +#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && defined(__ORDER_BIG_ENDIAN__) + +# define GIT_BYTE_ORDER __BYTE_ORDER__ +# define GIT_LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__ +# define GIT_BIG_ENDIAN __ORDER_BIG_ENDIAN__ + #else # define GIT_BIG_ENDIAN 4321 From 30dea7ddf7a10d11818e754deba8120cef8446ca Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 15 Jul 2025 21:12:27 +0200 Subject: [PATCH 2/5] bswap.h: define GIT_LITTLE_ENDIAN on msvc as little endian The Microsoft Visual C++ (MSVC) compiler (as of Visual Studio 2022 version 17.13.6) does not define __BYTE_ORDER__ and its C-library does not define __BYTE_ORDER. The compiler is supported only on arm64 and x86 which are all little endian. Define GIT_BYTE_ORDER on msvc as little endian to avoid further checks. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Junio C Hamano --- compat/bswap.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/compat/bswap.h b/compat/bswap.h index 0a457542dd..fd604d9f7b 100644 --- a/compat/bswap.h +++ b/compat/bswap.h @@ -81,6 +81,10 @@ static inline uint64_t git_bswap64(uint64_t x) #define bswap32(x) _byteswap_ulong(x) #define bswap64(x) _byteswap_uint64(x) +#define GIT_LITTLE_ENDIAN 1234 +#define GIT_BIG_ENDIAN 4321 +#define GIT_BYTE_ORDER GIT_LITTLE_ENDIAN + #endif #if defined(bswap32) @@ -122,7 +126,7 @@ static inline uint64_t git_bswap64(uint64_t x) # define GIT_LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__ # define GIT_BIG_ENDIAN __ORDER_BIG_ENDIAN__ -#else +#elif !defined(GIT_BYTE_ORDER) # define GIT_BIG_ENDIAN 4321 # define GIT_LITTLE_ENDIAN 1234 From 4544cd19e429975882e20fa89dab7e73956f26e4 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 15 Jul 2025 21:12:28 +0200 Subject: [PATCH 3/5] bswap.h: always overwrite ntohl/ ntohll macros The ntohl and htonl macros are redefined because the provided macros were not always optimal. Sometimes it was a function call, sometimes it was a macro which did the shifting. Using the 'bswap' opcode on x86 provides probably better performance than performing the shifting. These macros are only overwritten on x86 if the "optimized" version is available. The ntohll and htonll macros are not available on every platform (at least glibc does not provide them) which means they need to be defined once the endianness of the system is determined. In order to get a more symmetrical setup, redefine the macros once the endianness of the system has been determined. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Junio C Hamano --- compat/bswap.h | 54 ++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/compat/bswap.h b/compat/bswap.h index fd604d9f7b..aeef304f67 100644 --- a/compat/bswap.h +++ b/compat/bswap.h @@ -87,27 +87,6 @@ static inline uint64_t git_bswap64(uint64_t x) #endif -#if defined(bswap32) - -#undef ntohl -#undef htonl -#define ntohl(x) bswap32(x) -#define htonl(x) bswap32(x) - -#endif - -#if defined(bswap64) - -#undef ntohll -#undef htonll -#define ntohll(x) bswap64(x) -#define htonll(x) bswap64(x) - -#else - -#undef ntohll -#undef htonll - #if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN) # define GIT_BYTE_ORDER __BYTE_ORDER @@ -145,14 +124,33 @@ static inline uint64_t git_bswap64(uint64_t x) #endif -#if GIT_BYTE_ORDER == GIT_BIG_ENDIAN -# define ntohll(n) (n) -# define htonll(n) (n) -#else -# define ntohll(n) default_bswap64(n) -# define htonll(n) default_bswap64(n) -#endif +#undef ntohl +#undef htonl +#undef ntohll +#undef htonll +#if GIT_BYTE_ORDER == GIT_BIG_ENDIAN +# define ntohl(x) (x) +# define htonl(x) (x) +# define ntohll(x) (x) +# define htonll(x) (x) +#else + +# if defined(bswap32) +# define ntohl(x) bswap32(x) +# define htonl(x) bswap32(x) +# else +# define ntohl(x) default_swab32(x) +# define htonl(x) default_swab32(x) +# endif + +# if defined(bswap64) +# define ntohll(x) bswap64(x) +# define htonll(x) bswap64(x) +# else +# define ntohll(x) default_bswap64(x) +# define htonll(x) default_bswap64(x) +# endif #endif static inline uint16_t get_be16(const void *ptr) From 0132f114efe90fb5f0baf61dbda8a1a33eace929 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 15 Jul 2025 21:12:29 +0200 Subject: [PATCH 4/5] bswap.h: remove optimized x86 version of bswap32/64 On x86 the bswap32/64 macro is implemented based on the x86 opcode which performs the required shifting in just one opcode. 
The other CPUs fall back to the generic shifting as implemented by default_swab32() and default_bswap64() if needed. I've been looking at how good a compiler is at recognizing the default shift and emitting an optimized operation: - x86, arm64 msvc v19.20 default_swab32() optimized default_bswap64() shifts _byteswap_uint64() optimized - x86, arm64 msvc v19.37 default_swab32() optimized default_bswap64() optimized _byteswap_uint64() optimized - arm64, gcc-4.9.4: optimized - x86-64, gcc-4.4.7: shifts - x86-64, gcc-4.5.3: optimized - x86-64, clang-3.0: optimized Given that gcc-4.5 and clang-3.0 are fairly old, any recent compiler should recognize the shift. Remove the optimized x86 version and rely on the compiler. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Junio C Hamano --- compat/bswap.h | 41 +---------------------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/compat/bswap.h b/compat/bswap.h index aeef304f67..ed00f6d1d5 100644 --- a/compat/bswap.h +++ b/compat/bswap.h @@ -35,46 +35,7 @@ static inline uint64_t default_bswap64(uint64_t val) #undef bswap32 #undef bswap64 -#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) - -#define bswap32 git_bswap32 -static inline uint32_t git_bswap32(uint32_t x) -{ - uint32_t result; - if (__builtin_constant_p(x)) - result = default_swab32(x); - else - __asm__("bswap %0" : "=r" (result) : "0" (x)); - return result; -} - -#define bswap64 git_bswap64 -#if defined(__x86_64__) -static inline uint64_t git_bswap64(uint64_t x) -{ - uint64_t result; - if (__builtin_constant_p(x)) - result = default_bswap64(x); - else - __asm__("bswap %q0" : "=r" (result) : "0" (x)); - return result; -} -#else -static inline uint64_t git_bswap64(uint64_t x) -{ - union { uint64_t i64; uint32_t i32[2]; } tmp, result; - if (__builtin_constant_p(x)) - result.i64 = default_bswap64(x); - else { - tmp.i64 = x; - result.i32[0] = git_bswap32(tmp.i32[1]); - result.i32[1] = git_bswap32(tmp.i32[0]); - } - 
return result.i64; -} -#endif - -#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64)) +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64)) #include From f4ac32c03af5ac53964a05c4de435da53a59615c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 15 Jul 2025 21:12:30 +0200 Subject: [PATCH 5/5] bswap.h: provide a built-in based version of bswap32/64 if possible The compiler is in general able to recognize the endian shift and replace it with an optimized opcode if possible. On certain architectures such as RiscV or MIPS the situation can get complicated. They don't provide an optimized opcode and masking the "higher" bits may require loading a constant which needs shifting. This causes the compiler to emit a lot of instructions for the operation. The provided builtin directive on these architectures calls a function which does the operation instead of emitting the code for operation. Bring back the change from commit 6547d1c9 (bswap.h: add support for built-in bswap functions, 2025-04-23). The bswap32/64 macro can now be defined unconditionally so it won't regress on big endian architectures. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Junio C Hamano --- compat/bswap.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/compat/bswap.h b/compat/bswap.h index ed00f6d1d5..28635ebc69 100644 --- a/compat/bswap.h +++ b/compat/bswap.h @@ -32,6 +32,14 @@ static inline uint64_t default_bswap64(uint64_t val) ((val & (uint64_t)0xff00000000000000ULL) >> 56)); } +/* + * __has_builtin is available since Clang 10 and GCC 10. + * Below is a fallback for older compilers. 
+ */ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + #undef bswap32 #undef bswap64 @@ -46,6 +54,11 @@ static inline uint64_t default_bswap64(uint64_t val) #define GIT_BIG_ENDIAN 4321 #define GIT_BYTE_ORDER GIT_LITTLE_ENDIAN +#elif __has_builtin(__builtin_bswap32) && __has_builtin(__builtin_bswap64) + +#define bswap32(x) __builtin_bswap32((x)) +#define bswap64(x) __builtin_bswap64((x)) + #endif #if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)