From 9e76ed9bdc3b9dc2f691899752c3da3644a81c22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20B=C3=B6gershausen?= Date: Thu, 8 Jan 2026 18:40:11 +0100 Subject: [PATCH 1/2] utf8.c: Prepare workaround for iconv under macOS 14/15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MacOS14 (Sonoma) has started to ship an iconv library with bugs. The same bugs exists even in MacOS 15 (Sequoia) A bug report running the Git test suite says: three tests of t3900 fail on macOS 26.1 for me: not ok 17 - ISO-2022-JP should be shown in UTF-8 now not ok 25 - ISO-2022-JP should be shown in UTF-8 now not ok 38 - commit --fixup into ISO-2022-JP from UTF-8 Here's the verbose output of the first one: ----- snip! ----- expecting success of 3900.17 'ISO-2022-JP should be shown in UTF-8 now': compare_with ISO-2022-JP "$TEST_DIRECTORY"/t3900/2-UTF-8.t xt --- /Users/x/src/git/t/t3900/2-UTF-8.txt 2024-10-01 19:43:24.605230684 + 0000 +++ current 2025-12-08 21:52:45.786161909 +0000 @@ -1,4 +1,4 @@ はれひほふ しているのが、い るので。 -濱浜ほれぷりぽれ まびぐりろへ。 +濱浜ほれぷりぽれ まび$0$j$m$X!# not ok 17 - ISO-2022-JP should be shown in UTF-8 now 1..17 ----- snap! ----- compare_with runs git show to display a commit message, which in this case here was encoded using ISO-2022-JP and is supposed to be reencoded to UTF-8, but git show only does that half-way -- the "$0$j$m$X!#" part is from the original ISO-2022-JP representation. That botched conversion is done by utf8.c::reencode_string_iconv(). It calls iconv(3) to do the actual work, initially with an output buffer of the same size as the input. If the output needs more space the function enlarges the buffer and calls iconv(3) again. iconv(3) won't tell us how much space it needs, but it will report what part it already managed to convert, so we can increase the buffer and continue from there. ISO-2022-JP has escape codes for switching between character sets, so it's a stateful encoding. I guess the iconv(3) on my machine forgets the state at the end of part one and then messes up part two. [end of citation] Working around the buggy iconv shipped with the OS can be done in two ways: a) Link Git against a different version of iconv b) Improve the handling when iconv needs a larger output buffer a) is already done by default when either Fink [1] or MacPorts [2] or Homebrew [3] is installed. b) is implemented here, in case that no fixed iconv is available: When the output buffer is too short, increase it (as before) and start from scratch (this is new). This workound needs to be enabled with '#define ICONV_RESTART_RESET' and a makefile knob will be added in the next commit Suggested-by: René Scharfe Signed-off-by: Torsten Bögershausen [1] https://www.finkproject.org/ [2] https://www.macports.org/ [3] https://brew.sh/ Signed-off-by: Torsten Bögershausen Signed-off-by: Junio C Hamano --- utf8.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/utf8.c b/utf8.c index 35a0251939..96460cc414 100644 --- a/utf8.c +++ b/utf8.c @@ -515,6 +515,19 @@ char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv, out = xrealloc(out, outalloc); outpos = out + sofar; outsz = outalloc - sofar - 1; +#ifdef ICONV_RESTART_RESET + /* + * If iconv(3) messes up piecemeal conversions + * then restore the original pointers, sizes, + * and converter state, then retry converting + * the full string using the reallocated buffer. + */ + insz += cp - (iconv_ibp)in; /* Restore insz */ + cp = (iconv_ibp)in; /* original start value */ + outpos = out + bom_len; /* original start value */ + outsz = outalloc - bom_len - 1; /* new len */ + iconv(conv, NULL, NULL, NULL, NULL); /* reset iconv machinery */ +#endif } else { *outpos = '\0'; From be5864e08d92bd70a621037399177325207c7bbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20B=C3=B6gershausen?= Date: Thu, 8 Jan 2026 18:40:12 +0100 Subject: [PATCH 2/2] utf8.c: Enable workaround for iconv under macOS 14/15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit introduced a workaround in utf8.c to deal with broken iconv implementations. It is enabled when A MacOS version is used that has a buggy iconv library and there is no external library provided (and linked against) from neither MacPorts nor Homebrew. Signed-off-by: Torsten Bögershausen Signed-off-by: Junio C Hamano --- Makefile | 7 +++++++ config.mak.uname | 1 + 2 files changed, 8 insertions(+) diff --git a/Makefile b/Makefile index b7eba509c6..5a3823bb67 100644 --- a/Makefile +++ b/Makefile @@ -1692,6 +1692,7 @@ ifeq ($(uname_S),Darwin) ifeq ($(shell test -d /opt/local/lib && echo y),y) BASIC_CFLAGS += -I/opt/local/include BASIC_LDFLAGS += -L/opt/local/lib + HAS_GOOD_LIBICONV = Yes endif endif ifndef NO_APPLE_COMMON_CRYPTO @@ -1714,6 +1715,7 @@ endif ifdef USE_HOMEBREW_LIBICONV ifeq ($(shell test -d $(HOMEBREW_PREFIX)/opt/libiconv && echo y),y) ICONVDIR ?= $(HOMEBREW_PREFIX)/opt/libiconv + HAS_GOOD_LIBICONV = Yes endif endif endif @@ -1859,6 +1861,11 @@ ifndef NO_ICONV endif EXTLIBS += $(ICONV_LINK) -liconv endif + ifdef NEEDS_GOOD_LIBICONV + ifndef HAS_GOOD_LIBICONV + BASIC_CFLAGS += -DICONV_RESTART_RESET + endif + endif endif ifdef ICONV_OMITS_BOM BASIC_CFLAGS += -DICONV_OMITS_BOM diff --git a/config.mak.uname b/config.mak.uname index 38b35af366..3c35ae33a3 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -157,6 +157,7 @@ ifeq ($(uname_S),Darwin) endif ifeq ($(shell test "$(DARWIN_MAJOR_VERSION)" -ge 24 && echo 1),1) USE_HOMEBREW_LIBICONV = UnfortunatelyYes + NEEDS_GOOD_LIBICONV = UnfortunatelyYes endif # The builtin FSMonitor on MacOS builds upon Simple-IPC. Both require