From f85b49f3d4af5ee0b428285799ac711d6abe1cfb Mon Sep 17 00:00:00 2001 From: LorenzoPegorari Date: Fri, 16 Jan 2026 01:05:03 +0100 Subject: [PATCH 1/2] diff: improve scaling of filenames in diffstat to handle UTF-8 chars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `show_stats()` function tries to scale the filenames in the diffstat to ensure they don't exceed the given `name-width`. It does so by calculating the "display width" of the characters to be dropped, but then advances the filename pointer by that number of bytes. However, the "display width" of a character is not always equal to its byte count. The result is that sometimes, when displaying UTF-8 characters, filenames exceed the given `name-width`, and frequently the bytes of the UTF-8 characters are truncated. The following is an example of the issue, where the 2 files are "HelloHi" and "Hello你好", and `name-width=6`: ...oHi | 0 ...好 | 0 Make the filename pointer move by the actual number of bytes of the characters to drop from the filename, rather than their display width, using the `utf8_width()` function. Force `len` to not be less than 0 (this happens if the given `name-width` is 2 or less), otherwise an infinite loop is entered. Signed-off-by: LorenzoPegorari Signed-off-by: Junio C Hamano --- diff.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/diff.c b/diff.c index a1961526c0..86fdf4d8d7 100644 --- a/diff.c +++ b/diff.c @@ -2823,17 +2823,12 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options) char *slash; prefix = "..."; len -= 3; - /* - * NEEDSWORK: (name_len - len) counts the display - * width, which would be shorter than the byte - * length of the corresponding substring. - * Advancing "name" by that number of bytes does - * *NOT* skip over that many columns, so it is - * very likely that chomping the pathname at the - * slash we will find starting from "name" will - * leave the resulting string still too long. - */ - name += name_len - len; + if (len < 0) + len = 0; + + while (name_len > len) + name_len -= utf8_width((const char**)&name, NULL); + slash = strchr(name, '/'); if (slash) name = slash; From 04f5d95ef7715e952c93f078e2973c44bb6f3396 Mon Sep 17 00:00:00 2001 From: LorenzoPegorari Date: Fri, 16 Jan 2026 01:05:38 +0100 Subject: [PATCH 2/2] t4073: add test for diffstat paths length when containing UTF-8 chars Add test checking the length of filepaths containing UTF-8 chars when generating a diffstat with various `name-width`s. Signed-off-by: LorenzoPegorari [jc: fixed up t/meson.build to spell the name of the new test file correctly] Signed-off-by: Junio C Hamano --- t/meson.build | 1 + t/t4073-diff-stat-name-width.sh | 61 +++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100755 t/t4073-diff-stat-name-width.sh diff --git a/t/meson.build b/t/meson.build index a5531df415..73edae4e3d 100644 --- a/t/meson.build +++ b/t/meson.build @@ -496,6 +496,7 @@ integration_tests = [ 't4070-diff-pairs.sh', 't4071-diff-minimal.sh', 't4072-diff-max-depth.sh', + 't4073-diff-stat-name-width.sh', 't4100-apply-stat.sh', 't4101-apply-nonl.sh', 't4102-apply-rename.sh', diff --git a/t/t4073-diff-stat-name-width.sh b/t/t4073-diff-stat-name-width.sh new file mode 100755 index 0000000000..ec5d3c3c1f --- /dev/null +++ b/t/t4073-diff-stat-name-width.sh @@ -0,0 +1,61 @@ +#!/bin/sh + +test_description='git-diff check diffstat filepaths length when containing UTF-8 chars' + +. ./test-lib.sh + + +create_files () { + mkdir -p "d你好" && + touch "d你好/f再见" +} + +test_expect_success 'setup' ' + git init && + git config core.quotepath off && + git commit -m "Initial commit" --allow-empty && + create_files && + git add . && + git commit -m "Added files" +' + +test_expect_success 'test name-width long enough for filepath' ' + git diff HEAD~1 HEAD --stat --stat-name-width=12 >out && + grep "d你好/f再见 |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=11 >out && + grep "d你好/f再见 |" out +' + +test_expect_success 'test name-width not long enough for dir name' ' + git diff HEAD~1 HEAD --stat --stat-name-width=10 >out && + grep ".../f再见 |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=9 >out && + grep ".../f再见 |" out +' + +test_expect_success 'test name-width not long enough for slash' ' + git diff HEAD~1 HEAD --stat --stat-name-width=8 >out && + grep "...f再见 |" out +' + +test_expect_success 'test name-width not long enough for file name' ' + git diff HEAD~1 HEAD --stat --stat-name-width=7 >out && + grep "...再见 |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=6 >out && + grep "...见 |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=5 >out && + grep "...见 |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=4 >out && + grep "... |" out +' + +test_expect_success 'test name-width minimum length' ' + git diff HEAD~1 HEAD --stat --stat-name-width=3 >out && + grep "... |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=2 >out && + grep "... |" out && + git diff HEAD~1 HEAD --stat --stat-name-width=1 >out && + grep "... |" out +' + +test_done