From 4f9c8d896397a1748132060d3465e8573c861633 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:17 -0700 Subject: [PATCH 1/7] string-list: report programming error with BUG Passing a string list that has .strdup_strings bit unset to string_list_split(), or one that has .strdup_strings bit set to string_list_split_in_place(), is a programmer error. Do not use die() to abort the execution. Use BUG() instead. As a developer-facing message, the message string itself should be a lot more concise, but let's keep the original one for now. Signed-off-by: Junio C Hamano --- string-list.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/string-list.c b/string-list.c index 53faaa8420..0cb920e9b0 100644 --- a/string-list.c +++ b/string-list.c @@ -283,7 +283,7 @@ int string_list_split(struct string_list *list, const char *string, const char *p = string, *end; if (!list->strdup_strings) - die("internal error in string_list_split(): " + BUG("internal error in string_list_split(): " "list->strdup_strings must be set"); for (;;) { count++; @@ -309,7 +309,7 @@ int string_list_split_in_place(struct string_list *list, char *string, char *p = string, *end; if (list->strdup_strings) - die("internal error in string_list_split_in_place(): " + BUG("internal error in string_list_split_in_place(): " "list->strdup_strings must not be set"); for (;;) { count++; From 9f6dfe43c8a55b833ae16486bcafe29b543461f9 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:18 -0700 Subject: [PATCH 2/7] string-list: align string_list_split() with its _in_place() counterpart The string_list_split_in_place() function was updated by 52acddf3 (string-list: multi-delimiter `string_list_split_in_place()`, 2023-04-24) to take more than one delimiter characters, hoping that we can later use it to replace our uses of strtok(). We however did not make a matching change to the string_list_split() function, which is very similar. Before giving both functions more features in future commits, allow string_list_split() to also take more than one delimiter characters to make them closer to each other. Signed-off-by: Junio C Hamano --- builtin/blame.c | 2 +- builtin/merge.c | 2 +- builtin/var.c | 2 +- connect.c | 2 +- diff.c | 2 +- fetch-pack.c | 2 +- notes.c | 2 +- parse-options.c | 2 +- pathspec.c | 2 +- protocol.c | 2 +- ref-filter.c | 4 ++-- setup.c | 3 ++- string-list.c | 4 ++-- string-list.h | 16 ++++++++-------- t/helper/test-path-utils.c | 3 ++- t/helper/test-ref-store.c | 2 +- t/unit-tests/u-string-list.c | 16 ++++++++-------- transport.c | 2 +- upload-pack.c | 2 +- 19 files changed, 37 insertions(+), 35 deletions(-) diff --git a/builtin/blame.c b/builtin/blame.c index 91586e6852..70a6460401 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -420,7 +420,7 @@ static void parse_color_fields(const char *s) colorfield_nr = 0; /* Ideally this would be stripped and split at the same time? */ - string_list_split(&l, s, ',', -1); + string_list_split(&l, s, ",", -1); ALLOC_GROW(colorfield, colorfield_nr + 1, colorfield_alloc); for_each_string_list_item(item, &l) { diff --git a/builtin/merge.c b/builtin/merge.c index 18b22c0a26..893f8950bf 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -875,7 +875,7 @@ static void add_strategies(const char *string, unsigned attr) if (string) { struct string_list list = STRING_LIST_INIT_DUP; struct string_list_item *item; - string_list_split(&list, string, ' ', -1); + string_list_split(&list, string, " ", -1); for_each_string_list_item(item, &list) append_strategy(get_strategy(item->string)); string_list_clear(&list, 0); diff --git a/builtin/var.c b/builtin/var.c index ada642a9fe..4ae7af0eff 100644 --- a/builtin/var.c +++ b/builtin/var.c @@ -181,7 +181,7 @@ static void list_vars(void) if (ptr->multivalued && *val) { struct string_list list = STRING_LIST_INIT_DUP; - string_list_split(&list, val, '\n', -1); + string_list_split(&list, val, "\n", -1); for (size_t i = 0; i < list.nr; i++) printf("%s=%s\n", ptr->name, list.items[i].string); string_list_clear(&list, 0); diff --git a/connect.c b/connect.c index e77287f426..867b12bde5 100644 --- a/connect.c +++ b/connect.c @@ -407,7 +407,7 @@ static int process_ref_v2(struct packet_reader *reader, struct ref ***list, * name. Subsequent fields (symref-target and peeled) are optional and * don't have a particular order. */ - if (string_list_split(&line_sections, line, ' ', -1) < 2) { + if (string_list_split(&line_sections, line, " ", -1) < 2) { ret = 0; goto out; } diff --git a/diff.c b/diff.c index dca87e164f..a81949a422 100644 --- a/diff.c +++ b/diff.c @@ -327,7 +327,7 @@ static unsigned parse_color_moved_ws(const char *arg) struct string_list l = STRING_LIST_INIT_DUP; struct string_list_item *i; - string_list_split(&l, arg, ',', -1); + string_list_split(&l, arg, ",", -1); for_each_string_list_item(i, &l) { struct strbuf sb = STRBUF_INIT; diff --git a/fetch-pack.c b/fetch-pack.c index c1be9b76eb..9866270696 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1914,7 +1914,7 @@ static void fetch_pack_config(void) char *str; if (!git_config_get_string("fetch.uriprotocols", &str) && str) { - string_list_split(&uri_protocols, str, ',', -1); + string_list_split(&uri_protocols, str, ",", -1); free(str); } } diff --git a/notes.c b/notes.c index 97b995f3f2..6afcf088b9 100644 --- a/notes.c +++ b/notes.c @@ -892,7 +892,7 @@ static int string_list_add_note_lines(struct string_list *list, * later, along with any empty strings that came from empty * lines within the file. */ - string_list_split(list, data, '\n', -1); + string_list_split(list, data, "\n", -1); free(data); return 0; } diff --git a/parse-options.c b/parse-options.c index 5224203ffe..9e7cb75192 100644 --- a/parse-options.c +++ b/parse-options.c @@ -1338,7 +1338,7 @@ static enum parse_opt_result usage_with_options_internal(struct parse_opt_ctx_t if (!saw_empty_line && !*str) saw_empty_line = 1; - string_list_split(&list, str, '\n', -1); + string_list_split(&list, str, "\n", -1); for (j = 0; j < list.nr; j++) { const char *line = list.items[j].string; diff --git a/pathspec.c b/pathspec.c index a3ddd701c7..de325f7ef9 100644 --- a/pathspec.c +++ b/pathspec.c @@ -201,7 +201,7 @@ static void parse_pathspec_attr_match(struct pathspec_item *item, const char *va if (!value || !*value) die(_("attr spec must not be empty")); - string_list_split(&list, value, ' ', -1); + string_list_split(&list, value, " ", -1); string_list_remove_empty_items(&list, 0); item->attr_check = attr_check_alloc(); diff --git a/protocol.c b/protocol.c index bae7226ff4..54b9f49c01 100644 --- a/protocol.c +++ b/protocol.c @@ -61,7 +61,7 @@ enum protocol_version determine_protocol_version_server(void) if (git_protocol) { struct string_list list = STRING_LIST_INIT_DUP; const struct string_list_item *item; - string_list_split(&list, git_protocol, ':', -1); + string_list_split(&list, git_protocol, ":", -1); for_each_string_list_item(item, &list) { const char *value; diff --git a/ref-filter.c b/ref-filter.c index f9f2c512a8..4edfb9c83b 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -435,7 +435,7 @@ static int remote_ref_atom_parser(struct ref_format *format UNUSED, } atom->u.remote_ref.nobracket = 0; - string_list_split(¶ms, arg, ',', -1); + string_list_split(¶ms, arg, ",", -1); for (i = 0; i < params.nr; i++) { const char *s = params.items[i].string; @@ -831,7 +831,7 @@ static int align_atom_parser(struct ref_format *format UNUSED, align->position = ALIGN_LEFT; - string_list_split(¶ms, arg, ',', -1); + string_list_split(¶ms, arg, ",", -1); for (i = 0; i < params.nr; i++) { const char *s = params.items[i].string; int position; diff --git a/setup.c b/setup.c index 6f52dab64c..b9f5eb8b51 100644 --- a/setup.c +++ b/setup.c @@ -1460,8 +1460,9 @@ static enum discovery_result setup_git_directory_gently_1(struct strbuf *dir, if (env_ceiling_dirs) { int empty_entry_found = 0; + static const char path_sep[] = { PATH_SEP, '\0' }; - string_list_split(&ceiling_dirs, env_ceiling_dirs, PATH_SEP, -1); + string_list_split(&ceiling_dirs, env_ceiling_dirs, path_sep, -1); filter_string_list(&ceiling_dirs, 0, canonicalize_ceiling_entry, &empty_entry_found); ceil_offset = longest_ancestor_length(dir->buf, &ceiling_dirs); diff --git a/string-list.c b/string-list.c index 0cb920e9b0..2284a009cb 100644 --- a/string-list.c +++ b/string-list.c @@ -277,7 +277,7 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_ } int string_list_split(struct string_list *list, const char *string, - int delim, int maxsplit) + const char *delim, int maxsplit) { int count = 0; const char *p = string, *end; @@ -291,7 +291,7 @@ int string_list_split(struct string_list *list, const char *string, string_list_append(list, p); return count; } - end = strchr(p, delim); + end = strpbrk(p, delim); if (end) { string_list_append_nodup(list, xmemdupz(p, end - p)); p = end + 1; diff --git a/string-list.h b/string-list.h index 122b318641..6c8650efde 100644 --- a/string-list.h +++ b/string-list.h @@ -254,7 +254,7 @@ struct string_list_item *unsorted_string_list_lookup(struct string_list *list, void unsorted_string_list_delete_item(struct string_list *list, int i, int free_util); /** - * Split string into substrings on character `delim` and append the + * Split string into substrings on characters in `delim` and append the * substrings to `list`. The input string is not modified. * list->strdup_strings must be set, as new memory needs to be * allocated to hold the substrings. If maxsplit is non-negative, @@ -262,15 +262,15 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_ * appended to list. * * Examples: - * string_list_split(l, "foo:bar:baz", ':', -1) -> ["foo", "bar", "baz"] - * string_list_split(l, "foo:bar:baz", ':', 0) -> ["foo:bar:baz"] - * string_list_split(l, "foo:bar:baz", ':', 1) -> ["foo", "bar:baz"] - * string_list_split(l, "foo:bar:", ':', -1) -> ["foo", "bar", ""] - * string_list_split(l, "", ':', -1) -> [""] - * string_list_split(l, ":", ':', -1) -> ["", ""] + * string_list_split(l, "foo:bar:baz", ":", -1) -> ["foo", "bar", "baz"] + * string_list_split(l, "foo:bar:baz", ":", 0) -> ["foo:bar:baz"] + * string_list_split(l, "foo:bar:baz", ":", 1) -> ["foo", "bar:baz"] + * string_list_split(l, "foo:bar:", ":", -1) -> ["foo", "bar", ""] + * string_list_split(l, "", ":", -1) -> [""] + * string_list_split(l, ":", ":", -1) -> ["", ""] */ int string_list_split(struct string_list *list, const char *string, - int delim, int maxsplit); + const char *delim, int maxsplit); /* * Like string_list_split(), except that string is split in-place: the diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index 086238c826..f5f33751da 100644 --- a/t/helper/test-path-utils.c +++ b/t/helper/test-path-utils.c @@ -348,6 +348,7 @@ int cmd__path_utils(int argc, const char **argv) if (argc == 4 && !strcmp(argv[1], "longest_ancestor_length")) { int len; struct string_list ceiling_dirs = STRING_LIST_INIT_DUP; + const char path_sep[] = { PATH_SEP, '\0' }; char *path = xstrdup(argv[2]); /* @@ -362,7 +363,7 @@ int cmd__path_utils(int argc, const char **argv) */ if (normalize_path_copy(path, path)) die("Path \"%s\" could not be normalized", argv[2]); - string_list_split(&ceiling_dirs, argv[3], PATH_SEP, -1); + string_list_split(&ceiling_dirs, argv[3], path_sep, -1); filter_string_list(&ceiling_dirs, 0, normalize_ceiling_entry, NULL); len = longest_ancestor_length(path, &ceiling_dirs); diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c index 8d9a271845..aa1cb9b4ac 100644 --- a/t/helper/test-ref-store.c +++ b/t/helper/test-ref-store.c @@ -29,7 +29,7 @@ static unsigned int parse_flags(const char *str, struct flag_definition *defs) if (!strcmp(str, "0")) return 0; - string_list_split(&masks, str, ',', 64); + string_list_split(&masks, str, ",", 64); for (size_t i = 0; i < masks.nr; i++) { const char *name = masks.items[i].string; struct flag_definition *def = defs; diff --git a/t/unit-tests/u-string-list.c b/t/unit-tests/u-string-list.c index d4ba5f9fa5..150a5f505f 100644 --- a/t/unit-tests/u-string-list.c +++ b/t/unit-tests/u-string-list.c @@ -43,7 +43,7 @@ static void t_string_list_equal(struct string_list *list, expected_strings->items[i].string); } -static void t_string_list_split(const char *data, int delim, int maxsplit, ...) +static void t_string_list_split(const char *data, const char *delim, int maxsplit, ...) { struct string_list expected_strings = STRING_LIST_INIT_DUP; struct string_list list = STRING_LIST_INIT_DUP; @@ -65,13 +65,13 @@ static void t_string_list_split(const char *data, int delim, int maxsplit, ...) void test_string_list__split(void) { - t_string_list_split("foo:bar:baz", ':', -1, "foo", "bar", "baz", NULL); - t_string_list_split("foo:bar:baz", ':', 0, "foo:bar:baz", NULL); - t_string_list_split("foo:bar:baz", ':', 1, "foo", "bar:baz", NULL); - t_string_list_split("foo:bar:baz", ':', 2, "foo", "bar", "baz", NULL); - t_string_list_split("foo:bar:", ':', -1, "foo", "bar", "", NULL); - t_string_list_split("", ':', -1, "", NULL); - t_string_list_split(":", ':', -1, "", "", NULL); + t_string_list_split("foo:bar:baz", ":", -1, "foo", "bar", "baz", NULL); + t_string_list_split("foo:bar:baz", ":", 0, "foo:bar:baz", NULL); + t_string_list_split("foo:bar:baz", ":", 1, "foo", "bar:baz", NULL); + t_string_list_split("foo:bar:baz", ":", 2, "foo", "bar", "baz", NULL); + t_string_list_split("foo:bar:", ":", -1, "foo", "bar", "", NULL); + t_string_list_split("", ":", -1, "", NULL); + t_string_list_split(":", ":", -1, "", "", NULL); } static void t_string_list_split_in_place(const char *data, const char *delim, diff --git a/transport.c b/transport.c index c123ac1e38..76487b5453 100644 --- a/transport.c +++ b/transport.c @@ -1042,7 +1042,7 @@ static const struct string_list *protocol_allow_list(void) if (enabled < 0) { const char *v = getenv("GIT_ALLOW_PROTOCOL"); if (v) { - string_list_split(&allowed, v, ':', -1); + string_list_split(&allowed, v, ":", -1); string_list_sort(&allowed); enabled = 1; } else { diff --git a/upload-pack.c b/upload-pack.c index 4f26f6afc7..91fcdcad9b 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1685,7 +1685,7 @@ static void process_args(struct packet_reader *request, if (data->uri_protocols.nr) send_err_and_die(data, "multiple packfile-uris lines forbidden"); - string_list_split(&data->uri_protocols, p, ',', -1); + string_list_split(&data->uri_protocols, p, ",", -1); continue; } From 527535fcdd2d9dec56877435f609852d0f2bf163 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:19 -0700 Subject: [PATCH 3/7] string-list: unify string_list_split* functions Thanks to the previous step, the only difference between these two related functions is that string_list_split() works on a string without modifying its contents (i.e. taking "const char *") and the resulting pieces of strings are their own copies in a string list, while string_list_split_in_place() works on a mutable string and the resulting pieces of strings come from the original string. Consolidate their implementations into a single helper function, and make them a thin wrapper around it. We can later add an extra flags parameter to extend both of these functions by updating only the internal helper function. Signed-off-by: Junio C Hamano --- string-list.c | 102 +++++++++++++++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 43 deletions(-) diff --git a/string-list.c b/string-list.c index 2284a009cb..65b6ceb259 100644 --- a/string-list.c +++ b/string-list.c @@ -276,55 +276,71 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_ list->nr--; } +/* + * append a substring [p..end] to list; return number of things it + * appended to the list. + */ +static int append_one(struct string_list *list, + const char *p, const char *end, + int in_place) +{ + if (!end) + end = p + strlen(p); + + if (in_place) { + *((char *)end) = '\0'; + string_list_append(list, p); + } else { + string_list_append_nodup(list, xmemdupz(p, end - p)); + } + return 1; +} + +/* + * Unfortunately this cannot become a public interface, as _in_place() + * wants to have "const char *string" while the other variant wants to + * have "char *string" for type safety. + * + * This accepts "const char *string" to allow both wrappers to use it; + * it internally casts away the constness when in_place is true by + * taking advantage of strpbrk() that takes a "const char *" arg and + * returns "char *" pointer into that const string. Yucky but works ;-). + */ +static int split_string(struct string_list *list, const char *string, const char *delim, + int maxsplit, int in_place) +{ + int count = 0; + const char *p = string; + + if (in_place && list->strdup_strings) + BUG("string_list_split_in_place() called with strdup_strings"); + else if (!in_place && !list->strdup_strings) + BUG("string_list_split() called without strdup_strings"); + + for (;;) { + char *end; + + if (0 <= maxsplit && maxsplit <= count) + end = NULL; + else + end = strpbrk(p, delim); + + count += append_one(list, p, end, in_place); + + if (!end) + return count; + p = end + 1; + } +} + int string_list_split(struct string_list *list, const char *string, const char *delim, int maxsplit) { - int count = 0; - const char *p = string, *end; - - if (!list->strdup_strings) - BUG("internal error in string_list_split(): " - "list->strdup_strings must be set"); - for (;;) { - count++; - if (maxsplit >= 0 && count > maxsplit) { - string_list_append(list, p); - return count; - } - end = strpbrk(p, delim); - if (end) { - string_list_append_nodup(list, xmemdupz(p, end - p)); - p = end + 1; - } else { - string_list_append(list, p); - return count; - } - } + return split_string(list, string, delim, maxsplit, 0); } int string_list_split_in_place(struct string_list *list, char *string, const char *delim, int maxsplit) { - int count = 0; - char *p = string, *end; - - if (list->strdup_strings) - BUG("internal error in string_list_split_in_place(): " - "list->strdup_strings must not be set"); - for (;;) { - count++; - if (maxsplit >= 0 && count > maxsplit) { - string_list_append(list, p); - return count; - } - end = strpbrk(p, delim); - if (end) { - *end = '\0'; - string_list_append(list, p); - p = end + 1; - } else { - string_list_append(list, p); - return count; - } - } + return split_string(list, string, delim, maxsplit, 1); } From 576454974165d51b7e39c0608cde1c84978f1a8a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:20 -0700 Subject: [PATCH 4/7] string-list: optionally trim string pieces split by string_list_split*() Teach the unified split_string() to take an optional "flags" word, and define the first flag STRING_LIST_SPLIT_TRIM to cause the split pieces to be trimmed before they are placed in the string list. Signed-off-by: Junio C Hamano --- string-list.c | 35 +++++++++++++++++--- string-list.h | 15 +++++++++ t/unit-tests/u-string-list.c | 64 ++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 5 deletions(-) diff --git a/string-list.c b/string-list.c index 65b6ceb259..86a309f8fb 100644 --- a/string-list.c +++ b/string-list.c @@ -282,11 +282,18 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_ */ static int append_one(struct string_list *list, const char *p, const char *end, - int in_place) + int in_place, unsigned flags) { if (!end) end = p + strlen(p); + if ((flags & STRING_LIST_SPLIT_TRIM)) { + /* rtrim */ + for (; p < end; end--) + if (!isspace(end[-1])) + break; + } + if (in_place) { *((char *)end) = '\0'; string_list_append(list, p); @@ -307,7 +314,7 @@ static int append_one(struct string_list *list, * returns "char *" pointer into that const string. Yucky but works ;-). */ static int split_string(struct string_list *list, const char *string, const char *delim, - int maxsplit, int in_place) + int maxsplit, int in_place, unsigned flags) { int count = 0; const char *p = string; @@ -320,12 +327,18 @@ static int split_string(struct string_list *list, const char *string, const char for (;;) { char *end; + if (flags & STRING_LIST_SPLIT_TRIM) { + /* ltrim */ + while (*p && isspace(*p)) + p++; + } + if (0 <= maxsplit && maxsplit <= count) end = NULL; else end = strpbrk(p, delim); - count += append_one(list, p, end, in_place); + count += append_one(list, p, end, in_place, flags); if (!end) return count; @@ -336,11 +349,23 @@ static int split_string(struct string_list *list, const char *string, const char int string_list_split(struct string_list *list, const char *string, const char *delim, int maxsplit) { - return split_string(list, string, delim, maxsplit, 0); + return split_string(list, string, delim, maxsplit, 0, 0); } int string_list_split_in_place(struct string_list *list, char *string, const char *delim, int maxsplit) { - return split_string(list, string, delim, maxsplit, 1); + return split_string(list, string, delim, maxsplit, 1, 0); +} + +int string_list_split_f(struct string_list *list, const char *string, + const char *delim, int maxsplit, unsigned flags) +{ + return split_string(list, string, delim, maxsplit, 0, flags); +} + +int string_list_split_in_place_f(struct string_list *list, char *string, + const char *delim, int maxsplit, unsigned flags) +{ + return split_string(list, string, delim, maxsplit, 1, flags); } diff --git a/string-list.h b/string-list.h index 6c8650efde..40e148712d 100644 --- a/string-list.h +++ b/string-list.h @@ -281,4 +281,19 @@ int string_list_split(struct string_list *list, const char *string, */ int string_list_split_in_place(struct string_list *list, char *string, const char *delim, int maxsplit); + +/* Flag bits for split_f and split_in_place_f functions */ +enum { + /* + * trim whitespaces around resulting string piece before adding + * it to the list + */ + STRING_LIST_SPLIT_TRIM = (1 << 0), +}; + +int string_list_split_f(struct string_list *, const char *string, + const char *delim, int maxsplit, unsigned flags); + +int string_list_split_in_place_f(struct string_list *, char *string, + const char *delim, int maxsplit, unsigned flags); #endif /* STRING_LIST_H */ diff --git a/t/unit-tests/u-string-list.c b/t/unit-tests/u-string-list.c index 150a5f505f..daa9307e45 100644 --- a/t/unit-tests/u-string-list.c +++ b/t/unit-tests/u-string-list.c @@ -63,6 +63,70 @@ static void t_string_list_split(const char *data, const char *delim, int maxspli string_list_clear(&list, 0); } +static void t_string_list_split_f(const char *data, const char *delim, + int maxsplit, unsigned flags, ...) +{ + struct string_list expected_strings = STRING_LIST_INIT_DUP; + struct string_list list = STRING_LIST_INIT_DUP; + va_list ap; + int len; + + va_start(ap, flags); + t_vcreate_string_list_dup(&expected_strings, 0, ap); + va_end(ap); + + string_list_clear(&list, 0); + len = string_list_split_f(&list, data, delim, maxsplit, flags); + cl_assert_equal_i(len, expected_strings.nr); + t_string_list_equal(&list, &expected_strings); + + string_list_clear(&expected_strings, 0); + string_list_clear(&list, 0); +} + +void test_string_list__split_f(void) +{ + t_string_list_split_f("::foo:bar:baz:", ":", -1, 0, + "", "", "foo", "bar", "baz", "", NULL); + t_string_list_split_f(" foo:bar : baz", ":", -1, STRING_LIST_SPLIT_TRIM, + "foo", "bar", "baz", NULL); + t_string_list_split_f(" a b c ", " ", 1, STRING_LIST_SPLIT_TRIM, + "a", "b c", NULL); +} + +static void t_string_list_split_in_place_f(const char *data_, const char *delim, + int maxsplit, unsigned flags, ...) +{ + struct string_list expected_strings = STRING_LIST_INIT_DUP; + struct string_list list = STRING_LIST_INIT_NODUP; + char *data = xstrdup(data_); + va_list ap; + int len; + + va_start(ap, flags); + t_vcreate_string_list_dup(&expected_strings, 0, ap); + va_end(ap); + + string_list_clear(&list, 0); + len = string_list_split_in_place_f(&list, data, delim, maxsplit, flags); + cl_assert_equal_i(len, expected_strings.nr); + t_string_list_equal(&list, &expected_strings); + + free(data); + string_list_clear(&expected_strings, 0); + string_list_clear(&list, 0); +} + +void test_string_list__split_in_place_f(void) +{ + t_string_list_split_in_place_f("::foo:bar:baz:", ":", -1, 0, + "", "", "foo", "bar", "baz", "", NULL); + t_string_list_split_in_place_f(" foo:bar : baz", ":", -1, STRING_LIST_SPLIT_TRIM, + "foo", "bar", "baz", NULL); + t_string_list_split_in_place_f(" a b c ", " ", 1, STRING_LIST_SPLIT_TRIM, + "a", "b c", NULL); +} + void test_string_list__split(void) { t_string_list_split("foo:bar:baz", ":", -1, "foo", "bar", "baz", NULL); From f3a303aef017ad6e53fa44643d832a1fa0de0d91 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:21 -0700 Subject: [PATCH 5/7] diff: simplify parsing of diff.colormovedws The code to parse this configuration variable, whose value is a comma-separated list of known tokens like "ignore-space-change" and "ignore-all-space", uses string_list_split() to split the value into pieces, and then places each piece of string in a strbuf to trim, before comparing the result with the list of known tokens. Thanks to the previous steps, now string_list_split() can trim the resulting pieces before it places them in the string list. Use it to simplify the code. Signed-off-by: Junio C Hamano --- diff.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/diff.c b/diff.c index a81949a422..70666ad2cd 100644 --- a/diff.c +++ b/diff.c @@ -327,29 +327,23 @@ static unsigned parse_color_moved_ws(const char *arg) struct string_list l = STRING_LIST_INIT_DUP; struct string_list_item *i; - string_list_split(&l, arg, ",", -1); + string_list_split_f(&l, arg, ",", -1, STRING_LIST_SPLIT_TRIM); for_each_string_list_item(i, &l) { - struct strbuf sb = STRBUF_INIT; - strbuf_addstr(&sb, i->string); - strbuf_trim(&sb); - - if (!strcmp(sb.buf, "no")) + if (!strcmp(i->string, "no")) ret = 0; - else if (!strcmp(sb.buf, "ignore-space-change")) + else if (!strcmp(i->string, "ignore-space-change")) ret |= XDF_IGNORE_WHITESPACE_CHANGE; - else if (!strcmp(sb.buf, "ignore-space-at-eol")) + else if (!strcmp(i->string, "ignore-space-at-eol")) ret |= XDF_IGNORE_WHITESPACE_AT_EOL; - else if (!strcmp(sb.buf, "ignore-all-space")) + else if (!strcmp(i->string, "ignore-all-space")) ret |= XDF_IGNORE_WHITESPACE; - else if (!strcmp(sb.buf, "allow-indentation-change")) + else if (!strcmp(i->string, "allow-indentation-change")) ret |= COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE; else { ret |= COLOR_MOVED_WS_ERROR; - error(_("unknown color-moved-ws mode '%s', possible values are 'ignore-space-change', 'ignore-space-at-eol', 'ignore-all-space', 'allow-indentation-change'"), sb.buf); + error(_("unknown color-moved-ws mode '%s', possible values are 'ignore-space-change', 'ignore-space-at-eol', 'ignore-all-space', 'allow-indentation-change'"), i->string); } - - strbuf_release(&sb); } if ((ret & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) && From 27531efa41cfa882473513dd93e696a16f6eb87b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:22 -0700 Subject: [PATCH 6/7] string-list: optionally omit empty string pieces in string_list_split*() Teach the unified split_string() machinery a new flag bit, STRING_LIST_SPLIT_NONEMPTY, to cause empty split pieces to be omitted from the resulting string list. Signed-off-by: Junio C Hamano --- string-list.c | 3 +++ string-list.h | 2 ++ t/unit-tests/u-string-list.c | 15 +++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/string-list.c b/string-list.c index 86a309f8fb..343cf1ca90 100644 --- a/string-list.c +++ b/string-list.c @@ -294,6 +294,9 @@ static int append_one(struct string_list *list, break; } + if ((flags & STRING_LIST_SPLIT_NONEMPTY) && (end <= p)) + return 0; + if (in_place) { *((char *)end) = '\0'; string_list_append(list, p); diff --git a/string-list.h b/string-list.h index 40e148712d..2b438c7733 100644 --- a/string-list.h +++ b/string-list.h @@ -289,6 +289,8 @@ enum { * it to the list */ STRING_LIST_SPLIT_TRIM = (1 << 0), + /* omit adding empty string piece to the resulting list */ + STRING_LIST_SPLIT_NONEMPTY = (1 << 1), }; int string_list_split_f(struct string_list *, const char *string, diff --git a/t/unit-tests/u-string-list.c b/t/unit-tests/u-string-list.c index daa9307e45..a2457d7b1e 100644 --- a/t/unit-tests/u-string-list.c +++ b/t/unit-tests/u-string-list.c @@ -92,6 +92,13 @@ void test_string_list__split_f(void) "foo", "bar", "baz", NULL); t_string_list_split_f(" a b c ", " ", 1, STRING_LIST_SPLIT_TRIM, "a", "b c", NULL); + t_string_list_split_f("::foo::bar:baz:", ":", -1, STRING_LIST_SPLIT_NONEMPTY, + "foo", "bar", "baz", NULL); + t_string_list_split_f("foo:baz", ":", -1, STRING_LIST_SPLIT_NONEMPTY, + "foo", "baz", NULL); + t_string_list_split_f("foo :: : baz", ":", -1, + STRING_LIST_SPLIT_NONEMPTY | STRING_LIST_SPLIT_TRIM, + "foo", "baz", NULL); } static void t_string_list_split_in_place_f(const char *data_, const char *delim, @@ -125,6 +132,14 @@ void test_string_list__split_in_place_f(void) "foo", "bar", "baz", NULL); t_string_list_split_in_place_f(" a b c ", " ", 1, STRING_LIST_SPLIT_TRIM, "a", "b c", NULL); + t_string_list_split_in_place_f("::foo::bar:baz:", ":", -1, + STRING_LIST_SPLIT_NONEMPTY, + "foo", "bar", "baz", NULL); + t_string_list_split_in_place_f("foo:baz", ":", -1, STRING_LIST_SPLIT_NONEMPTY, + "foo", "baz", NULL); + t_string_list_split_in_place_f("foo :: : baz", ":", -1, + STRING_LIST_SPLIT_NONEMPTY | STRING_LIST_SPLIT_TRIM, + "foo", "baz", NULL); } void test_string_list__split(void) From 2ab2aac73d234ae75096e2186b07cc14c57d2586 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:23 -0700 Subject: [PATCH 7/7] string-list: split-then-remove-empty can be done while splitting Thanks to the new STRING_LIST_SPLIT_NONEMPTY flag, a common pattern to split a string into a string list and then remove empty items in the resulting list is no longer needed. Instead, just tell the string_list_split*() to omit empty ones while splitting. Signed-off-by: Junio C Hamano --- notes.c | 4 ++-- pathspec.c | 3 +-- t/helper/test-hashmap.c | 4 ++-- t/helper/test-json-writer.c | 4 ++-- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/notes.c b/notes.c index 6afcf088b9..3603c4a42b 100644 --- a/notes.c +++ b/notes.c @@ -970,8 +970,8 @@ void string_list_add_refs_from_colon_sep(struct string_list *list, char *globs_copy = xstrdup(globs); int i; - string_list_split_in_place(&split, globs_copy, ":", -1); - string_list_remove_empty_items(&split, 0); + string_list_split_in_place_f(&split, globs_copy, ":", -1, + STRING_LIST_SPLIT_NONEMPTY); for (i = 0; i < split.nr; i++) string_list_add_refs_by_glob(list, split.items[i].string); diff --git a/pathspec.c b/pathspec.c index de325f7ef9..5993c4afa0 100644 --- a/pathspec.c +++ b/pathspec.c @@ -201,8 +201,7 @@ static void parse_pathspec_attr_match(struct pathspec_item *item, const char *va if (!value || !*value) die(_("attr spec must not be empty")); - string_list_split(&list, value, " ", -1); - string_list_remove_empty_items(&list, 0); + string_list_split_f(&list, value, " ", -1, STRING_LIST_SPLIT_NONEMPTY); item->attr_check = attr_check_alloc(); CALLOC_ARRAY(item->attr_match, list.nr); diff --git a/t/helper/test-hashmap.c b/t/helper/test-hashmap.c index 7782ae585e..e4dc02bd7a 100644 --- a/t/helper/test-hashmap.c +++ b/t/helper/test-hashmap.c @@ -149,8 +149,8 @@ int cmd__hashmap(int argc UNUSED, const char **argv UNUSED) /* break line into command and up to two parameters */ string_list_setlen(&parts, 0); - string_list_split_in_place(&parts, line.buf, DELIM, 2); - string_list_remove_empty_items(&parts, 0); + string_list_split_in_place_f(&parts, line.buf, DELIM, 2, + STRING_LIST_SPLIT_NONEMPTY); /* ignore empty lines */ if (!parts.nr) diff --git a/t/helper/test-json-writer.c b/t/helper/test-json-writer.c index a288069b04..f8316a7d29 100644 --- a/t/helper/test-json-writer.c +++ b/t/helper/test-json-writer.c @@ -492,8 +492,8 @@ static int scripted(void) /* break line into command and zero or more tokens */ string_list_setlen(&parts, 0); - string_list_split_in_place(&parts, line, " ", -1); - string_list_remove_empty_items(&parts, 0); + string_list_split_in_place_f(&parts, line, " ", -1, + STRING_LIST_SPLIT_NONEMPTY); /* ignore empty lines */ if (!parts.nr || !*parts.items[0].string)