From f62dcc7f30d16af29c0f707005aceb5eb6119279 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 23 Jun 2025 16:11:29 -0700 Subject: [PATCH 1/7] remote: remove branch->merge_name and fix branch_release() The branch structure has both branch->merge_name and branch->merge for tracking the merge information. The former is allocated by add_merge() and stores the names read from the configuration file. The latter is allocated by set_merge() which is called by branch_get() when an external caller requests a branch. This leads to the confusing situation where branch->merge_nr tracks both the size of branch->merge (once its allocated) and branch->merge_name. The branch_release() function incorrectly assumes that branch->merge is always set when branch->merge_nr is non-zero, and can potentially crash if read_config() is called without branch_get() being called on every branch. In addition, branch_release() fails to free some of the memory associated with the structure including: * Failure to free the refspec_item containers in branch->merge[i] * Failure to free the strings in branch->merge_name[i] * Failure to free the branch->merge_name parent array. The set_merge() function sets branch->merge_nr to 0 when there is no valid remote_name, to avoid external callers seeing a non-zero merge_nr but a NULL merge array. This results in failure to release most of the merge data as well. These issues could be fixed directly, and indeed I initially proposed such a change at [1] in the past. While this works, there was some confusion during review because of the inconsistencies. Instead, its time to clean up the situation properly. Remove branch->merge_name entirely. Instead, allocate branch->merge earlier within add_merge() instead of within set_merge(). Instead of having set_merge() copy from merge_name[i] to merge[i]->src, just have add_merge() directly initialize merge[i]->src. Modify the add_merge() to call xstrdup() itself, instead of having the caller of add_merge() do so. This makes it more obvious which code owns the memory. Update all callers which use branch->merge_name[i] to use branch->merge[i]->src instead. Add a merge_clear() function which properly releases all of the merge-related memory, and which sets branch->merge_nr to zero. Use this both in branch_release() and in set_merge(), fixing the leak when set_merge() finds no valid remote_name. Add a set_merge variable to the branch structure, which indicates whether set_merge() has been called. This replaces the previous use of a NULL check against the branch->merge array. With these changes, the merge array is always allocated when merge_nr is non-zero. This use of refspec_item to store the names should be safe. External callers should be using branch_get() to obtain a pointer to the branch, which will call set_merge(), and the callers internal to remote.c already handle the partially initialized refpsec_item structure safely. This end result is cleaner, and avoids duplicating the merge names twice. Signed-off-by: Jacob Keller Link: [1] https://lore.kernel.org/git/20250617-jk-submodule-helper-use-url-v2-1-04cbb003177d@gmail.com/ Signed-off-by: Junio C Hamano --- branch.c | 4 ++-- builtin/pull.c | 2 +- remote.c | 44 ++++++++++++++++++++++++++++---------------- remote.h | 4 ++-- 4 files changed, 33 insertions(+), 21 deletions(-) diff --git a/branch.c b/branch.c index 6d01d7d6bd..93f5b4e8dd 100644 --- a/branch.c +++ b/branch.c @@ -230,7 +230,7 @@ static int inherit_tracking(struct tracking *tracking, const char *orig_ref) return -1; } - if (branch->merge_nr < 1 || !branch->merge_name || !branch->merge_name[0]) { + if (branch->merge_nr < 1 || !branch->merge || !branch->merge[0] || !branch->merge[0]->src) { warning(_("asked to inherit tracking from '%s', but no merge configuration is set"), bare_ref); return -1; @@ -238,7 +238,7 @@ static int inherit_tracking(struct tracking *tracking, const char *orig_ref) tracking->remote = branch->remote_name; for (i = 0; i < branch->merge_nr; i++) - string_list_append(tracking->srcs, branch->merge_name[i]); + string_list_append(tracking->srcs, branch->merge[i]->src); return 0; } diff --git a/builtin/pull.c b/builtin/pull.c index a1ebc6ad33..f4556ae155 100644 --- a/builtin/pull.c +++ b/builtin/pull.c @@ -487,7 +487,7 @@ static void NORETURN die_no_merge_candidates(const char *repo, const char **refs } else fprintf_ln(stderr, _("Your configuration specifies to merge with the ref '%s'\n" "from the remote, but no such ref was fetched."), - *curr_branch->merge_name); + curr_branch->merge[0]->src); exit(1); } diff --git a/remote.c b/remote.c index 4099183cac..ee95126f3f 100644 --- a/remote.c +++ b/remote.c @@ -174,9 +174,15 @@ static void remote_clear(struct remote *remote) static void add_merge(struct branch *branch, const char *name) { - ALLOC_GROW(branch->merge_name, branch->merge_nr + 1, + struct refspec_item *merge; + + ALLOC_GROW(branch->merge, branch->merge_nr + 1, branch->merge_alloc); - branch->merge_name[branch->merge_nr++] = name; + + merge = xcalloc(1, sizeof(*merge)); + merge->src = xstrdup(name); + + branch->merge[branch->merge_nr++] = merge; } struct branches_hash_key { @@ -247,15 +253,23 @@ static struct branch *make_branch(struct remote_state *remote_state, return ret; } +static void merge_clear(struct branch *branch) +{ + for (int i = 0; i < branch->merge_nr; i++) { + refspec_item_clear(branch->merge[i]); + free(branch->merge[i]); + } + FREE_AND_NULL(branch->merge); + branch->merge_nr = 0; +} + static void branch_release(struct branch *branch) { free((char *)branch->name); free((char *)branch->refname); free(branch->remote_name); free(branch->pushremote_name); - for (int i = 0; i < branch->merge_nr; i++) - refspec_item_clear(branch->merge[i]); - free(branch->merge); + merge_clear(branch); } static struct rewrite *make_rewrite(struct rewrites *r, @@ -429,7 +443,7 @@ static int handle_config(const char *key, const char *value, } else if (!strcmp(subkey, "merge")) { if (!value) return config_error_nonbool(key); - add_merge(branch, xstrdup(value)); + add_merge(branch, value); } return 0; } @@ -692,7 +706,7 @@ char *remote_ref_for_branch(struct branch *branch, int for_push) if (branch) { if (!for_push) { if (branch->merge_nr) { - return xstrdup(branch->merge_name[0]); + return xstrdup(branch->merge[0]->src); } } else { char *dst; @@ -1731,32 +1745,30 @@ static void set_merge(struct remote_state *remote_state, struct branch *ret) if (!ret) return; /* no branch */ - if (ret->merge) + if (ret->set_merge) return; /* already run */ if (!ret->remote_name || !ret->merge_nr) { /* * no merge config; let's make sure we don't confuse callers * with a non-zero merge_nr but a NULL merge */ - ret->merge_nr = 0; + merge_clear(ret); return; } + ret->set_merge = 1; remote = remotes_remote_get(remote_state, ret->remote_name); - CALLOC_ARRAY(ret->merge, ret->merge_nr); for (i = 0; i < ret->merge_nr; i++) { - ret->merge[i] = xcalloc(1, sizeof(**ret->merge)); - ret->merge[i]->src = xstrdup(ret->merge_name[i]); if (!remote_find_tracking(remote, ret->merge[i]) || strcmp(ret->remote_name, ".")) continue; - if (repo_dwim_ref(the_repository, ret->merge_name[i], - strlen(ret->merge_name[i]), &oid, &ref, + if (repo_dwim_ref(the_repository, ret->merge[i]->src, + strlen(ret->merge[i]->src), &oid, &ref, 0) == 1) ret->merge[i]->dst = ref; else - ret->merge[i]->dst = xstrdup(ret->merge_name[i]); + ret->merge[i]->dst = xstrdup(ret->merge[i]->src); } } @@ -1776,7 +1788,7 @@ struct branch *branch_get(const char *name) int branch_has_merge_config(struct branch *branch) { - return branch && !!branch->merge; + return branch && branch->set_merge; } int branch_merge_matches(struct branch *branch, diff --git a/remote.h b/remote.h index 7e4943ae3a..76d93bf88d 100644 --- a/remote.h +++ b/remote.h @@ -315,8 +315,8 @@ struct branch { char *pushremote_name; - /* An array of the "merge" lines in the configuration. */ - const char **merge_name; + /* True if set_merge() has been called to finalize the merge array */ + int set_merge; /** * An array of the struct refspecs used for the merge lines. That is, From 2084f119b4d8252116493336f597d205cfa8f0b8 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 23 Jun 2025 16:11:30 -0700 Subject: [PATCH 2/7] remote: fix tear down of struct remote The remote_clear() function failed to free the remote->push and remote->fetch refspec fields. This should be caught by the leak sanitizer. However, for callers which use ``the_repository``, the values never go out of scope and the sanitizer doesn't complain. A future change is going to add a caller of read_config() for a submodule repository structure, which would result in the leak sanitizer complaining. Fix remote_clear(), updating it to properly call refspec_clear() for both the push and fetch members. Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- remote.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/remote.c b/remote.c index ee95126f3f..194bb44778 100644 --- a/remote.c +++ b/remote.c @@ -165,6 +165,9 @@ static void remote_clear(struct remote *remote) strvec_clear(&remote->url); strvec_clear(&remote->pushurl); + refspec_clear(&remote->push); + refspec_clear(&remote->fetch); + free((char *)remote->receivepack); free((char *)remote->uploadpack); FREE_AND_NULL(remote->http_proxy); From 059268fd056c4063eb913e6ec265bcdf85437b03 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 23 Jun 2025 16:11:31 -0700 Subject: [PATCH 3/7] dir: move starts_with_dot(_dot)_slash to dir.h Both submodule--helper.c and submodule-config.c have an implementation of starts_with_dot_slash and starts_with_dot_dot_slash. The dir.h header has starts_with_dot(_dot)_slash_native, which sets PATH_MATCH_NATIVE. Move the helpers to dir.h as static inlines. I thought about renaming them to postfix with _platform but that felt too long and ugly. On the other hand it might be slightly confusing with _native. This simplifies a submodule refactor which wants to use the helpers earlier in the submodule--helper.c file. Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 12 ------------ dir.h | 23 +++++++++++++++++++++++ submodule-config.c | 12 ------------ 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 53da2116dd..9e8cdfe1b2 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -438,18 +438,6 @@ cleanup: return ret; } -static int starts_with_dot_slash(const char *const path) -{ - return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH | - PATH_MATCH_XPLATFORM); -} - -static int starts_with_dot_dot_slash(const char *const path) -{ - return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH | - PATH_MATCH_XPLATFORM); -} - struct init_cb { const char *prefix; const char *super_prefix; diff --git a/dir.h b/dir.h index d7e71aa8da..fc9be7b427 100644 --- a/dir.h +++ b/dir.h @@ -676,4 +676,27 @@ static inline int starts_with_dot_dot_slash_native(const char *const path) return path_match_flags(path, what | PATH_MATCH_NATIVE); } +/** + * starts_with_dot_slash: convenience wrapper for + * patch_match_flags() with PATH_MATCH_STARTS_WITH_DOT_SLASH and + * PATH_MATCH_XPLATFORM. + */ +static inline int starts_with_dot_slash(const char *const path) +{ + const enum path_match_flags what = PATH_MATCH_STARTS_WITH_DOT_SLASH; + + return path_match_flags(path, what | PATH_MATCH_XPLATFORM); +} + +/** + * starts_with_dot_dot_slash: convenience wrapper for + * patch_match_flags() with PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH and + * PATH_MATCH_XPLATFORM. + */ +static inline int starts_with_dot_dot_slash(const char *const path) +{ + const enum path_match_flags what = PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH; + + return path_match_flags(path, what | PATH_MATCH_XPLATFORM); +} #endif diff --git a/submodule-config.c b/submodule-config.c index 8630e27947..d64438b2a1 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -235,18 +235,6 @@ in_component: return 0; } -static int starts_with_dot_slash(const char *const path) -{ - return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH | - PATH_MATCH_XPLATFORM); -} - -static int starts_with_dot_dot_slash(const char *const path) -{ - return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH | - PATH_MATCH_XPLATFORM); -} - static int submodule_url_is_relative(const char *url) { return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url); From f8542961da88bee31f7e0da21fd8d2792d62f888 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 23 Jun 2025 16:11:32 -0700 Subject: [PATCH 4/7] remote: remove the_repository from some functions The remotes_remote_get_1 (and its caller, remotes_remote_get, have an implicit dependency on the_repository due to calling read_branches_file() and read_remotes_file(), both of which use the_repository. The branch_get() function calls set_merge() which has an implicit dependency on the_repository as well. Because of this use of the_repository, the helper functions cannot be used in code paths which operate on other repositories. A future refactor of the submodule--helper will want to make use of some of these functions. Refactor to break the dependency by passing struct repository *repo instead of struct remote_state *remote_state in a few places. The public callers and many other helper functions still depend on the_repository. A repo-aware function will be exposed in a following change for git submodule--helper. Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- remote.c | 58 +++++++++++++++++++++++++++----------------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/remote.c b/remote.c index 194bb44778..e7ff21dc03 100644 --- a/remote.c +++ b/remote.c @@ -334,11 +334,10 @@ static void warn_about_deprecated_remote_type(const char *type, type, remote->name, remote->name, remote->name); } -static void read_remotes_file(struct remote_state *remote_state, - struct remote *remote) +static void read_remotes_file(struct repository *repo, struct remote *remote) { struct strbuf buf = STRBUF_INIT; - FILE *f = fopen_or_warn(repo_git_path_append(the_repository, &buf, + FILE *f = fopen_or_warn(repo_git_path_append(repo, &buf, "remotes/%s", remote->name), "r"); if (!f) @@ -354,7 +353,7 @@ static void read_remotes_file(struct remote_state *remote_state, strbuf_rtrim(&buf); if (skip_prefix(buf.buf, "URL:", &v)) - add_url_alias(remote_state, remote, + add_url_alias(repo->remote_state, remote, skip_spaces(v)); else if (skip_prefix(buf.buf, "Push:", &v)) refspec_append(&remote->push, skip_spaces(v)); @@ -367,12 +366,11 @@ out: strbuf_release(&buf); } -static void read_branches_file(struct remote_state *remote_state, - struct remote *remote) +static void read_branches_file(struct repository *repo, struct remote *remote) { char *frag, *to_free = NULL; struct strbuf buf = STRBUF_INIT; - FILE *f = fopen_or_warn(repo_git_path_append(the_repository, &buf, + FILE *f = fopen_or_warn(repo_git_path_append(repo, &buf, "branches/%s", remote->name), "r"); if (!f) @@ -399,9 +397,9 @@ static void read_branches_file(struct remote_state *remote_state, if (frag) *(frag++) = '\0'; else - frag = to_free = repo_default_branch_name(the_repository, 0); + frag = to_free = repo_default_branch_name(repo, 0); - add_url_alias(remote_state, remote, buf.buf); + add_url_alias(repo->remote_state, remote, buf.buf); refspec_appendf(&remote->fetch, "refs/heads/%s:refs/heads/%s", frag, remote->name); @@ -698,7 +696,7 @@ const char *pushremote_for_branch(struct branch *branch, int *explicit) branch, explicit); } -static struct remote *remotes_remote_get(struct remote_state *remote_state, +static struct remote *remotes_remote_get(struct repository *repo, const char *name); char *remote_ref_for_branch(struct branch *branch, int for_push) @@ -717,7 +715,7 @@ char *remote_ref_for_branch(struct branch *branch, int for_push) the_repository->remote_state, branch, NULL); struct remote *remote = remotes_remote_get( - the_repository->remote_state, remote_name); + the_repository, remote_name); if (remote && remote->push.nr && (dst = apply_refspecs(&remote->push, @@ -774,10 +772,11 @@ loop_cleanup: } static struct remote * -remotes_remote_get_1(struct remote_state *remote_state, const char *name, +remotes_remote_get_1(struct repository *repo, const char *name, const char *(*get_default)(struct remote_state *, struct branch *, int *)) { + struct remote_state *remote_state = repo->remote_state; struct remote *ret; int name_given = 0; @@ -791,9 +790,9 @@ remotes_remote_get_1(struct remote_state *remote_state, const char *name, #ifndef WITH_BREAKING_CHANGES if (valid_remote_nick(name) && have_git_dir()) { if (!valid_remote(ret)) - read_remotes_file(remote_state, ret); + read_remotes_file(repo, ret); if (!valid_remote(ret)) - read_branches_file(remote_state, ret); + read_branches_file(repo, ret); } #endif /* WITH_BREAKING_CHANGES */ if (name_given && !valid_remote(ret)) @@ -807,35 +806,33 @@ remotes_remote_get_1(struct remote_state *remote_state, const char *name, } static inline struct remote * -remotes_remote_get(struct remote_state *remote_state, const char *name) +remotes_remote_get(struct repository *repo, const char *name) { - return remotes_remote_get_1(remote_state, name, - remotes_remote_for_branch); + return remotes_remote_get_1(repo, name, remotes_remote_for_branch); } struct remote *remote_get(const char *name) { read_config(the_repository, 0); - return remotes_remote_get(the_repository->remote_state, name); + return remotes_remote_get(the_repository, name); } struct remote *remote_get_early(const char *name) { read_config(the_repository, 1); - return remotes_remote_get(the_repository->remote_state, name); + return remotes_remote_get(the_repository, name); } static inline struct remote * -remotes_pushremote_get(struct remote_state *remote_state, const char *name) +remotes_pushremote_get(struct repository *repo, const char *name) { - return remotes_remote_get_1(remote_state, name, - remotes_pushremote_for_branch); + return remotes_remote_get_1(repo, name, remotes_pushremote_for_branch); } struct remote *pushremote_get(const char *name) { read_config(the_repository, 0); - return remotes_pushremote_get(the_repository->remote_state, name); + return remotes_pushremote_get(the_repository, name); } int remote_is_configured(struct remote *remote, int in_repo) @@ -1739,7 +1736,7 @@ void set_ref_status_for_push(struct ref *remote_refs, int send_mirror, } } -static void set_merge(struct remote_state *remote_state, struct branch *ret) +static void set_merge(struct repository *repo, struct branch *ret) { struct remote *remote; char *ref; @@ -1760,13 +1757,13 @@ static void set_merge(struct remote_state *remote_state, struct branch *ret) } ret->set_merge = 1; - remote = remotes_remote_get(remote_state, ret->remote_name); + remote = remotes_remote_get(repo, ret->remote_name); for (i = 0; i < ret->merge_nr; i++) { if (!remote_find_tracking(remote, ret->merge[i]) || strcmp(ret->remote_name, ".")) continue; - if (repo_dwim_ref(the_repository, ret->merge[i]->src, + if (repo_dwim_ref(repo, ret->merge[i]->src, strlen(ret->merge[i]->src), &oid, &ref, 0) == 1) ret->merge[i]->dst = ref; @@ -1785,7 +1782,7 @@ struct branch *branch_get(const char *name) else ret = make_branch(the_repository->remote_state, name, strlen(name)); - set_merge(the_repository->remote_state, ret); + set_merge(the_repository, ret); return ret; } @@ -1856,13 +1853,14 @@ static const char *tracking_for_push_dest(struct remote *remote, return ret; } -static const char *branch_get_push_1(struct remote_state *remote_state, +static const char *branch_get_push_1(struct repository *repo, struct branch *branch, struct strbuf *err) { + struct remote_state *remote_state = repo->remote_state; struct remote *remote; remote = remotes_remote_get( - remote_state, + repo, remotes_pushremote_for_branch(remote_state, branch, NULL)); if (!remote) return error_buf(err, @@ -1929,7 +1927,7 @@ const char *branch_get_push(struct branch *branch, struct strbuf *err) if (!branch->push_tracking_ref) branch->push_tracking_ref = branch_get_push_1( - the_repository->remote_state, branch, err); + the_repository, branch, err); return branch->push_tracking_ref; } From e759275c8fbf76e380600a87f72d6857d3b48ba3 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 23 Jun 2025 16:11:33 -0700 Subject: [PATCH 5/7] submodule--helper: improve logic for fallback remote name The repo_get_default_remote() function in submodule--helper currently tries to figure out the proper remote name to use for a submodule based on a few factors. First, it tries to find the remote for the currently checked out branch. This works if the submodule is configured to checkout to a branch instead of a detached HEAD state. In the detached HEAD state, the code calls back to using "origin", on the assumption that this is the default remote name. Some users may change this, such as by setting clone.defaultRemoteName, or by changing the remote name manually within the submodule repository. As a first step to improving this situation, refactor to reuse the logic from remotes_remote_for_branch(). This function uses the remote from the branch if it has one. If it doesn't then it checks to see if there is exactly one remote. It uses this remote first before attempting to fall back to "origin". To allow using this helper function, introduce a repo_default_remote() helper to remote.c which takes a repository structure. This helper will load the remote configuration and get the "HEAD" branch. Then it will call remotes_remote_for_branch to find the default remote. Replace calls of repo_get_default_remote() with the calls to this new function. To maintain consistency with the existing callers, continue copying the returned string with xstrdup. This isn't a perfect solution for users who change remote names, but it should help in cases where the remote name is changed but users haven't added any additional remotes. Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 46 ++++--------------------------------- remote.c | 25 ++++++++++++++++---- remote.h | 3 +++ t/t7406-submodule-update.sh | 29 +++++++++++++++++++++++ 4 files changed, 57 insertions(+), 46 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 9e8cdfe1b2..4aa237033a 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -41,61 +41,25 @@ typedef void (*each_submodule_fn)(const struct cache_entry *list_item, void *cb_data); -static int repo_get_default_remote(struct repository *repo, char **default_remote) -{ - char *dest = NULL; - struct strbuf sb = STRBUF_INIT; - struct ref_store *store = get_main_ref_store(repo); - const char *refname = refs_resolve_ref_unsafe(store, "HEAD", 0, NULL, - NULL); - - if (!refname) - return die_message(_("No such ref: %s"), "HEAD"); - - /* detached HEAD */ - if (!strcmp(refname, "HEAD")) { - *default_remote = xstrdup("origin"); - return 0; - } - - if (!skip_prefix(refname, "refs/heads/", &refname)) - return die_message(_("Expecting a full ref name, got %s"), - refname); - - strbuf_addf(&sb, "branch.%s.remote", refname); - if (repo_config_get_string(repo, sb.buf, &dest)) - *default_remote = xstrdup("origin"); - else - *default_remote = dest; - - strbuf_release(&sb); - return 0; -} - static int get_default_remote_submodule(const char *module_path, char **default_remote) { struct repository subrepo; - int ret; if (repo_submodule_init(&subrepo, the_repository, module_path, null_oid(the_hash_algo)) < 0) return die_message(_("could not get a repository handle for submodule '%s'"), module_path); - ret = repo_get_default_remote(&subrepo, default_remote); + + *default_remote = xstrdup(repo_default_remote(&subrepo)); + repo_clear(&subrepo); - return ret; + return 0; } static char *get_default_remote(void) { - char *default_remote; - int code = repo_get_default_remote(the_repository, &default_remote); - - if (code) - exit(code); - - return default_remote; + return xstrdup(repo_default_remote(the_repository)); } static char *resolve_relative_url(const char *rel_url, const char *up_path, int quiet) diff --git a/remote.c b/remote.c index e7ff21dc03..e35cf7ec61 100644 --- a/remote.c +++ b/remote.c @@ -1772,20 +1772,35 @@ static void set_merge(struct repository *repo, struct branch *ret) } } -struct branch *branch_get(const char *name) +static struct branch *repo_branch_get(struct repository *repo, const char *name) { struct branch *ret; - read_config(the_repository, 0); + read_config(repo, 0); if (!name || !*name || !strcmp(name, "HEAD")) - ret = the_repository->remote_state->current_branch; + ret = repo->remote_state->current_branch; else - ret = make_branch(the_repository->remote_state, name, + ret = make_branch(repo->remote_state, name, strlen(name)); - set_merge(the_repository, ret); + set_merge(repo, ret); return ret; } +struct branch *branch_get(const char *name) +{ + return repo_branch_get(the_repository, name); +} + +const char *repo_default_remote(struct repository *repo) +{ + struct branch *branch; + + read_config(repo, 0); + branch = repo_branch_get(repo, "HEAD"); + + return remotes_remote_for_branch(repo->remote_state, branch, NULL); +} + int branch_has_merge_config(struct branch *branch) { return branch && branch->set_merge; diff --git a/remote.h b/remote.h index 76d93bf88d..8dc5cfa49e 100644 --- a/remote.h +++ b/remote.h @@ -9,6 +9,7 @@ struct option; struct transport_ls_refs_options; +struct repository; /** * The API gives access to the configuration related to remotes. It handles @@ -338,6 +339,8 @@ const char *remote_for_branch(struct branch *branch, int *explicit); const char *pushremote_for_branch(struct branch *branch, int *explicit); char *remote_ref_for_branch(struct branch *branch, int for_push); +const char *repo_default_remote(struct repository *repo); + /* returns true if the given branch has merge configuration given. */ int branch_has_merge_config(struct branch *branch); diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh index c562bad042..748b529745 100755 --- a/t/t7406-submodule-update.sh +++ b/t/t7406-submodule-update.sh @@ -1134,6 +1134,35 @@ test_expect_success 'setup clean recursive superproject' ' git clone --recurse-submodules top top-clean ' +test_expect_success 'submodule update with renamed remote' ' + test_when_finished "rm -fr top-cloned" && + cp -r top-clean top-cloned && + + # Create a commit in each repo, starting with bottom + test_commit -C bottom rename_commit && + # Create middle commit + git -C middle/bottom fetch && + git -C middle/bottom checkout -f FETCH_HEAD && + git -C middle add bottom && + git -C middle commit -m "rename_commit" && + # Create top commit + git -C top/middle fetch && + git -C top/middle checkout -f FETCH_HEAD && + git -C top add middle && + git -C top commit -m "rename_commit" && + + # rename the submodule remote + git -C top-cloned/middle remote rename origin upstream && + + # Make the update of "middle" a no-op, otherwise we error out + # because of its unmerged state + test_config -C top-cloned submodule.middle.update !true && + git -C top-cloned submodule update --recursive 2>actual.err && + cat >expect.err <<-\EOF && + EOF + test_cmp expect.err actual.err +' + test_expect_success 'submodule update should skip unmerged submodules' ' test_when_finished "rm -fr top-cloned" && cp -r top-clean top-cloned && From fedfb0735b2d2dd7b47287925ad5a0aa4fbb9712 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 23 Jun 2025 16:11:34 -0700 Subject: [PATCH 6/7] submodule: move get_default_remote_submodule() A future refactor got get_default_remote_submodule() is going to depend on resolve_relative_url(). That function depends on get_default_remote(). Move get_default_remote_submodule() after resolve_relative_url() first to make the additional functionality easier to review. Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 4aa237033a..1aa87435c2 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -41,22 +41,6 @@ typedef void (*each_submodule_fn)(const struct cache_entry *list_item, void *cb_data); -static int get_default_remote_submodule(const char *module_path, char **default_remote) -{ - struct repository subrepo; - - if (repo_submodule_init(&subrepo, the_repository, module_path, - null_oid(the_hash_algo)) < 0) - return die_message(_("could not get a repository handle for submodule '%s'"), - module_path); - - *default_remote = xstrdup(repo_default_remote(&subrepo)); - - repo_clear(&subrepo); - - return 0; -} - static char *get_default_remote(void) { return xstrdup(repo_default_remote(the_repository)); @@ -86,6 +70,22 @@ static char *resolve_relative_url(const char *rel_url, const char *up_path, int return resolved_url; } +static int get_default_remote_submodule(const char *module_path, char **default_remote) +{ + struct repository subrepo; + + if (repo_submodule_init(&subrepo, the_repository, module_path, + null_oid(the_hash_algo)) < 0) + return die_message(_("could not get a repository handle for submodule '%s'"), + module_path); + + *default_remote = xstrdup(repo_default_remote(&subrepo)); + + repo_clear(&subrepo); + + return 0; +} + /* the result should be freed by the caller. */ static char *get_submodule_displaypath(const char *path, const char *prefix, const char *super_prefix) From ca62f524c1eaef606b5c312de53ef7c4d9eefa4f Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 23 Jun 2025 16:11:35 -0700 Subject: [PATCH 7/7] submodule: look up remotes by URL first The get_default_remote_submodule() function performs a lookup to find the appropriate remote to use within a submodule. The function first checks to see if it can find the remote for the current branch. If this fails, it then checks to see if there is exactly one remote. It will use this, before finally falling back to "origin" as the default. If a user happens to rename their default remote from origin, either manually or by setting something like clone.defaultRemoteName, this fallback will not work. In such cases, the submodule logic will try to use a non-existent remote. This usually manifests as a failure to trigger the submodule update. The parent project already knows and stores the submodule URL in either .gitmodules or its .git/config. Add a new repo_remote_from_url() helper which will iterate over all the remotes in a repository and return the first remote which has a matching URL. Refactor get_default_remote_submodule to find the submodule and get its URL. If a valid URL exists, first try to obtain a remote using the new repo_remote_from_url(). Fall back to the repo_default_remote() otherwise. The fallback logic is kept in case for some reason the user has manually changed the URL within the submodule. Additionally, we still try to use a remote rather than directly passing the URL in the fetch_in_submodule() logic. This ensures that an update will properly update the remote refs within the submodule as expected, rather than just fetching into FETCH_HEAD. Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 26 +++++++++++++++++++++++++- remote.c | 15 +++++++++++++++ remote.h | 1 + t/t7406-submodule-update.sh | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 1 deletion(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 1aa87435c2..84a96d300d 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -72,16 +72,40 @@ static char *resolve_relative_url(const char *rel_url, const char *up_path, int static int get_default_remote_submodule(const char *module_path, char **default_remote) { + const struct submodule *sub; struct repository subrepo; + const char *remote_name = NULL; + char *url = NULL; + + sub = submodule_from_path(the_repository, null_oid(the_hash_algo), module_path); + if (sub && sub->url) { + url = xstrdup(sub->url); + + /* Possibly a url relative to parent */ + if (starts_with_dot_dot_slash(url) || + starts_with_dot_slash(url)) { + char *oldurl = url; + + url = resolve_relative_url(oldurl, NULL, 1); + free(oldurl); + } + } if (repo_submodule_init(&subrepo, the_repository, module_path, null_oid(the_hash_algo)) < 0) return die_message(_("could not get a repository handle for submodule '%s'"), module_path); - *default_remote = xstrdup(repo_default_remote(&subrepo)); + /* Look up by URL first */ + if (url) + remote_name = repo_remote_from_url(&subrepo, url); + if (!remote_name) + remote_name = repo_default_remote(&subrepo); + + *default_remote = xstrdup(remote_name); repo_clear(&subrepo); + free(url); return 0; } diff --git a/remote.c b/remote.c index e35cf7ec61..60b4aec3de 100644 --- a/remote.c +++ b/remote.c @@ -1801,6 +1801,21 @@ const char *repo_default_remote(struct repository *repo) return remotes_remote_for_branch(repo->remote_state, branch, NULL); } +const char *repo_remote_from_url(struct repository *repo, const char *url) +{ + read_config(repo, 0); + + for (int i = 0; i < repo->remote_state->remotes_nr; i++) { + struct remote *remote = repo->remote_state->remotes[i]; + if (!remote) + continue; + + if (remote_has_url(remote, url)) + return remote->name; + } + return NULL; +} + int branch_has_merge_config(struct branch *branch) { return branch && branch->set_merge; diff --git a/remote.h b/remote.h index 8dc5cfa49e..0ca399e183 100644 --- a/remote.h +++ b/remote.h @@ -340,6 +340,7 @@ const char *pushremote_for_branch(struct branch *branch, int *explicit); char *remote_ref_for_branch(struct branch *branch, int for_push); const char *repo_default_remote(struct repository *repo); +const char *repo_remote_from_url(struct repository *repo, const char *url); /* returns true if the given branch has merge configuration given. */ int branch_has_merge_config(struct branch *branch); diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh index 748b529745..c09047b5f4 100755 --- a/t/t7406-submodule-update.sh +++ b/t/t7406-submodule-update.sh @@ -1134,6 +1134,38 @@ test_expect_success 'setup clean recursive superproject' ' git clone --recurse-submodules top top-clean ' +test_expect_success 'submodule update with multiple remotes' ' + test_when_finished "rm -fr top-cloned" && + cp -r top-clean top-cloned && + + # Create a commit in each repo, starting with bottom + test_commit -C bottom multiple_remote_commit && + # Create middle commit + git -C middle/bottom fetch && + git -C middle/bottom checkout -f FETCH_HEAD && + git -C middle add bottom && + git -C middle commit -m "multiple_remote_commit" && + # Create top commit + git -C top/middle fetch && + git -C top/middle checkout -f FETCH_HEAD && + git -C top add middle && + git -C top commit -m "multiple_remote_commit" && + + # rename the submodule remote + git -C top-cloned/middle remote rename origin upstream && + + # Add another remote + git -C top-cloned/middle remote add other bogus && + + # Make the update of "middle" a no-op, otherwise we error out + # because of its unmerged state + test_config -C top-cloned submodule.middle.update !true && + git -C top-cloned submodule update --recursive 2>actual.err && + cat >expect.err <<-\EOF && + EOF + test_cmp expect.err actual.err +' + test_expect_success 'submodule update with renamed remote' ' test_when_finished "rm -fr top-cloned" && cp -r top-clean top-cloned &&