The 16th batch

Signed-off-by: Junio C Hamano <gitster@pobox.com>
Merge branch 'en/ort-recursive-d-f-conflict-fix'
2026-01-11 13:23:12 +09:00 · 2026-01-08 16:40:12 +09:00 · 2026-01-08 16:40:12 +09:00 · 2026-01-08 16:40:12 +09:00 · 2026-01-08 16:40:11 +09:00 · 2026-01-08 16:40:11 +09:00
16 changed files with 198 additions and 27 deletions
--- a/Documentation/RelNotes/2.53.0.adoc
+++ b/Documentation/RelNotes/2.53.0.adoc
@ -228,6 +228,16 @@ Fixes since v2.52
   handling stateful ISO/IEC 2022 encoded strings.
   (merge cee341e9dd rs/macos-iconv-workaround later to maint).

+ * Running "git diff" with "--name-only" and other options that allows
+   us not to look at the blob contents, while objects that are lazily
+   fetched from a promisor remote, caused use-after-free, which has
+   been corrected.
+
+ * The ort merge machinery hit an assertion failure in a history with
+   criss-cross merges renamed a directory and a non-directory, which
+   has been corrected.
+   (merge 979ee83e8a en/ort-recursive-d-f-conflict-fix later to maint).
+
 * Other code cleanup, docfix, build fix, etc.
   (merge 46207a54cc qj/doc-http-bad-want-response later to maint).
   (merge df90eccd93 kh/doc-commit-extra-references later to maint).
@ -252,3 +262,4 @@ Fixes since v2.52
   (merge 93f894c001 bc/checkout-error-message-fix later to maint).
   (merge abf05d856f rs/show-branch-prio-queue later to maint).
   (merge 06188ea5f3 rs/parse-config-expiry-simplify later to maint).
+   (merge 861dbb1586 dd/t5403-modernise later to maint).
--- a/builtin/describe.c
+++ b/builtin/describe.c
@ -112,13 +112,13 @@ static int replace_name(struct commit_name *e,

 		if (!e->tag) {
 			t = lookup_tag(the_repository, &e->oid);
-			if (!t || parse_tag(t))
+			if (!t || parse_tag(the_repository, t))
 				return 1;
 			e->tag = t;
 		}

 		t = lookup_tag(the_repository, oid);
-		if (!t || parse_tag(t))
+		if (!t || parse_tag(the_repository, t))
 			return 0;
 		*tag = t;

@ -335,7 +335,7 @@ static void append_name(struct commit_name *n, struct strbuf *dst)
 {
 	if (n->prio == 2 && !n->tag) {
 		n->tag = lookup_tag(the_repository, &n->oid);
-		if (!n->tag || parse_tag(n->tag))
+		if (!n->tag || parse_tag(the_repository, n->tag))
 			die(_("annotated tag %s not available"), n->path);
 	}
 	if (n->tag && !n->name_checked) {
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@ -3293,7 +3293,7 @@ static void add_tag_chain(const struct object_id *oid)

 	tag = lookup_tag(the_repository, oid);
 	while (1) {
-		if (!tag || parse_tag(tag) || !tag->tagged)
+		if (!tag || parse_tag(the_repository, tag) || !tag->tagged)
 			die(_("unable to pack objects reachable from tag %s"),
 			    oid_to_hex(oid));

--- a/builtin/tag.c
+++ b/builtin/tag.c
@ -149,7 +149,7 @@ static int verify_tag(const char *name, const char *ref UNUSED,
 	if (format->format)
 		flags = GPG_VERIFY_OMIT_STATUS;

-	if (gpg_verify_tag(oid, name, flags))
+	if (gpg_verify_tag(the_repository, oid, name, flags))
 		return -1;

 	if (format->format)
--- a/builtin/verify-tag.c
+++ b/builtin/verify-tag.c
@ -61,7 +61,7 @@ int cmd_verify_tag(int argc,
 			continue;
 		}

-		if (gpg_verify_tag(&oid, name, flags)) {
+		if (gpg_verify_tag(repo, &oid, name, flags)) {
 			had_error = 1;
 			continue;
 		}
--- a/diff.c
+++ b/diff.c
@ -7098,6 +7098,7 @@ static void diffcore_skip_stat_unmatch(struct diff_options *diffopt)
 			if (!diffopt->flags.no_index)
 				diffopt->skip_stat_unmatch++;
 			diff_free_filepair(p);
+			q->queue[i] = NULL;
 		}
 	}
 	free(q->queue);
@ -7141,6 +7142,10 @@ void diff_queued_diff_prefetch(void *repository)

 	for (i = 0; i < q->nr; i++) {
 		struct diff_filepair *p = q->queue[i];
+
+		if (!p)
+			continue;
+
 		diff_add_if_missing(repo, &to_fetch, p->one);
 		diff_add_if_missing(repo, &to_fetch, p->two);
 	}
--- a/fsck.c
+++ b/fsck.c
@ -474,7 +474,7 @@ static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *optio
 {
 	const char *name = fsck_get_object_name(options, &tag->object.oid);

-	if (parse_tag(tag))
+	if (parse_tag(the_repository, tag))
 		return -1;
 	if (name)
 		fsck_put_object_name(options, &tag->tagged->oid, "%s", name);
--- a/merge-ort.c
+++ b/merge-ort.c
@ -1502,11 +1502,44 @@ static void resolve_trivial_directory_merge(struct conflict_info *ci, int side)
 	VERIFY_CI(ci);
 	assert((side == 1 && ci->match_mask == 5) ||
 	       (side == 2 && ci->match_mask == 3));
+
+	/*
+	 * Since ci->stages[0] matches ci->stages[3-side], resolve merge in
+	 * favor of ci->stages[side].
+	 */
 	oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
 	ci->merged.result.mode = ci->stages[side].mode;
 	ci->merged.is_null = is_null_oid(&ci->stages[side].oid);
+
+	/*
+	 * Because we resolved in favor of "side", we are no longer
+	 * considering the paths which matched (i.e. had the same hash) any
+	 * more.  Strip the matching paths from both dirmask & filemask.
+	 * Another consequence of merging in favor of side is that we can no
+	 * longer have a directory/file conflict either..but there's a slight
+	 * nuance we consider before clearing it.
+	 *
+	 * In most cases, resolving in favor of the other side means there's
+	 * no conflict at all, but if we had a directory/file conflict to
+	 * start, and the directory is resolved away, the remaining file could
+	 * still be part of a rename.  If the remaining file is part of a
+	 * rename, then it may also be part of a rename conflict (e.g.
+	 * rename/delete or rename/rename(1to2)), so we can't
+	 * mark it as a clean merge if we started with a directory/file
+	 * conflict and still have a file left.
+	 *
+	 * In contrast, if we started with a directory/file conflict and
+	 * still have a directory left, no file under that directory can be
+	 * part of a rename, otherwise we would have had to recurse into the
+	 * directory and would have never ended up within
+	 * resolve_trivial_directory_merge() for that directory.
+	 */
+	ci->dirmask &= (~ci->match_mask);
+	ci->filemask &= (~ci->match_mask);
+	assert(!ci->filemask || !ci->dirmask);
 	ci->match_mask = 0;
-	ci->merged.clean = 1; /* (ci->filemask == 0); */
+	ci->merged.clean = !ci->df_conflict || ci->dirmask;
+	ci->df_conflict = 0;
 }

 static int handle_deferred_entries(struct merge_options *opt,
--- a/object-name.c
+++ b/object-name.c
@ -449,7 +449,7 @@ static int show_ambiguous_object(const struct object_id *oid, void *data)
 	} else if (type == OBJ_TAG) {
 		struct tag *tag = lookup_tag(ds->repo, oid);

-		if (!parse_tag(tag) && tag->tag) {
+		if (!parse_tag(ds->repo, tag) && tag->tag) {
 			/*
 			 * TRANSLATORS: This is a line of ambiguous
 			 * tag object output. E.g.:
--- a/ref-filter.c
+++ b/ref-filter.c
@ -2866,7 +2866,7 @@ static int match_points_at(struct oid_array *points_at,
 	while (obj && obj->type == OBJ_TAG) {
 		struct tag *tag = (struct tag *)obj;

-		if (parse_tag(tag) < 0) {
+		if (parse_tag(the_repository, tag) < 0) {
 			obj = NULL;
 			break;
 		}
--- a/t/t4067-diff-partial-clone.sh
+++ b/t/t4067-diff-partial-clone.sh
@ -132,6 +132,41 @@ test_expect_success 'diff with rename detection batches blobs' '
 	test_line_count = 1 done_lines
 '

+test_expect_success 'diff succeeds even if entries are removed from queue' '
+	test_when_finished "rm -rf server client trace" &&
+
+	test_create_repo server &&
+	for l in a c e g i p
+	do
+		echo $l >server/$l &&
+		git -C server add $l || return 1
+	done &&
+	git -C server commit -m x &&
+
+	for l in a e i
+	do
+		git -C server rm $l || return 1
+	done &&
+
+	for l in b d f i
+		do
+		echo $l$l >server/$l &&
+		git -C server add $l || return 1
+	done &&
+	git -C server commit -a -m x &&
+
+	test_config -C server uploadpack.allowfilter 1 &&
+	test_config -C server uploadpack.allowanysha1inwant 1 &&
+	git clone --filter=blob:limit=0 "file://$(pwd)/server" client &&
+
+	for file in $(ls client)
+	do
+		cat client/$file >$file &&
+		mv $file client/$file || return 1
+	done &&
+	git -C client diff --name-only --relative HEAD^
+'
+
 test_expect_success 'diff does not fetch anything if inexact rename detection is not needed' '
 	test_when_finished "rm -rf server client trace" &&

--- a/t/t5403-post-checkout-hook.sh
+++ b/t/t5403-post-checkout-hook.sh
@ -109,7 +109,7 @@ test_expect_success 'post-checkout hook is triggered by clone' '
 	echo "$@" >"$GIT_DIR/post-checkout.args"
 	EOF
 	git clone --template=templates . clone3 &&
-	test -f clone3/.git/post-checkout.args
+	test_path_is_file clone3/.git/post-checkout.args
 '

 test_done
--- a/t/t6422-merge-rename-corner-cases.sh
+++ b/t/t6422-merge-rename-corner-cases.sh
@ -1439,4 +1439,90 @@ test_expect_success 'rename/rename(1to2) with a binary file' '
 	)
 '

+# Testcase preliminary submodule/directory conflict and submodule rename
+#   Commit O: <empty, or additional irrelevant stuff>
+#   Commit A1: introduce "folder" (as a tree)
+#   Commit B1: introduce "folder" (as a submodule)
+#   Commit A2: merge B1 into A1, but keep folder as a tree
+#   Commit B2: merge A1 into B1, but keep folder as a submodule
+#   Merge A2 & B2
+test_setup_submodule_directory_preliminary_conflict () {
+	git init submodule_directory_preliminary_conflict &&
+	(
+		cd submodule_directory_preliminary_conflict &&
+
+		# Trying to do the A2 and B2 merges above is slightly more
+		# challenging with a local submodule (because checking out
+		# another commit has the submodule in the way).  Instead,
+		# first create the commits with the wrong parents but right
+		# trees, in the order A1, A2, B1, B2...
+		#
+		# Then go back and create new A2 & B2 with the correct
+		# parents and the same trees.
+
+		git commit --allow-empty -m orig &&
+
+		git branch A &&
+		git branch B &&
+
+		git checkout B &&
+		mkdir folder &&
+		echo A>folder/A &&
+		echo B>folder/B &&
+		echo C>folder/C &&
+		echo D>folder/D &&
+		echo E>folder/E &&
+		git add folder &&
+		git commit -m B1 &&
+
+		git commit --allow-empty -m B2 &&
+
+		git checkout A &&
+		git init folder &&
+		(
+			cd folder &&
+			>Z &&
+			>Y &&
+			git add Z Y &&
+			git commit -m "original submodule commit"
+		) &&
+		git add folder &&
+		git commit -m A1 &&
+
+		git commit --allow-empty -m A2 &&
+
+		NewA2=$(git commit-tree -p A^ -p B^ -m "Merge B into A" A^{tree}) &&
+		NewB2=$(git commit-tree -p B^ -p A^ -m "Merge A into B" B^{tree}) &&
+		git update-ref refs/heads/A $NewA2 &&
+		git update-ref refs/heads/B $NewB2
+	)
+}
+
+test_expect_success 'submodule/directory preliminary conflict' '
+	test_setup_submodule_directory_preliminary_conflict &&
+	(
+		cd submodule_directory_preliminary_conflict &&
+
+		git checkout A^0 &&
+
+		test_expect_code 1 git merge B^0 &&
+
+		# Make sure the index has the right number of entries
+		git ls-files -s >actual &&
+		test_line_count = 2 actual &&
+
+		# The "folder" as directory should have been resolved away
+		# as part of the merge.  The "folder" as submodule got
+		# renamed to "folder~Temporary merge branch 2" in the
+		# virtual merge base, resulting in a
+		#    "folder~Temporary merge branch 2" -> "folder"
+		# rename in the outermerge for the submodule, which then
+		# becomes part of a rename/delete conflict (because "folder"
+		# as a submodule was deleted in A2).
+		submod=$(git rev-parse A:folder) &&
+		printf "160000 $submod 1\tfolder\n160000 $submod 2\tfolder\n" >expect &&
+		test_cmp expect actual
+	)
+'
+
 test_done
--- a/tag.c
+++ b/tag.c
@ -1,4 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
 #define DISABLE_SIGN_COMPARE_WARNINGS

 #include "git-compat-util.h"
@ -13,6 +12,7 @@
 #include "gpg-interface.h"
 #include "hex.h"
 #include "packfile.h"
+#include "repository.h"

 const char *tag_type = "tag";

@ -44,28 +44,28 @@ static int run_gpg_verify(const char *buf, unsigned long size, unsigned flags)
 	return ret;
 }

-int gpg_verify_tag(const struct object_id *oid, const char *name_to_report,
-		unsigned flags)
+int gpg_verify_tag(struct repository *r, const struct object_id *oid,
+		   const char *name_to_report, unsigned flags)
 {
 	enum object_type type;
 	char *buf;
 	unsigned long size;
 	int ret;

-	type = odb_read_object_info(the_repository->objects, oid, NULL);
+	type = odb_read_object_info(r->objects, oid, NULL);
 	if (type != OBJ_TAG)
 		return error("%s: cannot verify a non-tag object of type %s.",
 				name_to_report ?
 				name_to_report :
-				repo_find_unique_abbrev(the_repository, oid, DEFAULT_ABBREV),
+				oid_to_hex(oid),
 				type_name(type));

-	buf = odb_read_object(the_repository->objects, oid, &type, &size);
+	buf = odb_read_object(r->objects, oid, &type, &size);
 	if (!buf)
 		return error("%s: unable to read file.",
 				name_to_report ?
 				name_to_report :
-				repo_find_unique_abbrev(the_repository, oid, DEFAULT_ABBREV));
+				oid_to_hex(oid));

 	ret = run_gpg_verify(buf, size, flags);

@ -148,9 +148,11 @@ int parse_tag_buffer(struct repository *r, struct tag *item, const void *data, u
 		FREE_AND_NULL(item->tag);
 	}

-	if (size < the_hash_algo->hexsz + 24)
+	if (size < r->hash_algo->hexsz + 24)
 		return -1;
-	if (memcmp("object ", bufptr, 7) || parse_oid_hex(bufptr + 7, &oid, &bufptr) || *bufptr++ != '\n')
+	if (memcmp("object ", bufptr, 7) ||
+	    parse_oid_hex_algop(bufptr + 7, &oid, &bufptr, r->hash_algo) ||
+	    *bufptr++ != '\n')
 		return -1;

 	if (!starts_with(bufptr, "type "))
@ -201,7 +203,7 @@ int parse_tag_buffer(struct repository *r, struct tag *item, const void *data, u
 	return 0;
 }

-int parse_tag(struct tag *item)
+int parse_tag(struct repository *r, struct tag *item)
 {
 	enum object_type type;
 	void *data;
@ -210,8 +212,7 @@ int parse_tag(struct tag *item)

 	if (item->object.parsed)
 		return 0;
-	data = odb_read_object(the_repository->objects, &item->object.oid,
-			       &type, &size);
+	data = odb_read_object(r->objects, &item->object.oid, &type, &size);
 	if (!data)
 		return error("Could not read %s",
 			     oid_to_hex(&item->object.oid));
@ -220,7 +221,7 @@ int parse_tag(struct tag *item)
 		return error("Object %s not a tag",
 			     oid_to_hex(&item->object.oid));
 	}
-	ret = parse_tag_buffer(the_repository, item, data, size);
+	ret = parse_tag_buffer(r, item, data, size);
 	free(data);
 	return ret;
 }
--- a/tag.h
+++ b/tag.h
@ -13,10 +13,10 @@ struct tag {
 };
 struct tag *lookup_tag(struct repository *r, const struct object_id *oid);
 int parse_tag_buffer(struct repository *r, struct tag *item, const void *data, unsigned long size);
-int parse_tag(struct tag *item);
+int parse_tag(struct repository *r, struct tag *item);
 void release_tag_memory(struct tag *t);
 struct object *deref_tag(struct repository *r, struct object *, const char *, int);
-int gpg_verify_tag(const struct object_id *oid,
+int gpg_verify_tag(struct repository *r, const struct object_id *oid,
 		   const char *name_to_report, unsigned flags);
 struct object_id *get_tagged_oid(struct tag *tag);

--- a/walker.c
+++ b/walker.c
@ -115,7 +115,7 @@ static int process_commit(struct walker *walker, struct commit *commit)

 static int process_tag(struct walker *walker, struct tag *tag)
 {
-	if (parse_tag(tag))
+	if (parse_tag(the_repository, tag))
 		return -1;
 	return process(walker, tag->tagged);
 }
Author	SHA1	Message	Date
Junio C Hamano	d529f3a197	The 16th batch Signed-off-by: Junio C Hamano <gitster@pobox.com>	2026-01-08 16:40:12 +09:00
Junio C Hamano	2db806d817	Merge branch 'en/ort-recursive-d-f-conflict-fix' The ort merge machinery hit an assertion failure in a history with criss-cross merges renamed a directory and a non-directory, which has been corrected. * en/ort-recursive-d-f-conflict-fix: merge-ort: fix corner case recursive submodule/directory conflict handling	2026-01-08 16:40:12 +09:00
Junio C Hamano	512351f2a8	Merge branch 'dd/t5403-modernise' Test micro-clean-up. * dd/t5403-modernise: t5403: use test_path_is_file instead of test -f	2026-01-08 16:40:12 +09:00
Junio C Hamano	c0754dc423	Merge branch 'ds/diff-lazy-fetch-with-name-only-fix' Running "git diff" with "--name-only" and other options that allows us not to look at the blob contents, while objects that are lazily fetched from a promisor remote, caused use-after-free, which has been corrected. * ds/diff-lazy-fetch-with-name-only-fix: diff: avoid segfault with freed entries	2026-01-08 16:40:11 +09:00
Junio C Hamano	d28d2be5f2	Merge branch 'rs/tag-wo-the-repository' Code clean-up. * rs/tag-wo-the-repository: tag: stop using the_repository tag: support arbitrary repositories in parse_tag() tag: support arbitrary repositories in gpg_verify_tag() tag: use algo of repo parameter in parse_tag_buffer()	2026-01-08 16:40:11 +09:00
Derrick Stolee	56d388e6ad	diff: avoid segfault with freed entries When computing a diff in a partial clone, there is a chance that we could trigger a prefetch of missing objects at the same time as we are freeing entries from the global diff queue. This is difficult to reproduce, as we need to have some objects be freed from the queue before triggering the prefetch of missing objects. There is a new test in t4067 that does trigger the segmentation fault that results in this case. The fix is to set the queue pointer to NULL after it is freed, and then to be careful about NULL values in the prefetch. The more elaborate explanation is that within diffcore_std(), we may skip the initial prefetch due to the output format (--name-only in the test) and go straight to diffcore_skip_stat_unmatch(). In that method, the index entries that have been invalidated by path changes show up as entries but may be deleted because they are not actually content diffs and only newer timestamps than expected. As those entries are deleted, later entries are checked with diff_filespec_check_stat_unmatch(), which uses diff_queued_diff_prefetch() as the missing_object_cb in its diff options. That can trigger downloading missing objects if the appropriate scenario occurs to trigger a call to diff_popoulate_filespec(). It's finally within that callback to diff_queued_diff_prefetch() that the segfault occurs. The test was hard to find because it required some real differences, some not-different files that had a newer modified time, and the order of those files alphabetically was important to trigger the deletion before the prefetch was triggered. I briefly considered a "lock" member for the diff queue, but it was a much larger diff and introduced many more possible error scenarios. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>	2025-12-30 10:53:47 +09:00
Deveshi Dwivedi	861dbb1586	t5403: use test_path_is_file instead of test -f Replace 'test -f' with the test_path_is_file in t5403-post-checkout-hook.sh. This helper provides better error messages when tests fail, making it easier to debug issues. Signed-off-by: Deveshi Dwivedi <deveshigurgaon@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>	2025-12-30 09:23:00 +09:00
Elijah Newren	979ee83e8a	merge-ort: fix corner case recursive submodule/directory conflict handling At GitHub, a few repositories were triggering errors of the form: git: merge-ort.c:3037: process_renames: Assertion `newinfo && !newinfo->merged.clean' failed. Aborted (core dumped) While these may look similar to both a562d90a350d (merge-ort: fix failing merges in special corner case, 2025-11-03) and f6ecb603ff8a (merge-ort: fix directory rename on top of source of other rename/delete, 2025-08-06) the cause is different and in this case the problem is not an over-conservative assertion, but a bug before the assertion where we did not update all relevant state appropriately. It sadly took me a really long time to figure out how to get a simple reproducer for this one. It doesn't really have that many moving parts, but there are multiple pieces of background information needed to understand it. First of all, when we have two files added at the same path, merge-ort does a two-way merge of those files. If we have two directories added at the same path, we basically do the same thing (taking the union of files, and two-way merging files with the same name). But two-way merging requires components of the same type. We can't merge the contents of a regular file with a directory, or with a symlink, or with a submodule. Nor can any of those other types be merged with each other, e.g. merging a submodule with a directory is a bad idea. When two paths have the same name but their types do not match, merge-ort is forced to move one of them to an alternate filename (using the unique_path() function). Second, if two commits being merged have more than one merge-base, merge-ort will merge the merge-bases to create a virtual merge-base, and use that as the base commit. Third, one of the really important optimizations in merge-ort is trivial tree-level resolution (roughly meaning merging trees without recursing into them). This optimization has some nuance to it that is important to the current bug, and to understand it, it helps to first look at the high-level overview of how merge-ort runs; there are basically three high-level functions that the work is divided between: collect_merge_info() - walks the top-level trees getting individual paths of interest detect_renames() - detect renames between paths in order to match up paths for three-way merging process_entries() - does a few things of interest: * three-way merging of files, * other special handling (e.g. adjusting paths with conflicting types to avoid path collisions) * as it finishes handling all the files within a subdirectory, writes out a new tree object for that directory If it were not for renames, we could just always do tree-level merging whenever the tree on at least one side was unmodified. Unfortunately, we need to recurse into trees to determine whether there are renames. However, we can also do tree-level merging so long as there aren't any relevant renames (another merge-ort optimization), which we can determine without recursing into trees. We would also be able to do tree-level merging if we somehow apriori knew what renames existed, by only recursing into the trees which we could otherwise trivially merge if they contained files involved in renames. That might not seem useful, because we need to find out the renames and we have to recurse into trees to do so, but when you find out that the process_entries() step is more computationally expensive than the collect_merge_info() step, it yields an interesting strategy: * run collect_merge_info() * run detect_renames() * cache the renames() * restart -- rerun collect_merge_info(), using the cached renames to only recurse into the needed trees * we already have the renames cached so no need to re-detect * run process_entries() on the reduced list of paths which was implemented back in 7bee6c100431 (merge-ort: avoid recursing into directories when we don't need to, 2021-07-16) Crucially, this restarting only occurs if the number of paths we could skip recursing into exceeds the number we still need to recurse into by some safety factor (wanted_factor in handle_deferred_entries()); forgetting this fact is a great way to repeatedly fail to create a minimal testcase for several days and go down alternate wrong paths). Now, I earlier summarized this optimization as "merging trees without recursing into them", but this optimization does not require that all three sides of history has a directory at a given path. So long as the tree on one side matches the tree in the base version, we can decide to resolve in favor of whatever the other side of history has at that path -- be it a directory, a file, a submodule, or a symlink. Unfortunately, the code in question didn't fully realize this, and was written assuming the base version and both sides would have a directory at the given path, as can be seen by the "ci->filemask == 0" comment in resolve_trivial_directory_merge() that was added as part of 7bee6c100431 (merge-ort: avoid recursing into directories when we don't need to, 2021-07-16). A few additional lines of code are needed to handle cases where we have something other than a directory on the other side of history. But, knowing that resolve_trivial_directory_merge() doesn't have sufficient state updating logic doesn't show us how to trigger a bug without combining with the other bits of information we provided above. Here's a relevant testcase: * branches A & B * commit A1: adds "folder" as a directory with files tracked under it * commit B1: adds "folder" as a submodule * commit A2: merges B1 into A1, keeping "folder" as a directory (and in fact, with no changes to "folder" since A1), discarding the submodule * commit B2: merges A1 into B1, keeping "folder" as a submodule (and in fact, with no changes to "folder" since B1), discarding the directory Here, if we try to merge A2 & B2, the logic proceeds as follows: * we have multiple merge-bases: A1 & B1. So we have to merge those to get a virtual merge base. * due to "folder" as a directory and "folder" as a submodule, the path collision logic triggers and renames "folder" as a submodule to "folder~Temporary merge branch 2" so we can keep it alongside "folder" as a directory. * we now have a virtual merge base (containing both "folder" directory and a "folder~Temporary merge branch 2" submodule) and can now do the outer merge * in the first step of the outer merge, we attempt to defer recursing into folder/ as a directory, but find we need to for rename detection. * in rename detection, we note that "folder~Temporary merge branch 2" has the same hash as "folder" as a submodule in B2, which means we have an exact rename. * after rename detection, we discover no path in folder/ is needed for renames, and so we can cache renames and restart. * after restarting, we avoid recursing into "folder/" and realize we can resolve it trivially since it hasn't been modified. The resolution removes "folder/", leaving us only "folder" as a submodule from commit B2. * After this point, we should have a rename/delete conflict on "folder~Temporary merge branch 2" -> "folder", but our marking of the merge of "folder" as clean broke our ability to handle that and in fact triggers an assertion in process_renames(). When there was a df_conflict (directory/"file" conflict, where "file" could be submodule or regular file or symlink), ensure resolve_trivial_directory_merge() handles it properly. In particular: * do not pre-emptively mark the path as cleanly merged if the remaining path is a file; allow it to be processed in process_entries() later to determine if it was clean * clear the parts of dirmask or filemask corresponding to the matching sides of history, since we are resolving those away * clear the df_conflict bit afterwards; since we cleared away the two matching sides and only have one side left, that one side can't have a directory/file conflict with itself. Also add the above minimal testcase showcasing this bug to t6422, with a sufficient number of paths under the folder/ directory to actually trigger it. (I wish I could have all those days back from all the wrong paths I went down due to not having enough files under that directory...) I know this commit has a very high ratio of lines in the commit message to lines of comments, and a relatively high ratio of comments to actual code, but given how long it took me to track down, on the off chance that we ever need to further modify this logic, I wanted it thoroughly documented for future me and for whatever other poor soul might end up needing to read this commit message. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>	2025-12-30 08:59:52 +09:00
René Scharfe	009fceeda2	tag: stop using the_repository gpg_verify_tag() shows the passed in object name on error. Both callers provide one. It falls back to abbreviated hashes for future callers that pass in a NULL name. DEFAULT_ABBREV is default_abbrev, which in turn is a global variable that's populated by git_default_config() and only available with USE_THE_REPOSITORY_VARIABLE. Don't let that hypothetical hold us back from getting rid of the_repository in tag.c. Fall back to full hashes, which are more appropriate for error messages anyway. This allows us to stop setting USE_THE_REPOSITORY_VARIABLE. Signed-off-by: René Scharfe <l.s.r@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>	2025-12-29 22:02:54 +09:00
René Scharfe	b6e4cc8c32	tag: support arbitrary repositories in parse_tag() Allow callers of parse_tag() pass in the repository to use. Let most of them pass in the_repository to get the same result as before. One of them has stopped using the_repository in ef9b0370da (sha1-name.c: store and use repo in struct disambiguate_state, 2019-04-16); let it pass in its stored repository. Signed-off-by: René Scharfe <l.s.r@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>	2025-12-29 22:02:54 +09:00
René Scharfe	154717b3b0	tag: support arbitrary repositories in gpg_verify_tag() Allow callers of gpg_verify_tag() specify the repository to use by providing a parameter for that. One of the two has not been using the_repository since 43a8391977 (builtin/verify-tag: stop using `the_repository`, 2025-03-08); let it pass in the correct repository. The other simply passes the_repository to get the same result as before. Signed-off-by: René Scharfe <l.s.r@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>	2025-12-29 22:02:53 +09:00
René Scharfe	e61f227d06	tag: use algo of repo parameter in parse_tag_buffer() Stop using "the_hash_algo" explicitly and implictly via parse_oid_hex() and instead use the "hash_algo" member of the passed in repository, which is more correct. Signed-off-by: René Scharfe <l.s.r@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>	2025-12-29 22:02:53 +09:00