From b3de3832ce7497d6567d2d8270c829585b9fbf61 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 19 May 2025 11:58:06 +0200 Subject: [PATCH 1/6] refs: add function to translate errors to strings The commit 76e760b999 (refs: introduce enum-based transaction error types, 2025-04-08) introduced enum-based transaction error types. The refs transaction logic was also modified to propagate these errors. For clients of the ref transaction system, it would be beneficial to provide human readable messages for these errors. There is already an existing mapping in 'builtin/update-ref.c', move it to 'refs.c' as `ref_transaction_error_msg()` and use the same within the 'builtin/update-ref.c'. Helped-by: Junio C Hamano Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/update-ref.c | 25 +------------------------ refs.c | 20 ++++++++++++++++++++ refs.h | 5 +++++ 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/builtin/update-ref.c b/builtin/update-ref.c index 2b1e336ba1..1e6131e04a 100644 --- a/builtin/update-ref.c +++ b/builtin/update-ref.c @@ -575,30 +575,7 @@ static void print_rejected_refs(const char *refname, void *cb_data UNUSED) { struct strbuf sb = STRBUF_INIT; - const char *reason = ""; - - switch (err) { - case REF_TRANSACTION_ERROR_NAME_CONFLICT: - reason = "refname conflict"; - break; - case REF_TRANSACTION_ERROR_CREATE_EXISTS: - reason = "reference already exists"; - break; - case REF_TRANSACTION_ERROR_NONEXISTENT_REF: - reason = "reference does not exist"; - break; - case REF_TRANSACTION_ERROR_INCORRECT_OLD_VALUE: - reason = "incorrect old value provided"; - break; - case REF_TRANSACTION_ERROR_INVALID_NEW_VALUE: - reason = "invalid new value provided"; - break; - case REF_TRANSACTION_ERROR_EXPECTED_SYMREF: - reason = "expected symref but found regular ref"; - break; - default: - reason = "unkown failure"; - } + const char *reason = ref_transaction_error_msg(err); strbuf_addf(&sb, "rejected %s %s %s %s\n", refname, new_oid ? oid_to_hex(new_oid) : new_target, diff --git a/refs.c b/refs.c index dce5c49ca2..3d580d03bb 100644 --- a/refs.c +++ b/refs.c @@ -3314,3 +3314,23 @@ int ref_update_expects_existing_old_ref(struct ref_update *update) return (update->flags & REF_HAVE_OLD) && (!is_null_oid(&update->old_oid) || update->old_target); } + +const char *ref_transaction_error_msg(enum ref_transaction_error err) +{ + switch (err) { + case REF_TRANSACTION_ERROR_NAME_CONFLICT: + return "refname conflict"; + case REF_TRANSACTION_ERROR_CREATE_EXISTS: + return "reference already exists"; + case REF_TRANSACTION_ERROR_NONEXISTENT_REF: + return "reference does not exist"; + case REF_TRANSACTION_ERROR_INCORRECT_OLD_VALUE: + return "incorrect old value provided"; + case REF_TRANSACTION_ERROR_INVALID_NEW_VALUE: + return "invalid new value provided"; + case REF_TRANSACTION_ERROR_EXPECTED_SYMREF: + return "expected symref but found regular ref"; + default: + return "unknown failure"; + } +} diff --git a/refs.h b/refs.h index 46a6008e07..2d58af3d88 100644 --- a/refs.h +++ b/refs.h @@ -907,6 +907,11 @@ void ref_transaction_for_each_rejected_update(struct ref_transaction *transactio ref_transaction_for_each_rejected_update_fn cb, void *cb_data); +/* + * Translate errors to human readable error messages. + */ +const char *ref_transaction_error_msg(enum ref_transaction_error err); + /* * Free `*transaction` and all associated data. */ From 0e358de64a9e014575d11ef884bfc9beb931e37f Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 19 May 2025 11:58:07 +0200 Subject: [PATCH 2/6] fetch: use batched reference updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reference updates performed as a part of 'git-fetch(1)', take place one at a time. For each reference update, a new transaction is created and committed. This is necessary to ensure we can allow individual updates to fail without failing the entire command. The command also supports an '--atomic' mode, which uses a single transaction to update all of the references. But this mode has an all-or-nothing approach, where if a single update fails, all updates would fail. In 23fc8e4f61 (refs: implement batch reference update support, 2025-04-08), we introduced a new mechanism to batch reference updates. Under the hood, this uses a single transaction to perform a batch of reference updates, while allowing only individual updates to fail. Utilize this newly introduced batch update mechanism in 'git-fetch(1)'. This provides a significant bump in performance, especially when dealing with repositories with large number of references. Adding support for batched updates is simply modifying the flow to also create a batch update transaction in the non-atomic flow. With the reftable backend there is a 22x performance improvement, when performing 'git-fetch(1)' with 10000 refs: Benchmark 1: fetch: many refs (refformat = reftable, refcount = 10000, revision = master) Time (mean ± σ): 3.403 s ± 0.775 s [User: 1.875 s, System: 1.417 s] Range (min … max): 2.454 s … 4.529 s 10 runs Benchmark 2: fetch: many refs (refformat = reftable, refcount = 10000, revision = HEAD) Time (mean ± σ): 154.3 ms ± 17.6 ms [User: 102.5 ms, System: 56.1 ms] Range (min … max): 145.2 ms … 220.5 ms 18 runs Summary fetch: many refs (refformat = reftable, refcount = 10000, revision = HEAD) ran 22.06 ± 5.62 times faster than fetch: many refs (refformat = reftable, refcount = 10000, revision = master) In similar conditions, the files backend sees a 1.25x performance improvement: Benchmark 1: fetch: many refs (refformat = files, refcount = 10000, revision = master) Time (mean ± σ): 605.5 ms ± 9.4 ms [User: 117.8 ms, System: 483.3 ms] Range (min … max): 595.6 ms … 621.5 ms 10 runs Benchmark 2: fetch: many refs (refformat = files, refcount = 10000, revision = HEAD) Time (mean ± σ): 485.8 ms ± 4.3 ms [User: 91.1 ms, System: 396.7 ms] Range (min … max): 477.6 ms … 494.3 ms 10 runs Summary fetch: many refs (refformat = files, refcount = 10000, revision = HEAD) ran 1.25 ± 0.02 times faster than fetch: many refs (refformat = files, refcount = 10000, revision = master) With this we'll either be using a regular transaction or a batch update transaction. This helps cleanup some code which is no longer needed as we'll now always have some type of 'ref_transaction' object being propagated. One big change is that earlier, each individual update would propagate a failure. Whereas now, the `ref_transaction_for_each_rejected_update` function is called at the end of the flow to capture the exit status for 'git-fetch(1)' and also to print F/D conflict errors. This does change the order of the errors being printed, but the behavior stays the same. Since transaction errors are now explicitly defined as part of 76e760b999 (refs: introduce enum-based transaction error types, 2025-04-08), utilize them and get rid of custom errors defined within 'builtin/fetch.c'. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/fetch.c | 127 ++++++++++++++++++++++++++++-------------------- 1 file changed, 73 insertions(+), 54 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index cda6eaf1fd..cc0a3deb61 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -640,9 +640,6 @@ static struct ref *get_ref_map(struct remote *remote, return ref_map; } -#define STORE_REF_ERROR_OTHER 1 -#define STORE_REF_ERROR_DF_CONFLICT 2 - static int s_update_ref(const char *action, struct ref *ref, struct ref_transaction *transaction, @@ -650,7 +647,6 @@ static int s_update_ref(const char *action, { char *msg; char *rla = getenv("GIT_REFLOG_ACTION"); - struct ref_transaction *our_transaction = NULL; struct strbuf err = STRBUF_INIT; int ret; @@ -660,43 +656,10 @@ static int s_update_ref(const char *action, rla = default_rla.buf; msg = xstrfmt("%s: %s", rla, action); - /* - * If no transaction was passed to us, we manage the transaction - * ourselves. Otherwise, we trust the caller to handle the transaction - * lifecycle. - */ - if (!transaction) { - transaction = our_transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - 0, &err); - if (!transaction) { - ret = STORE_REF_ERROR_OTHER; - goto out; - } - } - ret = ref_transaction_update(transaction, ref->name, &ref->new_oid, check_old ? &ref->old_oid : NULL, NULL, NULL, 0, msg, &err); - if (ret) { - ret = STORE_REF_ERROR_OTHER; - goto out; - } - if (our_transaction) { - switch (ref_transaction_commit(our_transaction, &err)) { - case 0: - break; - case REF_TRANSACTION_ERROR_NAME_CONFLICT: - ret = STORE_REF_ERROR_DF_CONFLICT; - goto out; - default: - ret = STORE_REF_ERROR_OTHER; - goto out; - } - } - -out: - ref_transaction_free(our_transaction); if (ret) error("%s", err.buf); strbuf_release(&err); @@ -1139,7 +1102,6 @@ N_("it took %.2f seconds to check forced updates; you can use\n" "to avoid this check\n"); static int store_updated_refs(struct display_state *display_state, - const char *remote_name, int connectivity_checked, struct ref_transaction *transaction, struct ref *ref_map, struct fetch_head *fetch_head, @@ -1277,11 +1239,6 @@ static int store_updated_refs(struct display_state *display_state, } } - if (rc & STORE_REF_ERROR_DF_CONFLICT) - error(_("some local refs could not be updated; try running\n" - " 'git remote prune %s' to remove any old, conflicting " - "branches"), remote_name); - if (advice_enabled(ADVICE_FETCH_SHOW_FORCED_UPDATES)) { if (!config->show_forced_updates) { warning(_(warn_show_forced_updates)); @@ -1365,9 +1322,8 @@ static int fetch_and_consume_refs(struct display_state *display_state, } trace2_region_enter("fetch", "consume_refs", the_repository); - ret = store_updated_refs(display_state, transport->remote->name, - connectivity_checked, transaction, ref_map, - fetch_head, config); + ret = store_updated_refs(display_state, connectivity_checked, + transaction, ref_map, fetch_head, config); trace2_region_leave("fetch", "consume_refs", the_repository); out: @@ -1687,6 +1643,36 @@ cleanup: return result; } +struct ref_rejection_data { + int *retcode; + int conflict_msg_shown; + const char *remote_name; +}; + +static void ref_transaction_rejection_handler(const char *refname, + const struct object_id *old_oid UNUSED, + const struct object_id *new_oid UNUSED, + const char *old_target UNUSED, + const char *new_target UNUSED, + enum ref_transaction_error err, + void *cb_data) +{ + struct ref_rejection_data *data = cb_data; + + if (err == REF_TRANSACTION_ERROR_NAME_CONFLICT && !data->conflict_msg_shown) { + error(_("some local refs could not be updated; try running\n" + " 'git remote prune %s' to remove any old, conflicting " + "branches"), data->remote_name); + data->conflict_msg_shown = 1; + } else { + const char *reason = ref_transaction_error_msg(err); + + error(_("fetching ref %s failed: %s"), refname, reason); + } + + *data->retcode = 1; +} + static int do_fetch(struct transport *transport, struct refspec *rs, const struct fetch_config *config) @@ -1807,6 +1793,24 @@ static int do_fetch(struct transport *transport, retcode = 1; } + /* + * If not atomic, we can still use batched updates, which would be much + * more performant. We don't initiate the transaction before pruning, + * since pruning must be an independent step, to avoid F/D conflicts. + * + * TODO: if reference transactions gain logical conflict resolution, we + * can delete and create refs (with F/D conflicts) in the same transaction + * and this can be moved above the 'prune_refs()' block. + */ + if (!transaction) { + transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), + REF_TRANSACTION_ALLOW_FAILURE, &err); + if (!transaction) { + retcode = -1; + goto cleanup; + } + } + if (fetch_and_consume_refs(&display_state, transport, transaction, ref_map, &fetch_head, config)) { retcode = 1; @@ -1838,16 +1842,31 @@ static int do_fetch(struct transport *transport, free_refs(tags_ref_map); } - if (transaction) { - if (retcode) - goto cleanup; + if (retcode) + goto cleanup; - retcode = ref_transaction_commit(transaction, &err); + retcode = ref_transaction_commit(transaction, &err); + if (retcode) { + /* + * Explicitly handle transaction cleanup to avoid + * aborting an already closed transaction. + */ + ref_transaction_free(transaction); + transaction = NULL; + goto cleanup; + } + + if (!atomic_fetch) { + struct ref_rejection_data data = { + .retcode = &retcode, + .conflict_msg_shown = 0, + .remote_name = transport->remote->name, + }; + + ref_transaction_for_each_rejected_update(transaction, + ref_transaction_rejection_handler, + &data); if (retcode) { - /* - * Explicitly handle transaction cleanup to avoid - * aborting an already closed transaction. - */ ref_transaction_free(transaction); transaction = NULL; goto cleanup; From 77188b5bbaf1f77963968ea3acedda3108102b18 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 19 May 2025 11:58:08 +0200 Subject: [PATCH 3/6] send-pack: fix memory leak around duplicate refs The 'git-send-pack(1)' allows users to push objects to a remote repository and explicitly list the references to be pushed. The status of each reference pushed is captured into a list mapped by refname. If a reference fails to be updated, its error message is captured in the `ref->remote_status` field. While the command allows duplicate ref inputs, the list doesn't accommodate this behavior as a particular refname is linked to a single `struct ref*` element. So if the user inputs a reference twice like: git send-pack remote.git A:foo B:foo where the user is trying to update the same reference 'foo' twice and the reference fails to be updated, we first fill `ref->remote_status` with error message for the input 'A:foo' then we override the same field with the error message for 'B:foo'. This override happens without first free'ing the previous value. Fix this leak. The current tests already incorporate the above example, but in the test 'A:foo' succeeds while 'B:foo' fails, meaning that the memory leak isn't triggered. Add a new test with multiple duplicates. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- send-pack.c | 7 +++++++ t/t5408-send-pack-stdin.sh | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/send-pack.c b/send-pack.c index 86592ce526..e2faa25b98 100644 --- a/send-pack.c +++ b/send-pack.c @@ -257,6 +257,13 @@ static int receive_status(struct repository *r, refname); continue; } + + /* + * Clients sending duplicate refs can cause the same value + * to be overridden, causing a memory leak. + */ + free(hint->remote_status); + if (!strcmp(head, "ng")) { hint->status = REF_STATUS_REMOTE_REJECT; if (p) diff --git a/t/t5408-send-pack-stdin.sh b/t/t5408-send-pack-stdin.sh index 526a675045..45fb20179b 100755 --- a/t/t5408-send-pack-stdin.sh +++ b/t/t5408-send-pack-stdin.sh @@ -73,6 +73,12 @@ test_expect_success 'cmdline refs written in order' ' verify_push A foo ' +test_expect_success 'cmdline refs with multiple duplicates' ' + clear_remote && + test_must_fail git send-pack remote.git A:foo B:foo C:foo && + verify_push A foo +' + test_expect_success '--stdin refs come after cmdline' ' clear_remote && echo A:foo >input && From 9d2962a7c44fd5f9abec634d05fe28cdafd60036 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 19 May 2025 11:58:09 +0200 Subject: [PATCH 4/6] receive-pack: use batched reference updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reference updates performed as a part of 'git-receive-pack(1)', take place one at a time. For each reference update, a new transaction is created and committed. This is necessary to ensure we can allow individual updates to fail without failing the entire command. The command also supports an 'atomic' mode, which uses a single transaction to update all of the references. But this mode has an all-or-nothing approach, where if a single update fails, all updates would fail. In 23fc8e4f61 (refs: implement batch reference update support, 2025-04-08), we introduced a new mechanism to batch reference updates. Under the hood, this uses a single transaction to perform a batch of reference updates, while allowing only individual updates to fail. Utilize this newly introduced batch update mechanism in 'git-receive-pack(1)'. This provides a significant bump in performance, especially when dealing with repositories with large number of references. With the reftable backend there is a 18x performance improvement, when performing receive-pack with 10000 refs: Benchmark 1: receive: many refs (refformat = reftable, refcount = 10000, revision = master) Time (mean ± σ): 4.276 s ± 0.078 s [User: 0.796 s, System: 3.318 s] Range (min … max): 4.185 s … 4.430 s 10 runs Benchmark 2: receive: many refs (refformat = reftable, refcount = 10000, revision = HEAD) Time (mean ± σ): 235.4 ms ± 6.9 ms [User: 75.4 ms, System: 157.3 ms] Range (min … max): 228.5 ms … 254.2 ms 11 runs Summary receive: many refs (refformat = reftable, refcount = 10000, revision = HEAD) ran 18.16 ± 0.63 times faster than receive: many refs (refformat = reftable, refcount = 10000, revision = master) In similar conditions, the files backend sees a 1.21x performance improvement: Benchmark 1: receive: many refs (refformat = files, refcount = 10000, revision = master) Time (mean ± σ): 1.121 s ± 0.021 s [User: 0.128 s, System: 0.975 s] Range (min … max): 1.097 s … 1.156 s 10 runs Benchmark 2: receive: many refs (refformat = files, refcount = 10000, revision = HEAD) Time (mean ± σ): 927.9 ms ± 22.6 ms [User: 99.0 ms, System: 815.2 ms] Range (min … max): 903.1 ms … 978.0 ms 10 runs Summary receive: many refs (refformat = files, refcount = 10000, revision = HEAD) ran 1.21 ± 0.04 times faster than receive: many refs (refformat = files, refcount = 10000, revision = master) As using batched updates requires the error handling to be moved to the end of the flow, create and use a 'struct strset' to track the failed refs and attribute the correct errors to them. This change also uncovers an issue when a client provides multiple updates to the same reference. For example: $ git send-pack remote.git A:foo B:foo Enumerating objects: 3, done. Counting objects: 100% (3/3), done. Delta compression using up to 20 threads Compressing objects: 100% (2/2), done. Writing objects: 100% (3/3), 226 bytes | 226.00 KiB/s, done. Total 3 (delta 1), reused 0 (delta 0), pack-reused 0 (from 0) remote: error: cannot lock ref 'refs/heads/foo': reference already exists To remote.git ! [remote rejected] A -> foo (failed to update ref) ! [remote failure] B -> foo (remote failed to report status) As you can see, the remote runs into an error because it cannot lock the target reference for the second update. Furthermore, the remote complains that the first update has been rejected whereas the second update didn't receive any status update because we failed to lock it. Reading this status message alone a user would probably expect that `foo` has not been updated at all. But that's not the case: while we claim that the ref wasn't updated, it surprisingly points to `A` now. One could argue that this is merely an error in how we report the result of this push. But ultimately, the user's request itself is already broken and doesn't make any sense in the first place and cannot ever lead to a sensible outcome that honors the full request. The conversion to batched transactions fixes the issue because we now try to queue both updates in the same transaction. As such, the transaction itself will notice this conflict and refuse the update altogether before we commit any of the values. Note that this requires changes to a couple of tests in t5408 that happened to exercise this behaviour. Given that the generated output is misleading and given that the user request cannot ever be fully honored this really feels more like a bug than properly designed behaviour. As such, changing the behaviour feels like the right thing to do. Since now reference updates are batched, the 'reference-transaction' hook will be invoked with all updates together. Currently git will 'die' when the hook returns with a non-zero exit status in the 'prepared' stage. For 'git-receive-pack(1)', this allowed users to reject an individual reference update, git would have applied previous updates but immediately abort further execution. This is definitely an incorrect usage of this hook, since the right place to do this would be the 'update' hook. This patch retains the latter behavior, but 'reference-transaction' hook now changes to a all-or-nothing behavior when a non-zero exit status is returned in the 'prepared' stage, since batch updates use a transaction under the hood. This explains the change in 't1416'. Helped-by: Jeff King Helped-by: Patrick Steinhardt Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 66 ++++++++++++++++++++++++-------- t/t1416-ref-transaction-hooks.sh | 2 - t/t5408-send-pack-stdin.sh | 13 ++++--- 3 files changed, 57 insertions(+), 24 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index c92e57ba18..01e5601342 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1845,35 +1845,67 @@ static void BUG_if_skipped_connectivity_check(struct command *commands, BUG_if_bug("connectivity check skipped???"); } +static void ref_transaction_rejection_handler(const char *refname, + const struct object_id *old_oid UNUSED, + const struct object_id *new_oid UNUSED, + const char *old_target UNUSED, + const char *new_target UNUSED, + enum ref_transaction_error err, + void *cb_data) +{ + struct strmap *failed_refs = cb_data; + + strmap_put(failed_refs, refname, (char *)ref_transaction_error_msg(err)); +} + static void execute_commands_non_atomic(struct command *commands, struct shallow_info *si) { struct command *cmd; struct strbuf err = STRBUF_INIT; + const char *reported_error = NULL; + struct strmap failed_refs = STRMAP_INIT; + + transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), + REF_TRANSACTION_ALLOW_FAILURE, &err); + if (!transaction) { + rp_error("%s", err.buf); + strbuf_reset(&err); + reported_error = "transaction failed to start"; + goto failure; + } for (cmd = commands; cmd; cmd = cmd->next) { if (!should_process_cmd(cmd) || cmd->run_proc_receive) continue; - transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - 0, &err); - if (!transaction) { - rp_error("%s", err.buf); - strbuf_reset(&err); - cmd->error_string = "transaction failed to start"; - continue; - } - cmd->error_string = update(cmd, si); - - if (!cmd->error_string - && ref_transaction_commit(transaction, &err)) { - rp_error("%s", err.buf); - strbuf_reset(&err); - cmd->error_string = "failed to update ref"; - } - ref_transaction_free(transaction); } + + if (ref_transaction_commit(transaction, &err)) { + rp_error("%s", err.buf); + reported_error = "failed to update refs"; + goto failure; + } + + ref_transaction_for_each_rejected_update(transaction, + ref_transaction_rejection_handler, + &failed_refs); + + if (strmap_empty(&failed_refs)) + goto cleanup; + +failure: + for (cmd = commands; cmd; cmd = cmd->next) { + if (reported_error) + cmd->error_string = reported_error; + else if (strmap_contains(&failed_refs, cmd->ref_name)) + cmd->error_string = strmap_get(&failed_refs, cmd->ref_name); + } + +cleanup: + ref_transaction_free(transaction); + strmap_clear(&failed_refs, 0); strbuf_release(&err); } diff --git a/t/t1416-ref-transaction-hooks.sh b/t/t1416-ref-transaction-hooks.sh index 8c777f7cf8..d91dd3a3b5 100755 --- a/t/t1416-ref-transaction-hooks.sh +++ b/t/t1416-ref-transaction-hooks.sh @@ -120,8 +120,6 @@ test_expect_success 'interleaving hook calls succeed' ' cat >expect <<-EOF && hooks/update refs/tags/PRE $ZERO_OID $PRE_OID - hooks/reference-transaction prepared - hooks/reference-transaction committed hooks/update refs/tags/POST $ZERO_OID $POST_OID hooks/reference-transaction prepared hooks/reference-transaction committed diff --git a/t/t5408-send-pack-stdin.sh b/t/t5408-send-pack-stdin.sh index 45fb20179b..ec339761c2 100755 --- a/t/t5408-send-pack-stdin.sh +++ b/t/t5408-send-pack-stdin.sh @@ -69,21 +69,24 @@ test_expect_success 'stdin mixed with cmdline' ' test_expect_success 'cmdline refs written in order' ' clear_remote && - test_must_fail git send-pack remote.git A:foo B:foo && - verify_push A foo + test_must_fail git send-pack remote.git A:foo B:foo 2>err && + test_grep "multiple updates for ref ${SQ}refs/heads/foo${SQ} not allowed" err && + test_must_fail git --git-dir=remote.git rev-parse foo ' test_expect_success 'cmdline refs with multiple duplicates' ' clear_remote && - test_must_fail git send-pack remote.git A:foo B:foo C:foo && - verify_push A foo + test_must_fail git send-pack remote.git A:foo B:foo C:foo 2>err && + test_grep "multiple updates for ref ${SQ}refs/heads/foo${SQ} not allowed" err && + test_must_fail git --git-dir=remote.git rev-parse foo ' test_expect_success '--stdin refs come after cmdline' ' clear_remote && echo A:foo >input && test_must_fail git send-pack remote.git --stdin B:foo Date: Fri, 20 Jun 2025 09:15:44 +0200 Subject: [PATCH 5/6] refs/files: skip updates with errors in batched updates The commit 23fc8e4f61 (refs: implement batch reference update support, 2025-04-08) introduced support for batched reference updates. This allows users to batch updates together, while allowing some of the updates to fail. Under the hood, batched updates use the reference transaction mechanism. Each update which fails is marked as such. Any failed updates must be skipped over in the rest of the code, as they wouldn't apply any more. In two of the loops within 'files_transaction_finish()' of the files backend, the failed updates aren't skipped over. This can cause a SEGFAULT otherwise. Add the missing skips and a test to validate the same. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- refs/files-backend.c | 7 +++++++ t/t1400-update-ref.sh | 45 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/refs/files-backend.c b/refs/files-backend.c index 4d1f65a57a..c4a0f29072 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3208,6 +3208,10 @@ static int files_transaction_finish(struct ref_store *ref_store, */ for (i = 0; i < transaction->nr; i++) { struct ref_update *update = transaction->updates[i]; + + if (update->rejection_err) + continue; + if (update->flags & REF_DELETING && !(update->flags & REF_LOG_ONLY) && !(update->flags & REF_IS_PRUNING)) { @@ -3239,6 +3243,9 @@ static int files_transaction_finish(struct ref_store *ref_store, struct ref_update *update = transaction->updates[i]; struct ref_lock *lock = update->backend_data; + if (update->rejection_err) + continue; + if (update->flags & REF_DELETING && !(update->flags & REF_LOG_ONLY)) { update->flags |= REF_DELETED_RMDIR; diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh index d29d23cb89..ca7eee7de2 100755 --- a/t/t1400-update-ref.sh +++ b/t/t1400-update-ref.sh @@ -2299,6 +2299,51 @@ do test_grep -q "refname conflict" stdout ) ' + + test_expect_success "stdin $type batch-updates delete incorrect symbolic ref" ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + test_commit c1 && + head=$(git rev-parse HEAD) && + git symbolic-ref refs/heads/symbolic refs/heads/non-existent && + + format_command $type "delete refs/heads/symbolic" "$head" >stdin && + git update-ref $type --stdin --batch-updates stdout && + test_grep "reference does not exist" stdout + ) + ' + + test_expect_success "stdin $type batch-updates delete with incorrect old_oid" ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + test_commit c1 && + git branch new-branch && + test_commit c2 && + head=$(git rev-parse HEAD) && + + format_command $type "delete refs/heads/new-branch" "$head" >stdin && + git update-ref $type --stdin --batch-updates stdout && + test_grep "incorrect old value provided" stdout + ) + ' + + test_expect_success "stdin $type batch-updates delete non-existent ref" ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + test_commit commit && + head=$(git rev-parse HEAD) && + + format_command $type "delete refs/heads/non-existent" "$head" >stdin && + git update-ref $type --stdin --batch-updates stdout && + test_grep "reference does not exist" stdout + ) + ' done test_expect_success 'update-ref should also create reflog for HEAD' ' From 5c697f0b7ddbc85965bcba00c29d4b823bd221b7 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Fri, 20 Jun 2025 09:15:45 +0200 Subject: [PATCH 6/6] receive-pack: handle reference deletions separately In 9d2962a7c4 (receive-pack: use batched reference updates, 2025-05-19) we updated the 'git-receive-pack(1)' command to use batched reference updates. One edge case which was missed during this implementation was when a user pushes multiple branches such as: delete refs/heads/branch/conflict create refs/heads/branch Before using batched updates, the references would be applied sequentially and hence no conflicts would arise. With batched updates, while the first update applies, the second fails due to D/F conflict. A similar issue was present in 'git-fetch(1)' and was fixed by separating out reference pruning into a separate transaction in the commit 'fetch: use batched reference updates'. Apply a similar mechanism for 'git-receive-pack(1)' and separate out reference deletions into its own batch. This means 'git-receive-pack(1)' will now use up to two transactions, whereas before using batched updates it would use _at least_ two transactions. So using batched updates is still the better option. Add a test to validate this behavior. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 114 ++++++++++++++++++++++++++--------------- t/t5516-fetch-push.sh | 17 ++++-- 2 files changed, 87 insertions(+), 44 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 01e5601342..5bf5d1fcac 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1866,47 +1866,81 @@ static void execute_commands_non_atomic(struct command *commands, const char *reported_error = NULL; struct strmap failed_refs = STRMAP_INIT; - transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - REF_TRANSACTION_ALLOW_FAILURE, &err); - if (!transaction) { - rp_error("%s", err.buf); - strbuf_reset(&err); - reported_error = "transaction failed to start"; - goto failure; + /* + * Reference updates, where D/F conflicts shouldn't arise due to + * one reference being deleted, while the other being created + * are treated as conflicts in batched updates. This is because + * we don't do conflict resolution inside a transaction. To + * mitigate this, delete references in a separate batch. + * + * NEEDSWORK: Add conflict resolution between deletion and creation + * of reference updates within a transaction. With that, we can + * combine the two phases. + */ + enum processing_phase { + PHASE_DELETIONS, + PHASE_OTHERS + }; + + for (enum processing_phase phase = PHASE_DELETIONS; phase <= PHASE_OTHERS; phase++) { + for (cmd = commands; cmd; cmd = cmd->next) { + if (!should_process_cmd(cmd) || cmd->run_proc_receive) + continue; + + if (phase == PHASE_DELETIONS && !is_null_oid(&cmd->new_oid)) + continue; + else if (phase == PHASE_OTHERS && is_null_oid(&cmd->new_oid)) + continue; + + /* + * Lazily create a transaction only when we know there are + * updates to be added. + */ + if (!transaction) { + transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), + REF_TRANSACTION_ALLOW_FAILURE, &err); + if (!transaction) { + rp_error("%s", err.buf); + strbuf_reset(&err); + reported_error = "transaction failed to start"; + goto failure; + } + } + + cmd->error_string = update(cmd, si); + } + + /* No transaction, so nothing to commit */ + if (!transaction) + goto cleanup; + + if (ref_transaction_commit(transaction, &err)) { + rp_error("%s", err.buf); + reported_error = "failed to update refs"; + goto failure; + } + + ref_transaction_for_each_rejected_update(transaction, + ref_transaction_rejection_handler, + &failed_refs); + + if (strmap_empty(&failed_refs)) + goto cleanup; + + failure: + for (cmd = commands; cmd; cmd = cmd->next) { + if (reported_error) + cmd->error_string = reported_error; + else if (strmap_contains(&failed_refs, cmd->ref_name)) + cmd->error_string = strmap_get(&failed_refs, cmd->ref_name); + } + + cleanup: + ref_transaction_free(transaction); + transaction = NULL; + strmap_clear(&failed_refs, 0); + strbuf_release(&err); } - - for (cmd = commands; cmd; cmd = cmd->next) { - if (!should_process_cmd(cmd) || cmd->run_proc_receive) - continue; - - cmd->error_string = update(cmd, si); - } - - if (ref_transaction_commit(transaction, &err)) { - rp_error("%s", err.buf); - reported_error = "failed to update refs"; - goto failure; - } - - ref_transaction_for_each_rejected_update(transaction, - ref_transaction_rejection_handler, - &failed_refs); - - if (strmap_empty(&failed_refs)) - goto cleanup; - -failure: - for (cmd = commands; cmd; cmd = cmd->next) { - if (reported_error) - cmd->error_string = reported_error; - else if (strmap_contains(&failed_refs, cmd->ref_name)) - cmd->error_string = strmap_get(&failed_refs, cmd->ref_name); - } - -cleanup: - ref_transaction_free(transaction); - strmap_clear(&failed_refs, 0); - strbuf_release(&err); } static void execute_commands_atomic(struct command *commands, diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index dabcc5f811..1649667441 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -744,8 +744,8 @@ test_expect_success 'pushing valid refs triggers post-receive and post-update ho EOF cat >update.expect <<-EOF && - refs/heads/main $orgmain $newmain refs/heads/next $orgnext $newnext + refs/heads/main $orgmain $newmain EOF cat >post-receive.expect <<-EOF && @@ -808,8 +808,8 @@ test_expect_success 'deletion of a non-existent ref is not fed to post-receive a EOF cat >update.expect <<-EOF && - refs/heads/main $orgmain $newmain refs/heads/nonexistent $ZERO_OID $ZERO_OID + refs/heads/main $orgmain $newmain EOF cat >post-receive.expect <<-EOF && @@ -868,10 +868,10 @@ test_expect_success 'mixed ref updates, deletes, invalid deletes trigger hooks w EOF cat >update.expect <<-EOF && - refs/heads/main $orgmain $newmain refs/heads/next $orgnext $newnext - refs/heads/seen $orgseen $newseen refs/heads/nonexistent $ZERO_OID $ZERO_OID + refs/heads/main $orgmain $newmain + refs/heads/seen $orgseen $newseen EOF cat >post-receive.expect <<-EOF && @@ -1909,4 +1909,13 @@ test_expect_success 'push with config push.useBitmaps' ' --thin --delta-base-offset -q --no-use-bitmap-index