From 0ea94b023a64d1341a11252954bb7ce37dd3d922 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 08:57:14 +0200 Subject: [PATCH 01/11] builtin/gc: remove global `repack` variable The global `repack` variable is used to store all command line arguments that we eventually want to pass to git-repack(1). It is being appended to from multiple different functions, which makes it hard to follow the logic. Besides being hard to follow, it also makes it unnecessarily hard to reuse this infrastructure in new code. Refactor the code so that we store this variable on the stack and pass a pointer to it around as needed. This is done so that we can reuse `add_repack_all_options()` in a subsequent commit. The refactoring itself is straight-forward. One function that deserves attention though is `need_to_gc()`: this function determines whether or not we need to execute garbage collection for `git gc --auto`, but also for `git maintenance run --auto`. But besides figuring out whether we have to perform GC, the function also sets up the `repack` arguments. For `git gc --auto` it's trivial to adapt, as we already have the on-stack variable at our fingertips. But for the maintenance condition it's less obvious what to do. As it turns out, we can just use another temporary variable there that we then immediately discard. If we need to perform GC we execute a child git-gc(1) process to repack objects for us, and that process will have to recompute the arguments anyway. Signed-off-by: Patrick Steinhardt Acked-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/gc.c | 74 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index e19e13d978..e9772eb3a3 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -55,7 +55,6 @@ static const char * const builtin_gc_usage[] = { }; static timestamp_t gc_log_expire_time; -static struct strvec repack = STRVEC_INIT; static struct tempfile *pidfile; static struct lock_file log_lock; static struct string_list pack_garbage = STRING_LIST_INIT_DUP; @@ -618,48 +617,50 @@ static uint64_t estimate_repack_memory(struct gc_config *cfg, return os_cache + heap; } -static int keep_one_pack(struct string_list_item *item, void *data UNUSED) +static int keep_one_pack(struct string_list_item *item, void *data) { - strvec_pushf(&repack, "--keep-pack=%s", basename(item->string)); + struct strvec *args = data; + strvec_pushf(args, "--keep-pack=%s", basename(item->string)); return 0; } static void add_repack_all_option(struct gc_config *cfg, - struct string_list *keep_pack) + struct string_list *keep_pack, + struct strvec *args) { if (cfg->prune_expire && !strcmp(cfg->prune_expire, "now") && !(cfg->cruft_packs && cfg->repack_expire_to)) - strvec_push(&repack, "-a"); + strvec_push(args, "-a"); else if (cfg->cruft_packs) { - strvec_push(&repack, "--cruft"); + strvec_push(args, "--cruft"); if (cfg->prune_expire) - strvec_pushf(&repack, "--cruft-expiration=%s", cfg->prune_expire); + strvec_pushf(args, "--cruft-expiration=%s", cfg->prune_expire); if (cfg->max_cruft_size) - strvec_pushf(&repack, "--max-cruft-size=%lu", + strvec_pushf(args, "--max-cruft-size=%lu", cfg->max_cruft_size); if (cfg->repack_expire_to) - strvec_pushf(&repack, "--expire-to=%s", cfg->repack_expire_to); + strvec_pushf(args, "--expire-to=%s", cfg->repack_expire_to); } else { - strvec_push(&repack, "-A"); + strvec_push(args, "-A"); if (cfg->prune_expire) - strvec_pushf(&repack, "--unpack-unreachable=%s", cfg->prune_expire); + strvec_pushf(args, "--unpack-unreachable=%s", cfg->prune_expire); } if (keep_pack) - for_each_string_list(keep_pack, keep_one_pack, NULL); + for_each_string_list(keep_pack, keep_one_pack, args); if (cfg->repack_filter && *cfg->repack_filter) - strvec_pushf(&repack, "--filter=%s", cfg->repack_filter); + strvec_pushf(args, "--filter=%s", cfg->repack_filter); if (cfg->repack_filter_to && *cfg->repack_filter_to) - strvec_pushf(&repack, "--filter-to=%s", cfg->repack_filter_to); + strvec_pushf(args, "--filter-to=%s", cfg->repack_filter_to); } -static void add_repack_incremental_option(void) +static void add_repack_incremental_option(struct strvec *args) { - strvec_push(&repack, "--no-write-bitmap-index"); + strvec_push(args, "--no-write-bitmap-index"); } -static int need_to_gc(struct gc_config *cfg) +static int need_to_gc(struct gc_config *cfg, struct strvec *repack_args) { /* * Setting gc.auto to 0 or negative can disable the @@ -700,10 +701,10 @@ static int need_to_gc(struct gc_config *cfg) string_list_clear(&keep_pack, 0); } - add_repack_all_option(cfg, &keep_pack); + add_repack_all_option(cfg, &keep_pack, repack_args); string_list_clear(&keep_pack, 0); } else if (too_many_loose_objects(cfg)) - add_repack_incremental_option(); + add_repack_incremental_option(repack_args); else return 0; @@ -852,6 +853,7 @@ int cmd_gc(int argc, int keep_largest_pack = -1; int skip_foreground_tasks = 0; timestamp_t dummy; + struct strvec repack_args = STRVEC_INIT; struct maintenance_run_opts opts = MAINTENANCE_RUN_OPTS_INIT; struct gc_config cfg = GC_CONFIG_INIT; const char *prune_expire_sentinel = "sentinel"; @@ -891,7 +893,7 @@ int cmd_gc(int argc, show_usage_with_options_if_asked(argc, argv, builtin_gc_usage, builtin_gc_options); - strvec_pushl(&repack, "repack", "-d", "-l", NULL); + strvec_pushl(&repack_args, "repack", "-d", "-l", NULL); gc_config(&cfg); @@ -914,14 +916,14 @@ int cmd_gc(int argc, die(_("failed to parse prune expiry value %s"), cfg.prune_expire); if (aggressive) { - strvec_push(&repack, "-f"); + strvec_push(&repack_args, "-f"); if (cfg.aggressive_depth > 0) - strvec_pushf(&repack, "--depth=%d", cfg.aggressive_depth); + strvec_pushf(&repack_args, "--depth=%d", cfg.aggressive_depth); if (cfg.aggressive_window > 0) - strvec_pushf(&repack, "--window=%d", cfg.aggressive_window); + strvec_pushf(&repack_args, "--window=%d", cfg.aggressive_window); } if (opts.quiet) - strvec_push(&repack, "-q"); + strvec_push(&repack_args, "-q"); if (opts.auto_flag) { if (cfg.detach_auto && opts.detach < 0) @@ -930,7 +932,7 @@ int cmd_gc(int argc, /* * Auto-gc should be least intrusive as possible. */ - if (!need_to_gc(&cfg)) { + if (!need_to_gc(&cfg, &repack_args)) { ret = 0; goto out; } @@ -952,7 +954,7 @@ int cmd_gc(int argc, find_base_packs(&keep_pack, cfg.big_pack_threshold); } - add_repack_all_option(&cfg, &keep_pack); + add_repack_all_option(&cfg, &keep_pack, &repack_args); string_list_clear(&keep_pack, 0); } @@ -1014,9 +1016,9 @@ int cmd_gc(int argc, repack_cmd.git_cmd = 1; repack_cmd.close_object_store = 1; - strvec_pushv(&repack_cmd.args, repack.v); + strvec_pushv(&repack_cmd.args, repack_args.v); if (run_command(&repack_cmd)) - die(FAILED_RUN, repack.v[0]); + die(FAILED_RUN, repack_args.v[0]); if (cfg.prune_expire) { struct child_process prune_cmd = CHILD_PROCESS_INIT; @@ -1067,6 +1069,7 @@ int cmd_gc(int argc, out: maintenance_run_opts_release(&opts); + strvec_clear(&repack_args); gc_config_release(&cfg); return 0; } @@ -1269,6 +1272,19 @@ static int maintenance_task_gc_background(struct maintenance_run_opts *opts, return run_command(&child); } +static int gc_condition(struct gc_config *cfg) +{ + /* + * Note that it's fine to drop the repack arguments here, as we execute + * git-gc(1) as a separate child process anyway. So it knows to compute + * these arguments again. + */ + struct strvec repack_args = STRVEC_INIT; + int ret = need_to_gc(cfg, &repack_args); + strvec_clear(&repack_args); + return ret; +} + static int prune_packed(struct maintenance_run_opts *opts) { struct child_process child = CHILD_PROCESS_INIT; @@ -1596,7 +1612,7 @@ static const struct maintenance_task tasks[] = { .name = "gc", .foreground = maintenance_task_gc_foreground, .background = maintenance_task_gc_background, - .auto_condition = need_to_gc, + .auto_condition = gc_condition, }, [TASK_COMMIT_GRAPH] = { .name = "commit-graph", From 60c0af8e20b7c347003c40ca342a074239ea8453 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 08:57:15 +0200 Subject: [PATCH 02/11] builtin/gc: make `too_many_loose_objects()` reusable without GC config To decide whether or not a repository needs to be repacked we estimate the number of loose objects. If the number exceeds a certain threshold we perform the repack, otherwise we don't. This is done via `too_many_loose_objects()`, which takes as parameter the `struct gc_config`. This configuration is only used to determine the threshold. In a subsequent commit we'll add another caller of this function that wants to pass a different limit than the one stored in that structure. Refactor the function accordingly so that we only take the limit as parameter instead of the whole structure. Signed-off-by: Patrick Steinhardt Acked-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/gc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index e9772eb3a3..026d3a1d71 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -447,7 +447,7 @@ out: return should_gc; } -static int too_many_loose_objects(struct gc_config *cfg) +static int too_many_loose_objects(int limit) { /* * Quickly check if a "gc" is needed, by estimating how @@ -469,7 +469,7 @@ static int too_many_loose_objects(struct gc_config *cfg) if (!dir) return 0; - auto_threshold = DIV_ROUND_UP(cfg->gc_auto_threshold, 256); + auto_threshold = DIV_ROUND_UP(limit, 256); while ((ent = readdir(dir)) != NULL) { if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose || ent->d_name[hexsz_loose] != '\0') @@ -703,7 +703,7 @@ static int need_to_gc(struct gc_config *cfg, struct strvec *repack_args) add_repack_all_option(cfg, &keep_pack, repack_args); string_list_clear(&keep_pack, 0); - } else if (too_many_loose_objects(cfg)) + } else if (too_many_loose_objects(cfg->gc_auto_threshold)) add_repack_incremental_option(repack_args); else return 0; @@ -1057,7 +1057,7 @@ int cmd_gc(int argc, !opts.quiet && !daemonized ? COMMIT_GRAPH_WRITE_PROGRESS : 0, NULL); - if (opts.auto_flag && too_many_loose_objects(&cfg)) + if (opts.auto_flag && too_many_loose_objects(cfg.gc_auto_threshold)) warning(_("There are too many unreachable loose objects; " "run 'git prune' to remove them.")); From 9bc151850c1c593f4baf8d6d2a1d14bb4875844a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 08:57:16 +0200 Subject: [PATCH 03/11] builtin/maintenance: introduce "geometric-repack" task Introduce a new "geometric-repack" task. This task uses our geometric repack infrastructure as provided by git-repack(1) itself, which is a strategy that especially hosting providers tend to use to amortize the costs of repacking objects. There is one issue though with geometric repacks, namely that they unconditionally pack all loose objects, regardless of whether or not they are reachable. This is done because it means that we can completely skip the reachability step, which significantly speeds up the operation. But it has the big downside that we are unable to expire objects over time. To address this issue we thus use a split strategy in this new task: whenever a geometric repack would merge together all packs, we instead do an all-into-one repack. By default, these all-into-one repacks have cruft packs enabled, so unreachable objects would now be written into their own pack. Consequently, they won't be soaked up during geometric repacking anymore and can be expired with the next full repack, assuming that their expiry date has surpassed. Signed-off-by: Patrick Steinhardt Acked-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/maintenance.adoc | 11 ++ builtin/gc.c | 102 +++++++++++++++++++ t/t7900-maintenance.sh | 138 ++++++++++++++++++++++++++ 3 files changed, 251 insertions(+) diff --git a/Documentation/config/maintenance.adoc b/Documentation/config/maintenance.adoc index 2f71934218..26dc5de423 100644 --- a/Documentation/config/maintenance.adoc +++ b/Documentation/config/maintenance.adoc @@ -75,6 +75,17 @@ maintenance.incremental-repack.auto:: number of pack-files not in the multi-pack-index is at least the value of `maintenance.incremental-repack.auto`. The default value is 10. +maintenance.geometric-repack.auto:: + This integer config option controls how often the `geometric-repack` + task should be run as part of `git maintenance run --auto`. If zero, + then the `geometric-repack` task will not run with the `--auto` + option. A negative value will force the task to run every time. + Otherwise, a positive value implies the command should run either when + there are packfiles that need to be merged together to retain the + geometric progression, or when there are at least this many loose + objects that would be written into a new packfile. The default value is + 100. + maintenance.reflog-expire.auto:: This integer config option controls how often the `reflog-expire` task should be run as part of `git maintenance run --auto`. If zero, then diff --git a/builtin/gc.c b/builtin/gc.c index 026d3a1d71..2c9ecd464d 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -34,6 +34,7 @@ #include "pack-objects.h" #include "path.h" #include "reflog.h" +#include "repack.h" #include "rerere.h" #include "blob.h" #include "tree.h" @@ -254,6 +255,7 @@ enum maintenance_task_label { TASK_PREFETCH, TASK_LOOSE_OBJECTS, TASK_INCREMENTAL_REPACK, + TASK_GEOMETRIC_REPACK, TASK_GC, TASK_COMMIT_GRAPH, TASK_PACK_REFS, @@ -1566,6 +1568,101 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts return 0; } +static int maintenance_task_geometric_repack(struct maintenance_run_opts *opts, + struct gc_config *cfg) +{ + struct pack_geometry geometry = { + .split_factor = 2, + }; + struct pack_objects_args po_args = { + .local = 1, + }; + struct existing_packs existing_packs = EXISTING_PACKS_INIT; + struct string_list kept_packs = STRING_LIST_INIT_DUP; + struct child_process child = CHILD_PROCESS_INIT; + int ret; + + existing_packs.repo = the_repository; + existing_packs_collect(&existing_packs, &kept_packs); + pack_geometry_init(&geometry, &existing_packs, &po_args); + pack_geometry_split(&geometry); + + child.git_cmd = 1; + + strvec_pushl(&child.args, "repack", "-d", "-l", NULL); + if (geometry.split < geometry.pack_nr) + strvec_push(&child.args, "--geometric=2"); + else + add_repack_all_option(cfg, NULL, &child.args); + if (opts->quiet) + strvec_push(&child.args, "--quiet"); + if (the_repository->settings.core_multi_pack_index) + strvec_push(&child.args, "--write-midx"); + + if (run_command(&child)) { + ret = error(_("failed to perform geometric repack")); + goto out; + } + + ret = 0; + +out: + existing_packs_release(&existing_packs); + pack_geometry_release(&geometry); + return ret; +} + +static int geometric_repack_auto_condition(struct gc_config *cfg UNUSED) +{ + struct pack_geometry geometry = { + .split_factor = 2, + }; + struct pack_objects_args po_args = { + .local = 1, + }; + struct existing_packs existing_packs = EXISTING_PACKS_INIT; + struct string_list kept_packs = STRING_LIST_INIT_DUP; + int auto_value = 100; + int ret; + + repo_config_get_int(the_repository, "maintenance.geometric-repack.auto", + &auto_value); + if (!auto_value) + return 0; + if (auto_value < 0) + return 1; + + existing_packs.repo = the_repository; + existing_packs_collect(&existing_packs, &kept_packs); + pack_geometry_init(&geometry, &existing_packs, &po_args); + pack_geometry_split(&geometry); + + /* + * When we'd merge at least two packs with one another we always + * perform the repack. + */ + if (geometry.split) { + ret = 1; + goto out; + } + + /* + * Otherwise, we estimate the number of loose objects to determine + * whether we want to create a new packfile or not. + */ + if (too_many_loose_objects(auto_value)) { + ret = 1; + goto out; + } + + ret = 0; + +out: + existing_packs_release(&existing_packs); + pack_geometry_release(&geometry); + return ret; +} + typedef int (*maintenance_task_fn)(struct maintenance_run_opts *opts, struct gc_config *cfg); typedef int (*maintenance_auto_fn)(struct gc_config *cfg); @@ -1608,6 +1705,11 @@ static const struct maintenance_task tasks[] = { .background = maintenance_task_incremental_repack, .auto_condition = incremental_repack_auto_condition, }, + [TASK_GEOMETRIC_REPACK] = { + .name = "geometric-repack", + .background = maintenance_task_geometric_repack, + .auto_condition = geometric_repack_auto_condition, + }, [TASK_GC] = { .name = "gc", .foreground = maintenance_task_gc_foreground, diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index ddd273d8dc..ace0ba8300 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -465,6 +465,144 @@ test_expect_success 'maintenance.incremental-repack.auto (when config is unset)' ) ' +run_and_verify_geometric_pack () { + EXPECTED_PACKS="$1" && + + # Verify that we perform a geometric repack. + rm -f "trace2.txt" && + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \ + git maintenance run --task=geometric-repack 2>/dev/null && + test_subcommand git repack -d -l --geometric=2 \ + --quiet --write-midx packfiles && + test_line_count = "$EXPECTED_PACKS" packfiles && + + # And verify that there are no loose objects anymore. + git count-objects -v >count && + test_grep '^count: 0$' count +} + +test_expect_success 'geometric repacking task' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git config set maintenance.auto false && + test_commit initial && + + # The initial repack causes an all-into-one repack. + GIT_TRACE2_EVENT="$(pwd)/initial-repack.txt" \ + git maintenance run --task=geometric-repack 2>/dev/null && + test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago \ + --quiet --write-midx before && + run_and_verify_geometric_pack 1 && + ls -l .git/objects/pack >after && + test_cmp before after && + + # This incremental change creates a new packfile that only + # soaks up loose objects. The packfiles are not getting merged + # at this point. + test_commit loose && + run_and_verify_geometric_pack 2 && + + # Both packfiles have 3 objects, so the next run would cause us + # to merge all packfiles together. This should be turned into + # an all-into-one-repack. + GIT_TRACE2_EVENT="$(pwd)/all-into-one-repack.txt" \ + git maintenance run --task=geometric-repack 2>/dev/null && + test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago \ + --quiet --write-midx /dev/null && + test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago \ + --quiet --write-midx packs && + test_line_count = 2 packs && + ls .git/objects/pack/*.mtimes >cruft && + test_line_count = 1 cruft + ) +' + +test_geometric_repack_needed () { + NEEDED="$1" + GEOMETRIC_CONFIG="$2" && + rm -f trace2.txt && + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \ + git ${GEOMETRIC_CONFIG:+-c maintenance.geometric-repack.$GEOMETRIC_CONFIG} \ + maintenance run --auto --task=geometric-repack 2>/dev/null && + case "$NEEDED" in + true) + test_grep "\[\"git\",\"repack\"," trace2.txt;; + false) + ! test_grep "\[\"git\",\"repack\"," trace2.txt;; + *) + BUG "invalid parameter: $NEEDED";; + esac +} + +test_expect_success 'geometric repacking with --auto' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + + # An empty repository does not need repacking, except when + # explicitly told to do it. + test_geometric_repack_needed false && + test_geometric_repack_needed false auto=0 && + test_geometric_repack_needed false auto=1 && + test_geometric_repack_needed true auto=-1 && + + test_oid_init && + + # Loose objects cause a repack when crossing the limit. Note + # that the number of objects gets extrapolated by having a look + # at the "objects/17/" shard. + test_commit "$(test_oid blob17_1)" && + test_geometric_repack_needed false && + test_commit "$(test_oid blob17_2)" && + test_geometric_repack_needed false auto=257 && + test_geometric_repack_needed true auto=256 && + + # Force another repack. + test_commit first && + test_commit second && + test_geometric_repack_needed true auto=-1 && + + # We now have two packfiles that would be merged together. As + # such, the repack should always happen unless the user has + # disabled the auto task. + test_geometric_repack_needed false auto=0 && + test_geometric_repack_needed true auto=9000 + ) +' + test_expect_success 'pack-refs task' ' for n in $(test_seq 1 5) do From 5c2ad50193896dc74e51e4b7a5af4ea734746316 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 08:57:17 +0200 Subject: [PATCH 04/11] builtin/maintenance: make the geometric factor configurable The geometric repacking task uses a factor of two for its geometric sequence, meaning that each next pack must contain at least twice as many objects as the next-smaller one. In some cases it may be helpful to configure this factor though to reduce the number of packfile merges even further, e.g. in very big repositories. But while git-repack(1) itself supports doing this, the maintenance task does not give us a way to tune it. Introduce a new "maintenance.geometric-repack.splitFactor" configuration to plug this gap. Signed-off-by: Patrick Steinhardt Acked-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/maintenance.adoc | 5 +++++ builtin/gc.c | 9 +++++++- t/t7900-maintenance.sh | 32 +++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/Documentation/config/maintenance.adoc b/Documentation/config/maintenance.adoc index 26dc5de423..45fdafc2c6 100644 --- a/Documentation/config/maintenance.adoc +++ b/Documentation/config/maintenance.adoc @@ -86,6 +86,11 @@ maintenance.geometric-repack.auto:: objects that would be written into a new packfile. The default value is 100. +maintenance.geometric-repack.splitFactor:: + This integer config option controls the factor used for the geometric + sequence. See the `--geometric=` option in linkgit:git-repack[1] for + more details. Defaults to `2`. + maintenance.reflog-expire.auto:: This integer config option controls how often the `reflog-expire` task should be run as part of `git maintenance run --auto`. If zero, then diff --git a/builtin/gc.c b/builtin/gc.c index 2c9ecd464d..fb1a82e030 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1582,6 +1582,9 @@ static int maintenance_task_geometric_repack(struct maintenance_run_opts *opts, struct child_process child = CHILD_PROCESS_INIT; int ret; + repo_config_get_int(the_repository, "maintenance.geometric-repack.splitFactor", + &geometry.split_factor); + existing_packs.repo = the_repository; existing_packs_collect(&existing_packs, &kept_packs); pack_geometry_init(&geometry, &existing_packs, &po_args); @@ -1591,7 +1594,8 @@ static int maintenance_task_geometric_repack(struct maintenance_run_opts *opts, strvec_pushl(&child.args, "repack", "-d", "-l", NULL); if (geometry.split < geometry.pack_nr) - strvec_push(&child.args, "--geometric=2"); + strvec_pushf(&child.args, "--geometric=%d", + geometry.split_factor); else add_repack_all_option(cfg, NULL, &child.args); if (opts->quiet) @@ -1632,6 +1636,9 @@ static int geometric_repack_auto_condition(struct gc_config *cfg UNUSED) if (auto_value < 0) return 1; + repo_config_get_int(the_repository, "maintenance.geometric-repack.splitFactor", + &geometry.split_factor); + existing_packs.repo = the_repository; existing_packs_collect(&existing_packs, &kept_packs); pack_geometry_init(&geometry, &existing_packs, &po_args); diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index ace0ba8300..e0352fd196 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -603,6 +603,38 @@ test_expect_success 'geometric repacking with --auto' ' ) ' +test_expect_success 'geometric repacking honors configured split factor' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git config set maintenance.auto false && + + # Create three different packs with 9, 2 and 1 object, respectively. + # This is done so that only a subset of packs would be merged + # together so that we can verify that `git repack` receives the + # correct geometric factor. + for i in $(test_seq 9) + do + echo first-$i | git hash-object -w --stdin -t blob || return 1 + done && + git repack --geometric=2 -d && + + for i in $(test_seq 2) + do + echo second-$i | git hash-object -w --stdin -t blob || return 1 + done && + git repack --geometric=2 -d && + + echo third | git hash-object -w --stdin -t blob && + git repack --geometric=2 -d && + + test_geometric_repack_needed false splitFactor=2 && + test_geometric_repack_needed true splitFactor=3 && + test_subcommand git repack -d -l --geometric=3 --quiet --write-midx Date: Fri, 24 Oct 2025 08:57:18 +0200 Subject: [PATCH 05/11] builtin/maintenance: don't silently ignore invalid strategy When parsing maintenance strategies we completely ignore the user-configured value in case it is unknown to us. This makes it basically undiscoverable to the user that scheduled maintenance is devolving into a no-op. Change this to instead die when seeing an unknown maintenance strategy. While at it, pull out the parsing logic into a separate function so that we can reuse it in a subsequent commit. Signed-off-by: Patrick Steinhardt Acked-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/gc.c | 17 +++++++++++------ t/t7900-maintenance.sh | 5 +++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index fb1a82e030..726d944d3b 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1855,6 +1855,13 @@ static const struct maintenance_strategy incremental_strategy = { }, }; +static struct maintenance_strategy parse_maintenance_strategy(const char *name) +{ + if (!strcasecmp(name, "incremental")) + return incremental_strategy; + die(_("unknown maintenance strategy: '%s'"), name); +} + static void initialize_task_config(struct maintenance_run_opts *opts, const struct string_list *selected_tasks) { @@ -1890,12 +1897,10 @@ static void initialize_task_config(struct maintenance_run_opts *opts, * override specific aspects of our strategy. */ if (opts->schedule) { - strategy = none_strategy; - - if (!repo_config_get_string_tmp(the_repository, "maintenance.strategy", &config_str)) { - if (!strcasecmp(config_str, "incremental")) - strategy = incremental_strategy; - } + if (!repo_config_get_string_tmp(the_repository, "maintenance.strategy", &config_str)) + strategy = parse_maintenance_strategy(config_str); + else + strategy = none_strategy; } else { strategy = default_strategy; } diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index e0352fd196..0fb917dd7b 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -1263,6 +1263,11 @@ test_expect_success 'fails when running outside of a repository' ' nongit test_must_fail git maintenance unregister ' +test_expect_success 'fails when configured to use an invalid strategy' ' + test_must_fail git -c maintenance.strategy=invalid maintenance run --schedule=hourly 2>err && + test_grep "unknown maintenance strategy: .invalid." err +' + test_expect_success 'register and unregister bare repo' ' test_when_finished "git config --global --unset-all maintenance.repo || :" && test_might_fail git config --global --unset-all maintenance.repo && From e83e92e87672def24d971cdfef801bb0de0d5955 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 08:57:19 +0200 Subject: [PATCH 06/11] builtin/maintenance: improve readability of strategies Our maintenance strategies are essentially a large array of structures, where each of the tasks can be enabled and scheduled individually. With the current layout though all the configuration sits on the same nesting layer, which makes it a bit hard to discern which initialized fields belong to what task. Improve readability of the individual tasks by using nested designated initializers instead. Suggested-by: Taylor Blau Signed-off-by: Patrick Steinhardt Acked-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/gc.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 726d944d3b..0ba6e59de1 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1835,23 +1835,37 @@ struct maintenance_strategy { }; static const struct maintenance_strategy none_strategy = { 0 }; + static const struct maintenance_strategy default_strategy = { .tasks = { - [TASK_GC].enabled = 1, + [TASK_GC] = { + .enabled = 1, + }, }, }; + static const struct maintenance_strategy incremental_strategy = { .tasks = { - [TASK_COMMIT_GRAPH].enabled = 1, - [TASK_COMMIT_GRAPH].schedule = SCHEDULE_HOURLY, - [TASK_PREFETCH].enabled = 1, - [TASK_PREFETCH].schedule = SCHEDULE_HOURLY, - [TASK_INCREMENTAL_REPACK].enabled = 1, - [TASK_INCREMENTAL_REPACK].schedule = SCHEDULE_DAILY, - [TASK_LOOSE_OBJECTS].enabled = 1, - [TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY, - [TASK_PACK_REFS].enabled = 1, - [TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY, + [TASK_COMMIT_GRAPH] = { + .enabled = 1, + .schedule = SCHEDULE_HOURLY, + }, + [TASK_PREFETCH] = { + .enabled = 1, + .schedule = SCHEDULE_HOURLY, + }, + [TASK_INCREMENTAL_REPACK] = { + .enabled = 1, + .schedule = SCHEDULE_DAILY, + }, + [TASK_LOOSE_OBJECTS] = { + .enabled = 1, + .schedule = SCHEDULE_DAILY, + }, + [TASK_PACK_REFS] = { + .enabled = 1, + .schedule = SCHEDULE_WEEKLY, + }, }, }; From 6a7d3eeb4703ab27ec7520d6e7fa9145e66f43dc Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 08:57:20 +0200 Subject: [PATCH 07/11] builtin/maintenance: run maintenance tasks depending on type We basically have three different ways to execute repository maintenance: 1. Manual maintenance via `git maintenance run`. 2. Automatic maintenance via `git maintenance run --auto`. 3. Scheduled maintenance via `git maintenance run --schedule=`. At the moment, maintenance strategies only have an effect for the last type of maintenance. This is about to change in subsequent commits, but to do so we need to be able to skip some tasks depending on how exactly maintenance was invoked. Introduce a new maintenance type that discern between manual (1 & 2) and scheduled (3) maintenance. Convert the `enabled` field into a bitset so that it becomes possible to specifiy which tasks exactly should run in a specific context. The types picked for existing strategies match the status quo: - The default strategy is only ever executed as part of a manual maintenance run. It is not possible to use it for scheduled maintenance. - The incremental strategy is only ever executed as part of a scheduled maintenance run. It is not possible to use it for manual maintenance. The strategies will be tweaked in subsequent commits to make use of this new infrastructure. Signed-off-by: Patrick Steinhardt Acked-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/gc.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 0ba6e59de1..6cc4f98c7a 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1827,9 +1827,16 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts, return result; } +enum maintenance_type { + /* As invoked via `git maintenance run --schedule=`. */ + MAINTENANCE_TYPE_SCHEDULED = (1 << 0), + /* As invoked via `git maintenance run` and with `--auto`. */ + MAINTENANCE_TYPE_MANUAL = (1 << 1), +}; + struct maintenance_strategy { struct { - int enabled; + unsigned type; enum schedule_priority schedule; } tasks[TASK__COUNT]; }; @@ -1839,7 +1846,7 @@ static const struct maintenance_strategy none_strategy = { 0 }; static const struct maintenance_strategy default_strategy = { .tasks = { [TASK_GC] = { - .enabled = 1, + .type = MAINTENANCE_TYPE_MANUAL, }, }, }; @@ -1847,23 +1854,23 @@ static const struct maintenance_strategy default_strategy = { static const struct maintenance_strategy incremental_strategy = { .tasks = { [TASK_COMMIT_GRAPH] = { - .enabled = 1, + .type = MAINTENANCE_TYPE_SCHEDULED, .schedule = SCHEDULE_HOURLY, }, [TASK_PREFETCH] = { - .enabled = 1, + .type = MAINTENANCE_TYPE_SCHEDULED, .schedule = SCHEDULE_HOURLY, }, [TASK_INCREMENTAL_REPACK] = { - .enabled = 1, + .type = MAINTENANCE_TYPE_SCHEDULED, .schedule = SCHEDULE_DAILY, }, [TASK_LOOSE_OBJECTS] = { - .enabled = 1, + .type = MAINTENANCE_TYPE_SCHEDULED, .schedule = SCHEDULE_DAILY, }, [TASK_PACK_REFS] = { - .enabled = 1, + .type = MAINTENANCE_TYPE_SCHEDULED, .schedule = SCHEDULE_WEEKLY, }, }, @@ -1881,6 +1888,7 @@ static void initialize_task_config(struct maintenance_run_opts *opts, { struct strbuf config_name = STRBUF_INIT; struct maintenance_strategy strategy; + enum maintenance_type type; const char *config_str; /* @@ -1915,8 +1923,10 @@ static void initialize_task_config(struct maintenance_run_opts *opts, strategy = parse_maintenance_strategy(config_str); else strategy = none_strategy; + type = MAINTENANCE_TYPE_SCHEDULED; } else { strategy = default_strategy; + type = MAINTENANCE_TYPE_MANUAL; } for (size_t i = 0; i < TASK__COUNT; i++) { @@ -1926,8 +1936,8 @@ static void initialize_task_config(struct maintenance_run_opts *opts, strbuf_addf(&config_name, "maintenance.%s.enabled", tasks[i].name); if (!repo_config_get_bool(the_repository, config_name.buf, &config_value)) - strategy.tasks[i].enabled = config_value; - if (!strategy.tasks[i].enabled) + strategy.tasks[i].type = config_value ? type : 0; + if (!(strategy.tasks[i].type & type)) continue; if (opts->schedule) { From 0e994d9f38ebf20c8492882a12b5fbbf0415e015 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 08:57:21 +0200 Subject: [PATCH 08/11] builtin/maintenance: extend "maintenance.strategy" to manual maintenance The "maintenance.strategy" configuration allows users to configure how Git is supposed to perform repository maintenance. The idea is that we provide a set of high-level strategies that may be useful in different contexts, like for example when handling a large monorepo. Furthermore, the strategy can be tweaked by the user by overriding specific tasks. In its current form though, the strategy only applies to scheduled maintenance. This creates something of a gap, as scheduled and manual maintenance will now use _different_ strategies as the latter would continue to use git-gc(1) by default. This makes the strategies way less useful than they could be on the one hand. But even more importantly, the two different strategies might clash with one another, where one of the strategies performs maintenance in such a way that it discards benefits from the other strategy. So ideally, it should be possible to pick one strategy that then applies globally to all the different ways that we perform maintenance. This doesn't necessarily mean that the strategy always does the _same_ thing for every maintenance type. But it means that the strategy can configure the different types to work in tandem with each other. Change the meaning of "maintenance.strategy" accordingly so that the strategy is applied to both types, manual and scheduled. As preceding commits have introduced logic to run maintenance tasks depending on this type we can tweak strategies so that they perform those tasks depending on the context. Note that this raises the question of backwards compatibility: when the user has configured the "incremental" strategy we would have ignored that strategy beforehand. Instead, repository maintenance would have continued to use git-gc(1) by default. But luckily, we can match that behaviour by: - Keeping all current tasks of the incremental strategy as `MAINTENANCE_TYPE_SCHEDULED`. This ensures that those tasks will not run during manual maintenance. - Configuring the "gc" task so that it is invoked during manual maintenance. Like this, the user shouldn't observe any difference in behaviour. Signed-off-by: Patrick Steinhardt Acked-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/maintenance.adoc | 22 +++++++++------ builtin/gc.c | 25 +++++++++++++---- t/t7900-maintenance.sh | 40 +++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 13 deletions(-) diff --git a/Documentation/config/maintenance.adoc b/Documentation/config/maintenance.adoc index 45fdafc2c6..b7e90a71a3 100644 --- a/Documentation/config/maintenance.adoc +++ b/Documentation/config/maintenance.adoc @@ -16,19 +16,25 @@ detach. maintenance.strategy:: This string config option provides a way to specify one of a few - recommended schedules for background maintenance. This only affects - which tasks are run during `git maintenance run --schedule=X` - commands, provided no `--task=` arguments are provided. - Further, if a `maintenance..schedule` config value is set, - then that value is used instead of the one provided by - `maintenance.strategy`. The possible strategy strings are: + recommended strategies for repository maintenance. This affects + which tasks are run during `git maintenance run`, provided no + `--task=` arguments are provided. This setting impacts manual + maintenance, auto-maintenance as well as scheduled maintenance. The + tasks that run may be different depending on the maintenance type. + -* `none`: This default setting implies no tasks are run at any schedule. +The maintenance strategy can be further tweaked by setting +`maintenance..enabled` and `maintenance..schedule`. If set, these +values are used instead of the defaults provided by `maintenance.strategy`. ++ +The possible strategies are: ++ +* `none`: This strategy implies no tasks are run at all. This is the default + strategy for scheduled maintenance. * `incremental`: This setting optimizes for performing small maintenance activities that do not delete any data. This does not schedule the `gc` task, but runs the `prefetch` and `commit-graph` tasks hourly, the `loose-objects` and `incremental-repack` tasks daily, and the `pack-refs` - task weekly. + task weekly. Manual repository maintenance uses the `gc` task. maintenance..enabled:: This boolean config option controls whether the maintenance task diff --git a/builtin/gc.c b/builtin/gc.c index 6cc4f98c7a..3c0a9a2e5d 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1873,6 +1873,20 @@ static const struct maintenance_strategy incremental_strategy = { .type = MAINTENANCE_TYPE_SCHEDULED, .schedule = SCHEDULE_WEEKLY, }, + /* + * Historically, the "incremental" strategy was only available + * in the context of scheduled maintenance when set up via + * "maintenance.strategy". We have later expanded that config + * to also cover manual maintenance. + * + * To retain backwards compatibility with the previous status + * quo we thus run git-gc(1) in case manual maintenance was + * requested. This is the same as the default strategy, which + * would have been in use beforehand. + */ + [TASK_GC] = { + .type = MAINTENANCE_TYPE_MANUAL, + }, }, }; @@ -1916,19 +1930,20 @@ static void initialize_task_config(struct maintenance_run_opts *opts, * - Unscheduled maintenance uses our default strategy. * * Both of these are affected by the gitconfig though, which may - * override specific aspects of our strategy. + * override specific aspects of our strategy. Furthermore, both + * strategies can be overridden by setting "maintenance.strategy". */ if (opts->schedule) { - if (!repo_config_get_string_tmp(the_repository, "maintenance.strategy", &config_str)) - strategy = parse_maintenance_strategy(config_str); - else - strategy = none_strategy; + strategy = none_strategy; type = MAINTENANCE_TYPE_SCHEDULED; } else { strategy = default_strategy; type = MAINTENANCE_TYPE_MANUAL; } + if (!repo_config_get_string_tmp(the_repository, "maintenance.strategy", &config_str)) + strategy = parse_maintenance_strategy(config_str); + for (size_t i = 0; i < TASK__COUNT; i++) { int config_value; diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 0fb917dd7b..5219bc17a6 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -886,6 +886,46 @@ test_expect_success 'maintenance.strategy inheritance' ' expect && + rm -f trace2.txt && + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \ + git -c maintenance.strategy=$STRATEGY maintenance run --quiet "$@" && + sed -n 's/{"event":"child_start","sid":"[^/"]*",.*,"argv":\["\(.*\)\"]}/\1/p' actual + test_cmp expect actual +} + +test_expect_success 'maintenance.strategy is respected' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + + test_must_fail git -c maintenance.strategy=unknown maintenance run 2>err && + test_grep "unknown maintenance strategy: .unknown." err && + + test_strategy incremental <<-\EOF && + git pack-refs --all --prune + git reflog expire --all + git gc --quiet --no-detach --skip-foreground-tasks + EOF + + test_strategy incremental --schedule=weekly <<-\EOF + git pack-refs --all --prune + git prune-packed --quiet + git multi-pack-index write --no-progress + git multi-pack-index expire --no-progress + git multi-pack-index repack --no-progress --batch-size=1 + git commit-graph write --split --reachable --no-progress + EOF + ) +' + test_expect_success 'register and unregister' ' test_when_finished git config --global --unset-all maintenance.repo && From 40a74158337f9154d26f82aa7923ca281ae131c2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 08:57:22 +0200 Subject: [PATCH 09/11] builtin/maintenance: make "gc" strategy accessible While the user can pick the "incremental" maintenance strategy, it is not possible to explicitly use the "gc" strategy. This has two downsides: - It is impossible to use the default "gc" strategy for a specific repository when the strategy was globally set to a different strategy. - It is not possible to use git-gc(1) for scheduled maintenance. Address these issues by making making the "gc" strategy configurable. Furthermore, extend the strategy so that git-gc(1) runs for both manual and scheduled maintenance. Signed-off-by: Patrick Steinhardt Acked-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/maintenance.adoc | 2 ++ builtin/gc.c | 9 ++++++--- t/t7900-maintenance.sh | 14 +++++++++++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/Documentation/config/maintenance.adoc b/Documentation/config/maintenance.adoc index b7e90a71a3..b2bacdc822 100644 --- a/Documentation/config/maintenance.adoc +++ b/Documentation/config/maintenance.adoc @@ -30,6 +30,8 @@ The possible strategies are: + * `none`: This strategy implies no tasks are run at all. This is the default strategy for scheduled maintenance. +* `gc`: This strategy runs the `gc` task. This is the default strategy for + manual maintenance. * `incremental`: This setting optimizes for performing small maintenance activities that do not delete any data. This does not schedule the `gc` task, but runs the `prefetch` and `commit-graph` tasks hourly, the diff --git a/builtin/gc.c b/builtin/gc.c index 3c0a9a2e5d..8cab145009 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1843,10 +1843,11 @@ struct maintenance_strategy { static const struct maintenance_strategy none_strategy = { 0 }; -static const struct maintenance_strategy default_strategy = { +static const struct maintenance_strategy gc_strategy = { .tasks = { [TASK_GC] = { - .type = MAINTENANCE_TYPE_MANUAL, + .type = MAINTENANCE_TYPE_MANUAL | MAINTENANCE_TYPE_SCHEDULED, + .schedule = SCHEDULE_DAILY, }, }, }; @@ -1894,6 +1895,8 @@ static struct maintenance_strategy parse_maintenance_strategy(const char *name) { if (!strcasecmp(name, "incremental")) return incremental_strategy; + if (!strcasecmp(name, "gc")) + return gc_strategy; die(_("unknown maintenance strategy: '%s'"), name); } @@ -1937,7 +1940,7 @@ static void initialize_task_config(struct maintenance_run_opts *opts, strategy = none_strategy; type = MAINTENANCE_TYPE_SCHEDULED; } else { - strategy = default_strategy; + strategy = gc_strategy; type = MAINTENANCE_TYPE_MANUAL; } diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 5219bc17a6..85e0cea4d9 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -915,7 +915,7 @@ test_expect_success 'maintenance.strategy is respected' ' git gc --quiet --no-detach --skip-foreground-tasks EOF - test_strategy incremental --schedule=weekly <<-\EOF + test_strategy incremental --schedule=weekly <<-\EOF && git pack-refs --all --prune git prune-packed --quiet git multi-pack-index write --no-progress @@ -923,6 +923,18 @@ test_expect_success 'maintenance.strategy is respected' ' git multi-pack-index repack --no-progress --batch-size=1 git commit-graph write --split --reachable --no-progress EOF + + test_strategy gc <<-\EOF && + git pack-refs --all --prune + git reflog expire --all + git gc --quiet --no-detach --skip-foreground-tasks + EOF + + test_strategy gc --schedule=weekly <<-\EOF + git pack-refs --all --prune + git reflog expire --all + git gc --quiet --no-detach --skip-foreground-tasks + EOF ) ' From d9bccf2ec3871963098dcd78c61990e27733eb03 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 08:57:23 +0200 Subject: [PATCH 10/11] builtin/maintenance: introduce "geometric" strategy We have two different repacking strategies in Git: - The "gc" strategy uses git-gc(1). - The "incremental" strategy uses multi-pack indices and `git multi-pack-index repack` to merge together smaller packfiles as determined by a specific batch size. The former strategy is our old and trusted default, whereas the latter has historically been used for our scheduled maintenance. But both strategies have their shortcomings: - The "gc" strategy performs regular all-into-one repacks. Furthermore it is rather inflexible, as it is not easily possible for a user to enable or disable specific subtasks. - The "incremental" strategy is not a full replacement for the "gc" strategy as it doesn't know to prune stale data. So today, we don't have a strategy that is well-suited for large repos while being a full replacement for the "gc" strategy. Introduce a new "geometric" strategy that aims to fill this gap. This strategy invokes all the usual cleanup tasks that git-gc(1) does like pruning reflogs and rerere caches as well as stale worktrees. But where it differs from both the "gc" and "incremental" strategy is that it uses our geometric repacking infrastructure exposed by git-repack(1) to repack packfiles. The advantage of geometric repacking is that we only need to perform an all-into-one repack when the object count in a repo has grown significantly. One downside of this strategy is that pruning of unreferenced objects is not going to happen regularly anymore. Every geometric repack knows to soak up all loose objects regardless of their reachability, and merging two or more packs doesn't consider reachability, either. Consequently, the number of unreachable objects will grow over time. This is remedied by doing an all-into-one repack instead of a geometric repack whenever we determine that the geometric repack would end up merging all packfiles anyway. This all-into-one repack then performs our usual reachability checks and writes unreachable objects into a cruft pack. As cruft packs won't ever be merged during geometric repacks we can thus phase out these objects over time. Of course, this still means that we retain unreachable objects for far longer than with the "gc" strategy. But the maintenance strategy is intended especially for large repositories, where the basic assumption is that the set of unreachable objects will be significantly dwarfed by the number of reachable objects. If this assumption is ever proven to be too disadvantageous we could for example introduce a time-based strategy: if the largest packfile has not been touched for longer than $T, we perform an all-into-one repack. But for now, such a mechanism is deferred into the future as it is not clear yet whether it is needed in the first place. Signed-off-by: Patrick Steinhardt Acked-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/maintenance.adoc | 9 ++++++++ builtin/gc.c | 31 +++++++++++++++++++++++++++ t/t7900-maintenance.sh | 20 ++++++++++++++++- 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/Documentation/config/maintenance.adoc b/Documentation/config/maintenance.adoc index b2bacdc822..d0c38f03fa 100644 --- a/Documentation/config/maintenance.adoc +++ b/Documentation/config/maintenance.adoc @@ -32,6 +32,15 @@ The possible strategies are: strategy for scheduled maintenance. * `gc`: This strategy runs the `gc` task. This is the default strategy for manual maintenance. +* `geometric`: This strategy performs geometric repacking of packfiles and + keeps auxiliary data structures up-to-date. The strategy expires data in the + reflog and removes worktrees that cannot be located anymore. When the + geometric repacking strategy would decide to do an all-into-one repack, then + the strategy generates a cruft pack for all unreachable objects. Objects that + are already part of a cruft pack will be expired. ++ +This repacking strategy is a full replacement for the `gc` strategy and is +recommended for large repositories. * `incremental`: This setting optimizes for performing small maintenance activities that do not delete any data. This does not schedule the `gc` task, but runs the `prefetch` and `commit-graph` tasks hourly, the diff --git a/builtin/gc.c b/builtin/gc.c index 8cab145009..19be3f87e1 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1891,12 +1891,43 @@ static const struct maintenance_strategy incremental_strategy = { }, }; +static const struct maintenance_strategy geometric_strategy = { + .tasks = { + [TASK_COMMIT_GRAPH] = { + .type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + .schedule = SCHEDULE_HOURLY, + }, + [TASK_GEOMETRIC_REPACK] = { + .type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + .schedule = SCHEDULE_DAILY, + }, + [TASK_PACK_REFS] = { + .type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + .schedule = SCHEDULE_DAILY, + }, + [TASK_RERERE_GC] = { + .type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + .schedule = SCHEDULE_WEEKLY, + }, + [TASK_REFLOG_EXPIRE] = { + .type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + .schedule = SCHEDULE_WEEKLY, + }, + [TASK_WORKTREE_PRUNE] = { + .type = MAINTENANCE_TYPE_SCHEDULED | MAINTENANCE_TYPE_MANUAL, + .schedule = SCHEDULE_WEEKLY, + }, + }, +}; + static struct maintenance_strategy parse_maintenance_strategy(const char *name) { if (!strcasecmp(name, "incremental")) return incremental_strategy; if (!strcasecmp(name, "gc")) return gc_strategy; + if (!strcasecmp(name, "geometric")) + return geometric_strategy; die(_("unknown maintenance strategy: '%s'"), name); } diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 85e0cea4d9..0d76693fee 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -930,11 +930,29 @@ test_expect_success 'maintenance.strategy is respected' ' git gc --quiet --no-detach --skip-foreground-tasks EOF - test_strategy gc --schedule=weekly <<-\EOF + test_strategy gc --schedule=weekly <<-\EOF && git pack-refs --all --prune git reflog expire --all git gc --quiet --no-detach --skip-foreground-tasks EOF + + test_strategy geometric <<-\EOF && + git pack-refs --all --prune + git reflog expire --all + git repack -d -l --geometric=2 --quiet --write-midx + git commit-graph write --split --reachable --no-progress + git worktree prune --expire 3.months.ago + git rerere gc + EOF + + test_strategy geometric --schedule=weekly <<-\EOF + git pack-refs --all --prune + git reflog expire --all + git repack -d -l --geometric=2 --quiet --write-midx + git commit-graph write --split --reachable --no-progress + git worktree prune --expire 3.months.ago + git rerere gc + EOF ) ' From a4265572bb8488205b53a4a1af0c8d877f11dbe6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 27 Oct 2025 09:30:50 +0100 Subject: [PATCH 11/11] t7900: fix a flaky test due to git-repack always regenerating MIDX When a supposedly no-op "git repack" runs across a second boundary, because the command always touches the MIDX file and updates its timestamp, "ls -l $GIT_DIR/objects/pack/" before and after the operation can change, which causes such a test to fail. Only compare the *.pack files in the directory before and after the operation to work around this flakyness. Arguably, git-repack(1) should learn to not rewrite the MIDX in case we know it is already up-to-date. But this is not a new problem introduced via the new geometric maintenance task, so for now it should be good enough to paper over the issue. Signed-off-by: Patrick Steinhardt [jc: taken from diff to v4 from v3 that was already merged to 'next'] Signed-off-by: Junio C Hamano --- t/t7900-maintenance.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 0d76693fee..614184a097 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -500,9 +500,9 @@ test_expect_success 'geometric repacking task' ' # Repacking should now cause a no-op geometric repack because # no packfiles need to be combined. - ls -l .git/objects/pack >before && + ls -l .git/objects/pack/*.pack >before && run_and_verify_geometric_pack 1 && - ls -l .git/objects/pack >after && + ls -l .git/objects/pack/*.pack >after && test_cmp before after && # This incremental change creates a new packfile that only