git/builtin/repack.c
Patrick Steinhardt dcc9c7ef47 builtin/repack: handle promisor packs with geometric repacking
When performing a fetch with an object filter, we mark the resulting
packfile as a promisor pack. An object part of such a pack may miss any
of its referenced objects, and Git knows to handle this case by fetching
any such missing objects from the promisor remote.

The "promisor" property needs to be retained going forward. So every
time we pack a promisor object, the resulting pack must be marked as a
promisor pack. git-repack(1) does this already: when a repository has a
promisor remote, it knows to pass "--exclude-promisor-objects" to the
git-pack-objects(1) child process. Promisor packs are written separately
when doing an all-into-one repack via `repack_promisor_objects()`.

But we don't support promisor objects when doing a geometric repack yet.
Promisor packs do not get any special treatment there, as we simply
merge promisor and non-promisor packs. The resulting pack is not even
marked as a promisor pack, which essentially corrupts the repository.

This corruption couldn't happen in the real world though: we pass both
"--exclude-promisor-objects" and "--stdin-packs" to git-pack-objects(1)
if a repository has a promisor remote, but as those options are mutually
exclusive we always end up dying. And while we made those flags
compatible with one another in a preceding commit, we still end up dying
in case git-pack-objects(1) is asked to repack a promisor pack.

There's multiple ways to fix this:

  - We can exclude promisor packs from the geometric progression
    altogether. This would have the consequence that we never repack
    promisor packs at all. But in a partial clone it is quite likely
    that the user generates a bunch of promisor packs over time, as
    every backfill fetch would create another one. So this doesn't
    really feel like a sensible option.

  - We can adapt git-pack-objects(1) to support repacking promisor packs
    and include them in the normal geometric progression. But this would
    mean that the set of promisor objects expands over time as the packs
    are merged with normal packs.

  - We can use a separate geometric progression to repack promisor
    packs.

The first two options both have significant downsides, so they aren't
really feasible. But the third option fixes both of these downsides: we
make sure that promisor packs get merged, and at the same time we never
expand the set of promisor objects beyond the set of objects that are
already marked as promisor objects.

Implement this strategy so that geometric repacking works in partial
clones.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2026-01-14 06:29:24 -08:00

566 lines
18 KiB
C

#define USE_THE_REPOSITORY_VARIABLE
#define DISABLE_SIGN_COMPARE_WARNINGS
#include "builtin.h"
#include "config.h"
#include "environment.h"
#include "parse-options.h"
#include "path.h"
#include "run-command.h"
#include "server-info.h"
#include "string-list.h"
#include "midx.h"
#include "packfile.h"
#include "prune-packed.h"
#include "promisor-remote.h"
#include "repack.h"
#include "shallow.h"
#define ALL_INTO_ONE 1
#define LOOSEN_UNREACHABLE 2
#define PACK_CRUFT 4
#define DELETE_PACK 1
#define RETAIN_PACK 2
static int pack_everything;
static int write_bitmaps = -1;
static int use_delta_islands;
static int run_update_server_info = 1;
static char *packdir, *packtmp_name, *packtmp;
static int midx_must_contain_cruft = 1;
static const char *const git_repack_usage[] = {
N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n"
"[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]\n"
"[--write-midx] [--name-hash-version=<n>] [--path-walk]"),
NULL
};
static const char incremental_bitmap_conflict_error[] = N_(
"Incremental repacks are incompatible with bitmap indexes. Use\n"
"--no-write-bitmap-index or disable the pack.writeBitmaps configuration."
);
struct repack_config_ctx {
struct pack_objects_args *po_args;
struct pack_objects_args *cruft_po_args;
};
static int repack_config(const char *var, const char *value,
const struct config_context *ctx, void *cb)
{
struct repack_config_ctx *repack_ctx = cb;
struct pack_objects_args *po_args = repack_ctx->po_args;
struct pack_objects_args *cruft_po_args = repack_ctx->cruft_po_args;
if (!strcmp(var, "repack.usedeltabaseoffset")) {
po_args->delta_base_offset = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "repack.packkeptobjects")) {
po_args->pack_kept_objects = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "repack.writebitmaps") ||
!strcmp(var, "pack.writebitmaps")) {
write_bitmaps = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "repack.usedeltaislands")) {
use_delta_islands = git_config_bool(var, value);
return 0;
}
if (strcmp(var, "repack.updateserverinfo") == 0) {
run_update_server_info = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "repack.cruftwindow")) {
free(cruft_po_args->window);
return git_config_string(&cruft_po_args->window, var, value);
}
if (!strcmp(var, "repack.cruftwindowmemory")) {
free(cruft_po_args->window_memory);
return git_config_string(&cruft_po_args->window_memory, var, value);
}
if (!strcmp(var, "repack.cruftdepth")) {
free(cruft_po_args->depth);
return git_config_string(&cruft_po_args->depth, var, value);
}
if (!strcmp(var, "repack.cruftthreads")) {
free(cruft_po_args->threads);
return git_config_string(&cruft_po_args->threads, var, value);
}
if (!strcmp(var, "repack.midxmustcontaincruft")) {
midx_must_contain_cruft = git_config_bool(var, value);
return 0;
}
return git_default_config(var, value, ctx, cb);
}
int cmd_repack(int argc,
const char **argv,
const char *prefix,
struct repository *repo)
{
struct child_process cmd = CHILD_PROCESS_INIT;
struct string_list_item *item;
struct string_list names = STRING_LIST_INIT_DUP;
struct existing_packs existing = EXISTING_PACKS_INIT;
struct pack_geometry geometry = { 0 };
struct tempfile *refs_snapshot = NULL;
int i, ret;
int show_progress;
/* variables to be filled by option parsing */
struct repack_config_ctx config_ctx;
int delete_redundant = 0;
const char *unpack_unreachable = NULL;
int keep_unreachable = 0;
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT;
struct pack_objects_args cruft_po_args = PACK_OBJECTS_ARGS_INIT;
int write_midx = 0;
const char *cruft_expiration = NULL;
const char *expire_to = NULL;
const char *filter_to = NULL;
const char *opt_window = NULL;
const char *opt_window_memory = NULL;
const char *opt_depth = NULL;
const char *opt_threads = NULL;
unsigned long combine_cruft_below_size = 0ul;
struct option builtin_repack_options[] = {
OPT_BIT('a', NULL, &pack_everything,
N_("pack everything in a single pack"), ALL_INTO_ONE),
OPT_BIT('A', NULL, &pack_everything,
N_("same as -a, and turn unreachable objects loose"),
LOOSEN_UNREACHABLE | ALL_INTO_ONE),
OPT_BIT(0, "cruft", &pack_everything,
N_("same as -a, pack unreachable cruft objects separately"),
PACK_CRUFT),
OPT_STRING(0, "cruft-expiration", &cruft_expiration, N_("approxidate"),
N_("with --cruft, expire objects older than this")),
OPT_UNSIGNED(0, "combine-cruft-below-size",
&combine_cruft_below_size,
N_("with --cruft, only repack cruft packs smaller than this")),
OPT_UNSIGNED(0, "max-cruft-size", &cruft_po_args.max_pack_size,
N_("with --cruft, limit the size of new cruft packs")),
OPT_BOOL('d', NULL, &delete_redundant,
N_("remove redundant packs, and run git-prune-packed")),
OPT_BOOL('f', NULL, &po_args.no_reuse_delta,
N_("pass --no-reuse-delta to git-pack-objects")),
OPT_BOOL('F', NULL, &po_args.no_reuse_object,
N_("pass --no-reuse-object to git-pack-objects")),
OPT_INTEGER(0, "name-hash-version", &po_args.name_hash_version,
N_("specify the name hash version to use for grouping similar objects by path")),
OPT_BOOL(0, "path-walk", &po_args.path_walk,
N_("pass --path-walk to git-pack-objects")),
OPT_NEGBIT('n', NULL, &run_update_server_info,
N_("do not run git-update-server-info"), 1),
OPT__QUIET(&po_args.quiet, N_("be quiet")),
OPT_BOOL('l', "local", &po_args.local,
N_("pass --local to git-pack-objects")),
OPT_BOOL('b', "write-bitmap-index", &write_bitmaps,
N_("write bitmap index")),
OPT_BOOL('i', "delta-islands", &use_delta_islands,
N_("pass --delta-islands to git-pack-objects")),
OPT_STRING(0, "unpack-unreachable", &unpack_unreachable, N_("approxidate"),
N_("with -A, do not loosen objects older than this")),
OPT_BOOL('k', "keep-unreachable", &keep_unreachable,
N_("with -a, repack unreachable objects")),
OPT_STRING(0, "window", &opt_window, N_("n"),
N_("size of the window used for delta compression")),
OPT_STRING(0, "window-memory", &opt_window_memory, N_("bytes"),
N_("same as the above, but limit memory size instead of entries count")),
OPT_STRING(0, "depth", &opt_depth, N_("n"),
N_("limits the maximum delta depth")),
OPT_STRING(0, "threads", &opt_threads, N_("n"),
N_("limits the maximum number of threads")),
OPT_UNSIGNED(0, "max-pack-size", &po_args.max_pack_size,
N_("maximum size of each packfile")),
OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options),
OPT_BOOL(0, "pack-kept-objects", &po_args.pack_kept_objects,
N_("repack objects in packs marked with .keep")),
OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
N_("do not repack this pack")),
OPT_INTEGER('g', "geometric", &geometry.split_factor,
N_("find a geometric progression with factor <N>")),
OPT_BOOL('m', "write-midx", &write_midx,
N_("write a multi-pack index of the resulting packs")),
OPT_STRING(0, "expire-to", &expire_to, N_("dir"),
N_("pack prefix to store a pack containing pruned objects")),
OPT_STRING(0, "filter-to", &filter_to, N_("dir"),
N_("pack prefix to store a pack containing filtered out objects")),
OPT_END()
};
list_objects_filter_init(&po_args.filter_options);
memset(&config_ctx, 0, sizeof(config_ctx));
config_ctx.po_args = &po_args;
config_ctx.cruft_po_args = &cruft_po_args;
repo_config(repo, repack_config, &config_ctx);
argc = parse_options(argc, argv, prefix, builtin_repack_options,
git_repack_usage, 0);
po_args.window = xstrdup_or_null(opt_window);
po_args.window_memory = xstrdup_or_null(opt_window_memory);
po_args.depth = xstrdup_or_null(opt_depth);
po_args.threads = xstrdup_or_null(opt_threads);
if (delete_redundant && repo->repository_format_precious_objects)
die(_("cannot delete packs in a precious-objects repo"));
die_for_incompatible_opt3(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE), "-A",
keep_unreachable, "-k/--keep-unreachable",
pack_everything & PACK_CRUFT, "--cruft");
if (pack_everything & PACK_CRUFT)
pack_everything |= ALL_INTO_ONE;
if (write_bitmaps < 0) {
if (!write_midx &&
(!(pack_everything & ALL_INTO_ONE) || !is_bare_repository()))
write_bitmaps = 0;
}
if (po_args.pack_kept_objects < 0)
po_args.pack_kept_objects = write_bitmaps > 0 && !write_midx;
if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx)
die(_(incremental_bitmap_conflict_error));
if (write_bitmaps && po_args.local &&
odb_has_alternates(repo->objects)) {
/*
* When asked to do a local repack, but we have
* packfiles that are inherited from an alternate, then
* we cannot guarantee that the multi-pack-index would
* have full coverage of all objects. We thus disable
* writing bitmaps in that case.
*/
warning(_("disabling bitmap writing, as some objects are not being packed"));
write_bitmaps = 0;
}
if (write_midx && write_bitmaps) {
struct strbuf path = STRBUF_INIT;
strbuf_addf(&path, "%s/%s_XXXXXX",
repo_get_object_directory(repo),
"bitmap-ref-tips");
refs_snapshot = xmks_tempfile(path.buf);
midx_snapshot_refs(repo, refs_snapshot);
strbuf_release(&path);
}
packdir = mkpathdup("%s/pack", repo_get_object_directory(repo));
packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid());
packtmp = mkpathdup("%s/%s", packdir, packtmp_name);
existing.repo = repo;
existing_packs_collect(&existing, &keep_pack_list);
if (geometry.split_factor) {
if (pack_everything)
die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a");
pack_geometry_init(&geometry, &existing, &po_args);
pack_geometry_split(&geometry);
}
prepare_pack_objects(&cmd, &po_args, packtmp);
show_progress = !po_args.quiet && isatty(2);
strvec_push(&cmd.args, "--keep-true-parents");
for (i = 0; i < keep_pack_list.nr; i++)
strvec_pushf(&cmd.args, "--keep-pack=%s",
keep_pack_list.items[i].string);
strvec_push(&cmd.args, "--non-empty");
if (!geometry.split_factor) {
/*
* We need to grab all reachable objects, including those that
* are reachable from reflogs and the index.
*
* When repacking into a geometric progression of packs,
* however, we ask 'git pack-objects --stdin-packs', and it is
* not about packing objects based on reachability but about
* repacking all the objects in specified packs and loose ones
* (indeed, --stdin-packs is incompatible with these options).
*/
strvec_push(&cmd.args, "--all");
strvec_push(&cmd.args, "--reflog");
strvec_push(&cmd.args, "--indexed-objects");
}
if (repo_has_promisor_remote(repo))
strvec_push(&cmd.args, "--exclude-promisor-objects");
if (!write_midx) {
if (write_bitmaps > 0)
strvec_push(&cmd.args, "--write-bitmap-index");
else if (write_bitmaps < 0)
strvec_push(&cmd.args, "--write-bitmap-index-quiet");
}
if (use_delta_islands)
strvec_push(&cmd.args, "--delta-islands");
if (pack_everything & ALL_INTO_ONE) {
repack_promisor_objects(repo, &po_args, &names, packtmp);
if (existing_packs_has_non_kept(&existing) &&
delete_redundant &&
!(pack_everything & PACK_CRUFT)) {
for_each_string_list_item(item, &names) {
strvec_pushf(&cmd.args, "--keep-pack=%s-%s.pack",
packtmp_name, item->string);
}
if (unpack_unreachable) {
strvec_pushf(&cmd.args,
"--unpack-unreachable=%s",
unpack_unreachable);
} else if (pack_everything & LOOSEN_UNREACHABLE) {
strvec_push(&cmd.args,
"--unpack-unreachable");
} else if (keep_unreachable) {
strvec_push(&cmd.args, "--keep-unreachable");
}
}
if (keep_unreachable && delete_redundant &&
!(pack_everything & PACK_CRUFT))
strvec_push(&cmd.args, "--pack-loose-unreachable");
} else if (geometry.split_factor) {
pack_geometry_repack_promisors(repo, &po_args, &geometry,
&names, packtmp);
if (midx_must_contain_cruft)
strvec_push(&cmd.args, "--stdin-packs");
else
strvec_push(&cmd.args, "--stdin-packs=follow");
strvec_push(&cmd.args, "--unpacked");
} else {
strvec_push(&cmd.args, "--unpacked");
strvec_push(&cmd.args, "--incremental");
}
if (po_args.filter_options.choice)
strvec_pushf(&cmd.args, "--filter=%s",
expand_list_objects_filter_spec(&po_args.filter_options));
else if (filter_to)
die(_("option '%s' can only be used along with '%s'"), "--filter-to", "--filter");
if (geometry.split_factor)
cmd.in = -1;
else
cmd.no_stdin = 1;
ret = start_command(&cmd);
if (ret)
goto cleanup;
if (geometry.split_factor) {
FILE *in = xfdopen(cmd.in, "w");
/*
* The resulting pack should contain all objects in packs that
* are going to be rolled up, but exclude objects in packs which
* are being left alone.
*/
for (i = 0; i < geometry.split; i++)
fprintf(in, "%s\n", pack_basename(geometry.pack[i]));
for (i = geometry.split; i < geometry.pack_nr; i++)
fprintf(in, "^%s\n", pack_basename(geometry.pack[i]));
fclose(in);
}
{
struct write_pack_opts opts = {
.packdir = packdir,
.destination = packdir,
.packtmp = packtmp,
};
ret = finish_pack_objects_cmd(repo->hash_algo, &opts, &cmd,
&names);
if (ret)
goto cleanup;
}
if (!names.nr) {
if (!po_args.quiet)
printf_ln(_("Nothing new to pack."));
/*
* If we didn't write any new packs, the non-cruft packs
* may refer to once-unreachable objects in the cruft
* pack(s).
*
* If there isn't already a MIDX, the one we write
* must include the cruft pack(s), in case the
* non-cruft pack(s) refer to once-cruft objects.
*
* If there is already a MIDX, we can punt here, since
* midx_has_unknown_packs() will make the decision for
* us.
*/
if (!get_multi_pack_index(repo->objects->sources))
midx_must_contain_cruft = 1;
}
if (pack_everything & PACK_CRUFT) {
struct write_pack_opts opts = {
.po_args = &cruft_po_args,
.destination = packtmp,
.packtmp = packtmp,
.packdir = packdir,
};
if (!cruft_po_args.window)
cruft_po_args.window = xstrdup_or_null(po_args.window);
if (!cruft_po_args.window_memory)
cruft_po_args.window_memory = xstrdup_or_null(po_args.window_memory);
if (!cruft_po_args.depth)
cruft_po_args.depth = xstrdup_or_null(po_args.depth);
if (!cruft_po_args.threads)
cruft_po_args.threads = xstrdup_or_null(po_args.threads);
if (!cruft_po_args.max_pack_size)
cruft_po_args.max_pack_size = po_args.max_pack_size;
cruft_po_args.local = po_args.local;
cruft_po_args.quiet = po_args.quiet;
cruft_po_args.delta_base_offset = po_args.delta_base_offset;
cruft_po_args.pack_kept_objects = 0;
ret = write_cruft_pack(&opts, cruft_expiration,
combine_cruft_below_size, &names,
&existing);
if (ret)
goto cleanup;
if (delete_redundant && expire_to) {
/*
* If `--expire-to` is given with `-d`, it's possible
* that we're about to prune some objects. With cruft
* packs, pruning is implicit: any objects from existing
* packs that weren't picked up by new packs are removed
* when their packs are deleted.
*
* Generate an additional cruft pack, with one twist:
* `names` now includes the name of the cruft pack
* written in the previous step. So the contents of
* _this_ cruft pack exclude everything contained in the
* existing cruft pack (that is, all of the unreachable
* objects which are no older than
* `--cruft-expiration`).
*
* To make this work, cruft_expiration must become NULL
* so that this cruft pack doesn't actually prune any
* objects. If it were non-NULL, this call would always
* generate an empty pack (since every object not in the
* cruft pack generated above will have an mtime older
* than the expiration).
*
* Pretend we don't have a `--combine-cruft-below-size`
* argument, since we're not selectively combining
* anything based on size to generate the limbo cruft
* pack, but rather removing all cruft packs from the
* main repository regardless of size.
*/
opts.destination = expire_to;
ret = write_cruft_pack(&opts, NULL, 0ul, &names,
&existing);
if (ret)
goto cleanup;
}
}
if (po_args.filter_options.choice) {
struct write_pack_opts opts = {
.po_args = &po_args,
.destination = filter_to,
.packdir = packdir,
.packtmp = packtmp,
};
if (!opts.destination)
opts.destination = packtmp;
ret = write_filtered_pack(&opts, &existing, &names);
if (ret)
goto cleanup;
}
string_list_sort(&names);
odb_close(repo->objects);
/*
* Ok we have prepared all new packfiles.
*/
for_each_string_list_item(item, &names)
generated_pack_install(item->util, item->string, packdir,
packtmp);
/* End of pack replacement. */
if (delete_redundant && pack_everything & ALL_INTO_ONE)
existing_packs_mark_for_deletion(&existing, &names);
if (write_midx) {
struct repack_write_midx_opts opts = {
.existing = &existing,
.geometry = &geometry,
.names = &names,
.refs_snapshot = refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL,
.packdir = packdir,
.show_progress = show_progress,
.write_bitmaps = write_bitmaps > 0,
.midx_must_contain_cruft = midx_must_contain_cruft
};
ret = write_midx_included_packs(&opts);
if (ret)
goto cleanup;
}
odb_reprepare(repo->objects);
if (delete_redundant) {
int opts = 0;
existing_packs_remove_redundant(&existing, packdir);
if (geometry.split_factor)
pack_geometry_remove_redundant(&geometry, &names,
&existing, packdir);
if (show_progress)
opts |= PRUNE_PACKED_VERBOSE;
prune_packed_objects(opts);
if (!keep_unreachable &&
(!(pack_everything & LOOSEN_UNREACHABLE) ||
unpack_unreachable) &&
is_repository_shallow(repo))
prune_shallow(PRUNE_QUICK);
}
if (run_update_server_info)
update_server_info(repo, 0);
if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) {
unsigned flags = 0;
if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0))
flags |= MIDX_WRITE_INCREMENTAL;
write_midx_file(repo->objects->sources,
NULL, NULL, flags);
}
cleanup:
string_list_clear(&keep_pack_list, 0);
string_list_clear(&names, 1);
existing_packs_release(&existing);
pack_geometry_release(&geometry);
pack_objects_args_release(&po_args);
pack_objects_args_release(&cruft_po_args);
return ret;
}