mirror of
https://github.com/git/git.git
synced 2026-01-11 21:33:13 +09:00
builtin/maintenance: introduce "geometric-repack" task
Introduce a new "geometric-repack" task. This task uses our geometric repack infrastructure as provided by git-repack(1) itself, which is a strategy that especially hosting providers tend to use to amortize the costs of repacking objects. There is one issue though with geometric repacks, namely that they unconditionally pack all loose objects, regardless of whether or not they are reachable. This is done because it means that we can completely skip the reachability step, which significantly speeds up the operation. But it has the big downside that we are unable to expire objects over time. To address this issue we thus use a split strategy in this new task: whenever a geometric repack would merge together all packs, we instead do an all-into-one repack. By default, these all-into-one repacks have cruft packs enabled, so unreachable objects would now be written into their own pack. Consequently, they won't be soaked up during geometric repacking anymore and can be expired with the next full repack, assuming that their expiry date has passed. Signed-off-by: Patrick Steinhardt <ps@pks.im> Acked-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
60c0af8e20
commit
9bc151850c
@ -75,6 +75,17 @@ maintenance.incremental-repack.auto::
|
||||
number of pack-files not in the multi-pack-index is at least the value
|
||||
of `maintenance.incremental-repack.auto`. The default value is 10.
|
||||
|
||||
maintenance.geometric-repack.auto::
|
||||
This integer config option controls how often the `geometric-repack`
|
||||
task should be run as part of `git maintenance run --auto`. If zero,
|
||||
then the `geometric-repack` task will not run with the `--auto`
|
||||
option. A negative value will force the task to run every time.
|
||||
Otherwise, a positive value implies the command should run either when
|
||||
there are packfiles that need to be merged together to retain the
|
||||
geometric progression, or when there are at least this many loose
|
||||
objects that would be written into a new packfile. The default value is
|
||||
100.
|
||||
|
||||
maintenance.reflog-expire.auto::
|
||||
This integer config option controls how often the `reflog-expire` task
|
||||
should be run as part of `git maintenance run --auto`. If zero, then
|
||||
|
||||
102
builtin/gc.c
102
builtin/gc.c
@ -34,6 +34,7 @@
|
||||
#include "pack-objects.h"
|
||||
#include "path.h"
|
||||
#include "reflog.h"
|
||||
#include "repack.h"
|
||||
#include "rerere.h"
|
||||
#include "blob.h"
|
||||
#include "tree.h"
|
||||
@ -254,6 +255,7 @@ enum maintenance_task_label {
|
||||
TASK_PREFETCH,
|
||||
TASK_LOOSE_OBJECTS,
|
||||
TASK_INCREMENTAL_REPACK,
|
||||
TASK_GEOMETRIC_REPACK,
|
||||
TASK_GC,
|
||||
TASK_COMMIT_GRAPH,
|
||||
TASK_PACK_REFS,
|
||||
@ -1566,6 +1568,101 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int maintenance_task_geometric_repack(struct maintenance_run_opts *opts,
|
||||
struct gc_config *cfg)
|
||||
{
|
||||
struct pack_geometry geometry = {
|
||||
.split_factor = 2,
|
||||
};
|
||||
struct pack_objects_args po_args = {
|
||||
.local = 1,
|
||||
};
|
||||
struct existing_packs existing_packs = EXISTING_PACKS_INIT;
|
||||
struct string_list kept_packs = STRING_LIST_INIT_DUP;
|
||||
struct child_process child = CHILD_PROCESS_INIT;
|
||||
int ret;
|
||||
|
||||
existing_packs.repo = the_repository;
|
||||
existing_packs_collect(&existing_packs, &kept_packs);
|
||||
pack_geometry_init(&geometry, &existing_packs, &po_args);
|
||||
pack_geometry_split(&geometry);
|
||||
|
||||
child.git_cmd = 1;
|
||||
|
||||
strvec_pushl(&child.args, "repack", "-d", "-l", NULL);
|
||||
if (geometry.split < geometry.pack_nr)
|
||||
strvec_push(&child.args, "--geometric=2");
|
||||
else
|
||||
add_repack_all_option(cfg, NULL, &child.args);
|
||||
if (opts->quiet)
|
||||
strvec_push(&child.args, "--quiet");
|
||||
if (the_repository->settings.core_multi_pack_index)
|
||||
strvec_push(&child.args, "--write-midx");
|
||||
|
||||
if (run_command(&child)) {
|
||||
ret = error(_("failed to perform geometric repack"));
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
existing_packs_release(&existing_packs);
|
||||
pack_geometry_release(&geometry);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Decide whether the "geometric-repack" task should run as part of
 * `git maintenance run --auto`.
 *
 * Reads "maintenance.geometric-repack.auto" (falling back to 100 when the
 * lookup leaves the value untouched):
 *
 *   - zero disables the task,
 *   - a negative value always runs it,
 *   - a positive value runs it when the geometric split is non-zero or
 *     when too_many_loose_objects() reports that the limit is exceeded.
 *
 * Returns 1 when the task should run and 0 otherwise.
 */
static int geometric_repack_auto_condition(struct gc_config *cfg UNUSED)
{
	struct existing_packs packs = EXISTING_PACKS_INIT;
	struct string_list keep = STRING_LIST_INIT_DUP;
	struct pack_objects_args args = { .local = 1 };
	struct pack_geometry geom = { .split_factor = 2 };
	int limit = 100;
	int needed;

	repo_config_get_int(the_repository, "maintenance.geometric-repack.auto",
			    &limit);
	if (limit <= 0)
		return limit < 0;

	packs.repo = the_repository;
	existing_packs_collect(&packs, &keep);
	pack_geometry_init(&geom, &packs, &args);
	pack_geometry_split(&geom);

	/*
	 * When we'd merge at least two packs with one another we always
	 * perform the repack. Only if that is not the case do we estimate
	 * the number of loose objects to determine whether we want to
	 * create a new packfile or not.
	 */
	needed = geom.split || too_many_loose_objects(limit);

	existing_packs_release(&packs);
	pack_geometry_release(&geom);
	return needed;
}
|
||||
|
||||
typedef int (*maintenance_task_fn)(struct maintenance_run_opts *opts,
|
||||
struct gc_config *cfg);
|
||||
typedef int (*maintenance_auto_fn)(struct gc_config *cfg);
|
||||
@ -1608,6 +1705,11 @@ static const struct maintenance_task tasks[] = {
|
||||
.background = maintenance_task_incremental_repack,
|
||||
.auto_condition = incremental_repack_auto_condition,
|
||||
},
|
||||
[TASK_GEOMETRIC_REPACK] = {
|
||||
.name = "geometric-repack",
|
||||
.background = maintenance_task_geometric_repack,
|
||||
.auto_condition = geometric_repack_auto_condition,
|
||||
},
|
||||
[TASK_GC] = {
|
||||
.name = "gc",
|
||||
.foreground = maintenance_task_gc_foreground,
|
||||
|
||||
@ -465,6 +465,144 @@ test_expect_success 'maintenance.incremental-repack.auto (when config is unset)'
|
||||
)
|
||||
'
|
||||
|
||||
# Run the geometric-repack task once and check its outcome.
#
# $1: number of "*.pack" files expected in .git/objects/pack afterwards.
#
# Writes "trace2.txt", "packfiles" and "count" into the current directory.
run_and_verify_geometric_pack () {
	EXPECTED_PACKS="$1" &&

	# The task must have spawned a geometric git-repack(1).
	rm -f trace2.txt &&
	GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
		git maintenance run --task=geometric-repack 2>/dev/null &&
	test_subcommand git repack -d -l \
		--geometric=2 --quiet --write-midx <trace2.txt &&

	# The packfile count must match what the caller expects.
	ls -l .git/objects/pack/*.pack >packfiles &&
	test_line_count = "$EXPECTED_PACKS" packfiles &&

	# No loose objects may be left behind.
	git count-objects -v >count &&
	test_grep '^count: 0$' count
}
|
||||
|
||||
# Walks the geometric-repack task through a sequence of repository states
# (fresh commit, additional commit, unreachable blobs) and checks, via trace2
# events, whether each run spawns a "--geometric=2" repack or a repack with
# "--cruft --cruft-expiration=2.weeks.ago", as well as the resulting number
# of packfiles and, at the end, the presence of exactly one ".mtimes" file.
test_expect_success 'geometric repacking task' '
|
||||
test_when_finished "rm -rf repo" &&
|
||||
git init repo &&
|
||||
(
|
||||
cd repo &&
|
||||
git config set maintenance.auto false &&
|
||||
test_commit initial &&
|
||||
|
||||
# The initial repack causes an all-into-one repack.
|
||||
GIT_TRACE2_EVENT="$(pwd)/initial-repack.txt" \
|
||||
git maintenance run --task=geometric-repack 2>/dev/null &&
|
||||
test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago \
|
||||
--quiet --write-midx <initial-repack.txt &&
|
||||
|
||||
# Repacking should now cause a no-op geometric repack because
|
||||
# no packfiles need to be combined.
|
||||
ls -l .git/objects/pack >before &&
|
||||
run_and_verify_geometric_pack 1 &&
|
||||
ls -l .git/objects/pack >after &&
|
||||
test_cmp before after &&
|
||||
|
||||
# This incremental change creates a new packfile that only
|
||||
# soaks up loose objects. The packfiles are not getting merged
|
||||
# at this point.
|
||||
test_commit loose &&
|
||||
run_and_verify_geometric_pack 2 &&
|
||||
|
||||
# Both packfiles have 3 objects, so the next run would cause us
|
||||
# to merge all packfiles together. This should be turned into
|
||||
# an all-into-one-repack.
|
||||
GIT_TRACE2_EVENT="$(pwd)/all-into-one-repack.txt" \
|
||||
git maintenance run --task=geometric-repack 2>/dev/null &&
|
||||
test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago \
|
||||
--quiet --write-midx <all-into-one-repack.txt &&
|
||||
|
||||
# The geometric repack soaks up unreachable objects.
|
||||
echo blob-1 | git hash-object -w --stdin -t blob &&
|
||||
run_and_verify_geometric_pack 2 &&
|
||||
|
||||
# A second unreachable object should be written into another packfile.
|
||||
echo blob-2 | git hash-object -w --stdin -t blob &&
|
||||
run_and_verify_geometric_pack 3 &&
|
||||
|
||||
# And these two small packs should now be merged via the
|
||||
# geometric repack. The large packfile should remain intact.
|
||||
run_and_verify_geometric_pack 2 &&
|
||||
|
||||
# If we now add two more objects and repack twice we should
|
||||
# then see another all-into-one repack. This time around
|
||||
# though, as we have unreachable objects, we should also see a
|
||||
# cruft pack.
|
||||
echo blob-3 | git hash-object -w --stdin -t blob &&
|
||||
echo blob-4 | git hash-object -w --stdin -t blob &&
|
||||
run_and_verify_geometric_pack 3 &&
|
||||
GIT_TRACE2_EVENT="$(pwd)/cruft-repack.txt" \
|
||||
git maintenance run --task=geometric-repack 2>/dev/null &&
|
||||
test_subcommand git repack -d -l --cruft --cruft-expiration=2.weeks.ago \
|
||||
--quiet --write-midx <cruft-repack.txt &&
|
||||
ls .git/objects/pack/*.pack >packs &&
|
||||
test_line_count = 2 packs &&
|
||||
ls .git/objects/pack/*.mtimes >cruft &&
|
||||
test_line_count = 1 cruft
|
||||
)
|
||||
'
|
||||
|
||||
# Run `git maintenance run --auto --task=geometric-repack` and check, based
# on the trace2 event log, whether git-repack(1) was spawned.
#
# $1: "true" when a repack is expected, "false" when none must happen; any
#     other value is reported via BUG.
# $2: optional "<key>=<value>" that is appended to
#     "maintenance.geometric-repack." and passed via `git -c`, e.g. "auto=0".
#
# Writes "trace2.txt" into the current directory.
test_geometric_repack_needed () {
	NEEDED="$1" &&
	GEOMETRIC_CONFIG="$2" &&
	rm -f trace2.txt &&
	GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
	git ${GEOMETRIC_CONFIG:+-c maintenance.geometric-repack.$GEOMETRIC_CONFIG} \
		maintenance run --auto --task=geometric-repack 2>/dev/null &&
	case "$NEEDED" in
	true)
		test_grep "\[\"git\",\"repack\"," trace2.txt;;
	false)
		# Use the helper's own negation so that the expected
		# "no match" outcome is not reported as a grep failure.
		test_grep ! "\[\"git\",\"repack\"," trace2.txt;;
	*)
		BUG "invalid parameter: $NEEDED";;
	esac
}
|
||||
|
||||
# Checks when `git maintenance run --auto --task=geometric-repack` spawns
# git-repack(1) for different "maintenance.geometric-repack.auto" values:
# first in an empty repository, then with loose objects around the limit,
# and finally once two packfiles exist.
test_expect_success 'geometric repacking with --auto' '
|
||||
test_when_finished "rm -rf repo" &&
|
||||
git init repo &&
|
||||
(
|
||||
cd repo &&
|
||||
|
||||
# An empty repository does not need repacking, except when
|
||||
# explicitly told to do it.
|
||||
test_geometric_repack_needed false &&
|
||||
test_geometric_repack_needed false auto=0 &&
|
||||
test_geometric_repack_needed false auto=1 &&
|
||||
test_geometric_repack_needed true auto=-1 &&
|
||||
|
||||
test_oid_init &&
|
||||
|
||||
# Loose objects cause a repack when crossing the limit. Note
|
||||
# that the number of objects gets extrapolated by having a look
|
||||
# at the "objects/17/" shard.
|
||||
test_commit "$(test_oid blob17_1)" &&
|
||||
test_geometric_repack_needed false &&
|
||||
test_commit "$(test_oid blob17_2)" &&
|
||||
test_geometric_repack_needed false auto=257 &&
|
||||
test_geometric_repack_needed true auto=256 &&
|
||||
|
||||
# Force another repack.
|
||||
test_commit first &&
|
||||
test_commit second &&
|
||||
test_geometric_repack_needed true auto=-1 &&
|
||||
|
||||
# We now have two packfiles that would be merged together. As
|
||||
# such, the repack should always happen unless the user has
|
||||
# disabled the auto task.
|
||||
test_geometric_repack_needed false auto=0 &&
|
||||
test_geometric_repack_needed true auto=9000
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'pack-refs task' '
|
||||
for n in $(test_seq 1 5)
|
||||
do
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user