mirror of
https://github.com/git/git.git
synced 2026-01-11 21:33:13 +09:00
The MIDX file format currently requires that pack files be identified by
the lexicographic ordering of their names (that is, a pack having a
checksum beginning with "abc" would have a numeric pack_int_id which is
smaller than the same value for a pack beginning with "bcd").
As a result, it is impossible to combine adjacent MIDX layers together
without permuting bits from bitmaps that are in more recent layer(s).
To see why, consider the following example:
        | packs       | preferred pack
--------+-------------+---------------
MIDX #0 | { X, Y, Z } | Y
MIDX #1 | { A, B, C } | B
MIDX #2 | { D, E, F } | D
, where MIDX #2's base MIDX is MIDX #1, and so on. Suppose that we want
to combine MIDX layers #0 and #1, to create a new layer #0' containing
the packs from both layers. With the original three MIDX layers, objects
are laid out in the bitmap in the order they appear in their source
pack, and the packs themselves are arranged according to the pseudo-pack
order. In this case, that ordering is Y, X, Z, B, A, C.
But recall that the pseudo-pack ordering is defined by the order that
packs appear in the MIDX, with the exception of the preferred pack,
which sorts ahead of all other packs regardless of its position within
the MIDX. In the above example, that means that pack 'Y' could be placed
anywhere (so long as it is designated as preferred), however, all other
packs must be placed in the location listed above.
Because that ordering isn't sorted lexicographically, it is impossible
to compact MIDX layers in the above configuration without permuting the
object-to-bit-position mapping. Changing this mapping would affect all
bitmaps belonging to newer layers, rendering the bitmaps associated with
MIDX #2 unreadable.
One of the goals of MIDX compaction is that we are able to shrink the
length of the MIDX chain *without* invalidating bitmaps that belong to
newer layers, and the lexicographic ordering constraint is at odds with
this goal.
However, packs do not *need* to be lexicographically ordered within the
MIDX. As far as I can gather, the only reason they are sorted lexically
is to make it possible to perform a binary search over the pack names in
a MIDX, necessary to make `midx_contains_pack()`'s performance
logarithmic in the number of packs rather than linear.
Relax this constraint by allowing MIDX writes to proceed with packs that
are not arranged in lexicographic order. `midx_contains_pack()` will
lazily instantiate a `pack_names_sorted` array on the MIDX, which will
be used to implement the binary search over pack names.
Note that this produces MIDXs which may be incompatible with earlier
versions of Git that have stricter requirements on the layout of packs
within a MIDX. This patch does *not* modify the version number of the
MIDX format, since existing versions of Git already know to gracefully
ignore a MIDX with packs that appear out-of-order.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
141 lines
5.0 KiB
C
141 lines
5.0 KiB
C
#ifndef MIDX_H
|
|
#define MIDX_H
|
|
|
|
#include "string-list.h"
|
|
|
|
struct object_id;
|
|
struct pack_entry;
|
|
struct repository;
|
|
struct bitmapped_pack;
|
|
struct git_hash_algo;
|
|
struct odb_source;
|
|
|
|
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
|
|
#define MIDX_VERSION 1
|
|
#define MIDX_BYTE_FILE_VERSION 4
|
|
#define MIDX_BYTE_HASH_VERSION 5
|
|
#define MIDX_BYTE_NUM_CHUNKS 6
|
|
#define MIDX_BYTE_NUM_PACKS 8
|
|
#define MIDX_HEADER_SIZE 12
|
|
|
|
#define MIDX_CHUNK_ALIGNMENT 4
|
|
#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
|
|
#define MIDX_CHUNKID_BITMAPPEDPACKS 0x42544d50 /* "BTMP" */
|
|
#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
|
|
#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
|
|
#define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */
|
|
#define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */
|
|
#define MIDX_CHUNKID_REVINDEX 0x52494458 /* "RIDX" */
|
|
#define MIDX_CHUNKID_BASE 0x42415345 /* "BASE" */
|
|
#define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t))
|
|
#define MIDX_LARGE_OFFSET_NEEDED 0x80000000
|
|
|
|
#define GIT_TEST_MULTI_PACK_INDEX "GIT_TEST_MULTI_PACK_INDEX"
|
|
#define GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL \
|
|
"GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL"
|
|
|
|
/*
 * State describing a single multi-pack index (MIDX). A MIDX may be one
 * layer of a chain of MIDXs, in which case base_midx refers to the layer
 * that this one is built on top of.
 */
struct multi_pack_index {
|
|
struct odb_source *source;
|
|
|
|
/* Raw MIDX contents and their length in bytes. */
const unsigned char *data;
|
|
size_t data_len;
|
|
|
|
const uint32_t *revindex_data;
|
|
const uint32_t *revindex_map;
|
|
size_t revindex_len;
|
|
|
|
/*
 * NOTE(review): the fields below presumably mirror the values stored at
 * the MIDX_BYTE_* header offsets -- confirm against the loader.
 */
uint32_t signature;
|
|
unsigned char version;
|
|
unsigned char hash_len;
|
|
unsigned char num_chunks;
|
|
uint32_t num_packs;
|
|
uint32_t num_objects;
|
|
int preferred_pack_idx;
|
|
|
|
int has_chain;
|
|
|
|
/*
 * Pointers to individual chunks and, where tracked, their lengths.
 * NOTE(review): names appear to correspond to the MIDX_CHUNKID_*
 * constants -- confirm against the loader.
 */
const unsigned char *chunk_pack_names;
|
|
size_t chunk_pack_names_len;
|
|
const uint32_t *chunk_bitmapped_packs;
|
|
size_t chunk_bitmapped_packs_len;
|
|
const uint32_t *chunk_oid_fanout;
|
|
const unsigned char *chunk_oid_lookup;
|
|
const unsigned char *chunk_object_offsets;
|
|
const unsigned char *chunk_large_offsets;
|
|
size_t chunk_large_offsets_len;
|
|
const unsigned char *chunk_revindex;
|
|
size_t chunk_revindex_len;
|
|
|
|
/* The base MIDX layer of this MIDX, along with counts covering the base. */
struct multi_pack_index *base_midx;
|
|
uint32_t num_objects_in_base;
|
|
uint32_t num_packs_in_base;
|
|
|
|
/*
 * Pack names in the order the packs appear in the MIDX. This order is
 * not required to be lexicographic.
 */
const char **pack_names;
|
|
/*
 * Lazily instantiated by midx_contains_pack() so that it can binary
 * search over pack names even when the packs are not arranged in
 * lexicographic order.
 * NOTE(review): presumably holds positions into pack_names ordered by
 * name -- confirm against the implementation.
 */
size_t *pack_names_sorted;
|
|
struct packed_git **packs;
|
|
};
|
|
|
|
#define MIDX_PROGRESS (1 << 0)
|
|
#define MIDX_WRITE_REV_INDEX (1 << 1)
|
|
#define MIDX_WRITE_BITMAP (1 << 2)
|
|
#define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3)
|
|
#define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4)
|
|
#define MIDX_WRITE_INCREMENTAL (1 << 5)
|
|
|
|
#define MIDX_EXT_REV "rev"
|
|
#define MIDX_EXT_BITMAP "bitmap"
|
|
#define MIDX_EXT_MIDX "midx"
|
|
|
|
const char *get_midx_checksum(const struct multi_pack_index *m) /* static buffer */;
|
|
const unsigned char *get_midx_hash(const struct multi_pack_index *m);
|
|
void get_midx_filename(struct odb_source *source, struct strbuf *out);
|
|
void get_midx_filename_ext(struct odb_source *source, struct strbuf *out,
|
|
const unsigned char *hash, const char *ext);
|
|
void get_midx_chain_dirname(struct odb_source *source, struct strbuf *out);
|
|
void get_midx_chain_filename(struct odb_source *source, struct strbuf *out);
|
|
void get_split_midx_filename_ext(struct odb_source *source, struct strbuf *buf,
|
|
const unsigned char *hash, const char *ext);
|
|
|
|
struct multi_pack_index *get_multi_pack_index(struct odb_source *source);
|
|
struct multi_pack_index *load_multi_pack_index(struct odb_source *source);
|
|
int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id);
|
|
struct packed_git *nth_midxed_pack(struct multi_pack_index *m,
|
|
uint32_t pack_int_id);
|
|
int nth_bitmapped_pack(struct multi_pack_index *m,
|
|
struct bitmapped_pack *bp, uint32_t pack_int_id);
|
|
int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m,
|
|
uint32_t *result);
|
|
int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m,
|
|
uint32_t *result);
|
|
int midx_has_oid(struct multi_pack_index *m, const struct object_id *oid);
|
|
off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos);
|
|
uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos);
|
|
struct object_id *nth_midxed_object_oid(struct object_id *oid,
|
|
struct multi_pack_index *m,
|
|
uint32_t n);
|
|
int fill_midx_entry(struct multi_pack_index *m, const struct object_id *oid, struct pack_entry *e);
|
|
int midx_contains_pack(struct multi_pack_index *m,
|
|
const char *idx_or_pack_name);
|
|
int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id);
|
|
int prepare_multi_pack_index_one(struct odb_source *source);
|
|
|
|
|
|
int write_midx_file(struct odb_source *source,
|
|
const char *preferred_pack_name, const char *refs_snapshot,
|
|
unsigned flags);
|
|
/*
 * Variant of write_midx_file() which writes a MIDX containing only the
 * packs specified in packs_to_include.
 */
int write_midx_file_only(struct odb_source *source,
|
|
struct string_list *packs_to_include,
|
|
const char *preferred_pack_name,
|
|
const char *refs_snapshot, unsigned flags);
|
|
void clear_midx_file(struct repository *r);
|
|
int verify_midx_file(struct odb_source *source, unsigned flags);
|
|
int expire_midx_packs(struct odb_source *source, unsigned flags);
|
|
int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags);
|
|
|
|
void close_midx(struct multi_pack_index *m);
|
|
|
|
#endif
|