Merge branch 'ps/read-object-info-improvements' into jch

The object-info API has been cleaned up.

Comments?

* ps/read-object-info-improvements:
  packfile: drop repository parameter from `packed_object_info()`
  packfile: skip unpacking object header for disk size requests
  packfile: disentangle return value of `packed_object_info()`
  packfile: always populate pack-specific info when reading object info
  packfile: extend `is_delta` field to allow for "unknown" state
  packfile: always declare object info to be OI_PACKED
  object-file: always set OI_LOOSE when reading object info
This commit is contained in:
Junio C Hamano 2026-01-08 16:40:37 +09:00
commit b43a84d22e
8 changed files with 124 additions and 79 deletions

View File

@ -487,8 +487,7 @@ static void batch_object_write(const char *obj_name,
data->info.sizep = &data->size;
if (pack)
ret = packed_object_info(the_repository, pack,
offset, &data->info);
ret = packed_object_info(pack, offset, &data->info);
else
ret = odb_read_object_info_extended(the_repository->objects,
&data->oid, &data->info,

View File

@ -2417,7 +2417,7 @@ static void drop_reused_delta(struct object_entry *entry)
oi.sizep = &size;
oi.typep = &type;
if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
if (packed_object_info(IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
/*
* We failed to get the info from this pack for some reason;
* fall back to odb_read_object_info, which may find another copy.
@ -3754,7 +3754,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
struct object_info oi = OBJECT_INFO_INIT;
oi.typep = &type;
if (packed_object_info(the_repository, p, ofs, &oi) < 0) {
if (packed_object_info(p, ofs, &oi) < 0) {
die(_("could not get type of object %s in pack %s"),
oid_to_hex(oid), p->pack_name);
} else if (type == OBJ_COMMIT) {

View File

@ -1493,7 +1493,7 @@ static int add_packed_commits(const struct object_id *oid,
display_progress(ctx->progress, ++ctx->progress_done);
oi.typep = &type;
if (packed_object_info(ctx->r, pack, offset, &oi) < 0)
if (packed_object_info(pack, offset, &oi) < 0)
die(_("unable to get type of object %s"), oid_to_hex(oid));
if (type != OBJ_COMMIT)

View File

@ -416,19 +416,16 @@ int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
struct object_info *oi, int flags)
{
int status = 0;
int ret;
int fd;
unsigned long mapsize;
const char *path;
void *map;
git_zstream stream;
void *map = NULL;
git_zstream stream, *stream_to_end = NULL;
char hdr[MAX_HEADER_LEN];
unsigned long size_scratch;
enum object_type type_scratch;
if (oi && oi->delta_base_oid)
oidclr(oi->delta_base_oid, source->odb->repo->hash_algo);
/*
* If we don't care about type or size, then we don't
* need to look inside the object at all. Note that we
@ -439,71 +436,101 @@ int odb_source_loose_read_object_info(struct odb_source *source,
*/
if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) {
struct stat st;
if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK))
return quick_has_loose(source->loose, oid) ? 0 : -1;
if (stat_loose_object(source->loose, oid, &st, &path) < 0)
return -1;
if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK)) {
ret = quick_has_loose(source->loose, oid) ? 0 : -1;
goto out;
}
if (stat_loose_object(source->loose, oid, &st, &path) < 0) {
ret = -1;
goto out;
}
if (oi && oi->disk_sizep)
*oi->disk_sizep = st.st_size;
return 0;
ret = 0;
goto out;
}
fd = open_loose_object(source->loose, oid, &path);
if (fd < 0) {
if (errno != ENOENT)
error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
return -1;
ret = -1;
goto out;
}
map = map_fd(fd, path, &mapsize);
if (!map)
return -1;
if (!oi->sizep)
oi->sizep = &size_scratch;
if (!oi->typep)
oi->typep = &type_scratch;
map = map_fd(fd, path, &mapsize);
if (!map) {
ret = -1;
goto out;
}
if (oi->disk_sizep)
*oi->disk_sizep = mapsize;
stream_to_end = &stream;
switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) {
case ULHR_OK:
if (parse_loose_header(hdr, oi) < 0)
status = error(_("unable to parse %s header"), oid_to_hex(oid));
else if (*oi->typep < 0)
if (!oi->sizep)
oi->sizep = &size_scratch;
if (!oi->typep)
oi->typep = &type_scratch;
if (parse_loose_header(hdr, oi) < 0) {
ret = error(_("unable to parse %s header"), oid_to_hex(oid));
goto corrupt;
}
if (*oi->typep < 0)
die(_("invalid object type"));
if (!oi->contentp)
break;
*oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid);
if (*oi->contentp)
goto cleanup;
if (oi->contentp) {
*oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid);
if (!*oi->contentp) {
ret = -1;
goto corrupt;
}
}
status = -1;
break;
case ULHR_BAD:
status = error(_("unable to unpack %s header"),
oid_to_hex(oid));
break;
ret = error(_("unable to unpack %s header"),
oid_to_hex(oid));
goto corrupt;
case ULHR_TOO_LONG:
status = error(_("header for %s too long, exceeds %d bytes"),
oid_to_hex(oid), MAX_HEADER_LEN);
break;
ret = error(_("header for %s too long, exceeds %d bytes"),
oid_to_hex(oid), MAX_HEADER_LEN);
goto corrupt;
}
if (status && (flags & OBJECT_INFO_DIE_IF_CORRUPT))
ret = 0;
corrupt:
if (ret && (flags & OBJECT_INFO_DIE_IF_CORRUPT))
die(_("loose object %s (stored in %s) is corrupt"),
oid_to_hex(oid), path);
cleanup:
git_inflate_end(&stream);
munmap(map, mapsize);
if (oi->sizep == &size_scratch)
oi->sizep = NULL;
if (oi->typep == &type_scratch)
oi->typep = NULL;
oi->whence = OI_LOOSE;
return status;
out:
if (stream_to_end)
git_inflate_end(stream_to_end);
if (map)
munmap(map, mapsize);
if (oi) {
if (oi->sizep == &size_scratch)
oi->sizep = NULL;
if (oi->typep == &type_scratch)
oi->typep = NULL;
if (oi->delta_base_oid)
oidclr(oi->delta_base_oid, source->odb->repo->hash_algo);
if (!ret)
oi->whence = OI_LOOSE;
}
return ret;
}
static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,

8
odb.h
View File

@ -323,7 +323,6 @@ struct object_info {
OI_CACHED,
OI_LOOSE,
OI_PACKED,
OI_DBCACHED
} whence;
union {
/*
@ -337,7 +336,12 @@ struct object_info {
struct {
struct packed_git *pack;
off_t offset;
unsigned int is_delta;
enum packed_object_type {
PACKED_OBJECT_TYPE_UNKNOWN,
PACKED_OBJECT_TYPE_FULL,
PACKED_OBJECT_TYPE_OFS_DELTA,
PACKED_OBJECT_TYPE_REF_DELTA,
} type;
} packed;
} u;
};

View File

@ -1876,8 +1876,7 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
ofs = pack_pos_to_offset(pack, pos);
}
if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs,
&oi) < 0) {
if (packed_object_info(pack, ofs, &oi) < 0) {
struct object_id oid;
nth_bitmap_object_oid(bitmap_git, &oid,
pack_pos_to_index(pack, pos));

View File

@ -1578,24 +1578,25 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
hashmap_add(&delta_base_cache, &ent->ent);
}
int packed_object_info(struct repository *r, struct packed_git *p,
int packed_object_info(struct packed_git *p,
off_t obj_offset, struct object_info *oi)
{
struct pack_window *w_curs = NULL;
unsigned long size;
off_t curpos = obj_offset;
enum object_type type;
enum object_type type = OBJ_NONE;
int ret;
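/*
* We get the representation type whenever the caller asks for the
* contents, size, type or delta base, but only convert it to a
* "real" type later if the caller is interested. Requests that need
* none of these (e.g. only the disk size) skip unpacking the object
* header, in which case the type stays OBJ_NONE.
*/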
if (oi->contentp) {
*oi->contentp = cache_or_unpack_entry(r, p, obj_offset, oi->sizep,
*oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset, oi->sizep,
&type);
if (!*oi->contentp)
type = OBJ_BAD;
} else {
} else if (oi->sizep || oi->typep || oi->delta_base_oid) {
type = unpack_object_header(p, &w_curs, &curpos, &size);
}
@ -1605,12 +1606,12 @@ int packed_object_info(struct repository *r, struct packed_git *p,
off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
type, obj_offset);
if (!base_offset) {
type = OBJ_BAD;
ret = -1;
goto out;
}
*oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
if (*oi->sizep == 0) {
type = OBJ_BAD;
ret = -1;
goto out;
}
} else {
@ -1623,7 +1624,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {
error("could not find object at offset %"PRIuMAX" "
"in pack %s", (uintmax_t)obj_offset, p->pack_name);
type = OBJ_BAD;
ret = -1;
goto out;
}
@ -1632,12 +1633,12 @@ int packed_object_info(struct repository *r, struct packed_git *p,
if (oi->typep) {
enum object_type ptot;
ptot = packed_to_object_type(r, p, obj_offset,
ptot = packed_to_object_type(p->repo, p, obj_offset,
type, &w_curs, curpos);
if (oi->typep)
*oi->typep = ptot;
if (ptot < 0) {
type = OBJ_BAD;
ret = -1;
goto out;
}
}
@ -1647,19 +1648,37 @@ int packed_object_info(struct repository *r, struct packed_git *p,
if (get_delta_base_oid(p, &w_curs, curpos,
oi->delta_base_oid,
type, obj_offset) < 0) {
type = OBJ_BAD;
ret = -1;
goto out;
}
} else
oidclr(oi->delta_base_oid, p->repo->hash_algo);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
OI_PACKED;
oi->whence = OI_PACKED;
oi->u.packed.offset = obj_offset;
oi->u.packed.pack = p;
switch (type) {
case OBJ_NONE:
oi->u.packed.type = PACKED_OBJECT_TYPE_UNKNOWN;
break;
case OBJ_REF_DELTA:
oi->u.packed.type = PACKED_OBJECT_TYPE_REF_DELTA;
break;
case OBJ_OFS_DELTA:
oi->u.packed.type = PACKED_OBJECT_TYPE_OFS_DELTA;
break;
default:
oi->u.packed.type = PACKED_OBJECT_TYPE_FULL;
break;
}
ret = 0;
out:
unuse_pack(&w_curs);
return type;
return ret;
}
static void *unpack_compressed_entry(struct packed_git *p,
@ -2133,7 +2152,7 @@ int packfile_store_read_object_info(struct packfile_store *store,
unsigned flags UNUSED)
{
struct pack_entry e;
int rtype;
int ret;
if (!find_pack_entry(store, oid, &e))
return 1;
@ -2145,19 +2164,12 @@ int packfile_store_read_object_info(struct packfile_store *store,
if (!oi)
return 0;
rtype = packed_object_info(store->source->odb->repo, e.p, e.offset, oi);
if (rtype < 0) {
ret = packed_object_info(e.p, e.offset, oi);
if (ret < 0) {
mark_bad_packed_object(e.p, oid);
return -1;
}
if (oi->whence == OI_PACKED) {
oi->u.packed.offset = e.offset;
oi->u.packed.pack = e.p;
oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
rtype == OBJ_OFS_DELTA);
}
return 0;
}
@ -2554,7 +2566,8 @@ int packfile_store_read_object_stream(struct odb_read_stream **out,
oi.sizep = &size;
if (packfile_store_read_object_info(store, oid, &oi, 0) ||
oi.u.packed.is_delta ||
oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA ||
oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA ||
repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
return -1;

View File

@ -441,8 +441,11 @@ void release_pack_memory(size_t);
/* global flag to enable extra checks when accessing packed objects */
extern int do_check_packed_object_crc;
int packed_object_info(struct repository *r,
struct packed_git *pack,
/*
* Look up the object info for a specific offset in the packfile.
* Returns zero on success, a negative error code otherwise.
*/
int packed_object_info(struct packed_git *pack,
off_t offset, struct object_info *);
void mark_bad_packed_object(struct packed_git *, const struct object_id *);