mirror of
https://github.com/git/git.git
synced 2026-01-10 12:53:12 +09:00
Already when introduced in c7a8a16239 (Add bundle transport, 2007-09-10), the `bundle` transport had a bug where it would open a file descriptor to the bundle file and then close it _twice_: First, the file descriptor (`data->fd`) is passed to `unbundle()`, which would use it as the `stdin` of the `index-pack` process, which as a consequence would close it via `start_command()`. However, `data->fd` would still hold the numerical value of the file descriptor, and `close_bundle()` would see that and happily close it again. This seems not to have caused too many problems in almost two decades, but I encountered a situation today where it _does_ cause problems: In i686 variants of Git for Windows, it seems that file descriptors are reused quickly after they have been closed. In the particular scenario I faced, `git fetch <bundle> <ref>` gets the same file descriptor value when opening the bundle file and importing its embedded packfile (which implicitly closes the file descriptor) and then when opening a pack file in `fetch_and_consume_refs()` while looking up an object's header. Later on, after the bundle has been imported (and the `close_bundle()` function erroneously closes the file descriptor that has _already_ been closed when using it as `stdin` for `git index-pack`), the same file descriptor value has now been reused via `use_pack()`. Now, when either the recursive fetch (which defaults to "on", unfortunately) or a commit-graph update needs to `mmap()` the packfile, it fails due to a now-invalid file descriptor that _should_ point to the pack file but doesn't anymore. To fix that, let's invalidate `data->fd` after calling `unbundle()`. That way, `close_bundle()` does not close a file descriptor that may have been reused for something different. While at it, document that `unbundle()` closes the file descriptor, and ensure that it also does that when failing to verify the bundle. 
Luckily, this bug does not affect the bundle URI feature; it only affects the `git fetch <bundle>` code path. Note that this patch does not _completely_ clarify who is responsible for closing that file descriptor, as `run_command()` may fail _without_ closing `cmd->in`. Addressing this issue thoroughly, however, would require a rather thorough re-design of the `start_command()` and `finish_command()` functionality to make it a lot less murky who is responsible for what file descriptors. At least this here patch is relatively easy to reason about, and addresses a hard failure (`fatal: mmap: could not determine filesize`) at the expense of leaking a file descriptor under very rare circumstances in which `git fetch` would error out anyway. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
75 lines
2.3 KiB
C
75 lines
2.3 KiB
C
#ifndef BUNDLE_H
|
|
#define BUNDLE_H
|
|
|
|
#include "strvec.h"
|
|
#include "string-list.h"
|
|
#include "list-objects-filter-options.h"
|
|
|
|
struct bundle_header {
|
|
unsigned version;
|
|
struct string_list prerequisites;
|
|
struct string_list references;
|
|
const struct git_hash_algo *hash_algo;
|
|
struct list_objects_filter_options filter;
|
|
};
|
|
|
|
/*
 * Static initializer for `struct bundle_header`.
 *
 * Both string lists are set up with STRING_LIST_INIT_DUP and the filter
 * with LIST_OBJECTS_FILTER_INIT. Members not named here (`version`,
 * `hash_algo`) are zero-initialized, as for any designated initializer.
 */
#define BUNDLE_HEADER_INIT \
{ \
	.prerequisites = STRING_LIST_INIT_DUP, \
	.references = STRING_LIST_INIT_DUP, \
	.filter = LIST_OBJECTS_FILTER_INIT, \
}
|
|
/*
 * Run-time counterpart of BUNDLE_HEADER_INIT.
 * NOTE(review): presumably puts `header` into the same state as the
 * static initializer -- confirm in bundle.c.
 */
void bundle_header_init(struct bundle_header *header);

/*
 * Release a bundle header.
 * NOTE(review): presumably frees the resources held by `header` (its
 * string lists and filter) but not `header` itself -- confirm in bundle.c.
 */
void bundle_header_release(struct bundle_header *header);
|
|
|
|
/*
 * NOTE(review): judging by the name, reports whether `path` refers to a
 * bundle file, with `quiet` suppressing diagnostics -- confirm the exact
 * return convention in bundle.c.
 */
int is_bundle(const char *path, int quiet);

/*
 * Read the header of the bundle at `path` into `header`.
 *
 * The documentation of unbundle() expects its `bundle_fd` argument to
 * come from this function.
 * NOTE(review): presumably the descriptor is the return value on success
 * and a negative value signals failure -- confirm.
 */
int read_bundle_header(const char *path, struct bundle_header *header);

/*
 * Like read_bundle_header(), but operates on an already-open descriptor
 * `fd` instead of a path.
 * NOTE(review): `report_path` is presumably only used to name the bundle
 * in diagnostics -- confirm.
 */
int read_bundle_header_fd(int fd, struct bundle_header *header,
			  const char *report_path);

/*
 * Create a bundle at `path` for repository `r`.
 * NOTE(review): `argc`/`argv` presumably select the revisions to include,
 * `pack_options` presumably carries extra arguments for the pack-writing
 * step, and `version` presumably selects the bundle format version --
 * confirm in bundle.c.
 */
int create_bundle(struct repository *r, const char *path,
		  int argc, const char **argv, struct strvec *pack_options,
		  int version);
|
|
|
|
/*
 * Bit flags accepted by verify_bundle() and, through
 * `struct unbundle_opts`, by unbundle(). Each enumerator occupies its own
 * bit, so values can be combined with bitwise OR.
 */
enum verify_bundle_flags {
	/* NOTE(review): presumably requests more detailed output -- confirm. */
	VERIFY_BUNDLE_VERBOSE = (1 << 0),
	/* NOTE(review): presumably suppresses output -- confirm. */
	VERIFY_BUNDLE_QUIET = (1 << 1),
	/*
	 * NOTE(review): presumably requests fsck checks on the unbundled
	 * objects (see `fsck_msg_types` in `struct unbundle_opts`) -- confirm.
	 */
	VERIFY_BUNDLE_FSCK = (1 << 2),
};
|
|
|
|
/*
 * Verify the bundle described by `header` against repository `r`.
 *
 * `flags` is a combination of `enum verify_bundle_flags` values. unbundle()
 * calls this function itself before unbundling.
 * NOTE(review): presumably this checks that the bundle's prerequisites are
 * available in `r` -- confirm the checks and the return convention in
 * bundle.c.
 */
int verify_bundle(struct repository *r, struct bundle_header *header,
		  enum verify_bundle_flags flags);
|
|
|
|
struct unbundle_opts {
|
|
enum verify_bundle_flags flags;
|
|
/*
|
|
* fsck_msg_types may optionally contain fsck message severity
|
|
* configuration. If present, this configuration gets directly appended
|
|
* to a '--fsck-objects' option and therefore must be prefixed with '='.
|
|
* (E.g. "=missingEmail=ignore,gitmodulesUrl=ignore")
|
|
*/
|
|
const char *fsck_msg_types;
|
|
};
|
|
|
|
/**
 * Unbundle after reading the header with read_bundle_header().
 *
 * We'll invoke "git index-pack --stdin --fix-thin" for you on the
 * provided `bundle_fd` from read_bundle_header().
 *
 * Provide "extra_index_pack_args" to pass any extra arguments
 * (e.g. "-v" for verbose/progress), NULL otherwise. The provided
 * "extra_index_pack_args" (if any) will be strvec_clear()'d for you.
 *
 * Before unbundling, this method will call verify_bundle() with 'flags'
 * provided in 'opts'.
 *
 * Note that the `bundle_fd` will be closed as part of the operation,
 * including when verifying the bundle fails. Callers must therefore
 * treat `bundle_fd` as invalid once this function has been called and
 * must not close it again: the descriptor number may already have been
 * reused for an unrelated file (e.g. a packfile), and closing it a
 * second time would break that other user.
 */
int unbundle(struct repository *r, struct bundle_header *header,
	     int bundle_fd, struct strvec *extra_index_pack_args,
	     struct unbundle_opts *opts);
|
|
/*
 * List references recorded in the bundle described by `header`.
 * NOTE(review): `argc`/`argv` presumably restrict the listing to matching
 * ref names, and the output presumably goes to stdout -- confirm both,
 * and the return convention, in bundle.c.
 */
int list_bundle_refs(struct bundle_header *header,
		     int argc, const char **argv);
|
|
|
|
#endif
|