From 758c1f9d1bcab5115f5d9e990c952602ea58a161 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 24 Apr 2017 19:29:53 +0200 Subject: [PATCH 1/7] archive-zip: add tests for big ZIP archives Test the creation of ZIP archives bigger than 4GB and containing files bigger than 4GB. They are marked as EXPENSIVE because they take quite a while and because the first one needs a bit more than 4GB of disk space to store the resulting archive. The big archive in the first test is made up of a tree containing thousands of copies of a small file. Yet the test has to write out the full archive because unzip doesn't offer a way to read from stdin. The big file in the second test is provided as a zipped pack file to avoid writing another 4GB file to disk and then adding it. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/t5004-archive-corner-cases.sh | 45 ++++++++++++++++++++++++++++++++ t/t5004/big-pack.zip | Bin 0 -> 7373 bytes 2 files changed, 45 insertions(+) create mode 100644 t/t5004/big-pack.zip diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh index cca23383c5..5c886fa823 100755 --- a/t/t5004-archive-corner-cases.sh +++ b/t/t5004-archive-corner-cases.sh @@ -155,4 +155,49 @@ test_expect_success ZIPINFO 'zip archive with many entries' ' test_cmp expect actual ' +test_expect_failure EXPENSIVE,UNZIP 'zip archive bigger than 4GB' ' + # build string containing 65536 characters + s=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef && + s=$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s && + s=$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s && + + # create blob with a length of 65536 + 1 bytes + blob=$(echo $s | git hash-object -w --stdin) && + + # create tree containing 65500 entries of that blob + for i in $(test_seq 1 65500) + do + echo "100644 blob $blob $i" + done >tree && + tree=$(git mktree big.lst && + grep $size big.lst +' + test_done diff --git 
a/t/t5004/big-pack.zip b/t/t5004/big-pack.zip new file mode 100644 index 0000000000000000000000000000000000000000..caaf614eeece6f818c525e433561e37560a75b05 GIT binary patch literal 7373 zcmWIWW@Zs#U}E54xLY#A%SmVLl4u471|Jp%215oJhJwW8Y+ZvC^HlQ`3(KT5gS6zd z#6$y=#3VyYlN6KGlw?bTG()50MB`LTb5p&{l#0+0P6p-9-y6#!_6|EcGQyxED zyZY^&^YwM2r?Wic_gf3wz0#?R{8hB^$gt{iHiUEJN{iALjZ~|R(VzA zOp{_@IDE*S!H8sEfc%Wl8*lHd?-DJPX@5BLD~n)>se}uQ{sHa{9CBhhi*hb3*-*jg zc5o4&4%_XW4Bab?Enk9jiu`<7qbA8y|-Ck>2+hMBYN-pgCcFt>ev-5*|-&jb`JE>Hh`B*~cT z&t2=?C44}E8GDaU*PD0ekK%{l7w@(f14RzJnfvea+fQlS75nRoU$s?(g=&R(fOLb% zK_JPHAqeJ(fjJ$>oKcz4&>2l3kc=^!7e@2KXkHl23k(gTVK5p7z>;7z9gKznQp0() zeK6WS7;PVn){Ud}!f4$%I)=h9I*!CJ8V10UU^E?!h5@KqG@1@Z!(cQWK$^#7<%OS{ z_HQoT!K!n;di%NDON?i(ygj-((dOTWgOj)4mXFhko42#<@9S6BTc6*5|Cc?$n~_P5 z8P`mn1SleafRRC^5k!+Qug40R*F&4rL$?-n>J8c2VFDo0!BTPW}9!Q@6 I&6hC%0B76lcmMzZ literal 0 HcmV?d00001 From c061a149709195e0ec914a5d6ba173042e4197f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 24 Apr 2017 19:30:49 +0200 Subject: [PATCH 2/7] archive-zip: use strbuf for ZIP directory Keep the ZIP central directory, which is written after all archive entries, in a strbuf instead of a custom-managed buffer. It contains binary data, so we can't (and don't want to) use the full range of strbuf functions and we don't need the terminating NUL, but the result is shorter and simpler code. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- archive-zip.c | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/archive-zip.c b/archive-zip.c index b429a8d974..a6fac59602 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -11,16 +11,14 @@ static int zip_date; static int zip_time; -static unsigned char *zip_dir; -static unsigned int zip_dir_size; +/* We only care about the "buf" part here. 
*/ +static struct strbuf zip_dir; static unsigned int zip_offset; -static unsigned int zip_dir_offset; static uint64_t zip_dir_entries; static unsigned int max_creator_version; -#define ZIP_DIRECTORY_MIN_SIZE (1024 * 1024) #define ZIP_STREAM (1 << 3) #define ZIP_UTF8 (1 << 11) @@ -268,7 +266,6 @@ static int write_zip_entry(struct archiver_args *args, unsigned long attr2; unsigned long compressed_size; unsigned long crc; - unsigned long direntsize; int method; unsigned char *out; void *deflated = NULL; @@ -356,13 +353,6 @@ static int write_zip_entry(struct archiver_args *args, extra.flags[0] = 1; /* just mtime */ copy_le32(extra.mtime, args->time); - /* make sure we have enough free space in the dictionary */ - direntsize = ZIP_DIR_HEADER_SIZE + pathlen + ZIP_EXTRA_MTIME_SIZE; - while (zip_dir_size < zip_dir_offset + direntsize) { - zip_dir_size += ZIP_DIRECTORY_MIN_SIZE; - zip_dir = xrealloc(zip_dir, zip_dir_size); - } - copy_le32(dirent.magic, 0x02014b50); copy_le16(dirent.creator_version, creator_version); copy_le16(dirent.version, 10); @@ -486,12 +476,9 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(dirent.attr1, !is_binary); - memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE); - zip_dir_offset += ZIP_DIR_HEADER_SIZE; - memcpy(zip_dir + zip_dir_offset, path, pathlen); - zip_dir_offset += pathlen; - memcpy(zip_dir + zip_dir_offset, &extra, ZIP_EXTRA_MTIME_SIZE); - zip_dir_offset += ZIP_EXTRA_MTIME_SIZE; + strbuf_add(&zip_dir, &dirent, ZIP_DIR_HEADER_SIZE); + strbuf_add(&zip_dir, path, pathlen); + strbuf_add(&zip_dir, &extra, ZIP_EXTRA_MTIME_SIZE); zip_dir_entries++; return 0; @@ -510,12 +497,12 @@ static void write_zip64_trailer(void) copy_le32(trailer64.directory_start_disk, 0); copy_le64(trailer64.entries_on_this_disk, zip_dir_entries); copy_le64(trailer64.entries, zip_dir_entries); - copy_le64(trailer64.size, zip_dir_offset); + copy_le64(trailer64.size, zip_dir.len); copy_le64(trailer64.offset, zip_offset); 
copy_le32(locator64.magic, 0x07064b50); copy_le32(locator64.disk, 0); - copy_le64(locator64.offset, zip_offset + zip_dir_offset); + copy_le64(locator64.offset, zip_offset + zip_dir.len); copy_le32(locator64.number_of_disks, 1); write_or_die(1, &trailer64, ZIP64_DIR_TRAILER_SIZE); @@ -533,11 +520,11 @@ static void write_zip_trailer(const unsigned char *sha1) copy_le16_clamp(trailer.entries_on_this_disk, zip_dir_entries, &clamped); copy_le16_clamp(trailer.entries, zip_dir_entries, &clamped); - copy_le32(trailer.size, zip_dir_offset); + copy_le32(trailer.size, zip_dir.len); copy_le32(trailer.offset, zip_offset); copy_le16(trailer.comment_length, sha1 ? GIT_SHA1_HEXSZ : 0); - write_or_die(1, zip_dir, zip_dir_offset); + write_or_die(1, zip_dir.buf, zip_dir.len); if (clamped) write_zip64_trailer(); write_or_die(1, &trailer, ZIP_DIR_TRAILER_SIZE); @@ -568,14 +555,13 @@ static int write_zip_archive(const struct archiver *ar, dos_time(&args->time, &zip_date, &zip_time); - zip_dir = xmalloc(ZIP_DIRECTORY_MIN_SIZE); - zip_dir_size = ZIP_DIRECTORY_MIN_SIZE; + strbuf_init(&zip_dir, 0); err = write_archive_entries(args, write_zip_entry); if (!err) write_zip_trailer(args->commit_sha1); - free(zip_dir); + strbuf_release(&zip_dir); return err; } From 3c78fd808d8ed8848fe09735c7755ddd35cfe83f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 24 Apr 2017 19:31:44 +0200 Subject: [PATCH 3/7] archive-zip: write ZIP dir entry directly to strbuf Write all fields of the ZIP directory record for an archive entry in the right order directly into the strbuf instead of taking a detour through a struct. Do that at the end, when we have all necessary data like checksum and compressed size. The fields are documented just as well, the code becomes shorter and we save an extra copy. 
Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- archive-zip.c | 81 +++++++++++++++++---------------------------------- 1 file changed, 27 insertions(+), 54 deletions(-) diff --git a/archive-zip.c b/archive-zip.c index a6fac59602..2d52bb3ade 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -45,27 +45,6 @@ struct zip_data_desc { unsigned char _end[1]; }; -struct zip_dir_header { - unsigned char magic[4]; - unsigned char creator_version[2]; - unsigned char version[2]; - unsigned char flags[2]; - unsigned char compression_method[2]; - unsigned char mtime[2]; - unsigned char mdate[2]; - unsigned char crc32[4]; - unsigned char compressed_size[4]; - unsigned char size[4]; - unsigned char filename_length[2]; - unsigned char extra_length[2]; - unsigned char comment_length[2]; - unsigned char disk[2]; - unsigned char attr1[2]; - unsigned char attr2[4]; - unsigned char offset[4]; - unsigned char _end[1]; -}; - struct zip_dir_trailer { unsigned char magic[4]; unsigned char disk[2]; @@ -166,6 +145,15 @@ static void copy_le16_clamp(unsigned char *dest, uint64_t n, int *clamped) copy_le16(dest, clamp_max(n, 0xffff, clamped)); } +static int strbuf_add_le(struct strbuf *sb, size_t size, uintmax_t n) +{ + while (size-- > 0) { + strbuf_addch(sb, n & 0xff); + n >>= 8; + } + return -!!n; +} + static void *zlib_deflate_raw(void *data, unsigned long size, int compression_level, unsigned long *compressed_size) @@ -212,16 +200,6 @@ static void write_zip_data_desc(unsigned long size, write_or_die(1, &trailer, ZIP_DATA_DESC_SIZE); } -static void set_zip_dir_data_desc(struct zip_dir_header *header, - unsigned long size, - unsigned long compressed_size, - unsigned long crc) -{ - copy_le32(header->crc32, crc); - copy_le32(header->compressed_size, compressed_size); - copy_le32(header->size, size); -} - static void set_zip_header_data_desc(struct zip_local_header *header, unsigned long size, unsigned long compressed_size, @@ -261,7 +239,7 @@ static int write_zip_entry(struct 
archiver_args *args, unsigned int mode) { struct zip_local_header header; - struct zip_dir_header dirent; + uintmax_t offset = zip_offset; struct zip_extra_mtime extra; unsigned long attr2; unsigned long compressed_size; @@ -353,21 +331,6 @@ static int write_zip_entry(struct archiver_args *args, extra.flags[0] = 1; /* just mtime */ copy_le32(extra.mtime, args->time); - copy_le32(dirent.magic, 0x02014b50); - copy_le16(dirent.creator_version, creator_version); - copy_le16(dirent.version, 10); - copy_le16(dirent.flags, flags); - copy_le16(dirent.compression_method, method); - copy_le16(dirent.mtime, zip_time); - copy_le16(dirent.mdate, zip_date); - set_zip_dir_data_desc(&dirent, size, compressed_size, crc); - copy_le16(dirent.filename_length, pathlen); - copy_le16(dirent.extra_length, ZIP_EXTRA_MTIME_SIZE); - copy_le16(dirent.comment_length, 0); - copy_le16(dirent.disk, 0); - copy_le32(dirent.attr2, attr2); - copy_le32(dirent.offset, zip_offset); - copy_le32(header.magic, 0x04034b50); copy_le16(header.version, 10); copy_le16(header.flags, flags); @@ -406,8 +369,6 @@ static int write_zip_entry(struct archiver_args *args, write_zip_data_desc(size, compressed_size, crc); zip_offset += ZIP_DATA_DESC_SIZE; - - set_zip_dir_data_desc(&dirent, size, compressed_size, crc); } else if (stream && method == 8) { unsigned char buf[STREAM_BUFFER_SIZE]; ssize_t readlen; @@ -464,8 +425,6 @@ static int write_zip_entry(struct archiver_args *args, write_zip_data_desc(size, compressed_size, crc); zip_offset += ZIP_DATA_DESC_SIZE; - - set_zip_dir_data_desc(&dirent, size, compressed_size, crc); } else if (compressed_size > 0) { write_or_die(1, out, compressed_size); zip_offset += compressed_size; @@ -474,9 +433,23 @@ static int write_zip_entry(struct archiver_args *args, free(deflated); free(buffer); - copy_le16(dirent.attr1, !is_binary); - - strbuf_add(&zip_dir, &dirent, ZIP_DIR_HEADER_SIZE); + strbuf_add_le(&zip_dir, 4, 0x02014b50); /* magic */ + strbuf_add_le(&zip_dir, 2, 
creator_version); + strbuf_add_le(&zip_dir, 2, 10); /* version */ + strbuf_add_le(&zip_dir, 2, flags); + strbuf_add_le(&zip_dir, 2, method); + strbuf_add_le(&zip_dir, 2, zip_time); + strbuf_add_le(&zip_dir, 2, zip_date); + strbuf_add_le(&zip_dir, 4, crc); + strbuf_add_le(&zip_dir, 4, compressed_size); + strbuf_add_le(&zip_dir, 4, size); + strbuf_add_le(&zip_dir, 2, pathlen); + strbuf_add_le(&zip_dir, 2, ZIP_EXTRA_MTIME_SIZE); + strbuf_add_le(&zip_dir, 2, 0); /* comment length */ + strbuf_add_le(&zip_dir, 2, 0); /* disk */ + strbuf_add_le(&zip_dir, 2, !is_binary); + strbuf_add_le(&zip_dir, 4, attr2); + strbuf_add_le(&zip_dir, 4, offset); strbuf_add(&zip_dir, path, pathlen); strbuf_add(&zip_dir, &extra, ZIP_EXTRA_MTIME_SIZE); zip_dir_entries++; From af95749f9b06467e7536da24432b00989a7c8c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 24 Apr 2017 19:32:36 +0200 Subject: [PATCH 4/7] archive-zip: support archives bigger than 4GB Add a zip64 extended information extra field to the central directory and emit the zip64 end of central directory records as well as locator if the offset of an entry within the archive exceeds 4GB. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- archive-zip.c | 32 ++++++++++++++++++++++++++++---- t/t5004-archive-corner-cases.sh | 2 +- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/archive-zip.c b/archive-zip.c index 2d52bb3ade..7d6f2a85d0 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -14,7 +14,7 @@ static int zip_time; /* We only care about the "buf" part here. 
*/ static struct strbuf zip_dir; -static unsigned int zip_offset; +static uintmax_t zip_offset; static uint64_t zip_dir_entries; static unsigned int max_creator_version; @@ -145,6 +145,11 @@ static void copy_le16_clamp(unsigned char *dest, uint64_t n, int *clamped) copy_le16(dest, clamp_max(n, 0xffff, clamped)); } +static void copy_le32_clamp(unsigned char *dest, uint64_t n, int *clamped) +{ + copy_le32(dest, clamp_max(n, 0xffffffff, clamped)); +} + static int strbuf_add_le(struct strbuf *sb, size_t size, uintmax_t n) { while (size-- > 0) { @@ -154,6 +159,12 @@ static int strbuf_add_le(struct strbuf *sb, size_t size, uintmax_t n) return -!!n; } +static uint32_t clamp32(uintmax_t n) +{ + const uintmax_t max = 0xffffffff; + return (n < max) ? n : max; +} + static void *zlib_deflate_raw(void *data, unsigned long size, int compression_level, unsigned long *compressed_size) @@ -254,6 +265,8 @@ static int write_zip_entry(struct archiver_args *args, int is_binary = -1; const char *path_without_prefix = path + args->baselen; unsigned int creator_version = 0; + size_t zip_dir_extra_size = ZIP_EXTRA_MTIME_SIZE; + size_t zip64_dir_extra_payload_size = 0; crc = crc32(0, NULL, 0); @@ -433,6 +446,11 @@ static int write_zip_entry(struct archiver_args *args, free(deflated); free(buffer); + if (offset > 0xffffffff) { + zip64_dir_extra_payload_size += 8; + zip_dir_extra_size += 2 + 2 + zip64_dir_extra_payload_size; + } + strbuf_add_le(&zip_dir, 4, 0x02014b50); /* magic */ strbuf_add_le(&zip_dir, 2, creator_version); strbuf_add_le(&zip_dir, 2, 10); /* version */ @@ -444,14 +462,20 @@ static int write_zip_entry(struct archiver_args *args, strbuf_add_le(&zip_dir, 4, compressed_size); strbuf_add_le(&zip_dir, 4, size); strbuf_add_le(&zip_dir, 2, pathlen); - strbuf_add_le(&zip_dir, 2, ZIP_EXTRA_MTIME_SIZE); + strbuf_add_le(&zip_dir, 2, zip_dir_extra_size); strbuf_add_le(&zip_dir, 2, 0); /* comment length */ strbuf_add_le(&zip_dir, 2, 0); /* disk */ strbuf_add_le(&zip_dir, 2, !is_binary); 
strbuf_add_le(&zip_dir, 4, attr2); - strbuf_add_le(&zip_dir, 4, offset); + strbuf_add_le(&zip_dir, 4, clamp32(offset)); strbuf_add(&zip_dir, path, pathlen); strbuf_add(&zip_dir, &extra, ZIP_EXTRA_MTIME_SIZE); + if (zip64_dir_extra_payload_size) { + strbuf_add_le(&zip_dir, 2, 0x0001); /* magic */ + strbuf_add_le(&zip_dir, 2, zip64_dir_extra_payload_size); + if (offset >= 0xffffffff) + strbuf_add_le(&zip_dir, 8, offset); + } zip_dir_entries++; return 0; @@ -494,7 +518,7 @@ static void write_zip_trailer(const unsigned char *sha1) &clamped); copy_le16_clamp(trailer.entries, zip_dir_entries, &clamped); copy_le32(trailer.size, zip_dir.len); - copy_le32(trailer.offset, zip_offset); + copy_le32_clamp(trailer.offset, zip_offset, &clamped); copy_le16(trailer.comment_length, sha1 ? GIT_SHA1_HEXSZ : 0); write_or_die(1, zip_dir.buf, zip_dir.len); diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh index 5c886fa823..41183ea2cf 100755 --- a/t/t5004-archive-corner-cases.sh +++ b/t/t5004-archive-corner-cases.sh @@ -155,7 +155,7 @@ test_expect_success ZIPINFO 'zip archive with many entries' ' test_cmp expect actual ' -test_expect_failure EXPENSIVE,UNZIP 'zip archive bigger than 4GB' ' +test_expect_success EXPENSIVE,UNZIP 'zip archive bigger than 4GB' ' # build string containing 65536 characters s=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef && s=$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s && From 4cdf3f9d84568da72f1dcade812de7a42ecb6d15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 24 Apr 2017 19:33:34 +0200 Subject: [PATCH 5/7] archive-zip: support files bigger than 4GB Write a zip64 extended information extra field for big files as part of their local headers and as part of their central directory headers. Also write a zip64 version of the data descriptor in that case. If we're streaming then we don't know the compressed size at the time we write the header. 
Deflate can end up making a file bigger instead of smaller if we're unlucky. Write a local zip64 header already for files with a size of 2GB or more in this case to be on the safe side. Both sizes need to be included in the local zip64 header, but the extra field for the directory must only contain 64-bit equivalents for 32-bit values of 0xffffffff. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- archive-zip.c | 90 +++++++++++++++++++++++++++------ t/t5004-archive-corner-cases.sh | 2 +- 2 files changed, 76 insertions(+), 16 deletions(-) diff --git a/archive-zip.c b/archive-zip.c index 7d6f2a85d0..44ed78f163 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -45,6 +45,14 @@ struct zip_data_desc { unsigned char _end[1]; }; +struct zip64_data_desc { + unsigned char magic[4]; + unsigned char crc32[4]; + unsigned char compressed_size[8]; + unsigned char size[8]; + unsigned char _end[1]; +}; + struct zip_dir_trailer { unsigned char magic[4]; unsigned char disk[2]; @@ -65,6 +73,14 @@ struct zip_extra_mtime { unsigned char _end[1]; }; +struct zip64_extra { + unsigned char magic[2]; + unsigned char extra_size[2]; + unsigned char size[8]; + unsigned char compressed_size[8]; + unsigned char _end[1]; +}; + struct zip64_dir_trailer { unsigned char magic[4]; unsigned char record_size[8]; @@ -94,11 +110,15 @@ struct zip64_dir_trailer_locator { */ #define ZIP_LOCAL_HEADER_SIZE offsetof(struct zip_local_header, _end) #define ZIP_DATA_DESC_SIZE offsetof(struct zip_data_desc, _end) +#define ZIP64_DATA_DESC_SIZE offsetof(struct zip64_data_desc, _end) #define ZIP_DIR_HEADER_SIZE offsetof(struct zip_dir_header, _end) #define ZIP_DIR_TRAILER_SIZE offsetof(struct zip_dir_trailer, _end) #define ZIP_EXTRA_MTIME_SIZE offsetof(struct zip_extra_mtime, _end) #define ZIP_EXTRA_MTIME_PAYLOAD_SIZE \ (ZIP_EXTRA_MTIME_SIZE - offsetof(struct zip_extra_mtime, flags)) +#define ZIP64_EXTRA_SIZE offsetof(struct zip64_extra, _end) +#define ZIP64_EXTRA_PAYLOAD_SIZE \ + (ZIP64_EXTRA_SIZE - 
offsetof(struct zip64_extra, size)) #define ZIP64_DIR_TRAILER_SIZE offsetof(struct zip64_dir_trailer, _end) #define ZIP64_DIR_TRAILER_RECORD_SIZE \ (ZIP64_DIR_TRAILER_SIZE - \ @@ -202,13 +222,23 @@ static void write_zip_data_desc(unsigned long size, unsigned long compressed_size, unsigned long crc) { - struct zip_data_desc trailer; - - copy_le32(trailer.magic, 0x08074b50); - copy_le32(trailer.crc32, crc); - copy_le32(trailer.compressed_size, compressed_size); - copy_le32(trailer.size, size); - write_or_die(1, &trailer, ZIP_DATA_DESC_SIZE); + if (size >= 0xffffffff || compressed_size >= 0xffffffff) { + struct zip64_data_desc trailer; + copy_le32(trailer.magic, 0x08074b50); + copy_le32(trailer.crc32, crc); + copy_le64(trailer.compressed_size, compressed_size); + copy_le64(trailer.size, size); + write_or_die(1, &trailer, ZIP64_DATA_DESC_SIZE); + zip_offset += ZIP64_DATA_DESC_SIZE; + } else { + struct zip_data_desc trailer; + copy_le32(trailer.magic, 0x08074b50); + copy_le32(trailer.crc32, crc); + copy_le32(trailer.compressed_size, compressed_size); + copy_le32(trailer.size, size); + write_or_die(1, &trailer, ZIP_DATA_DESC_SIZE); + zip_offset += ZIP_DATA_DESC_SIZE; + } } static void set_zip_header_data_desc(struct zip_local_header *header, @@ -252,6 +282,9 @@ static int write_zip_entry(struct archiver_args *args, struct zip_local_header header; uintmax_t offset = zip_offset; struct zip_extra_mtime extra; + struct zip64_extra extra64; + size_t header_extra_size = ZIP_EXTRA_MTIME_SIZE; + int need_zip64_extra = 0; unsigned long attr2; unsigned long compressed_size; unsigned long crc; @@ -344,21 +377,40 @@ static int write_zip_entry(struct archiver_args *args, extra.flags[0] = 1; /* just mtime */ copy_le32(extra.mtime, args->time); + if (size > 0xffffffff || compressed_size > 0xffffffff) + need_zip64_extra = 1; + if (stream && size > 0x7fffffff) + need_zip64_extra = 1; + copy_le32(header.magic, 0x04034b50); copy_le16(header.version, 10); copy_le16(header.flags, flags); 
copy_le16(header.compression_method, method); copy_le16(header.mtime, zip_time); copy_le16(header.mdate, zip_date); - set_zip_header_data_desc(&header, size, compressed_size, crc); + if (need_zip64_extra) { + set_zip_header_data_desc(&header, 0xffffffff, 0xffffffff, crc); + header_extra_size += ZIP64_EXTRA_SIZE; + } else { + set_zip_header_data_desc(&header, size, compressed_size, crc); + } copy_le16(header.filename_length, pathlen); - copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE); + copy_le16(header.extra_length, header_extra_size); write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE); zip_offset += ZIP_LOCAL_HEADER_SIZE; write_or_die(1, path, pathlen); zip_offset += pathlen; write_or_die(1, &extra, ZIP_EXTRA_MTIME_SIZE); zip_offset += ZIP_EXTRA_MTIME_SIZE; + if (need_zip64_extra) { + copy_le16(extra64.magic, 0x0001); + copy_le16(extra64.extra_size, ZIP64_EXTRA_PAYLOAD_SIZE); + copy_le64(extra64.size, size); + copy_le64(extra64.compressed_size, compressed_size); + write_or_die(1, &extra64, ZIP64_EXTRA_SIZE); + zip_offset += ZIP64_EXTRA_SIZE; + } + if (stream && method == 0) { unsigned char buf[STREAM_BUFFER_SIZE]; ssize_t readlen; @@ -381,7 +433,6 @@ static int write_zip_entry(struct archiver_args *args, zip_offset += compressed_size; write_zip_data_desc(size, compressed_size, crc); - zip_offset += ZIP_DATA_DESC_SIZE; } else if (stream && method == 8) { unsigned char buf[STREAM_BUFFER_SIZE]; ssize_t readlen; @@ -437,7 +488,6 @@ static int write_zip_entry(struct archiver_args *args, zip_offset += compressed_size; write_zip_data_desc(size, compressed_size, crc); - zip_offset += ZIP_DATA_DESC_SIZE; } else if (compressed_size > 0) { write_or_die(1, out, compressed_size); zip_offset += compressed_size; @@ -446,8 +496,14 @@ static int write_zip_entry(struct archiver_args *args, free(deflated); free(buffer); - if (offset > 0xffffffff) { - zip64_dir_extra_payload_size += 8; + if (compressed_size > 0xffffffff || size > 0xffffffff || + offset > 0xffffffff) { + if 
(compressed_size >= 0xffffffff) + zip64_dir_extra_payload_size += 8; + if (size >= 0xffffffff) + zip64_dir_extra_payload_size += 8; + if (offset >= 0xffffffff) + zip64_dir_extra_payload_size += 8; zip_dir_extra_size += 2 + 2 + zip64_dir_extra_payload_size; } @@ -459,8 +515,8 @@ static int write_zip_entry(struct archiver_args *args, strbuf_add_le(&zip_dir, 2, zip_time); strbuf_add_le(&zip_dir, 2, zip_date); strbuf_add_le(&zip_dir, 4, crc); - strbuf_add_le(&zip_dir, 4, compressed_size); - strbuf_add_le(&zip_dir, 4, size); + strbuf_add_le(&zip_dir, 4, clamp32(compressed_size)); + strbuf_add_le(&zip_dir, 4, clamp32(size)); strbuf_add_le(&zip_dir, 2, pathlen); strbuf_add_le(&zip_dir, 2, zip_dir_extra_size); strbuf_add_le(&zip_dir, 2, 0); /* comment length */ @@ -473,6 +529,10 @@ static int write_zip_entry(struct archiver_args *args, if (zip64_dir_extra_payload_size) { strbuf_add_le(&zip_dir, 2, 0x0001); /* magic */ strbuf_add_le(&zip_dir, 2, zip64_dir_extra_payload_size); + if (size >= 0xffffffff) + strbuf_add_le(&zip_dir, 8, size); + if (compressed_size >= 0xffffffff) + strbuf_add_le(&zip_dir, 8, compressed_size); if (offset >= 0xffffffff) strbuf_add_le(&zip_dir, 8, offset); } diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh index 41183ea2cf..9106c53c4c 100755 --- a/t/t5004-archive-corner-cases.sh +++ b/t/t5004-archive-corner-cases.sh @@ -178,7 +178,7 @@ test_expect_success EXPENSIVE,UNZIP 'zip archive bigger than 4GB' ' "$GIT_UNZIP" -t many-big.zip ' -test_expect_failure EXPENSIVE,UNZIP,ZIPINFO 'zip archive with files bigger than 4GB' ' +test_expect_success EXPENSIVE,UNZIP,ZIPINFO 'zip archive with files bigger than 4GB' ' # Pack created with: # dd if=/dev/zero of=file bs=1M count=4100 && git hash-object -w file mkdir -p .git/objects/pack && From ebdfa294c9e280ee14de27cf9d4ffb2cf82d2f36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 27 Apr 2017 22:25:45 +0200 Subject: [PATCH 6/7] archive-zip: set version field 
for big files correctly Signal that extractors need to implement spec version 4.5 (or higher) for files with sizes of 4GB and more. Older unzippers might produce truncated results otherwise; they should rather refuse to extract. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- archive-zip.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/archive-zip.c b/archive-zip.c index 44ed78f163..e81c5ac15a 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -298,6 +298,7 @@ static int write_zip_entry(struct archiver_args *args, int is_binary = -1; const char *path_without_prefix = path + args->baselen; unsigned int creator_version = 0; + unsigned int version_needed = 10; size_t zip_dir_extra_size = ZIP_EXTRA_MTIME_SIZE; size_t zip64_dir_extra_payload_size = 0; @@ -382,8 +383,11 @@ static int write_zip_entry(struct archiver_args *args, if (stream && size > 0x7fffffff) need_zip64_extra = 1; + if (need_zip64_extra) + version_needed = 45; + copy_le32(header.magic, 0x04034b50); - copy_le16(header.version, 10); + copy_le16(header.version, version_needed); copy_le16(header.flags, flags); copy_le16(header.compression_method, method); copy_le16(header.mtime, zip_time); @@ -509,7 +513,7 @@ static int write_zip_entry(struct archiver_args *args, strbuf_add_le(&zip_dir, 4, 0x02014b50); /* magic */ strbuf_add_le(&zip_dir, 2, creator_version); - strbuf_add_le(&zip_dir, 2, 10); /* version */ + strbuf_add_le(&zip_dir, 2, version_needed); strbuf_add_le(&zip_dir, 2, flags); strbuf_add_le(&zip_dir, 2, method); strbuf_add_le(&zip_dir, 2, zip_time); From 867e40ff3a731d9cf4ab1900a83ca21a9c2c8fae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 30 Apr 2017 09:53:52 +0200 Subject: [PATCH 7/7] t5004: require 64-bit support for big ZIP tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check if unzip supports the ZIP64 format and skip the tests that create big archives otherwise. 
Also skip the test that archives a big file on 32-bit platforms because the git object system can't unpack files bigger than 4GB there. Reported-by: Torsten Bögershausen Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/t5004-archive-corner-cases.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh index 9106c53c4c..f6207f42b5 100755 --- a/t/t5004-archive-corner-cases.sh +++ b/t/t5004-archive-corner-cases.sh @@ -27,6 +27,9 @@ check_dir() { test_cmp expect actual } +test_lazy_prereq UNZIP_ZIP64_SUPPORT ' + "$GIT_UNZIP" -v | grep ZIP64_SUPPORT +' # bsdtar/libarchive versions before 3.1.3 consider a tar file with a # global pax header that is not followed by a file record as corrupt. @@ -155,7 +158,8 @@ test_expect_success ZIPINFO 'zip archive with many entries' ' test_cmp expect actual ' -test_expect_success EXPENSIVE,UNZIP 'zip archive bigger than 4GB' ' +test_expect_success EXPENSIVE,UNZIP,UNZIP_ZIP64_SUPPORT \ + 'zip archive bigger than 4GB' ' # build string containing 65536 characters s=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef && s=$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s && @@ -178,7 +182,8 @@ test_expect_success EXPENSIVE,UNZIP 'zip archive bigger than 4GB' ' "$GIT_UNZIP" -t many-big.zip ' -test_expect_success EXPENSIVE,UNZIP,ZIPINFO 'zip archive with files bigger than 4GB' ' +test_expect_success EXPENSIVE,LONG_IS_64BIT,UNZIP,UNZIP_ZIP64_SUPPORT,ZIPINFO \ + 'zip archive with files bigger than 4GB' ' # Pack created with: # dd if=/dev/zero of=file bs=1M count=4100 && git hash-object -w file mkdir -p .git/objects/pack &&