diff options
author | Sasha Smundak <asmundak@google.com> | 2016-07-15 17:35:51 +0000 |
---|---|---|
committer | Yun Peng <pcloudy@google.com> | 2016-07-18 10:43:29 +0000 |
commit | 06a12e561268f67de6a417315944f8e4d05afc0f (patch) | |
tree | a9fe559e7f307d0a132c59def9fa309c8710465a | |
parent | 5ea55cbab969820da346d16c4998e957b8c3f60e (diff) |
C++ reimplementation of singlejar tool: first checkin, take two: fix the problem that caused the rollback.
*** Original change description ***
Automated [] rollback of commit f667aa54f4fcc2c04182de9bc267a7ee469f6445.
*** Reason for rollback ***
Breaks CI, see, e.g., http://ci.bazel.io/job/bazel-tests/BAZEL_VERSION=HEAD,PLATFORM_NAME=ubuntu_15.10-x86_64/92/console
*** Original change description ***
C++ reimplementation of singlejar tool: first checkin.
--
MOS_MIGRATED_REVID=127554239
-rw-r--r-- | src/BUILD | 1 | ||||
-rw-r--r-- | src/tools/singlejar/BUILD | 72 | ||||
-rw-r--r-- | src/tools/singlejar/diag.h | 31 | ||||
-rw-r--r-- | src/tools/singlejar/input_jar.h | 148 | ||||
-rw-r--r-- | src/tools/singlejar/input_jar_bad_jar_test.cc | 67 | ||||
-rw-r--r-- | src/tools/singlejar/input_jar_random_jars_test.cc | 74 | ||||
-rw-r--r-- | src/tools/singlejar/input_jar_scan_entries_test.h | 302 | ||||
-rw-r--r-- | src/tools/singlejar/input_jar_scan_jartool_test.cc | 64 | ||||
-rw-r--r-- | src/tools/singlejar/input_jar_scan_ziptool_test.cc | 51 | ||||
-rw-r--r-- | src/tools/singlejar/mapped_file.h | 96 | ||||
-rw-r--r-- | src/tools/singlejar/zip_headers.h | 485 | ||||
-rw-r--r-- | src/tools/singlejar/zlib_interface.h | 98 | ||||
-rw-r--r-- | src/tools/singlejar/zlib_interface_test.cc | 100 |
13 files changed, 1589 insertions, 0 deletions
@@ -310,6 +310,7 @@ filegroup( "//src/tools/xcode/libtool:srcs", "//src/tools/xcode/momcwrapper:srcs", "//src/tools/xcode/realpath:srcs", + "//src/tools/singlejar:srcs", "//src/tools/xcode/stdredirect:srcs", "//src/tools/xcode/swiftstdlibtoolwrapper:srcs", "//src/tools/xcode/xcodelocator:srcs", diff --git a/src/tools/singlejar/BUILD b/src/tools/singlejar/BUILD new file mode 100644 index 0000000000..bca525abb5 --- /dev/null +++ b/src/tools/singlejar/BUILD @@ -0,0 +1,72 @@ +# Description: +# singlejar C++ implementation. +package(default_visibility = ["//src:__subpackages__"]) + +filegroup( + name = "srcs", + srcs = glob(["**"]), + visibility = ["//src:__pkg__"], +) + +cc_test( + name = "input_jar_scan_jartool_test", + size = "large", + srcs = [ + "input_jar_scan_entries_test.h", + "input_jar_scan_jartool_test.cc", + ":input_jar", + ], + copts = ["-DJAR_TOOL_PATH=\\\"external/local_jdk/bin/jar\\\""], + data = [ + "//external:jar", + "//external:jdk-default", + ], + deps = ["//third_party:gtest"], +) + +cc_test( + name = "input_jar_scan_ziptool_test", + size = "large", + srcs = [ + "input_jar_scan_entries_test.h", + "input_jar_scan_ziptool_test.cc", + ":input_jar", + ], + deps = ["//third_party:gtest"], +) + +cc_test( + name = "input_jar_bad_jar_test", + srcs = [ + "input_jar_bad_jar_test.cc", + ":input_jar", + ], + deps = ["//third_party:gtest"], +) + +cc_test( + name = "zlib_interface_test", + srcs = [ + "zlib_interface_test.cc", + ":zlib_interface", + ], + deps = [ + "//third_party:gtest", + "//third_party/zlib", + ], +) + +filegroup( + name = "input_jar", + srcs = [ + "diag.h", + "input_jar.h", + "mapped_file.h", + "zip_headers.h", + ], +) + +filegroup( + name = "zlib_interface", + srcs = ["zlib_interface.h"], +) diff --git a/src/tools/singlejar/diag.h b/src/tools/singlejar/diag.h new file mode 100644 index 0000000000..9085e1260d --- /dev/null +++ b/src/tools/singlejar/diag.h @@ -0,0 +1,31 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef BAZEL_SRC_TOOLS_SINGLEJAR_DIAG_H_ +#define BAZEL_SRC_TOOLS_SINGLEJAR_DIAG_H_ 1 + +/* + * Various useful diagnostics functions from Linux err.h file, wrapped + * for portability. + */ +#if defined(__APPLE__) || defined(__linux) +#include <err.h> +#define diag_err(...) err(__VA_ARGS__) +#define diag_errx(...) errx(__VA_ARGS__) +#define diag_warn(...) warn(__VA_ARGS__) +#define diag_warnx(...) warnx(__VA_ARGS__) +#else +#error Unknown platform +#endif +#endif // BAZEL_SRC_TOOLS_SINGLEJAR_DIAG_H_ diff --git a/src/tools/singlejar/input_jar.h b/src/tools/singlejar/input_jar.h new file mode 100644 index 0000000000..c9d6677a97 --- /dev/null +++ b/src/tools/singlejar/input_jar.h @@ -0,0 +1,148 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef BAZEL_SRC_TOOLS_SINGLEJAR_INPUT_JAR_H_ +#define BAZEL_SRC_TOOLS_SINGLEJAR_INPUT_JAR_H_ 1 + +#include <inttypes.h> +#include <stdlib.h> + +#include "src/tools/singlejar/diag.h" +#include "src/tools/singlejar/mapped_file.h" +#include "src/tools/singlejar/zip_headers.h" + +/* + * An input jar. The usage pattern is: + * InputJar input_jar("path/to/file"); + * if (!input_jar.Open()) { fail...} + * CDH *dir_entry; + * LH *local_header; + * while (dir_entry = input_jar.NextExtry(&local_header)) { + * // process entry. + * } + * input_jar.Close(); // actually, called by destructor, too. + */ +class InputJar { + public: + InputJar() : path_(nullptr) {} + + ~InputJar() { Close(); } + + int fd() const { return mapped_file_.fd(); } + + // Opens the file, memory maps it and locates Central Directory. + bool Open(const char *path) { + if (path_ != nullptr) { + diag_errx(1, "%s:%d: This instance is already handling %s\n", __FILE__, + __LINE__, path_); + } + if (!mapped_file_.Open(path)) { + diag_warn("%s:%d: Cannot open input jar %s", __FILE__, __LINE__, path); + mapped_file_.Close(); + return false; + } + if (mapped_file_.size() < sizeof(ECD)) { + diag_warnx( + "%s:%d: %s is only %ld bytes long, should be at least %lu bytes long", + __FILE__, __LINE__, path_, mapped_file_.size(), sizeof(ECD)); + mapped_file_.Close(); + return false; + } + + // Now locate End of Central Directory (ECD) record. 
+ const char *ecd_min = mapped_file_.end() - 65536 - sizeof(ECD); + if (ecd_min < mapped_file_.start()) { + ecd_min = mapped_file_.start(); + } + + const ECD *ecd = nullptr; + for (const char *ecd_ptr = mapped_file_.end() - sizeof(ECD); + ecd_ptr >= ecd_min; --ecd_ptr) { + ecd = reinterpret_cast<const ECD *>(ecd_ptr); + if (ecd->is() && ecd) { + break; + } + } + if (!ecd) { + diag_warnx("%s:%d: Cannot locate ECD record in %s", __FILE__, __LINE__, + path); + mapped_file_.Close(); + return false; + } + uint64_t offset_to_dir = ecd->cen_offset32(); + if (offset_to_dir == 0xFFFFFFFF) { + const ECD64 *ecd64 = reinterpret_cast<const ECD64 *>( + mapped_file_.address(ecd->ecd64_offset())); + offset_to_dir = ecd64->cen_offset(); + } + cdh_ = reinterpret_cast<const CDH *>(mapped_file_.address(offset_to_dir)); + if (!cdh_->is()) { + diag_warnx("in %s, expected central file header signature at 0x%" PRIx64, + path, offset_to_dir); + mapped_file_.Close(); + return false; + } + path_ = strdup(path); + return true; + } + + // Returns the next Central Directory Header or NULL. + const CDH *NextEntry(const LH **local_header_ptr) { + if (!path_) { + diag_errx(1, "%s:%d: call Open() first!", __FILE__, __LINE__); + } + if (!cdh_->is()) { + return nullptr; + } + const CDH *current_cdh = cdh_; + const uint8_t *new_cdr = byte_ptr(cdh_) + cdh_->size(); + if (!mapped_file_.mapped(new_cdr)) { + diag_errx( + 1, + "Bad directory record at offset 0x%" PRIx64 " of %s\n" + "file name length = %u, extra_field length = %u, comment length = %u", + CentralDirectoryRecordOffset(cdh_), path_, cdh_->file_name_length(), + cdh_->extra_fields_length(), cdh_->comment_length()); + } + cdh_ = reinterpret_cast<const CDH *>(new_cdr); + *local_header_ptr = LocalHeader(current_cdh); + return current_cdh; + } + + // Closes the file. 
+ bool Close() { + mapped_file_.Close(); + if (path_ != nullptr) { + free(path_); + path_ = nullptr; + } + return true; + } + + uint64_t CentralDirectoryRecordOffset(const void *cdr) const { + return mapped_file_.offset(static_cast<const char *>(cdr)); + } + + const LH *LocalHeader(const CDH *cdh) const { + return reinterpret_cast<const LH *>( + mapped_file_.address(cdh->local_header_offset())); + } + + private: + char *path_; + MappedFile mapped_file_; + const CDH *cdh_; // current directory entry +}; + +#endif // BAZEL_SRC_TOOLS_SINGLEJAR_INPUT_JAR_H_ diff --git a/src/tools/singlejar/input_jar_bad_jar_test.cc b/src/tools/singlejar/input_jar_bad_jar_test.cc new file mode 100644 index 0000000000..7e976959f2 --- /dev/null +++ b/src/tools/singlejar/input_jar_bad_jar_test.cc @@ -0,0 +1,67 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <unistd.h> +#include <memory> +#include <string> + +#include "src/tools/singlejar/input_jar.h" + +#include "gtest/gtest.h" + +static const char kJar[] = "jar.jar"; + +class InputJarBadJarTest : public testing::Test { + protected: + void SetUp() override { + input_jar_.reset(new InputJar); + } + + // Allocates a with given name and with given size. 
+ static bool AllocateFile(const char *name, size_t size) { + int fd = open(name, O_CREAT | O_RDWR | O_TRUNC, 0777); + if (fd < 0) { + perror(name); + return false; + } + if (size) { + if (ftruncate(fd, size) == 0) { + return close(fd) == 0; + } else { + auto last_error = errno; + close(fd); + errno = last_error; + return false; + } + } else { + return close(fd) == 0; + } + } + + std::unique_ptr<InputJar> input_jar_; +}; + +TEST_F(InputJarBadJarTest, NotAJar) { + ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR"))); + AllocateFile(kJar, 1000); + ASSERT_FALSE(input_jar_->Open(kJar)); +} + +// Check that an empty file does not cause trouble in MappedFile. +TEST_F(InputJarBadJarTest, EmptyFile) { + ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR"))); + AllocateFile(kJar, 0); + ASSERT_FALSE(input_jar_->Open(kJar)); +} diff --git a/src/tools/singlejar/input_jar_random_jars_test.cc b/src/tools/singlejar/input_jar_random_jars_test.cc new file mode 100644 index 0000000000..67e6db2042 --- /dev/null +++ b/src/tools/singlejar/input_jar_random_jars_test.cc @@ -0,0 +1,74 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <dirent.h> +#include <errno.h> +#include <unistd.h> + +#include <string> + +#include "src/tools/singlejar/input_jar.h" + +#include "gtest/gtest.h" + +static const char kJarsDirPath[] = + "third_party/bazel/src/tools/singlejar/jars_to_test"; +class InputJarRandomJarsTest : public testing::Test {}; + +TEST_F(InputJarRandomJarsTest, ScanAllJars) { + int processed_jars = 0; + DIR *dirp = opendir(kJarsDirPath); + ASSERT_NE(nullptr, dirp); + + struct dirent *dirent; + InputJar input_jar; + while ((dirent = readdir(dirp)) != nullptr) { + if (dirent->d_type != DT_REG && dirent->d_type != DT_LNK) { + continue; + } + std::string path = std::string(kJarsDirPath) + "/" + dirent->d_name; + if (dirent->d_type == DT_LNK) { + struct stat st; + if (stat(path.c_str(), &st)) { + perror(path.c_str()); + continue; + } else if (!S_ISREG(st.st_mode)) { + continue; + } + } + EXPECT_TRUE(input_jar.Open(path.c_str())); + const LH *lh; + const CDH *cdh; + int file_count = 0; + int entry_count = 0; + for (; (cdh = input_jar.NextEntry(&lh)); ++entry_count) { + ASSERT_TRUE(cdh->is()); + ASSERT_NE(nullptr, lh); + ASSERT_TRUE(lh->is()); + EXPECT_EQ(lh->file_name_length(), cdh->file_name_length()); + EXPECT_NE(lh->file_name_length(), 0); + EXPECT_EQ(0, strncmp(lh->file_name(), cdh->file_name(), + lh->file_name_length())); + if ('/' != lh->file_name()[lh->file_name_length() - 1]) { + ++file_count; + } + } + input_jar.Close(); + fprintf(stderr, "%s: %d files, %d entries\n", dirent->d_name, file_count, + entry_count); + ++processed_jars; + } + closedir(dirp); + EXPECT_LT(0, processed_jars); +} diff --git a/src/tools/singlejar/input_jar_scan_entries_test.h b/src/tools/singlejar/input_jar_scan_entries_test.h new file mode 100644 index 0000000000..b86445e423 --- /dev/null +++ b/src/tools/singlejar/input_jar_scan_entries_test.h @@ -0,0 +1,302 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BAZEL_SRC_TOOLS_SINGLEJAR_INPUT_JAR_SCAN_ENTRIES_TEST_H_
+#define BAZEL_SRC_TOOLS_SINGLEJAR_INPUT_JAR_SCAN_ENTRIES_TEST_H_ 1
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <memory>
+#include <string>
+
+#include "src/tools/singlejar/input_jar.h"
+
+#include "gtest/gtest.h"
+
+static const char kJar[] = "jar.jar";
+static const char kXXXX[] = "4GB-1file";
+static const char kEmpty[] = "empty";
+static const char kRes1[] = "res1";
+static const char kRes2[] = "res2";
+static const char kHuge[] = "4GB+1file";
+static const int32_t res1_size = 123;
+static const int32_t res2_size = 456;
+static const int64_t huge_size = 0x100000001L;
+static const int64_t kHugeOffset = 0x100000001L;
+
+/* Verifies that InputJar can handle zip/jar files created by a given creator.
+ * This includes basic directory scan, handling huge (>4GB) zip files and huge
+ * entries in them, and handling zip files with "huge" (>64K) number of entries.
+ * A creator is passed as a typed parameter.
+ */
+template <class ZipCreator>
+class InputJarScanEntries : public testing::Test {
+ public:
+  static void SetUpTestCase() { ZipCreator::SetUpTestCase(); }
+
+  static void TearDownTestCase() { ZipCreator::TearDownTestCase(); }
+
+  // Creates (or truncates) a file with the given name and extends it to
+  // the given size. Returns false if any step fails; errno then holds the
+  // error of the failed call.
+  static bool AllocateFile(const char *name, size_t size) {
+    int fd = open(name, O_CREAT | O_RDWR | O_TRUNC, 0777);
+    if (fd < 0) {
+      perror(name);
+      return false;
+    }
+    if (size && ftruncate(fd, size) != 0) {
+      // Preserve the ftruncate error across close().
+      auto last_error = errno;
+      close(fd);
+      errno = last_error;
+      return false;
+    }
+    return close(fd) == 0;
+  }
+
+  // Creates kJar containing two small compressed entries, kRes1 and kRes2.
+  static void CreateBasicJar() {
+    ASSERT_TRUE(AllocateFile(kRes1, res1_size));
+    ASSERT_TRUE(AllocateFile(kRes2, res2_size));
+    unlink(kJar);
+    ASSERT_EQ(0, ZipCreator::Jar(true, kJar, kRes1, kRes2, nullptr));
+    unlink(kRes1);
+    unlink(kRes2);
+  }
+
+  // Creates kJar containing a single compressed entry whose uncompressed
+  // size is huge_size.
+  static void CreateJarWithHugeUncompressed() {
+    ASSERT_TRUE(AllocateFile(kHuge, huge_size));
+    unlink(kJar);
+    ASSERT_EQ(0, ZipCreator::Jar(true, kJar, kHuge, nullptr));
+    unlink(kHuge);
+  }
+
+  // Creates kJar with stored (uncompressed) entries of 4GB-1, 4GB+1, 0 and
+  // res1_size bytes.
+  static void CreateJarWithZip64Entries() {
+    ASSERT_TRUE(AllocateFile(kXXXX, 0xFFFFFFFF));
+    ASSERT_TRUE(AllocateFile(kHuge, huge_size));
+    ASSERT_TRUE(AllocateFile(kEmpty, 0));
+    ASSERT_TRUE(AllocateFile(kRes1, res1_size));
+    // As in the other creators, remove a jar left over from an earlier
+    // test: the creators switch to update mode when the output file exists.
+    unlink(kJar);
+    ASSERT_EQ(
+        0, ZipCreator::Jar(false, kJar, kXXXX, kHuge, kEmpty, kRes1, nullptr));
+    unlink(kXXXX);
+    unlink(kHuge);
+    unlink(kEmpty);
+    unlink(kRes1);
+  }
+
+  static void CreateJarWithLotsOfEntries() {
+    unlink(kJar);
+    // Create 256 directories with 256 files in each one,
+    // make an archive from them
+    for (int dir = 0; dir < 256; ++dir) {
+      char dirname[10];
+      snprintf(dirname, sizeof(dirname), "dir%d", dir);
+      ASSERT_EQ(0, mkdir(dirname, 0777));
+      for (int file = 0; file < 256; ++file) {
+        char filepath[20];
+        snprintf(filepath, sizeof(filepath), "%s/%d", dirname, file);
+        ASSERT_TRUE(AllocateFile(filepath, 1));
+      }
+    }
+    // "dir*" is passed through as-is; the creators run the command with
+    // system(), so the pattern is left to the shell.
+    ASSERT_EQ(0, ZipCreator::Jar(false, kJar, "dir*", nullptr));
+    for (int dir = 0; dir < 256; ++dir) {
+      char rmdircmd[100];
+      snprintf(rmdircmd, sizeof(rmdircmd), "rm dir%d/* && rmdir dir%d", dir,
+               dir);
+      ASSERT_EQ(0, system(rmdircmd));
+    }
+  }
+
+  // Lists the archive with `unzip -v` (skipped on OSX) and fails the test
+  // if the command fails.
+  static void LsZip(const char *zip_name) {
+#if !defined(__APPLE__)
+    const std::string command = std::string("unzip -v ") + zip_name;
+    ASSERT_EQ(0, system(command.c_str())) << "Failed command: " << command;
+#endif
+  }
+
+  void SetUp() override { input_jar_.reset(new InputJar); }
+
+  // Checks that a central directory entry and its local header carry the
+  // expected signatures and agree on the name and, unless the directory
+  // entry says the local header has no sizes, on the sizes.
+  static void SmogCheck(const CDH *cdh, const LH *lh) {
+    ASSERT_TRUE(cdh->is()) << "No expected tag in the Central Directory Entry.";
+    ASSERT_NE(nullptr, lh) << "No local header.";
+    ASSERT_TRUE(lh->is()) << "No expected tag in the Local Header.";
+    EXPECT_EQ(lh->file_name_length(), cdh->file_name_length());
+    EXPECT_NE(lh->file_name_length(), 0);
+    std::string lh_name(lh->file_name(), lh->file_name_length());
+    std::string cdh_name(cdh->file_name(), cdh->file_name_length());
+    EXPECT_EQ(lh_name, cdh_name);
+    if (!cdh->no_size_in_local_header()) {
+      EXPECT_EQ(lh->compressed_file_size(), cdh->compressed_file_size())
+          << "Entry: " << lh_name;
+      EXPECT_EQ(lh->uncompressed_file_size(), cdh->uncompressed_file_size())
+          << "Entry: " << cdh_name;
+    }
+  }
+
+  std::unique_ptr<InputJar> input_jar_;
+};
+
+TYPED_TEST_CASE_P(InputJarScanEntries);
+
+// Check that Open() yields a valid file descriptor and Close() releases it.
+TYPED_TEST_P(InputJarScanEntries, OpenClose) {
+  ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR")));
+  this->CreateBasicJar();
+  this->LsZip(kJar);
+  ASSERT_TRUE(this->input_jar_->Open(kJar));
+  EXPECT_GE(this->input_jar_->fd(), 0);
+  this->input_jar_->Close();
+  EXPECT_LT(this->input_jar_->fd(), 0);
+}
+
+/*
+ * Check that the jar has the expected entries, they have expected
+ * sizes, and that we can access both central directory entries and
+ * local headers.
+ */
+TYPED_TEST_P(InputJarScanEntries, Basic) {
+  ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR")));
+  this->CreateBasicJar();
+  ASSERT_TRUE(this->input_jar_->Open(kJar));
+
+  const CDH *dir_entry;
+  const LH *local_header;
+  bool saw_res1 = false;
+  bool saw_res2 = false;
+  int regular_files = 0;
+  int entries_seen = 0;
+  while ((dir_entry = this->input_jar_->NextEntry(&local_header)) != nullptr) {
+    ++entries_seen;
+    this->SmogCheck(dir_entry, local_header);
+
+    // Entries whose name ends in '/' are not counted as files and their
+    // sizes are not checked.
+    const char last_char =
+        local_header->file_name()[local_header->file_name_length() - 1];
+    if (last_char == '/') {
+      continue;
+    }
+    ++regular_files;
+
+    if (dir_entry->file_name_is(kRes1)) {
+      EXPECT_EQ(res1_size, dir_entry->uncompressed_file_size());
+      saw_res1 = true;
+    } else if (dir_entry->file_name_is(kRes2)) {
+      EXPECT_EQ(res2_size, dir_entry->uncompressed_file_size());
+      saw_res2 = true;
+    }
+  }
+
+  this->input_jar_->Close();
+  unlink(kJar);
+  EXPECT_TRUE(saw_res1) << "Jar file " << kJar << " lacks expected '"
+                        << kRes1 << "' file.";
+  EXPECT_TRUE(saw_res2) << "Jar file " << kJar << " lacks expected '"
+                        << kRes2 << "' file.";
+}
+
+/*
+ * Check we can handle >4GB jar with >4GB entry in it.
+ */
+TYPED_TEST_P(InputJarScanEntries, HugeUncompressed) {
+  ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR")));
+  this->CreateJarWithHugeUncompressed();
+  this->LsZip(kJar);
+  ASSERT_TRUE(this->input_jar_->Open(kJar));
+
+  bool saw_huge = false;
+  const LH *local_header;
+  for (const CDH *dir_entry = this->input_jar_->NextEntry(&local_header);
+       dir_entry != nullptr;
+       dir_entry = this->input_jar_->NextEntry(&local_header)) {
+    this->SmogCheck(dir_entry, local_header);
+    if (!dir_entry->file_name_is(kHuge)) {
+      continue;
+    }
+    // The directory entry must report the full 64-bit size.
+    const std::string entry_name(dir_entry->file_name(),
+                                 dir_entry->file_name_length());
+    EXPECT_EQ(huge_size, dir_entry->uncompressed_file_size())
+        << "Entry: " << entry_name;
+    saw_huge = true;
+  }
+
+  this->input_jar_->Close();
+  unlink(kJar);
+  EXPECT_TRUE(saw_huge) << "Jar file " << kJar << " lacks expected '"
+                        << kHuge << "' file.";
+}
+
+/*
+ * Check we can handle >4GB jar with huge and small entries and huge and
+ * small offsets in the central directory.
+ */ +TYPED_TEST_P(InputJarScanEntries, TestZip64) { + ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR"))); + this->CreateJarWithZip64Entries(); + this->LsZip(kJar); + ASSERT_TRUE(this->input_jar_->Open(kJar)); + const LH *lh; + const CDH *cdh; + while ((cdh = this->input_jar_->NextEntry(&lh))) { + this->SmogCheck(cdh, lh); + + if (cdh->file_name_is(kXXXX)) { + EXPECT_EQ(0xFFFFFFFF, cdh->uncompressed_file_size()); + EXPECT_EQ(0xFFFFFFFF, cdh->compressed_file_size()); + } else if (cdh->file_name_is(kHuge)) { + EXPECT_EQ(huge_size, cdh->uncompressed_file_size()); + EXPECT_EQ(huge_size, cdh->compressed_file_size()); + EXPECT_LT(kHugeOffset, cdh->local_header_offset()); + } else if (cdh->file_name_is(kEmpty)) { + EXPECT_EQ(0, cdh->uncompressed_file_size()); + EXPECT_EQ(0, cdh->compressed_file_size()); + EXPECT_EQ(0, lh->compressed_file_size()); + EXPECT_LT(kHugeOffset, cdh->local_header_offset()); + } else if (cdh->file_name_is(kRes1)) { + EXPECT_EQ(res1_size, cdh->uncompressed_file_size()); + EXPECT_LT(kHugeOffset, cdh->local_header_offset()); + } + } + this->input_jar_->Close(); + unlink(kJar); +} + +/* + * Check we can handle >64K entries. + */ +TYPED_TEST_P(InputJarScanEntries, LotsOfEntries) { + ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR"))); + this->CreateJarWithLotsOfEntries(); +#if !defined(__APPLE__) + const char kTailUnzip[] = "unzip -v jar.jar | tail"; + ASSERT_EQ(0, system(kTailUnzip)) << "Failed command: " << kTailUnzip; +#endif + ASSERT_TRUE(this->input_jar_->Open(kJar)); + const LH *lh; + const CDH *cdh; + int entry_count = 0; + int file_count = 0; + int dir_count = 0; + while ((cdh = this->input_jar_->NextEntry(&lh))) { + this->SmogCheck(cdh, lh); + ++entry_count; + if (cdh->file_name()[cdh->file_name_length() - 1] == '/') { + ++dir_count; + } else { + ++file_count; + } + } + this->input_jar_->Close(); + unlink(kJar); + + /* We cannot compare to the exact number because JDK's jar + * adds META-INF/ and META-INF/MANIFEST.MF. 
+ */ + EXPECT_LE(256 * 257, entry_count); + EXPECT_LE(256, dir_count); + EXPECT_LE(256 * 256, file_count); +} + +REGISTER_TYPED_TEST_CASE_P(InputJarScanEntries, OpenClose, Basic, + HugeUncompressed, TestZip64, LotsOfEntries); + +#endif // BAZEL_SRC_TOOLS_SINGLEJAR_INPUT_JAR_SCAN_ENTRIES_TEST_H_ diff --git a/src/tools/singlejar/input_jar_scan_jartool_test.cc b/src/tools/singlejar/input_jar_scan_jartool_test.cc new file mode 100644 index 0000000000..dd12f3e81d --- /dev/null +++ b/src/tools/singlejar/input_jar_scan_jartool_test.cc @@ -0,0 +1,64 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* Verify that InputJar can scan zip/jar files created by JDK's jar tool. */ + +#include <stdarg.h> +#include <stdlib.h> + +#include "src/tools/singlejar/input_jar_scan_entries_test.h" + +#if !defined(JAR_TOOL_PATH) +#error "The path to jar tool has to be defined via -DJAR_TOOL_PATH=" +#endif + +class JartoolCreator { + public: + static void SetUpTestCase() { + jar_path_ = realpath(JAR_TOOL_PATH, nullptr); + if (!jar_path_) { + // At least show what's available. + system("ls -1R"); + } + ASSERT_NE(nullptr, jar_path_); + } + + static void TearDownTestCase() { + free(jar_path_); + } + + static int Jar(bool compress, const char *output_jar, ...) { + std::string command(jar_path_); + if (access(output_jar, F_OK) == 0) { + command += compress ? " -uf " : " -u0f "; + } else { + command += compress ? 
" -cf " : " -c0f "; + } + command += output_jar; + va_list paths; + va_start(paths, output_jar); + char *path; + while ((path = va_arg(paths, char *))) { + command += ' '; + command += path; + } + return system(command.c_str()); + } + static char * jar_path_; +}; + +char *JartoolCreator::jar_path_; + +typedef testing::Types<JartoolCreator> Creators; +INSTANTIATE_TYPED_TEST_CASE_P(Jartool, InputJarScanEntries, Creators); diff --git a/src/tools/singlejar/input_jar_scan_ziptool_test.cc b/src/tools/singlejar/input_jar_scan_ziptool_test.cc new file mode 100644 index 0000000000..c130981064 --- /dev/null +++ b/src/tools/singlejar/input_jar_scan_ziptool_test.cc @@ -0,0 +1,51 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* Verify that InputJar can scan zip/jar files created by JDK's jar tool. */ + +#include <stdarg.h> +#include <stdlib.h> + +#include "src/tools/singlejar/input_jar_scan_entries_test.h" + +/* Creates jar files using zip. */ +class ZiptoolCreator { + public: + static void SetUpTestCase() { + } + + static void TearDownTestCase() { + } + + static int Jar(bool compress, const char *output_jar, ...) { + std::string command("zip -qr"); + if (access(output_jar, F_OK) == 0) { + command = compress ? "u " : "u0 "; + } else { + command += compress ? 
" " : "0 "; + } + command += output_jar; + va_list paths; + va_start(paths, output_jar); + char *path; + while ((path = va_arg(paths, char *))) { + command += ' '; + command += path; + } + return system(command.c_str()); + } +}; + +typedef testing::Types<ZiptoolCreator> Creators; +INSTANTIATE_TYPED_TEST_CASE_P(Jartool, InputJarScanEntries, Creators); diff --git a/src/tools/singlejar/mapped_file.h b/src/tools/singlejar/mapped_file.h new file mode 100644 index 0000000000..8302e599a2 --- /dev/null +++ b/src/tools/singlejar/mapped_file.h @@ -0,0 +1,96 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef BAZEL_SRC_TOOLS_SINGLEJAR_MAPPED_FILE_H_ +#define BAZEL_SRC_TOOLS_SINGLEJAR_MAPPED_FILE_H_ 1 + +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "src/tools/singlejar/diag.h" + +/* + * A mapped read-only file with auto closing. + * + * MappedFile::Open maps a file with specified name to memory as read-only. + * It is assumed that the address space is large enough for that. + * MappedFile::Close deletes the mapping. The destructor calls it, too. + * A predictable set of methods provide conversion between file offsets and + * mapped addresses, returns map size, etc. + * + * The implementation is 64-bit Linux or OSX specific. + */ +#if !((defined(__linux) || defined(__APPLE__)) && __SIZEOF_POINTER__ == 8) +#error This code for 64 bit Unix. 
+#endif + +class MappedFile { + public: + MappedFile() : mapped_start_(nullptr), mapped_end_(nullptr), fd_(-1) {} + + ~MappedFile() { Close(); } + + bool Open(const char *filename) { + if (is_open()) { + diag_errx(1, "%s:%d: This instance is already open", __FILE__, __LINE__); + } + if ((fd_ = open(filename, O_RDONLY)) < 0) { + diag_warn("%s:%d: open %s:", __FILE__, __LINE__, filename); + return false; + } + // Map the file, even if it is empty (in which case allocate 1 byte to it). + struct stat st; + if (fstat(fd_, &st) || + (mapped_start_ = static_cast<char *>( + mmap(nullptr, st.st_size ? st.st_size : 1, PROT_READ, MAP_PRIVATE, + fd_, 0))) == MAP_FAILED) { + diag_warn("%s:%d: mmap %s:", __FILE__, __LINE__, filename); + close(fd_); + fd_ = -1; + return false; + } + mapped_end_ = mapped_start_ + st.st_size; + return true; + } + + void Close() { + if (is_open()) { + munmap(mapped_start_, mapped_end_ - mapped_start_); + mapped_start_ = mapped_end_ = nullptr; + close(fd_); + fd_ = -1; + } + } + + bool mapped(const void *addr) const { + return mapped_start_ <= addr && addr < mapped_end_; + } + + const char *start() const { return mapped_start_; } + const char *end() const { return mapped_end_; } + const char *address(off_t offset) const { return mapped_start_ + offset; } + off_t offset(const char *address) const { return address - mapped_start_; } + int fd() const { return fd_; } + size_t size() const { return mapped_end_ - mapped_start_; } + bool is_open() { return fd_ >= 0; } + + private: + char *mapped_start_; + char *mapped_end_; + int fd_; +}; + +#endif // BAZEL_SRC_TOOLS_SINGLEJAR_MAPPED_FILE_H_ diff --git a/src/tools/singlejar/zip_headers.h b/src/tools/singlejar/zip_headers.h new file mode 100644 index 0000000000..d1018d3553 --- /dev/null +++ b/src/tools/singlejar/zip_headers.h @@ -0,0 +1,485 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef BAZEL_SRC_TOOLS_SINGLEJAR_ZIP_HEADERS_H_ +#define BAZEL_SRC_TOOLS_SINGLEJAR_ZIP_HEADERS_H_ + +/* + * Zip file headers, as described in .ZIP File Format Specification + * http://www.pkware.com/documents/casestudies/APPNOTE.TXT + */ + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#if defined(__linux) +#include <endian.h> +#elif defined(__APPLE__) +// Hopefully OSX will keep running solely on little endian CPUs, so: +#define le16toh(x) (x) +#define le32toh(x) (x) +#define le64toh(x) (x) +#define htole16(x) (x) +#define htole32(x) (x) +#define htole64(x) (x) +#else +#error "This platform is not supported." +#endif + +#include <type_traits> + +static const uint8_t *byte_ptr(const void *ptr) { + return reinterpret_cast<const uint8_t *>(ptr); +} + +/* Overall .ZIP file format (section 4.3.6), and the corresponding classes + * [local file header 1] class LH + * [encryption header 1] + * [file data 1] + * [data descriptor 1] + * . + * . + * . + * [local file header n] + * [encryption header n] + * [file data n] + * [data descriptor n] + * [archive decryption header] + * [archive extra data record] + * [central directory header 1] class CDH + * . + * . + * . 
+ * [central directory header n] + * [zip64 end of central directory record] class ECD64 + * [zip64 end of central directory locator] class ECDLocator + * [end of central directory record] class ECD + */ + +/* Zip64 Extra Field (section 4.5.3 of the .ZIP format spec) + * + * It is present if a value of a uncompressed_size/compressed_size/file_offset + * exceeds 32 bits. It consists of a 4-byte header followed by + * [64-bit uncompressed_size] [64-bit compressed_size] [64-bit file_offset] + * Only the entities whose value exceed 32 bits are present, and the present + * ones are always in the order shown above. The originating 32-bit field + * contains 0xFFFFFFFF to indicate that the value is 64-bit and is in + * Zip64 Extra Field. + */ +class Zip64ExtraField { + public: + static const Zip64ExtraField *find(const uint8_t *start, const uint8_t *end) { + while (start < end) { + const Zip64ExtraField *z64 = + reinterpret_cast<const Zip64ExtraField *>(start); + if (z64->is()) { + return z64; + } + start = byte_ptr(start) + z64->size(); + } + return nullptr; + } + + bool is() const { return 1 == le16toh(tag_); } + void signature() { tag_ = htole16(1); } + + uint16_t payload_size() const { return le16toh(payload_size_); } + void payload_size(uint16_t v) { payload_size_ = htole16(v); } + + uint16_t size() const { return sizeof(Zip64ExtraField) + payload_size(); } + + // The value of i-th attribute + uint64_t attr64(int index) const { return le64toh(attr_[index]); } + + private: + uint16_t tag_; + uint16_t payload_size_; + uint64_t attr_[]; +} __attribute__((packed)); +static_assert(4 == sizeof(Zip64ExtraField), + "Zip64ExtraField class fields layout is incorrect."); + +/* Local Header precedes each archive file data (section 4.3.7). 
*/ +class LH { + public: + bool is() const { return 0x04034b50 == le32toh(signature_); } + void signature() { signature_ = htole32(0x04034b50); } + + uint16_t version() const { return le16toh(version_); } + void version(uint16_t v) { version_ = htole16(v); } + + void bit_flag(uint16_t v) { bit_flag_ = htole16(v); } + uint16_t bit_flag() const { return le16toh(bit_flag_); } + + uint16_t compression_method() const { return le16toh(compression_method_); } + void compression_method(uint16_t v) { compression_method_ = htole16(v); } + + uint16_t last_mod_file_time() const { return le16toh(last_mod_file_time_); } + void last_mod_file_time(uint16_t v) { last_mod_file_time_ = htole16(v); } + + uint16_t last_mod_file_date() const { return le16toh(last_mod_file_date_); } + void last_mod_file_date(uint16_t v) { last_mod_file_date_ = htole16(v); } + + uint32_t crc32() const { return le32toh(crc32_); } + void crc32(uint32_t v) { crc32_ = htole32(v); } + + size_t compressed_file_size() const { + size_t size32 = compressed_file_size32(); + if (size32 != 0xFFFFFFFF) { + return size32; + } + const Zip64ExtraField *z64 = zip64_extra_field(); + return z64 == nullptr + ? 0xFFFFFFFF + : z64->attr64(uncompressed_file_size32() == 0xFFFFFFFF ? 1 : 0); + } + size_t compressed_file_size32() const { + return le32toh(compressed_file_size32_); + } + void compressed_file_size32(uint32_t v) { + compressed_file_size32_ = htole32(v); + } + + size_t uncompressed_file_size() const { + size_t size32 = uncompressed_file_size32(); + if (size32 != 0xFFFFFFFF) { + return size32; + } + const Zip64ExtraField *z64 = zip64_extra_field(); + return z64 == nullptr ? 
0xFFFFFFFF : z64->attr64(0); + } + size_t uncompressed_file_size32() const { + return le32toh(uncompressed_file_size32_); + } + void uncompressed_file_size32(uint32_t v) { + uncompressed_file_size32_ = htole32(v); + } + + uint16_t file_name_length() const { return le16toh(file_name_length_); } + const char *file_name() const { return file_name_; } + void file_name(const char *filename, uint16_t len) { + file_name_length_ = htole16(len); + if (len) { + memcpy(file_name_, filename, file_name_length_); + } + } + bool file_name_is(const char *name) const { + size_t name_len = strlen(name); + return file_name_length() == name_len && + 0 == strncmp(file_name(), name, name_len); + } + + uint16_t extra_fields_length() const { return le16toh(extra_fields_length_); } + void extra_fields_length(uint16_t v) { + extra_fields_length_ = htole16(extra_fields_length_); + } + const uint8_t *extra_fields() const { + return byte_ptr(file_name_ + file_name_length_); + } + + size_t size() const { + return sizeof(LH) + file_name_length() + extra_fields_length(); + } + const uint8_t *data() const { return extra_fields() + extra_fields_length(); } + size_t in_zip_size() const { + return compression_method() ? compressed_file_size() + : uncompressed_file_size(); + } + + const Zip64ExtraField *zip64_extra_field() const { + return Zip64ExtraField::find(extra_fields(), + extra_fields() + extra_fields_length()); + } + + private: + uint32_t signature_; + uint16_t version_; + uint16_t bit_flag_; + uint16_t compression_method_; + uint16_t last_mod_file_time_; + uint16_t last_mod_file_date_; + uint32_t crc32_; + uint32_t compressed_file_size32_; + uint32_t uncompressed_file_size32_; + uint16_t file_name_length_; + uint16_t extra_fields_length_; + char file_name_[0]; + // Followed by extra_fields. +} __attribute__((packed)); +static_assert(30 == sizeof(LH), "The fields layout for class LH is incorrect"); + +/* Central Directory Header. 
*/ +class CDH { + public: + void signature() { signature_ = htole32(0x02014b50); } + bool is() const { return 0x02014b50 == le32toh(signature_); } + + void version(uint16_t v) { version_ = htole16(v); } + + void version_to_extract(uint16_t v) { version_to_extract_ = htole16(v); } + + void bit_flag(uint16_t v) { bit_flag_ = htole16(v); } + uint16_t bit_flag() const { return le16toh(bit_flag_); } + + void compression_method(uint16_t v) { compression_method_ = htole16(v); } + + void last_mod_file_time(uint16_t v) { last_mod_file_time_ = htole16(v); } + + void last_mod_file_date(uint16_t v) { last_mod_file_date_ = htole16(v); } + + void crc32(uint32_t v) { crc32_ = htole32(v); } + + size_t compressed_file_size() const { + size_t size32 = compressed_file_size32(); + if (size32 != 0xFFFFFFFF) { + return size32; + } + const Zip64ExtraField *z64 = zip64_extra_field(); + return z64 == nullptr + ? 0xFFFFFFFF + : z64->attr64(uncompressed_file_size32() == 0xFFFFFFFF ? 1 : 0); + } + size_t compressed_file_size32() const { + return le32toh(compressed_file_size32_); + } + void compressed_file_size32(uint32_t v) { + compressed_file_size32_ = htole32(v); + } + + size_t uncompressed_file_size() const { + uint32_t size32 = uncompressed_file_size32(); + if (size32 != 0xFFFFFFFF) { + return size32; + } + const Zip64ExtraField *z64 = zip64_extra_field(); + return z64 == nullptr ? 
0xFFFFFFFF : z64->attr64(0); + } + size_t uncompressed_file_size32() const { + return le32toh(uncompressed_file_size32_); + } + + void uncompressed_file_size32(uint32_t v) { + uncompressed_file_size32_ = htole32(v); + } + + uint16_t file_name_length() const { return le16toh(file_name_length_); } + const char *file_name() const { return file_name_; } + void file_name(const char *filename, uint16_t filename_len) { + file_name_length_ = htole16(filename_len); + if (filename_len) { + memcpy(file_name_, filename, filename_len); + } + } + bool file_name_is(const char *name) const { + size_t name_len = strlen(name); + return file_name_length() == name_len && + 0 == strncmp(file_name(), name, name_len); + } + + uint16_t extra_fields_length() const { return le16toh(extra_fields_length_); } + const uint8_t *extra_fields() const { + return byte_ptr(file_name_ + file_name_length_); + } + void extra_fields_length(uint16_t v) { extra_fields_length_ = htole16(v); } + + uint16_t comment_length() const { return le16toh(comment_length_); } + void comment_length(uint16_t v) { comment_length_ = htole16(v); } + + uint16_t start_disk_nr() const { return le16toh(start_disk_nr_); } + void disk_number(uint16_t v) { start_disk_nr_ = htole16(v); } + + uint16_t internal_attributes() const { return le16toh(internal_attributes_); } + void internal_attributes(uint16_t v) { internal_attributes_ = htole16(v); } + + uint32_t external_attribute() const { return le32toh(external_attributes_); } + void external_attribute(uint32_t v) { external_attributes_ = htole32(v); } + + uint64_t local_header_offset() const { + uint32_t size32 = local_header_offset32(); + if (size32 != 0xFFFFFFFF) { + return size32; + } + const Zip64ExtraField *z64 = zip64_extra_field(); + int attr_no = uncompressed_file_size32() == 0xFFFFFFFF ? 1 : 0; + if (compressed_file_size32() == 0xFFFFFFFF) { + ++attr_no; + } + return z64 == nullptr ? 
0xFFFFFFFF : z64->attr64(attr_no); + } + + uint32_t local_header_offset32() const { + return le32toh(local_header_offset32_); + } + void local_header_offset32(uint32_t v) { + local_header_offset32_ = htole32(v); + } + bool no_size_in_local_header() const { return bit_flag() & 0x08; } + size_t size() const { + return sizeof(*this) + file_name_length() + extra_fields_length() + + comment_length(); + } + + const Zip64ExtraField *zip64_extra_field() const { + return Zip64ExtraField::find(extra_fields(), + extra_fields() + extra_fields_length()); + } + + private: + uint32_t signature_; + uint16_t version_; + uint16_t version_to_extract_; + uint16_t bit_flag_; + uint16_t compression_method_; + uint16_t last_mod_file_time_; + uint16_t last_mod_file_date_; + uint32_t crc32_; + uint32_t compressed_file_size32_; + uint32_t uncompressed_file_size32_; + uint16_t file_name_length_; + uint16_t extra_fields_length_; + uint16_t comment_length_; + uint16_t start_disk_nr_; + uint16_t internal_attributes_; + uint32_t external_attributes_; + uint32_t local_header_offset32_; + char file_name_[0]; + // Followed by extra fields and then comment. +} __attribute__((packed)); +static_assert(46 == sizeof(CDH), "Class CDH fields layout is incorrect."); + +/* Zip64 End of Central Directory Locator. 
*/ +class ECD64Locator { + public: + void signature() { signature_ = htole32(0x07064b50); } + bool is() const { return 0x07064b50 == le32toh(signature_); } + + void ecd64_disk_nr(uint32_t nr) { ecd64_disk_nr_ = htole32(nr); } + uint32_t ecd4_disk_nr() const { return le32toh(ecd64_disk_nr_); } + + void ecd64_offset(uint64_t v) { ecd64_offset_ = htole64(v); } + uint64_t ecd64_offset() const { return le64toh(ecd64_offset_); } + + void total_disks(uint32_t v) { total_disks_ = htole32(v); } + uint32_t total_disks() const { return le32toh(total_disks_); } + + private: + uint32_t signature_; + uint32_t ecd64_disk_nr_; + uint64_t ecd64_offset_; + uint32_t total_disks_; +} __attribute__((packed)); +static_assert(20 == sizeof(ECD64Locator), + "ECD64Locator class fields layout is incorrect."); + +/* End of Central Directory. */ +class ECD { + public: + void signature() { signature_ = htole32(0x06054b50); } + bool is() const { return 0x06054b50 == le32toh(signature_); } + + void this_disk_nr(uint16_t v) { this_disk_nr_ = htole16(v); } + uint16_t this_disk_nr() const { return le16toh(this_disk_nr_); } + + void cen_disk_nr(uint16_t v) { cen_disk_nr_ = htole16(v); } + uint16_t cen_disk_nr() const { return le16toh(cen_disk_nr_); } + + void this_disk_entries16(uint16_t v) { this_disk_entries16_ = htole16(v); } + uint16_t this_disk_entries16() const { return le16toh(this_disk_entries16_); } + + void total_entries16(uint16_t v) { total_entries16_ = htole16(v); } + uint16_t total_entries16() const { return le16toh(total_entries16_); } + + void cen_size32(uint32_t v) { cen_size32_ = htole32(v); } + uint32_t cen_size32() const { return le32toh(cen_size32_); } + + void cen_offset32(uint32_t v) { cen_offset32_ = htole32(v); } + uint32_t cen_offset32() const { return le32toh(cen_offset32_); } + + void comment(uint8_t *data, uint16_t data_size) { + comment_length_ = htole16(data_size); + if (data_size) { + memcpy(comment_, data, data_size); + } + } + uint16_t comment_length() const { return 
le16toh(comment_length_); } + const uint8_t *comment() const { return comment_; } + + uint64_t ecd64_offset() const { + const ECD64Locator *locator = reinterpret_cast<const ECD64Locator *>( + byte_ptr(this) - sizeof(ECD64Locator)); + return locator->is() ? locator->ecd64_offset() : 0xFFFFFFFFFFFFFFFF; + } + + private: + uint32_t signature_; + uint16_t this_disk_nr_; + uint16_t cen_disk_nr_; + uint16_t this_disk_entries16_; + uint16_t total_entries16_; + uint32_t cen_size32_; + uint32_t cen_offset32_; + uint16_t comment_length_; + uint8_t comment_[0]; +} __attribute__((packed)); +static_assert(22 == sizeof(ECD), "ECD class fields layout is incorrect."); + +/* Zip64 end of central directory. */ +class ECD64 { + public: + bool is() const { return 0x06064b50 == le32toh(signature_); } + void signature() { signature_ = htole32(0x06064b50); } + + void remaining_size(uint64_t v) { remaining_size_ = htole64(v); } + uint64_t remaining_size() const { return le64toh(remaining_size_); } + + void version(uint16_t v) { version_ = htole16(v); } + uint16_t version() const { return le16toh(version_); } + + void version_to_extract(uint16_t v) { version_to_extract_ = htole16(v); } + uint16_t version_to_extract() const { return le16toh(version_to_extract_); } + + void this_disk_nr(uint32_t v) { this_disk_nr_ = htole32(v); } + uint32_t this_disk_nr() const { return le32toh(this_disk_nr_); } + + void cen_disk_nr(uint32_t v) { cen_disk_nr_ = htole32(v); } + uint32_t cen_disk_nr() const { return le32toh(cen_disk_nr_); } + + void this_disk_entries(uint64_t v) { this_disk_entries_ = htole64(v); } + uint64_t this_disk_entries() const { return le64toh(this_disk_entries_); } + + void total_entries(uint64_t v) { total_entries_ = htole64(v); } + uint64_t total_entries() const { return le64toh(total_entries_); } + + void cen_size(uint64_t v) { cen_size_ = htole64(v); } + uint64_t cen_size() const { return le64toh(cen_size_); } + + void cen_offset(uint64_t v) { cen_offset_ = htole64(v); } + 
uint64_t cen_offset() const { return le64toh(cen_offset_); } + + private: + uint32_t signature_; + uint64_t remaining_size_; + uint16_t version_; + uint16_t version_to_extract_; + uint32_t this_disk_nr_; + uint32_t cen_disk_nr_; + uint64_t this_disk_entries_; + uint64_t total_entries_; + uint64_t cen_size_; + uint64_t cen_offset_; +} __attribute__((packed)); +static_assert(56 == sizeof(ECD64), "ECD64 class fields layout is incorrect."); + +#endif // BAZEL_SRC_TOOLS_SINGLEJAR_ZIP_HEADERS_H_ diff --git a/src/tools/singlejar/zlib_interface.h b/src/tools/singlejar/zlib_interface.h new file mode 100644 index 0000000000..1a06b8e821 --- /dev/null +++ b/src/tools/singlejar/zlib_interface.h @@ -0,0 +1,98 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef BAZEL_SRC_TOOLS_SINGLEJAR_ZLIB_INTERFACE_H_ +#define BAZEL_SRC_TOOLS_SINGLEJAR_ZLIB_INTERFACE_H_ + +#include <zlib.h> + +#include <err.h> +#include <stdint.h> + +// An interface to zlib's inflater. 
Usage: +// Inflater inflater; +// inflater.DataToInflate(data, data_size); +// for (;;) { +// int rc = inflater.Inflate(out_buffer, out_buffer_size); +// if (rc == Z_STREAM_END) { +// break; +// } +// // If we ran out of out_buffer, create a new one +// } +// inflater.Reset(); +// +class Inflater { + public: + Inflater() { + zstream_.zalloc = Z_NULL; + zstream_.zfree = Z_NULL; + zstream_.opaque = Z_NULL; + zstream_.avail_in = 0; + zstream_.next_in = nullptr; + int ret = inflateInit2(&zstream_, -MAX_WBITS); + if (ret != Z_OK) { + errx(2, "inflateInit2 returned %d\n", ret); + } + } + + ~Inflater() { inflateEnd(&zstream_); } + + void reset() { inflateReset(&zstream_); } + + void DataToInflate(const uint8_t *in_buffer, unsigned in_buffer_length) { + zstream_.next_in = const_cast<uint8_t *>(in_buffer); + zstream_.avail_in = in_buffer_length; + } + + int Inflate(uint8_t *out_buffer, unsigned out_buffer_length) { + zstream_.next_out = out_buffer; + zstream_.avail_out = out_buffer_length; + return inflate(&zstream_, Z_SYNC_FLUSH); + } + + unsigned available_out() const { return zstream_.avail_out; } + + const char *error_message() const { return zstream_.msg; } + + private: + z_stream zstream_; +}; + +// A little wrapper around zlib's deflater. 
+struct Deflater : z_stream { + Deflater() { + zalloc = Z_NULL; + zfree = Z_NULL; + opaque = Z_NULL; + next_in = nullptr; + avail_in = 0; + next_out = nullptr; + avail_out = 0; + int ret = deflateInit2(this, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, + 8, Z_DEFAULT_STRATEGY); + if (ret != Z_OK) { + errx(2, "deflateInit returned %d (%s)", ret, msg); + } + } + + ~Deflater() { deflateEnd(this); } + + int Deflate(const uint8_t *data, size_t data_size, int flag) { + next_in = const_cast<uint8_t *>(data); + avail_in = data_size; + return deflate(this, flag); + } +}; + +#endif // BAZEL_SRC_TOOLS_SINGLEJAR_ZLIB_INTERFACE_H_ diff --git a/src/tools/singlejar/zlib_interface_test.cc b/src/tools/singlejar/zlib_interface_test.cc new file mode 100644 index 0000000000..802d15350e --- /dev/null +++ b/src/tools/singlejar/zlib_interface_test.cc @@ -0,0 +1,100 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <memory> + +#include "src/tools/singlejar/zlib_interface.h" + +#include "gtest/gtest.h" + +namespace { + +class ZlibInterfaceTest : public ::testing::Test { + protected: + void SetUp() override { + inflater_.reset(new Inflater); + deflater_.reset(new Deflater); + } + + std::unique_ptr<Inflater> inflater_; + std::unique_ptr<Deflater> deflater_; +}; + +static const uint8_t bytes[] = {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}; + +TEST_F(ZlibInterfaceTest, DeflateFully) { + uint8_t compressed[256]; + deflater_.get()->next_out = compressed; + deflater_.get()->avail_out = sizeof(compressed); + EXPECT_EQ(Z_STREAM_END, deflater_->Deflate(bytes, sizeof(bytes), Z_FINISH)); +} + +TEST_F(ZlibInterfaceTest, DeflateIntoChunks) { + uint8_t compressed[256]; + deflater_.get()->next_out = compressed; + deflater_.get()->avail_out = 2; + EXPECT_EQ(Z_OK, deflater_->Deflate(bytes, sizeof(bytes), Z_FINISH)); + EXPECT_EQ(0, deflater_.get()->avail_out); + deflater_.get()->next_out = compressed + 2; + deflater_.get()->avail_out = sizeof(compressed) - 2; + EXPECT_EQ(Z_STREAM_END, + deflater_->Deflate(deflater_.get()->next_in, + deflater_.get()->avail_in, Z_FINISH)); +} + +TEST_F(ZlibInterfaceTest, DeflateChunks) { + uint8_t compressed[256]; + deflater_.get()->next_out = compressed; + deflater_.get()->avail_out = sizeof(compressed); + EXPECT_EQ(Z_OK, deflater_->Deflate(bytes, 4, Z_NO_FLUSH)); + EXPECT_EQ(Z_STREAM_END, + deflater_->Deflate(bytes + 4, sizeof(bytes) - 4, Z_FINISH)); +} + +TEST_F(ZlibInterfaceTest, InflateFully) { + uint8_t compressed[256]; + deflater_.get()->next_out = compressed; + deflater_.get()->avail_out = sizeof(compressed); + EXPECT_EQ(Z_STREAM_END, deflater_->Deflate(bytes, sizeof(bytes), Z_FINISH)); + + // Now we have deflated data, inflate it back and compare. 
+ size_t compressed_size = sizeof(compressed) - deflater_.get()->avail_out; + inflater_->DataToInflate(compressed, compressed_size); + + uint8_t uncompressed[256]; + memset(uncompressed, 0, sizeof(uncompressed)); + EXPECT_EQ(Z_STREAM_END, + inflater_->Inflate(uncompressed, sizeof(uncompressed))); + EXPECT_EQ(sizeof(bytes), sizeof(uncompressed) - inflater_->available_out()); + EXPECT_EQ(0, memcmp(bytes, uncompressed, sizeof(bytes))); +} + +TEST_F(ZlibInterfaceTest, InflateToChunks) { + uint8_t compressed[256]; + deflater_.get()->next_out = compressed; + deflater_.get()->avail_out = sizeof(compressed); + EXPECT_EQ(Z_STREAM_END, deflater_->Deflate(bytes, sizeof(bytes), Z_FINISH)); + + // Now we have deflated data, inflate it back and compare. + size_t compressed_size = sizeof(compressed) - deflater_.get()->avail_out; + inflater_->DataToInflate(compressed, compressed_size); + uint8_t uncompressed[256]; + memset(uncompressed, 0, sizeof(uncompressed)); + EXPECT_EQ(Z_OK, inflater_->Inflate(uncompressed, 3)); + EXPECT_EQ(Z_STREAM_END, + inflater_->Inflate(uncompressed + 3, sizeof(uncompressed) - 3)); + EXPECT_EQ(0, memcmp(bytes, uncompressed, sizeof(bytes))); +} + +} // namespace |