aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Sasha Smundak <asmundak@google.com>2016-07-15 17:35:51 +0000
committerGravatar Yun Peng <pcloudy@google.com>2016-07-18 10:43:29 +0000
commit06a12e561268f67de6a417315944f8e4d05afc0f (patch)
treea9fe559e7f307d0a132c59def9fa309c8710465a
parent5ea55cbab969820da346d16c4998e957b8c3f60e (diff)
C++ reimplementation of singlejar tool: first checkin, take two: fix the problem that caused the rollback.
*** Original change description *** Automated [] rollback of commit f667aa54f4fcc2c04182de9bc267a7ee469f6445. *** Reason for rollback *** Breaks CI, see, e.g., http://ci.bazel.io/job/bazel-tests/BAZEL_VERSION=HEAD,PLATFORM_NAME=ubuntu_15.10-x86_64/92/console *** Original change description *** C++ reimplementation of singlejar tool: first checkin. -- MOS_MIGRATED_REVID=127554239
-rw-r--r--src/BUILD1
-rw-r--r--src/tools/singlejar/BUILD72
-rw-r--r--src/tools/singlejar/diag.h31
-rw-r--r--src/tools/singlejar/input_jar.h148
-rw-r--r--src/tools/singlejar/input_jar_bad_jar_test.cc67
-rw-r--r--src/tools/singlejar/input_jar_random_jars_test.cc74
-rw-r--r--src/tools/singlejar/input_jar_scan_entries_test.h302
-rw-r--r--src/tools/singlejar/input_jar_scan_jartool_test.cc64
-rw-r--r--src/tools/singlejar/input_jar_scan_ziptool_test.cc51
-rw-r--r--src/tools/singlejar/mapped_file.h96
-rw-r--r--src/tools/singlejar/zip_headers.h485
-rw-r--r--src/tools/singlejar/zlib_interface.h98
-rw-r--r--src/tools/singlejar/zlib_interface_test.cc100
13 files changed, 1589 insertions, 0 deletions
diff --git a/src/BUILD b/src/BUILD
index 392c944843..3694e6777a 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -310,6 +310,7 @@ filegroup(
"//src/tools/xcode/libtool:srcs",
"//src/tools/xcode/momcwrapper:srcs",
"//src/tools/xcode/realpath:srcs",
+ "//src/tools/singlejar:srcs",
"//src/tools/xcode/stdredirect:srcs",
"//src/tools/xcode/swiftstdlibtoolwrapper:srcs",
"//src/tools/xcode/xcodelocator:srcs",
diff --git a/src/tools/singlejar/BUILD b/src/tools/singlejar/BUILD
new file mode 100644
index 0000000000..bca525abb5
--- /dev/null
+++ b/src/tools/singlejar/BUILD
@@ -0,0 +1,72 @@
+# Description:
+# singlejar C++ implementation.
+#
+# NOTE(review): input_jar_random_jars_test.cc is added by this change but no
+# cc_test rule in this file builds it -- confirm that this is intentional.
+package(default_visibility = ["//src:__subpackages__"])
+
+# Every file of this package; listed in the filegroup edited in //src/BUILD.
+filegroup(
+ name = "srcs",
+ srcs = glob(["**"]),
+ visibility = ["//src:__pkg__"],
+)
+
+# Scans jars produced by the JDK's jar tool. JAR_TOOL_PATH is a string
+# literal macro that input_jar_scan_jartool_test.cc passes to realpath().
+cc_test(
+ name = "input_jar_scan_jartool_test",
+ size = "large",
+ srcs = [
+ "input_jar_scan_entries_test.h",
+ "input_jar_scan_jartool_test.cc",
+ ":input_jar",
+ ],
+ copts = ["-DJAR_TOOL_PATH=\\\"external/local_jdk/bin/jar\\\""],
+ data = [
+ "//external:jar",
+ "//external:jdk-default",
+ ],
+ deps = ["//third_party:gtest"],
+)
+
+# Same typed test suite, with jars produced by the `zip` command.
+cc_test(
+ name = "input_jar_scan_ziptool_test",
+ size = "large",
+ srcs = [
+ "input_jar_scan_entries_test.h",
+ "input_jar_scan_ziptool_test.cc",
+ ":input_jar",
+ ],
+ deps = ["//third_party:gtest"],
+)
+
+# InputJar::Open() on files that are not valid jars.
+cc_test(
+ name = "input_jar_bad_jar_test",
+ srcs = [
+ "input_jar_bad_jar_test.cc",
+ ":input_jar",
+ ],
+ deps = ["//third_party:gtest"],
+)
+
+cc_test(
+ name = "zlib_interface_test",
+ srcs = [
+ "zlib_interface_test.cc",
+ ":zlib_interface",
+ ],
+ deps = [
+ "//third_party:gtest",
+ "//third_party/zlib",
+ ],
+)
+
+# Header-only InputJar implementation; the tests above list this filegroup
+# directly in their srcs.
+filegroup(
+ name = "input_jar",
+ srcs = [
+ "diag.h",
+ "input_jar.h",
+ "mapped_file.h",
+ "zip_headers.h",
+ ],
+)
+
+# Header-only zlib wrapper, used by zlib_interface_test.
+filegroup(
+ name = "zlib_interface",
+ srcs = ["zlib_interface.h"],
+)
diff --git a/src/tools/singlejar/diag.h b/src/tools/singlejar/diag.h
new file mode 100644
index 0000000000..9085e1260d
--- /dev/null
+++ b/src/tools/singlejar/diag.h
@@ -0,0 +1,31 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BAZEL_SRC_TOOLS_SINGLEJAR_DIAG_H_
+#define BAZEL_SRC_TOOLS_SINGLEJAR_DIAG_H_ 1
+
+/*
+ * Various useful diagnostics functions from Linux err.h file, wrapped
+ * for portability.
+ */
+// On Linux and OSX the diag_* macros forward their arguments unchanged to the
+// err.h function of the same name (err, errx, warn, warnx). Callers in this
+// package pass an exit status as the first argument of diag_err/diag_errx.
+// Any other platform is rejected at compile time.
+#if defined(__APPLE__) || defined(__linux)
+#include <err.h>
+#define diag_err(...) err(__VA_ARGS__)
+#define diag_errx(...) errx(__VA_ARGS__)
+#define diag_warn(...) warn(__VA_ARGS__)
+#define diag_warnx(...) warnx(__VA_ARGS__)
+#else
+#error Unknown platform
+#endif
+#endif // BAZEL_SRC_TOOLS_SINGLEJAR_DIAG_H_
diff --git a/src/tools/singlejar/input_jar.h b/src/tools/singlejar/input_jar.h
new file mode 100644
index 0000000000..c9d6677a97
--- /dev/null
+++ b/src/tools/singlejar/input_jar.h
@@ -0,0 +1,148 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BAZEL_SRC_TOOLS_SINGLEJAR_INPUT_JAR_H_
+#define BAZEL_SRC_TOOLS_SINGLEJAR_INPUT_JAR_H_ 1
+
+#include <inttypes.h>
+#include <stdlib.h>
+
+#include "src/tools/singlejar/diag.h"
+#include "src/tools/singlejar/mapped_file.h"
+#include "src/tools/singlejar/zip_headers.h"
+
+/*
+ * An input jar. The usage pattern is:
+ * InputJar input_jar;
+ * if (!input_jar.Open("path/to/file")) { fail...}
+ * const CDH *dir_entry;
+ * const LH *local_header;
+ * while ((dir_entry = input_jar.NextEntry(&local_header))) {
+ * // process entry.
+ * }
+ * input_jar.Close(); // actually, called by destructor, too.
+ */
+class InputJar {
+ public:
+  InputJar() : path_(nullptr), cdh_(nullptr) {}
+
+  ~InputJar() { Close(); }
+
+  // File descriptor of the mapped jar, as reported by MappedFile.
+  int fd() const { return mapped_file_.fd(); }
+
+  // Opens the file, memory maps it and locates Central Directory.
+  // Returns true on success. On failure prints a warning, unmaps the file
+  // and returns false, leaving the instance reusable. Calling Open() on an
+  // instance that already handles a file terminates the process.
+  bool Open(const char *path) {
+    if (path_ != nullptr) {
+      diag_errx(1, "%s:%d: This instance is already handling %s\n", __FILE__,
+                __LINE__, path_);
+    }
+    if (!mapped_file_.Open(path)) {
+      diag_warn("%s:%d: Cannot open input jar %s", __FILE__, __LINE__, path);
+      mapped_file_.Close();
+      return false;
+    }
+    if (mapped_file_.size() < sizeof(ECD)) {
+      // path_ is not set until Open() succeeds, so report the argument.
+      diag_warnx(
+          "%s:%d: %s is only %zu bytes long, should be at least %zu bytes long",
+          __FILE__, __LINE__, path, mapped_file_.size(), sizeof(ECD));
+      mapped_file_.Close();
+      return false;
+    }
+
+    // Now locate End of Central Directory (ECD) record.
+    const ECD *ecd = LocateEcd();
+    if (!ecd) {
+      diag_warnx("%s:%d: Cannot locate ECD record in %s", __FILE__, __LINE__,
+                 path);
+      mapped_file_.Close();
+      return false;
+    }
+    uint64_t offset_to_dir = ecd->cen_offset32();
+    if (offset_to_dir == 0xFFFFFFFF) {
+      // The 32-bit field is saturated: take the offset from the ECD64 record.
+      const ECD64 *ecd64 = reinterpret_cast<const ECD64 *>(
+          mapped_file_.address(ecd->ecd64_offset()));
+      offset_to_dir = ecd64->cen_offset();
+    }
+    const CDH *first_cdh =
+        reinterpret_cast<const CDH *>(mapped_file_.address(offset_to_dir));
+    // Reject an offset that points outside the mapping before reading it.
+    if (!mapped_file_.mapped(first_cdh) || !first_cdh->is()) {
+      diag_warnx("in %s, expected central file header signature at 0x%" PRIx64,
+                 path, offset_to_dir);
+      mapped_file_.Close();
+      return false;
+    }
+    char *path_copy = strdup(path);
+    if (path_copy == nullptr) {
+      diag_warn("%s:%d: Cannot save the name of %s", __FILE__, __LINE__, path);
+      mapped_file_.Close();
+      return false;
+    }
+    path_ = path_copy;
+    cdh_ = first_cdh;
+    return true;
+  }
+
+  // Returns the next Central Directory Header or NULL.
+  // On a non-NULL return *local_header_ptr is set to the local header the
+  // returned entry refers to. Terminates the process if Open() has not
+  // succeeded, or if the record following the current one lies outside the
+  // mapped file.
+  const CDH *NextEntry(const LH **local_header_ptr) {
+    if (!path_) {
+      diag_errx(1, "%s:%d: call Open() first!", __FILE__, __LINE__);
+    }
+    if (!cdh_->is()) {
+      return nullptr;
+    }
+    const CDH *current_cdh = cdh_;
+    const uint8_t *new_cdr = byte_ptr(cdh_) + cdh_->size();
+    if (!mapped_file_.mapped(new_cdr)) {
+      diag_errx(
+          1,
+          "Bad directory record at offset 0x%" PRIx64 " of %s\n"
+          "file name length = %u, extra_field length = %u, comment length = %u",
+          CentralDirectoryRecordOffset(cdh_), path_, cdh_->file_name_length(),
+          cdh_->extra_fields_length(), cdh_->comment_length());
+    }
+    cdh_ = reinterpret_cast<const CDH *>(new_cdr);
+    *local_header_ptr = LocalHeader(current_cdh);
+    return current_cdh;
+  }
+
+  // Closes the file. Safe to call repeatedly; always returns true.
+  bool Close() {
+    mapped_file_.Close();
+    free(path_);
+    path_ = nullptr;
+    cdh_ = nullptr;
+    return true;
+  }
+
+  // File offset of the given central directory record.
+  uint64_t CentralDirectoryRecordOffset(const void *cdr) const {
+    return mapped_file_.offset(static_cast<const char *>(cdr));
+  }
+
+  // Address of the local header that the given directory entry points to.
+  const LH *LocalHeader(const CDH *cdh) const {
+    return reinterpret_cast<const LH *>(
+        mapped_file_.address(cdh->local_header_offset()));
+  }
+
+ private:
+  // Scans backwards from the last position where an ECD record fits, over at
+  // most 64K + sizeof(ECD) bytes (but never before the start of the file).
+  // Returns the first position for which ECD::is() holds, or nullptr.
+  // The mapped file must be at least sizeof(ECD) bytes long.
+  const ECD *LocateEcd() const {
+    const size_t max_scan = 65536 + sizeof(ECD);
+    const char *ecd_min = mapped_file_.size() > max_scan
+                              ? mapped_file_.end() - max_scan
+                              : mapped_file_.start();
+    for (const char *ecd_ptr = mapped_file_.end() - sizeof(ECD);; --ecd_ptr) {
+      const ECD *candidate = reinterpret_cast<const ECD *>(ecd_ptr);
+      if (candidate->is()) {
+        return candidate;
+      }
+      if (ecd_ptr == ecd_min) {
+        return nullptr;
+      }
+    }
+  }
+
+  char *path_;  // strdup'ed name of the open jar; nullptr when not open
+  MappedFile mapped_file_;
+  const CDH *cdh_;  // current directory entry
+};
+
+#endif // BAZEL_SRC_TOOLS_SINGLEJAR_INPUT_JAR_H_
diff --git a/src/tools/singlejar/input_jar_bad_jar_test.cc b/src/tools/singlejar/input_jar_bad_jar_test.cc
new file mode 100644
index 0000000000..7e976959f2
--- /dev/null
+++ b/src/tools/singlejar/input_jar_bad_jar_test.cc
@@ -0,0 +1,67 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <unistd.h>
+#include <memory>
+#include <string>
+
+#include "src/tools/singlejar/input_jar.h"
+
+#include "gtest/gtest.h"
+
+static const char kJar[] = "jar.jar";
+
+// Fixture giving each test a fresh, unopened InputJar.
+class InputJarBadJarTest : public testing::Test {
+ protected:
+  void SetUp() override { input_jar_.reset(new InputJar); }
+
+  // Creates (or truncates) the file `name` and, when `size` is non-zero,
+  // sets its length to `size` with ftruncate. Returns true on success.
+  // If ftruncate fails, its errno value survives the cleanup close().
+  static bool AllocateFile(const char *name, size_t size) {
+    const int file = open(name, O_CREAT | O_RDWR | O_TRUNC, 0777);
+    if (file < 0) {
+      perror(name);
+      return false;
+    }
+    if (size != 0 && ftruncate(file, size) != 0) {
+      const auto saved_errno = errno;
+      close(file);
+      errno = saved_errno;
+      return false;
+    }
+    return close(file) == 0;
+  }
+
+  std::unique_ptr<InputJar> input_jar_;
+};
+
+// A 1000-byte file created by ftruncate alone is not a jar: Open() must fail.
+TEST_F(InputJarBadJarTest, NotAJar) {
+  ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR")));
+  // Fail here rather than misattribute a setup problem to Open().
+  ASSERT_TRUE(AllocateFile(kJar, 1000));
+  ASSERT_FALSE(input_jar_->Open(kJar));
+}
+
+// Check that an empty file does not cause trouble in MappedFile.
+TEST_F(InputJarBadJarTest, EmptyFile) {
+  ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR")));
+  // Fail here rather than misattribute a setup problem to Open().
+  ASSERT_TRUE(AllocateFile(kJar, 0));
+  ASSERT_FALSE(input_jar_->Open(kJar));
+}
diff --git a/src/tools/singlejar/input_jar_random_jars_test.cc b/src/tools/singlejar/input_jar_random_jars_test.cc
new file mode 100644
index 0000000000..67e6db2042
--- /dev/null
+++ b/src/tools/singlejar/input_jar_random_jars_test.cc
@@ -0,0 +1,74 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <dirent.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "src/tools/singlejar/input_jar.h"
+
+#include "gtest/gtest.h"
+
+// Directory whose jars ScanAllJars opens. The path is relative, so it is
+// resolved against the working directory of the test process.
+static const char kJarsDirPath[] =
+ "third_party/bazel/src/tools/singlejar/jars_to_test";
+// Empty fixture: the test below keeps all of its state in locals.
+class InputJarRandomJarsTest : public testing::Test {};
+
+// Opens every regular file (or symlink to one) found in kJarsDirPath and
+// walks its central directory, checking that each entry and its local header
+// carry their signatures and agree on the entry name. At least one jar has to
+// be processed.
+TEST_F(InputJarRandomJarsTest, ScanAllJars) {
+  int processed_jars = 0;
+  DIR *dirp = opendir(kJarsDirPath);
+  ASSERT_NE(nullptr, dirp);
+
+  struct dirent *dirent;
+  InputJar input_jar;
+  while ((dirent = readdir(dirp)) != nullptr) {
+    if (dirent->d_type != DT_REG && dirent->d_type != DT_LNK) {
+      continue;
+    }
+    std::string path = std::string(kJarsDirPath) + "/" + dirent->d_name;
+    if (dirent->d_type == DT_LNK) {
+      // Follow the link: only links to regular files are of interest.
+      struct stat st;
+      if (stat(path.c_str(), &st)) {
+        perror(path.c_str());
+        continue;
+      } else if (!S_ISREG(st.st_mode)) {
+        continue;
+      }
+    }
+    // NextEntry() terminates the process when called on an instance that is
+    // not open, so record the failure and move on to the next file.
+    if (!input_jar.Open(path.c_str())) {
+      ADD_FAILURE() << "Cannot open " << path;
+      continue;
+    }
+    const LH *lh;
+    const CDH *cdh;
+    int file_count = 0;
+    int entry_count = 0;
+    for (; (cdh = input_jar.NextEntry(&lh)); ++entry_count) {
+      ASSERT_TRUE(cdh->is());
+      ASSERT_NE(nullptr, lh);
+      ASSERT_TRUE(lh->is());
+      EXPECT_EQ(lh->file_name_length(), cdh->file_name_length());
+      EXPECT_NE(lh->file_name_length(), 0);
+      EXPECT_EQ(0, strncmp(lh->file_name(), cdh->file_name(),
+                           lh->file_name_length()));
+      // Names ending with '/' are directories, the rest are files.
+      if ('/' != lh->file_name()[lh->file_name_length() - 1]) {
+        ++file_count;
+      }
+    }
+    input_jar.Close();
+    fprintf(stderr, "%s: %d files, %d entries\n", dirent->d_name, file_count,
+            entry_count);
+    ++processed_jars;
+  }
+  closedir(dirp);
+  EXPECT_LT(0, processed_jars);
+}
diff --git a/src/tools/singlejar/input_jar_scan_entries_test.h b/src/tools/singlejar/input_jar_scan_entries_test.h
new file mode 100644
index 0000000000..b86445e423
--- /dev/null
+++ b/src/tools/singlejar/input_jar_scan_entries_test.h
@@ -0,0 +1,302 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BAZEL_SRC_TOOLS_SINGLEJAR_INPUT_JAR_SCAN_ENTRIES_TEST_H_
+#define BAZEL_SRC_TOOLS_SINGLEJAR_INPUT_JAR_SCAN_ENTRIES_TEST_H_ 1
+
+#include <errno.h>
+#include <unistd.h>
+#include <memory>
+#include <string>
+
+#include "src/tools/singlejar/input_jar.h"
+
+#include "gtest/gtest.h"
+
+// Name of the scratch jar, followed by the names of the files the tests
+// create and pack into it.
+static const char kJar[] = "jar.jar";
+static const char kXXXX[] = "4GB-1file";  // allocated with 0xFFFFFFFF bytes
+static const char kEmpty[] = "empty";  // allocated with 0 bytes
+static const char kRes1[] = "res1";
+static const char kRes2[] = "res2";
+static const char kHuge[] = "4GB+1file";  // allocated with huge_size bytes
+// Sizes the files above are allocated with.
+static const int32_t res1_size = 123;
+static const int32_t res2_size = 456;
+static const int64_t huge_size = 0x100000001L;
+// TestZip64 expects certain local header offsets to exceed this value.
+static const int64_t kHugeOffset = 0x100000001L;
+
+/* Verifies that InputJar can handle zip/jar files created by a given creator.
+ * This includes basic directory scan, handling huge (>4GB) zip files and huge
+ * entries in them, and handling zip files with "huge" (>64K) number of entries.
+ * A creator is passed as a typed parameter.
+ */
+template <class ZipCreator>
+class InputJarScanEntries : public testing::Test {
+ public:
+  static void SetUpTestCase() { ZipCreator::SetUpTestCase(); }
+
+  static void TearDownTestCase() { ZipCreator::TearDownTestCase(); }
+
+  // Allocates a file with given name and with given size.
+  // Returns true on success; if ftruncate fails, its errno value is kept
+  // intact across the cleanup close().
+  static bool AllocateFile(const char *name, size_t size) {
+    int fd = open(name, O_CREAT | O_RDWR | O_TRUNC, 0777);
+    if (fd < 0) {
+      perror(name);
+      return false;
+    }
+    if (size) {
+      if (ftruncate(fd, size) == 0) {
+        return close(fd) == 0;
+      } else {
+        auto last_error = errno;
+        close(fd);
+        errno = last_error;
+        return false;
+      }
+    } else {
+      return close(fd) == 0;
+    }
+  }
+
+  // Creates a compressed kJar holding kRes1 and kRes2.
+  static void CreateBasicJar() {
+    ASSERT_TRUE(AllocateFile(kRes1, res1_size));
+    ASSERT_TRUE(AllocateFile(kRes2, res2_size));
+    unlink(kJar);
+    ASSERT_EQ(0, ZipCreator::Jar(true, kJar, kRes1, kRes2, nullptr));
+    unlink(kRes1);
+    unlink(kRes2);
+  }
+
+  // Creates a compressed kJar holding the single huge_size-byte file kHuge.
+  static void CreateJarWithHugeUncompressed() {
+    ASSERT_TRUE(AllocateFile(kHuge, huge_size));
+    unlink(kJar);
+    ASSERT_EQ(0, ZipCreator::Jar(true, kJar, kHuge, nullptr));
+    unlink(kHuge);
+  }
+
+  // Creates an uncompressed kJar holding, in this order, kXXXX, kHuge,
+  // kEmpty and kRes1.
+  static void CreateJarWithZip64Entries() {
+    ASSERT_TRUE(AllocateFile(kXXXX, 0xFFFFFFFF));
+    ASSERT_TRUE(AllocateFile(kHuge, huge_size));
+    ASSERT_TRUE(AllocateFile(kEmpty, 0));
+    ASSERT_TRUE(AllocateFile(kRes1, res1_size));
+    // Like the other creators, start from scratch: the ZipCreators switch to
+    // update mode when the output jar already exists.
+    unlink(kJar);
+    ASSERT_EQ(
+        0, ZipCreator::Jar(false, kJar, kXXXX, kHuge, kEmpty, kRes1, nullptr));
+    unlink(kXXXX);
+    unlink(kHuge);
+    unlink(kEmpty);
+    unlink(kRes1);
+  }
+
+  // Creates an uncompressed kJar from 256 directories of 256 one-byte files.
+  static void CreateJarWithLotsOfEntries() {
+    unlink(kJar);
+    // Create 256 directories with 256 files in each one,
+    // make an archive from them
+    for (int dir = 0; dir < 256; ++dir) {
+      char dirname[10];
+      snprintf(dirname, sizeof(dirname), "dir%d", dir);
+      ASSERT_EQ(0, mkdir(dirname, 0777));
+      for (int file = 0; file < 256; ++file) {
+        char filepath[20];
+        snprintf(filepath, sizeof(filepath), "%s/%d", dirname, file);
+        ASSERT_TRUE(AllocateFile(filepath, 1));
+      }
+    }
+    ASSERT_EQ(0, ZipCreator::Jar(false, kJar, "dir*", nullptr));
+    for (int dir = 0; dir < 256; ++dir) {
+      char rmdircmd[100];
+      snprintf(rmdircmd, sizeof(rmdircmd), "rm dir%d/* && rmdir dir%d", dir,
+               dir);
+      ASSERT_EQ(0, system(rmdircmd));
+    }
+  }
+
+  // Runs `unzip -v` on the given archive (skipped on OSX) and requires the
+  // command to succeed.
+  static void LsZip(const char *zip_name) {
+#if !defined(__APPLE__)
+    const std::string command = std::string("unzip -v ") + zip_name;
+    ASSERT_EQ(0, system(command.c_str())) << "Failed command: " << command;
+#endif
+  }
+
+  void SetUp() override { input_jar_.reset(new InputJar); }
+
+  // Sanity check of a directory entry against its local header: both carry
+  // their signatures and the same non-empty name, and the sizes agree unless
+  // the directory entry says the local header has none.
+  static void SmogCheck(const CDH *cdh, const LH *lh) {
+    ASSERT_TRUE(cdh->is()) << "No expected tag in the Central Directory Entry.";
+    ASSERT_NE(nullptr, lh) << "No local header.";
+    ASSERT_TRUE(lh->is()) << "No expected tag in the Local Header.";
+    EXPECT_EQ(lh->file_name_length(), cdh->file_name_length());
+    EXPECT_NE(lh->file_name_length(), 0);
+    std::string lh_name(lh->file_name(), lh->file_name_length());
+    std::string cdh_name(cdh->file_name(), cdh->file_name_length());
+    EXPECT_EQ(lh_name, cdh_name);
+    if (!cdh->no_size_in_local_header()) {
+      EXPECT_EQ(lh->compressed_file_size(), cdh->compressed_file_size())
+          << "Entry: " << lh_name;
+      EXPECT_EQ(lh->uncompressed_file_size(), cdh->uncompressed_file_size())
+          << "Entry: " << cdh_name;
+    }
+  }
+
+  std::unique_ptr<InputJar> input_jar_;
+};
+
+// Declares the type-parameterized test case; the tests below are defined
+// against it and registered at the end of this header.
+TYPED_TEST_CASE_P(InputJarScanEntries);
+
+// Open() has to yield a non-negative file descriptor, and fd() has to be
+// negative again after Close(). Unlike the other tests, this one does not
+// unlink kJar when done.
+TYPED_TEST_P(InputJarScanEntries, OpenClose) {
+ ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR")));
+ this->CreateBasicJar();
+ this->LsZip(kJar);
+ ASSERT_TRUE(this->input_jar_->Open(kJar));
+ EXPECT_GE(this->input_jar_->fd(), 0);
+ this->input_jar_->Close();
+ EXPECT_LT(this->input_jar_->fd(), 0);
+}
+
+/*
+ * Check that the jar has the expected entries, they have expected
+ * sizes, and that we can access both central directory entries and
+ * local headers.
+ */
+TYPED_TEST_P(InputJarScanEntries, Basic) {
+  ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR")));
+  this->CreateBasicJar();
+  ASSERT_TRUE(this->input_jar_->Open(kJar));
+  bool res1_present = false;
+  bool res2_present = false;
+  const LH *lh;
+  const CDH *cdh;
+  while ((cdh = this->input_jar_->NextEntry(&lh)) != nullptr) {
+    this->SmogCheck(cdh, lh);
+    // Entries whose name ends with '/' are directories: nothing to verify.
+    const char last_char = lh->file_name()[lh->file_name_length() - 1];
+    if (last_char == '/') {
+      continue;
+    }
+    if (cdh->file_name_is(kRes1)) {
+      EXPECT_EQ(res1_size, cdh->uncompressed_file_size());
+      res1_present = true;
+    } else if (cdh->file_name_is(kRes2)) {
+      EXPECT_EQ(res2_size, cdh->uncompressed_file_size());
+      res2_present = true;
+    }
+  }
+
+  this->input_jar_->Close();
+  unlink(kJar);
+  EXPECT_TRUE(res1_present) << "Jar file " << kJar << " lacks expected '"
+                            << kRes1 << "' file.";
+  EXPECT_TRUE(res2_present) << "Jar file " << kJar << " lacks expected '"
+                            << kRes2 << "' file.";
+}
+
+/*
+ * Check we can handle >4GB jar with >4GB entry in it.
+ */
+TYPED_TEST_P(InputJarScanEntries, HugeUncompressed) {
+  ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR")));
+  this->CreateJarWithHugeUncompressed();
+  this->LsZip(kJar);
+  ASSERT_TRUE(this->input_jar_->Open(kJar));
+  bool huge_file_present = false;
+  const LH *lh;
+  const CDH *cdh;
+
+  for (;;) {
+    cdh = this->input_jar_->NextEntry(&lh);
+    if (!cdh) {
+      break;
+    }
+    this->SmogCheck(cdh, lh);
+    // Only the huge entry gets the extra size check.
+    if (!cdh->file_name_is(kHuge)) {
+      continue;
+    }
+    std::string entry_name(cdh->file_name(), cdh->file_name_length());
+    EXPECT_EQ(huge_size, cdh->uncompressed_file_size()) << "Entry: "
+                                                        << entry_name;
+    huge_file_present = true;
+  }
+  this->input_jar_->Close();
+  unlink(kJar);
+  EXPECT_TRUE(huge_file_present) << "Jar file " << kJar << " lacks expected '"
+                                 << kHuge << "' file.";
+}
+
+/*
+ * Check we can handle >4GB jar with huge and small entries and huge and
+ * small offsets in the central directory.
+ */
+// The jar is built by CreateJarWithZip64Entries with compress == false, from
+// kXXXX, kHuge, kEmpty and kRes1 in that order. For each of these names the
+// sizes recorded in the central directory are compared with the sizes the
+// files were allocated with, and every entry after kXXXX is expected to have
+// a local header offset above kHugeOffset.
+TYPED_TEST_P(InputJarScanEntries, TestZip64) {
+ ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR")));
+ this->CreateJarWithZip64Entries();
+ this->LsZip(kJar);
+ ASSERT_TRUE(this->input_jar_->Open(kJar));
+ const LH *lh;
+ const CDH *cdh;
+ while ((cdh = this->input_jar_->NextEntry(&lh))) {
+ this->SmogCheck(cdh, lh);
+
+ if (cdh->file_name_is(kXXXX)) {
+ // First entry: no offset expectation, only the sizes.
+ EXPECT_EQ(0xFFFFFFFF, cdh->uncompressed_file_size());
+ EXPECT_EQ(0xFFFFFFFF, cdh->compressed_file_size());
+ } else if (cdh->file_name_is(kHuge)) {
+ EXPECT_EQ(huge_size, cdh->uncompressed_file_size());
+ EXPECT_EQ(huge_size, cdh->compressed_file_size());
+ EXPECT_LT(kHugeOffset, cdh->local_header_offset());
+ } else if (cdh->file_name_is(kEmpty)) {
+ EXPECT_EQ(0, cdh->uncompressed_file_size());
+ EXPECT_EQ(0, cdh->compressed_file_size());
+ EXPECT_EQ(0, lh->compressed_file_size());
+ EXPECT_LT(kHugeOffset, cdh->local_header_offset());
+ } else if (cdh->file_name_is(kRes1)) {
+ EXPECT_EQ(res1_size, cdh->uncompressed_file_size());
+ EXPECT_LT(kHugeOffset, cdh->local_header_offset());
+ }
+ }
+ this->input_jar_->Close();
+ unlink(kJar);
+}
+
+/*
+ * Check we can handle >64K entries.
+ */
+TYPED_TEST_P(InputJarScanEntries, LotsOfEntries) {
+  ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR")));
+  this->CreateJarWithLotsOfEntries();
+#if !defined(__APPLE__)
+  const char kTailUnzip[] = "unzip -v jar.jar | tail";
+  ASSERT_EQ(0, system(kTailUnzip)) << "Failed command: " << kTailUnzip;
+#endif
+  ASSERT_TRUE(this->input_jar_->Open(kJar));
+  int entry_count = 0;
+  int file_count = 0;
+  int dir_count = 0;
+  const LH *lh;
+  const CDH *cdh;
+  for (cdh = this->input_jar_->NextEntry(&lh); cdh != nullptr;
+       cdh = this->input_jar_->NextEntry(&lh)) {
+    this->SmogCheck(cdh, lh);
+    ++entry_count;
+    // A trailing '/' in the name marks a directory entry.
+    const bool is_dir = cdh->file_name()[cdh->file_name_length() - 1] == '/';
+    ++(is_dir ? dir_count : file_count);
+  }
+  this->input_jar_->Close();
+  unlink(kJar);
+
+  /* Only lower bounds can be checked, as JDK's jar
+   * adds META-INF/ and META-INF/MANIFEST.MF.
+   */
+  EXPECT_LE(256 * 257, entry_count);
+  EXPECT_LE(256, dir_count);
+  EXPECT_LE(256 * 256, file_count);
+}
+
+// Registers the tests defined above; each .cc file including this header
+// instantiates them with its own ZipCreator type.
+REGISTER_TYPED_TEST_CASE_P(InputJarScanEntries, OpenClose, Basic,
+ HugeUncompressed, TestZip64, LotsOfEntries);
+
+#endif // BAZEL_SRC_TOOLS_SINGLEJAR_INPUT_JAR_SCAN_ENTRIES_TEST_H_
diff --git a/src/tools/singlejar/input_jar_scan_jartool_test.cc b/src/tools/singlejar/input_jar_scan_jartool_test.cc
new file mode 100644
index 0000000000..dd12f3e81d
--- /dev/null
+++ b/src/tools/singlejar/input_jar_scan_jartool_test.cc
@@ -0,0 +1,64 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/* Verify that InputJar can scan zip/jar files created by JDK's jar tool. */
+
+#include <stdarg.h>
+#include <stdlib.h>
+
+#include "src/tools/singlejar/input_jar_scan_entries_test.h"
+
+#if !defined(JAR_TOOL_PATH)
+#error "The path to jar tool has to be defined via -DJAR_TOOL_PATH="
+#endif
+
+/* Creates jar files using the jar tool found at JAR_TOOL_PATH. */
+class JartoolCreator {
+ public:
+  // Resolves JAR_TOOL_PATH with realpath() and keeps the result for Jar().
+  // If the path cannot be resolved, lists the current directory tree to help
+  // diagnose the failure, then fails the assertion.
+  static void SetUpTestCase() {
+    jar_path_ = realpath(JAR_TOOL_PATH, nullptr);
+    if (!jar_path_) {
+      // At least show what's available.
+      system("ls -1R");
+    }
+    ASSERT_NE(nullptr, jar_path_);
+  }
+
+  // Releases the path allocated by realpath().
+  static void TearDownTestCase() {
+    free(jar_path_);
+    jar_path_ = nullptr;
+  }
+
+  // Runs the jar tool to put the given files into output_jar, updating the
+  // jar if it already exists and creating it otherwise. The variable
+  // arguments are C strings terminated by a null pointer. With compress ==
+  // false the '0' flag is added to the jar options.
+  // Returns the value returned by system().
+  static int Jar(bool compress, const char *output_jar, ...) {
+    std::string command(jar_path_);
+    if (access(output_jar, F_OK) == 0) {
+      command += compress ? " -uf " : " -u0f ";
+    } else {
+      command += compress ? " -cf " : " -c0f ";
+    }
+    command += output_jar;
+    va_list paths;
+    va_start(paths, output_jar);
+    const char *path;
+    while ((path = va_arg(paths, const char *))) {
+      command += ' ';
+      command += path;
+    }
+    // Every va_start needs a matching va_end before the function returns.
+    va_end(paths);
+    return system(command.c_str());
+  }
+  static char *jar_path_;
+};
+
+// Storage for the path set in SetUpTestCase() and freed in TearDownTestCase().
+char *JartoolCreator::jar_path_;
+
+// Run the InputJarScanEntries tests on jars created by JartoolCreator.
+typedef testing::Types<JartoolCreator> Creators;
+INSTANTIATE_TYPED_TEST_CASE_P(Jartool, InputJarScanEntries, Creators);
diff --git a/src/tools/singlejar/input_jar_scan_ziptool_test.cc b/src/tools/singlejar/input_jar_scan_ziptool_test.cc
new file mode 100644
index 0000000000..c130981064
--- /dev/null
+++ b/src/tools/singlejar/input_jar_scan_ziptool_test.cc
@@ -0,0 +1,51 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/* Verify that InputJar can scan zip/jar files created by the zip tool. */
+
+#include <stdarg.h>
+#include <stdlib.h>
+
+#include "src/tools/singlejar/input_jar_scan_entries_test.h"
+
+/* Creates jar files using zip. */
+class ZiptoolCreator {
+ public:
+  // Nothing to prepare: `zip` is looked up by the shell that system() runs.
+  static void SetUpTestCase() {
+  }
+
+  static void TearDownTestCase() {
+  }
+
+  // Runs `zip -qr` to put the given files into output_jar. The 'u' option is
+  // added when the jar already exists, and '0' when compress == false. The
+  // variable arguments are C strings terminated by a null pointer.
+  // Returns the value returned by system().
+  static int Jar(bool compress, const char *output_jar, ...) {
+    std::string command("zip -qr");
+    // Append the remaining option letters; assigning here instead would drop
+    // the "zip -qr" prefix and leave an invalid command.
+    if (access(output_jar, F_OK) == 0) {
+      command += compress ? "u " : "u0 ";
+    } else {
+      command += compress ? " " : "0 ";
+    }
+    command += output_jar;
+    va_list paths;
+    va_start(paths, output_jar);
+    const char *path;
+    while ((path = va_arg(paths, const char *))) {
+      command += ' ';
+      command += path;
+    }
+    // Every va_start needs a matching va_end before the function returns.
+    va_end(paths);
+    return system(command.c_str());
+  }
+};
+
+// Run the InputJarScanEntries tests on jars created by ZiptoolCreator.
+// NOTE(review): the instantiation prefix is "Jartool" although the creator is
+// ZiptoolCreator; it looks copied from input_jar_scan_jartool_test.cc.
+// Confirm whether it should read "Ziptool".
+typedef testing::Types<ZiptoolCreator> Creators;
+INSTANTIATE_TYPED_TEST_CASE_P(Jartool, InputJarScanEntries, Creators);
diff --git a/src/tools/singlejar/mapped_file.h b/src/tools/singlejar/mapped_file.h
new file mode 100644
index 0000000000..8302e599a2
--- /dev/null
+++ b/src/tools/singlejar/mapped_file.h
@@ -0,0 +1,96 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BAZEL_SRC_TOOLS_SINGLEJAR_MAPPED_FILE_H_
+#define BAZEL_SRC_TOOLS_SINGLEJAR_MAPPED_FILE_H_ 1
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "src/tools/singlejar/diag.h"
+
+/*
+ * A mapped read-only file with auto closing.
+ *
+ * MappedFile::Open maps a file with specified name to memory as read-only.
+ * It is assumed that the address space is large enough for that.
+ * MappedFile::Close deletes the mapping. The destructor calls it, too.
+ * A predictable set of methods provide conversion between file offsets and
+ * mapped addresses, returns map size, etc.
+ *
+ * The implementation is 64-bit Linux or OSX specific.
+ */
+#if !((defined(__linux) || defined(__APPLE__)) && __SIZEOF_POINTER__ == 8)
+#error This code for 64 bit Unix.
+#endif
+
+class MappedFile {
+ public:
+  MappedFile() : mapped_start_(nullptr), mapped_end_(nullptr), fd_(-1) {}
+
+  ~MappedFile() { Close(); }
+
+  // Opens the file and maps it read-only. Returns true on success; on
+  // failure prints a warning, leaves the instance closed and returns false.
+  // Calling Open() on an instance that is already open terminates the
+  // process.
+  bool Open(const char *filename) {
+    if (is_open()) {
+      diag_errx(1, "%s:%d: This instance is already open", __FILE__, __LINE__);
+    }
+    if ((fd_ = open(filename, O_RDONLY)) < 0) {
+      diag_warn("%s:%d: open %s:", __FILE__, __LINE__, filename);
+      return false;
+    }
+    struct stat st;
+    if (fstat(fd_, &st)) {
+      diag_warn("%s:%d: fstat %s:", __FILE__, __LINE__, filename);
+      close(fd_);
+      fd_ = -1;
+      return false;
+    }
+    // Map the file, even if it is empty (in which case allocate 1 byte to it).
+    void *mapping = mmap(nullptr, MappingLength(st.st_size), PROT_READ,
+                         MAP_PRIVATE, fd_, 0);
+    if (mapping == MAP_FAILED) {
+      diag_warn("%s:%d: mmap %s:", __FILE__, __LINE__, filename);
+      close(fd_);
+      fd_ = -1;
+      return false;
+    }
+    // Set the pointers only on success, so that start() and end() never
+    // expose MAP_FAILED.
+    mapped_start_ = static_cast<char *>(mapping);
+    mapped_end_ = mapped_start_ + st.st_size;
+    return true;
+  }
+
+  // Deletes the mapping and closes the file. Does nothing if not open.
+  void Close() {
+    if (is_open()) {
+      // Unmap the length that was actually mapped: for an empty file that is
+      // 1 byte, not size().
+      munmap(mapped_start_, MappingLength(size()));
+      mapped_start_ = mapped_end_ = nullptr;
+      close(fd_);
+      fd_ = -1;
+    }
+  }
+
+  // True if addr lies in [start(), end()).
+  bool mapped(const void *addr) const {
+    return mapped_start_ <= addr && addr < mapped_end_;
+  }
+
+  const char *start() const { return mapped_start_; }
+  const char *end() const { return mapped_end_; }
+  // Conversions between file offsets and mapped addresses; neither checks
+  // that its argument is inside the file.
+  const char *address(off_t offset) const { return mapped_start_ + offset; }
+  off_t offset(const char *address) const { return address - mapped_start_; }
+  int fd() const { return fd_; }
+  // Size of the file in bytes (0 for an empty file or a closed instance).
+  size_t size() const { return mapped_end_ - mapped_start_; }
+  bool is_open() const { return fd_ >= 0; }
+
+ private:
+  // Length passed to mmap/munmap for a file of the given size: the size
+  // itself, or 1 for an empty file.
+  static size_t MappingLength(size_t file_size) {
+    return file_size ? file_size : 1;
+  }
+
+  char *mapped_start_;
+  char *mapped_end_;
+  int fd_;
+};
+
+#endif // BAZEL_SRC_TOOLS_SINGLEJAR_MAPPED_FILE_H_
diff --git a/src/tools/singlejar/zip_headers.h b/src/tools/singlejar/zip_headers.h
new file mode 100644
index 0000000000..d1018d3553
--- /dev/null
+++ b/src/tools/singlejar/zip_headers.h
@@ -0,0 +1,485 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BAZEL_SRC_TOOLS_SINGLEJAR_ZIP_HEADERS_H_
+#define BAZEL_SRC_TOOLS_SINGLEJAR_ZIP_HEADERS_H_
+
+/*
+ * Zip file headers, as described in .ZIP File Format Specification
+ * http://www.pkware.com/documents/casestudies/APPNOTE.TXT
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Little-endian <-> host byte order conversions (le16toh, htole32, ...) used
+// by the header accessors in this file. On Linux they are expected to come
+// from <endian.h>; on OSX they are defined right here as no-ops.
+#if defined(__linux)
+#include <endian.h>
+#elif defined(__APPLE__)
+// Hopefully OSX will keep running solely on little endian CPUs, so:
+#define le16toh(x) (x)
+#define le32toh(x) (x)
+#define le64toh(x) (x)
+#define htole16(x) (x)
+#define htole32(x) (x)
+#define htole64(x) (x)
+#else
+#error "This platform is not supported."
+#endif
+
+#include <type_traits>
+
+// Views an arbitrary address as a pointer to constant bytes, so that byte
+// offsets can be added to it.
+static const uint8_t *byte_ptr(const void *ptr) {
+  const uint8_t *as_bytes = static_cast<const uint8_t *>(ptr);
+  return as_bytes;
+}
+
+/* Overall .ZIP file format (section 4.3.6), and the corresponding classes
+ * [local file header 1] class LH
+ * [encryption header 1]
+ * [file data 1]
+ * [data descriptor 1]
+ * .
+ * .
+ * .
+ * [local file header n]
+ * [encryption header n]
+ * [file data n]
+ * [data descriptor n]
+ * [archive decryption header]
+ * [archive extra data record]
+ * [central directory header 1] class CDH
+ * .
+ * .
+ * .
+ * [central directory header n]
+ * [zip64 end of central directory record] class ECD64
+ * [zip64 end of central directory locator] class ECD64Locator
+ * [end of central directory record] class ECD
+ */
+
+/* Zip64 Extra Field (section 4.5.3 of the .ZIP format spec)
+ *
+ * It is present if the value of uncompressed_size, compressed_size or
+ * file_offset exceeds 32 bits. It consists of a 4-byte header followed by
+ * [64-bit uncompressed_size] [64-bit compressed_size] [64-bit file_offset]
+ * Only the entities whose values exceed 32 bits are present, and the present
+ * ones are always in the order shown above. The originating 32-bit field
+ * contains 0xFFFFFFFF to indicate that the value is 64-bit and is in
+ * Zip64 Extra Field.
+ */
+// Overlay for one extra-field record: a 2-byte tag, a 2-byte payload size and
+// the payload itself. The record is a Zip64 Extra Field when the tag is 1.
+class Zip64ExtraField {
+ public:
+  // Scans the extra-field records in [start, end) and returns the first one
+  // tagged as Zip64, or nullptr if there is none.
+  // The scan stops (returning nullptr) as soon as a record header or a whole
+  // record does not fit into the remaining bytes, so malformed input can
+  // neither be read past `end` nor stall the loop.
+  static const Zip64ExtraField *find(const uint8_t *start, const uint8_t *end) {
+    while (start < end) {
+      const size_t remaining = end - start;
+      if (remaining < sizeof(Zip64ExtraField)) {
+        break;  // Not even a complete tag/size header is left.
+      }
+      const Zip64ExtraField *z64 =
+          reinterpret_cast<const Zip64ExtraField *>(start);
+      if (z64->is()) {
+        return z64;
+      }
+      // Computed in size_t: the 16-bit size() wraps to 0 when the payload
+      // size is 0xFFFC, which would keep `start` from ever advancing.
+      const size_t record_size = sizeof(Zip64ExtraField) + z64->payload_size();
+      if (record_size > remaining) {
+        break;  // The record claims more bytes than are available.
+      }
+      start += record_size;
+    }
+    return nullptr;
+  }
+
+  // True if this record carries the Zip64 tag.
+  bool is() const { return 1 == le16toh(tag_); }
+  // Stamps the Zip64 tag.
+  void signature() { tag_ = htole16(1); }
+
+  uint16_t payload_size() const { return le16toh(payload_size_); }
+  void payload_size(uint16_t v) { payload_size_ = htole16(v); }
+
+  // Header plus payload size. NOTE: the sum is truncated to 16 bits.
+  uint16_t size() const { return sizeof(Zip64ExtraField) + payload_size(); }
+
+  // The value of i-th attribute
+  uint64_t attr64(int index) const { return le64toh(attr_[index]); }
+
+ private:
+  uint16_t tag_;
+  uint16_t payload_size_;
+  uint64_t attr_[];
+} __attribute__((packed));
+static_assert(4 == sizeof(Zip64ExtraField),
+              "Zip64ExtraField class fields layout is incorrect.");
+
+/* Local Header precedes each archive file data (section 4.3.7).
+ *
+ * The class is an overlay for the on-disk record: the fixed-size part is
+ * followed by the file name and then by the extra fields. All multi-byte
+ * fields are stored little-endian and converted by the accessors.
+ */
+class LH {
+ public:
+  bool is() const { return 0x04034b50 == le32toh(signature_); }
+  void signature() { signature_ = htole32(0x04034b50); }
+
+  uint16_t version() const { return le16toh(version_); }
+  void version(uint16_t v) { version_ = htole16(v); }
+
+  void bit_flag(uint16_t v) { bit_flag_ = htole16(v); }
+  uint16_t bit_flag() const { return le16toh(bit_flag_); }
+
+  uint16_t compression_method() const { return le16toh(compression_method_); }
+  void compression_method(uint16_t v) { compression_method_ = htole16(v); }
+
+  uint16_t last_mod_file_time() const { return le16toh(last_mod_file_time_); }
+  void last_mod_file_time(uint16_t v) { last_mod_file_time_ = htole16(v); }
+
+  uint16_t last_mod_file_date() const { return le16toh(last_mod_file_date_); }
+  void last_mod_file_date(uint16_t v) { last_mod_file_date_ = htole16(v); }
+
+  uint32_t crc32() const { return le32toh(crc32_); }
+  void crc32(uint32_t v) { crc32_ = htole32(v); }
+
+  // Compressed size. When the 32-bit field holds 0xFFFFFFFF the value is read
+  // from the Zip64 extra field: it is the second attribute there if the
+  // uncompressed size is stored in it as well, the first one otherwise.
+  // Returns 0xFFFFFFFF if the Zip64 extra field is absent.
+  size_t compressed_file_size() const {
+    size_t size32 = compressed_file_size32();
+    if (size32 != 0xFFFFFFFF) {
+      return size32;
+    }
+    const Zip64ExtraField *z64 = zip64_extra_field();
+    return z64 == nullptr
+               ? 0xFFFFFFFF
+               : z64->attr64(uncompressed_file_size32() == 0xFFFFFFFF ? 1 : 0);
+  }
+  size_t compressed_file_size32() const {
+    return le32toh(compressed_file_size32_);
+  }
+  void compressed_file_size32(uint32_t v) {
+    compressed_file_size32_ = htole32(v);
+  }
+
+  // Uncompressed size. When the 32-bit field holds 0xFFFFFFFF the value is the
+  // first attribute of the Zip64 extra field (0xFFFFFFFF if it is absent).
+  size_t uncompressed_file_size() const {
+    size_t size32 = uncompressed_file_size32();
+    if (size32 != 0xFFFFFFFF) {
+      return size32;
+    }
+    const Zip64ExtraField *z64 = zip64_extra_field();
+    return z64 == nullptr ? 0xFFFFFFFF : z64->attr64(0);
+  }
+  size_t uncompressed_file_size32() const {
+    return le32toh(uncompressed_file_size32_);
+  }
+  void uncompressed_file_size32(uint32_t v) {
+    uncompressed_file_size32_ = htole32(v);
+  }
+
+  uint16_t file_name_length() const { return le16toh(file_name_length_); }
+  // The name is NOT NUL-terminated; it is file_name_length() bytes long.
+  const char *file_name() const { return file_name_; }
+  // Stores the name length and copies `len` bytes of `filename` right after
+  // the fixed part. The caller must provide the room for them.
+  void file_name(const char *filename, uint16_t len) {
+    file_name_length_ = htole16(len);
+    if (len) {
+      // Copy `len` bytes, not the just-stored field: the field holds the
+      // little-endian representation, which differs on big-endian hosts.
+      memcpy(file_name_, filename, len);
+    }
+  }
+  // True if the stored name equals the NUL-terminated `name`.
+  bool file_name_is(const char *name) const {
+    size_t name_len = strlen(name);
+    return file_name_length() == name_len &&
+           0 == strncmp(file_name(), name, name_len);
+  }
+
+  uint16_t extra_fields_length() const { return le16toh(extra_fields_length_); }
+  // Stores `v` as the extra fields length.
+  void extra_fields_length(uint16_t v) { extra_fields_length_ = htole16(v); }
+  // The extra fields start right after the file name.
+  const uint8_t *extra_fields() const {
+    return byte_ptr(file_name_ + file_name_length());
+  }
+
+  // Size of the whole header: fixed part, file name and extra fields.
+  size_t size() const {
+    return sizeof(LH) + file_name_length() + extra_fields_length();
+  }
+  // Address of the file data, which follows the extra fields.
+  const uint8_t *data() const { return extra_fields() + extra_fields_length(); }
+  // Number of bytes the file data occupies in the archive: the compressed size
+  // if a compression method is set, the uncompressed size otherwise.
+  size_t in_zip_size() const {
+    return compression_method() ? compressed_file_size()
+                                : uncompressed_file_size();
+  }
+
+  // The Zip64 extra field of this header, or nullptr if there is none.
+  const Zip64ExtraField *zip64_extra_field() const {
+    return Zip64ExtraField::find(extra_fields(),
+                                 extra_fields() + extra_fields_length());
+  }
+
+ private:
+  uint32_t signature_;
+  uint16_t version_;
+  uint16_t bit_flag_;
+  uint16_t compression_method_;
+  uint16_t last_mod_file_time_;
+  uint16_t last_mod_file_date_;
+  uint32_t crc32_;
+  uint32_t compressed_file_size32_;
+  uint32_t uncompressed_file_size32_;
+  uint16_t file_name_length_;
+  uint16_t extra_fields_length_;
+  char file_name_[0];
+  // Followed by extra_fields.
+} __attribute__((packed));
+static_assert(30 == sizeof(LH), "The fields layout for class LH is incorrect");
+
+/* Central Directory Header.
+ *
+ * The class is an overlay for the on-disk record: the fixed-size part is
+ * followed by the file name, the extra fields and the comment. All multi-byte
+ * fields are stored little-endian and converted by the accessors.
+ */
+class CDH {
+ public:
+  void signature() { signature_ = htole32(0x02014b50); }
+  bool is() const { return 0x02014b50 == le32toh(signature_); }
+
+  void version(uint16_t v) { version_ = htole16(v); }
+
+  void version_to_extract(uint16_t v) { version_to_extract_ = htole16(v); }
+
+  void bit_flag(uint16_t v) { bit_flag_ = htole16(v); }
+  uint16_t bit_flag() const { return le16toh(bit_flag_); }
+
+  void compression_method(uint16_t v) { compression_method_ = htole16(v); }
+
+  void last_mod_file_time(uint16_t v) { last_mod_file_time_ = htole16(v); }
+
+  void last_mod_file_date(uint16_t v) { last_mod_file_date_ = htole16(v); }
+
+  void crc32(uint32_t v) { crc32_ = htole32(v); }
+
+  // Compressed size. When the 32-bit field holds 0xFFFFFFFF the value is read
+  // from the Zip64 extra field: it is the second attribute there if the
+  // uncompressed size is stored in it as well, the first one otherwise.
+  // Returns 0xFFFFFFFF if the Zip64 extra field is absent.
+  size_t compressed_file_size() const {
+    size_t size32 = compressed_file_size32();
+    if (size32 != 0xFFFFFFFF) {
+      return size32;
+    }
+    const Zip64ExtraField *z64 = zip64_extra_field();
+    return z64 == nullptr
+               ? 0xFFFFFFFF
+               : z64->attr64(uncompressed_file_size32() == 0xFFFFFFFF ? 1 : 0);
+  }
+  size_t compressed_file_size32() const {
+    return le32toh(compressed_file_size32_);
+  }
+  void compressed_file_size32(uint32_t v) {
+    compressed_file_size32_ = htole32(v);
+  }
+
+  // Uncompressed size. When the 32-bit field holds 0xFFFFFFFF the value is the
+  // first attribute of the Zip64 extra field (0xFFFFFFFF if it is absent).
+  size_t uncompressed_file_size() const {
+    uint32_t size32 = uncompressed_file_size32();
+    if (size32 != 0xFFFFFFFF) {
+      return size32;
+    }
+    const Zip64ExtraField *z64 = zip64_extra_field();
+    return z64 == nullptr ? 0xFFFFFFFF : z64->attr64(0);
+  }
+  size_t uncompressed_file_size32() const {
+    return le32toh(uncompressed_file_size32_);
+  }
+
+  void uncompressed_file_size32(uint32_t v) {
+    uncompressed_file_size32_ = htole32(v);
+  }
+
+  uint16_t file_name_length() const { return le16toh(file_name_length_); }
+  // The name is NOT NUL-terminated; it is file_name_length() bytes long.
+  const char *file_name() const { return file_name_; }
+  // Stores the name length and copies `filename_len` bytes of `filename` right
+  // after the fixed part. The caller must provide the room for them.
+  void file_name(const char *filename, uint16_t filename_len) {
+    file_name_length_ = htole16(filename_len);
+    if (filename_len) {
+      memcpy(file_name_, filename, filename_len);
+    }
+  }
+  // True if the stored name equals the NUL-terminated `name`.
+  bool file_name_is(const char *name) const {
+    size_t name_len = strlen(name);
+    return file_name_length() == name_len &&
+           0 == strncmp(file_name(), name, name_len);
+  }
+
+  uint16_t extra_fields_length() const { return le16toh(extra_fields_length_); }
+  // The extra fields start right after the file name. The offset must be the
+  // host-order length, hence the accessor rather than the raw field.
+  const uint8_t *extra_fields() const {
+    return byte_ptr(file_name_ + file_name_length());
+  }
+  void extra_fields_length(uint16_t v) { extra_fields_length_ = htole16(v); }
+
+  uint16_t comment_length() const { return le16toh(comment_length_); }
+  void comment_length(uint16_t v) { comment_length_ = htole16(v); }
+
+  uint16_t start_disk_nr() const { return le16toh(start_disk_nr_); }
+  // Setter counterpart of start_disk_nr().
+  void disk_number(uint16_t v) { start_disk_nr_ = htole16(v); }
+
+  uint16_t internal_attributes() const { return le16toh(internal_attributes_); }
+  void internal_attributes(uint16_t v) { internal_attributes_ = htole16(v); }
+
+  uint32_t external_attribute() const { return le32toh(external_attributes_); }
+  void external_attribute(uint32_t v) { external_attributes_ = htole32(v); }
+
+  // Offset of the local header. When the 32-bit field holds 0xFFFFFFFF the
+  // value is read from the Zip64 extra field, where it follows whichever of
+  // the uncompressed/compressed sizes are stored there too.
+  // Returns 0xFFFFFFFF if the Zip64 extra field is absent.
+  uint64_t local_header_offset() const {
+    uint32_t size32 = local_header_offset32();
+    if (size32 != 0xFFFFFFFF) {
+      return size32;
+    }
+    const Zip64ExtraField *z64 = zip64_extra_field();
+    int attr_no = uncompressed_file_size32() == 0xFFFFFFFF ? 1 : 0;
+    if (compressed_file_size32() == 0xFFFFFFFF) {
+      ++attr_no;
+    }
+    return z64 == nullptr ? 0xFFFFFFFF : z64->attr64(attr_no);
+  }
+
+  uint32_t local_header_offset32() const {
+    return le32toh(local_header_offset32_);
+  }
+  void local_header_offset32(uint32_t v) {
+    local_header_offset32_ = htole32(v);
+  }
+  // True if bit 3 (0x08) of the general purpose bit flag is set.
+  bool no_size_in_local_header() const { return bit_flag() & 0x08; }
+  // Size of the whole record: fixed part, file name, extra fields, comment.
+  size_t size() const {
+    return sizeof(*this) + file_name_length() + extra_fields_length() +
+           comment_length();
+  }
+
+  // The Zip64 extra field of this header, or nullptr if there is none.
+  const Zip64ExtraField *zip64_extra_field() const {
+    return Zip64ExtraField::find(extra_fields(),
+                                 extra_fields() + extra_fields_length());
+  }
+
+ private:
+  uint32_t signature_;
+  uint16_t version_;
+  uint16_t version_to_extract_;
+  uint16_t bit_flag_;
+  uint16_t compression_method_;
+  uint16_t last_mod_file_time_;
+  uint16_t last_mod_file_date_;
+  uint32_t crc32_;
+  uint32_t compressed_file_size32_;
+  uint32_t uncompressed_file_size32_;
+  uint16_t file_name_length_;
+  uint16_t extra_fields_length_;
+  uint16_t comment_length_;
+  uint16_t start_disk_nr_;
+  uint16_t internal_attributes_;
+  uint32_t external_attributes_;
+  uint32_t local_header_offset32_;
+  char file_name_[0];
+  // Followed by extra fields and then comment.
+} __attribute__((packed));
+static_assert(46 == sizeof(CDH), "Class CDH fields layout is incorrect.");
+
+/* Zip64 End of Central Directory Locator.
+ *
+ * Overlay for the fixed-size on-disk record; all fields are stored
+ * little-endian and converted by the accessors.
+ */
+class ECD64Locator {
+ public:
+  void signature() { signature_ = htole32(0x07064b50); }
+  bool is() const { return 0x07064b50 == le32toh(signature_); }
+
+  void ecd64_disk_nr(uint32_t nr) { ecd64_disk_nr_ = htole32(nr); }
+  uint32_t ecd64_disk_nr() const { return le32toh(ecd64_disk_nr_); }
+  // Misspelled name of ecd64_disk_nr(), kept so that existing callers build.
+  uint32_t ecd4_disk_nr() const { return ecd64_disk_nr(); }
+
+  void ecd64_offset(uint64_t v) { ecd64_offset_ = htole64(v); }
+  uint64_t ecd64_offset() const { return le64toh(ecd64_offset_); }
+
+  void total_disks(uint32_t v) { total_disks_ = htole32(v); }
+  uint32_t total_disks() const { return le32toh(total_disks_); }
+
+ private:
+  uint32_t signature_;
+  uint32_t ecd64_disk_nr_;
+  uint64_t ecd64_offset_;
+  uint32_t total_disks_;
+} __attribute__((packed));
+static_assert(20 == sizeof(ECD64Locator),
+              "ECD64Locator class fields layout is incorrect.");
+
+/* End of Central Directory.
+ *
+ * Overlay for the on-disk record: the fixed-size part is followed by the
+ * archive comment. All multi-byte fields are stored little-endian and
+ * converted by the accessors.
+ */
+class ECD {
+ public:
+  void signature() { signature_ = htole32(0x06054b50); }
+  bool is() const { return 0x06054b50 == le32toh(signature_); }
+
+  void this_disk_nr(uint16_t v) { this_disk_nr_ = htole16(v); }
+  uint16_t this_disk_nr() const { return le16toh(this_disk_nr_); }
+
+  void cen_disk_nr(uint16_t v) { cen_disk_nr_ = htole16(v); }
+  uint16_t cen_disk_nr() const { return le16toh(cen_disk_nr_); }
+
+  void this_disk_entries16(uint16_t v) { this_disk_entries16_ = htole16(v); }
+  uint16_t this_disk_entries16() const { return le16toh(this_disk_entries16_); }
+
+  void total_entries16(uint16_t v) { total_entries16_ = htole16(v); }
+  uint16_t total_entries16() const { return le16toh(total_entries16_); }
+
+  void cen_size32(uint32_t v) { cen_size32_ = htole32(v); }
+  uint32_t cen_size32() const { return le32toh(cen_size32_); }
+
+  void cen_offset32(uint32_t v) { cen_offset32_ = htole32(v); }
+  uint32_t cen_offset32() const { return le32toh(cen_offset32_); }
+
+  // Stores the comment length and copies `data_size` bytes of `data` right
+  // after the fixed part. The caller must provide the room for them.
+  // `data` is only read, hence the pointer to const.
+  void comment(const uint8_t *data, uint16_t data_size) {
+    comment_length_ = htole16(data_size);
+    if (data_size) {
+      memcpy(comment_, data, data_size);
+    }
+  }
+  uint16_t comment_length() const { return le16toh(comment_length_); }
+  const uint8_t *comment() const { return comment_; }
+
+  // Offset of the Zip64 End of Central Directory record, taken from the
+  // locator expected immediately before this record. Returns
+  // 0xFFFFFFFFFFFFFFFF if those bytes do not carry the locator signature.
+  // Reads sizeof(ECD64Locator) bytes preceding `this`, so the caller must
+  // make sure that many bytes are accessible there.
+  uint64_t ecd64_offset() const {
+    const ECD64Locator *locator = reinterpret_cast<const ECD64Locator *>(
+        byte_ptr(this) - sizeof(ECD64Locator));
+    return locator->is() ? locator->ecd64_offset() : 0xFFFFFFFFFFFFFFFF;
+  }
+
+ private:
+  uint32_t signature_;
+  uint16_t this_disk_nr_;
+  uint16_t cen_disk_nr_;
+  uint16_t this_disk_entries16_;
+  uint16_t total_entries16_;
+  uint32_t cen_size32_;
+  uint32_t cen_offset32_;
+  uint16_t comment_length_;
+  uint8_t comment_[0];
+} __attribute__((packed));
+static_assert(22 == sizeof(ECD), "ECD class fields layout is incorrect.");
+
+/* Zip64 end of central directory.
+ *
+ * Overlay for the fixed-size on-disk record. Every field is kept
+ * little-endian; readers convert to host order, writers convert from it.
+ */
+class ECD64 {
+ public:
+  // Record signature: check it / stamp it.
+  bool is() const { return le32toh(signature_) == 0x06064b50; }
+  void signature() { signature_ = htole32(0x06064b50); }
+
+  // Readers.
+  uint64_t remaining_size() const { return le64toh(remaining_size_); }
+  uint16_t version() const { return le16toh(version_); }
+  uint16_t version_to_extract() const { return le16toh(version_to_extract_); }
+  uint32_t this_disk_nr() const { return le32toh(this_disk_nr_); }
+  uint32_t cen_disk_nr() const { return le32toh(cen_disk_nr_); }
+  uint64_t this_disk_entries() const { return le64toh(this_disk_entries_); }
+  uint64_t total_entries() const { return le64toh(total_entries_); }
+  uint64_t cen_size() const { return le64toh(cen_size_); }
+  uint64_t cen_offset() const { return le64toh(cen_offset_); }
+
+  // Writers.
+  void remaining_size(uint64_t v) { remaining_size_ = htole64(v); }
+  void version(uint16_t v) { version_ = htole16(v); }
+  void version_to_extract(uint16_t v) { version_to_extract_ = htole16(v); }
+  void this_disk_nr(uint32_t v) { this_disk_nr_ = htole32(v); }
+  void cen_disk_nr(uint32_t v) { cen_disk_nr_ = htole32(v); }
+  void this_disk_entries(uint64_t v) { this_disk_entries_ = htole64(v); }
+  void total_entries(uint64_t v) { total_entries_ = htole64(v); }
+  void cen_size(uint64_t v) { cen_size_ = htole64(v); }
+  void cen_offset(uint64_t v) { cen_offset_ = htole64(v); }
+
+ private:
+  uint32_t signature_;
+  uint64_t remaining_size_;
+  uint16_t version_;
+  uint16_t version_to_extract_;
+  uint32_t this_disk_nr_;
+  uint32_t cen_disk_nr_;
+  uint64_t this_disk_entries_;
+  uint64_t total_entries_;
+  uint64_t cen_size_;
+  uint64_t cen_offset_;
+} __attribute__((packed));
+static_assert(56 == sizeof(ECD64), "ECD64 class fields layout is incorrect.");
+
+#endif // BAZEL_SRC_TOOLS_SINGLEJAR_ZIP_HEADERS_H_
diff --git a/src/tools/singlejar/zlib_interface.h b/src/tools/singlejar/zlib_interface.h
new file mode 100644
index 0000000000..1a06b8e821
--- /dev/null
+++ b/src/tools/singlejar/zlib_interface.h
@@ -0,0 +1,98 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BAZEL_SRC_TOOLS_SINGLEJAR_ZLIB_INTERFACE_H_
+#define BAZEL_SRC_TOOLS_SINGLEJAR_ZLIB_INTERFACE_H_
+
+#include <zlib.h>
+
+#include <err.h>
+#include <stdint.h>
+
+// An interface to zlib's inflater. Usage:
+//    Inflater inflater;
+//    inflater.DataToInflate(data, data_size);
+//    for (;;) {
+//      int rc = inflater.Inflate(out_buffer, out_buffer_size);
+//      if (rc == Z_STREAM_END) {
+//        break;
+//      }
+//      // If we ran out of out_buffer, create a new one
+//    }
+//    inflater.reset();
+//
+// The stream is set up with negative window bits (-MAX_WBITS), i.e. for raw
+// deflate data without a zlib or gzip wrapper.
+class Inflater {
+ public:
+  // Initializes the zlib stream; exits via errx() if zlib refuses to.
+  Inflater() {
+    zstream_.zalloc = Z_NULL;
+    zstream_.zfree = Z_NULL;
+    zstream_.opaque = Z_NULL;
+    zstream_.avail_in = 0;
+    zstream_.next_in = nullptr;
+    // Give the output side a defined state too, so that available_out()
+    // can be called before the first Inflate().
+    zstream_.avail_out = 0;
+    zstream_.next_out = nullptr;
+    int ret = inflateInit2(&zstream_, -MAX_WBITS);
+    if (ret != Z_OK) {
+      errx(2, "inflateInit2 returned %d\n", ret);
+    }
+  }
+
+  ~Inflater() { inflateEnd(&zstream_); }
+
+  // Makes the instance ready to inflate another stream.
+  void reset() { inflateReset(&zstream_); }
+
+  // Sets the compressed input. The buffer is not copied, so it has to stay
+  // valid while it is being inflated.
+  void DataToInflate(const uint8_t *in_buffer, unsigned in_buffer_length) {
+    zstream_.next_in = const_cast<uint8_t *>(in_buffer);
+    zstream_.avail_in = in_buffer_length;
+  }
+
+  // Inflates as much pending input as fits into `out_buffer` and returns the
+  // result code of zlib's inflate().
+  int Inflate(uint8_t *out_buffer, unsigned out_buffer_length) {
+    zstream_.next_out = out_buffer;
+    zstream_.avail_out = out_buffer_length;
+    return inflate(&zstream_, Z_SYNC_FLUSH);
+  }
+
+  // Number of bytes left unused in the buffer given to the last Inflate().
+  unsigned available_out() const { return zstream_.avail_out; }
+
+  // zlib's message for the last error; may be null.
+  const char *error_message() const { return zstream_.msg; }
+
+ private:
+  z_stream zstream_;
+};
+
+// A little wrapper around zlib's deflater.
+//
+// The instance IS the z_stream, so the caller sets next_out/avail_out directly
+// before calling Deflate(). The stream is set up with negative window bits
+// (-MAX_WBITS), i.e. it produces raw deflate data without a zlib wrapper.
+struct Deflater : z_stream {
+  // Initializes the zlib stream; exits via errx() if zlib refuses to.
+  Deflater() {
+    zalloc = Z_NULL;
+    zfree = Z_NULL;
+    opaque = Z_NULL;
+    next_in = nullptr;
+    avail_in = 0;
+    next_out = nullptr;
+    avail_out = 0;
+    // The error path below prints msg, so it must not be left uninitialized
+    // in case deflateInit2() fails before setting it.
+    msg = nullptr;
+    int ret = deflateInit2(this, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS,
+                           8, Z_DEFAULT_STRATEGY);
+    if (ret != Z_OK) {
+      // zlib leaves msg null when it has nothing to say; never pass a null
+      // pointer to %s.
+      errx(2, "deflateInit returned %d (%s)", ret,
+           msg != nullptr ? msg : "(null)");
+    }
+  }
+
+  ~Deflater() { deflateEnd(this); }
+
+  // Feeds `data_size` bytes at `data` to zlib's deflate() with the given flush
+  // `flag` and returns its result code. The output goes to next_out/avail_out.
+  // NOTE(review): avail_in is narrower than size_t in zlib -- confirm callers
+  // never pass more than fits.
+  int Deflate(const uint8_t *data, size_t data_size, int flag) {
+    next_in = const_cast<uint8_t *>(data);
+    avail_in = data_size;
+    return deflate(this, flag);
+  }
+};
+
+#endif // BAZEL_SRC_TOOLS_SINGLEJAR_ZLIB_INTERFACE_H_
diff --git a/src/tools/singlejar/zlib_interface_test.cc b/src/tools/singlejar/zlib_interface_test.cc
new file mode 100644
index 0000000000..802d15350e
--- /dev/null
+++ b/src/tools/singlejar/zlib_interface_test.cc
@@ -0,0 +1,100 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+
+#include "src/tools/singlejar/zlib_interface.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+// Fixture that gives every test a freshly constructed Inflater and Deflater.
+class ZlibInterfaceTest : public ::testing::Test {
+ protected:
+  // Replaces whatever the previous test left behind with new instances.
+  void SetUp() override {
+    inflater_.reset(new Inflater());
+    deflater_.reset(new Deflater());
+  }
+
+  std::unique_ptr<Inflater> inflater_;
+  std::unique_ptr<Deflater> deflater_;
+};
+
+// Input shared by the tests below: the sequence 1, 2, 3, 4 repeated 3 times.
+static const uint8_t bytes[] = {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4};
+
+// With ample output space a single Z_FINISH call completes the stream.
+TEST_F(ZlibInterfaceTest, DeflateFully) {
+  uint8_t compressed[256];
+  deflater_->next_out = compressed;
+  deflater_->avail_out = sizeof(compressed);
+  const int rc = deflater_->Deflate(bytes, sizeof(bytes), Z_FINISH);
+  EXPECT_EQ(Z_STREAM_END, rc);
+}
+
+// The output is handed out in two pieces: first 2 bytes, then the rest.
+TEST_F(ZlibInterfaceTest, DeflateIntoChunks) {
+  uint8_t compressed[256];
+
+  // First piece: too small, so deflate has to stop with Z_OK and use it up.
+  deflater_->next_out = compressed;
+  deflater_->avail_out = 2;
+  const int first_rc = deflater_->Deflate(bytes, sizeof(bytes), Z_FINISH);
+  EXPECT_EQ(Z_OK, first_rc);
+  EXPECT_EQ(0, deflater_->avail_out);
+
+  // Second piece: continue from wherever the input was left.
+  deflater_->next_out = compressed + 2;
+  deflater_->avail_out = sizeof(compressed) - 2;
+  const int second_rc =
+      deflater_->Deflate(deflater_->next_in, deflater_->avail_in, Z_FINISH);
+  EXPECT_EQ(Z_STREAM_END, second_rc);
+}
+
+// The input is fed in two pieces: the first 4 bytes, then the remainder.
+TEST_F(ZlibInterfaceTest, DeflateChunks) {
+  uint8_t compressed[256];
+  deflater_->next_out = compressed;
+  deflater_->avail_out = sizeof(compressed);
+
+  const int head_rc = deflater_->Deflate(bytes, 4, Z_NO_FLUSH);
+  EXPECT_EQ(Z_OK, head_rc);
+
+  const int tail_rc =
+      deflater_->Deflate(bytes + 4, sizeof(bytes) - 4, Z_FINISH);
+  EXPECT_EQ(Z_STREAM_END, tail_rc);
+}
+
+// Round trip: deflate everything at once, then inflate it in a single call.
+TEST_F(ZlibInterfaceTest, InflateFully) {
+  uint8_t compressed[256];
+  deflater_->next_out = compressed;
+  deflater_->avail_out = sizeof(compressed);
+  const int deflate_rc = deflater_->Deflate(bytes, sizeof(bytes), Z_FINISH);
+  EXPECT_EQ(Z_STREAM_END, deflate_rc);
+
+  // Whatever the deflater did not leave unused is the compressed data.
+  size_t compressed_size = sizeof(compressed) - deflater_->avail_out;
+  inflater_->DataToInflate(compressed, compressed_size);
+
+  uint8_t uncompressed[256];
+  memset(uncompressed, 0, sizeof(uncompressed));
+  const int inflate_rc = inflater_->Inflate(uncompressed, sizeof(uncompressed));
+  EXPECT_EQ(Z_STREAM_END, inflate_rc);
+  // Both the amount and the contents have to match the original input.
+  EXPECT_EQ(sizeof(bytes), sizeof(uncompressed) - inflater_->available_out());
+  EXPECT_EQ(0, memcmp(bytes, uncompressed, sizeof(bytes)));
+}
+
+// Round trip with the inflated data delivered in two pieces: the first 3
+// bytes, then the rest.
+TEST_F(ZlibInterfaceTest, InflateToChunks) {
+  uint8_t compressed[256];
+  deflater_.get()->next_out = compressed;
+  deflater_.get()->avail_out = sizeof(compressed);
+  EXPECT_EQ(Z_STREAM_END, deflater_->Deflate(bytes, sizeof(bytes), Z_FINISH));
+
+  // Now we have deflated data, inflate it back and compare.
+  size_t compressed_size = sizeof(compressed) - deflater_.get()->avail_out;
+  inflater_->DataToInflate(compressed, compressed_size);
+  uint8_t uncompressed[256];
+  memset(uncompressed, 0, sizeof(uncompressed));
+  EXPECT_EQ(Z_OK, inflater_->Inflate(uncompressed, 3));
+  EXPECT_EQ(Z_STREAM_END,
+            inflater_->Inflate(uncompressed + 3, sizeof(uncompressed) - 3));
+  // The second call must have produced exactly what the first one could not
+  // fit; memcmp alone would not notice extra trailing output.
+  EXPECT_EQ(sizeof(bytes) - 3,
+            sizeof(uncompressed) - 3 - inflater_->available_out());
+  EXPECT_EQ(0, memcmp(bytes, uncompressed, sizeof(bytes)));
+}
+
+} // namespace