diff options
Diffstat (limited to 'src/tools')
-rw-r--r-- | src/tools/singlejar/BUILD | 47 | ||||
-rw-r--r-- | src/tools/singlejar/input_jar.cc | 149 | ||||
-rw-r--r-- | src/tools/singlejar/input_jar.h | 154 | ||||
-rw-r--r-- | src/tools/singlejar/input_jar_preambled_test.cc | 2 | ||||
-rw-r--r-- | src/tools/singlejar/input_jar_random_jars_test.cc | 2 | ||||
-rw-r--r-- | src/tools/singlejar/mapped_file.h | 10 |
6 files changed, 200 insertions, 164 deletions
diff --git a/src/tools/singlejar/BUILD b/src/tools/singlejar/BUILD index 76dce90627..81e5d283de 100644 --- a/src/tools/singlejar/BUILD +++ b/src/tools/singlejar/BUILD @@ -14,11 +14,11 @@ cc_test( srcs = [ "combiners_test.cc", ":combiners", - ":input_jar", ":zip_headers", ":zlib_interface", ], deps = [ + ":input_jar", "//third_party:gtest", "//third_party/zlib", ], @@ -28,13 +28,13 @@ cc_test( name = "input_jar_preambled_test", srcs = [ "input_jar_preambled_test.cc", - ":input_jar", ":test_util", ], data = [ ":test1", ], deps = [ + ":input_jar", "//src/main/cpp/util", "//third_party:gtest", ], @@ -46,7 +46,6 @@ cc_test( srcs = [ "input_jar_scan_entries_test.h", "input_jar_scan_jartool_test.cc", - ":input_jar", ":test_util", ], copts = ["-DJAR_TOOL_PATH=\\\"external/local_jdk/bin/jar\\\""], @@ -56,7 +55,10 @@ cc_test( ], # Timing out, see https://github.com/bazelbuild/bazel/issues/1555 tags = ["manual"], - deps = ["//third_party:gtest"], + deps = [ + ":input_jar", + "//third_party:gtest", + ], ) cc_test( @@ -65,22 +67,26 @@ cc_test( srcs = [ "input_jar_scan_entries_test.h", "input_jar_scan_ziptool_test.cc", - ":input_jar", ":test_util", ], # Timing out, see https://github.com/bazelbuild/bazel/issues/1555 tags = ["manual"], - deps = ["//third_party:gtest"], + deps = [ + ":input_jar", + "//third_party:gtest", + ], ) cc_test( name = "input_jar_bad_jar_test", srcs = [ "input_jar_bad_jar_test.cc", - ":input_jar", ":test_util", ], - deps = ["//third_party:gtest"], + deps = [ + ":input_jar", + "//third_party:gtest", + ], ) cc_test( @@ -110,7 +116,6 @@ cc_test( size = "large", srcs = [ "transient_bytes_test.cc", - ":input_jar", ":test_util", ":transient_bytes", ":zlib_interface", @@ -118,6 +123,7 @@ cc_test( # Timing out, see https://github.com/bazelbuild/bazel/issues/1555 tags = ["manual"], deps = [ + ":input_jar", "//third_party:gtest", "//third_party/zlib", ], @@ -146,6 +152,19 @@ cc_test( ) cc_library( + name = "input_jar", + srcs = [ + "diag.h", + "input_jar.cc", + "mapped_file.h", + ], + hdrs = [ + "input_jar.h", + "zip_headers.h", + ], +) + +cc_library( name = "options", srcs = [ "diag.h", @@ -166,16 +185,6 @@ filegroup( ) filegroup( - name = "input_jar", - srcs = [ - "diag.h", - "input_jar.h", - "mapped_file.h", - "zip_headers.h", - ], -) - -filegroup( name = "test_util", srcs = ["test_util.h"], ) diff --git a/src/tools/singlejar/input_jar.cc b/src/tools/singlejar/input_jar.cc new file mode 100644 index 0000000000..980a90c6f8 --- /dev/null +++ b/src/tools/singlejar/input_jar.cc @@ -0,0 +1,149 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "src/tools/singlejar/input_jar.h" + +bool InputJar::Open(const std::string &path) { + if (!path_.empty()) { + diag_errx(1, "%s:%d: This instance is already handling %s\n", __FILE__, + __LINE__, path_.c_str()); + } + if (!mapped_file_.Open(path)) { + diag_warn("%s:%d: Cannot open input jar %s", __FILE__, __LINE__, + path.c_str()); + mapped_file_.Close(); + return false; + } + if (mapped_file_.size() < sizeof(ECD)) { + diag_warnx( + "%s:%d: %s is only 0x%lx" + " bytes long, should be at least 0x%lx bytes long", + __FILE__, __LINE__, path.c_str(), mapped_file_.size(), sizeof(ECD)); + mapped_file_.Close(); + return false; + } + + // Now locate End of Central Directory (ECD) record. + auto ecd_min = mapped_file_.end() - 65536 - sizeof(ECD); + if (ecd_min < mapped_file_.start()) { + ecd_min = mapped_file_.start(); + } + + const ECD *ecd = nullptr; + for (auto ecd_ptr = mapped_file_.end() - sizeof(ECD); ecd_ptr >= ecd_min; + --ecd_ptr) { + if (reinterpret_cast<const ECD *>(ecd_ptr)->is()) { + ecd = reinterpret_cast<const ECD *>(ecd_ptr); + break; + } + } + if (ecd == nullptr) { + diag_warnx("%s:%d: Cannot locate ECD record in %s", __FILE__, __LINE__, + path.c_str()); + mapped_file_.Close(); + return false; + } + + /* Find Central Directory and preamble size. We want to handle the case + * where a Jar/Zip file contains a preamble (an arbitrary data before the + * first entry) and 'zip -A' was not called to adjust the offsets, so all + * the offsets are off by the preamble size. In the 32-bit case (that is, + * there is no ECD64Locator+ECD64), ECD immediately follows the last CDH, + * ECD immediately follows the Central Directory, and contains its size, so + * Central Directory can be found reliably. We then use its stated location, + * which ECD contains, too, to calculate the preamble size. In the 64-bit + * case, there are ECD64 and ECD64Locator records between the end of the + * Central Directory and the ECD, the calculation is similar, with the + * exception of the logic to find the actual start of the ECD64. + * ECD64Locator contains only its position in the file, which is off by + * preamble size, but does not contain the actual size of ECD64, which in + * theory is variable (the fixed fields may be followed by some custom data, + * with the total size saved in ECD64::remaining_size and thus unavailable + * until we find ECD64. We assume that the custom data is missing. + */ + + // First, sanity checks. + uint64_t cen_position = ecd->cen_offset32(); + if (cen_position != 0xFFFFFFFF) { + if (!mapped_file_.mapped(mapped_file_.address(cen_position))) { + diag_warnx("%s:%d: %s is corrupt: Central Directory location 0x%" PRIx64 + " is invalid", + __FILE__, __LINE__, path.c_str(), cen_position); + mapped_file_.Close(); + return false; + } + if (mapped_file_.offset(ecd) <= cen_position) { + diag_warnx("%s:%d: %s is corrupt: End of Central Directory at 0x%" PRIx64 + " precedes Central Directory at 0x%" PRIx64, + __FILE__, __LINE__, path.c_str(), mapped_file_.offset(ecd), + cen_position); + mapped_file_.Close(); + return false; + } + } + uint64_t cen_size = ecd->cen_size32(); + if (cen_size != 0xFFFFFFFF) { + if (cen_size > mapped_file_.offset(ecd)) { + diag_warnx("%s:%d: %s is corrupt: Central Directory size 0x%" PRIx64 + " is too large", + __FILE__, __LINE__, path.c_str(), cen_size); + mapped_file_.Close(); + return false; + } + } + + auto ecd64loc = reinterpret_cast<const ECD64Locator *>(byte_ptr(ecd) - + sizeof(ECD64Locator)); + if (ecd64loc->is()) { + auto ecd64 = + reinterpret_cast<const ECD64 *>(byte_ptr(ecd64loc) - sizeof(ECD64)); + if (!ecd64->is()) { + diag_warnx( + "%s:%d: %s is corrupt, expected ECD64 record at offset 0x%" PRIx64 + " is missing", + __FILE__, __LINE__, path.c_str(), mapped_file_.offset(ecd64)); + mapped_file_.Close(); + return false; + } + cdh_ = reinterpret_cast<const CDH *>(byte_ptr(ecd64) - ecd64->cen_size()); + preamble_size_ = mapped_file_.offset(cdh_) - ecd64->cen_offset(); + // Find CEN and preamble size. + } else { + if (cen_size == 0xFFFFFFFF || cen_position == 0xFFFFFFFF) { + diag_warnx( + "%s:%d: %s is corrupt, expected ECD64 locator record at " + "offset 0x%" PRIx64 " is missing", + __FILE__, __LINE__, path.c_str(), mapped_file_.offset(ecd64loc)); + return false; + } + cdh_ = reinterpret_cast<const CDH *>(byte_ptr(ecd) - cen_size); + preamble_size_ = mapped_file_.offset(cdh_) - cen_position; + } + if (!cdh_->is()) { + diag_warnx( + "%s:%d: In %s, expected central file header signature at " + "offset0x%" PRIx64, + __FILE__, __LINE__, path.c_str(), mapped_file_.offset(cdh_)); + mapped_file_.Close(); + return false; + } + path_ = path; + return true; +} + +bool InputJar::Close() { + mapped_file_.Close(); + path_.clear(); + return true; +} diff --git a/src/tools/singlejar/input_jar.h b/src/tools/singlejar/input_jar.h index cd3dbf82e0..265a392692 100644 --- a/src/tools/singlejar/input_jar.h +++ b/src/tools/singlejar/input_jar.h @@ -18,6 +18,8 @@ #include <inttypes.h> #include <stdlib.h> +#include <string> + #include "src/tools/singlejar/diag.h" #include "src/tools/singlejar/mapped_file.h" #include "src/tools/singlejar/zip_headers.h" @@ -35,142 +37,18 @@ */ class InputJar { public: - InputJar() : path_(nullptr) {} + InputJar() {} ~InputJar() { Close(); } int fd() const { return mapped_file_.fd(); } // Opens the file, memory maps it and locates Central Directory. - bool Open(const char *path) { - if (path_ != nullptr) { - diag_errx(1, "%s:%d: This instance is already handling %s\n", __FILE__, - __LINE__, path_); - } - if (!mapped_file_.Open(path)) { - diag_warn("%s:%d: Cannot open input jar %s", __FILE__, __LINE__, path); - mapped_file_.Close(); - return false; - } - if (mapped_file_.size() < sizeof(ECD)) { - diag_warnx("%s:%d: %s is only 0x%lx" - " bytes long, should be at least 0x%lx bytes long", - __FILE__, __LINE__, path_, mapped_file_.size(), sizeof(ECD)); - mapped_file_.Close(); - return false; - } - - // Now locate End of Central Directory (ECD) record. - auto ecd_min = mapped_file_.end() - 65536 - sizeof(ECD); - if (ecd_min < mapped_file_.start()) { - ecd_min = mapped_file_.start(); - } - - const ECD *ecd = nullptr; - for (auto ecd_ptr = mapped_file_.end() - sizeof(ECD); ecd_ptr >= ecd_min; - --ecd_ptr) { - if (reinterpret_cast<const ECD *>(ecd_ptr)->is()) { - ecd = reinterpret_cast<const ECD *>(ecd_ptr); - break; - } - } - if (ecd == nullptr) { - diag_warnx("%s:%d: Cannot locate ECD record in %s", __FILE__, __LINE__, - path); - mapped_file_.Close(); - return false; - } - - /* Find Central Directory and preamble size. We want to handle the case - * where a Jar/Zip file contains a preamble (an arbitrary data before the - * first entry) and 'zip -A' was not called to adjust the offsets, so all - * the offsets are off by the preamble size. In the 32-bit case (that is, - * there is no ECD64Locator+ECD64), ECD immediately follows the last CDH, - * ECD immediately follows the Central Directory, and contains its size, so - * Central Directory can be found reliably. We then use its stated location, - * which ECD contains, too, to calculate the preamble size. In the 64-bit - * case, there are ECD64 and ECD64Locator records between the end of the - * Central Directory and the ECD, the calculation is similar, with the - * exception of the logic to find the actual start of the ECD64. - * ECD64Locator contains only its position in the file, which is off by - * preamble size, but does not contain the actual size of ECD64, which in - * theory is variable (the fixed fields may be followed by some custom data, - * with the total size saved in ECD64::remaining_size and thus unavailable - * until we find ECD64. We assume that the custom data is missing. - */ - - // First, sanity checks. - uint64_t cen_position = ecd->cen_offset32(); - if (cen_position != 0xFFFFFFFF) { - if (!mapped_file_.mapped(mapped_file_.address(cen_position))) { - diag_warnx("%s:%d: %s is corrupt: Central Directory location 0x%" PRIx64 - " is invalid", - __FILE__, __LINE__, path, cen_position); - mapped_file_.Close(); - return false; - } - if (mapped_file_.offset(ecd) <= cen_position) { - diag_warnx( - "%s:%d: %s is corrupt: End of Central Directory at 0x%" PRIx64 - " precedes Central Directory at 0x%" PRIx64, - __FILE__, __LINE__, path, mapped_file_.offset(ecd), cen_position); - mapped_file_.Close(); - return false; - } - } - uint64_t cen_size = ecd->cen_size32(); - if (cen_size != 0xFFFFFFFF) { - if (cen_size > mapped_file_.offset(ecd)) { - diag_warnx("%s:%d: %s is corrupt: Central Directory size 0x%" PRIx64 - " is too large", - __FILE__, __LINE__, path, cen_size); - mapped_file_.Close(); - return false; - } - } - - auto ecd64loc = reinterpret_cast<const ECD64Locator *>( - byte_ptr(ecd) - sizeof(ECD64Locator)); - if (ecd64loc->is()) { - auto ecd64 = - reinterpret_cast<const ECD64 *>(byte_ptr(ecd64loc) - sizeof(ECD64)); - if (!ecd64->is()) { - diag_warnx( - "%s:%d: %s is corrupt, expected ECD64 record at offset 0x%" PRIx64 - " is missing", - __FILE__, __LINE__, path, mapped_file_.offset(ecd64)); - mapped_file_.Close(); - return false; - } - cdh_ = reinterpret_cast<const CDH *>(byte_ptr(ecd64) - ecd64->cen_size()); - preamble_size_ = mapped_file_.offset(cdh_) - ecd64->cen_offset(); - // Find CEN and preamble size. - } else { - if (cen_size == 0xFFFFFFFF || cen_position == 0xFFFFFFFF) { - diag_warnx( - "%s:%d: %s is corrupt, expected ECD64 locator record at " - "offset 0x%" PRIx64 " is missing", - __FILE__, __LINE__, path, mapped_file_.offset(ecd64loc)); - return false; - } - cdh_ = reinterpret_cast<const CDH *>(byte_ptr(ecd) - cen_size); - preamble_size_ = mapped_file_.offset(cdh_) - cen_position; - } - if (!cdh_->is()) { - diag_warnx( - "%s:%d: In %s, expected central file header signature at " - "offset0x%" PRIx64, - __FILE__, __LINE__, path, mapped_file_.offset(cdh_)); - mapped_file_.Close(); - return false; - } - path_ = strdup(path); - return true; - } + bool Open(const std::string& path); // Returns the next Central Directory Header or NULL. const CDH *NextEntry(const LH **local_header_ptr) { - if (!path_) { + if (path_.empty()) { diag_errx(1, "%s:%d: call Open() first!", __FILE__, __LINE__); } if (!cdh_->is()) { @@ -183,8 +61,9 @@ class InputJar { 1, "Bad directory record at offset 0x%" PRIx64 " of %s\n" "file name length = %u, extra_field length = %u, comment length = %u", - CentralDirectoryRecordOffset(cdh_), path_, cdh_->file_name_length(), - cdh_->extra_fields_length(), cdh_->comment_length()); + CentralDirectoryRecordOffset(cdh_), path_.c_str(), + cdh_->file_name_length(), cdh_->extra_fields_length(), + cdh_->comment_length()); } cdh_ = reinterpret_cast<const CDH *>(new_cdr); *local_header_ptr = LocalHeader(current_cdh); @@ -192,17 +71,10 @@ class InputJar { } // Closes the file. - bool Close() { - mapped_file_.Close(); - if (path_ != nullptr) { - free(path_); - path_ = nullptr; - } - return true; - } + bool Close(); uint64_t CentralDirectoryRecordOffset(const void *cdr) const { - return mapped_file_.offset(static_cast<const char *>(cdr)); + return mapped_file_.offset(cdr); } const LH *LocalHeader(const CDH *cdh) const { @@ -210,8 +82,12 @@ class InputJar { mapped_file_.address(cdh->local_header_offset() + preamble_size_)); } + uint64_t LocalHeaderOffset(const LH *lh) const { + return mapped_file_.offset(lh); + } + private: - char *path_; + std::string path_; MappedFile mapped_file_; const CDH *cdh_; // current directory entry uint64_t preamble_size_; // Bytes before the Zip proper. diff --git a/src/tools/singlejar/input_jar_preambled_test.cc b/src/tools/singlejar/input_jar_preambled_test.cc index a622ea6953..b540198374 100644 --- a/src/tools/singlejar/input_jar_preambled_test.cc +++ b/src/tools/singlejar/input_jar_preambled_test.cc @@ -47,7 +47,7 @@ class InputJarPreambledTest : public testing::Test { void SetUp() override { input_jar_.reset(new InputJar); } void Verify(const std::string &path) { - ASSERT_TRUE(input_jar_->Open(path.c_str())); + ASSERT_TRUE(input_jar_->Open(path)); const LH *lh; const CDH *cdh; while ((cdh = input_jar_->NextEntry(&lh))) { diff --git a/src/tools/singlejar/input_jar_random_jars_test.cc b/src/tools/singlejar/input_jar_random_jars_test.cc index 2112c43660..b2f454245c 100644 --- a/src/tools/singlejar/input_jar_random_jars_test.cc +++ b/src/tools/singlejar/input_jar_random_jars_test.cc @@ -47,7 +47,7 @@ TEST_F(InputJarRandomJarsTest, ScanAllJars) { continue; } } - EXPECT_TRUE(input_jar.Open(path.c_str())); + EXPECT_TRUE(input_jar.Open(path)); const LH *lh; const CDH *cdh; int file_count = 0; diff --git a/src/tools/singlejar/mapped_file.h b/src/tools/singlejar/mapped_file.h index c8e96a3ffa..6338994c9f 100644 --- a/src/tools/singlejar/mapped_file.h +++ b/src/tools/singlejar/mapped_file.h @@ -20,6 +20,8 @@ #include <sys/stat.h> #include <unistd.h> +#include <string> + #include "src/tools/singlejar/diag.h" /* @@ -43,12 +45,12 @@ class MappedFile { ~MappedFile() { Close(); } - bool Open(const char *filename) { + bool Open(const std::string& path) { if (is_open()) { diag_errx(1, "%s:%d: This instance is already open", __FILE__, __LINE__); } - if ((fd_ = open(filename, O_RDONLY)) < 0) { - diag_warn("%s:%d: open %s:", __FILE__, __LINE__, filename); + if ((fd_ = open(path.c_str(), O_RDONLY)) < 0) { + diag_warn("%s:%d: open %s:", __FILE__, __LINE__, path.c_str()); return false; } // Map the file, even if it is empty (in which case allocate 1 byte to it). @@ -57,7 +59,7 @@ class MappedFile { (mapped_start_ = static_cast<unsigned char *>( mmap(nullptr, st.st_size ? st.st_size : 1, PROT_READ, MAP_PRIVATE, fd_, 0))) == MAP_FAILED) { - diag_warn("%s:%d: mmap %s:", __FILE__, __LINE__, filename); + diag_warn("%s:%d: mmap %s:", __FILE__, __LINE__, path.c_str()); close(fd_); fd_ = -1; return false; |