diff options
author | Sasha Smundak <asmundak@google.com> | 2016-07-27 17:05:11 +0000 |
---|---|---|
committer | Adam Michael <ajmichael@google.com> | 2016-07-28 18:36:29 -0400 |
commit | de63c6f89798b3ce2db02b81bd8c139341ad0868 (patch) | |
tree | cb241c75723b09205bf3778fbea79567d643cde2 /src | |
parent | a1a31ff6d16811dc72df32861bd1c1a98e104f02 (diff) |
Initial checkin of the combiner classes.
--
MOS_MIGRATED_REVID=128597707
Diffstat (limited to 'src')
-rw-r--r-- | src/tools/singlejar/BUILD | 26 | ||||
-rw-r--r-- | src/tools/singlejar/combiners.h | 198 | ||||
-rw-r--r-- | src/tools/singlejar/combiners_test.cc | 193 |
3 files changed, 417 insertions, 0 deletions
diff --git a/src/tools/singlejar/BUILD b/src/tools/singlejar/BUILD index 5414c8dfc5..552d84ffec 100644 --- a/src/tools/singlejar/BUILD +++ b/src/tools/singlejar/BUILD @@ -9,6 +9,22 @@ filegroup( ) cc_test( + name = "combiners_test", + size = "large", + srcs = [ + "combiners_test.cc", + ":combiners", + ":input_jar", + ":zip_headers", + ":zlib_interface", + ], + deps = [ + "//third_party:gtest", + "//third_party/zlib", + ], +) + +cc_test( name = "input_jar_scan_jartool_test", size = "large", srcs = [ @@ -124,6 +140,16 @@ cc_library( ) filegroup( + name = "combiners", + srcs = [ + "combiners.h", + ":transient_bytes", + ":zip_headers", + ":zlib_interface", + ], +) + +filegroup( name = "input_jar", srcs = [ "diag.h", diff --git a/src/tools/singlejar/combiners.h b/src/tools/singlejar/combiners.h new file mode 100644 index 0000000000..1bc2b16aac --- /dev/null +++ b/src/tools/singlejar/combiners.h @@ -0,0 +1,198 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SRC_TOOLS_SINGLEJAR_COMBINERS_H_ +#define SRC_TOOLS_SINGLEJAR_COMBINERS_H_ 1 + +#include <memory> +#include <string> + +#include "src/tools/singlejar/transient_bytes.h" +#include "src/tools/singlejar/zip_headers.h" +#include "src/tools/singlejar/zlib_interface.h" + +// An output jar entry consisting of a concatenation of the input jar +// entries. Byte sequences can be appended to it, too. +class Concatenator { + public: + Concatenator(const std::string &filename) : filename_(filename) {} + + // Appends the contents of the given input entry. + bool Merge(const CDH *cdh, const LH *lh) { + CreateBuffer(); + if (Z_NO_COMPRESSION == lh->compression_method()) { + buffer_->ReadEntryContents(lh); + } else if (Z_DEFLATED == lh->compression_method()) { + if (!inflater_.get()) { + inflater_.reset(new Inflater()); + } + buffer_->DecompressEntryContents(cdh, lh, inflater_.get()); + } else { + errx(2, "%s is neither stored nor deflated", filename_.c_str()); + } + return true; + } + + // Returns a point to the buffer containing Local Header followed by the + // payload. The caller is responsible of freeing the buffer. + void *OutputEntry() { + if (!buffer_.get()) { + return nullptr; + } + + // Allocate a contiguous buffer for the local file header and + // deflated data. We assume that deflate decreases the size, so if + // the deflater reports overflow, we just save original data. + size_t deflated_buffer_size = + sizeof(LH) + filename_.size() + buffer_->data_size(); + + // Huge entry (>4GB) needs Zip64 extension field with 64-bit original + // and compressed size values. + uint8_t + zip64_extension_buffer[sizeof(Zip64ExtraField) + 2 * sizeof(uint64_t)]; + bool huge_buffer = (buffer_->data_size() >= 0xFFFFFFFF); + if (huge_buffer) { + deflated_buffer_size += sizeof(zip64_extension_buffer); + } + LH *lh = reinterpret_cast<LH *>(malloc(deflated_buffer_size)); + if (lh == nullptr) { + return nullptr; + } + lh->signature(); + lh->version(20); + lh->bit_flag(0x0); + lh->last_mod_file_time(1); // 00:00:01 + lh->last_mod_file_date(33); // 1980-01-01 + lh->crc32(0x12345678); + lh->compressed_file_size32(0); + lh->file_name(filename_.c_str(), filename_.size()); + + if (huge_buffer) { + // Add Z64 extension if this is a huge entry. + lh->uncompressed_file_size32(0xFFFFFFFF); + Zip64ExtraField *z64 = + reinterpret_cast<Zip64ExtraField *>(zip64_extension_buffer); + z64->signature(); + z64->payload_size(2 * sizeof(uint64_t)); + z64->attr64(0, buffer_->data_size()); + lh->extra_fields(reinterpret_cast<uint8_t *>(z64), z64->size()); + } else { + lh->uncompressed_file_size32(buffer_->data_size()); + lh->extra_fields(nullptr, 0); + } + + uint32_t checksum; + uint64_t compressed_size; + uint16_t method = buffer_->Write(lh->data(), &checksum, &compressed_size); + lh->crc32(checksum); + lh->compression_method(method); + if (huge_buffer) { + lh->compressed_file_size32(compressed_size < 0xFFFFFFFF ? compressed_size + : 0xFFFFFFFF); + // Not sure if this has to be written in the small case, but it shouldn't + // hurt. + const_cast<Zip64ExtraField *>(lh->zip64_extra_field()) + ->attr64(1, compressed_size); + } else { + // If original data is <4GB, the compressed one is, too. + lh->compressed_file_size32(compressed_size); + } + return reinterpret_cast<void *>(lh); + } + + void Append(const char *s, size_t n) { + CreateBuffer(); + buffer_->Append(reinterpret_cast<const uint8_t *>(s), n); + } + + void Append(const char *s) { Append(s, strlen(s)); } + + void Append(const std::string &str) { Append(str.c_str(), str.size()); } + + const std::string &filename() const { return filename_; } + + private: + void CreateBuffer() { + if (!buffer_.get()) { + buffer_.reset(new TransientBytes()); + } + } + const std::string filename_; + std::unique_ptr<TransientBytes> buffer_; + std::unique_ptr<Inflater> inflater_; +}; + +// Combines the contents of the multiple input entries which are XML +// files into a single XML output entry with given top level XML tag. +class XmlCombiner { + public: + XmlCombiner(const std::string &filename, const char *xml_tag) + : filename_(filename), xml_tag_(xml_tag) {} + + bool Merge(const CDH *cdh, const LH *lh) { + if (!concatenator_.get()) { + concatenator_.reset(new Concatenator(filename_)); + concatenator_->Append("<"); + concatenator_->Append(xml_tag_); + concatenator_->Append(">\n"); + } + return concatenator_->Merge(cdh, lh); + } + + // Returns a pointer to the buffer containing LocalHeader for the entry, + // immediately followed by entry payload. The caller is responsible for + // freeing the buffer. + void *OutputEntry() { + if (!concatenator_.get()) { + return nullptr; + } + concatenator_->Append("</"); + concatenator_->Append(xml_tag_); + concatenator_->Append(">\n"); + return concatenator_->OutputEntry(); + } + + const std::string filename() const { return filename_; } + + private: + const std::string filename_; + const char *xml_tag_; + std::unique_ptr<Concatenator> concatenator_; + std::unique_ptr<Inflater> inflater_; +}; + +// A wrapper around Concatenator allowing to append +// NAME=VALUE +// lines to the contents. +class PropertyCombiner : public Concatenator { + public: + PropertyCombiner(const std::string &filename) : Concatenator(filename) {} + void AddProperty(const char *key, const char *value) { + // TODO(asmundak): deduplicate properties. + Append(key); + Append("=", 1); + Append(value); + Append("\n", 1); + } + + void AddProperty(const std::string &key, const std::string &value) { + // TODO(asmundak): deduplicate properties. + Append(key); + Append("=", 1); + Append(value); + Append("\n", 1); + } +}; + +#endif // SRC_TOOLS_SINGLEJAR_COMBINERS_H_ diff --git a/src/tools/singlejar/combiners_test.cc b/src/tools/singlejar/combiners_test.cc new file mode 100644 index 0000000000..9b6701c9ea --- /dev/null +++ b/src/tools/singlejar/combiners_test.cc @@ -0,0 +1,193 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "src/tools/singlejar/combiners.h" + +#include "src/tools/singlejar/input_jar.h" +#include "src/tools/singlejar/zip_headers.h" +#include "src/tools/singlejar/zlib_interface.h" +#include "gtest/gtest.h" + +namespace { + +static const char kTag1Contents[] = "<tag1>Contents1</tag1>"; +static const char kTag2Contents[] = "<tag2>Contents2</tag2>"; +static const char kCombinedXmlContents[] = + "<toplevel>\n<tag1>Contents1</tag1><tag2>Contents2</tag2></toplevel>\n"; +static const char kConcatenatedContents[] = + "<tag1>Contents1</tag1><tag2>Contents2</tag2>"; +const uint8_t kPoison = 0xFA; + +// A test fixture is used because test case setup is needed. +class CombinersTest : public ::testing::Test { + protected: + static void SetUpTestCase() { + ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR"))); + ASSERT_TRUE(CreateFile("tag1.xml", kTag1Contents)); + ASSERT_TRUE(CreateFile("tag2.xml", kTag2Contents)); + ASSERT_EQ(0, system("zip -qm combiners.zip tag1.xml tag2.xml")); + } + + static void TearDownTestCase() { system("rm -f xmls.zip"); } + + static bool CreateFile(const char *filename, const char *contents) { + FILE *fp = fopen(filename, "wb"); + size_t contents_size = strlen(contents); + if (fp == nullptr || fwrite(contents, contents_size, 1, fp) != 1 || + fclose(fp)) { + perror(filename); + return false; + } + return true; + } +}; + +// Test Concatenator. +TEST_F(CombinersTest, ConcatenatorSmall) { + InputJar input_jar; + Concatenator concatenator("concat"); + ASSERT_TRUE(input_jar.Open("combiners.zip")); + const LH *lh; + const CDH *cdh; + while ((cdh = input_jar.NextEntry(&lh))) { + if (cdh->file_name_is("tag1.xml") || cdh->file_name_is("tag2.xml")) { + ASSERT_TRUE(concatenator.Merge(cdh, lh)); + } + } + + // Create output, verify Local Header contents. + LH *entry = reinterpret_cast<LH *>(concatenator.OutputEntry()); + EXPECT_TRUE(entry->is()); + EXPECT_EQ(20, entry->version()); + EXPECT_EQ(Z_DEFLATED, entry->compression_method()); + uint64_t original_size = entry->uncompressed_file_size(); + uint64_t compressed_size = entry->compressed_file_size(); + EXPECT_EQ(strlen(kConcatenatedContents), original_size); + EXPECT_LE(compressed_size, original_size); + EXPECT_TRUE(entry->file_name_is("concat")); + EXPECT_EQ(0, entry->extra_fields_length()); + + // Decompress and check contents. + Inflater inflater; + inflater.DataToInflate(entry->data(), compressed_size); + uint8_t buffer[256]; + memset(buffer, kPoison, sizeof(buffer)); + ASSERT_EQ(Z_STREAM_END, inflater.Inflate((buffer), sizeof(buffer))); + EXPECT_EQ(kPoison, buffer[original_size]); + EXPECT_EQ(kConcatenatedContents, + std::string(reinterpret_cast<char *>(buffer), original_size)); + + free(reinterpret_cast<void *>(entry)); +} + +// Tests that Concatenator creates huge (>4GB original/compressed sizes) +// correctly. This test is slow. +TEST_F(CombinersTest, ConcatenatorHuge) { + Concatenator concatenator("huge"); + + // Append 5,000,000,000 bytes to the concatenator. + const int kBufSize = 1000000; + char *buf = reinterpret_cast<char *>(malloc(kBufSize)); + memset(buf, kPoison, kBufSize); + for (int i = 0; i < 5000; ++i) { + concatenator.Append(buf, kBufSize); + } + free(buf); + + // Now hope that we have enough memory :-) + LH *entry = reinterpret_cast<LH *>(concatenator.OutputEntry()); + ASSERT_NE(nullptr, entry); + ASSERT_TRUE(entry->is()); + ASSERT_EQ(20, entry->version()); + EXPECT_EQ(Z_DEFLATED, entry->compression_method()); + uint64_t original_size = entry->uncompressed_file_size(); + uint64_t compressed_size = entry->compressed_file_size(); + ASSERT_EQ(5000000000, original_size); + ASSERT_LE(compressed_size, original_size); + free(reinterpret_cast<void *>(entry)); +} + +// Test XmlCombiner. +TEST_F(CombinersTest, XmlCombiner) { + InputJar input_jar; + XmlCombiner xml_combiner("combined.xml", "toplevel"); + ASSERT_TRUE(input_jar.Open("combiners.zip")); + const LH *lh; + const CDH *cdh; + while ((cdh = input_jar.NextEntry(&lh))) { + if (cdh->file_name_is("tag1.xml") || cdh->file_name_is("tag2.xml")) { + ASSERT_TRUE(xml_combiner.Merge(cdh, lh)); + } + } + + // Create output, verify Local Header contents. + LH *entry = reinterpret_cast<LH *>(xml_combiner.OutputEntry()); + EXPECT_TRUE(entry->is()); + EXPECT_EQ(20, entry->version()); + EXPECT_EQ(Z_DEFLATED, entry->compression_method()); + uint64_t original_size = entry->uncompressed_file_size(); + uint64_t compressed_size = entry->compressed_file_size(); + EXPECT_EQ(strlen(kCombinedXmlContents), original_size); + EXPECT_LE(compressed_size, original_size); + EXPECT_TRUE(entry->file_name_is("combined.xml")); + EXPECT_EQ(0, entry->extra_fields_length()); + + // Decompress and check contents. + Inflater inflater; + inflater.DataToInflate(entry->data(), compressed_size); + uint8_t buffer[256]; + memset(buffer, kPoison, sizeof(buffer)); + ASSERT_EQ(Z_STREAM_END, inflater.Inflate((buffer), sizeof(buffer))); + EXPECT_EQ(kPoison, buffer[original_size]); + EXPECT_EQ(kCombinedXmlContents, + std::string(reinterpret_cast<char *>(buffer), original_size)); + + free(reinterpret_cast<void *>(entry)); +} +// Test PropertyCombiner. +TEST_F(CombinersTest, PropertyCombiner) { + static char kProperties[] = + "name=value\n" + "name_str=value_str\n"; + PropertyCombiner property_combiner("properties"); + property_combiner.AddProperty("name", "value"); + property_combiner.AddProperty(std::string("name_str"), + std::string("value_str")); + + // Create output, verify Local Header contents. + LH *entry = reinterpret_cast<LH *>(property_combiner.OutputEntry()); + EXPECT_TRUE(entry->is()); + EXPECT_EQ(20, entry->version()); + EXPECT_EQ(Z_DEFLATED, entry->compression_method()); + uint64_t original_size = entry->uncompressed_file_size(); + uint64_t compressed_size = entry->compressed_file_size(); + EXPECT_EQ(strlen(kProperties), original_size); + EXPECT_LE(compressed_size, original_size); + EXPECT_EQ("properties", entry->file_name_string()); + EXPECT_EQ(0, entry->extra_fields_length()); + + // Decompress and check contents. + Inflater inflater; + inflater.DataToInflate(entry->data(), compressed_size); + uint8_t buffer[256]; + memset(buffer, kPoison, sizeof(buffer)); + ASSERT_EQ(Z_STREAM_END, inflater.Inflate((buffer), sizeof(buffer))); + EXPECT_EQ(kPoison, buffer[original_size]); + EXPECT_EQ(kProperties, + std::string(reinterpret_cast<char *>(buffer), original_size)); + + free(reinterpret_cast<void *>(entry)); +} + +} // namespace |