From 10b382587a79bdd431f7b2630d445561585f097c Mon Sep 17 00:00:00 2001 From: Sasha Smundak Date: Tue, 16 Aug 2016 17:49:00 +0000 Subject: Preparation for enabling --compression/--dont_change_compression options: Allow combiners to copy the payload out instead of compressing it. -- MOS_MIGRATED_REVID=130419840 --- src/tools/singlejar/combiners.cc | 17 +++++--- src/tools/singlejar/combiners.h | 15 ++++--- src/tools/singlejar/combiners_test.cc | 62 ++++++++++++++++++++++++----- src/tools/singlejar/output_jar.cc | 47 +++++++++++++++++----- src/tools/singlejar/transient_bytes.h | 12 ++++-- src/tools/singlejar/transient_bytes_test.cc | 25 ++++++++++-- 6 files changed, 139 insertions(+), 39 deletions(-) diff --git a/src/tools/singlejar/combiners.cc b/src/tools/singlejar/combiners.cc index 736e504df9..8095413672 100644 --- a/src/tools/singlejar/combiners.cc +++ b/src/tools/singlejar/combiners.cc @@ -34,7 +34,7 @@ bool Concatenator::Merge(const CDH *cdh, const LH *lh) { return true; } -void *Concatenator::OutputEntry() { +void *Concatenator::OutputEntry(bool compress) { if (!buffer_.get()) { return nullptr; } @@ -82,7 +82,14 @@ void *Concatenator::OutputEntry() { uint32_t checksum; uint64_t compressed_size; - uint16_t method = buffer_->Write(lh->data(), &checksum, &compressed_size); + uint16_t method; + if (compress) { + method = buffer_->CompressOut(lh->data(), &checksum, &compressed_size); + } else { + buffer_->CopyOut(lh->data(), &checksum); + method = Z_NO_COMPRESSION; + compressed_size = buffer_->data_size(); + } lh->crc32(checksum); lh->compression_method(method); if (huge_buffer) { @@ -103,7 +110,7 @@ NullCombiner::~NullCombiner() {} bool NullCombiner::Merge(const CDH *cdh, const LH *lh) { return true; } -void *NullCombiner::OutputEntry() { return nullptr; } +void *NullCombiner::OutputEntry(bool compress) { return nullptr; } XmlCombiner::~XmlCombiner() {} @@ -117,14 +124,14 @@ bool XmlCombiner::Merge(const CDH *cdh, const LH *lh) { return concatenator_->Merge(cdh, lh); } -void *XmlCombiner::OutputEntry() { +void *XmlCombiner::OutputEntry(bool compress) { if (!concatenator_.get()) { return nullptr; } concatenator_->Append("Append(xml_tag_); concatenator_->Append(">\n"); - return concatenator_->OutputEntry(); + return concatenator_->OutputEntry(compress); } PropertyCombiner::~PropertyCombiner() {} diff --git a/src/tools/singlejar/combiners.h b/src/tools/singlejar/combiners.h index 15baa78c7f..ce5c3dce91 100644 --- a/src/tools/singlejar/combiners.h +++ b/src/tools/singlejar/combiners.h @@ -27,9 +27,12 @@ class Combiner { virtual ~Combiner(); // Merges the contents of the given Zip entry to this instance. virtual bool Merge(const CDH *cdh, const LH *lh) = 0; - // Returns a point to the buffer containing Local Header followed by the - // payload. The caller is responsible of freeing the buffer. - virtual void *OutputEntry() = 0; + // Returns a point to the buffer containing Local Header followed by the + // payload. The caller is responsible of freeing the buffer. If `compress' + // is not set, the payload is a copy of the bytes held by this combiner. + // Otherwise the payload is compressed, provided that the compressed data + // is smaller than the original. + virtual void *OutputEntry(bool compress) = 0; }; // An output jar entry consisting of a concatenation of the input jar @@ -42,7 +45,7 @@ class Concatenator : public Combiner { bool Merge(const CDH *cdh, const LH *lh) override; - void *OutputEntry() override; + void *OutputEntry(bool compress) override; void Append(const char *s, size_t n) { CreateBuffer(); @@ -73,7 +76,7 @@ class NullCombiner : public Combiner { public: ~NullCombiner() override; bool Merge(const CDH *cdh, const LH *lh) override; - void *OutputEntry() override; + void *OutputEntry(bool compress) override; }; // Combines the contents of the multiple input entries which are XML @@ -86,7 +89,7 @@ class XmlCombiner : public Combiner { bool Merge(const CDH *cdh, const LH *lh) override; - void *OutputEntry() override; + void *OutputEntry(bool compress) override; const std::string filename() const { return filename_; } diff --git a/src/tools/singlejar/combiners_test.cc b/src/tools/singlejar/combiners_test.cc index f6da247721..c81021014e 100644 --- a/src/tools/singlejar/combiners_test.cc +++ b/src/tools/singlejar/combiners_test.cc @@ -20,6 +20,7 @@ #include "gtest/gtest.h" namespace { +using std::string; static const char kTag1Contents[] = "Contents1"; static const char kTag2Contents[] = "Contents2"; @@ -67,7 +68,7 @@ TEST_F(CombinersTest, ConcatenatorSmall) { } // Create output, verify Local Header contents. - LH *entry = reinterpret_cast(concatenator.OutputEntry()); + LH *entry = reinterpret_cast(concatenator.OutputEntry(true)); EXPECT_TRUE(entry->is()); EXPECT_EQ(20, entry->version()); EXPECT_EQ(Z_DEFLATED, entry->compression_method()); @@ -86,8 +87,21 @@ TEST_F(CombinersTest, ConcatenatorSmall) { ASSERT_EQ(Z_STREAM_END, inflater.Inflate((buffer), sizeof(buffer))); EXPECT_EQ(kPoison, buffer[original_size]); EXPECT_EQ(kConcatenatedContents, - std::string(reinterpret_cast(buffer), original_size)); + string(reinterpret_cast(buffer), original_size)); + free(reinterpret_cast(entry)); + // And if we just copy instead of compress: + entry = reinterpret_cast(concatenator.OutputEntry(false)); + EXPECT_TRUE(entry->is()); + EXPECT_EQ(20, entry->version()); + EXPECT_EQ(Z_NO_COMPRESSION, entry->compression_method()); + original_size = entry->uncompressed_file_size(); + compressed_size = entry->compressed_file_size(); + EXPECT_EQ(compressed_size, original_size); + EXPECT_EQ(kConcatenatedContents, + string(reinterpret_cast(entry->data()), original_size)); + EXPECT_TRUE(entry->file_name_is("concat")); + EXPECT_EQ(0, entry->extra_fields_length()); free(reinterpret_cast(entry)); } @@ -106,7 +120,7 @@ TEST_F(CombinersTest, ConcatenatorHuge) { free(buf); // Now hope that we have enough memory :-) - LH *entry = reinterpret_cast(concatenator.OutputEntry()); + LH *entry = reinterpret_cast(concatenator.OutputEntry(true)); ASSERT_NE(nullptr, entry); ASSERT_TRUE(entry->is()); ASSERT_EQ(20, entry->version()); @@ -122,24 +136,27 @@ TEST_F(CombinersTest, ConcatenatorHuge) { TEST_F(CombinersTest, NullCombiner) { NullCombiner null_combiner; ASSERT_TRUE(null_combiner.Merge(nullptr, nullptr)); - ASSERT_EQ(nullptr, null_combiner.OutputEntry()); + ASSERT_EQ(nullptr, null_combiner.OutputEntry(true)); + ASSERT_EQ(nullptr, null_combiner.OutputEntry(false)); } // Test XmlCombiner. TEST_F(CombinersTest, XmlCombiner) { InputJar input_jar; XmlCombiner xml_combiner("combined.xml", "toplevel"); + XmlCombiner xml_combiner2("combined2.xml", "toplevel"); ASSERT_TRUE(input_jar.Open("combiners.zip")); const LH *lh; const CDH *cdh; while ((cdh = input_jar.NextEntry(&lh))) { if (cdh->file_name_is("tag1.xml") || cdh->file_name_is("tag2.xml")) { ASSERT_TRUE(xml_combiner.Merge(cdh, lh)); + ASSERT_TRUE(xml_combiner2.Merge(cdh, lh)); } } // Create output, verify Local Header contents. - LH *entry = reinterpret_cast(xml_combiner.OutputEntry()); + LH *entry = reinterpret_cast(xml_combiner.OutputEntry(true)); EXPECT_TRUE(entry->is()); EXPECT_EQ(20, entry->version()); EXPECT_EQ(Z_DEFLATED, entry->compression_method()); @@ -158,8 +175,21 @@ TEST_F(CombinersTest, XmlCombiner) { ASSERT_EQ(Z_STREAM_END, inflater.Inflate((buffer), sizeof(buffer))); EXPECT_EQ(kPoison, buffer[original_size]); EXPECT_EQ(kCombinedXmlContents, - std::string(reinterpret_cast(buffer), original_size)); + string(reinterpret_cast(buffer), original_size)); + free(reinterpret_cast(entry)); + // And for the combiner that just copies out: + entry = reinterpret_cast(xml_combiner2.OutputEntry(false)); + EXPECT_TRUE(entry->is()); + EXPECT_EQ(20, entry->version()); + EXPECT_EQ(Z_NO_COMPRESSION, entry->compression_method()); + original_size = entry->uncompressed_file_size(); + compressed_size = entry->compressed_file_size(); + EXPECT_EQ(compressed_size, original_size); + EXPECT_EQ(kCombinedXmlContents, + string(reinterpret_cast(entry->data()), original_size)); + EXPECT_TRUE(entry->file_name_is("combined2.xml")); + EXPECT_EQ(0, entry->extra_fields_length()); free(reinterpret_cast(entry)); } @@ -170,14 +200,13 @@ TEST_F(CombinersTest, PropertyCombiner) { "name_str=value_str\n"; PropertyCombiner property_combiner("properties"); property_combiner.AddProperty("name", "value"); - property_combiner.AddProperty(std::string("name_str"), - std::string("value_str")); + property_combiner.AddProperty(string("name_str"), string("value_str")); // Merge should not be called. ASSERT_FALSE(property_combiner.Merge(nullptr, nullptr)); // Create output, verify Local Header contents. - LH *entry = reinterpret_cast(property_combiner.OutputEntry()); + LH *entry = reinterpret_cast(property_combiner.OutputEntry(true)); EXPECT_TRUE(entry->is()); EXPECT_EQ(20, entry->version()); EXPECT_EQ(Z_DEFLATED, entry->compression_method()); @@ -196,8 +225,21 @@ TEST_F(CombinersTest, PropertyCombiner) { ASSERT_EQ(Z_STREAM_END, inflater.Inflate((buffer), sizeof(buffer))); EXPECT_EQ(kPoison, buffer[original_size]); EXPECT_EQ(kProperties, - std::string(reinterpret_cast(buffer), original_size)); + string(reinterpret_cast(buffer), original_size)); + free(reinterpret_cast(entry)); + // Create output, verify Local Header contents. + entry = reinterpret_cast(property_combiner.OutputEntry(false)); + EXPECT_TRUE(entry->is()); + EXPECT_EQ(20, entry->version()); + EXPECT_EQ(Z_NO_COMPRESSION, entry->compression_method()); + original_size = entry->uncompressed_file_size(); + compressed_size = entry->compressed_file_size(); + EXPECT_EQ(compressed_size, original_size); + EXPECT_EQ(kProperties, + string(reinterpret_cast(entry->data()), original_size)); + EXPECT_EQ("properties", entry->file_name_string()); + EXPECT_EQ(0, entry->extra_fields_length()); free(reinterpret_cast(entry)); } diff --git a/src/tools/singlejar/output_jar.cc b/src/tools/singlejar/output_jar.cc index 7314e5f6c7..848477523c 100644 --- a/src/tools/singlejar/output_jar.cc +++ b/src/tools/singlejar/output_jar.cc @@ -188,14 +188,14 @@ int OutputJar::Doit(Options *options) { // file, followed by the build properties file. AddDirectory("META-INF/"); manifest_.Append("\r\n"); - WriteEntry(manifest_.OutputEntry()); + WriteEntry(manifest_.OutputEntry(options_->force_compression)); if (!options_->exclude_build_data) { - WriteEntry(build_properties_.OutputEntry()); + WriteEntry(build_properties_.OutputEntry(options_->force_compression)); } // Then classpath resources. for (auto &classpath_resource : classpath_resources_) { - WriteEntry(classpath_resource->OutputEntry()); + WriteEntry(classpath_resource->OutputEntry(options_->force_compression)); } // Then copy source files' contents. @@ -323,6 +323,29 @@ bool OutputJar::AddJar(int jar_path_index) { } } + // Decide what to do with the existing entry depending on force_compress + // and preserve_compress options and entry's current state: + // force_compress preserve_compress compressed Action + // N N N Copy + // N N Y Decompress + // N Y * Copy + // Y * N Compress + // Y N Y Copy + // Y Y can't be + if ((options_->force_compression && + jar_entry->compression_method() == Z_NO_COMPRESSION) || + (!options_->force_compression && !options_->preserve_compression && + jar_entry->compression_method() == Z_DEFLATED)) { + // Change compression. + Concatenator combiner(jar_entry->file_name_string()); + if (!combiner.Merge(jar_entry, lh)) { + diag_err(1, "%s:%d: cannot add %.*s", __FILE__, __LINE__, + jar_entry->file_name_length(), jar_entry->file_name()); + } + WriteEntry(combiner.OutputEntry(options_->force_compression)); + continue; + } + // Now we have to copy: // local header // file data @@ -428,10 +451,12 @@ void OutputJar::WriteEntry(void *buffer) { // https://msdn.microsoft.com/en-us/library/9kkf9tah.aspx // ("32-Bit Windows Time/Date Formats") if (options_->normalize_timestamps) { - // Regular "normalized" timestamp is 01/01/1980 00:00:00. No need to handle - // .class files here, they are always copied from the input jars. - entry->last_mod_file_time(0); + // Regular "normalized" timestamp is 01/01/1980 00:00:00, while for the + // .class file it is 01/01/1980 00:00:02 entry->last_mod_file_date(33); + entry->last_mod_file_time( + ends_with(entry->file_name(), entry->file_name_length(), ".class") ? 1 + : 0); } else { struct tm tm; // Time has 2-second resolution, so round up: @@ -525,14 +550,14 @@ bool OutputJar::Close() { } for (auto &service_handler : service_handlers_) { - WriteEntry(service_handler->OutputEntry()); + WriteEntry(service_handler->OutputEntry(options_->force_compression)); } for (auto &extra_combiner : extra_combiners_) { - WriteEntry(extra_combiner->OutputEntry()); + WriteEntry(extra_combiner->OutputEntry(options_->force_compression)); } - WriteEntry(spring_handlers_.OutputEntry()); - WriteEntry(spring_schemas_.OutputEntry()); - WriteEntry(protobuf_meta_handler_.OutputEntry()); + WriteEntry(spring_handlers_.OutputEntry(options_->force_compression)); + WriteEntry(spring_schemas_.OutputEntry(options_->force_compression)); + WriteEntry(protobuf_meta_handler_.OutputEntry(options_->force_compression)); // TODO(asmundak): handle manifest; off_t output_position = lseek(fd_, 0, SEEK_CUR); if (output_position == (off_t)-1) { diff --git a/src/tools/singlejar/transient_bytes.h b/src/tools/singlejar/transient_bytes.h index a6968aae9e..f2f1ba2edc 100644 --- a/src/tools/singlejar/transient_bytes.h +++ b/src/tools/singlejar/transient_bytes.h @@ -143,7 +143,8 @@ class TransientBytes { // shorter of compressed or uncompressed. Sets the checksum and number of // bytes written and returns Z_DEFLATED if compression took place or // Z_NO_COMPRESSION otherwise. - uint16_t Write(uint8_t *buffer, uint32_t *checksum, uint64_t *bytes_written) { + uint16_t CompressOut(uint8_t *buffer, uint32_t *checksum, + uint64_t *bytes_written) { Deflater deflater; deflater.next_out = buffer; *checksum = 0; @@ -195,6 +196,13 @@ class TransientBytes { } // Compression does not help, just copy the bytes to the output buffer. + CopyOut(buffer, checksum); + *bytes_written = data_size(); + return Z_NO_COMPRESSION; + } + + // Copies the bytes to the buffer and sets the checksum. + void CopyOut(uint8_t *buffer, uint32_t *checksum) { uint64_t to_copy = data_size(); uint8_t *buffer_end = buffer + to_copy; *checksum = 0; @@ -206,8 +214,6 @@ class TransientBytes { memcpy(buffer_end - to_copy, data_block->data_, chunk_size); to_copy -= chunk_size; } - *bytes_written = data_size(); - return Z_NO_COMPRESSION; } // Number of data bytes. diff --git a/src/tools/singlejar/transient_bytes_test.cc b/src/tools/singlejar/transient_bytes_test.cc index 11719ff77b..cdb07dd869 100644 --- a/src/tools/singlejar/transient_bytes_test.cc +++ b/src/tools/singlejar/transient_bytes_test.cc @@ -227,7 +227,9 @@ TEST_F(TransientBytesTest, DecompressEntryContents) { input_jar->Close(); } -TEST_F(TransientBytesTest, WriteCompress) { +// Verify CompressOut: if compressed size is less than original, it writes out +// compressed data. +TEST_F(TransientBytesTest, CompressOut) { std::unique_ptr input_jar(new InputJar); ASSERT_TRUE(input_jar->Open(kCompressedJar)); const LH *lh; @@ -248,7 +250,7 @@ TEST_F(TransientBytesTest, WriteCompress) { ASSERT_NE(nullptr, buffer); uint32_t crc32 = 0; uint64_t bytes_written; - uint16_t rc = transient_bytes_->Write(buffer, &crc32, &bytes_written); + uint16_t rc = transient_bytes_->CompressOut(buffer, &crc32, &bytes_written); EXPECT_EQ(Z_DEFLATED, rc) << "TransientBytes::Write did not compress " << cdh->file_name_string(); @@ -294,15 +296,30 @@ TEST_F(TransientBytesTest, WriteCompress) { input_jar->Close(); } -TEST_F(TransientBytesTest, WriteStore) { +// Verify CompressOut: if compressed size exceeds original, it writes out +// original data +TEST_F(TransientBytesTest, CompressOutStore) { transient_bytes_->Append("a"); uint8_t buffer[400] = {0xfe, 0xfb}; uint32_t crc32 = 0; uint64_t bytes_written; - uint16_t rc = transient_bytes_->Write(buffer, &crc32, &bytes_written); + uint16_t rc = transient_bytes_->CompressOut(buffer, &crc32, &bytes_written); ASSERT_EQ(Z_NO_COMPRESSION, rc); ASSERT_EQ(1, bytes_written); + ASSERT_EQ('a', buffer[0]); ASSERT_EQ(0xfb, buffer[1]); + ASSERT_EQ(0xE8B7BE43, crc32); +} + +// Verify CopyOut. +TEST_F(TransientBytesTest, CopyOut) { + transient_bytes_->Append("a"); + uint8_t buffer[400] = {0xfe, 0xfb}; + uint32_t crc32 = 0; + transient_bytes_->CopyOut(buffer, &crc32); + ASSERT_EQ('a', buffer[0]); + ASSERT_EQ(0xfb, buffer[1]); + ASSERT_EQ(0xE8B7BE43, crc32); } } // namespace -- cgit v1.2.3