diff options
author | 2016-08-05 20:07:40 +0000 | |
---|---|---|
committer | 2016-08-08 08:07:43 +0000 | |
commit | 57472638ce94796deabc3d3db4ba851a1d0d600f (patch) | |
tree | a974e20c948e22b8f1f6a7080352a83952bd25d5 /src | |
parent | 6f12858c22a2a8b588de0854c640eb784ed38e5e (diff) |
Initial checkin of the OutputJar and main.
--
MOS_MIGRATED_REVID=129473820
Diffstat (limited to 'src')
-rw-r--r-- | src/tools/singlejar/BUILD | 61 | ||||
-rw-r--r-- | src/tools/singlejar/options.h | 2 | ||||
-rw-r--r-- | src/tools/singlejar/output_jar.cc | 615 | ||||
-rw-r--r-- | src/tools/singlejar/output_jar.h | 101 | ||||
-rw-r--r-- | src/tools/singlejar/output_jar_simple_test.cc | 307 | ||||
-rw-r--r-- | src/tools/singlejar/singlejar_main.cc | 23 |
6 files changed, 1109 insertions, 0 deletions
diff --git a/src/tools/singlejar/BUILD b/src/tools/singlejar/BUILD index 81e5d283de..497ff287f5 100644 --- a/src/tools/singlejar/BUILD +++ b/src/tools/singlejar/BUILD @@ -8,6 +8,19 @@ filegroup( visibility = ["//src:__pkg__"], ) +cc_binary( + name = "singlejar", + srcs = [ + "singlejar_main.cc", + ], + linkstatic = 1, + deps = [ + "options", + "output_jar", + "//third_party/zlib", + ], +) + cc_test( name = "combiners_test", size = "large", @@ -103,6 +116,26 @@ cc_test( ) cc_test( + name = "output_jar_simple_test", + srcs = [ + "output_jar_simple_test.cc", + ], + copts = ["-Ithird_party/bazel"], + data = [ + ":test1", + ":test2", + ], + deps = [ + ":input_jar", + ":options", + ":output_jar", + "//src/main/cpp:blaze_util", + "//src/main/cpp/util", + "//third_party:gtest", + ], +) + +cc_test( name = "token_stream_test", srcs = [ "token_stream_test.cc", @@ -172,6 +205,26 @@ cc_library( "options.h", ":token_stream", ], + hdrs = ["options.h"], +) + +cc_library( + name = "output_jar", + srcs = [ + "output_jar.cc", + "output_jar.h", + ":combiners", + ":mapped_file.h", + ":options.h", + ":zip_headers", + ], + hdrs = ["output_jar.h"], + deps = [ + ":input_jar", + ":options", + "//src/main/cpp/util", + "//third_party/zlib", + ], ) filegroup( @@ -227,3 +280,11 @@ java_library( "zlib_interface.h", ], ) + +java_library( + name = "test2", + resources = [ + "token_stream.h", + "transient_bytes.h", + ], +) diff --git a/src/tools/singlejar/options.h b/src/tools/singlejar/options.h index 7cf3f2aa13..753f0a2718 100644 --- a/src/tools/singlejar/options.h +++ b/src/tools/singlejar/options.h @@ -26,6 +26,7 @@ class Options { force_compression(false), normalize_timestamps(false), no_duplicates(false), + no_duplicate_classes(false), preserve_compression(false), verbose(false), warn_duplicate_resources(false) {} @@ -47,6 +48,7 @@ class Options { bool force_compression; bool normalize_timestamps; bool no_duplicates; + bool no_duplicate_classes; bool preserve_compression; bool verbose; bool 
warn_duplicate_resources; diff --git a/src/tools/singlejar/output_jar.cc b/src/tools/singlejar/output_jar.cc new file mode 100644 index 0000000000..a81e35f06b --- /dev/null +++ b/src/tools/singlejar/output_jar.cc @@ -0,0 +1,615 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * The implementation of the OutputJar methods. + */ +#include "src/tools/singlejar/output_jar.h" + +#include <err.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#if defined(__linux) +#include <sys/sendfile.h> +#endif +#include <sys/stat.h> +#include <sys/times.h> +#include <unistd.h> + +#include "src/main/cpp/util/file.h" +#include "src/tools/singlejar/combiners.h" +#include "src/tools/singlejar/diag.h" +#include "src/tools/singlejar/input_jar.h" +#include "src/tools/singlejar/mapped_file.h" +#include "src/tools/singlejar/options.h" +#include "src/tools/singlejar/zip_headers.h" + +#include <zlib.h> + +#define TODO(cond, msg) \ + if (!(cond)) { \ + diag_errx(2, "%s:%d: TODO(asmundak): " msg, __FILE__, __LINE__); \ + } + +OutputJar::OutputJar() + : options_(nullptr), + fd_(-1), + entries_(0), + duplicate_entries_(0), + cen_(nullptr), + cen_size_(0), + cen_capacity_(0), + spring_handlers_("META-INF/spring.handlers"), + spring_schemas_("META-INF/spring.schemas"), + protobuf_meta_handler_("protobuf.meta"), + manifest_("META-INF/MANIFEST.MF"), + build_properties_("build-data.properties") { + 
known_members_.emplace(spring_handlers_.filename(), + EntryInfo{EntryInfo::CONCATENATE, &spring_handlers_}); + known_members_.emplace(spring_schemas_.filename(), + EntryInfo{EntryInfo::CONCATENATE, &spring_schemas_}); + known_members_.emplace(manifest_.filename(), + EntryInfo{EntryInfo::SKIP, &manifest_}); + known_members_.emplace( + protobuf_meta_handler_.filename(), + EntryInfo{EntryInfo::CONCATENATE, &protobuf_meta_handler_}); + known_members_.emplace(build_properties_.filename(), + EntryInfo{EntryInfo::SKIP, &build_properties_}); + manifest_.Append( + "Manifest-Version: 1.0\r\n" + "Created-By: singlejar\r\n"); +} + +int OutputJar::Doit(Options *options) { + if (nullptr != options_) { + diag_errx(1, "%s:%d: Doit() can be called only once.", __FILE__, __LINE__); + } + options_ = options; + + // TODO(asmundak): handle these options. + TODO(!options_->force_compression, "Handle --compression"); + TODO(!options_->normalize_timestamps, "Handle --normalize"); + TODO(!options_->preserve_compression, "Handle --dont_change_compression"); + TODO(!options_->include_prefixes.size(), "Handle --include_prefixes"); + + build_properties_.AddProperty("build.target", options_->output_jar.c_str()); + if (options_->verbose) { + fprintf(stderr, "combined_file_name=%s\n", options_->output_jar.c_str()); + if (!options_->main_class.empty()) { + fprintf(stderr, "main_class=%s\n", options_->main_class.c_str()); + } + if (!options_->java_launcher.empty()) { + fprintf(stderr, "java_launcher_file=%s\n", + options_->java_launcher.c_str()); + } + fprintf(stderr, "%ld source files\n", options_->input_jars.size()); + fprintf(stderr, "%ld manifest lines\n", options_->manifest_lines.size()); + } + + if (!Open()) { + exit(1); + } + + // Copy launcher if it is set. 
+ if (!options_->java_launcher.empty()) { + const char *const launcher_path = options_->java_launcher.c_str(); + int in_fd = open(launcher_path, O_RDONLY); + struct stat statbuf; + if (fd_ < 0 || fstat(in_fd, &statbuf)) { + diag_err(1, "%s", launcher_path); + } + ssize_t byte_count = AppendFile(in_fd, nullptr, statbuf.st_size); + if (byte_count < 0) { + diag_err(1, "%s:%d: Cannot copy %s to %s", __FILE__, __LINE__, + launcher_path, options_->output_jar.c_str()); + } else if (byte_count != statbuf.st_size) { + diag_err(1, "%s:%d: Copied only %ld bytes out of %" PRIu64 " from %s", + __FILE__, __LINE__, byte_count, statbuf.st_size, launcher_path); + } + close(in_fd); + if (options_->verbose) { + fprintf(stderr, "Prepended %s (%" PRIu64 " bytes)\n", launcher_path, + statbuf.st_size); + } + } + + if (!options_->main_class.empty()) { + build_properties_.AddProperty("main.class", options_->main_class); + manifest_.Append("Main-Class: "); + manifest_.Append(options_->main_class); + manifest_.Append("\r\n"); + } + + for (auto &manifest_line : options_->manifest_lines) { + if (!manifest_line.empty()) { + manifest_.Append(manifest_line); + if (manifest_line[manifest_line.size() - 1] != '\n') { + manifest_.Append("\r\n"); + } + } + } + + for (auto &build_info_line : options_->build_info_lines) { + build_properties_.Append(build_info_line); + build_properties_.Append("\n"); + } + + for (auto &build_info_file : options_->build_info_files) { + MappedFile mapped_file; + if (!mapped_file.Open(build_info_file)) { + diag_err(1, "%s:%d: Bad build info file %s", __FILE__, __LINE__, + build_info_file.c_str()); + } + const char *data = reinterpret_cast<const char *>(mapped_file.start()); + const char *data_end = reinterpret_cast<const char *>(mapped_file.end()); + // TODO(asmundak): this isn't right, we should parse properties file. 
+ while (data < data_end) { + const char *next_data = strchr(static_cast<const char *>(data), '\n'); + if (next_data) { + ++next_data; + } else { + next_data = data_end; + } + build_properties_.Append(data, next_data - data); + data = next_data; + } + mapped_file.Close(); + } + + for (auto &rpath : options_->classpath_resources) { + // TODO(asmundak): On Windows, look for \, too. + ClasspathResource(blaze_util::Basename(rpath), rpath); + } + + for (auto &rdesc : options_->resources) { + // A resource description is either NAME or NAME:PATH + std::size_t colon = rdesc.find_first_of(':'); + if (0 == colon) { + diag_errx(1, "%s:%d: Bad resource description %s", __FILE__, __LINE__, + rdesc.c_str()); + } + if (std::string::npos == colon) { + ClasspathResource(rdesc, rdesc); + } else { + ClasspathResource(rdesc.substr(0, colon), rdesc.substr(colon + 1)); + } + } + + // Ready to write zip entries. + // First, write a directory entry for the META-INF, followed by the manifest + // file, followed by the build properties file. + AddDirectory("META-INF/"); + manifest_.Append("\r\n"); + WriteEntry(manifest_.OutputEntry()); + if (!options_->exclude_build_data) { + WriteEntry(build_properties_.OutputEntry()); + } + + // Then classpath resources. + for (auto &classpath_resource : classpath_resources_) { + WriteEntry(classpath_resource->OutputEntry()); + } + + // Then copy source files' contents. + for (size_t ix = 0; ix < options_->input_jars.size(); ++ix) { + if (!AddJar(ix)) { + exit(1); + } + } + + // All entries written, write Central Directory and close. + Close(); + return 0; +} + +OutputJar::~OutputJar() { + if (fd_ >= 0) { + diag_warnx("%s:%d: Close() should be called first", __FILE__, __LINE__); + } +} + +bool OutputJar::Open() { + if (fd_ >= 0) { + diag_errx(1, "%s:%d: Cannot open output archive twice", __FILE__, __LINE__); + } + // The output file has read/write/execute permissions for the owner, + // default for the rest. 
+ mode_t old_umask = umask(0); + fd_ = creat(path(), (S_IRWXU | S_IRWXG | S_IRWXO) & ~old_umask); + umask(old_umask); + if (fd_ < 0) { + diag_warn("%s:%d: %s", __FILE__, __LINE__, path()); + return false; + } + if (options_->verbose) { + fprintf(stderr, "Writing to %s\n", path()); + } + return true; +} + +bool OutputJar::AddJar(size_t jar_path_index) { + const std::string& input_jar_path = options_->input_jars[jar_path_index]; + InputJar input_jar; + if (!input_jar.Open(input_jar_path)) { + return false; + } + const CDH *jar_entry; + const LH *lh; + while ((jar_entry = input_jar.NextEntry(&lh))) { + const char *file_name = jar_entry->file_name(); + auto file_name_length = jar_entry->file_name_length(); + if (!file_name_length) { + diag_errx( + 1, "%s:%d: Bad central directory record in %s at offset 0x%" PRIx64, + __FILE__, __LINE__, input_jar_path.c_str(), + input_jar.CentralDirectoryRecordOffset(jar_entry)); + } + // Special files that cannot be handled by looking up known_members_ map: + // * ignore *.SF, *.RSA, *.DSA + // (TODO(asmundak): should this be done only in META-INF? 
+ // * concatenate the contents of each file META-INF/services/ directory + // + if (ends_with(file_name, file_name_length, ".SF") || + ends_with(file_name, file_name_length, ".RSA") || + ends_with(file_name, file_name_length, ".DSA")) { + continue; + } else if (file_name[file_name_length - 1] != '/' && + begins_with(file_name, file_name_length, "META-INF/services/")) { + std::string service_path(file_name, file_name_length); + if (!known_members_.count(service_path)) { + Concatenator *service_handler = new Concatenator(service_path); + service_handlers_.emplace_back(service_handler); + known_members_.emplace( + service_path, EntryInfo{EntryInfo::CONCATENATE, service_handler}); + } + } + auto got = known_members_.emplace( + std::string(file_name, file_name_length), + EntryInfo{EntryInfo::PLAIN, reinterpret_cast<void *>(jar_path_index)}); + if (!got.second) { + // We allow duplicate entries in special cases: + // - various combiners + // - directory entries + // - manifest files + if (got.first->second.type_ == EntryInfo::XML_COMBINE) { + reinterpret_cast<XmlCombiner *>(got.first->second.data_) + ->Merge(jar_entry, lh); + continue; + } else if (got.first->second.type_ == EntryInfo::CONCATENATE) { + reinterpret_cast<Concatenator *>(got.first->second.data_) + ->Merge(jar_entry, lh); + continue; + } else if (got.first->second.type_ == EntryInfo::SKIP) { + continue; + } else if (file_name[file_name_length - 1] == '/') { + continue; + } else { + if (options_->no_duplicates || + (options_->no_duplicate_classes && + ends_with(file_name, file_name_length, ".class"))) { + auto previous_input_jar_index = + reinterpret_cast<size_t>(got.first->second.data_); + diag_errx(1, "%s:%d: %.*s is present both in %s and %s", __FILE__, + __LINE__, file_name_length, file_name, + options_->input_jars[previous_input_jar_index].c_str(), + input_jar_path.c_str()); + } else { + duplicate_entries_++; + continue; + } + } + } + + // Now we have to copy: + // local header + // file data + // data 
descriptor, if present. + off_t copy_from = jar_entry->local_header_offset(); + size_t num_bytes = lh->size(); + if (jar_entry->no_size_in_local_header()) { + // The size of the data descriptor varies. The actual data in it is three + // uint32's (crc32, compressed size, uncompressed size), but these can be + // preceded by the "PK\x7\x8" signature word (alas, 'jar' has it). + // Reading the descriptor just to figure out whether we need to copy four + // or three words will cost us another page read, let us assume the data + // description is always 4 words long at the cost of having an occasional + // one word gap between the entries. + num_bytes += jar_entry->compressed_file_size() + 4 * sizeof(uint32_t); + } else { + num_bytes += lh->compressed_file_size(); + } + off_t output_position = Position(); + // Do the actual copy. Use sendfile, avoiding copying the data to user + // space and back. + ssize_t n_copied = AppendFile(input_jar.fd(), &copy_from, num_bytes); + if (n_copied < 0) { + diag_err(1, "%s:%d: Cannot copy %ld bytes of %.*s from %s", __FILE__, + __LINE__, num_bytes, file_name_length, file_name, + input_jar_path.c_str()); + } else if (static_cast<size_t>(n_copied) != num_bytes) { + diag_err(1, "%s:%d: Copied only %ld bytes out of %ld from %s", __FILE__, + __LINE__, n_copied, num_bytes, input_jar_path.c_str()); + } + + // Append central directory header for this file to the output central + // directory we are building. + TODO(output_position < 0xFFFFFFFF, "Handle Zip64"); + AppendToDirectoryBuffer(jar_entry)->local_header_offset32(output_position); + ++entries_; + } + return input_jar.Close(); +} + +off_t OutputJar::Position() { + off_t position = lseek(fd_, 0, SEEK_CUR); + if (position == (off_t)-1) { + diag_err(1, "%s:%d: lseek", __FILE__, __LINE__); + } + TODO(position < 0xFFFFFFFF, "Handle Zip64"); + return position; +} + +// Writes an entry. 
The argument is the pointer to the contiguous block of +// memory containing Local Header for the entry, immediately followed by +// the data. The memory is freed after the data has been written. +void OutputJar::WriteEntry(void *buffer) { + if (buffer == nullptr) { + return; + } + LH *entry = reinterpret_cast<LH *>(buffer); + if (options_->verbose) { + fprintf(stderr, "%-.*s combiner has %lu bytes, %s to %lu\n", + entry->file_name_length(), entry->file_name(), + entry->uncompressed_file_size(), + entry->compression_method() == Z_NO_COMPRESSION ? "copied" + : "compressed", + entry->compressed_file_size()); + } + uint8_t *data_end = entry->data() + entry->in_zip_size(); + uint8_t *data = reinterpret_cast<uint8_t *>(entry); + off_t output_position = Position(); + while (data < data_end) { + ssize_t written = write(fd_, data, data_end - data); + if (written >= 0) { + data += written; + } else if (errno != EINTR) { + diag_err(1, "%s:%d: write", __FILE__, __LINE__); + } + } + // Data written, allocate CDH space and populate CDH. 
+ CDH *cdh = reinterpret_cast<CDH *>( + ReserveCdh(sizeof(CDH) + entry->file_name_length())); + cdh->signature(); + cdh->version(20); + cdh->version_to_extract(entry->version()); + cdh->bit_flag(0x0); + cdh->compression_method(entry->compression_method()); + cdh->last_mod_file_time(entry->last_mod_file_time()); + cdh->last_mod_file_date(entry->last_mod_file_date()); + cdh->crc32(entry->crc32()); + TODO(entry->compressed_file_size32() != 0xFFFFFFFF, "Handle Zip64"); + cdh->compressed_file_size32(entry->compressed_file_size32()); + TODO(entry->uncompressed_file_size32() != 0xFFFFFFFF, "Handle Zip64"); + cdh->uncompressed_file_size32(entry->uncompressed_file_size32()); + cdh->file_name(entry->file_name(), entry->file_name_length()); + cdh->extra_fields(nullptr, 0); + cdh->comment_length(0); + cdh->start_disk_nr(0); + cdh->internal_attributes(0); + cdh->external_attributes(0); + cdh->local_header_offset32(output_position); + ++entries_; + free(reinterpret_cast<void *>(entry)); +} + +void OutputJar::AddDirectory(const char *path) { + size_t n_path = strlen(path); + size_t lh_size = sizeof(LH) + n_path; + LH *lh = reinterpret_cast<LH *>(malloc(lh_size)); + lh->signature(); + lh->version(20); + lh->bit_flag(0); // TODO(asmundak): should I set UTF8 flag? + lh->compression_method(Z_NO_COMPRESSION); + lh->last_mod_file_time(0); + lh->last_mod_file_date(33); + lh->crc32(0); + lh->compressed_file_size32(0); + lh->uncompressed_file_size32(0); + lh->file_name(path, n_path); + lh->extra_fields(nullptr, 0); + known_members_.emplace(path, EntryInfo{EntryInfo::SKIP, nullptr}); + WriteEntry(lh); +} + +// Appends a Central Directory Entry to the directory buffer. 
+CDH *OutputJar::AppendToDirectoryBuffer(const CDH *cdh) { + size_t cdh_size = cdh->size(); + return reinterpret_cast<CDH *>( + memcpy(reinterpret_cast<CDH *>(ReserveCdr(cdh_size)), cdh, cdh_size)); +} + +uint8_t *OutputJar::ReserveCdr(size_t chunk_size) { + if (cen_size_ + chunk_size > cen_capacity_) { + cen_capacity_ += 1000000; + cen_ = reinterpret_cast<uint8_t *>(realloc(cen_, cen_capacity_)); + if (!cen_) { + diag_errx(1, "%s:%d: Cannot allocate %ld bytes for the directory", + __FILE__, __LINE__, cen_capacity_); + } + } + uint8_t *entry = cen_ + cen_size_; + cen_size_ += chunk_size; + return entry; +} + +uint8_t *OutputJar::ReserveCdh(size_t size) { + return static_cast<uint8_t *>(memset(ReserveCdr(size), 0, size)); +} + +// Write out combined jar. +bool OutputJar::Close() { + if (fd_ < 0) { + return true; + } + + for (auto &service_handler : service_handlers_) { + WriteEntry(service_handler->OutputEntry()); + } + WriteEntry(spring_handlers_.OutputEntry()); + WriteEntry(spring_schemas_.OutputEntry()); + WriteEntry(protobuf_meta_handler_.OutputEntry()); + // TODO(asmundak): handle manifest; + off_t output_position = lseek(fd_, 0, SEEK_CUR); + if (output_position == (off_t)-1) { + diag_err(1, "%s:%d: lseek", __FILE__, __LINE__); + } + TODO(output_position < 0xFFFFFFFF, "Handle Zip64"); + + size_t cen_size = + cen_size_; // Save it before AppendToDirectoryBuffer updates it. + ECD *ecd = reinterpret_cast<ECD *>(ReserveCdh(sizeof(ECD))); + ecd->signature(); + ecd->this_disk_entries16((uint16_t)entries_); + TODO(entries_ < 0xFFFF, "Handle >=64K entries"); + ecd->total_entries16((uint16_t)entries_); + TODO(cen_size < 0xFFFFFFFF, "Handle Zip64"); + ecd->cen_size32(cen_size); + TODO(output_position < 0xFFFFFFFF, "Handle Zip64"); + ecd->cen_offset32(output_position); + + // Write Central Directory. 
+ uint8_t *cen_end = cen_ + cen_size_; + uint8_t *cen = cen_; + while (cen < cen_end) { + ssize_t n = write(fd_, cen, cen_end - cen); + if (n < 0) { + diag_err(1, "%s:%d: Cannot write central directory", __FILE__, __LINE__); + } + cen += n; + } + free(cen_); + + if (close(fd_)) { + diag_err(1, "%s:%d: %s", __FILE__, __LINE__, path()); + fd_ = -1; + return false; + } + + fd_ = -1; + if (options_->verbose) { + fprintf(stderr, "Wrote %s with %d entries", path(), entries_); + if (duplicate_entries_) { + fprintf(stderr, ", skipped %d entries", duplicate_entries_); + } + fprintf(stderr, "\n"); + } + return true; +} + +void OutputJar::ClasspathResource(const std::string &resource_name, + const std::string &resource_path) { + if (known_members_.count(resource_name)) { + if (options_->warn_duplicate_resources) { + diag_warnx( + "%s:%d: Duplicate resource name %s in the --classpath_resource or " + "--resource option", + __FILE__, __LINE__, resource_name.c_str()); + // TODO(asmundak): this mimics old behaviour. Confirm that unless + // we run with --warn_duplicate_resources, the output zip file contains + // the concatenated contents of the all the resources with the same name. 
+ return; + } + } + MappedFile mapped_file; + if (!mapped_file.Open(resource_path)) { + diag_err(1, "%s:%d: %s", __FILE__, __LINE__, resource_path.c_str()); + } + Concatenator *classpath_resource = new Concatenator(resource_name); + classpath_resource->Append( + reinterpret_cast<const char *>(mapped_file.start()), mapped_file.size()); + classpath_resources_.emplace_back(classpath_resource); + known_members_.emplace(resource_name, + EntryInfo{EntryInfo::PLAIN, classpath_resource}); +} + +#if defined(__APPLE__) +ssize_t OutputJar::AppendFile(int in_fd, off_t *in_offset, size_t count) { + if (!count) { + return 0; + } + uint8_t buffer[8192]; + ssize_t total_written = 0; + + // If the input file position (the offset in the input file) has been passed, + // that's where we start, and the input file position has to be restored after + // we are done copying. + const off_t offset_error = static_cast<off_t>(-1); + off_t old_input_offset = offset_error; + if (in_offset) { + if (offset_error == (old_input_offset = lseek(in_fd, 0, SEEK_CUR)) || + offset_error == lseek(in_fd, *in_offset, SEEK_SET)) { + return -1; + } + } + while (total_written < count) { + ssize_t n_read = + read(in_fd, buffer, std::min(sizeof(buffer), count - total_written)); + if (n_read > 0) { + uint8_t *write_buffer = buffer; + uint8_t *write_buffer_end = write_buffer + n_read; + while (write_buffer < write_buffer_end) { + ssize_t n_written = + write(fd_, write_buffer, write_buffer_end - write_buffer); + if (n_written > 0) { + write_buffer += n_written; + } else if (EAGAIN != errno) { + return -1; + } + } + total_written += n_read; + } else if (n_read == 0) { + break; + } else if (EAGAIN != errno) { + return -1; + } + } + + // If the input file position has been passed, update it and restore + // the read position in the input file. 
+ if (in_offset) { + if (offset_error == lseek(in_fd, old_input_offset, SEEK_SET)) { + return -1; + } + *in_offset += total_written; + } + return total_written; +} + +#elif defined(__linux) +ssize_t OutputJar::AppendFile(int in_fd, off_t *in_offset, size_t count) { + // sendfile call is interruptable and has to be handled the same way as write + // call. + for (size_t to_write = count; to_write > 0;) { + ssize_t written = sendfile(fd_, in_fd, in_offset, to_write); + if (written < 0) { + return written; + } else if (written == 0) { + return static_cast<ssize_t>(count - to_write); + } + to_write -= static_cast<size_t>(written); + } + return static_cast<ssize_t>(count); +} +#endif diff --git a/src/tools/singlejar/output_jar.h b/src/tools/singlejar/output_jar.h new file mode 100644 index 0000000000..f723e41077 --- /dev/null +++ b/src/tools/singlejar/output_jar.h @@ -0,0 +1,101 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SRC_TOOLS_SINGLEJAR_COMBINED_JAR_H_ +#define SRC_TOOLS_SINGLEJAR_COMBINED_JAR_H_ + +#include <stdint.h> +#include <memory> +#include <string> +#include <unordered_map> +#include <vector> + +#include "src/tools/singlejar/combiners.h" +#include "src/tools/singlejar/options.h" + +/* + * Jar file we are writing. + */ +class OutputJar { + public: + // Constructor. + OutputJar(); + // Do all that needs to be done. Can be called only once. 
+ int Doit(Options *options); + // Destructor. + ~OutputJar(); + + private: + // Open output jar. + bool Open(); + // Add the contents of the given input jar. + bool AddJar(size_t jar_path_index); + // Returns the current output position. + off_t Position(); + // Write Jar entry. + void WriteEntry(void *local_header_and_payload); + // Write a directory entry. + void AddDirectory(const char *path); + // Append given Central Directory Header to CEN (Central Directory) buffer. + CDH *AppendToDirectoryBuffer(const CDH *cdh); + // Reserve space in CEN buffer. + uint8_t *ReserveCdr(size_t chunk_size); + // Reserve space for the Central Directory Header in CEN buffer. + uint8_t *ReserveCdh(size_t size); + // Close output. + bool Close(); + // Set classpath resource with given resource name and path. + void ClasspathResource(const std::string& resource_name, + const std::string& resource_path); + // Output jar path. + const char *path() const { return options_->output_jar.c_str(); } + // Copy the bytes from the given file. + ssize_t AppendFile(int in_fd, off_t *in_offset, size_t count); + + // The purpose of these two tiny utility methods is to avoid creating a + // std::string instance (which always involves allocating an object on the + // heap) when we just need to check that a sequence of bytes in memory has + // given prefix or suffix. + static bool begins_with(const char *str, size_t n, const char *head) { + const size_t n_head = strlen(head); + return n >= n_head && !strncmp(str, head, n_head); + } + static bool ends_with(const char *str, size_t n, const char *tail) { + const size_t n_tail = strlen(tail); + return n >= n_tail && !strncmp(str + n - n_tail, tail, n_tail); + } + + Options *options_; + struct EntryInfo { + enum EntryType { PLAIN, XML_COMBINE, CONCATENATE, SKIP } type_; + void *data_; // TODO(asmundak): use virtual dispatch instead. 
+ }; + + std::unordered_map<std::string, struct EntryInfo> known_members_; + int fd_; + int entries_; + int duplicate_entries_; + uint8_t *cen_; + size_t cen_size_; + size_t cen_capacity_; + Concatenator spring_handlers_; + Concatenator spring_schemas_; + Concatenator protobuf_meta_handler_; + Concatenator manifest_; + PropertyCombiner build_properties_; + std::vector<std::unique_ptr<Concatenator> > service_handlers_; + std::vector<std::unique_ptr<Concatenator> > classpath_resources_; +}; + +#endif // SRC_TOOLS_SINGLEJAR_COMBINED_JAR_H_ diff --git a/src/tools/singlejar/output_jar_simple_test.cc b/src/tools/singlejar/output_jar_simple_test.cc new file mode 100644 index 0000000000..598afcdff7 --- /dev/null +++ b/src/tools/singlejar/output_jar_simple_test.cc @@ -0,0 +1,307 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "src/main/cpp/blaze_util.h" +#include "src/main/cpp/util/file.h" +#include "src/main/cpp/util/port.h" +#include "src/main/cpp/util/strings.h" +#include "src/tools/singlejar/input_jar.h" +#include "src/tools/singlejar/options.h" +#include "src/tools/singlejar/output_jar.h" +#include "gtest/gtest.h" + +namespace { + +#if !defined(DATA_DIR_TOP) +#define DATA_DIR_TOP +#endif + +static bool HasSubstr(const std::string &s, const std::string &what) { + return std::string::npos != s.find(what); +} + +class OutputJarSimpleTest : public ::testing::Test { + protected: + OutputJarSimpleTest() {} + + ~OutputJarSimpleTest() override {} + + void CreateOutput(const std::string &out_path, const char *first_arg...) { + std::string args_string; + va_list ap; + va_start(ap, first_arg); + const char *args[100] = {"--output", out_path.c_str()}; + unsigned nargs = 2; + if (first_arg) { + args[nargs++] = first_arg; + while (nargs < arraysize(args)) { + const char *arg = va_arg(ap, const char *); + if (arg) { + args[nargs++] = arg; + args_string += ' '; + args_string += arg; + } else { + break; + } + } + va_end(ap); + ASSERT_GE(arraysize(args), nargs); + } + printf("Arguments: %s\n", args_string.c_str()); + options_.ParseCommandLine(nargs, args); + ASSERT_EQ(0, output_jar_.Doit(&options_)); + EXPECT_EQ(0, VerifyZip(out_path)); + } + + static std::string OutputFilePath(const std::string &relpath) { + const char *out_dir = getenv("TEST_TMPDIR"); + return blaze_util::JoinPath((nullptr == out_dir) ? "." 
: out_dir, + relpath.c_str()); + } + + static int VerifyZip(const std::string &zip_path) { + std::string verify_command; + blaze_util::StringPrintf(&verify_command, "zip -Tv %s", zip_path.c_str()); + return system(verify_command.c_str()); + } + + static void GetEntryContents(const std::string &zip_path, + const char *entry_name, std::string *contents) { + contents->clear(); + std::string command; + blaze_util::StringPrintf(&command, "unzip -p %s %s", zip_path.c_str(), + entry_name); + FILE *fp = popen(command.c_str(), "r"); + ASSERT_NE(nullptr, fp); + char buf[1024]; + while (fgets(buf, sizeof(buf), fp)) { + contents->append(buf); + } + ASSERT_NE(0, feof(fp)); + ASSERT_EQ(0, ferror(fp)); + ASSERT_EQ(0, pclose(fp)); + } + + OutputJar output_jar_; + Options options_; +}; + +// No inputs at all. +TEST_F(OutputJarSimpleTest, Empty) { + std::string out_path = OutputFilePath("out.jar"); + CreateOutput(out_path, nullptr); + InputJar input_jar; + ASSERT_TRUE(input_jar.Open(out_path.c_str())); + const LH *lh; + const CDH *cdh; + while ((cdh = input_jar.NextEntry(&lh))) { + ASSERT_TRUE(cdh->is()) << "No expected tag in the Central Directory Entry."; + ASSERT_NE(nullptr, lh) << "No local header."; + ASSERT_TRUE(lh->is()) << "No expected tag in the Local Header."; + EXPECT_EQ(lh->file_name_string(), cdh->file_name_string()); + if (!cdh->no_size_in_local_header()) { + EXPECT_EQ(lh->compressed_file_size(), cdh->compressed_file_size()) + << "Entry: " << lh->file_name_string(); + EXPECT_EQ(lh->uncompressed_file_size(), cdh->uncompressed_file_size()) + << "Entry: " << cdh->file_name_string(); + } + } + input_jar.Close(); + std::string manifest; + GetEntryContents(out_path, "META-INF/MANIFEST.MF", &manifest); + EXPECT_EQ( + "Manifest-Version: 1.0\r\n" + "Created-By: singlejar\r\n" + "\r\n", + manifest); + std::string build_properties; + GetEntryContents(out_path, "build-data.properties", &build_properties); + EXPECT_PRED2(HasSubstr, build_properties, "build.target="); +} + +// 
Source jars. +TEST_F(OutputJarSimpleTest, Source) { + std::string out_path = OutputFilePath("out.jar"); + CreateOutput(out_path, "--sources", + DATA_DIR_TOP "src/tools/singlejar/libtest1.jar", + DATA_DIR_TOP "src/tools/singlejar/libtest2.jar", nullptr); + InputJar input_jar; + ASSERT_TRUE(input_jar.Open(out_path.c_str())); + const LH *lh; + const CDH *cdh; + while ((cdh = input_jar.NextEntry(&lh))) { + ASSERT_TRUE(cdh->is()) << "No expected tag in the Central Directory Entry."; + ASSERT_NE(nullptr, lh) << "No local header."; + ASSERT_TRUE(lh->is()) << "No expected tag in the Local Header."; + EXPECT_EQ(lh->file_name_string(), cdh->file_name_string()); + if (!cdh->no_size_in_local_header()) { + EXPECT_EQ(lh->compressed_file_size(), cdh->compressed_file_size()) + << "Entry: " << lh->file_name_string(); + EXPECT_EQ(lh->uncompressed_file_size(), cdh->uncompressed_file_size()) + << "Entry: " << cdh->file_name_string(); + } + } + input_jar.Close(); +} + +// Verify --java_launcher argument +TEST_F(OutputJarSimpleTest, JavaLauncher) { + std::string out_path = OutputFilePath("out.jar"); + const char *launcher_path = DATA_DIR_TOP "src/tools/singlejar/libtest1.jar"; + CreateOutput(out_path, "--java_launcher", launcher_path, nullptr); + // check that the offset of the first entry equals launcher size. + InputJar input_jar; + ASSERT_TRUE(input_jar.Open(out_path.c_str())); + const LH *lh; + const CDH *cdh; + cdh = input_jar.NextEntry(&lh); + ASSERT_NE(nullptr, cdh); + struct stat statbuf; + ASSERT_EQ(0, stat(launcher_path, &statbuf)); + EXPECT_TRUE(cdh->is()); + EXPECT_TRUE(lh->is()); + EXPECT_EQ(statbuf.st_size, cdh->local_header_offset()); + input_jar.Close(); +} + +// --main_class option. 
+TEST_F(OutputJarSimpleTest, MainClass) { + std::string out_path = OutputFilePath("out.jar"); + CreateOutput(out_path, "--main_class", "com.google.my.Main", nullptr); + std::string manifest; + GetEntryContents(out_path, "META-INF/MANIFEST.MF", &manifest); + EXPECT_EQ( + "Manifest-Version: 1.0\r\n" + "Created-By: singlejar\r\n" + "Main-Class: com.google.my.Main\r\n" + "\r\n", + manifest); +} + +// --deploy_manifest_lines option. +TEST_F(OutputJarSimpleTest, DeployManifestLines) { + std::string out_path = OutputFilePath("out.jar"); + CreateOutput(out_path, "--deploy_manifest_lines", "property1: foo", + "property2: bar", nullptr); + std::string manifest; + GetEntryContents(out_path, "META-INF/MANIFEST.MF", &manifest); + EXPECT_EQ( + "Manifest-Version: 1.0\r\n" + "Created-By: singlejar\r\n" + "property1: foo\r\n" + "property2: bar\r\n" + "\r\n", + manifest); +} + +// --extra_build_info option +TEST_F(OutputJarSimpleTest, ExtraBuildInfo) { + std::string out_path = OutputFilePath("out.jar"); + CreateOutput(out_path, "--extra_build_info", "property1=value1", + "--extra_build_info", "property2=value2", nullptr); + std::string build_properties; + GetEntryContents(out_path, "build-data.properties", &build_properties); + EXPECT_PRED2(HasSubstr, build_properties, "\nproperty1=value1\n"); + EXPECT_PRED2(HasSubstr, build_properties, "\nproperty2=value2\n"); +} + +// --build_info_file and --extra_build_info options. 
+TEST_F(OutputJarSimpleTest, BuildInfoFile) { + std::string build_info_path1 = OutputFilePath("buildinfo1"); + ASSERT_TRUE(blaze::WriteFile("property11=value11\nproperty12=value12\n", + build_info_path1)); + std::string build_info_path2 = OutputFilePath("buildinfo2"); + ASSERT_TRUE(blaze::WriteFile("property21=value21\nproperty22=value22\n", + build_info_path2)); + std::string out_path = OutputFilePath("out.jar"); + CreateOutput(out_path, "--build_info_file", build_info_path1.c_str(), + "--extra_build_info", "property=value", "--build_info_file", + build_info_path2.c_str(), nullptr); + std::string build_properties; + GetEntryContents(out_path, "build-data.properties", &build_properties); + EXPECT_PRED2(HasSubstr, build_properties, "property11=value11\n"); + EXPECT_PRED2(HasSubstr, build_properties, "property12=value12\n"); + EXPECT_PRED2(HasSubstr, build_properties, "property21=value21\n"); + EXPECT_PRED2(HasSubstr, build_properties, "property22=value22\n"); + EXPECT_PRED2(HasSubstr, build_properties, "property=value\n"); +} + +// --resources option. +TEST_F(OutputJarSimpleTest, Resources) { + std::string res11_path = OutputFilePath("res11"); + std::string res11_spec = std::string("res1:") + res11_path; + ASSERT_TRUE(blaze::WriteFile("res11.line1\nres11.line2\n", res11_path)); + + std::string res12_path = OutputFilePath("res12"); + std::string res12_spec = std::string("res1:") + res12_path; + ASSERT_TRUE(blaze::WriteFile("res12.line1\nres12.line2\n", res12_path)); + + std::string res2_path = OutputFilePath("res2"); + ASSERT_TRUE(blaze::WriteFile("res2.line1\nres2.line2\n", res2_path)); + + std::string out_path = OutputFilePath("out.jar"); + CreateOutput(out_path, "--resources", res11_spec.c_str(), res12_spec.c_str(), + res2_path.c_str(), nullptr); + + // The output should have 'res1' entry containing the concatenation of the + // 'res11' and 'res12' files. 
+ std::string res1; + GetEntryContents(out_path, "res1", &res1); + EXPECT_EQ("res11.line1\nres11.line2\nres12.line1\nres12.line2\n", res1); + + // The output should have res2 path entry and contents. + std::string res2; + GetEntryContents(out_path, res2_path.c_str(), &res2); + EXPECT_EQ("res2.line1\nres2.line2\n", res2); +} + +// --classpath_resources +TEST_F(OutputJarSimpleTest, ClasspathResources) { + std::string res1_path = OutputFilePath("cp_res"); + ASSERT_TRUE(blaze::WriteFile("line1\nline2\n", res1_path)); + std::string out_path = OutputFilePath("out.jar"); + CreateOutput(out_path, "--classpath_resources", res1_path.c_str(), nullptr); + std::string res; + GetEntryContents(out_path, "cp_res", &res); + EXPECT_EQ("line1\nline2\n", res); +} + +// Duplicate entries for --resources or --classpath_resources +TEST_F(OutputJarSimpleTest, DuplicateResources) { + std::string cp_res_path = OutputFilePath("cp_res"); + ASSERT_TRUE(blaze::WriteFile("line1\nline2\n", cp_res_path)); + + std::string res1_path = OutputFilePath("res1"); + std::string res1_spec = "foo:" + res1_path; + ASSERT_TRUE(blaze::WriteFile("resline1\nresline2\n", res1_path)); + + std::string res2_path = OutputFilePath("res2"); + std::string res2_spec = "foo:" + res2_path; + ASSERT_TRUE(blaze::WriteFile("line3\nline4\n", res2_path)); + + std::string out_path = OutputFilePath("out.jar"); + CreateOutput(out_path, "--warn_duplicate_resources", "--resources", + res1_spec.c_str(), res2_spec.c_str(), "--classpath_resources", + cp_res_path.c_str(), cp_res_path.c_str(), nullptr); + + std::string cp_res; + GetEntryContents(out_path, "cp_res", &cp_res); + EXPECT_EQ("line1\nline2\n", cp_res); + + std::string foo; + GetEntryContents(out_path, "foo", &foo); + EXPECT_EQ("resline1\nresline2\n", foo); +} + +} // namespace diff --git a/src/tools/singlejar/singlejar_main.cc b/src/tools/singlejar/singlejar_main.cc new file mode 100644 index 0000000000..12c52aa094 --- /dev/null +++ b/src/tools/singlejar/singlejar_main.cc @@ 
-0,0 +1,23 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "src/tools/singlejar/options.h" +#include "src/tools/singlejar/output_jar.h" + +int main(int argc, char *argv[]) { + Options options; + options.ParseCommandLine(argc - 1, argv + 1); + OutputJar output_jar; + return output_jar.Doit(&options); +} |