// Copyright 2016 The Bazel Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. /* * The implementation of the OutputJar methods. */ #include "src/tools/singlejar/output_jar.h" #include #include #include #include #if defined(__linux) #include #endif #include #include #include #include "src/main/cpp/util/file.h" #include "src/tools/singlejar/combiners.h" #include "src/tools/singlejar/diag.h" #include "src/tools/singlejar/input_jar.h" #include "src/tools/singlejar/mapped_file.h" #include "src/tools/singlejar/options.h" #include "src/tools/singlejar/zip_headers.h" #include #define TODO(cond, msg) \ if (!(cond)) { \ diag_errx(2, "%s:%d: TODO(asmundak): " msg, __FILE__, __LINE__); \ } OutputJar::OutputJar() : options_(nullptr), fd_(-1), entries_(0), duplicate_entries_(0), cen_(nullptr), cen_size_(0), cen_capacity_(0), spring_handlers_("META-INF/spring.handlers"), spring_schemas_("META-INF/spring.schemas"), protobuf_meta_handler_("protobuf.meta"), manifest_("META-INF/MANIFEST.MF"), build_properties_("build-data.properties") { known_members_.emplace(spring_handlers_.filename(), EntryInfo{&spring_handlers_}); known_members_.emplace(spring_schemas_.filename(), EntryInfo{&spring_schemas_}); known_members_.emplace(manifest_.filename(), EntryInfo{&manifest_}); known_members_.emplace(protobuf_meta_handler_.filename(), EntryInfo{&protobuf_meta_handler_}); known_members_.emplace(build_properties_.filename(), EntryInfo{&build_properties_}); manifest_.Append( "Manifest-Version: 1.0\r\n" "Created-By: singlejar\r\n"); } int OutputJar::Doit(Options *options) { if (nullptr != options_) { diag_errx(1, "%s:%d: Doit() can be called only once.", __FILE__, __LINE__); } options_ = options; // TODO(asmundak): handle these options. TODO(!options_->force_compression, "Handle --compression"); TODO(!options_->normalize_timestamps, "Handle --normalize"); TODO(!options_->preserve_compression, "Handle --dont_change_compression"); build_properties_.AddProperty("build.target", options_->output_jar.c_str()); if (options_->verbose) { fprintf(stderr, "combined_file_name=%s\n", options_->output_jar.c_str()); if (!options_->main_class.empty()) { fprintf(stderr, "main_class=%s\n", options_->main_class.c_str()); } if (!options_->java_launcher.empty()) { fprintf(stderr, "java_launcher_file=%s\n", options_->java_launcher.c_str()); } fprintf(stderr, "%ld source files\n", options_->input_jars.size()); fprintf(stderr, "%ld manifest lines\n", options_->manifest_lines.size()); } if (!Open()) { exit(1); } // Copy launcher if it is set. if (!options_->java_launcher.empty()) { const char *const launcher_path = options_->java_launcher.c_str(); int in_fd = open(launcher_path, O_RDONLY); struct stat statbuf; if (fd_ < 0 || fstat(in_fd, &statbuf)) { diag_err(1, "%s", launcher_path); } ssize_t byte_count = AppendFile(in_fd, nullptr, statbuf.st_size); if (byte_count < 0) { diag_err(1, "%s:%d: Cannot copy %s to %s", __FILE__, __LINE__, launcher_path, options_->output_jar.c_str()); } else if (byte_count != statbuf.st_size) { diag_err(1, "%s:%d: Copied only %ld bytes out of %" PRIu64 " from %s", __FILE__, __LINE__, byte_count, statbuf.st_size, launcher_path); } close(in_fd); if (options_->verbose) { fprintf(stderr, "Prepended %s (%" PRIu64 " bytes)\n", launcher_path, statbuf.st_size); } } if (!options_->main_class.empty()) { build_properties_.AddProperty("main.class", options_->main_class); manifest_.Append("Main-Class: "); manifest_.Append(options_->main_class); manifest_.Append("\r\n"); } for (auto &manifest_line : options_->manifest_lines) { if (!manifest_line.empty()) { manifest_.Append(manifest_line); if (manifest_line[manifest_line.size() - 1] != '\n') { manifest_.Append("\r\n"); } } } for (auto &build_info_line : options_->build_info_lines) { build_properties_.Append(build_info_line); build_properties_.Append("\n"); } for (auto &build_info_file : options_->build_info_files) { MappedFile mapped_file; if (!mapped_file.Open(build_info_file)) { diag_err(1, "%s:%d: Bad build info file %s", __FILE__, __LINE__, build_info_file.c_str()); } const char *data = reinterpret_cast(mapped_file.start()); const char *data_end = reinterpret_cast(mapped_file.end()); // TODO(asmundak): this isn't right, we should parse properties file. while (data < data_end) { const char *next_data = strchr(static_cast(data), '\n'); if (next_data) { ++next_data; } else { next_data = data_end; } build_properties_.Append(data, next_data - data); data = next_data; } mapped_file.Close(); } for (auto &rpath : options_->classpath_resources) { // TODO(asmundak): On Windows, look for \, too. ClasspathResource(blaze_util::Basename(rpath), rpath); } for (auto &rdesc : options_->resources) { // A resource description is either NAME or NAME:PATH std::size_t colon = rdesc.find_first_of(':'); if (0 == colon) { diag_errx(1, "%s:%d: Bad resource description %s", __FILE__, __LINE__, rdesc.c_str()); } if (std::string::npos == colon) { ClasspathResource(rdesc, rdesc); } else { ClasspathResource(rdesc.substr(0, colon), rdesc.substr(colon + 1)); } } // Ready to write zip entries. // First, write a directory entry for the META-INF, followed by the manifest // file, followed by the build properties file. AddDirectory("META-INF/"); manifest_.Append("\r\n"); WriteEntry(manifest_.OutputEntry()); if (!options_->exclude_build_data) { WriteEntry(build_properties_.OutputEntry()); } // Then classpath resources. for (auto &classpath_resource : classpath_resources_) { WriteEntry(classpath_resource->OutputEntry()); } // Then copy source files' contents. for (int ix = 0; ix < options_->input_jars.size(); ++ix) { if (!AddJar(ix)) { exit(1); } } // All entries written, write Central Directory and close. Close(); return 0; } OutputJar::~OutputJar() { if (fd_ >= 0) { diag_warnx("%s:%d: Close() should be called first", __FILE__, __LINE__); } } bool OutputJar::Open() { if (fd_ >= 0) { diag_errx(1, "%s:%d: Cannot open output archive twice", __FILE__, __LINE__); } // The output file has read/write/execute permissions for the owner, // default for the rest. mode_t old_umask = umask(0); fd_ = creat(path(), (S_IRWXU | S_IRWXG | S_IRWXO) & ~old_umask); umask(old_umask); if (fd_ < 0) { diag_warn("%s:%d: %s", __FILE__, __LINE__, path()); return false; } if (options_->verbose) { fprintf(stderr, "Writing to %s\n", path()); } return true; } bool OutputJar::AddJar(int jar_path_index) { const std::string& input_jar_path = options_->input_jars[jar_path_index]; InputJar input_jar; if (!input_jar.Open(input_jar_path)) { return false; } const CDH *jar_entry; const LH *lh; while ((jar_entry = input_jar.NextEntry(&lh))) { const char *file_name = jar_entry->file_name(); auto file_name_length = jar_entry->file_name_length(); if (!file_name_length) { diag_errx( 1, "%s:%d: Bad central directory record in %s at offset 0x%" PRIx64, __FILE__, __LINE__, input_jar_path.c_str(), input_jar.CentralDirectoryRecordOffset(jar_entry)); } // Special files that cannot be handled by looking up known_members_ map: // * ignore *.SF, *.RSA, *.DSA // (TODO(asmundak): should this be done only in META-INF? // if (ends_with(file_name, file_name_length, ".SF") || ends_with(file_name, file_name_length, ".RSA") || ends_with(file_name, file_name_length, ".DSA")) { continue; } bool include_entry = true; if (!options_->include_prefixes.empty()) { for (auto& prefix : options_->include_prefixes) { if ((include_entry = (prefix.size() <= file_name_length && 0 == strncmp(file_name, prefix.c_str(), prefix.size())))) { break; } } } if (!include_entry) { continue; } bool is_dir = (file_name[file_name_length - 1] != '/'); if (is_dir && begins_with(file_name, file_name_length, "META-INF/services/")) { // The contents of the META-INF/services/ on the output is the // concatenation of the META-INF/services/ files from all inputs. std::string service_path(file_name, file_name_length); if (!known_members_.count(service_path)) { // Create a concatenator and add it to the known_members_ map. // The call to Merge() below will then take care of the rest. Concatenator *service_handler = new Concatenator(service_path); service_handlers_.emplace_back(service_handler); known_members_.emplace(service_path, EntryInfo{service_handler}); } } // Install a new entry unless it is already present. All the plain (non-dir) // entries that require a combiner have been already installed, so the call // will add either a directory entry whose handler will ignore subsequent // duplicates, or an ordinary plain entry, for which we save the index of // the first input jar (in order to provide diagnostics on duplicate). auto got = known_members_.emplace(std::string(file_name, file_name_length), EntryInfo{is_dir ? &null_combiner_ : nullptr, is_dir ? -1 : jar_path_index}); if (!got.second) { auto &entry_info = got.first->second; // Handle special entries (the ones that have a combiner. if (entry_info.combiner_ != nullptr) { entry_info.combiner_->Merge(jar_entry, lh); continue; } // Plain file entry. If duplicates are not allowed, bail out. Otherwise // just ignore this entry. if (options_->no_duplicates || (options_->no_duplicate_classes && ends_with(file_name, file_name_length, ".class"))) { diag_errx(1, "%s:%d: %.*s is present both in %s and %s", __FILE__, __LINE__, file_name_length, file_name, options_->input_jars[entry_info.input_jar_index_].c_str(), input_jar_path.c_str()); } else { duplicate_entries_++; continue; } } // Now we have to copy: // local header // file data // data descriptor, if present. off_t copy_from = jar_entry->local_header_offset(); size_t num_bytes = lh->size(); if (jar_entry->no_size_in_local_header()) { // The size of the data descriptor varies. The actual data in it is three // uint32's (crc32, compressed size, uncompressed size), but these can be // preceded by the "PK\x7\x8" signature word (alas, 'jar' has it). // Reading the descriptor just to figure out whether we need to copy four // or three words will cost us another page read, let us assume the data // description is always 4 words long at the cost of having an occasional // one word gap between the entries. num_bytes += jar_entry->compressed_file_size() + 4 * sizeof(uint32_t); } else { num_bytes += lh->compressed_file_size(); } off_t output_position = Position(); // Do the actual copy. Use sendfile, avoiding copying the data to user // space and back. ssize_t n_copied = AppendFile(input_jar.fd(), ©_from, num_bytes); if (n_copied < 0) { diag_err(1, "%s:%d: Cannot copy %ld bytes of %.*s from %s", __FILE__, __LINE__, num_bytes, file_name_length, file_name, input_jar_path.c_str()); } else if (static_cast(n_copied) != num_bytes) { diag_err(1, "%s:%d: Copied only %ld bytes out of %ld from %s", __FILE__, __LINE__, n_copied, num_bytes, input_jar_path.c_str()); } // Append central directory header for this file to the output central // directory we are building. TODO(output_position < 0xFFFFFFFF, "Handle Zip64"); AppendToDirectoryBuffer(jar_entry)->local_header_offset32(output_position); ++entries_; } return input_jar.Close(); } off_t OutputJar::Position() { off_t position = lseek(fd_, 0, SEEK_CUR); if (position == (off_t)-1) { diag_err(1, "%s:%d: lseek", __FILE__, __LINE__); } TODO(position < 0xFFFFFFFF, "Handle Zip64"); return position; } // Writes an entry. The argument is the pointer to the contiguos block of // memory containing Local Header for the entry, immediately followed by // the data. The memory is freed after the data has been written. void OutputJar::WriteEntry(void *buffer) { if (buffer == nullptr) { return; } LH *entry = reinterpret_cast(buffer); if (options_->verbose) { fprintf(stderr, "%-.*s combiner has %lu bytes, %s to %lu\n", entry->file_name_length(), entry->file_name(), entry->uncompressed_file_size(), entry->compression_method() == Z_NO_COMPRESSION ? "copied" : "compressed", entry->compressed_file_size()); } uint8_t *data_end = entry->data() + entry->in_zip_size(); uint8_t *data = reinterpret_cast(entry); off_t output_position = Position(); while (data < data_end) { ssize_t written = write(fd_, data, data_end - data); if (written >= 0) { data += written; } else if (errno != EINTR) { diag_err(1, "%s:%d: write", __FILE__, __LINE__); } } // Data written, allocate CDH space and populate CDH. CDH *cdh = reinterpret_cast( ReserveCdh(sizeof(CDH) + entry->file_name_length())); cdh->signature(); cdh->version(20); cdh->version_to_extract(entry->version()); cdh->bit_flag(0x0); cdh->compression_method(entry->compression_method()); cdh->last_mod_file_time(entry->last_mod_file_time()); cdh->last_mod_file_date(entry->last_mod_file_date()); cdh->crc32(entry->crc32()); TODO(entry->compressed_file_size32() != 0xFFFFFFFF, "Handle Zip64"); cdh->compressed_file_size32(entry->compressed_file_size32()); TODO(entry->uncompressed_file_size32() != 0xFFFFFFFF, "Handle Zip64"); cdh->uncompressed_file_size32(entry->uncompressed_file_size32()); cdh->file_name(entry->file_name(), entry->file_name_length()); cdh->extra_fields(nullptr, 0); cdh->comment_length(0); cdh->start_disk_nr(0); cdh->internal_attributes(0); cdh->external_attributes(0); cdh->local_header_offset32(output_position); ++entries_; free(reinterpret_cast(entry)); } void OutputJar::AddDirectory(const char *path) { size_t n_path = strlen(path); size_t lh_size = sizeof(LH) + n_path; LH *lh = reinterpret_cast(malloc(lh_size)); lh->signature(); lh->version(20); lh->bit_flag(0); // TODO(asmundak): should I set UTF8 flag? lh->compression_method(Z_NO_COMPRESSION); lh->last_mod_file_time(0); lh->last_mod_file_date(33); lh->crc32(0); lh->compressed_file_size32(0); lh->uncompressed_file_size32(0); lh->file_name(path, n_path); lh->extra_fields(nullptr, 0); known_members_.emplace(path, EntryInfo{&null_combiner_}); WriteEntry(lh); } // Appends a Central Directory Entry to the directory buffer. CDH *OutputJar::AppendToDirectoryBuffer(const CDH *cdh) { size_t cdh_size = cdh->size(); return reinterpret_cast( memcpy(reinterpret_cast(ReserveCdr(cdh_size)), cdh, cdh_size)); } uint8_t *OutputJar::ReserveCdr(size_t chunk_size) { if (cen_size_ + chunk_size > cen_capacity_) { cen_capacity_ += 1000000; cen_ = reinterpret_cast(realloc(cen_, cen_capacity_)); if (!cen_) { diag_errx(1, "%s:%d: Cannot allocate %ld bytes for the directory", __FILE__, __LINE__, cen_capacity_); } } uint8_t *entry = cen_ + cen_size_; cen_size_ += chunk_size; return entry; } uint8_t *OutputJar::ReserveCdh(size_t size) { return static_cast(memset(ReserveCdr(size), 0, size)); } // Write out combined jar. bool OutputJar::Close() { if (fd_ < 0) { return true; } for (auto &service_handler : service_handlers_) { WriteEntry(service_handler->OutputEntry()); } for (auto &extra_combiner : extra_combiners_) { WriteEntry(extra_combiner->OutputEntry()); } WriteEntry(spring_handlers_.OutputEntry()); WriteEntry(spring_schemas_.OutputEntry()); WriteEntry(protobuf_meta_handler_.OutputEntry()); // TODO(asmundak): handle manifest; off_t output_position = lseek(fd_, 0, SEEK_CUR); if (output_position == (off_t)-1) { diag_err(1, "%s:%d: lseek", __FILE__, __LINE__); } TODO(output_position < 0xFFFFFFFF, "Handle Zip64"); size_t cen_size = cen_size_; // Save it before AppendToDirectoryBuffer updates it. ECD *ecd = reinterpret_cast(ReserveCdh(sizeof(ECD))); ecd->signature(); ecd->this_disk_entries16((uint16_t)entries_); TODO(entries_ < 0xFFFF, "Handle >=64K entries"); ecd->total_entries16((uint16_t)entries_); TODO(cen_size < 0xFFFFFFFF, "Handle Zip64"); ecd->cen_size32(cen_size); TODO(output_position < 0xFFFFFFFF, "Handle Zip64"); ecd->cen_offset32(output_position); // Write Central Directory. uint8_t *cen_end = cen_ + cen_size_; uint8_t *cen = cen_; while (cen < cen_end) { ssize_t n = write(fd_, cen, cen_end - cen); if (n < 0) { diag_err(1, "%s:%d: Cannot write central directory", __FILE__, __LINE__); } cen += n; } free(cen_); if (close(fd_)) { diag_err(1, "%s:%d: %s", __FILE__, __LINE__, path()); fd_ = -1; return false; } fd_ = -1; if (options_->verbose) { fprintf(stderr, "Wrote %s with %d entries", path(), entries_); if (duplicate_entries_) { fprintf(stderr, ", skipped %d entries", duplicate_entries_); } fprintf(stderr, "\n"); } return true; } void OutputJar::ClasspathResource(const std::string &resource_name, const std::string &resource_path) { if (known_members_.count(resource_name)) { if (options_->warn_duplicate_resources) { diag_warnx( "%s:%d: Duplicate resource name %s in the --classpath_resource or " "--resource option", __FILE__, __LINE__, resource_name.c_str()); // TODO(asmundak): this mimics old behaviour. Confirm that unless // we run with --warn_duplicate_resources, the output zip file contains // the concatenated contents of the all the resources with the same name. return; } } MappedFile mapped_file; if (!mapped_file.Open(resource_path)) { diag_err(1, "%s:%d: %s", __FILE__, __LINE__, resource_path.c_str()); } Concatenator *classpath_resource = new Concatenator(resource_name); classpath_resource->Append( reinterpret_cast(mapped_file.start()), mapped_file.size()); classpath_resources_.emplace_back(classpath_resource); known_members_.emplace(resource_name, EntryInfo{classpath_resource}); } #if defined(__APPLE__) ssize_t OutputJar::AppendFile(int in_fd, off_t *in_offset, size_t count) { if (!count) { return 0; } uint8_t buffer[8192]; ssize_t total_written = 0; // If the input file position (the offset in the input file) has been passed, // that's where we start, and the input file position has to be restored after // we are done copying. const off_t offset_error = static_cast(-1); off_t old_input_offset = offset_error; if (in_offset) { if (offset_error == (old_input_offset = lseek(in_fd, 0, SEEK_CUR)) || offset_error == lseek(in_fd, *in_offset, SEEK_SET)) { return -1; } } while (total_written < count) { ssize_t n_read = read(in_fd, buffer, std::min(sizeof(buffer), count - total_written)); if (n_read > 0) { uint8_t *write_buffer = buffer; uint8_t *write_buffer_end = write_buffer + n_read; while (write_buffer < write_buffer_end) { ssize_t n_written = write(fd_, write_buffer, write_buffer_end - write_buffer); if (n_written > 0) { write_buffer += n_written; } else if (EAGAIN != errno) { return -1; } } total_written += n_read; } else if (n_read == 0) { break; } else if (EAGAIN != errno) { return -1; } } // If the input file position has been passed, update it and restore // the read position in the input file. if (in_offset) { if (offset_error == lseek(in_fd, old_input_offset, SEEK_SET)) { return -1; } *in_offset += total_written; } return total_written; } #elif defined(__linux) ssize_t OutputJar::AppendFile(int in_fd, off_t *in_offset, size_t count) { // sendfile call is interruptable and has to be handled the same way as write // call. for (size_t to_write = count; to_write > 0;) { ssize_t written = sendfile(fd_, in_fd, in_offset, to_write); if (written < 0) { return written; } else if (written == 0) { return static_cast(count - to_write); } to_write -= static_cast(written); } return static_cast(count); } #endif void OutputJar::ExtraCombiner(const std::string &entry_name, Combiner *combiner) { extra_combiners_.emplace_back(combiner); known_members_.emplace(entry_name, EntryInfo{combiner}); }