// Copyright 2016 The Bazel Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_ #define SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_ #define __STDC_FORMAT_MACROS 1 #include #include #include #include "src/tools/singlejar/diag.h" #include "src/tools/singlejar/zip_headers.h" #include "src/tools/singlejar/zlib_interface.h" /* * An instance of this class holds decompressed data in a list of chunks, * to be eventually compressed to the output buffer. * Use DecompressFile() or ReadFile() (depending on whether an entry is * compressed or not) to append the contents of a Zip entry. * Use Append() to append a sequence of bytes or a string. * Use Write() to write out the contents, it will compress the entry if * necessary. */ class TransientBytes { public: TransientBytes() : allocated_(0), data_size_(0), first_block_(nullptr), last_block_(nullptr) {} ~TransientBytes() { while (first_block_) { auto block = first_block_; first_block_ = first_block_->next_block_; delete block; } last_block_ = nullptr; } // Appends raw bytes. void Append(const uint8_t *data, uint64_t data_size) { uint64_t chunk_size; auto data_end = data + data_size; for (; data < data_end; data += chunk_size) { chunk_size = std::min(static_cast(data_end - data), ensure_space()); copy(data, chunk_size); } } // Same, but for a string. void Append(const char *str) { Append(reinterpret_cast(str), strlen(str)); } // Appends the contents of the uncompressed Zip entry. void ReadEntryContents(const LH *lh) { Append(lh->data(), lh->uncompressed_file_size()); } // Appends the contents of the compressed Zip entry. Resets the inflater // used to decompress. void DecompressEntryContents(const CDH *cdh, const LH *lh, Inflater *inflater) { uint64_t old_total_out = inflater->total_out(); uint64_t in_bytes; uint64_t out_bytes; const uint8_t *data = lh->data(); if (cdh->no_size_in_local_header()) { in_bytes = cdh->compressed_file_size(); out_bytes = cdh->uncompressed_file_size(); } else { in_bytes = lh->compressed_file_size(); out_bytes = lh->uncompressed_file_size(); } while (in_bytes > 0) { // A single region to inflate cannot exceed 4GB-1. uint32_t in_bytes_chunk = 0xFFFFFFFF; if (in_bytes_chunk > in_bytes) { in_bytes_chunk = in_bytes; } inflater->DataToInflate(data, in_bytes_chunk); for (;;) { uint32_t available_out = ensure_space(); int ret = inflater->Inflate(append_position(), available_out); uint32_t inflated = available_out - inflater->available_out(); if (Z_STREAM_END == ret) { // No more data to decompress. Update write position and we are done // for this input chunk. advance(inflated); break; } else if (Z_OK == ret) { // No more space in the output buffer. Advance write position, update // the number of remaining bytes. if (inflater->available_out()) { diag_errx(2, "%s:%d: Internal error inflating %.*s: Inflate reported " "Z_OK but there are still %" PRIu32 " bytes available in the output buffer", __FILE__, __LINE__, lh->file_name_length(), lh->file_name(), inflater->available_out()); } advance(inflated); } else { diag_errx(2, "%s:%d: Internal error inflating %.*s: inflate() call " "returned %d (%s)", __FILE__, __LINE__, lh->file_name_length(), lh->file_name(), ret, inflater->error_message()); } } data += in_bytes_chunk; in_bytes -= in_bytes_chunk; } // Smog check if (inflater->total_out() - old_total_out != out_bytes) { diag_errx(2, "%s:%d: Internal error inflating %.*s: inflater wrote %" PRIu64 " bytes , but the uncompressed entry should be %" PRIu64 "bytes long", __FILE__, __LINE__, lh->file_name_length(), lh->file_name(), inflater->total_out() - old_total_out, out_bytes); } inflater->reset(); } // Writes the contents bytes to the given buffer in an optimal way, i.e., the // shorter of compressed or uncompressed. Sets the checksum and number of // bytes written and returns Z_DEFLATED if compression took place or // Z_NO_COMPRESSION otherwise. uint16_t CompressOut(uint8_t *buffer, uint32_t *checksum, uint64_t *bytes_written) { *checksum = 0; uint64_t to_compress = data_size(); if (to_compress == 0) { *bytes_written = 0; return Z_NO_COMPRESSION; } Deflater deflater; deflater.next_out = buffer; uint16_t compression_method = Z_DEFLATED; // Feed data blocks to the deflater one by one, but break if the compressed // size exceeds the original size. for (auto data_block = first_block_; data_block && compression_method != Z_NO_COMPRESSION; data_block = data_block->next_block_) { // The compressed size should not exceed the original size less the number // of bytes already compressed. And, it should not exceed 4GB-1. deflater.avail_out = std::min(data_size() - deflater.total_out, static_cast(0xFFFFFFFF)); // Out of the total number of bytes that remain to be compressed, we // can compress no more than this block. uint32_t chunk_size = static_cast(std::min( static_cast(sizeof(data_block->data_)), to_compress)); *checksum = crc32(*checksum, data_block->data_, chunk_size); deflater.avail_in = chunk_size; to_compress -= chunk_size; int ret = deflater.Deflate(data_block->data_, chunk_size, to_compress ? Z_NO_FLUSH : Z_FINISH); if (ret == Z_OK) { if (!deflater.avail_out) { // We ran out of space in the output buffer, which means // that deflated size exceeds original size. Leave the loop // and just copy the data. compression_method = Z_NO_COMPRESSION; } } else if (ret == Z_BUF_ERROR && !deflater.avail_in) { // We ran out of data block, this is not a error. } else if (ret == Z_STREAM_END) { if (data_block->next_block_ || to_compress) { diag_errx(2, "%s:%d: Internal error: deflate() call at the end, but " "there is more data to compress!", __FILE__, __LINE__); } } else { diag_errx(2, "%s:%d: deflate error %d(%s)", __FILE__, __LINE__, ret, deflater.msg); } } if (compression_method != Z_NO_COMPRESSION) { *bytes_written = deflater.total_out; return compression_method; } // Compression does not help, just copy the bytes to the output buffer. CopyOut(buffer, checksum); *bytes_written = data_size(); return Z_NO_COMPRESSION; } // Copies the bytes to the buffer and sets the checksum. void CopyOut(uint8_t *buffer, uint32_t *checksum) { uint64_t to_copy = data_size(); uint8_t *buffer_end = buffer + to_copy; *checksum = 0; for (auto data_block = first_block_; data_block; data_block = data_block->next_block_) { size_t chunk_size = std::min(static_cast(sizeof(data_block->data_)), to_copy); *checksum = crc32(*checksum, data_block->data_, chunk_size); memcpy(buffer_end - to_copy, data_block->data_, chunk_size); to_copy -= chunk_size; } } // Number of data bytes. uint64_t data_size() const { return data_size_; } // This is mostly for testing: stream out contents to a Sink instance. // The class Sink has to have // void operator()(const void *chunk, uint64_t chunk_size) const; // template void stream_out(const Sink &sink) const { uint64_t to_copy = data_size(); for (auto data_block = first_block_; data_block; data_block = data_block->next_block_) { uint64_t chunk_size = sizeof(data_block->data_); if (chunk_size > to_copy) { chunk_size = to_copy; } sink.operator()(data_block->data_, chunk_size); to_copy -= chunk_size; } } uint8_t last_byte() const { if (!data_size()) { diag_errx(1, "%s:%d: last_char() cannot be called if buffer is empty", __FILE__, __LINE__); } if (free_size() >= sizeof(last_block_->data_)) { diag_errx(1, "%s:%d: internal error: the last data block is empty", __FILE__, __LINE__); } return *(last_block_->End() - free_size() - 1); } private: // Ensures there is some space to write to, returns the amount available. uint64_t ensure_space() { if (!free_size()) { auto *data_block = new DataBlock(); if (last_block_) { last_block_->next_block_ = data_block; } last_block_ = data_block; if (!first_block_) { first_block_ = data_block; } allocated_ += sizeof(data_block->data_); } return free_size(); } // Records that given amount of bytes is to be appended to the buffer. // Returns the old write position. uint8_t *advance(size_t amount) { if (amount > free_size()) { diag_errx( 2, "%s: %d: Cannot advance %zu bytes, only %" PRIu64 " is available", __FILE__, __LINE__, amount, free_size()); } uint8_t *pos = append_position(); data_size_ += amount; return pos; } void copy(const uint8_t *from, size_t count) { memcpy(advance(count), from, count); } uint8_t *append_position() { return last_block_ ? last_block_->End() - free_size() : nullptr; } // Returns the amount of free space. uint64_t free_size() const { return allocated_ - data_size_; } // The bytes are kept in an linked list of the DataBlock instances. // TODO(asmundak): perhaps use mmap to allocate these? struct DataBlock { struct DataBlock *next_block_; uint8_t data_[0x40000 - 8]; DataBlock() : next_block_(nullptr) {} uint8_t *End() { return data_ + sizeof(data_); } }; uint64_t allocated_; uint64_t data_size_; struct DataBlock *first_block_; struct DataBlock *last_block_; }; #endif // SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_