aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/tools/singlejar/transient_bytes.h
blob: 7c708707d2783b3f810b7773abce1ad4e11771f6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
// Copyright 2016 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_
#define SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_

#define __STDC_FORMAT_MACROS 1

#include <inttypes.h>
#include <algorithm>
#include <ostream>

#include "src/tools/singlejar/diag.h"
#include "src/tools/singlejar/zip_headers.h"
#include "src/tools/singlejar/zlib_interface.h"

/*
 * An instance of this class holds decompressed data in a list of chunks,
 * to be eventually compressed to the output buffer.
 * Use DecompressEntryContents() or ReadEntryContents() (depending on whether
 * an entry is compressed or not) to append the contents of a Zip entry.
 * Use Append() to append a sequence of bytes or a string.
 * Use CompressOut() to write out the contents; it will compress them if that
 * makes the output shorter. Use CopyOut() to write them out uncompressed.
 */
class TransientBytes {
 public:
  TransientBytes()
      : allocated_(0),
        data_size_(0),
        first_block_(nullptr),
        last_block_(nullptr) {}

  ~TransientBytes() {
    while (first_block_) {
      auto block = first_block_;
      first_block_ = first_block_->next_block_;
      delete block;
    }
    last_block_ = nullptr;
  }

  // Appends raw bytes.
  void Append(const uint8_t *data, uint64_t data_size) {
    uint64_t chunk_size;
    auto data_end = data + data_size;
    for (; data < data_end; data += chunk_size) {
      chunk_size =
          std::min(static_cast<uint64_t>(data_end - data), ensure_space());
      copy(data, chunk_size);
    }
  }

  // Same, but for a string.
  void Append(const char *str) {
    Append(reinterpret_cast<const uint8_t *>(str), strlen(str));
  }

  // Appends the contents of the uncompressed Zip entry.
  void ReadEntryContents(const LH *lh) {
    Append(lh->data(), lh->uncompressed_file_size());
  }

  // Appends the contents of the compressed Zip entry. Resets the inflater
  // used to decompress.
  void DecompressEntryContents(const CDH *cdh, const LH *lh,
                               Inflater *inflater) {
    uint64_t old_total_out = inflater->total_out();
    uint64_t in_bytes;
    uint64_t out_bytes;
    const uint8_t *data = lh->data();

    if (cdh->no_size_in_local_header()) {
      in_bytes = cdh->compressed_file_size();
      out_bytes = cdh->uncompressed_file_size();
    } else {
      in_bytes = lh->compressed_file_size();
      out_bytes = lh->uncompressed_file_size();
    }

    while (in_bytes > 0) {
      // A single region to inflate cannot exceed 4GB-1.
      uint32_t in_bytes_chunk = 0xFFFFFFFF;
      if (in_bytes_chunk > in_bytes) {
        in_bytes_chunk = in_bytes;
      }
      inflater->DataToInflate(data, in_bytes_chunk);
      for (;;) {
        uint32_t available_out = ensure_space();
        int ret = inflater->Inflate(append_position(), available_out);
        uint32_t inflated = available_out - inflater->available_out();
        if (Z_STREAM_END == ret) {
          // No more data to decompress. Update write position and we are done
          // for this input chunk.
          advance(inflated);
          break;
        } else if (Z_OK == ret) {
          // No more space in the output buffer. Advance write position, update
          // the number of remaining bytes.
          if (inflater->available_out()) {
            diag_errx(2,
                      "%s:%d: Internal error inflating %.*s: Inflate reported "
                      "Z_OK but there are still %" PRIu32
                      " bytes available in the output buffer",
                      __FILE__, __LINE__, lh->file_name_length(),
                      lh->file_name(), inflater->available_out());
          }
          advance(inflated);
        } else {
          diag_errx(2,
                    "%s:%d: Internal error inflating %.*s: inflate() call "
                    "returned %d (%s)",
                    __FILE__, __LINE__, lh->file_name_length(), lh->file_name(),
                    ret, inflater->error_message());
        }
      }
      data += in_bytes_chunk;
      in_bytes -= in_bytes_chunk;
    }

    // Smog check
    if (inflater->total_out() - old_total_out != out_bytes) {
      diag_errx(2,
                "%s:%d: Internal error inflating %.*s: inflater wrote %" PRIu64
                " bytes , but the uncompressed entry should be %" PRIu64
                "bytes long",
                __FILE__, __LINE__, lh->file_name_length(), lh->file_name(),
                inflater->total_out() - old_total_out, out_bytes);
    }
    inflater->reset();
  }

  // Writes the contents bytes to the given buffer in an optimal way, i.e., the
  // shorter of compressed or uncompressed. Sets the checksum and number of
  // bytes written and returns Z_DEFLATED if compression took place or
  // Z_NO_COMPRESSION otherwise.
  uint16_t CompressOut(uint8_t *buffer, uint32_t *checksum,
                       uint64_t *bytes_written) {
    *checksum = 0;
    uint64_t to_compress = data_size();
    if (to_compress == 0) {
      *bytes_written = 0;
      return Z_NO_COMPRESSION;
    }

    Deflater deflater;
    deflater.next_out = buffer;
    uint16_t compression_method = Z_DEFLATED;

    // Feed data blocks to the deflater one by one, but break if the compressed
    // size exceeds the original size.
    for (auto data_block = first_block_;
         data_block && compression_method != Z_NO_COMPRESSION;
         data_block = data_block->next_block_) {
      // The compressed size should not exceed the original size less the number
      // of bytes already compressed. And, it should not exceed 4GB-1.
      deflater.avail_out = std::min(data_size() - deflater.total_out,
                                    static_cast<uint64_t>(0xFFFFFFFF));
      // Out of the total number of bytes that remain to be compressed, we
      // can compress no more than this block.
      uint32_t chunk_size = static_cast<uint32_t>(std::min(
          static_cast<uint64_t>(sizeof(data_block->data_)), to_compress));
      *checksum = crc32(*checksum, data_block->data_, chunk_size);
      deflater.avail_in = chunk_size;
      to_compress -= chunk_size;
      int ret = deflater.Deflate(data_block->data_, chunk_size,
                                 to_compress ? Z_NO_FLUSH : Z_FINISH);
      if (ret == Z_OK) {
        if (!deflater.avail_out) {
          // We ran out of space in the output buffer, which means
          // that deflated size exceeds original size. Leave the loop
          // and just copy the data.
          compression_method = Z_NO_COMPRESSION;
        }
      } else if (ret == Z_BUF_ERROR && !deflater.avail_in) {
        // We ran out of data block, this is not a error.
      } else if (ret == Z_STREAM_END) {
        if (data_block->next_block_ || to_compress) {
          diag_errx(2,
                    "%s:%d: Internal error: deflate() call at the end, but "
                    "there is more data to compress!",
                    __FILE__, __LINE__);
        }
      } else {
        diag_errx(2, "%s:%d: deflate error %d(%s)", __FILE__, __LINE__, ret,
                  deflater.msg);
      }
    }
    if (compression_method != Z_NO_COMPRESSION) {
      *bytes_written = deflater.total_out;
      return compression_method;
    }

    // Compression does not help, just copy the bytes to the output buffer.
    CopyOut(buffer, checksum);
    *bytes_written = data_size();
    return Z_NO_COMPRESSION;
  }

  // Copies the bytes to the buffer and sets the checksum.
  void CopyOut(uint8_t *buffer, uint32_t *checksum) {
    uint64_t to_copy = data_size();
    uint8_t *buffer_end = buffer + to_copy;
    *checksum = 0;
    for (auto data_block = first_block_; data_block;
         data_block = data_block->next_block_) {
      size_t chunk_size =
          std::min(static_cast<uint64_t>(sizeof(data_block->data_)), to_copy);
      *checksum = crc32(*checksum, data_block->data_, chunk_size);
      memcpy(buffer_end - to_copy, data_block->data_, chunk_size);
      to_copy -= chunk_size;
    }
  }

  // Number of data bytes.
  uint64_t data_size() const { return data_size_; }

  // This is mostly for testing: stream out contents to a Sink instance.
  // The class Sink has to have
  //     void operator()(const void *chunk, uint64_t chunk_size) const;
  //
  template <class Sink>
  void stream_out(const Sink &sink) const {
    uint64_t to_copy = data_size();
    for (auto data_block = first_block_; data_block;
         data_block = data_block->next_block_) {
      uint64_t chunk_size = sizeof(data_block->data_);
      if (chunk_size > to_copy) {
        chunk_size = to_copy;
      }
      sink.operator()(data_block->data_, chunk_size);
      to_copy -= chunk_size;
    }
  }

  uint8_t last_byte() const {
    if (!data_size()) {
      diag_errx(1, "%s:%d: last_char() cannot be called if buffer is empty",
                __FILE__, __LINE__);
    }
    if (free_size() >= sizeof(last_block_->data_)) {
      diag_errx(1, "%s:%d: internal error: the last data block is empty",
                __FILE__, __LINE__);
    }
    return *(last_block_->End() - free_size() - 1);
  }

 private:
  // Ensures there is some space to write to, returns the amount available.
  uint64_t ensure_space() {
    if (!free_size()) {
      auto *data_block = new DataBlock();
      if (last_block_) {
        last_block_->next_block_ = data_block;
      }
      last_block_ = data_block;
      if (!first_block_) {
        first_block_ = data_block;
      }
      allocated_ += sizeof(data_block->data_);
    }
    return free_size();
  }

  // Records that given amount of bytes is to be appended to the buffer.
  // Returns the old write position.
  uint8_t *advance(size_t amount) {
    if (amount > free_size()) {
      diag_errx(
          2, "%s: %d: Cannot advance %zu bytes, only %" PRIu64 " is available",
          __FILE__, __LINE__, amount, free_size());
    }
    uint8_t *pos = append_position();
    data_size_ += amount;
    return pos;
  }

  void copy(const uint8_t *from, size_t count) {
    memcpy(advance(count), from, count);
  }

  uint8_t *append_position() {
    return last_block_ ? last_block_->End() - free_size() : nullptr;
  }

  // Returns the amount of free space.
  uint64_t free_size() const { return allocated_ - data_size_; }

  // The bytes are kept in an linked list of the DataBlock instances.
  // TODO(asmundak): perhaps use mmap to allocate these?
  struct DataBlock {
    struct DataBlock *next_block_;
    uint8_t data_[0x40000 - 8];
    DataBlock() : next_block_(nullptr) {}
    uint8_t *End() { return data_ + sizeof(data_); }
  };

  uint64_t allocated_;
  uint64_t data_size_;
  struct DataBlock *first_block_;
  struct DataBlock *last_block_;
};

#endif  // SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_