aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/lib/io/zlib_inputstream.h
blob: ac9e23ca972e196d908802c3962cce091bc02ee5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LIB_IO_ZLIB_INPUTSTREAM_H_
#define TENSORFLOW_LIB_IO_ZLIB_INPUTSTREAM_H_

#include <string>

#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/io/inputstream_interface.h"
#include "tensorflow/core/lib/io/zlib_compression_options.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {
namespace io {

// Forward declare some members of zlib.h, which is only included in the
// .cc file.
struct ZStreamDef;

// An ZlibInputStream provides support for reading from a stream compressed
// using zlib (http://www.zlib.net/). Buffers the contents of the file.
//
// A given instance of an ZlibInputStream is NOT safe for concurrent use
// by multiple threads
class ZlibInputStream : public InputStreamInterface {
 public:
  // Create a ZlibInputStream for `input_stream` with a buffer of size
  // `input_buffer_bytes` bytes for reading contents from `input_stream` and
  // another buffer with size `output_buffer_bytes` for caching decompressed
  // contents.
  //
  // Takes ownership of `input_stream` iff `owns_input_stream` is true.
  ZlibInputStream(InputStreamInterface* input_stream, size_t input_buffer_bytes,
                  size_t output_buffer_bytes,
                  const ZlibCompressionOptions& zlib_options,
                  bool owns_input_stream);

  // Equivalent to the previous constructor with owns_input_stream=false.
  ZlibInputStream(InputStreamInterface* input_stream, size_t input_buffer_bytes,
                  size_t output_buffer_bytes,
                  const ZlibCompressionOptions& zlib_options);

  ~ZlibInputStream();

  // Reads bytes_to_read bytes into *result, overwriting *result.
  //
  // Return Status codes:
  // OK:           If successful.
  // OUT_OF_RANGE: If there are not enough bytes to read before
  //               the end of the stream.
  // ABORTED:      If inflate() fails, we return the error code with the
  //               error message in `z_stream_->msg`.
  // others:       If reading from stream failed.
  Status ReadNBytes(int64 bytes_to_read, string* result) override;

  int64 Tell() const override;

  Status Reset() override;

 private:
  void InitZlibBuffer();

  const bool owns_input_stream_;
  InputStreamInterface* input_stream_;
  size_t input_buffer_capacity_;   // Size of z_stream_input_
  size_t output_buffer_capacity_;  // Size of z_stream_output_
  char* next_unread_byte_;         // Next unread byte in z_stream_output_

  ZlibCompressionOptions const zlib_options_;

  std::unique_ptr<ZStreamDef> z_stream_def_;

  // Reads data from `input_stream_` and tries to fill up `z_stream_input_` if
  // enough unread data is left in `input_stream_`.
  //
  // Looks up z_stream_->next_in to check how much data in z_stream_input_
  // has already been read. The used data is removed and new data is added to
  // after any unread data in z_stream_input_.
  // After this call z_stream_->next_in points to the start of z_stream_input_
  // and z_stream_->avail_in stores the number of readable bytes in
  // z_stream_input_.
  //
  // Returns OutOfRange error if NO data could be read from stream. Note that
  // this won't return an OutOfRange if there wasn't sufficient data in stream
  // to completely fill up z_stream_input_.
  Status ReadFromStream();

  // Calls `inflate()` and returns DataLoss Status if it failed.
  Status Inflate();

  // Starts reading bytes at `next_unread_byte_` till either `bytes_to_read`
  // bytes have been read or `z_stream_->next_out` is reached.
  // Returns the number of bytes read and advances the `next_unread_byte_`
  // pointer to the next location to read from.
  size_t ReadBytesFromCache(size_t bytes_to_read, string* result);

  // The number of unread bytes in z_stream_output_.
  //
  // z_stream_output_  -->
  //
  // [RRRRRRRRRRRRRRRRRRUUUUUUUUUUUUUU000000000000000000]
  //                    ^             ^
  //           next_unread_byte_    z_stream_->next_out
  //
  // R: Read bytes
  // U: Unread bytes
  // 0: garbage bytes where new output will be written
  //
  // Returns the size of [next_unread_byte_, z_stream_->next_out)
  size_t NumUnreadBytes() const;

  // Number of *uncompressed* bytes that have been read from this stream.
  int64 bytes_read_;

  TF_DISALLOW_COPY_AND_ASSIGN(ZlibInputStream);
};

}  // namespace io
}  // namespace tensorflow

#endif  // TENSORFLOW_LIB_IO_ZLIB_INPUTSTREAM_H_