diff options
author | Eugene Brevdo <ebrevdo@google.com> | 2018-09-10 10:47:54 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-09-10 10:55:36 -0700 |
commit | a0bec62c0219e143a8b0d8e3dd3fb5b577db388e (patch) | |
tree | 72867c04d7bf0dddc3c676573d3c60e1edb73211 /tensorflow/core/lib | |
parent | 54273565a7b877ef448c29650409a60021cf6c5e (diff) |
Add helper functions that allow users to write TFRecords in memory.
PiperOrigin-RevId: 212293765
Diffstat (limited to 'tensorflow/core/lib')
-rw-r--r-- | tensorflow/core/lib/io/record_reader.cc | 3 |
-rw-r--r-- | tensorflow/core/lib/io/record_reader.h | 8 |
-rw-r--r-- | tensorflow/core/lib/io/record_writer.cc | 15 |
-rw-r--r-- | tensorflow/core/lib/io/record_writer.h | 32 |
4 files changed, 44 insertions, 14 deletions
diff --git a/tensorflow/core/lib/io/record_reader.cc b/tensorflow/core/lib/io/record_reader.cc index c24628be57..f93ebea771 100644 --- a/tensorflow/core/lib/io/record_reader.cc +++ b/tensorflow/core/lib/io/record_reader.cc @@ -109,9 +109,6 @@ Status RecordReader::ReadChecksummed(uint64 offset, size_t n, string* result) { } Status RecordReader::ReadRecord(uint64* offset, string* record) { - static const size_t kHeaderSize = sizeof(uint64) + sizeof(uint32); - static const size_t kFooterSize = sizeof(uint32); - // Position the input stream. int64 curr_pos = input_stream_->Tell(); int64 desired_pos = static_cast<int64>(*offset); diff --git a/tensorflow/core/lib/io/record_reader.h b/tensorflow/core/lib/io/record_reader.h index c05f9e1b36..11af1366b0 100644 --- a/tensorflow/core/lib/io/record_reader.h +++ b/tensorflow/core/lib/io/record_reader.h @@ -58,6 +58,14 @@ class RecordReaderOptions { // Note: this class is not thread safe; external synchronization required. class RecordReader { public: + // Format of a single record: + // uint64 length + // uint32 masked crc of length + // byte data[length] + // uint32 masked crc of data + static const size_t kHeaderSize = sizeof(uint64) + sizeof(uint32); + static const size_t kFooterSize = sizeof(uint32); + // Create a reader that will return log records from "*file". // "*file" must remain live while this Reader is in use. 
explicit RecordReader( diff --git a/tensorflow/core/lib/io/record_writer.cc b/tensorflow/core/lib/io/record_writer.cc index 6e71d23e71..2c6db2487e 100644 --- a/tensorflow/core/lib/io/record_writer.cc +++ b/tensorflow/core/lib/io/record_writer.cc @@ -88,10 +88,6 @@ RecordWriter::~RecordWriter() { } } -static uint32 MaskedCrc(const char* data, size_t n) { - return crc32c::Mask(crc32c::Value(data, n)); -} - Status RecordWriter::WriteRecord(StringPiece data) { if (dest_ == nullptr) { return Status(::tensorflow::error::FAILED_PRECONDITION, @@ -102,13 +98,10 @@ Status RecordWriter::WriteRecord(StringPiece data) { // uint32 masked crc of length // byte data[length] // uint32 masked crc of data - char header[sizeof(uint64) + sizeof(uint32)]; - core::EncodeFixed64(header + 0, data.size()); - core::EncodeFixed32(header + sizeof(uint64), - MaskedCrc(header, sizeof(uint64))); - char footer[sizeof(uint32)]; - core::EncodeFixed32(footer, MaskedCrc(data.data(), data.size())); - + char header[kHeaderSize]; + char footer[kFooterSize]; + PopulateHeader(header, data.data(), data.size()); + PopulateFooter(footer, data.data(), data.size()); TF_RETURN_IF_ERROR(dest_->Append(StringPiece(header, sizeof(header)))); TF_RETURN_IF_ERROR(dest_->Append(data)); return dest_->Append(StringPiece(footer, sizeof(footer))); diff --git a/tensorflow/core/lib/io/record_writer.h b/tensorflow/core/lib/io/record_writer.h index 6a2bf66d12..1212e1fafb 100644 --- a/tensorflow/core/lib/io/record_writer.h +++ b/tensorflow/core/lib/io/record_writer.h @@ -16,8 +16,10 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_LIB_IO_RECORD_WRITER_H_ #define TENSORFLOW_CORE_LIB_IO_RECORD_WRITER_H_ +#include "tensorflow/core/lib/core/coding.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/hash/crc32c.h" #if !defined(IS_SLIM_BUILD) #include "tensorflow/core/lib/io/zlib_compression_options.h" #include "tensorflow/core/lib/io/zlib_outputbuffer.h" @@ -47,6 +49,14 @@ class RecordWriterOptions { class RecordWriter { public: + // Format of a single record: + // uint64 length + // uint32 masked crc of length + // byte data[length] + // uint32 masked crc of data + static const size_t kHeaderSize = sizeof(uint64) + sizeof(uint32); + static const size_t kFooterSize = sizeof(uint32); + // Create a writer that will append data to "*dest". // "*dest" must be initially empty. // "*dest" must remain live while this Writer is in use. @@ -72,13 +82,35 @@ class RecordWriter { // are invalid. Status Close(); + // Utility method to populate TFRecord headers. Populates record-header in + // "header[0,kHeaderSize-1]". The record-header is based on data[0, n-1]. + inline static void PopulateHeader(char* header, const char* data, size_t n); + + // Utility method to populate TFRecord footers. Populates record-footer in + // "footer[0,kFooterSize-1]". The record-footer is based on data[0, n-1]. 
+ inline static void PopulateFooter(char* footer, const char* data, size_t n); + private: WritableFile* dest_; RecordWriterOptions options_; + inline static uint32 MaskedCrc(const char* data, size_t n) { + return crc32c::Mask(crc32c::Value(data, n)); + } + TF_DISALLOW_COPY_AND_ASSIGN(RecordWriter); }; +void RecordWriter::PopulateHeader(char* header, const char* data, size_t n) { + core::EncodeFixed64(header + 0, n); + core::EncodeFixed32(header + sizeof(uint64), + MaskedCrc(header, sizeof(uint64))); +} + +void RecordWriter::PopulateFooter(char* footer, const char* data, size_t n) { + core::EncodeFixed32(footer, MaskedCrc(data, n)); +} + } // namespace io } // namespace tensorflow |