diff options
Diffstat (limited to 'tensorflow/python/lib/io')
-rwxr-xr-x | tensorflow/python/lib/io/__init__.py | 0 | ||||
-rw-r--r-- | tensorflow/python/lib/io/py_record_reader.cc | 49 | ||||
-rw-r--r-- | tensorflow/python/lib/io/py_record_reader.h | 50 | ||||
-rw-r--r-- | tensorflow/python/lib/io/py_record_reader.i | 39 | ||||
-rw-r--r-- | tensorflow/python/lib/io/py_record_writer.cc | 44 | ||||
-rw-r--r-- | tensorflow/python/lib/io/py_record_writer.h | 38 | ||||
-rw-r--r-- | tensorflow/python/lib/io/py_record_writer.i | 38 | ||||
-rw-r--r-- | tensorflow/python/lib/io/python_io.py | 29 | ||||
-rw-r--r-- | tensorflow/python/lib/io/tf_record.py | 68 |
9 files changed, 355 insertions, 0 deletions
diff --git a/tensorflow/python/lib/io/__init__.py b/tensorflow/python/lib/io/__init__.py new file mode 100755 index 0000000000..e69de29bb2 --- /dev/null +++ b/tensorflow/python/lib/io/__init__.py diff --git a/tensorflow/python/lib/io/py_record_reader.cc b/tensorflow/python/lib/io/py_record_reader.cc new file mode 100644 index 0000000000..5cc5229a8b --- /dev/null +++ b/tensorflow/python/lib/io/py_record_reader.cc @@ -0,0 +1,49 @@ +#include "tensorflow/python/lib/io/py_record_reader.h" + +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/io/record_reader.h" +#include "tensorflow/core/platform/port.h" +#include "tensorflow/core/public/env.h" + +namespace tensorflow { + +class RandomAccessFile; + +namespace io { + +PyRecordReader::PyRecordReader() {} + +PyRecordReader* PyRecordReader::New(const string& filename, + uint64 start_offset) { + RandomAccessFile* file; + Status s = Env::Default()->NewRandomAccessFile(filename, &file); + if (!s.ok()) { + return nullptr; // Open failed; the Status detail is dropped. + } + PyRecordReader* reader = new PyRecordReader; + reader->offset_ = start_offset; + reader->file_ = file; + reader->reader_ = new RecordReader(reader->file_); + return reader; +} + +PyRecordReader::~PyRecordReader() { + delete reader_; + delete file_; +} + +bool PyRecordReader::GetNext() { + if (reader_ == nullptr) return false; // Already closed by Close(). + Status s = reader_->ReadRecord(&offset_, &record_); + return s.ok(); // Any non-OK status is reported as false. +} + +void PyRecordReader::Close() { + delete reader_; + delete file_; + file_ = nullptr; + reader_ = nullptr; // Both nulled so the destructor's deletes are no-ops. +} + +} // namespace io +} // namespace tensorflow diff --git a/tensorflow/python/lib/io/py_record_reader.h b/tensorflow/python/lib/io/py_record_reader.h new file mode 100644 index 0000000000..5a775761df --- /dev/null +++ b/tensorflow/python/lib/io/py_record_reader.h @@ -0,0 +1,50 @@ +#ifndef TENSORFLOW_PYTHON_LIB_IO_PY_RECORD_READER_H_ +#define TENSORFLOW_PYTHON_LIB_IO_PY_RECORD_READER_H_ + +#include "tensorflow/core/lib/core/stringpiece.h" +#include 
"tensorflow/core/platform/port.h" +#include "tensorflow/core/public/status.h" + +namespace tensorflow { + +class RandomAccessFile; + +namespace io { + +class RecordReader; + +// A wrapper around io::RecordReader that is more easily SWIG wrapped for +// Python. An instance of this class is not safe for concurrent access +// by multiple threads. +class PyRecordReader { + public: + static PyRecordReader* New(const string& filename, uint64 start_offset); + ~PyRecordReader(); + + // Attempt to get the next record at "offset()". If + // successful, returns true, and the record contents can be retrieved + // with "this->record()". Otherwise, returns false. + bool GetNext(); + // Return the current record contents. Only valid after the preceding call + // to GetNext() returned true. + string record() const { return record_; } + // Return the current offset in the file. + uint64 offset() const { return offset_; } + + // Close the underlying file and release its resources. + void Close(); + + private: + PyRecordReader(); + + uint64 offset_; + RandomAccessFile* file_; // Owned + io::RecordReader* reader_; // Owned + string record_; + TF_DISALLOW_COPY_AND_ASSIGN(PyRecordReader); +}; + +} // namespace io +} // namespace tensorflow + +#endif // TENSORFLOW_PYTHON_LIB_IO_PY_RECORD_READER_H_ diff --git a/tensorflow/python/lib/io/py_record_reader.i b/tensorflow/python/lib/io/py_record_reader.i new file mode 100644 index 0000000000..19f911bd52 --- /dev/null +++ b/tensorflow/python/lib/io/py_record_reader.i @@ -0,0 +1,39 @@ +%nothread tensorflow::io::PyRecordReader::GetNext; + +%include "tensorflow/python/platform/base.i" + +%feature("except") tensorflow::io::PyRecordReader::New { + // Let other threads run while we open the file + Py_BEGIN_ALLOW_THREADS + $action + Py_END_ALLOW_THREADS +} + +%newobject tensorflow::io::PyRecordReader::New; + +%feature("except") tensorflow::io::PyRecordReader::GetNext { + // Let other threads run while we read + Py_BEGIN_ALLOW_THREADS + $action + 
Py_END_ALLOW_THREADS +} + +%{ +#include "tensorflow/python/lib/io/py_record_reader.h" +%} + +%ignoreall + +%unignore tensorflow; +%unignore tensorflow::io; +%unignore tensorflow::io::PyRecordReader; +%unignore tensorflow::io::PyRecordReader::~PyRecordReader; +%unignore tensorflow::io::PyRecordReader::GetNext; +%unignore tensorflow::io::PyRecordReader::offset; +%unignore tensorflow::io::PyRecordReader::record; +%unignore tensorflow::io::PyRecordReader::Close; +%unignore tensorflow::io::PyRecordReader::New; + +%include "tensorflow/python/lib/io/py_record_reader.h" + +%unignoreall diff --git a/tensorflow/python/lib/io/py_record_writer.cc b/tensorflow/python/lib/io/py_record_writer.cc new file mode 100644 index 0000000000..e557756cbc --- /dev/null +++ b/tensorflow/python/lib/io/py_record_writer.cc @@ -0,0 +1,44 @@ +#include "tensorflow/python/lib/io/py_record_writer.h" + +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/platform/port.h" +#include "tensorflow/core/lib/io/record_writer.h" +#include "tensorflow/core/public/env.h" + +namespace tensorflow { +namespace io { + +PyRecordWriter::PyRecordWriter() {} + +PyRecordWriter* PyRecordWriter::New(const string& filename) { + WritableFile* file; + Status s = Env::Default()->NewWritableFile(filename, &file); + if (!s.ok()) { + return nullptr; // Open failed; the Status detail is dropped. + } + PyRecordWriter* writer = new PyRecordWriter; + writer->file_ = file; + writer->writer_ = new RecordWriter(writer->file_); + return writer; +} + +PyRecordWriter::~PyRecordWriter() { + delete writer_; + delete file_; +} + +bool PyRecordWriter::WriteRecord(::tensorflow::StringPiece record) { + if (writer_ == nullptr) return false; // Already closed by Close(). + Status s = writer_->WriteRecord(record); + return s.ok(); // Any non-OK status is reported as false. +} + +void PyRecordWriter::Close() { + delete writer_; + delete file_; + writer_ = nullptr; + file_ = nullptr; // Both nulled so the destructor's deletes are no-ops. +} + +} // namespace io +} // namespace tensorflow diff --git a/tensorflow/python/lib/io/py_record_writer.h b/tensorflow/python/lib/io/py_record_writer.h new 
file mode 100644 index 0000000000..e3fd05bd9a --- /dev/null +++ b/tensorflow/python/lib/io/py_record_writer.h @@ -0,0 +1,38 @@ +#ifndef THIRD_PARTY_TENSORFLOW_PYTHON_LIB_IO_PY_RECORD_WRITER_H_ +#define THIRD_PARTY_TENSORFLOW_PYTHON_LIB_IO_PY_RECORD_WRITER_H_ + +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/platform/port.h" +#include "tensorflow/core/public/status.h" + +namespace tensorflow { + +class WritableFile; + +namespace io { + +class RecordWriter; + +// A wrapper around io::RecordWriter that is more easily SWIG wrapped for +// Python. An instance of this class is not safe for concurrent access +// by multiple threads. +class PyRecordWriter { + public: + static PyRecordWriter* New(const string& filename); // nullptr on failure. + ~PyRecordWriter(); + + bool WriteRecord(::tensorflow::StringPiece record); // false on failure. + void Close(); // Deletes the writer and file; later writes return false. + + private: + PyRecordWriter(); + + WritableFile* file_; // Owned + io::RecordWriter* writer_; // Owned + TF_DISALLOW_COPY_AND_ASSIGN(PyRecordWriter); +}; + +} // namespace io +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_PYTHON_LIB_IO_PY_RECORD_WRITER_H_ diff --git a/tensorflow/python/lib/io/py_record_writer.i b/tensorflow/python/lib/io/py_record_writer.i new file mode 100644 index 0000000000..20fe52c495 --- /dev/null +++ b/tensorflow/python/lib/io/py_record_writer.i @@ -0,0 +1,38 @@ +%nothread tensorflow::io::PyRecordWriter::WriteRecord; + +%include "tensorflow/python/platform/base.i" +%include "tensorflow/python/lib/core/strings.i" + +%feature("except") tensorflow::io::PyRecordWriter::New { + // Let other threads run while we open the file + Py_BEGIN_ALLOW_THREADS + $action + Py_END_ALLOW_THREADS +} + +%newobject tensorflow::io::PyRecordWriter::New; + +%feature("except") tensorflow::io::PyRecordWriter::WriteRecord { + // Let other threads run while we write + Py_BEGIN_ALLOW_THREADS + $action + Py_END_ALLOW_THREADS +} + +%{ +#include "tensorflow/python/lib/io/py_record_writer.h" +%} + +%ignoreall + +%unignore tensorflow; 
+%unignore tensorflow::io; +%unignore tensorflow::io::PyRecordWriter; +%unignore tensorflow::io::PyRecordWriter::~PyRecordWriter; +%unignore tensorflow::io::PyRecordWriter::WriteRecord; +%unignore tensorflow::io::PyRecordWriter::Close; +%unignore tensorflow::io::PyRecordWriter::New; + +%include "tensorflow/python/lib/io/py_record_writer.h" + +%unignoreall diff --git a/tensorflow/python/lib/io/python_io.py b/tensorflow/python/lib/io/python_io.py new file mode 100644 index 0000000000..aedcd2ef03 --- /dev/null +++ b/tensorflow/python/lib/io/python_io.py @@ -0,0 +1,29 @@ +"""## Data IO (Python Functions) + +A TFRecords file represents a sequence of (binary) strings. The format is not +random access, so it is suitable for streaming large amounts of data but not +suitable if fast sharding or other non-sequential access is desired. + +@@TFRecordWriter +@@tf_record_iterator + +- - - + +### TFRecords Format Details + +A TFRecords file contains a sequence of strings with CRC hashes. Each record +has the format + + uint64 length + uint32 masked_crc32_of_length + byte data[length] + uint32 masked_crc32_of_data + +and the records are concatenated together to produce the file. The CRC32s +are [described here](https://en.wikipedia.org/wiki/Cyclic_redundancy_check), +and the mask of a CRC is + + masked_crc = ((crc >> 15) | (crc << 17)) + 0xa282ead8ul +""" + +from tensorflow.python.lib.io.tf_record import * diff --git a/tensorflow/python/lib/io/tf_record.py b/tensorflow/python/lib/io/tf_record.py new file mode 100644 index 0000000000..00825bbda2 --- /dev/null +++ b/tensorflow/python/lib/io/tf_record.py @@ -0,0 +1,68 @@ +"""For reading and writing TFRecords files.""" + +from tensorflow.python import pywrap_tensorflow + + +def tf_record_iterator(path): + """An iterator that reads the records from a TFRecords file. + + Args: + path: The path to the TFRecords file. + + Yields: + Strings. + + Raises: + IOError: If `path` cannot be opened for reading. 
+ """ + reader = pywrap_tensorflow.PyRecordReader_New(path, 0) + if reader is None: + raise IOError("Could not open %s." % path) + while reader.GetNext(): + yield reader.record() + reader.Close() # NOTE(review): skipped if the generator is abandoned early. + + +class TFRecordWriter(object): + """A class to write records to a TFRecords file. + + This class implements `__enter__` and `__exit__`, and can be used + in `with` blocks like a normal file. + + @@__init__ + @@write + @@close + """ + # TODO(josh11b): Support appending? + def __init__(self, path): + """Opens file `path` and creates a `TFRecordWriter` writing to it. + + Args: + path: The path to the TFRecords file. + + Raises: + IOError: If `path` cannot be opened for writing. + """ + self._writer = pywrap_tensorflow.PyRecordWriter_New(path) + if self._writer is None: + raise IOError("Could not write to %s." % path) + + def __enter__(self): + """Enter a `with` block, returning this writer for `as` targets.""" + return self + + def __exit__(self, unused_type, unused_value, unused_traceback): + """Exit a `with` block, closing the file.""" + self.close() + + def write(self, record): + """Write a string record to the file. + + Args: + record: str + """ + self._writer.WriteRecord(record) # NOTE(review): success flag is ignored. + + def close(self): + """Close the file.""" + self._writer.Close() |