aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/python/lib/io
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/python/lib/io')
-rwxr-xr-xtensorflow/python/lib/io/__init__.py0
-rw-r--r--tensorflow/python/lib/io/py_record_reader.cc49
-rw-r--r--tensorflow/python/lib/io/py_record_reader.h50
-rw-r--r--tensorflow/python/lib/io/py_record_reader.i39
-rw-r--r--tensorflow/python/lib/io/py_record_writer.cc44
-rw-r--r--tensorflow/python/lib/io/py_record_writer.h38
-rw-r--r--tensorflow/python/lib/io/py_record_writer.i38
-rw-r--r--tensorflow/python/lib/io/python_io.py29
-rw-r--r--tensorflow/python/lib/io/tf_record.py68
9 files changed, 355 insertions, 0 deletions
diff --git a/tensorflow/python/lib/io/__init__.py b/tensorflow/python/lib/io/__init__.py
new file mode 100755
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tensorflow/python/lib/io/__init__.py
diff --git a/tensorflow/python/lib/io/py_record_reader.cc b/tensorflow/python/lib/io/py_record_reader.cc
new file mode 100644
index 0000000000..5cc5229a8b
--- /dev/null
+++ b/tensorflow/python/lib/io/py_record_reader.cc
@@ -0,0 +1,49 @@
+#include "tensorflow/python/lib/io/py_record_reader.h"
+
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/io/record_reader.h"
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/public/env.h"
+
+namespace tensorflow {
+
+class RandomAccessFile;
+
+namespace io {
+
+PyRecordReader::PyRecordReader() {}
+
+PyRecordReader* PyRecordReader::New(const string& filename,
+ uint64 start_offset) {
+ RandomAccessFile* file;
+ Status s = Env::Default()->NewRandomAccessFile(filename, &file);
+ if (!s.ok()) {
+ return nullptr;
+ }
+ PyRecordReader* reader = new PyRecordReader;
+ reader->offset_ = start_offset;
+ reader->file_ = file;
+ reader->reader_ = new RecordReader(reader->file_);
+ return reader;
+}
+
+PyRecordReader::~PyRecordReader() {
+ delete reader_;
+ delete file_;
+}
+
+bool PyRecordReader::GetNext() {
+ if (reader_ == nullptr) return false;
+ Status s = reader_->ReadRecord(&offset_, &record_);
+ return s.ok();
+}
+
+void PyRecordReader::Close() {
+ delete reader_;
+ delete file_;
+ file_ = nullptr;
+ reader_ = nullptr;
+}
+
+} // namespace io
+} // namespace tensorflow
diff --git a/tensorflow/python/lib/io/py_record_reader.h b/tensorflow/python/lib/io/py_record_reader.h
new file mode 100644
index 0000000000..5a775761df
--- /dev/null
+++ b/tensorflow/python/lib/io/py_record_reader.h
@@ -0,0 +1,50 @@
+#ifndef TENSORFLOW_PYTHON_LIB_IO_PY_RECORD_READER_H_
+#define TENSORFLOW_PYTHON_LIB_IO_PY_RECORD_READER_H_
+
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/public/status.h"
+
+namespace tensorflow {
+
+class RandomAccessFile;
+
+namespace io {
+
+class RecordReader;
+
+// A wrapper around io::RecordReader that is more easily SWIG wrapped for
+// Python. An instance of this class is not safe for concurrent access
+// by multiple threads.
+class PyRecordReader {
+ public:
+ static PyRecordReader* New(const string& filename, uint64 start_offset);
+ ~PyRecordReader();
+
+ // Attempt to get the next record at "current_offset()". If
+ // successful, returns true, and the record contents can be retrieve
+ // with "this->record()". Otherwise, returns false.
+ bool GetNext();
+ // Return the current record contents. Only valid after the preceding call
+ // to GetNext() returned true
+ string record() const { return record_; }
+ // Return the current offset in the file.
+ uint64 offset() const { return offset_; }
+
+ // Close the underlying file and release its resources.
+ void Close();
+
+ private:
+ PyRecordReader();
+
+ uint64 offset_;
+ RandomAccessFile* file_; // Owned
+ io::RecordReader* reader_; // Owned
+ string record_;
+ TF_DISALLOW_COPY_AND_ASSIGN(PyRecordReader);
+};
+
+} // namespace io
+} // namespace tensorflow
+
+#endif // TENSORFLOW_PYTHON_LIB_IO_PY_RECORD_READER_H_
diff --git a/tensorflow/python/lib/io/py_record_reader.i b/tensorflow/python/lib/io/py_record_reader.i
new file mode 100644
index 0000000000..19f911bd52
--- /dev/null
+++ b/tensorflow/python/lib/io/py_record_reader.i
@@ -0,0 +1,39 @@
+%nothread tensorflow::io::PyRecordReader::GetNext;
+
+%include "tensorflow/python/platform/base.i"
+
+%feature("except") tensorflow::io::PyRecordReader::New {
+ // Let other threads run while we read
+ Py_BEGIN_ALLOW_THREADS
+ $action
+ Py_END_ALLOW_THREADS
+}
+
+%newobject tensorflow::io::PyRecordReader::New;
+
+%feature("except") tensorflow::io::PyRecordReader::GetNext {
+ // Let other threads run while we read
+ Py_BEGIN_ALLOW_THREADS
+ $action
+ Py_END_ALLOW_THREADS
+}
+
+%{
+#include "tensorflow/python/lib/io/py_record_reader.h"
+%}
+
+%ignoreall
+
+%unignore tensorflow;
+%unignore tensorflow::io;
+%unignore tensorflow::io::PyRecordReader;
+%unignore tensorflow::io::PyRecordReader::~PyRecordReader;
+%unignore tensorflow::io::PyRecordReader::GetNext;
+%unignore tensorflow::io::PyRecordReader::offset;
+%unignore tensorflow::io::PyRecordReader::record;
+%unignore tensorflow::io::PyRecordReader::Close;
+%unignore tensorflow::io::PyRecordReader::New;
+
+%include "tensorflow/python/lib/io/py_record_reader.h"
+
+%unignoreall
diff --git a/tensorflow/python/lib/io/py_record_writer.cc b/tensorflow/python/lib/io/py_record_writer.cc
new file mode 100644
index 0000000000..e557756cbc
--- /dev/null
+++ b/tensorflow/python/lib/io/py_record_writer.cc
@@ -0,0 +1,44 @@
+#include "tensorflow/python/lib/io/py_record_writer.h"
+
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/lib/io/record_writer.h"
+#include "tensorflow/core/public/env.h"
+
+namespace tensorflow {
+namespace io {
+
+PyRecordWriter::PyRecordWriter() {}
+
+PyRecordWriter* PyRecordWriter::New(const string& filename) {
+ WritableFile* file;
+ Status s = Env::Default()->NewWritableFile(filename, &file);
+ if (!s.ok()) {
+ return nullptr;
+ }
+ PyRecordWriter* writer = new PyRecordWriter;
+ writer->file_ = file;
+ writer->writer_ = new RecordWriter(writer->file_);
+ return writer;
+}
+
+PyRecordWriter::~PyRecordWriter() {
+ delete writer_;
+ delete file_;
+}
+
+bool PyRecordWriter::WriteRecord(::tensorflow::StringPiece record) {
+ if (writer_ == nullptr) return false;
+ Status s = writer_->WriteRecord(record);
+ return s.ok();
+}
+
+void PyRecordWriter::Close() {
+ delete writer_;
+ delete file_;
+ writer_ = nullptr;
+ file_ = nullptr;
+}
+
+} // namespace io
+} // namespace tensorflow
diff --git a/tensorflow/python/lib/io/py_record_writer.h b/tensorflow/python/lib/io/py_record_writer.h
new file mode 100644
index 0000000000..e3fd05bd9a
--- /dev/null
+++ b/tensorflow/python/lib/io/py_record_writer.h
@@ -0,0 +1,38 @@
+#ifndef THIRD_PARTY_TENSORFLOW_PYTHON_LIB_IO_PY_RECORD_WRITER_H_
+#define THIRD_PARTY_TENSORFLOW_PYTHON_LIB_IO_PY_RECORD_WRITER_H_
+
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/public/status.h"
+
+namespace tensorflow {
+
+class WritableFile;
+
+namespace io {
+
+class RecordWriter;
+
+// A wrapper around io::RecordWriter that is more easily SWIG wrapped for
+// Python. An instance of this class is not safe for concurrent access
+// by multiple threads.
+class PyRecordWriter {
+ public:
+ static PyRecordWriter* New(const string& filename);
+ ~PyRecordWriter();
+
+ bool WriteRecord(::tensorflow::StringPiece record);
+ void Close();
+
+ private:
+ PyRecordWriter();
+
+ WritableFile* file_; // Owned
+ io::RecordWriter* writer_; // Owned
+ TF_DISALLOW_COPY_AND_ASSIGN(PyRecordWriter);
+};
+
+} // namespace io
+} // namespace tensorflow
+
+#endif // THIRD_PARTY_TENSORFLOW_PYTHON_LIB_IO_PY_RECORD_WRITER_H_
diff --git a/tensorflow/python/lib/io/py_record_writer.i b/tensorflow/python/lib/io/py_record_writer.i
new file mode 100644
index 0000000000..20fe52c495
--- /dev/null
+++ b/tensorflow/python/lib/io/py_record_writer.i
@@ -0,0 +1,38 @@
+%nothread tensorflow::io::PyRecordWriter::WriteRecord;
+
+%include "tensorflow/python/platform/base.i"
+%include "tensorflow/python/lib/core/strings.i"
+
+%feature("except") tensorflow::io::PyRecordWriter::New {
+ // Let other threads run while we write
+ Py_BEGIN_ALLOW_THREADS
+ $action
+ Py_END_ALLOW_THREADS
+}
+
+%newobject tensorflow::io::PyRecordWriter::New;
+
+%feature("except") tensorflow::io::PyRecordWriter::WriteRecord {
+ // Let other threads run while we write
+ Py_BEGIN_ALLOW_THREADS
+ $action
+ Py_END_ALLOW_THREADS
+}
+
+%{
+#include "tensorflow/python/lib/io/py_record_writer.h"
+%}
+
+%ignoreall
+
+%unignore tensorflow;
+%unignore tensorflow::io;
+%unignore tensorflow::io::PyRecordWriter;
+%unignore tensorflow::io::PyRecordWriter::~PyRecordWriter;
+%unignore tensorflow::io::PyRecordWriter::WriteRecord;
+%unignore tensorflow::io::PyRecordWriter::Close;
+%unignore tensorflow::io::PyRecordWriter::New;
+
+%include "tensorflow/python/lib/io/py_record_writer.h"
+
+%unignoreall
diff --git a/tensorflow/python/lib/io/python_io.py b/tensorflow/python/lib/io/python_io.py
new file mode 100644
index 0000000000..aedcd2ef03
--- /dev/null
+++ b/tensorflow/python/lib/io/python_io.py
@@ -0,0 +1,29 @@
+"""## Data IO (Python Functions)
+
+A TFRecords file represents a sequence of (binary) strings. The format is not
+random access, so it is suitable for streaming large amounts of data but not
+suitable if fast sharding or other non-sequential access is desired.
+
+@@TFRecordWriter
+@@tf_record_iterator
+
+- - -
+
+### TFRecords Format Details
+
+A TFRecords file contains a sequence of strings with CRC hashes. Each record
+has the format
+
+ uint64 length
+ uint32 masked_crc32_of_length
+ byte data[length]
+ uint32 masked_crc32_of_data
+
+and the records are concatenated together to produce the file. The CRC32s
+are [described here](https://en.wikipedia.org/wiki/Cyclic_redundancy_check),
+and the mask of a CRC is
+
+ masked_crc = ((crc >> 15) | (crc << 17)) + 0xa282ead8ul
+"""
+
+from tensorflow.python.lib.io.tf_record import *
diff --git a/tensorflow/python/lib/io/tf_record.py b/tensorflow/python/lib/io/tf_record.py
new file mode 100644
index 0000000000..00825bbda2
--- /dev/null
+++ b/tensorflow/python/lib/io/tf_record.py
@@ -0,0 +1,68 @@
+"""For reading and writing TFRecords files."""
+
+from tensorflow.python import pywrap_tensorflow
+
+
+def tf_record_iterator(path):
+ """An iterator that read the records from a TFRecords file.
+
+ Args:
+ path: The path to the TFRecords file.
+
+ Yields:
+ Strings.
+
+ Raises:
+ IOError: If `path` cannot be opened for reading.
+ """
+ reader = pywrap_tensorflow.PyRecordReader_New(path, 0)
+ if reader is None:
+ raise IOError("Could not open %s." % path)
+ while reader.GetNext():
+ yield reader.record()
+ reader.Close()
+
+
+class TFRecordWriter(object):
+ """A class to write records to a TFRecords file.
+
+ This class implements `__enter__` and `__exit__`, and can be used
+ in `with` blocks like a normal file.
+
+ @@__init__
+ @@write
+ @@close
+ """
+ # TODO(josh11b): Support appending?
+ def __init__(self, path):
+ """Opens file `path` and creates a `TFRecordWriter` writing to it.
+
+ Args:
+ path: The path to the TFRecords file.
+
+ Raises:
+ IOError: If `path` cannot be opened for writing.
+ """
+ self._writer = pywrap_tensorflow.PyRecordWriter_New(path)
+ if self._writer is None:
+ raise IOError("Could not write to %s." % path)
+
+ def __enter__(self):
+ """Enter a `with` block."""
+ pass
+
+ def __exit__(self, unused_type, unused_value, unused_traceback):
+ """Exit a `with` block, closing the file."""
+ self.close()
+
+ def write(self, record):
+ """Write a string record to the file.
+
+ Args:
+ record: str
+ """
+ self._writer.WriteRecord(record)
+
+ def close(self):
+ """Close the file."""
+ self._writer.Close()