aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/python/lib/io/python_io.py
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/python/lib/io/python_io.py')
-rw-r--r--tensorflow/python/lib/io/python_io.py29
1 files changed, 29 insertions, 0 deletions
diff --git a/tensorflow/python/lib/io/python_io.py b/tensorflow/python/lib/io/python_io.py
new file mode 100644
index 0000000000..aedcd2ef03
--- /dev/null
+++ b/tensorflow/python/lib/io/python_io.py
@@ -0,0 +1,29 @@
+"""## Data IO (Python Functions)
+
+A TFRecords file represents a sequence of (binary) strings. The format is not
+random access, so it is suitable for streaming large amounts of data but not
+suitable if fast sharding or other non-sequential access is desired.
+
+@@TFRecordWriter
+@@tf_record_iterator
+
+- - -
+
+### TFRecords Format Details
+
+A TFRecords file contains a sequence of strings with CRC hashes. Each record
+has the format
+
+    uint64 length
+    uint32 masked_crc32_of_length
+    byte   data[length]
+    uint32 masked_crc32_of_data
+
+and the records are concatenated together to produce the file. The CRCs are
+CRC-32C checksums ([described here](https://en.wikipedia.org/wiki/Cyclic_redundancy_check)),
+and the mask of a CRC, computed in unsigned 32-bit (wrap-around) arithmetic, is
+
+    masked_crc = ((crc >> 15) | (crc << 17)) + 0xa282ead8ul
+"""
+
+from tensorflow.python.lib.io.tf_record import *