about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Derek Murray <mrry@google.com> 2018-01-02 17:02:52 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-01-02 17:06:11 -0800
commit: a4301f0baa173a347caa34c606c19fd139b055e0 (patch)
tree: 83d4f2874574a5c0f80794c1b1e22baa4ef42c56
parent: 6a20edf95fcaf45c46385eaf649e814a571737ed (diff)
[tf.data] Improve error message in `tf.data.FixedLengthRecordDataset`.
The iterator now eagerly returns an error when the size of an input file (minus header and footer) is not an exact multiple of the fixed record length.

PiperOrigin-RevId: 180612356
-rw-r--r-- tensorflow/core/kernels/data/dataset.h 11
-rw-r--r-- tensorflow/core/kernels/data/reader_dataset_ops.cc 14
-rw-r--r-- tensorflow/core/kernels/data/shuffle_dataset_op.cc 1
-rw-r--r-- tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py 18
4 files changed, 43 insertions, 1 deletion
diff --git a/tensorflow/core/kernels/data/dataset.h b/tensorflow/core/kernels/data/dataset.h
index 7e01535bd8..cf4722af05 100644
--- a/tensorflow/core/kernels/data/dataset.h
+++ b/tensorflow/core/kernels/data/dataset.h
@@ -485,7 +485,16 @@ class DatasetIterator : public IteratorBase {
Status GetNext(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
bool* end_of_sequence) final {
port::Tracing::TraceMe activity(params_.prefix);
- return GetNextInternal(ctx, out_tensors, end_of_sequence);
+ Status s = GetNextInternal(ctx, out_tensors, end_of_sequence);
+ if (TF_PREDICT_FALSE(errors::IsOutOfRange(s) && !*end_of_sequence)) {
+ s = errors::Internal(
+ "Iterator \"", params_.prefix,
+ "\" returned OutOfRange without setting `*end_of_sequence`. This "
+ "indicates that an error may have occurred. Original message: ",
+ s.error_message());
+ LOG(ERROR) << s;
+ }
+ return s;
}
Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) final {
diff --git a/tensorflow/core/kernels/data/reader_dataset_ops.cc b/tensorflow/core/kernels/data/reader_dataset_ops.cc
index 557e98c1e6..74cb78e4f4 100644
--- a/tensorflow/core/kernels/data/reader_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/reader_dataset_ops.cc
@@ -409,6 +409,20 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel {
TF_RETURN_IF_ERROR(ctx->env()->GetFileSize(
dataset()->filenames_[current_file_index_], &file_size));
file_pos_limit_ = file_size - dataset()->footer_bytes_;
+
+ uint64 body_size =
+ file_size - (dataset()->header_bytes_ + dataset()->footer_bytes_);
+
+ if (body_size % dataset()->record_bytes_ != 0) {
+ return errors::InvalidArgument(
+ "Excluding the header (", dataset()->header_bytes_,
+ " bytes) and footer (", dataset()->footer_bytes_,
+ " bytes), input file \"",
+ dataset()->filenames_[current_file_index_],
+ "\" has body length ", body_size,
+ " bytes, which is not an exact multiple of the record length (",
+ dataset()->record_bytes_, " bytes).");
+ }
TF_RETURN_IF_ERROR(ctx->env()->NewRandomAccessFile(
dataset()->filenames_[current_file_index_], &file_));
input_buffer_.reset(
diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
index caef449b8e..11b13eeaa8 100644
--- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc
+++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
@@ -107,6 +107,7 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel {
// sequence has been reached, we return an OutOfRange error to
// terminate the iteration. (Otherwise, this iterator may loop
// infinitely and never produce a value.)
+ *end_of_sequence = true;
return errors::OutOfRange(
"Attempted to repeat an empty dataset infinitely.");
}
diff --git a/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py
index c8e7333b4b..d7140088c3 100644
--- a/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py
+++ b/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py
@@ -272,6 +272,24 @@ class FixedLengthRecordReaderTest(test.TestCase):
with self.assertRaises(errors.OutOfRangeError):
sess.run(iterator.get_next())
+ def testFixedLengthRecordDatasetWrongSize(self):
+ test_filenames = self._createFiles()
+ dataset = readers.FixedLengthRecordDataset(
+ test_filenames,
+ self._record_bytes + 1, # Incorrect record length.
+ self._header_bytes,
+ self._footer_bytes,
+ buffer_size=10)
+ iterator = dataset.make_one_shot_iterator()
+
+ with self.test_session() as sess:
+ with self.assertRaisesRegexp(
+ errors.InvalidArgumentError,
+ r"Excluding the header \(5 bytes\) and footer \(2 bytes\), input "
+ r"file \".*fixed_length_record.0.txt\" has body length 21 bytes, "
+ r"which is not an exact multiple of the record length \(4 bytes\)."):
+ sess.run(iterator.get_next())
+
def _iterator_checkpoint_path(self):
return os.path.join(self.get_temp_dir(), "iterator")