diff options
author | 2018-01-02 17:02:52 -0800 | |
---|---|---|
committer | 2018-01-02 17:06:11 -0800 | |
commit | a4301f0baa173a347caa34c606c19fd139b055e0 (patch) | |
tree | 83d4f2874574a5c0f80794c1b1e22baa4ef42c56 | |
parent | 6a20edf95fcaf45c46385eaf649e814a571737ed (diff) |
[tf.data] Improve error message in `tf.data.FixedLengthRecordDataset`.
The iterator now eagerly returns an error when the size of an input file
(minus header and footer) is not an exact multiple of the fixed record length.
PiperOrigin-RevId: 180612356
-rw-r--r-- | tensorflow/core/kernels/data/dataset.h | 11 | ||||
-rw-r--r-- | tensorflow/core/kernels/data/reader_dataset_ops.cc | 14 | ||||
-rw-r--r-- | tensorflow/core/kernels/data/shuffle_dataset_op.cc | 1 | ||||
-rw-r--r-- | tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py | 18 |
4 files changed, 43 insertions, 1 deletion
diff --git a/tensorflow/core/kernels/data/dataset.h b/tensorflow/core/kernels/data/dataset.h index 7e01535bd8..cf4722af05 100644 --- a/tensorflow/core/kernels/data/dataset.h +++ b/tensorflow/core/kernels/data/dataset.h @@ -485,7 +485,16 @@ class DatasetIterator : public IteratorBase { Status GetNext(IteratorContext* ctx, std::vector<Tensor>* out_tensors, bool* end_of_sequence) final { port::Tracing::TraceMe activity(params_.prefix); - return GetNextInternal(ctx, out_tensors, end_of_sequence); + Status s = GetNextInternal(ctx, out_tensors, end_of_sequence); + if (TF_PREDICT_FALSE(errors::IsOutOfRange(s) && !*end_of_sequence)) { + s = errors::Internal( + "Iterator \"", params_.prefix, + "\" returned OutOfRange without setting `*end_of_sequence`. This " + "indicates that an error may have occurred. Original message: ", + s.error_message()); + LOG(ERROR) << s; + } + return s; } Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) final { diff --git a/tensorflow/core/kernels/data/reader_dataset_ops.cc b/tensorflow/core/kernels/data/reader_dataset_ops.cc index 557e98c1e6..74cb78e4f4 100644 --- a/tensorflow/core/kernels/data/reader_dataset_ops.cc +++ b/tensorflow/core/kernels/data/reader_dataset_ops.cc @@ -409,6 +409,20 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel { TF_RETURN_IF_ERROR(ctx->env()->GetFileSize( dataset()->filenames_[current_file_index_], &file_size)); file_pos_limit_ = file_size - dataset()->footer_bytes_; + + uint64 body_size = + file_size - (dataset()->header_bytes_ + dataset()->footer_bytes_); + + if (body_size % dataset()->record_bytes_ != 0) { + return errors::InvalidArgument( + "Excluding the header (", dataset()->header_bytes_, + " bytes) and footer (", dataset()->footer_bytes_, + " bytes), input file \"", + dataset()->filenames_[current_file_index_], + "\" has body length ", body_size, + " bytes, which is not an exact multiple of the record length (", + dataset()->record_bytes_, " bytes)."); + } 
TF_RETURN_IF_ERROR(ctx->env()->NewRandomAccessFile( dataset()->filenames_[current_file_index_], &file_)); input_buffer_.reset( diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc index caef449b8e..11b13eeaa8 100644 --- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc @@ -107,6 +107,7 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel { // sequence has been reached, we return an OutOfRange error to // terminate the iteration. (Otherwise, this iterator may loop // infinitely and never produce a value.) + *end_of_sequence = true; return errors::OutOfRange( "Attempted to repeat an empty dataset infinitely."); } diff --git a/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py index c8e7333b4b..d7140088c3 100644 --- a/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py @@ -272,6 +272,24 @@ class FixedLengthRecordReaderTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(iterator.get_next()) + def testFixedLengthRecordDatasetWrongSize(self): + test_filenames = self._createFiles() + dataset = readers.FixedLengthRecordDataset( + test_filenames, + self._record_bytes + 1, # Incorrect record length. + self._header_bytes, + self._footer_bytes, + buffer_size=10) + iterator = dataset.make_one_shot_iterator() + + with self.test_session() as sess: + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r"Excluding the header \(5 bytes\) and footer \(2 bytes\), input " + r"file \".*fixed_length_record.0.txt\" has body length 21 bytes, " + r"which is not an exact multiple of the record length \(4 bytes\)."): + sess.run(iterator.get_next()) + def _iterator_checkpoint_path(self): return os.path.join(self.get_temp_dir(), "iterator") |