Diffstat (limited to 'tensorflow/core/kernels/data/slide_dataset_op.cc')
-rw-r--r-- tensorflow/core/kernels/data/slide_dataset_op.cc | 252
1 file changed, 252 insertions(+), 0 deletions(-)
diff --git a/tensorflow/core/kernels/data/slide_dataset_op.cc b/tensorflow/core/kernels/data/slide_dataset_op.cc
new file mode 100644
index 0000000000..4f3537b691
--- /dev/null
+++ b/tensorflow/core/kernels/data/slide_dataset_op.cc
@@ -0,0 +1,252 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/batch_util.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+
+namespace tensorflow {
+
+namespace {
+
+// See documentation in ../ops/dataset_ops.cc for a high-level
+// description of the following op.
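+//
+// The op produces sliding windows over its input: each output element is a
+// batch of `window_size` consecutive input elements, and consecutive windows
+// start `stride` input elements apart. For example, with window_size = 3 and
+// stride = 1, the input {a, b, c, d} yields the windows {a, b, c} and
+// {b, c, d}; a final window with fewer than window_size elements is dropped.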
+
+class SlideDatasetOp : public UnaryDatasetOpKernel {
+ public:
+ explicit SlideDatasetOp(OpKernelConstruction* ctx)
+ : UnaryDatasetOpKernel(ctx) {}
+
+ void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+ DatasetBase** output) override {
+ int64 window_size = 0;
+ int64 stride = 1;
+ OP_REQUIRES_OK(ctx,
+ ParseScalarArgument<int64>(ctx, "window_size", &window_size));
+ OP_REQUIRES_OK(ctx,
+ ParseScalarArgument<int64>(ctx, "stride", &stride));
+ OP_REQUIRES(
+ ctx, window_size > 0,
+ errors::InvalidArgument("Window size must be greater than zero."));
+ OP_REQUIRES(
+ ctx, stride > 0 && stride < window_size,
+ errors::InvalidArgument("Stride must be in [1, window_size)."));
+
+ *output = new Dataset(ctx, window_size, stride, input);
+ }
+
+ private:
+ class Dataset : public GraphDatasetBase {
+ public:
+ Dataset(OpKernelContext* ctx, int64 window_size, int64 stride, const DatasetBase* input)
+ : GraphDatasetBase(ctx), window_size_(window_size), stride_(stride), input_(input) {
+ input_->Ref();
+
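+ // Each output component is a window of input elements, so prepend an
+ // unknown (-1) leading window dimension to every input component shape.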
+ const auto& input_shapes = input_->output_shapes();
+ output_shapes_.reserve(input_shapes.size());
+ for (const auto& input_shape : input_shapes) {
+ output_shapes_.emplace_back(
+ PartialTensorShape({-1}).Concatenate(input_shape));
+ }
+ }
+
+ ~Dataset() override { input_->Unref(); }
+
+ std::unique_ptr<IteratorBase> MakeIterator(
+ const string& prefix) const override {
+ return std::unique_ptr<IteratorBase>(new Iterator(
+ Iterator::Params{this, strings::StrCat(prefix, "::Slide")}));
+ }
+
+ const DataTypeVector& output_dtypes() const override {
+ return input_->output_dtypes();
+ }
+
+ const std::vector<PartialTensorShape>& output_shapes() const override {
+ return output_shapes_;
+ }
+
+ string DebugString() override {
+ return strings::StrCat("SlideDatasetOp(", window_size_, ", ", stride_, ")::Dataset");
+ }
+
+ protected:
+ Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+ Node** output) const override {
+ Node* input_graph_node = nullptr;
+ TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node));
+ Node* window_size = nullptr;
+ Node* stride = nullptr;
+ TF_RETURN_IF_ERROR(b->AddScalar(window_size_, &window_size));
+ TF_RETURN_IF_ERROR(b->AddScalar(stride_, &stride));
+ TF_RETURN_IF_ERROR(
+ b->AddDataset(this, {input_graph_node, window_size, stride}, output));
+ return Status::OK();
+ }
+
+ private:
+
+ class Iterator : public DatasetIterator<Dataset> {
+ public:
+ explicit Iterator(const Params& params)
+ : DatasetIterator<Dataset>(params),
+ input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {}
+
+ Status GetNextInternal(IteratorContext* ctx,
+ std::vector<Tensor>* out_tensors,
+ bool* end_of_sequence) override {
+ const int64 window_size = dataset()->window_size_;
+ const int64 stride = dataset()->stride_;
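+ // Each call produces one window of `window_size` elements. All but the
+ // first `stride` elements of the previous window are reused from `cache_`;
+ // the remainder are read from the input iterator.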
+ std::vector<std::vector<Tensor>> batch_elements;
+ {
+ mutex_lock l(mu_);
+ if (!input_impl_) {
+ *end_of_sequence = true;
+ return Status::OK();
+ }
+ batch_elements.reserve(window_size);
+ const bool first_call = cache_.empty();
+ if (first_call) {
+ cache_.reserve(window_size);
+ } else {
+ // Reuse the cache from the previous iteration.
+ cache_.swap(batch_elements);
+ }
+ // Fill up with new elements.
+ *end_of_sequence = false;
+ for (size_t i = batch_elements.size(); i < window_size && !*end_of_sequence;
+ ++i) {
+ std::vector<Tensor> batch_element_tuple;
+ TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &batch_element_tuple,
+ end_of_sequence));
+ if (!*end_of_sequence) {
+ batch_elements.push_back(std::move(batch_element_tuple));
+ } else {
+ input_impl_.reset();
+ }
+ }
+ // Drop the final window if it has fewer than window_size elements.
+ if (batch_elements.size() < window_size) {
+ DCHECK(*end_of_sequence);
+ return Status::OK();
+ }
+ // Cache the data used for the next iteration.
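+ // Copies (rather than moves) are required because batch_elements is
+ // still consumed below when building the output tensors.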
+ for (size_t i = stride; i < window_size; ++i) {
+ cache_.emplace_back(batch_elements[i]);
+ }
+ }
+
+ // Construct the output tensors.
+ // The code below is copied from batch_dataset_op.cc.
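+ // Each output component has shape [window_size] + input_component_shape.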
+ const size_t num_tuple_components = batch_elements[0].size();
+ const int64 num_batch_elements = batch_elements.size();
+ for (size_t component_index = 0; component_index < num_tuple_components;
+ ++component_index) {
+ const Tensor& first_element = batch_elements[0][component_index];
+ TensorShape batch_component_shape({num_batch_elements});
+ batch_component_shape.AppendShape(first_element.shape());
+ Tensor batch_component(cpu_allocator(), first_element.dtype(),
+ batch_component_shape);
+ // Build the output tuple component by copying one slice
+ // from each input element in the batch.
+ for (size_t i = 0; i < num_batch_elements; ++i) {
+ if (batch_elements[i][component_index].shape() !=
+ first_element.shape()) {
+ return errors::InvalidArgument(
+ "Cannot batch tensors with different shapes in component ",
+ component_index, ". First element had shape ",
+ first_element.shape().DebugString(), " and element ", i,
+ " had shape ",
+ batch_elements[i][component_index].shape().DebugString(),
+ ".");
+ }
+ TF_RETURN_IF_ERROR(batch_util::CopyElementToSlice(
+ std::move(batch_elements[i][component_index]), &batch_component,
+ i));
+ }
+ out_tensors->emplace_back(std::move(batch_component));
+ }
+ *end_of_sequence = false;
+ return Status::OK();
+ }
+
+ protected:
+ Status SaveInternal(IteratorStateWriter* writer) override {
+ mutex_lock l(mu_);
+ if (!input_impl_) {
+ TF_RETURN_IF_ERROR(
+ writer->WriteScalar(full_name("input_impl_empty"), ""));
+ } else {
+ TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+ }
+ // Save cache.
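+ // The cache is written under full_name() so the keys are scoped to this
+ // iterator; it is serialized as its size followed by each element's tuple
+ // size and tensors. RestoreInternal below reads back the same layout.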
+ TF_RETURN_IF_ERROR(
+ writer->WriteScalar(full_name("cache_size"), cache_.size()));
+ for (int64 i = 0; i < cache_.size(); i++) {
+ TF_RETURN_IF_ERROR(writer->WriteScalar(
+ full_name(strings::StrCat("cache[", i, "]_size")), cache_[i].size()));
+ for (int64 j = 0; j < cache_[i].size(); j++) {
+ TF_RETURN_IF_ERROR(writer->WriteTensor(
+ full_name(strings::StrCat("cache[", i, "][", j, "]")), cache_[i][j]));
+ }
+ }
+ return Status::OK();
+ }
+
+ Status RestoreInternal(IteratorContext* ctx,
+ IteratorStateReader* reader) override {
+ mutex_lock l(mu_);
+ if (!reader->Contains(full_name("input_impl_empty"))) {
+ TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+ } else {
+ input_impl_.reset();
+ }
+ // Restore cache.
+ int64 cache_size;
+ TF_RETURN_IF_ERROR(
+ reader->ReadScalar(full_name("cache_size"), &cache_size));
+ cache_.resize(cache_size);
+ for (int64 i = 0; i < cache_size; i++) {
+ int64 vector_size;
+ TF_RETURN_IF_ERROR(reader->ReadScalar(
+ full_name(strings::StrCat("cache[", i, "]_size")), &vector_size));
+ cache_[i].resize(vector_size);
+ for (int64 j = 0; j < vector_size; j++) {
+ TF_RETURN_IF_ERROR(reader->ReadTensor(
+ full_name(strings::StrCat("cache[", i, "][", j, "]")), &cache_[i][j]));
+ }
+ }
+ return Status::OK();
+ }
+
+ private:
+ mutex mu_;
+ std::vector<std::vector<Tensor>> cache_ GUARDED_BY(mu_);
+ std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
+ };
+
+ const int64 window_size_;
+ const int64 stride_;
+ const DatasetBase* const input_;
+ std::vector<PartialTensorShape> output_shapes_;
+ };
+};
+
+REGISTER_KERNEL_BUILDER(Name("SlideDataset").Device(DEVICE_CPU),
+ SlideDatasetOp);
+
+} // namespace
+
+} // namespace tensorflow