Rollback changes to crop_and_resize.

END_PUBLIC

BEGIN_PUBLIC
Automated g4 rollback of changelist 155203119

PiperOrigin-RevId: 156748870
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-05-22 09:52:17 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-05-22 09:55:57 -0700
commit: f7df71b97389a71a3e21334ebe40b0b05e569fc3 (patch)
tree: 792af1f4c03bb107432a6d7197689dc98c36b620 /tensorflow/core/kernels/crop_and_resize_op.cc
parent: 629755ccac2beb9ea9ea16e0782b9b5084672f6a (diff)
1 files changed, 244 insertions, 324 deletions
diff --git a/tensorflow/core/kernels/crop_and_resize_op.cc b/tensorflow/core/kernels/crop_and_resize_op.cc
index c68a8b0bd2..746fe63e2a 100644
--- a/tensorflow/core/kernels/crop_and_resize_op.cc
+++ b/tensorflow/core/kernels/crop_and_resize_op.cc
@@ -19,9 +19,6 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/crop_and_resize_op.h"
 
-#include <functional>
-#include <string>
-
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
@@ -29,13 +26,10 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/kernels/bounds_check.h"
-#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/types.h"
 
 #if GOOGLE_CUDA
-#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #endif  // GOOGLE_CUDA
 
@@ -43,67 +37,41 @@ namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-using Callback = std::function<void()>;
-
-namespace {
 
-static inline Status ParseAndCheckBoxSizes(const Tensor& boxes,
-                                           const Tensor& box_index,
-                                           int* num_boxes) {
-  if (boxes.NumElements() == 0 && box_index.NumElements() == 0) {
+static inline void ParseAndCheckBoxSizes(OpKernelContext* context,
+                                         const Tensor& boxes,
+                                         const Tensor& box_ind,
+                                         int* num_boxes) {
+  if (boxes.NumElements() == 0 && box_ind.NumElements() == 0) {
     *num_boxes = 0;
-    return Status::OK();
+    return;
   }
   // The shape of 'boxes' is [num_boxes, 4].
-  if (boxes.dims() != 2) {
-    return errors::InvalidArgument("boxes must be 2-D",
-                                   boxes.shape().DebugString());
-  }
+  OP_REQUIRES(context, boxes.dims() == 2,
+              errors::InvalidArgument("boxes must be 2-D",
+                                      boxes.shape().DebugString()));
   *num_boxes = boxes.dim_size(0);
-  if (boxes.dim_size(1) != 4) {
-    return errors::InvalidArgument("boxes must have 4 columns");
-  }
-  // The shape of 'box_index' is [num_boxes].
-  if (box_index.dims() != 1) {
-    return errors::InvalidArgument("box_index must be 1-D",
-                                   box_index.shape().DebugString());
-  }
-  if (box_index.dim_size(0) != *num_boxes) {
-    return errors::InvalidArgument("box_index has incompatible shape");
-  }
-  return Status::OK();
+  OP_REQUIRES(context, boxes.dim_size(1) == 4,
+              errors::InvalidArgument("boxes must have 4 columns"));
+
+  // The shape of 'box_ind' is [num_boxes].
+  OP_REQUIRES(context, box_ind.dims() == 1,
+              errors::InvalidArgument("box_ind must be 1-D",
+                                      box_ind.shape().DebugString()));
+  OP_REQUIRES(context, box_ind.dim_size(0) == *num_boxes,
+              errors::InvalidArgument("box_ind has incompatible shape"));
 }
 
-// Conditionally calls the compute callback if all values in box_index are in
-// [0, batch_size) then calls done.
+// Verifies that all values in box_ind are in [0, batch).
 template <typename Device>
-inline void RunIfBoxIndexIsValid(
-    OpKernelContext* context, typename TTypes<int32, 1>::ConstTensor box_index,
-    int batch_size, const Callback& compute, const Callback& done);
-
-// Specialization of CheckValidBoxIndex for a CPUDevice.
-template <>
-inline void RunIfBoxIndexIsValid<CPUDevice>(
-    OpKernelContext* context, typename TTypes<int32, 1>::ConstTensor box_index,
-    int batch_size, const Callback& compute, const Callback& done) {
-  const int num_boxes = box_index.dimension(0);
-  for (int b = 0; b < num_boxes; ++b) {
-    OP_REQUIRES_ASYNC(
-        context, FastBoundsCheck(box_index(b), batch_size),
-        errors::OutOfRange("box_index has values outside [0, batch_size)"),
-        done);
-  }
-  compute();
-  done();
-}
-
-}  // namespace
+inline void CheckValidBoxInd(
+    OpKernelContext* context,
+    typename TTypes<int32, 1>::ConstTensor box_ind_data, int batch);
 
 template <typename Device, typename T>
-class CropAndResizeOp : public AsyncOpKernel {
+class CropAndResizeOp : public OpKernel {
  public:
-  explicit CropAndResizeOp(OpKernelConstruction* context)
-      : AsyncOpKernel(context) {
+  explicit CropAndResizeOp(OpKernelConstruction* context) : OpKernel(context) {
     string method;
     OP_REQUIRES_OK(context, context->GetAttr("method", &method));
     OP_REQUIRES(context, method == "bilinear",
@@ -112,77 +80,69 @@ class CropAndResizeOp : public AsyncOpKernel {
                                              &extrapolation_value_));
   }
 
-  void ComputeAsync(OpKernelContext* context, DoneCallback done) override {
-    // The shape of 'image' is [batch_size, image_height, image_width,
-    // channels].
+  void Compute(OpKernelContext* context) override {
+    // The shape of 'image' is [batch, image_height, image_width, channels].
     const Tensor& image = context->input(0);
-    // The shape of 'boxes' is [num_boxes, 4].
-    const Tensor& boxes = context->input(1);
-    // The shape of 'box_index' is [num_boxes].
-    const Tensor& box_index = context->input(2);
-    // The shape of 'crop_size' is [2].
-    const Tensor& crop_size = context->input(3);
+    OP_REQUIRES(context, image.dims() == 4,
+                errors::InvalidArgument("input image must be 4-D",
+                                        image.shape().DebugString()));
 
-    // Validate inputs dimensions.
-    OP_REQUIRES_ASYNC(context, image.dims() == 4,
-                      errors::InvalidArgument("input image must be 4-D",
-                                              image.shape().DebugString()),
-                      done);
-    const int batch_size = image.dim_size(0);
+    const int batch = image.dim_size(0);
     const int image_height = image.dim_size(1);
     const int image_width = image.dim_size(2);
     const int depth = image.dim_size(3);
-    OP_REQUIRES_ASYNC(
-        context, image_height > 0 && image_width > 0,
-        errors::InvalidArgument("image dimensions must be positive"), done);
+    OP_REQUIRES(context, image_height > 0 && image_width > 0,
+                errors::InvalidArgument("image dimensions must be positive"));
+
+    // The shape of 'boxes' is [num_boxes, 4].
+    const Tensor& boxes = context->input(1);
+
+    // The shape of 'box_ind' is [num_boxes].
+    const Tensor& box_ind = context->input(2);
+
     int num_boxes = 0;
-    OP_REQUIRES_OK_ASYNC(
-        context, ParseAndCheckBoxSizes(boxes, box_index, &num_boxes), done);
-
-    OP_REQUIRES_ASYNC(context, crop_size.dims() == 1,
-                      errors::InvalidArgument("crop_size must be 1-D",
-                                              crop_size.shape().DebugString()),
-                      done);
-    OP_REQUIRES_ASYNC(
-        context, crop_size.dim_size(0) == 2,
-        errors::InvalidArgument("crop_size must have two elements",
-                                crop_size.shape().DebugString()),
-        done);
-
-    // Copy and validate crop sizes.
+    ParseAndCheckBoxSizes(context, boxes, box_ind, &num_boxes);
+
+    // The shape of 'crop_size' is [2].
+    const Tensor& crop_size = context->input(3);
+
+    OP_REQUIRES(context, crop_size.dims() == 1,
+                errors::InvalidArgument("crop_size must be 1-D",
+                                        crop_size.shape().DebugString()));
+    OP_REQUIRES(context, crop_size.dim_size(0) == 2,
+                errors::InvalidArgument("crop_size must have two elements",
+                                        crop_size.shape().DebugString()));
+
     auto crop_size_vec = crop_size.vec<int32>();
     const int crop_height = internal::SubtleMustCopy(crop_size_vec(0));
     const int crop_width = internal::SubtleMustCopy(crop_size_vec(1));
-    OP_REQUIRES_ASYNC(
-        context, crop_height > 0 && crop_width > 0,
-        errors::InvalidArgument("crop dimensions must be positive"), done);
+    OP_REQUIRES(context, crop_height > 0 && crop_width > 0,
+                errors::InvalidArgument("crop dimensions must be positive"));
 
     // Allocate output tensor.
     Tensor* output = nullptr;
-    OP_REQUIRES_OK_ASYNC(
+    OP_REQUIRES_OK(
         context,
         context->allocate_output(
             0, TensorShape({num_boxes, crop_height, crop_width, depth}),
-            &output),
-        done);
-
-    auto compute_callback = [this, context, output]() {
-      const Tensor& image = context->input(0);
-      const Tensor& boxes = context->input(1);
-      const Tensor& box_index = context->input(2);
-      const bool status = functor::CropAndResize<Device, T>()(
-          context->eigen_device<Device>(), image.tensor<T, 4>(),
-          boxes.tensor<float, 2>(), box_index.tensor<int32, 1>(),
-          extrapolation_value_, output->tensor<float, 4>());
-      if (!status) {
-        context->SetStatus(
-            errors::Internal("Failed launch CropAndResizeKernel."));
-      }
-    };
-
-    RunIfBoxIndexIsValid<Device>(context, box_index.tensor<int32, 1>(),
-                                 batch_size, std::move(compute_callback),
-                                 std::move(done));
+            &output));
+
+    typename TTypes<T, 4>::ConstTensor image_data = image.tensor<T, 4>();
+    typename TTypes<float, 2>::ConstTensor boxes_data =
+        boxes.tensor<float, 2>();
+    typename TTypes<int32, 1>::ConstTensor box_ind_data =
+        box_ind.tensor<int32, 1>();
+    typename TTypes<float, 4>::Tensor crops_data = output->tensor<float, 4>();
+
+    CheckValidBoxInd<Device>(context, box_ind_data, batch);
+
+    bool status = functor::CropAndResize<Device, T>()(
+        context->eigen_device<Device>(), image_data, boxes_data, box_ind_data,
+        extrapolation_value_, crops_data);
+    if (!status) {
+      context->SetStatus(
+          errors::Internal("Failed launch CropAndResizeKernel."));
+    }
   }
 
  private:
@@ -195,10 +155,10 @@ template <typename T>
 struct CropAndResize<CPUDevice, T> {
   bool operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor image,
                   typename TTypes<float, 2>::ConstTensor boxes,
-                  typename TTypes<int32, 1>::ConstTensor box_index,
+                  typename TTypes<int32, 1>::ConstTensor box_ind,
                   float extrapolation_value,
                   typename TTypes<float, 4>::Tensor crops) {
-    const int batch_size = image.dimension(0);
+    const int batch = image.dimension(0);
     const int image_height = image.dimension(1);
     const int image_width = image.dimension(2);
 
@@ -213,8 +173,8 @@ struct CropAndResize<CPUDevice, T> {
       const float y2 = boxes(b, 2);
       const float x2 = boxes(b, 3);
 
-      const int32 b_in = box_index(b);
-      if (!FastBoundsCheck(b_in, batch_size)) {
+      const int32 b_in = box_ind(b);
+      if (b_in < 0 || b_in >= batch) {
         continue;
       }
 
@@ -275,94 +235,89 @@ struct CropAndResize<CPUDevice, T> {
     return true;
   }
 };
-
 }  // namespace functor
 
 template <typename Device, typename T>
-class CropAndResizeGradImageOp : public AsyncOpKernel {
+class CropAndResizeGradImageOp : public OpKernel {
  public:
   explicit CropAndResizeGradImageOp(OpKernelConstruction* context)
-      : AsyncOpKernel(context) {
+      : OpKernel(context) {
     string method;
     OP_REQUIRES_OK(context, context->GetAttr("method", &method));
     OP_REQUIRES(context, method == "bilinear",
                 errors::InvalidArgument("method must be 'bilinear'", method));
   }
 
-  void ComputeAsync(OpKernelContext* context, DoneCallback done) override {
+  void Compute(OpKernelContext* context) override {
     // The shape of 'grads' is [num_boxes, crop_height, crop_width, depth].
     const Tensor& grads = context->input(0);
-    // The shape of 'boxes' is [num_boxes, 4].
-    const Tensor& boxes = context->input(1);
-    // The shape of 'box_index' is [num_boxes].
-    const Tensor& box_index = context->input(2);
-    // The shape of 'image_size' is [4].
-    const Tensor& image_size = context->input(3);
 
-    // Validate input shapes.
-    OP_REQUIRES_ASYNC(context, grads.dims() == 4,
-                      errors::InvalidArgument("grads image must be 4-D",
-                                              grads.shape().DebugString()),
-                      done);
+    OP_REQUIRES(context, grads.dims() == 4,
+                errors::InvalidArgument("grads image must be 4-D",
+                                        grads.shape().DebugString()));
     const int crop_height = grads.dim_size(1);
     const int crop_width = grads.dim_size(2);
-    OP_REQUIRES_ASYNC(
-        context, crop_height > 0 && crop_width > 0,
-        errors::InvalidArgument("grads dimensions must be positive"), done);
+    OP_REQUIRES(context, crop_height > 0 && crop_width > 0,
+                errors::InvalidArgument("grads dimensions must be positive"));
+
+    // The shape of 'boxes' is [num_boxes, 4].
+    const Tensor& boxes = context->input(1);
+
+    // The shape of 'box_ind' is [num_boxes].
+    const Tensor& box_ind = context->input(2);
+
     int num_boxes = 0;
-    OP_REQUIRES_OK_ASYNC(
-        context, ParseAndCheckBoxSizes(boxes, box_index, &num_boxes), done);
-    OP_REQUIRES_ASYNC(
+    ParseAndCheckBoxSizes(context, boxes, box_ind, &num_boxes);
+
+    OP_REQUIRES(
         context, grads.dim_size(0) == num_boxes,
-        errors::InvalidArgument("boxes and grads have incompatible shape"),
-        done);
-
-    OP_REQUIRES_ASYNC(context, image_size.dims() == 1,
-                      errors::InvalidArgument("image_size must be 1-D",
-                                              image_size.shape().DebugString()),
-                      done);
-    OP_REQUIRES_ASYNC(context, image_size.dim_size(0) == 4,
-                      errors::InvalidArgument("image_size must have 4 elements",
-                                              image_size.shape().DebugString()),
-                      done);
+        errors::InvalidArgument("boxes and grads have incompatible shape"));
+
+    // The shape of 'image_size' is [4].
+    const Tensor& image_size = context->input(3);
+    OP_REQUIRES(context, image_size.dims() == 1,
+                errors::InvalidArgument("image_size must be 1-D",
+                                        image_size.shape().DebugString()));
+    OP_REQUIRES(context, image_size.dim_size(0) == 4,
+                errors::InvalidArgument("image_size must have 4 elements",
+                                        image_size.shape().DebugString()));
+
     auto image_size_vec = image_size.vec<int32>();
-    const int batch_size = internal::SubtleMustCopy(image_size_vec(0));
+    const int batch = internal::SubtleMustCopy(image_size_vec(0));
     const int image_height = internal::SubtleMustCopy(image_size_vec(1));
     const int image_width = internal::SubtleMustCopy(image_size_vec(2));
     const int depth = internal::SubtleMustCopy(image_size_vec(3));
-    OP_REQUIRES_ASYNC(
-        context, image_height > 0 && image_width > 0,
-        errors::InvalidArgument("image dimensions must be positive"), done);
-    OP_REQUIRES_ASYNC(
+
+    OP_REQUIRES(context, image_height > 0 && image_width > 0,
+                errors::InvalidArgument("image dimensions must be positive"));
+    OP_REQUIRES(
         context, grads.dim_size(3) == depth,
-        errors::InvalidArgument("image_size and grads are incompatible"), done);
+        errors::InvalidArgument("image_size and grads are incompatible"));
 
     // Allocate output tensor.
     Tensor* output = nullptr;
-    OP_REQUIRES_OK_ASYNC(
-        context,
-        context->allocate_output(
-            0, TensorShape({batch_size, image_height, image_width, depth}),
-            &output),
-        done);
-
-    auto compute_callback = [context, output]() {
-      const Tensor& grads = context->input(0);
-      const Tensor& boxes = context->input(1);
-      const Tensor& box_index = context->input(2);
-      const bool status = functor::CropAndResizeBackpropImage<Device, T>()(
-          context->eigen_device<Device>(), grads.tensor<float, 4>(),
-          boxes.tensor<float, 2>(), box_index.tensor<int32, 1>(),
-          output->tensor<T, 4>());
-      if (!status) {
-        context->SetStatus(errors::Internal(
-            "Failed launch CropAndResizeBackpropImage kernel."));
-      }
-    };
-
-    RunIfBoxIndexIsValid<Device>(context, box_index.tensor<int32, 1>(),
-                                 batch_size, std::move(compute_callback),
-                                 std::move(done));
+    OP_REQUIRES_OK(
+        context, context->allocate_output(
+                     0, TensorShape({batch, image_height, image_width, depth}),
+                     &output));
+
+    typename TTypes<float, 4>::ConstTensor grads_data =
+        grads.tensor<float, 4>();
+    typename TTypes<float, 2>::ConstTensor boxes_data =
+        boxes.tensor<float, 2>();
+    typename TTypes<int32, 1>::ConstTensor box_ind_data =
+        box_ind.tensor<int32, 1>();
+    typename TTypes<T, 4>::Tensor output_data = output->tensor<T, 4>();
+
+    CheckValidBoxInd<Device>(context, box_ind_data, batch);
+
+    bool status = functor::CropAndResizeBackpropImage<Device, T>()(
+        context->eigen_device<Device>(), grads_data, boxes_data, box_ind_data,
+        output_data);
+    if (!status) {
+      context->SetStatus(
+          errors::Internal("Failed launch CropAndResizeBackpropImageKernel."));
+    }
   }
 };
 
@@ -373,9 +328,9 @@ struct CropAndResizeBackpropImage<CPUDevice, T> {
   bool operator()(const CPUDevice& d,
                   typename TTypes<float, 4>::ConstTensor grads,
                   typename TTypes<float, 2>::ConstTensor boxes,
-                  typename TTypes<int32, 1>::ConstTensor box_index,
+                  typename TTypes<int32, 1>::ConstTensor box_ind,
                   typename TTypes<T, 4>::Tensor grads_image) {
-    const int batch_size = grads_image.dimension(0);
+    const int batch = grads_image.dimension(0);
     const int image_height = grads_image.dimension(1);
     const int image_width = grads_image.dimension(2);
 
@@ -392,8 +347,8 @@ struct CropAndResizeBackpropImage<CPUDevice, T> {
       const float y2 = boxes(b, 2);
       const float x2 = boxes(b, 3);
 
-      const int32 b_in = box_index(b);
-      if (!FastBoundsCheck(b_in, batch_size)) {
+      const int32 b_in = box_ind(b);
+      if (b_in < 0 || b_in >= batch) {
         continue;
       }
 
@@ -444,90 +399,83 @@ struct CropAndResizeBackpropImage<CPUDevice, T> {
     return true;
   }
 };
-
 }  // namespace functor
 
 template <typename Device, typename T>
-class CropAndResizeGradBoxesOp : public AsyncOpKernel {
+class CropAndResizeGradBoxesOp : public OpKernel {
  public:
   explicit CropAndResizeGradBoxesOp(OpKernelConstruction* context)
-      : AsyncOpKernel(context) {
+      : OpKernel(context) {
     string method;
     OP_REQUIRES_OK(context, context->GetAttr("method", &method));
     OP_REQUIRES(context, method == "bilinear",
                 errors::InvalidArgument("method must be 'bilinear'", method));
   }
 
-  void ComputeAsync(OpKernelContext* context, DoneCallback done) override {
+  void Compute(OpKernelContext* context) override {
     // The shape of 'grads' is [num_boxes, crop_height, crop_width, depth].
     const Tensor& grads = context->input(0);
-    // The shape of 'boxes' is [num_boxes, 4].
-    const Tensor& boxes = context->input(2);
-    // The shape of 'box_index' is [num_boxes].
-    const Tensor& box_index = context->input(3);
-    // The shape of 'image' is [batch_size, image_height, image_width, depth].
-    const Tensor& image = context->input(1);
 
-    // Validate input shapes.
-    OP_REQUIRES_ASYNC(context, grads.dims() == 4,
-                      errors::InvalidArgument("grads image must be 4-D",
-                                              grads.shape().DebugString()),
-                      done);
+    OP_REQUIRES(context, grads.dims() == 4,
+                errors::InvalidArgument("grads image must be 4-D",
+                                        grads.shape().DebugString()));
+
     const int crop_height = grads.dim_size(1);
     const int crop_width = grads.dim_size(2);
     const int depth = grads.dim_size(3);
-    OP_REQUIRES_ASYNC(
-        context, crop_height > 0 && crop_width > 0,
-        errors::InvalidArgument("grads dimensions must be positive"), done);
-
-    OP_REQUIRES_ASYNC(context, image.dims() == 4,
-                      errors::InvalidArgument("input image must be 4-D",
-                                              image.shape().DebugString()),
-                      done);
-    const int batch_size = image.dim_size(0);
+    OP_REQUIRES(context, crop_height > 0 && crop_width > 0,
+                errors::InvalidArgument("grads dimensions must be positive"));
+
+    // The shape of 'image' is [batch, image_height, image_width, depth].
+    const Tensor& image = context->input(1);
+    OP_REQUIRES(context, image.dims() == 4,
+                errors::InvalidArgument("input image must be 4-D",
+                                        image.shape().DebugString()));
+
+    const int batch = image.dim_size(0);
     const int image_height = image.dim_size(1);
     const int image_width = image.dim_size(2);
-    OP_REQUIRES_ASYNC(
-        context, image_height > 0 && image_width > 0,
-        errors::InvalidArgument("image dimensions must be positive"), done);
-    OP_REQUIRES_ASYNC(context, image.dim_size(3) == depth,
-                      errors::InvalidArgument("image, grads depth differ"),
-                      done);
+    OP_REQUIRES(context, image_height > 0 && image_width > 0,
+                errors::InvalidArgument("image dimensions must be positive"));
+    OP_REQUIRES(context, image.dim_size(3) == depth,
+                errors::InvalidArgument("image, grads depth differ"));
+
+    // The shape of 'boxes' is [num_boxes, 4].
+    const Tensor& boxes = context->input(2);
+
+    // The shape of 'box_ind' is [num_boxes].
+    const Tensor& box_ind = context->input(3);
 
     int num_boxes = 0;
-    OP_REQUIRES_OK_ASYNC(
-        context, ParseAndCheckBoxSizes(boxes, box_index, &num_boxes), done);
+    ParseAndCheckBoxSizes(context, boxes, box_ind, &num_boxes);
 
-    OP_REQUIRES_ASYNC(
+    OP_REQUIRES(
         context, grads.dim_size(0) == num_boxes,
-        errors::InvalidArgument("boxes and grads have incompatible shape"),
-        done);
+        errors::InvalidArgument("boxes and grads have incompatible shape"));
 
     // Allocate output tensor.
     Tensor* output = nullptr;
-    OP_REQUIRES_OK_ASYNC(
-        context,
-        context->allocate_output(0, TensorShape({num_boxes, 4}), &output),
-        done);
-
-    auto compute_callback = [context, output]() {
-      const Tensor& grads = context->input(0);
-      const Tensor& image = context->input(1);
-      const Tensor& boxes = context->input(2);
-      const Tensor& box_index = context->input(3);
-      const bool status = functor::CropAndResizeBackpropBoxes<Device, T>()(
-          context->eigen_device<Device>(), grads.tensor<float, 4>(),
-          image.tensor<T, 4>(), boxes.tensor<float, 2>(),
-          box_index.tensor<int32, 1>(), output->tensor<float, 2>());
-      if (!status) {
-        context->SetStatus(errors::Internal(
-            "Failed launch CropAndResizeBackpropBoxes kernel."));
-      }
-    };
-
-    RunIfBoxIndexIsValid<Device>(context, box_index.tensor<int32, 1>(),
-                                 batch_size, std::move(compute_callback),
-                                 std::move(done));
+    OP_REQUIRES_OK(context, context->allocate_output(
+                                0, TensorShape({num_boxes, 4}), &output));
+
+    typename TTypes<float, 4>::ConstTensor grads_data =
+        grads.tensor<float, 4>();
+    typename TTypes<T, 4>::ConstTensor image_data = image.tensor<T, 4>();
+    typename TTypes<float, 2>::ConstTensor boxes_data =
+        boxes.tensor<float, 2>();
+    typename TTypes<int32, 1>::ConstTensor box_ind_data =
+        box_ind.tensor<int32, 1>();
+    typename TTypes<float, 2>::Tensor output_data = output->tensor<float, 2>();
+
+    CheckValidBoxInd<Device>(context, box_ind_data, batch);
+
+    bool status = functor::CropAndResizeBackpropBoxes<Device, T>()(
+        context->eigen_device<Device>(), grads_data, image_data, boxes_data,
+        box_ind_data, output_data);
+    if (!status) {
+      context->SetStatus(
+          errors::Internal("Failed launch CropAndResizeBackpropBoxesKernel."));
+    }
   }
 };
 
@@ -539,9 +487,9 @@ struct CropAndResizeBackpropBoxes<CPUDevice, T> {
                   typename TTypes<float, 4>::ConstTensor grads,
                   typename TTypes<T, 4>::ConstTensor image,
                   typename TTypes<float, 2>::ConstTensor boxes,
-                  typename TTypes<int32, 1>::ConstTensor box_index,
+                  typename TTypes<int32, 1>::ConstTensor box_ind,
                   typename TTypes<float, 2>::Tensor grads_boxes) {
-    const int batch_size = image.dimension(0);
+    const int batch = image.dimension(0);
     const int image_height = image.dimension(1);
     const int image_width = image.dimension(2);
 
@@ -558,8 +506,8 @@ struct CropAndResizeBackpropBoxes<CPUDevice, T> {
       const float y2 = boxes(b, 2);
       const float x2 = boxes(b, 3);
 
-      const int32 b_in = box_index(b);
-      if (!FastBoundsCheck(b_in, batch_size)) {
+      const int32 b_in = box_ind(b);
+      if (b_in < 0 || b_in >= batch) {
         continue;
       }
 
@@ -641,19 +589,30 @@ struct CropAndResizeBackpropBoxes<CPUDevice, T> {
     return true;
   }
 };
-
 }  // namespace functor
 
-#define REGISTER_KERNEL(T)                                \
-  REGISTER_KERNEL_BUILDER(Name("CropAndResize")           \
-                              .Device(DEVICE_CPU)         \
-                              .TypeConstraint<T>("T")     \
-                              .HostMemory("crop_size"),   \
-                          CropAndResizeOp<CPUDevice, T>); \
-                                                          \
-  REGISTER_KERNEL_BUILDER(Name("CropAndResizeGradBoxes")  \
-                              .Device(DEVICE_CPU)         \
-                              .TypeConstraint<T>("T"),    \
+// Specialization of CheckValidBoxInd for a CPUDevice.
+template <>
+inline void CheckValidBoxInd<CPUDevice>(
+    OpKernelContext* context, typename TTypes<int32, 1>::ConstTensor box_ind,
+    int batch) {
+  const int num_boxes = box_ind.dimension(0);
+  for (int b = 0; b < num_boxes; ++b) {
+    OP_REQUIRES(context, box_ind(b) >= 0 && box_ind(b) < batch,
+                errors::OutOfRange("box_ind has values outside [0, batch)"));
+  }
+}
+
+#define REGISTER_KERNEL(T)                                         \
+  REGISTER_KERNEL_BUILDER(Name("CropAndResize")                    \
+                              .Device(DEVICE_CPU)                  \
+                              .TypeConstraint<T>("T")              \
+                              .HostMemory("crop_size"),            \
+                          CropAndResizeOp<CPUDevice, T>);          \
+                                                                   \
+  REGISTER_KERNEL_BUILDER(Name("CropAndResizeGradBoxes")           \
+                              .Device(DEVICE_CPU)                  \
+                              .TypeConstraint<T>("T"),             \
                           CropAndResizeGradBoxesOp<CPUDevice, T>);
 
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL);
@@ -675,88 +634,49 @@ TF_CALL_double(REGISTER_KERNEL);
 
 #if GOOGLE_CUDA
 
-// Forward declaration of the CheckValidBoxIndexHelper specialization for GPU.
+// Forward declaration of the CheckValidBoxIndHelper specialization for GPU.
 namespace functor {
 template <>
-void CheckValidBoxIndexHelper<GPUDevice>::operator()(
-    const GPUDevice& d, typename TTypes<int32, 1>::ConstTensor box_index,
-    int batch_size, typename TTypes<bool, 0>::Tensor isvalid);
-extern template struct CheckValidBoxIndexHelper<GPUDevice>;
+void CheckValidBoxIndHelper<GPUDevice>::operator()(
+    const GPUDevice& d, typename TTypes<int32, 1>::ConstTensor box_ind,
+    int batch, typename TTypes<bool, 0>::Tensor isvalid);
+extern template struct CheckValidBoxIndHelper<GPUDevice>;
 }  // namespace functor
 
-namespace {
-
-// Specialization of CheckValidBoxIndex for a GPUDevice.
+// Specialization of CheckValidBoxInd for a GPUDevice.
 template <>
-inline void RunIfBoxIndexIsValid<GPUDevice>(
-    OpKernelContext* context, typename TTypes<int32, 1>::ConstTensor box_index,
-    int batch_size, const Callback& compute, const Callback& done) {
-  const int num_boxes = box_index.dimension(0);
+inline void CheckValidBoxInd<GPUDevice>(
+    OpKernelContext* context, typename TTypes<int32, 1>::ConstTensor box_ind,
+    int batch) {
+  const int num_boxes = box_ind.dimension(0);
   if (num_boxes == 0) {
-    compute();
-    done();
     return;
   }
+  Tensor isvalid_tensor;
+  OP_REQUIRES_OK(context,
+                 context->allocate_temp(DataTypeToEnum<bool>::value,
+                                        TensorShape({}), &isvalid_tensor));
 
-  Tensor isvalid_dev_tensor;
-  OP_REQUIRES_OK_ASYNC(
-      context,
-      context->allocate_temp(DataTypeToEnum<bool>::value, TensorShape({}),
-                             &isvalid_dev_tensor),
-      done);
-  typename TTypes<bool, 0>::Tensor isvalid_dev =
-      isvalid_dev_tensor.tensor<bool, 0>();
+  typename TTypes<bool, 0>::Tensor isvalid = isvalid_tensor.tensor<bool, 0>();
 
-  // Run the actual box check on the device.
-  functor::CheckValidBoxIndexHelper<GPUDevice>()(
-      context->eigen_device<GPUDevice>(), box_index, batch_size, isvalid_dev);
+  functor::CheckValidBoxIndHelper<GPUDevice>()(
+      context->eigen_device<GPUDevice>(), box_ind, batch, isvalid);
 
-  // Copy the result back to the host.
   auto* stream = context->op_device_context()->stream();
-  OP_REQUIRES_ASYNC(context, stream,
-                    errors::Internal("No GPU stream available."), done);
-  Tensor isvalid_host_tensor;
-  // Use pinned host memory on the host to avoid unnecessary
-  // synchronization.
-  AllocatorAttributes alloc_attr;
-  alloc_attr.set_on_host(true);
-  alloc_attr.set_gpu_compatible(true);
-  OP_REQUIRES_OK_ASYNC(
-      context,
-      context->allocate_temp(DataTypeToEnum<bool>::value, TensorShape({}),
-                             &isvalid_host_tensor, alloc_attr),
-      done);
-  perftools::gputools::DeviceMemoryBase wrapped(isvalid_dev.data(),
-                                                sizeof(bool));
-  const bool status =
-      stream
-          ->ThenMemcpy(
-              isvalid_host_tensor.scalar<bool>().data() /* destination */,
-              wrapped /* source */, sizeof(bool))
-          .ok();
-  OP_REQUIRES_ASYNC(
-      context, status,
-      errors::Internal("Failed to launch copy of isvalid from device to host."),
-      done);
-
-  // We capture both temporary tensors to prevent them from being deallocated
-  // when ComputeAsync returns and before the closure runs.
-  auto wrapped_callback = [context, isvalid_host_tensor, isvalid_dev_tensor,
-                           compute, done]() {
-    const bool isvalid = isvalid_host_tensor.scalar<bool>()();
-    OP_REQUIRES_ASYNC(
-        context, isvalid,
-        errors::OutOfRange("box_index has values outside [0, batch_size)"),
-        done);
-    compute();
-    done();
-  };
-
-  context->device()->tensorflow_gpu_device_info()->event_mgr->ThenExecute(
-      stream, wrapped_callback);
-}
+  OP_REQUIRES(context, stream, errors::Internal("No GPU stream available."));
+
+  bool isvalid_host = false;
+  perftools::gputools::DeviceMemoryBase isvalid_gpu(isvalid.data(),
+                                                    sizeof(bool));
+  stream->ThenMemcpy(&isvalid_host, isvalid_gpu, sizeof(bool));
+  stream->BlockHostUntilDone();
 
-}  // namespace
+  OP_REQUIRES(context, stream->ok(),
+              errors::Internal("cudaMemcpy from device to host failed"));
+
+  OP_REQUIRES(context, isvalid_host,
+              errors::OutOfRange("box_ind has values outside [0, batch)"));
+}
 
 #define REGISTER_KERNEL(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("CropAndResize")                    \
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-05-22 09:52:17 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-05-22 09:55:57 -0700
commit	f7df71b97389a71a3e21334ebe40b0b05e569fc3 (patch)
tree	792af1f4c03bb107432a6d7197689dc98c36b620 /tensorflow/core/kernels/crop_and_resize_op.cc
parent	629755ccac2beb9ea9ea16e0782b9b5084672f6a (diff)