diff options
author | 2017-05-22 09:52:17 -0700 | |
---|---|---|
committer | 2017-05-22 09:55:57 -0700 | |
commit | f7df71b97389a71a3e21334ebe40b0b05e569fc3 (patch) | |
tree | 792af1f4c03bb107432a6d7197689dc98c36b620 /tensorflow/core/kernels/crop_and_resize_op.cc | |
parent | 629755ccac2beb9ea9ea16e0782b9b5084672f6a (diff) |
Rollback changes to crop_and_resize.
END_PUBLIC
BEGIN_PUBLIC
Automated g4 rollback of changelist 155203119
PiperOrigin-RevId: 156748870
Diffstat (limited to 'tensorflow/core/kernels/crop_and_resize_op.cc')
-rw-r--r-- | tensorflow/core/kernels/crop_and_resize_op.cc | 568 |
1 files changed, 244 insertions, 324 deletions
diff --git a/tensorflow/core/kernels/crop_and_resize_op.cc b/tensorflow/core/kernels/crop_and_resize_op.cc index c68a8b0bd2..746fe63e2a 100644 --- a/tensorflow/core/kernels/crop_and_resize_op.cc +++ b/tensorflow/core/kernels/crop_and_resize_op.cc @@ -19,9 +19,6 @@ limitations under the License. #include "tensorflow/core/kernels/crop_and_resize_op.h" -#include <functional> -#include <string> - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -29,13 +26,10 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/bounds_check.h" -#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/types.h" #if GOOGLE_CUDA -#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/platform/stream_executor.h" #endif // GOOGLE_CUDA @@ -43,67 +37,41 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -using Callback = std::function<void()>; - -namespace { -static inline Status ParseAndCheckBoxSizes(const Tensor& boxes, - const Tensor& box_index, - int* num_boxes) { - if (boxes.NumElements() == 0 && box_index.NumElements() == 0) { +static inline void ParseAndCheckBoxSizes(OpKernelContext* context, + const Tensor& boxes, + const Tensor& box_ind, + int* num_boxes) { + if (boxes.NumElements() == 0 && box_ind.NumElements() == 0) { *num_boxes = 0; - return Status::OK(); + return; } // The shape of 'boxes' is [num_boxes, 4]. - if (boxes.dims() != 2) { - return errors::InvalidArgument("boxes must be 2-D", - boxes.shape().DebugString()); - } + OP_REQUIRES(context, boxes.dims() == 2, + errors::InvalidArgument("boxes must be 2-D", + boxes.shape().DebugString())); *num_boxes = boxes.dim_size(0); - if (boxes.dim_size(1) != 4) { - return errors::InvalidArgument("boxes must have 4 columns"); - } - // The shape of 'box_index' is [num_boxes]. - if (box_index.dims() != 1) { - return errors::InvalidArgument("box_index must be 1-D", - box_index.shape().DebugString()); - } - if (box_index.dim_size(0) != *num_boxes) { - return errors::InvalidArgument("box_index has incompatible shape"); - } - return Status::OK(); + OP_REQUIRES(context, boxes.dim_size(1) == 4, + errors::InvalidArgument("boxes must have 4 columns")); + + // The shape of 'box_ind' is [num_boxes]. + OP_REQUIRES(context, box_ind.dims() == 1, + errors::InvalidArgument("box_ind must be 1-D", + box_ind.shape().DebugString())); + OP_REQUIRES(context, box_ind.dim_size(0) == *num_boxes, + errors::InvalidArgument("box_ind has incompatible shape")); } -// Conditionally calls the compute callback if all values in box_index are in -// [0, batch_size) then calls done. +// Verifies that all values in box_ind are in [0, batch). template <typename Device> -inline void RunIfBoxIndexIsValid( - OpKernelContext* context, typename TTypes<int32, 1>::ConstTensor box_index, - int batch_size, const Callback& compute, const Callback& done); - -// Specialization of CheckValidBoxIndex for a CPUDevice. -template <> -inline void RunIfBoxIndexIsValid<CPUDevice>( - OpKernelContext* context, typename TTypes<int32, 1>::ConstTensor box_index, - int batch_size, const Callback& compute, const Callback& done) { - const int num_boxes = box_index.dimension(0); - for (int b = 0; b < num_boxes; ++b) { - OP_REQUIRES_ASYNC( - context, FastBoundsCheck(box_index(b), batch_size), - errors::OutOfRange("box_index has values outside [0, batch_size)"), - done); - } - compute(); - done(); -} - -} // namespace +inline void CheckValidBoxInd( + OpKernelContext* context, + typename TTypes<int32, 1>::ConstTensor box_ind_data, int batch); template <typename Device, typename T> -class CropAndResizeOp : public AsyncOpKernel { +class CropAndResizeOp : public OpKernel { public: - explicit CropAndResizeOp(OpKernelConstruction* context) - : AsyncOpKernel(context) { + explicit CropAndResizeOp(OpKernelConstruction* context) : OpKernel(context) { string method; OP_REQUIRES_OK(context, context->GetAttr("method", &method)); OP_REQUIRES(context, method == "bilinear", @@ -112,77 +80,69 @@ class CropAndResizeOp : public AsyncOpKernel { &extrapolation_value_)); } - void ComputeAsync(OpKernelContext* context, DoneCallback done) override { - // The shape of 'image' is [batch_size, image_height, image_width, - // channels]. + void Compute(OpKernelContext* context) override { + // The shape of 'image' is [batch, image_height, image_width, channels]. const Tensor& image = context->input(0); - // The shape of 'boxes' is [num_boxes, 4]. - const Tensor& boxes = context->input(1); - // The shape of 'box_index' is [num_boxes]. - const Tensor& box_index = context->input(2); - // The shape of 'crop_size' is [2]. - const Tensor& crop_size = context->input(3); + OP_REQUIRES(context, image.dims() == 4, + errors::InvalidArgument("input image must be 4-D", + image.shape().DebugString())); - // Validate inputs dimensions. - OP_REQUIRES_ASYNC(context, image.dims() == 4, - errors::InvalidArgument("input image must be 4-D", - image.shape().DebugString()), - done); - const int batch_size = image.dim_size(0); + const int batch = image.dim_size(0); const int image_height = image.dim_size(1); const int image_width = image.dim_size(2); const int depth = image.dim_size(3); - OP_REQUIRES_ASYNC( - context, image_height > 0 && image_width > 0, - errors::InvalidArgument("image dimensions must be positive"), done); + OP_REQUIRES(context, image_height > 0 && image_width > 0, + errors::InvalidArgument("image dimensions must be positive")); + + // The shape of 'boxes' is [num_boxes, 4]. + const Tensor& boxes = context->input(1); + + // The shape of 'box_ind' is [num_boxes]. + const Tensor& box_ind = context->input(2); + int num_boxes = 0; - OP_REQUIRES_OK_ASYNC( - context, ParseAndCheckBoxSizes(boxes, box_index, &num_boxes), done); - - OP_REQUIRES_ASYNC(context, crop_size.dims() == 1, - errors::InvalidArgument("crop_size must be 1-D", - crop_size.shape().DebugString()), - done); - OP_REQUIRES_ASYNC( - context, crop_size.dim_size(0) == 2, - errors::InvalidArgument("crop_size must have two elements", - crop_size.shape().DebugString()), - done); - - // Copy and validate crop sizes. + ParseAndCheckBoxSizes(context, boxes, box_ind, &num_boxes); + + // The shape of 'crop_size' is [2]. + const Tensor& crop_size = context->input(3); + + OP_REQUIRES(context, crop_size.dims() == 1, + errors::InvalidArgument("crop_size must be 1-D", + crop_size.shape().DebugString())); + OP_REQUIRES(context, crop_size.dim_size(0) == 2, + errors::InvalidArgument("crop_size must have two elements", + crop_size.shape().DebugString())); + auto crop_size_vec = crop_size.vec<int32>(); const int crop_height = internal::SubtleMustCopy(crop_size_vec(0)); const int crop_width = internal::SubtleMustCopy(crop_size_vec(1)); - OP_REQUIRES_ASYNC( - context, crop_height > 0 && crop_width > 0, - errors::InvalidArgument("crop dimensions must be positive"), done); + OP_REQUIRES(context, crop_height > 0 && crop_width > 0, + errors::InvalidArgument("crop dimensions must be positive")); // Allocate output tensor. Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC( + OP_REQUIRES_OK( context, context->allocate_output( 0, TensorShape({num_boxes, crop_height, crop_width, depth}), - &output), - done); - - auto compute_callback = [this, context, output]() { - const Tensor& image = context->input(0); - const Tensor& boxes = context->input(1); - const Tensor& box_index = context->input(2); - const bool status = functor::CropAndResize<Device, T>()( - context->eigen_device<Device>(), image.tensor<T, 4>(), - boxes.tensor<float, 2>(), box_index.tensor<int32, 1>(), - extrapolation_value_, output->tensor<float, 4>()); - if (!status) { - context->SetStatus( - errors::Internal("Failed launch CropAndResizeKernel.")); - } - }; - - RunIfBoxIndexIsValid<Device>(context, box_index.tensor<int32, 1>(), - batch_size, std::move(compute_callback), - std::move(done)); + &output)); + + typename TTypes<T, 4>::ConstTensor image_data = image.tensor<T, 4>(); + typename TTypes<float, 2>::ConstTensor boxes_data = + boxes.tensor<float, 2>(); + typename TTypes<int32, 1>::ConstTensor box_ind_data = + box_ind.tensor<int32, 1>(); + typename TTypes<float, 4>::Tensor crops_data = output->tensor<float, 4>(); + + CheckValidBoxInd<Device>(context, box_ind_data, batch); + + bool status = functor::CropAndResize<Device, T>()( + context->eigen_device<Device>(), image_data, boxes_data, box_ind_data, + extrapolation_value_, crops_data); + if (!status) { + context->SetStatus( + errors::Internal("Failed launch CropAndResizeKernel.")); + } } private: @@ -195,10 +155,10 @@ template <typename T> struct CropAndResize<CPUDevice, T> { bool operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor image, typename TTypes<float, 2>::ConstTensor boxes, - typename TTypes<int32, 1>::ConstTensor box_index, + typename TTypes<int32, 1>::ConstTensor box_ind, float extrapolation_value, typename TTypes<float, 4>::Tensor crops) { - const int batch_size = image.dimension(0); + const int batch = image.dimension(0); const int image_height = image.dimension(1); const int image_width = image.dimension(2); @@ -213,8 +173,8 @@ struct CropAndResize<CPUDevice, T> { const float y2 = boxes(b, 2); const float x2 = boxes(b, 3); - const int32 b_in = box_index(b); - if (!FastBoundsCheck(b_in, batch_size)) { + const int32 b_in = box_ind(b); + if (b_in < 0 || b_in >= batch) { continue; } @@ -275,94 +235,89 @@ struct CropAndResize<CPUDevice, T> { return true; } }; - } // namespace functor template <typename Device, typename T> -class CropAndResizeGradImageOp : public AsyncOpKernel { +class CropAndResizeGradImageOp : public OpKernel { public: explicit CropAndResizeGradImageOp(OpKernelConstruction* context) - : AsyncOpKernel(context) { + : OpKernel(context) { string method; OP_REQUIRES_OK(context, context->GetAttr("method", &method)); OP_REQUIRES(context, method == "bilinear", errors::InvalidArgument("method must be 'bilinear'", method)); } - void ComputeAsync(OpKernelContext* context, DoneCallback done) override { + void Compute(OpKernelContext* context) override { // The shape of 'grads' is [num_boxes, crop_height, crop_width, depth]. const Tensor& grads = context->input(0); - // The shape of 'boxes' is [num_boxes, 4]. - const Tensor& boxes = context->input(1); - // The shape of 'box_index' is [num_boxes]. - const Tensor& box_index = context->input(2); - // The shape of 'image_size' is [4]. - const Tensor& image_size = context->input(3); - // Validate input shapes. - OP_REQUIRES_ASYNC(context, grads.dims() == 4, - errors::InvalidArgument("grads image must be 4-D", - grads.shape().DebugString()), - done); + OP_REQUIRES(context, grads.dims() == 4, + errors::InvalidArgument("grads image must be 4-D", + grads.shape().DebugString())); const int crop_height = grads.dim_size(1); const int crop_width = grads.dim_size(2); - OP_REQUIRES_ASYNC( - context, crop_height > 0 && crop_width > 0, - errors::InvalidArgument("grads dimensions must be positive"), done); + OP_REQUIRES(context, crop_height > 0 && crop_width > 0, + errors::InvalidArgument("grads dimensions must be positive")); + + // The shape of 'boxes' is [num_boxes, 4]. + const Tensor& boxes = context->input(1); + + // The shape of 'box_ind' is [num_boxes]. + const Tensor& box_ind = context->input(2); + int num_boxes = 0; - OP_REQUIRES_OK_ASYNC( - context, ParseAndCheckBoxSizes(boxes, box_index, &num_boxes), done); - OP_REQUIRES_ASYNC( + ParseAndCheckBoxSizes(context, boxes, box_ind, &num_boxes); + + OP_REQUIRES( context, grads.dim_size(0) == num_boxes, - errors::InvalidArgument("boxes and grads have incompatible shape"), - done); - - OP_REQUIRES_ASYNC(context, image_size.dims() == 1, - errors::InvalidArgument("image_size must be 1-D", - image_size.shape().DebugString()), - done); - OP_REQUIRES_ASYNC(context, image_size.dim_size(0) == 4, - errors::InvalidArgument("image_size must have 4 elements", - image_size.shape().DebugString()), - done); + errors::InvalidArgument("boxes and grads have incompatible shape")); + + // The shape of 'image_size' is [4]. + const Tensor& image_size = context->input(3); + OP_REQUIRES(context, image_size.dims() == 1, + errors::InvalidArgument("image_size must be 1-D", + image_size.shape().DebugString())); + OP_REQUIRES(context, image_size.dim_size(0) == 4, + errors::InvalidArgument("image_size must have 4 elements", + image_size.shape().DebugString())); + auto image_size_vec = image_size.vec<int32>(); - const int batch_size = internal::SubtleMustCopy(image_size_vec(0)); + const int batch = internal::SubtleMustCopy(image_size_vec(0)); const int image_height = internal::SubtleMustCopy(image_size_vec(1)); const int image_width = internal::SubtleMustCopy(image_size_vec(2)); const int depth = internal::SubtleMustCopy(image_size_vec(3)); - OP_REQUIRES_ASYNC( - context, image_height > 0 && image_width > 0, - errors::InvalidArgument("image dimensions must be positive"), done); - OP_REQUIRES_ASYNC( + + OP_REQUIRES(context, image_height > 0 && image_width > 0, + errors::InvalidArgument("image dimensions must be positive")); + OP_REQUIRES( context, grads.dim_size(3) == depth, - errors::InvalidArgument("image_size and grads are incompatible"), done); + errors::InvalidArgument("image_size and grads are incompatible")); // Allocate output tensor. Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC( - context, - context->allocate_output( - 0, TensorShape({batch_size, image_height, image_width, depth}), - &output), - done); - - auto compute_callback = [context, output]() { - const Tensor& grads = context->input(0); - const Tensor& boxes = context->input(1); - const Tensor& box_index = context->input(2); - const bool status = functor::CropAndResizeBackpropImage<Device, T>()( - context->eigen_device<Device>(), grads.tensor<float, 4>(), - boxes.tensor<float, 2>(), box_index.tensor<int32, 1>(), - output->tensor<T, 4>()); - if (!status) { - context->SetStatus(errors::Internal( - "Failed launch CropAndResizeBackpropImage kernel.")); - } - }; - - RunIfBoxIndexIsValid<Device>(context, box_index.tensor<int32, 1>(), - batch_size, std::move(compute_callback), - std::move(done)); + OP_REQUIRES_OK( + context, context->allocate_output( + 0, TensorShape({batch, image_height, image_width, depth}), + &output)); + + typename TTypes<float, 4>::ConstTensor grads_data = + grads.tensor<float, 4>(); + typename TTypes<float, 2>::ConstTensor boxes_data = + boxes.tensor<float, 2>(); + typename TTypes<int32, 1>::ConstTensor box_ind_data = + box_ind.tensor<int32, 1>(); + typename TTypes<T, 4>::Tensor output_data = output->tensor<T, 4>(); + + CheckValidBoxInd<Device>(context, box_ind_data, batch); + + bool status = functor::CropAndResizeBackpropImage<Device, T>()( + context->eigen_device<Device>(), grads_data, boxes_data, box_ind_data, + output_data); + if (!status) { + context->SetStatus( + errors::Internal("Failed launch CropAndResizeBackpropImageKernel.")); + } } }; @@ -373,9 +328,9 @@ struct CropAndResizeBackpropImage<CPUDevice, T> { bool operator()(const CPUDevice& d, typename TTypes<float, 4>::ConstTensor grads, typename TTypes<float, 2>::ConstTensor boxes, - typename TTypes<int32, 1>::ConstTensor box_index, + typename TTypes<int32, 1>::ConstTensor box_ind, typename TTypes<T, 4>::Tensor grads_image) { - const int batch_size = grads_image.dimension(0); + const int batch = grads_image.dimension(0); const int image_height = grads_image.dimension(1); const int image_width = grads_image.dimension(2); @@ -392,8 +347,8 @@ struct CropAndResizeBackpropImage<CPUDevice, T> { const float y2 = boxes(b, 2); const float x2 = boxes(b, 3); - const int32 b_in = box_index(b); - if (!FastBoundsCheck(b_in, batch_size)) { + const int32 b_in = box_ind(b); + if (b_in < 0 || b_in >= batch) { continue; } @@ -444,90 +399,83 @@ struct CropAndResizeBackpropImage<CPUDevice, T> { return true; } }; - } // namespace functor template <typename Device, typename T> -class CropAndResizeGradBoxesOp : public AsyncOpKernel { +class CropAndResizeGradBoxesOp : public OpKernel { public: explicit CropAndResizeGradBoxesOp(OpKernelConstruction* context) - : AsyncOpKernel(context) { + : OpKernel(context) { string method; OP_REQUIRES_OK(context, context->GetAttr("method", &method)); OP_REQUIRES(context, method == "bilinear", errors::InvalidArgument("method must be 'bilinear'", method)); } - void ComputeAsync(OpKernelContext* context, DoneCallback done) override { + void Compute(OpKernelContext* context) override { // The shape of 'grads' is [num_boxes, crop_height, crop_width, depth]. const Tensor& grads = context->input(0); - // The shape of 'boxes' is [num_boxes, 4]. - const Tensor& boxes = context->input(2); - // The shape of 'box_index' is [num_boxes]. - const Tensor& box_index = context->input(3); - // The shape of 'image' is [batch_size, image_height, image_width, depth]. - const Tensor& image = context->input(1); - // Validate input shapes. - OP_REQUIRES_ASYNC(context, grads.dims() == 4, - errors::InvalidArgument("grads image must be 4-D", - grads.shape().DebugString()), - done); + OP_REQUIRES(context, grads.dims() == 4, + errors::InvalidArgument("grads image must be 4-D", + grads.shape().DebugString())); + const int crop_height = grads.dim_size(1); const int crop_width = grads.dim_size(2); const int depth = grads.dim_size(3); - OP_REQUIRES_ASYNC( - context, crop_height > 0 && crop_width > 0, - errors::InvalidArgument("grads dimensions must be positive"), done); - - OP_REQUIRES_ASYNC(context, image.dims() == 4, - errors::InvalidArgument("input image must be 4-D", - image.shape().DebugString()), - done); - const int batch_size = image.dim_size(0); + OP_REQUIRES(context, crop_height > 0 && crop_width > 0, + errors::InvalidArgument("grads dimensions must be positive")); + + // The shape of 'image' is [batch, image_height, image_width, depth]. + const Tensor& image = context->input(1); + OP_REQUIRES(context, image.dims() == 4, + errors::InvalidArgument("input image must be 4-D", + image.shape().DebugString())); + + const int batch = image.dim_size(0); const int image_height = image.dim_size(1); const int image_width = image.dim_size(2); - OP_REQUIRES_ASYNC( - context, image_height > 0 && image_width > 0, - errors::InvalidArgument("image dimensions must be positive"), done); - OP_REQUIRES_ASYNC(context, image.dim_size(3) == depth, - errors::InvalidArgument("image, grads depth differ"), - done); + OP_REQUIRES(context, image_height > 0 && image_width > 0, + errors::InvalidArgument("image dimensions must be positive")); + OP_REQUIRES(context, image.dim_size(3) == depth, + errors::InvalidArgument("image, grads depth differ")); + + // The shape of 'boxes' is [num_boxes, 4]. + const Tensor& boxes = context->input(2); + + // The shape of 'box_ind' is [num_boxes]. + const Tensor& box_ind = context->input(3); int num_boxes = 0; - OP_REQUIRES_OK_ASYNC( - context, ParseAndCheckBoxSizes(boxes, box_index, &num_boxes), done); + ParseAndCheckBoxSizes(context, boxes, box_ind, &num_boxes); - OP_REQUIRES_ASYNC( + OP_REQUIRES( context, grads.dim_size(0) == num_boxes, - errors::InvalidArgument("boxes and grads have incompatible shape"), - done); + errors::InvalidArgument("boxes and grads have incompatible shape")); // Allocate output tensor. Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC( - context, - context->allocate_output(0, TensorShape({num_boxes, 4}), &output), - done); - - auto compute_callback = [context, output]() { - const Tensor& grads = context->input(0); - const Tensor& image = context->input(1); - const Tensor& boxes = context->input(2); - const Tensor& box_index = context->input(3); - const bool status = functor::CropAndResizeBackpropBoxes<Device, T>()( - context->eigen_device<Device>(), grads.tensor<float, 4>(), - image.tensor<T, 4>(), boxes.tensor<float, 2>(), - box_index.tensor<int32, 1>(), output->tensor<float, 2>()); - if (!status) { - context->SetStatus(errors::Internal( - "Failed launch CropAndResizeBackpropBoxes kernel.")); - } - }; - - RunIfBoxIndexIsValid<Device>(context, box_index.tensor<int32, 1>(), - batch_size, std::move(compute_callback), - std::move(done)); + OP_REQUIRES_OK(context, context->allocate_output( + 0, TensorShape({num_boxes, 4}), &output)); + + typename TTypes<float, 4>::ConstTensor grads_data = + grads.tensor<float, 4>(); + typename TTypes<T, 4>::ConstTensor image_data = image.tensor<T, 4>(); + typename TTypes<float, 2>::ConstTensor boxes_data = + boxes.tensor<float, 2>(); + typename TTypes<int32, 1>::ConstTensor box_ind_data = + box_ind.tensor<int32, 1>(); + typename TTypes<float, 2>::Tensor output_data = output->tensor<float, 2>(); + + CheckValidBoxInd<Device>(context, box_ind_data, batch); + + bool status = functor::CropAndResizeBackpropBoxes<Device, T>()( + context->eigen_device<Device>(), grads_data, image_data, boxes_data, + box_ind_data, output_data); + if (!status) { + context->SetStatus( + errors::Internal("Failed launch CropAndResizeBackpropBoxesKernel.")); + } } }; @@ -539,9 +487,9 @@ struct CropAndResizeBackpropBoxes<CPUDevice, T> { typename TTypes<float, 4>::ConstTensor grads, typename TTypes<T, 4>::ConstTensor image, typename TTypes<float, 2>::ConstTensor boxes, - typename TTypes<int32, 1>::ConstTensor box_index, + typename TTypes<int32, 1>::ConstTensor box_ind, typename TTypes<float, 2>::Tensor grads_boxes) { - const int batch_size = image.dimension(0); + const int batch = image.dimension(0); const int image_height = image.dimension(1); const int image_width = image.dimension(2); @@ -558,8 +506,8 @@ struct CropAndResizeBackpropBoxes<CPUDevice, T> { const float y2 = boxes(b, 2); const float x2 = boxes(b, 3); - const int32 b_in = box_index(b); - if (!FastBoundsCheck(b_in, batch_size)) { + const int32 b_in = box_ind(b); + if (b_in < 0 || b_in >= batch) { continue; } @@ -641,19 +589,30 @@ struct CropAndResizeBackpropBoxes<CPUDevice, T> { return true; } }; - } // namespace functor -#define REGISTER_KERNEL(T) \ - REGISTER_KERNEL_BUILDER(Name("CropAndResize") \ - .Device(DEVICE_CPU) \ - .TypeConstraint<T>("T") \ - .HostMemory("crop_size"), \ - CropAndResizeOp<CPUDevice, T>); \ - \ - REGISTER_KERNEL_BUILDER(Name("CropAndResizeGradBoxes") \ - .Device(DEVICE_CPU) \ - .TypeConstraint<T>("T"), \ +// Specialization of CheckValidBoxInd for a CPUDevice. +template <> +inline void CheckValidBoxInd<CPUDevice>( + OpKernelContext* context, typename TTypes<int32, 1>::ConstTensor box_ind, + int batch) { + const int num_boxes = box_ind.dimension(0); + for (int b = 0; b < num_boxes; ++b) { + OP_REQUIRES(context, box_ind(b) >= 0 && box_ind(b) < batch, + errors::OutOfRange("box_ind has values outside [0, batch)")); + } +} + +#define REGISTER_KERNEL(T) \ + REGISTER_KERNEL_BUILDER(Name("CropAndResize") \ + .Device(DEVICE_CPU) \ + .TypeConstraint<T>("T") \ + .HostMemory("crop_size"), \ + CropAndResizeOp<CPUDevice, T>); \ + \ + REGISTER_KERNEL_BUILDER(Name("CropAndResizeGradBoxes") \ + .Device(DEVICE_CPU) \ + .TypeConstraint<T>("T"), \ CropAndResizeGradBoxesOp<CPUDevice, T>); TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL); @@ -675,88 +634,49 @@ TF_CALL_double(REGISTER_KERNEL); #if GOOGLE_CUDA -// Forward declaration of the CheckValidBoxIndexHelper specialization for GPU. +// Forward declaration of the CheckValidBoxIndHelper specialization for GPU. namespace functor { template <> -void CheckValidBoxIndexHelper<GPUDevice>::operator()( - const GPUDevice& d, typename TTypes<int32, 1>::ConstTensor box_index, - int batch_size, typename TTypes<bool, 0>::Tensor isvalid); -extern template struct CheckValidBoxIndexHelper<GPUDevice>; +void CheckValidBoxIndHelper<GPUDevice>::operator()( + const GPUDevice& d, typename TTypes<int32, 1>::ConstTensor box_ind, + int batch, typename TTypes<bool, 0>::Tensor isvalid); +extern template struct CheckValidBoxIndHelper<GPUDevice>; } // namespace functor -namespace { - -// Specialization of CheckValidBoxIndex for a GPUDevice. +// Specialization of CheckValidBoxInd for a GPUDevice. template <> -inline void RunIfBoxIndexIsValid<GPUDevice>( - OpKernelContext* context, typename TTypes<int32, 1>::ConstTensor box_index, - int batch_size, const Callback& compute, const Callback& done) { - const int num_boxes = box_index.dimension(0); +inline void CheckValidBoxInd<GPUDevice>( + OpKernelContext* context, typename TTypes<int32, 1>::ConstTensor box_ind, + int batch) { + const int num_boxes = box_ind.dimension(0); if (num_boxes == 0) { - compute(); - done(); return; } + Tensor isvalid_tensor; + OP_REQUIRES_OK(context, + context->allocate_temp(DataTypeToEnum<bool>::value, + TensorShape({}), &isvalid_tensor)); - Tensor isvalid_dev_tensor; - OP_REQUIRES_OK_ASYNC( - context, - context->allocate_temp(DataTypeToEnum<bool>::value, TensorShape({}), - &isvalid_dev_tensor), - done); - typename TTypes<bool, 0>::Tensor isvalid_dev = - isvalid_dev_tensor.tensor<bool, 0>(); + typename TTypes<bool, 0>::Tensor isvalid = isvalid_tensor.tensor<bool, 0>(); - // Run the actual box check on the device. - functor::CheckValidBoxIndexHelper<GPUDevice>()( - context->eigen_device<GPUDevice>(), box_index, batch_size, isvalid_dev); + functor::CheckValidBoxIndHelper<GPUDevice>()( + context->eigen_device<GPUDevice>(), box_ind, batch, isvalid); - // Copy the result back to the host. auto* stream = context->op_device_context()->stream(); - OP_REQUIRES_ASYNC(context, stream, - errors::Internal("No GPU stream available."), done); - Tensor isvalid_host_tensor; - // Use pinned host memory on the host to avoid unnecessary - // synchronization. - AllocatorAttributes alloc_attr; - alloc_attr.set_on_host(true); - alloc_attr.set_gpu_compatible(true); - OP_REQUIRES_OK_ASYNC( - context, - context->allocate_temp(DataTypeToEnum<bool>::value, TensorShape({}), - &isvalid_host_tensor, alloc_attr), - done); - perftools::gputools::DeviceMemoryBase wrapped(isvalid_dev.data(), - sizeof(bool)); - const bool status = - stream - ->ThenMemcpy( - isvalid_host_tensor.scalar<bool>().data() /* destination */, - wrapped /* source */, sizeof(bool)) - .ok(); - OP_REQUIRES_ASYNC( - context, status, - errors::Internal("Failed to launch copy of isvalid from device to host."), - done); - - // We capture both temporary tensors to prevent them from being deallocated - // when ComputeAsync returns and before the closure runs. - auto wrapped_callback = [context, isvalid_host_tensor, isvalid_dev_tensor, - compute, done]() { - const bool isvalid = isvalid_host_tensor.scalar<bool>()(); - OP_REQUIRES_ASYNC( - context, isvalid, - errors::OutOfRange("box_index has values outside [0, batch_size)"), - done); - compute(); - done(); - }; - - context->device()->tensorflow_gpu_device_info()->event_mgr->ThenExecute( - stream, wrapped_callback); -} + OP_REQUIRES(context, stream, errors::Internal("No GPU stream available.")); + + bool isvalid_host = false; + perftools::gputools::DeviceMemoryBase isvalid_gpu(isvalid.data(), + sizeof(bool)); + stream->ThenMemcpy(&isvalid_host, isvalid_gpu, sizeof(bool)); + stream->BlockHostUntilDone(); -} // namespace + OP_REQUIRES(context, stream->ok(), + errors::Internal("cudaMemcpy from device to host failed")); + + OP_REQUIRES(context, isvalid_host, + errors::OutOfRange("box_ind has values outside [0, batch)")); +} #define REGISTER_KERNEL(T) \ REGISTER_KERNEL_BUILDER(Name("CropAndResize") \ |