aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/compiler/tf2xla/kernels/lrn_ops.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/compiler/tf2xla/kernels/lrn_ops.cc')
-rw-r--r--tensorflow/compiler/tf2xla/kernels/lrn_ops.cc173
1 files changed, 173 insertions, 0 deletions
diff --git a/tensorflow/compiler/tf2xla/kernels/lrn_ops.cc b/tensorflow/compiler/tf2xla/kernels/lrn_ops.cc
new file mode 100644
index 0000000000..93966d3d5a
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/lrn_ops.cc
@@ -0,0 +1,173 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/xla_compilation_device.h"
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/core/framework/kernel_def_builder.h"
+
+namespace tensorflow {
+namespace {
+
+// XLA kernel for local response normalization (LRN), applied along the last
+// dimension of a 4-dimensional input.
+class LRNOp : public XlaOpKernel {
+ public:
+  // Reads the "depth_radius", "bias", "alpha" and "beta" attributes. A failed
+  // lookup is reported through OP_REQUIRES_OK.
+  explicit LRNOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("depth_radius", &depth_radius_));
+
+    // TODO(phawkins): handle non-float types for attributes.
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("bias", &bias_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("beta", &beta_));
+  }
+
+  // Emits the computation
+  //   sqr_sum[a, b, c, d] =
+  //       sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
+  //   output = input / (bias + alpha * sqr_sum) ** beta
+  // and sets it as output 0. Fails with InvalidArgument unless input 0 is
+  // 4-dimensional.
+  void Compile(XlaOpKernelContext* ctx) override {
+    const TensorShape input_shape = ctx->InputShape(0);
+    OP_REQUIRES(ctx, input_shape.dims() == 4,
+                errors::InvalidArgument("in must be 4-dimensional"));
+
+    xla::ComputationBuilder* b = ctx->builder();
+    xla::ComputationDataHandle x = ctx->Input(0);
+
+    // The window spans the current element plus depth_radius_ neighbours on
+    // either side, along the last dimension only.
+    const int64 window_depth = depth_radius_ * 2 + 1;
+
+    auto x_squared = b->Mul(x, x);
+    auto zero = XlaHelpers::Zero(b, input_type(0));
+    const auto& add = *ctx->GetOrCreateAdd(input_type(0));
+    auto windowed_sum = b->ReduceWindow(
+        x_squared, zero, add,
+        /* window_dimensions = */ {1, 1, 1, window_depth},
+        /* window_strides = */ {1, 1, 1, 1}, xla::Padding::kSame);
+
+    // norm = bias + alpha * sqr_sum
+    auto weighted_sum = b->Mul(b->ConstantR0<float>(alpha_), windowed_sum);
+    auto norm = b->Add(b->ConstantR0<float>(bias_), weighted_sum);
+
+    // Multiplying by norm ** (-beta) is the division by norm ** beta.
+    auto inv_scale = b->Pow(norm, b->ConstantR0<float>(-beta_));
+
+    ctx->SetOutput(0, b->Mul(x, inv_scale));
+  }
+
+ private:
+  int64 depth_radius_;  // "depth_radius" attribute: half-width of the window.
+  float bias_;          // "bias" attribute.
+  float alpha_;         // "alpha" attribute.
+  float beta_;          // "beta" attribute.
+};
+
+// Associates the LRNOp kernel class with the op name "LRN".
+REGISTER_XLA_OP("LRN", LRNOp);
+
+// XLA kernel for the gradient of local response normalization.
+//
+// Inputs, in order: the incoming gradients, the original input image, and the
+// forward-pass output image. All three must be 4-dimensional and have the same
+// shape. Output 0 is the gradient with respect to the input image.
+class LRNGradOp : public XlaOpKernel {
+ public:
+  // Reads the "depth_radius", "bias", "alpha" and "beta" attributes. A failed
+  // lookup is reported through OP_REQUIRES_OK.
+  explicit LRNGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("depth_radius", &depth_radius_));
+
+    // TODO(phawkins): handle non-float types for attributes.
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("bias", &bias_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("beta", &beta_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    const TensorShape in_grads_shape = ctx->InputShape(0);
+    const TensorShape in_image_shape = ctx->InputShape(1);
+    const TensorShape out_image_shape = ctx->InputShape(2);
+
+    // Every shape is indexed with dim_size(0..3) below, so the rank of all
+    // three inputs (including out_image) must be validated first.
+    OP_REQUIRES(ctx,
+                in_grads_shape.dims() == 4 && in_image_shape.dims() == 4 &&
+                    out_image_shape.dims() == 4,
+                errors::InvalidArgument("inputs must be 4-dimensional"));
+    const int64 batch = in_grads_shape.dim_size(0);
+    const int64 rows = in_grads_shape.dim_size(1);
+    const int64 cols = in_grads_shape.dim_size(2);
+    const int64 depth = in_grads_shape.dim_size(3);
+    OP_REQUIRES(
+        ctx, in_image_shape.dim_size(0) == batch &&
+                 in_image_shape.dim_size(1) == rows &&
+                 in_image_shape.dim_size(2) == cols &&
+                 in_image_shape.dim_size(3) == depth &&
+                 out_image_shape.dim_size(0) == batch &&
+                 out_image_shape.dim_size(1) == rows &&
+                 out_image_shape.dim_size(2) == cols &&
+                 out_image_shape.dim_size(3) == depth,
+        errors::InvalidArgument(
+            "input_grads, input_image, and out_image should have the same "
+            "shape"));
+
+    xla::ComputationBuilder* builder = ctx->builder();
+    xla::ComputationDataHandle in_grads = ctx->Input(0);
+    xla::ComputationDataHandle in_image = ctx->Input(1);
+    xla::ComputationDataHandle out_image = ctx->Input(2);
+
+    // This code is ported from tensorflow/core/kernels/lrn_op.cc. In Python
+    // pseudo-code, the Eigen code does this for each spatial position:
+    // grads = [0.0] * depth
+    // for j in range(depth):
+    //   depth_begin = max(0, j - depth_radius)
+    //   depth_end = min(depth, j + depth_radius + 1)
+    //
+    //   norm = 0
+    //   for k in range(depth_begin, depth_end):
+    //     norm += in_image[k] * in_image[k]
+    //   norm = alpha * norm + bias
+    //
+    //   for k in range(depth_begin, depth_end):
+    //     dyi = -2.0 * alpha * beta * in_image[k] * out_image[j] / norm
+    //     if k == j:
+    //       dyi += norm ** (-beta)
+    //     dyi *= out_grads[j]
+    //     grads[k] += dyi
+    //
+    // (out_grads in the pseudo-code is the in_grads input here.)
+
+    // norm[j] = bias + alpha * sum of in_image ** 2 over the window around j.
+    auto squared = builder->Mul(in_image, in_image);
+    auto sqr_sum = builder->ReduceWindow(
+        squared, XlaHelpers::Zero(builder, input_type(0)),
+        *ctx->GetOrCreateAdd(input_type(0)),
+        /* window_dimensions = */ {1, 1, 1, depth_radius_ * 2 + 1},
+        /* window_strides = */ {1, 1, 1, 1}, xla::Padding::kSame);
+
+    auto norm =
+        builder->Add(builder->ConstantR0<float>(bias_),
+                     builder->Mul(builder->ConstantR0<float>(alpha_), sqr_sum));
+
+    // dy[j] is the part of the off-diagonal term that depends only on j:
+    //   -2 * alpha * beta * out_image[j] / norm[j] * in_grads[j]
+    auto dy = builder->Mul(
+        builder->Mul(builder->ConstantR0<float>(-2.0f * alpha_ * beta_),
+                     builder->Div(out_image, norm)),
+        in_grads);
+
+    // The window is symmetric, so summing dy over the window around k
+    // collects every j whose own window contains k.
+    auto dy_reduced = builder->ReduceWindow(
+        dy, XlaHelpers::Zero(builder, input_type(0)),
+        *ctx->GetOrCreateAdd(input_type(0)),
+        /* window_dimensions = */ {1, 1, 1, depth_radius_ * 2 + 1},
+        /* window_strides = */ {1, 1, 1, 1}, xla::Padding::kSame);
+
+    // grads[k] = in_image[k] * dy_reduced[k]            (off-diagonal terms)
+    //          + in_grads[k] * norm[k] ** (-beta)       (the k == j term)
+    xla::ComputationDataHandle gradients = builder->Add(
+        builder->Mul(in_image, dy_reduced),
+        builder->Mul(in_grads,
+                     builder->Pow(norm, builder->ConstantR0<float>(-beta_))));
+
+    ctx->SetOutput(0, gradients);
+  }
+
+ private:
+  int64 depth_radius_;  // "depth_radius" attribute: half-width of the window.
+  float bias_;          // "bias" attribute.
+  float alpha_;         // "alpha" attribute.
+  float beta_;          // "beta" attribute.
+};
+
+// Associates the LRNGradOp kernel class with the op name "LRNGrad".
+REGISTER_XLA_OP("LRNGrad", LRNGradOp);
+
+} // anonymous namespace
+} // namespace tensorflow