author | 2016-09-27 23:00:57 -0800
---|---
committer | 2016-09-28 00:04:31 -0700
commit | a6e66bd194eb5f0924de32366be42436ae911af7 (patch)
tree | a3d310ba8aaf478e93af128f0438a33d34634712 /tensorflow/core/kernels/quantize_op.cc
parent | 424176030f4a7111f4db513a650b980a102014ce (diff)
Move contrib/quantization ops to tensorflow/core
Change: 134501895
Diffstat (limited to 'tensorflow/core/kernels/quantize_op.cc')
-rw-r--r-- | tensorflow/core/kernels/quantize_op.cc | 159
1 file changed, 159 insertions, 0 deletions
```diff
diff --git a/tensorflow/core/kernels/quantize_op.cc b/tensorflow/core/kernels/quantize_op.cc
new file mode 100644
index 0000000000..003654c1b0
--- /dev/null
+++ b/tensorflow/core/kernels/quantize_op.cc
@@ -0,0 +1,159 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/math_ops.cc.
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/type_traits.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace {
+enum { QUANTIZE_MODE_MIN_COMBINED, QUANTIZE_MODE_MIN_FIRST };
+}  // namespace
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+// Quantize a tensor from float to T, with user-specified min_range and
+// max_range.
+// TODO(xbing): Add a new QuantizeOp just taking scale,
+//              rather than min_range and max_range.
+template <typename Device, typename T>
+class QuantizeV2Op : public OpKernel {
+ public:
+  explicit QuantizeV2Op(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    half_range_ = !std::is_signed<T>::value
+                      ? 0.0f
+                      : (std::numeric_limits<T>::max() -
+                         std::numeric_limits<T>::min() + 1) /
+                            2.0f;
+    string mode_string;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("mode", &mode_string));
+    OP_REQUIRES(ctx,
+                (mode_string == "MIN_COMBINED" || mode_string == "MIN_FIRST"),
+                errors::InvalidArgument("Mode string must be 'MIN_COMBINED' or"
+                                        " 'MIN_FIRST', is '" +
+                                        mode_string + "'"));
+    if (mode_string == "MIN_COMBINED") {
+      mode_ = QUANTIZE_MODE_MIN_COMBINED;
+    } else if (mode_string == "MIN_FIRST") {
+      mode_ = QUANTIZE_MODE_MIN_FIRST;
+    }
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& input = ctx->input(0);
+    const float input_min_range = ctx->input(1).flat<float>()(0);
+    const float input_max_range = ctx->input(2).flat<float>()(0);
+
+    float min_range;
+    float max_range;
+    OP_REQUIRES(ctx, !(input_max_range < input_min_range),
+                errors::InvalidArgument(
+                    "input_max_range must be larger than input_min_range."));
+
+    // When the minimum and maximum ranges are too close together, nudge them
+    // apart by a small value so that they are slightly different. This helps
+    // us avoid creating ill-formed buffers where all quantized values map to
+    // the same float number. These kinds of buffers cause problems for
+    // downstream ops when they need to do calculations on them.
+    // We pick the value by making sure that zero is not more than 100x the
+    // overall range from the maximum, so that the value can be easily
+    // represented when we promote the quantized value to a higher
+    // intermediate bit depth, since that's a common requirement.
+    min_range = input_min_range;
+    const float epsilon = std::max(1.0f, std::max(fabsf(input_min_range),
+                                                  fabsf(input_max_range))) /
+                          100.0f;
+    max_range = std::max(input_max_range, input_min_range + epsilon);
+
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output));
+    if (mode_ == QUANTIZE_MODE_MIN_COMBINED) {
+      const float scale_factor =
+          (std::numeric_limits<T>::max() - std::numeric_limits<T>::min()) /
+          (max_range - min_range);
+
+      // Quantize:
+      // Make input in range of [min_range, max_range], then
+      // subtract min_range to be in range of [0, max_range - min_range]
+      // Divide by (max_range - min_range) to get to [0, 1.0]
+      // Multiply by range of T, after that shift left 1/2 range of T if
+      // T is signed.
+      // Note that std::round is used to round the number before the cast.
+      // std::round implements "round-half-away-from-zero",
+      // e.g., -5.5 gets rounded to -6, -5.4 goes to -5, 5.4 goes to 5,
+      // and 5.5 goes to 6.
+      auto o = output->template flat<T>();
+      bool is_signed = std::is_signed<T>::value;
+      if (is_signed) {
+        // The slow path.
+        // TODO(xbing,yonghui): Speed up this path as well.
+        o.device(ctx->template eigen_device<Device>()) =
+            ((input.flat<float>().cwiseMin(max_range).cwiseMax(min_range) -
+              min_range) *
+                 scale_factor -
+             half_range_)
+                .unaryExpr(std::function<float(float)>(round))
+                .template cast<T>();
+      } else {
+        // The fast path that avoids unaryExpr.
+        // According to the micro-benchmark, adding device here doesn't help.
+        o = ((input.flat<float>().cwiseMin(max_range).cwiseMax(min_range) -
+              min_range) *
+                 scale_factor +
+             0.5f)
+                .template cast<T>();
+      }
+    } else if (mode_ == QUANTIZE_MODE_MIN_FIRST) {
+      FloatTensorToQuantizedInPlaceUsingEigen<T>(
+          ctx->template eigen_device<Device>(), input, min_range, max_range,
+          output);
+    }
+
+    Tensor* output_min_tensor = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(1, {}, &output_min_tensor));
+    output_min_tensor->flat<float>()(0) = min_range;
+
+    Tensor* output_max_tensor = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(2, {}, &output_max_tensor));
+    output_max_tensor->flat<float>()(0) = max_range;
+  }
+
+ private:
+  float half_range_;
+  int mode_;
+};
+
+REGISTER_KERNEL_BUILDER(
+    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<quint8>("T"),
+    QuantizeV2Op<CPUDevice, quint8>);
+REGISTER_KERNEL_BUILDER(
+    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<qint8>("T"),
+    QuantizeV2Op<CPUDevice, qint8>);
+REGISTER_KERNEL_BUILDER(
+    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<quint16>("T"),
+    QuantizeV2Op<CPUDevice, quint16>);
+REGISTER_KERNEL_BUILDER(
+    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<qint16>("T"),
+    QuantizeV2Op<CPUDevice, qint16>);
+
+}  // namespace tensorflow
```
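The range-nudging step in `Compute` is easiest to see with concrete numbers. Below is a minimal standalone sketch (not part of the commit) of that logic under the same epsilon rule the kernel uses; the helper name `nudge_range` is hypothetical and exists only for illustration.

```cpp
// Minimal standalone sketch (not part of the commit) of the kernel's
// range-nudging step. nudge_range is a hypothetical name.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <utility>

// Returns (min_range, max_range) with max_range pushed at least epsilon
// above min_range, where epsilon is 1/100 of the larger of 1.0 and the
// input magnitudes -- matching the comment in the kernel above.
std::pair<float, float> nudge_range(float input_min, float input_max) {
  const float epsilon =
      std::max(1.0f, std::max(std::fabs(input_min), std::fabs(input_max))) /
      100.0f;
  return {input_min, std::max(input_max, input_min + epsilon)};
}

int main() {
  // A degenerate range such as [3.0, 3.0] would map every float to the
  // same quantized value; the nudge widens it to [3.0, 3.03].
  const auto r = nudge_range(3.0f, 3.0f);
  std::printf("[%g, %g]\n", r.first, r.second);
  return 0;
}
```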
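Similarly, the MIN_COMBINED path reduces to clamp, shift, scale, round. The sketch below (again not part of the commit) specializes that arithmetic to an unsigned 8-bit output, mirroring the kernel's fast path for `quint8`; `quantize_min_combined_u8` is a hypothetical name for illustration only.

```cpp
// Minimal standalone sketch (not part of the commit) of the MIN_COMBINED
// arithmetic, specialized to unsigned 8-bit output.
#include <algorithm>
#include <cstdint>
#include <cstdio>

// Maps a float in [min_range, max_range] onto [0, 255], mirroring the
// kernel's unsigned fast path: clamp, shift to zero, scale, round.
uint8_t quantize_min_combined_u8(float x, float min_range, float max_range) {
  const float scale_factor = 255.0f / (max_range - min_range);
  const float clamped = std::min(max_range, std::max(min_range, x));
  // Adding 0.5f before truncation matches the kernel's fast path, and for
  // non-negative values is equivalent to rounding half away from zero.
  return static_cast<uint8_t>((clamped - min_range) * scale_factor + 0.5f);
}

int main() {
  // With min_range = -1 and max_range = 1, zero maps to the midpoint 128.
  std::printf("%u\n", static_cast<unsigned>(
                          quantize_min_combined_u8(0.0f, -1.0f, 1.0f)));  // 128
  std::printf("%u\n", static_cast<unsigned>(
                          quantize_min_combined_u8(-1.0f, -1.0f, 1.0f)));  // 0
  std::printf("%u\n", static_cast<unsigned>(
                          quantize_min_combined_u8(1.0f, -1.0f, 1.0f)));  // 255
  return 0;
}
```

The signed path in the kernel differs only in that it subtracts `half_range_` so the result is centered around zero before the cast to a signed type.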