path: root/tensorflow/core/kernels/fake_quant_ops_functor.h
author A. Unique TensorFlower <gardener@tensorflow.org> 2016-10-24 13:35:38 -0800
committer TensorFlower Gardener <gardener@tensorflow.org> 2016-10-24 14:47:26 -0700
commit 9fb15ea28bc7ba713fb7745d60336d7a9a8f89a6 (patch)
tree fae60cf4f8b9fe09af67613e4ea9a94d61407d7b /tensorflow/core/kernels/fake_quant_ops_functor.h
parent 4a465522c1023ae13ea89f729fa6fb1ad7989eb7 (diff)
Add FakeQuant ops and kernels for use with quantized training.
Change: 137081182
Diffstat (limited to 'tensorflow/core/kernels/fake_quant_ops_functor.h')
-rw-r--r-- | tensorflow/core/kernels/fake_quant_ops_functor.h | 434
1 file changed, 434 insertions(+), 0 deletions(-)
diff --git a/tensorflow/core/kernels/fake_quant_ops_functor.h b/tensorflow/core/kernels/fake_quant_ops_functor.h
new file mode 100644
index 0000000000..d3f600cd82
--- /dev/null
+++ b/tensorflow/core/kernels/fake_quant_ops_functor.h
@@ -0,0 +1,434 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_FAKE_QUANT_FUNCTOR_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_FAKE_QUANT_FUNCTOR_H_
+
+#include <tuple>
+
+#define EIGEN_STACK_ALLOCATION_LIMIT 0
+#define EIGEN_USE_THREADS
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+static constexpr int kSteps = 255;
+static constexpr float kStepsFloat = static_cast<float>(kSteps);
+
+// The zero point is nudged so that real zero maps exactly to an integer,
+// which is required for e.g. zero-padding in convolutional layers.
+// Returns (nudged_min, nudged_max, nudged_scale).
+template <typename Device>
+std::tuple<float, float, float> Nudge(const float min, const float max) {
+ const float scale = (max - min) / (kStepsFloat - 0.0f);
+ const float zero_point_from_min = 0.0f - min / scale;
+ const uint8 nudged_zero_point = [zero_point_from_min] {
+ if (zero_point_from_min < 0.0f) {
+ return static_cast<uint8>(0);
+ } else if (zero_point_from_min > kStepsFloat) {
+ return static_cast<uint8>(kSteps);
+ } else {
+ return static_cast<uint8>(std::round(zero_point_from_min));
+ }
+ }();
+
+ const float nudged_min = (0.0f - nudged_zero_point) * scale;
+ const float nudged_max = (kStepsFloat - nudged_zero_point) * scale;
+ return std::make_tuple(nudged_min, nudged_max, scale);
+}
+
+template<typename T> using ConstScalar =
+ typename tensorflow::TTypes<T>::ConstScalar;
+template<typename T> using Scalar = typename tensorflow::TTypes<T>::Scalar;
+template<typename T> using ConstVec = typename tensorflow::TTypes<T>::ConstVec;
+template<typename T> using Vec = typename tensorflow::TTypes<T>::Vec;
+template<typename T> using ConstFlat =
+ typename tensorflow::TTypes<T>::ConstFlat;
+template<typename T> using Flat = typename tensorflow::TTypes<T>::Flat;
+
+// Functor called by FakeQuantWithMinMaxArgsOp to do the work. Compiles both
+// for CPU and GPU.
+template <typename Device>
+struct FakeQuantWithMinMaxArgsFunctor {
+ void operator()(const Device& d, ConstFlat<float> inputs,
+ const float min, const float max, Flat<float> outputs) {
+ eigen_assert(min <= 0.0f && "min should be <= 0.0");
+ eigen_assert(max >= 0.0f && "max should be >= 0.0");
+ eigen_assert(min < max && "min should be < max");
+
+ float nudged_min, nudged_max, nudged_scale;
+ std::tie(nudged_min, nudged_max, nudged_scale) = Nudge<Device>(min, max);
+ const float inv_nudged_scale = 1.0f / nudged_scale;
+
+ auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
+ auto clamped_shifted = clamped - nudged_min;
+ outputs.device(d) = (clamped_shifted * inv_nudged_scale + 0.5f).floor() *
+ nudged_scale + nudged_min;
+ }
+};
+
+// Functor called by FakeQuantWithMinMaxArgsGradientOp to do the work. Compiles
+// both for CPU and GPU.
+template <typename Device>
+struct FakeQuantWithMinMaxArgsGradientFunctor {
+ void operator()(const Device& d, ConstFlat<float> gradients,
+ ConstFlat<float> inputs, const float min, const float max,
+ Flat<float> backprops) {
+ eigen_assert(min <= 0.0f && "min should be <= 0.0");
+ eigen_assert(max >= 0.0f && "max should be >= 0.0");
+ eigen_assert(min < max && "min should be < max");
+
+ float nudged_min, nudged_max, nudged_scale;
+ std::tie(nudged_min, nudged_max, nudged_scale) = Nudge<Device>(min, max);
+
+ auto between_nudged_min_max = (inputs >= nudged_min && inputs <= nudged_max)
+ .select(inputs.constant(1.0f), inputs.constant(0.0f));
+ backprops.device(d) = gradients * between_nudged_min_max;
+ }
+};
+
+// Functor called by FakeQuantWithMinMaxVarsOp to do the work. Compiles both
+// for CPU and GPU.
+template <typename Device>
+struct FakeQuantWithMinMaxVarsFunctor {
+ void operator()(const Device& d, ConstFlat<float> inputs,
+ ConstScalar<float> min, ConstScalar<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+ Scalar<bool> check_min_max,
+#endif
+ Flat<float> outputs) {
+#ifndef FAKE_QUANT_NO_DEBUG
+ check_min_max.device(d) = (min <= 0.0f).all();
+ eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+ check_min_max.device(d) = (max >= 0.0f).all();
+ eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+ check_min_max.device(d) = (min < max).all();
+ eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+ float nudged_min, nudged_max, nudged_scale;
+ std::tie(nudged_min, nudged_max, nudged_scale) =
+ Nudge<Device>(min(), max());
+ const auto nudged_scale_repl = inputs.constant(nudged_scale);
+
+ const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
+ const auto clamped_shifted = clamped - nudged_min;
+ outputs.device(d) = (clamped_shifted / nudged_scale_repl + 0.5f).floor() *
+ nudged_scale_repl + nudged_min;
+ }
+};
+
+// Functor called by FakeQuantWithMinMaxVarsGradientOp to do the work. Compiles
+// both for CPU and GPU.
+template <typename Device>
+struct FakeQuantWithMinMaxVarsGradientFunctor {
+ void operator()(const Device& d,
+ ConstFlat<float> gradients, ConstFlat<float> inputs,
+ ConstScalar<float> min, ConstScalar<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+ Scalar<bool> check_min_max,
+#endif
+ Flat<float> backprops_wrt_input,
+ Scalar<float> backprop_wrt_min,
+ Scalar<float> backprop_wrt_max) {
+#ifndef FAKE_QUANT_NO_DEBUG
+ check_min_max.device(d) = (min <= 0.0f).all();
+ eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+ check_min_max.device(d) = (max >= 0.0f).all();
+ eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+ check_min_max.device(d) = (min < max).all();
+ eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+ float nudged_min, nudged_max, nudged_scale;
+ std::tie(nudged_min, nudged_max, nudged_scale) =
+ Nudge<Device>(min(), max());
+
+ const auto between_min_max = (inputs >= nudged_min && inputs <= nudged_max)
+ .select(inputs.constant(1.0f), inputs.constant(0.0f));
+ backprops_wrt_input.device(d) = gradients * between_min_max;
+
+ const auto below_min = (inputs < nudged_min)
+ .select(inputs.constant(1.0f), inputs.constant(0.0f));
+ backprop_wrt_min.device(d) = (gradients * below_min).sum();
+
+ const auto above_max = (inputs > nudged_max)
+ .select(inputs.constant(1.0f), inputs.constant(0.0f));
+ backprop_wrt_max.device(d) = (gradients * above_max).sum();
+ }
+};
+
+using Index = typename tensorflow::TTypes<float>::ConstTensor::Index;
+
+// Functor called by FakeQuantWithMinMaxVarsPerChannelOp to do the work.
+// Compiles both for CPU and GPU.
+//
+// Already verified: inputs, outputs, min, max are of shape [d].
+template <typename Device>
+struct FakeQuant1WithMinMaxVarsPerChannelFunctor {
+ void operator()(const Device& d, ConstVec<float> inputs,
+ ConstVec<float> min, ConstVec<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+ Scalar<bool> check_min_max,
+#endif
+ Vec<float> outputs) {
+#ifndef FAKE_QUANT_NO_DEBUG
+ check_min_max.device(d) = (min <= 0.0f).all();
+ eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+ check_min_max.device(d) = (max >= 0.0f).all();
+ eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+ check_min_max.device(d) = (min < max).all();
+ eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+ for (Index i = 0; i < min.size(); ++i) {
+ float nudged_min, nudged_max, nudged_scale;
+ std::tie(nudged_min, nudged_max, nudged_scale) =
+ Nudge<Device>(min(i), max(i));
+ const float clamped =
+ std::max(std::min(inputs(i), nudged_max), nudged_min);
+ const float clamped_shifted = clamped - nudged_min;
+
+ outputs(i) = std::round(clamped_shifted / nudged_scale) * nudged_scale +
+ nudged_min;
+ }
+ }
+};
+
+// Already verified: inputs, outputs are of shape [b, d], min, max are of shape
+// [d].
+template <typename Device>
+struct FakeQuant2WithMinMaxVarsPerChannelFunctor {
+ void operator()(const Device& d, const Index batch_size, const Index depth,
+ ConstFlat<float> inputs,
+ ConstVec<float> min, ConstVec<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+ Scalar<bool> check_min_max,
+#endif
+ Flat<float> outputs) {
+#ifndef FAKE_QUANT_NO_DEBUG
+ check_min_max.device(d) = (min <= 0.0f).all();
+ eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+ check_min_max.device(d) = (max >= 0.0f).all();
+ eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+ check_min_max.device(d) = (min < max).all();
+ eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+ Eigen::DSizes<Index, 2> restored(batch_size, depth);
+ const auto inputs_restored = inputs.reshape(restored);
+ for (Index i = 0; i < min.size(); ++i) {
+ float nudged_min, nudged_max, nudged_scale;
+ std::tie(nudged_min, nudged_max, nudged_scale) =
+ Nudge<Device>(min(i), max(i));
+ const auto clamped = inputs_restored.chip<1>(i)
+ .cwiseMin(nudged_max).cwiseMax(nudged_min);
+ const auto clamped_shifted = clamped - nudged_min;
+
+ outputs.reshape(restored).chip<1>(i).device(d) =
+ (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale +
+ nudged_min;
+ }
+ }
+};
+
+// Already verified: inputs, outputs are of shape [b, h, w, d], min, max are
+// of shape [d].
+template <typename Device>
+struct FakeQuant4WithMinMaxVarsPerChannelFunctor {
+ void operator()(const Device& d, const Index batch_size, const Index height,
+ const Index width, const Index depth,
+ ConstFlat<float> inputs,
+ ConstVec<float> min, ConstVec<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+ Scalar<bool> check_min_max,
+#endif
+ Flat<float> outputs) {
+#ifndef FAKE_QUANT_NO_DEBUG
+ check_min_max.device(d) = (min <= 0.0f).all();
+ eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+ check_min_max.device(d) = (max >= 0.0f).all();
+ eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+ check_min_max.device(d) = (min < max).all();
+ eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+ Eigen::DSizes<Index, 4> restored(batch_size, height, width, depth);
+ const auto inputs_restored = inputs.reshape(restored);
+ for (Index i = 0; i < min.size(); ++i) {
+ float nudged_min, nudged_max, nudged_scale;
+ std::tie(nudged_min, nudged_max, nudged_scale) =
+ Nudge<Device>(min(i), max(i));
+ const auto clamped = inputs_restored.chip<3>(i)
+ .cwiseMin(nudged_max).cwiseMax(nudged_min);
+ const auto clamped_shifted = clamped - nudged_min;
+
+ outputs.reshape(restored).chip<3>(i).device(d) =
+ (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale +
+ nudged_min;
+ }
+ }
+};
+
+// Functor called by FakeQuantWithMinMaxVarsPerChannelGradientOp to do the work.
+// Compiles both for CPU and GPU.
+//
+// Already verified: gradients, inputs, outputs, min, max, backprops_wrt_input,
+// backprop_wrt_min, backprop_wrt_max are of shape [d].
+template <typename Device>
+struct FakeQuant1WithMinMaxVarsPerChannelGradientFunctor {
+ void operator()(const Device& d,
+ ConstVec<float> gradients, ConstVec<float> inputs,
+ ConstVec<float> min, ConstVec<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+ Scalar<bool> check_min_max,
+#endif
+ Vec<float> backprops_wrt_input, Vec<float> backprop_wrt_min,
+ Vec<float> backprop_wrt_max) {
+#ifndef FAKE_QUANT_NO_DEBUG
+ check_min_max.device(d) = (min <= 0.0f).all();
+ eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+ check_min_max.device(d) = (max >= 0.0f).all();
+ eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+ check_min_max.device(d) = (min < max).all();
+ eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+ for (Index i = 0; i < min.size(); ++i) {
+ float nudged_min, nudged_max, nudged_scale;
+ std::tie(nudged_min, nudged_max, nudged_scale) =
+ Nudge<Device>(min(i), max(i));
+
+ const bool between_min_max =
+ inputs(i) >= nudged_min && inputs(i) <= nudged_max;
+ backprops_wrt_input(i) = between_min_max ? gradients(i) : 0.0f;
+
+ const bool below_min = inputs(i) < nudged_min;
+ backprop_wrt_min(i) = below_min ? gradients(i) : 0.0f;
+
+ const bool above_max = inputs(i) > nudged_max;
+ backprop_wrt_max(i) = above_max ? gradients(i) : 0.0f;
+ }
+ }
+};
+
+// Already verified: gradients, inputs, backprops_wrt_input are of shape [b, d],
+// min, max, backprop_wrt_min, backprop_wrt_max are of shape [d].
+template <typename Device>
+struct FakeQuant2WithMinMaxVarsPerChannelGradientFunctor {
+ void operator()(const Device& d, const Index batch_size, const Index depth,
+ ConstFlat<float> gradients, ConstFlat<float> inputs,
+ ConstVec<float> min, ConstVec<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+ Scalar<bool> check_min_max,
+#endif
+ Flat<float> backprops_wrt_input,
+ Vec<float> backprop_wrt_min, Vec<float> backprop_wrt_max) {
+#ifndef FAKE_QUANT_NO_DEBUG
+ check_min_max.device(d) = (min <= 0.0f).all();
+ eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+ check_min_max.device(d) = (max >= 0.0f).all();
+ eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+ check_min_max.device(d) = (min < max).all();
+ eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+ Eigen::DSizes<Index, 2> restored(batch_size, depth);
+ const auto gradients_restored = gradients.reshape(restored);
+ const auto inputs_restored = inputs.reshape(restored);
+ for (Index i = 0; i < min.size(); ++i) {
+ float nudged_min, nudged_max, nudged_scale;
+ std::tie(nudged_min, nudged_max, nudged_scale) =
+ Nudge<Device>(min(i), max(i));
+ const auto gradients_chip = gradients_restored.chip<1>(i);
+ const auto inputs_chip = inputs_restored.chip<1>(i);
+
+ const auto between_min_max =
+ (inputs_chip >= nudged_min && inputs_chip <= nudged_max)
+ .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+ backprops_wrt_input.reshape(restored).chip<1>(i).device(d) =
+ gradients_chip * between_min_max;
+
+ const auto below_min = (inputs_chip < nudged_min)
+ .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+ Eigen::DSizes<Index, 1> reduce(0);
+ backprop_wrt_min.chip<0>(i).device(d) =
+ (gradients_chip * below_min).sum(reduce);
+
+ const auto above_max = (inputs_chip > nudged_max)
+ .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+ backprop_wrt_max.chip<0>(i).device(d) =
+ (gradients_chip * above_max).sum(reduce);
+ }
+ }
+};
+
+// Already verified: gradients, inputs, backprops_wrt_input are of shape
+// [b, h, w, d], min, max, backprop_wrt_min, backprop_wrt_max are of shape [d].
+template <typename Device>
+struct FakeQuant4WithMinMaxVarsPerChannelGradientFunctor {
+ void operator()(const Device& d, const Index batch_size, const Index height,
+ const Index width, const Index depth,
+ ConstFlat<float> gradients, ConstFlat<float> inputs,
+ ConstVec<float> min, ConstVec<float> max,
+#ifndef FAKE_QUANT_NO_DEBUG
+ Scalar<bool> check_min_max,
+#endif
+ Flat<float> backprops_wrt_input,
+ Vec<float> backprop_wrt_min, Vec<float> backprop_wrt_max) {
+#ifndef FAKE_QUANT_NO_DEBUG
+ check_min_max.device(d) = (min <= 0.0f).all();
+ eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise");
+ check_min_max.device(d) = (max >= 0.0f).all();
+ eigen_assert(check_min_max() && "max should be >= 0.0 coeff-wise");
+ check_min_max.device(d) = (min < max).all();
+ eigen_assert(check_min_max() && "min should be < max coeff-wise");
+#endif
+
+ Eigen::DSizes<Index, 4> restored(batch_size, height, width, depth);
+ const auto gradients_restored = gradients.reshape(restored);
+ const auto inputs_restored = inputs.reshape(restored);
+ for (Index i = 0; i < min.size(); ++i) {
+ float nudged_min, nudged_max, nudged_scale;
+ std::tie(nudged_min, nudged_max, nudged_scale) =
+ Nudge<Device>(min(i), max(i));
+ const auto gradients_chip = gradients_restored.chip<3>(i);
+ const auto inputs_chip = inputs_restored.chip<3>(i);
+
+ const auto between_min_max =
+ (inputs_chip >= nudged_min && inputs_chip <= nudged_max)
+ .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+ backprops_wrt_input.reshape(restored).chip<3>(i).device(d) =
+ gradients_chip * between_min_max;
+
+ const auto below_min = (inputs_chip < nudged_min)
+ .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+ Eigen::DSizes<Index, 3> reduce(0, 1, 2);
+ backprop_wrt_min.chip<0>(i).device(d) =
+ (gradients_chip * below_min).sum(reduce);
+
+ const auto above_max = (inputs_chip > nudged_max)
+ .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+ backprop_wrt_max.chip<0>(i).device(d) =
+ (gradients_chip * above_max).sum(reduce);
+ }
+ }
+};
+
+} // namespace tensorflow
+
+#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_FAKE_QUANT_FUNCTOR_H_
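
All of the functors above share the same two-step recipe: Nudge() shifts the [min, max] range just enough that real 0.0 lands exactly on one of the 256 quantization grid points, and the forward functors then clamp each input into the nudged range and round it to the nearest grid point. The gradient functors implement the matching straight-through estimator: the incoming gradient passes through unchanged where the input fell inside [nudged_min, nudged_max], and is accumulated into the min/max backprops where it fell below or above the range. For illustration only, here is a minimal standalone sketch of that forward math; it is not part of the commit above and is independent of the TensorFlow/Eigen build (NudgeSketch and FakeQuantSketch are made-up names).

// Standalone sketch of the nudge-and-quantize math used by the functors
// above. Plain floats instead of Eigen tensors; NudgeSketch/FakeQuantSketch
// are illustrative names, not part of the TensorFlow API.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <tuple>

namespace {

constexpr float kStepsFloat = 255.0f;  // 8-bit quantization, as in the header.

// Shift [min, max] minimally so that real 0.0 falls exactly on one of the 256
// quantization grid points (mirrors Nudge() in the header).
std::tuple<float, float, float> NudgeSketch(float min, float max) {
  const float scale = (max - min) / kStepsFloat;
  const float zero_point_from_min = -min / scale;
  const uint8_t nudged_zero_point =
      zero_point_from_min < 0.0f
          ? 0
          : zero_point_from_min > kStepsFloat
                ? 255
                : static_cast<uint8_t>(std::round(zero_point_from_min));
  const float nudged_min = (0.0f - nudged_zero_point) * scale;
  const float nudged_max = (kStepsFloat - nudged_zero_point) * scale;
  return std::make_tuple(nudged_min, nudged_max, scale);
}

// Forward pass for a single value (mirrors FakeQuantWithMinMaxArgsFunctor):
// clamp into the nudged range, then snap to the nearest grid point.
float FakeQuantSketch(float x, float nudged_min, float nudged_max,
                      float scale) {
  const float clamped = std::max(std::min(x, nudged_max), nudged_min);
  return std::floor((clamped - nudged_min) / scale + 0.5f) * scale + nudged_min;
}

}  // namespace

int main() {
  float nudged_min, nudged_max, scale;
  std::tie(nudged_min, nudged_max, scale) = NudgeSketch(-0.1f, 0.9f);
  // scale = 1/255 ~= 0.00392; the range is nudged to roughly
  // [-0.10196, 0.89804] so that 0.0 sits exactly on grid point 26.
  std::printf("nudged range = [%f, %f], scale = %f\n",
              nudged_min, nudged_max, scale);
  // Real zero quantizes to exactly zero -- the property the nudging exists for.
  std::printf("fake_quant(0.0)  = %f\n",
              FakeQuantSketch(0.0f, nudged_min, nudged_max, scale));
  std::printf("fake_quant(0.25) = %f\n",
              FakeQuantSketch(0.25f, nudged_min, nudged_max, scale));
  // Values outside the range are clamped to the nudged endpoints.
  std::printf("fake_quant(1.5)  = %f\n",
              FakeQuantSketch(1.5f, nudged_min, nudged_max, scale));
  return 0;
}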