author    | A. Unique TensorFlower <gardener@tensorflow.org> | 2016-09-28 00:15:58 -0800
committer | TensorFlower Gardener <gardener@tensorflow.org>  | 2016-09-28 01:35:32 -0700
commit    | 419d5d072375ee0044fecb94e4bfe21a7b3b0b9e
tree      | cb66e6e7238bf2e7938b58f3638bd31f65d542c2 /tensorflow/contrib/quantization
parent    | c1e4f0f6a1078fd6715e8145fbef874e4d447ab8
Automated rollback of change 134501895
Change: 134506649
Diffstat (limited to 'tensorflow/contrib/quantization')
43 files changed, 8531 insertions, 15 deletions
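Most of the code this rollback restores lives in quantization_utils.h, which standardizes how quantized integer values map onto floats over a [range_min, range_max] interval, together with the contrib-local op and kernel shared objects (_quantized_ops.so, _quantized_kernels.so) that are loaded at runtime. As a quick orientation before the full diff, below is a small Python sketch of that mapping: a hedged reference reimplementation of the FloatToQuantized / QuantizedToFloat formulas from the header, specialized to an 8-bit unsigned type. The function and constant names are illustrative and are not part of the change itself.

# Reference sketch (not part of the diff) of the quantization mapping in
# quantization_utils.h, specialized to an 8-bit unsigned type (quint8).
NUMBER_OF_BITS = 8
NUMBER_OF_STEPS = 1 << NUMBER_OF_BITS      # 256
LOWEST_QUANTIZED = 0                       # lowest representable quint8 value

def float_to_quantized(value, range_min, range_max):
    """Map a float in [range_min, range_max] to a quint8-style integer."""
    if range_min == range_max:
        return LOWEST_QUANTIZED
    range_adjust = NUMBER_OF_STEPS / (NUMBER_OF_STEPS - 1.0)
    range_scale = NUMBER_OF_STEPS / ((range_max - range_min) * range_adjust)
    quantized = round(value * range_scale) - round(range_min * range_scale)
    quantized += LOWEST_QUANTIZED
    # Clamp into the representable range, as FloatToQuantized does.
    return int(min(max(quantized, 0), NUMBER_OF_STEPS - 1))

def quantized_to_float(quantized, range_min, range_max):
    """Inverse mapping, following QuantizedToFloat in the header."""
    if range_min == range_max:
        return range_min
    range_adjust = NUMBER_OF_STEPS / (NUMBER_OF_STEPS - 1.0)
    range_scale = ((range_max - range_min) * range_adjust) / NUMBER_OF_STEPS
    return range_min + (quantized - LOWEST_QUANTIZED) * range_scale

# Worked example: 0.5 in a [0.0, 1.0] range quantizes to 128, which maps
# back to roughly 0.502 (the tests below allow about one quantized step of
# error for round trips like this).
q = float_to_quantized(0.5, 0.0, 1.0)
f = quantized_to_float(q, 0.0, 1.0)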
diff --git a/tensorflow/contrib/quantization/BUILD b/tensorflow/contrib/quantization/BUILD index 5347b32bdb..881349fda7 100644 --- a/tensorflow/contrib/quantization/BUILD +++ b/tensorflow/contrib/quantization/BUILD @@ -13,6 +13,53 @@ load( "tf_custom_op_library", ) +cc_library( + name = "cc_array_ops", + srcs = ["ops/array_ops.cc"], + linkstatic = 1, + deps = [ + "//tensorflow/core:framework", + ], + alwayslink = 1, +) + +cc_library( + name = "cc_math_ops", + srcs = ["ops/math_ops.cc"], + linkstatic = 1, + deps = [ + "//tensorflow/core:framework", + ], + alwayslink = 1, +) + +cc_library( + name = "cc_nn_ops", + srcs = ["ops/nn_ops.cc"], + linkstatic = 1, + deps = [ + "//tensorflow/core:framework", + ], + alwayslink = 1, +) + +cc_library( + name = "cc_ops", + linkstatic = 1, + deps = [ + ":cc_array_ops", + ":cc_math_ops", + ":cc_nn_ops", + ], + alwayslink = 1, +) + +filegroup( + name = "android_ops", + srcs = glob(["ops/*.cc"]), + visibility = ["//visibility:public"], +) + py_library( name = "quantization_py", srcs = [ @@ -22,6 +69,8 @@ py_library( srcs_version = "PY2AND3", deps = [ ":ops", + "//tensorflow/contrib/quantization:quantized_ops_py", + "//tensorflow/contrib/quantization/kernels:quantized_kernels_py", ], ) @@ -34,9 +83,52 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - "//tensorflow/python:array_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:nn_ops", + ":array_ops", + ":math_ops", + ":nn_ops", + ], +) + +tf_gen_op_wrapper_py( + name = "array_ops", + deps = ["//tensorflow/contrib/quantization:cc_array_ops"], +) + +tf_gen_op_wrapper_py( + name = "math_ops", + deps = ["//tensorflow/contrib/quantization:cc_math_ops"], +) + +tf_gen_op_wrapper_py( + name = "nn_ops", + deps = ["//tensorflow/contrib/quantization:cc_nn_ops"], +) + +py_test( + name = "dequantize_op_test", + size = "small", + srcs = ["python/dequantize_op_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":ops", + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/quantization:quantized_ops_py", + "//tensorflow/contrib/quantization/kernels:quantized_kernels_py", + "//tensorflow/python:framework_test_lib", + ], +) + +py_test( + name = "quantized_conv_ops_test", + size = "small", + srcs = ["python/quantized_conv_ops_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":ops", + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/quantization:quantized_ops_py", + "//tensorflow/contrib/quantization/kernels:quantized_kernels_py", + "//tensorflow/python:framework_test_lib", ], ) @@ -47,6 +139,24 @@ filegroup( ]), ) +tf_custom_op_library( + name = "_quantized_ops.so", + srcs = [ + "ops/array_ops.cc", + "ops/math_ops.cc", + "ops/nn_ops.cc", + ], + deps = [ + ], +) + +py_library( + name = "quantized_ops_py", + srcs = ["load_quantized_ops_so.py"], + data = ["_quantized_ops.so"], + srcs_version = "PY2AND3", +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/quantization/Makefile.in b/tensorflow/contrib/quantization/Makefile.in new file mode 100644 index 0000000000..563639e5d7 --- /dev/null +++ b/tensorflow/contrib/quantization/Makefile.in @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# This sub Makefile compiles libraries under this directory. This is designed to +# be used as a sub Makefile with tensorflow/contrib/makefile/Makefile. +# You can build targets in this file by including this sub makefile like: +# $ make -f tensorflow/contrib/makefile/Makefile TARGET=<target> \ +# SUB_MAKEFILES=$(pwd)/tensorflow/contrib/quantization/Makefile.in \ +# (optional: NDK_ROOT=<ndk_root>) contrib_quantization_tests +# TODO(satok): Support more targets + +GTEST_DIR := \ +$(MAKEFILE_DIR)/downloads/googletest/googletest + +GTEST_HEADERS = \ +$(wildcard $(GTEST_DIR)/include/gtest/*.h) \ +$(wildcard $(GTEST_DIR)/include/gtest/internal/*.h) + +GTEST_SRCS := \ +$(wildcard $(GTEST_DIR)/src/*.cc) \ +$(wildcard $(GTEST_DIR)/src/*.h) \ +$(GTEST_HEADERS) + +QUANTIZATION_TEST_SRCS := \ +tensorflow/contrib/quantization/ops/math_ops.cc \ +tensorflow/contrib/quantization/kernels/quantize_op.cc \ +tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc \ +tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc \ +tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc \ +tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc \ +tensorflow/contrib/makefile/test/test_main.cc + +QUANTIZATION_TEST_OBJS := $(addprefix $(OBJDIR), $(QUANTIZATION_TEST_SRCS:.cc=.o)) + +QUANTIZATION_TEST_NAME := contrib_quantization_tests +QUANTIZATION_TEST_BIN_PATH := $(BINDIR)$(QUANTIZATION_TEST_NAME) + +INCLUDES += \ +-I$(MAKEFILE_DIR)/downloads/gemmlowp \ +-I$(MAKEFILE_DIR)/downloads/googletest/googletest/include + +QUANTIZATION_TEST_INCLUDES := $(INCLUDES) + +$(OBJDIR)gtest-all.o : $(GTEST_SRCS) + $(CXX) $(CXXFLAGS) $(QUANTIZATION_TEST_INCLUDES) -I $(GTEST_DIR) -c \ + $(GTEST_DIR)/src/gtest-all.cc -o $@ + +$(LIBDIR)gtest.a : $(OBJDIR)gtest-all.o + $(AR) $(ARFLAGS) $@ $^ + +$(QUANTIZATION_TEST_BIN_PATH): $(LIB_PATH) $(LIBDIR)gtest.a $(QUANTIZATION_TEST_OBJS) + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) $(QUANTIZATION_TEST_INCLUDES) \ + -o $(QUANTIZATION_TEST_BIN_PATH) $(QUANTIZATION_TEST_OBJS) \ + $(LIBFLAGS) $(LIB_PATH) $(LIBDIR)gtest.a $(LDFLAGS) $(LIBS) + +$(QUANTIZATION_TEST_NAME): $(QUANTIZATION_TEST_BIN_PATH) diff --git a/tensorflow/contrib/quantization/__init__.py b/tensorflow/contrib/quantization/__init__.py index dcb73399b3..833dd20b5a 100644 --- a/tensorflow/contrib/quantization/__init__.py +++ b/tensorflow/contrib/quantization/__init__.py @@ -24,7 +24,7 @@ from tensorflow.contrib.quantization.python import array_ops as quantized_array_ from tensorflow.contrib.quantization.python.math_ops import * from tensorflow.contrib.quantization.python.nn_ops import * -from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops -from tensorflow.python.ops.gen_array_ops import dequantize -from tensorflow.python.ops.gen_array_ops import quantize_v2 -from tensorflow.python.ops.gen_array_ops import quantized_concat +from tensorflow.contrib.quantization.ops import gen_array_ops as quantized_gen_array_ops +from tensorflow.contrib.quantization.ops.gen_array_ops import 
dequantize +from tensorflow.contrib.quantization.ops.gen_array_ops import quantize_v2 +from tensorflow.contrib.quantization.ops.gen_array_ops import quantized_concat diff --git a/tensorflow/contrib/quantization/kernels/BUILD b/tensorflow/contrib/quantization/kernels/BUILD new file mode 100644 index 0000000000..6be2ccaa07 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/BUILD @@ -0,0 +1,311 @@ +# Description: +# quantization-specific OpKernels + +package( + default_visibility = ["//visibility:public"], + features = ["-parse_headers"], +) + +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_test", + "tf_custom_op_library", + "tf_kernel_library", +) + +filegroup( + name = "android_ops", + srcs = [ + "dequantize_op.cc", + "quantization_utils.cc", + "quantization_utils.h", + "quantize_down_and_shrink_range.cc", + "quantize_op.cc", + "quantized_activation_ops.cc", + "quantized_batch_norm_op.cc", + "quantized_bias_add_op.cc", + "quantized_concat_op.cc", + "quantized_conv_ops.cc", + "quantized_matmul_op.cc", + "quantized_pooling_ops.cc", + "reference_gemm.h", + ], + visibility = ["//visibility:public"], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) + +tf_kernel_library( + name = "quantized_ops", + srcs = [ + "dequantize_op.cc", + "quantization_utils.cc", + "quantize_down_and_shrink_range.cc", + "quantize_op.cc", + "quantized_activation_ops.cc", + "quantized_batch_norm_op.cc", + "quantized_bias_add_op.cc", + "quantized_concat_op.cc", + "quantized_conv_ops.cc", + "quantized_matmul_op.cc", + "quantized_pooling_ops.cc", + ], + hdrs = [ + "quantization_utils.h", + "reference_gemm.h", + ], + deps = [ + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:concat_lib_hdrs", + "//tensorflow/core/kernels:conv_ops", + "//tensorflow/core/kernels:eigen_helpers", + "//tensorflow/core/kernels:ops_util", + "//tensorflow/core/kernels:pooling_ops", + "//third_party/eigen3", + "@gemmlowp//:eight_bit_int_gemm", + ], +) + +tf_custom_op_library( + name = "_quantized_kernels.so", + srcs = [ + "dequantize_op.cc", + "quantization_utils.cc", + "quantization_utils.h", + "quantize_down_and_shrink_range.cc", + "quantize_op.cc", + "quantized_activation_ops.cc", + "quantized_batch_norm_op.cc", + "quantized_bias_add_op.cc", + "quantized_concat_op.cc", + "quantized_conv_ops.cc", + "quantized_matmul_op.cc", + "quantized_pooling_ops.cc", + "reference_gemm.h", + ], + deps = [ + "//tensorflow/core/kernels:concat_lib_hdrs", + "//tensorflow/core/kernels:ops_util_hdrs", + "//tensorflow/core/kernels:pooling_ops_hdrs", + "@gemmlowp//:eight_bit_int_gemm", + ], +) + +py_library( + name = "quantized_kernels_py", + srcs = ["load_quantized_kernels_so.py"], + data = ["_quantized_kernels.so"], + srcs_version = "PY2AND3", +) + +tf_cc_test( + name = "quantize_down_and_shrink_range_op_test", + size = "small", + srcs = ["quantize_down_and_shrink_range_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", 
+ "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantization_utils_test", + srcs = ["quantization_utils_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//third_party/eigen3", + ], +) + +tf_cc_test( + name = "quantized_activation_ops_test", + srcs = ["quantized_activation_ops_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_bias_add_op_test", + size = "small", + srcs = ["quantized_bias_add_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_conv_ops_test", + size = "small", + srcs = ["quantized_conv_ops_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantize_op_test", + size = "small", + srcs = ["quantize_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_matmul_op_test", + size = "small", + srcs = ["quantized_matmul_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_pooling_ops_test", + size = "small", + srcs = ["quantized_pooling_ops_test.cc"], + deps = [ + 
":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_concat_op_test", + size = "small", + srcs = ["quantized_concat_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_batch_norm_op_test", + size = "small", + srcs = ["quantized_batch_norm_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:batch_norm_op", + "//tensorflow/core/kernels:ops_testutil", + "//third_party/eigen3", + ], +) diff --git a/tensorflow/contrib/quantization/kernels/dequantize_op.cc b/tensorflow/contrib/quantization/kernels/dequantize_op.cc new file mode 100644 index 0000000000..a088954fc2 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/dequantize_op.cc @@ -0,0 +1,106 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// See docs in ../ops/math_ops.cc. + +#define EIGEN_USE_THREADS + +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/type_traits.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace { +enum { QUANTIZE_MODE_MIN_COMBINED, QUANTIZE_MODE_MIN_FIRST }; +} // namespace + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +template <typename Device, typename T> +class DequantizeOp : public OpKernel { + public: + explicit DequantizeOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + half_range_ = !std::is_signed<T>::value + ? 
0.0f + : (static_cast<float>(std::numeric_limits<T>::max()) - + std::numeric_limits<T>::min() + 1) / + 2.0f; + string mode_string; + OP_REQUIRES_OK(ctx, ctx->GetAttr("mode", &mode_string)); + OP_REQUIRES(ctx, + (mode_string == "MIN_COMBINED" || mode_string == "MIN_FIRST"), + errors::InvalidArgument("Mode string must be 'MIN_COMBINED' or" + " 'MIN_FIRST', is '" + + mode_string + "'")); + if (mode_string == "MIN_COMBINED") { + mode_ = QUANTIZE_MODE_MIN_COMBINED; + } else if (mode_string == "MIN_FIRST") { + mode_ = QUANTIZE_MODE_MIN_FIRST; + } + } + + void Compute(OpKernelContext* ctx) override { + const Tensor& input = ctx->input(0); + const float min_range = ctx->input(1).flat<float>()(0); + const float max_range = ctx->input(2).flat<float>()(0); + + Tensor* output = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); + if (mode_ == QUANTIZE_MODE_MIN_COMBINED) { + const float scale_factor = + (max_range - min_range) / + (static_cast<float>(std::numeric_limits<T>::max()) - + std::numeric_limits<T>::min()); + + // Multiply by scale factor and add min_range. + output->flat<float>() = + ((input.flat<T>().template cast<int>().template cast<float>() + + half_range_) * + scale_factor) + + min_range; + } else if (mode_ == QUANTIZE_MODE_MIN_FIRST) { + QuantizedTensorToFloatInPlaceUsingEigen<T>( + ctx->template eigen_device<Device>(), input, min_range, max_range, + output); + } + } + + private: + float half_range_; + int mode_; +}; + +REGISTER_KERNEL_BUILDER( + Name("Dequantize").Device(DEVICE_CPU).TypeConstraint<quint8>("T"), + DequantizeOp<CPUDevice, quint8>); +REGISTER_KERNEL_BUILDER( + Name("Dequantize").Device(DEVICE_CPU).TypeConstraint<qint8>("T"), + DequantizeOp<CPUDevice, qint8>); +REGISTER_KERNEL_BUILDER( + Name("Dequantize").Device(DEVICE_CPU).TypeConstraint<quint16>("T"), + DequantizeOp<CPUDevice, quint16>); +REGISTER_KERNEL_BUILDER( + Name("Dequantize").Device(DEVICE_CPU).TypeConstraint<qint16>("T"), + DequantizeOp<CPUDevice, qint16>); + +REGISTER_KERNEL_BUILDER( + Name("Dequantize").Device(DEVICE_CPU).TypeConstraint<qint32>("T"), + DequantizeOp<CPUDevice, qint32>); + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/hexagon/BUILD b/tensorflow/contrib/quantization/kernels/hexagon/BUILD new file mode 100644 index 0000000000..b57a2ac1b5 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/hexagon/BUILD @@ -0,0 +1,45 @@ +# Description: +# quantization-specific OpKernels for hexagon + +package( + default_visibility = ["//visibility:public"], + features = ["-parse_headers"], +) + +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_test", +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) + +tf_cc_test( + name = "quantized_matmul_op_for_hexagon_test", + size = "small", + srcs = ["quantized_matmul_op_for_hexagon_test.cc"], + deps = [ + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/contrib/quantization/kernels:quantized_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) diff --git 
a/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc b/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc new file mode 100644 index 0000000000..3d139fbe0a --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc @@ -0,0 +1,136 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Tests in this file are designed to evaluate hexagon DSP operations. + +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +#ifdef USE_HEXAGON_LIBS +#include "tensorflow/core/platform/hexagon/gemm_wrapper.h" +#include "tensorflow/core/platform/profile_utils/cpu_utils.h" +#endif + +namespace tensorflow { + +class QuantizedMatMulOpForHexagonTest : public OpsTestBase { + protected: + void SetUp() final { +#ifdef USE_HEXAGON_LIBS + profile_utils::CpuUtils::EnableClockCycleProfiling(true); + LOG(INFO) << "Hexagon libs are linked (wrapper version = " + << hexagon_gemm_wrapper_GetWrapperVersion() + << ", hexagon binary version = " + << hexagon_gemm_wrapper_GetHexagonBinaryVersion() << ")"; + LOG(INFO) << "Cpu frequency = " + << profile_utils::CpuUtils::GetCycleCounterFrequency(); +#else + LOG(WARNING) << "Hexagon libs are not linked."; +#endif + } +}; + +// Shows some statistics of hexagon dsp using hexagon specific APIs +#ifdef USE_HEXAGON_LIBS +TEST_F(QuantizedMatMulOpForHexagonTest, EvaluateSharedLibOverhead) { + const uint64 overhead_shared_lib_start = + profile_utils::CpuUtils::GetCurrentClockCycle(); + const int wrapper_version = hexagon_gemm_wrapper_GetWrapperVersion(); + const uint64 overhead_shared_lib_end = + profile_utils::CpuUtils::GetCurrentClockCycle(); + const uint64 overhead_shared_lib_diff = + (overhead_shared_lib_end - overhead_shared_lib_start); + const uint64 overhead_hexagon_rpc_start = + profile_utils::CpuUtils::GetCurrentClockCycle(); + const int hexagon_binary_version = + hexagon_gemm_wrapper_GetHexagonBinaryVersion(); + const uint64 overhead_hexagon_rpc_end = + profile_utils::CpuUtils::GetCurrentClockCycle(); + const uint64 overhead_hexagon_rpc_diff = + (overhead_hexagon_rpc_end - overhead_hexagon_rpc_start); + LOG(INFO) << "Shared lib (ver = " << wrapper_version << ") overhead is " + << 
overhead_shared_lib_diff << " cycles, time = " + << std::chrono::duration_cast<std::chrono::microseconds>( + profile_utils::CpuUtils::ConvertClockCycleToTime( + overhead_shared_lib_diff)) + .count() + << " usec"; + LOG(INFO) << "hexagon rpc (ver = " << hexagon_binary_version + << ") overhead is " << overhead_hexagon_rpc_diff + << " cycles, time = " + << std::chrono::duration_cast<std::chrono::microseconds>( + profile_utils::CpuUtils::ConvertClockCycleToTime( + overhead_hexagon_rpc_diff)) + .count() + << " usec"; +} +#endif + +// Runs two small matrices through the operator, and leaves all the parameters +// at their default values. +// This test is a sample to execute matmul on hexagon. +TEST_F(QuantizedMatMulOpForHexagonTest, Small_NoParams) { + TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum<qint32>::v()) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + // A matrix is: + // | 1 | 2 | 3 | + // | 4 | 5 | 6 | + AddInputFromArray<quint8>(TensorShape({2, 3}), {1, 2, 3, 4, 5, 6}); + // B matrix is: + // | 7 | 8 | 9 | 10 | + // | 11 | 12 | 13 | 14 | + // | 15 | 16 | 17 | 18 | + AddInputFromArray<quint8>(TensorShape({3, 4}), + {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + + TF_ASSERT_OK(RunOpKernel()); + // Here are the results we expect, from hand calculations: + // (1 * 7) + (2 * 11) + (3 * 15) = 74 + // (1 * 8) + (2 * 12) + (3 * 16) = 80 + // (1 * 9) + (2 * 13) + (3 * 17) = 86 + // (1 * 10) + (2 * 14) + (3 * 18) = 92 + // (4 * 7) + (5 * 11) + (6 * 15) = 173 + // (4 * 8) + (5 * 12) + (6 * 16) = 188 + // (4 * 9) + (5 * 13) + (6 * 17) = 203 + // (4 * 10) + (5 * 14) + (6 * 18) = 218 + Tensor expected(allocator(), DT_QINT32, TensorShape({2, 4})); + test::FillValues<qint32>(&expected, {74, 80, 86, 92, 173, 188, 203, 218}); + test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/load_quantized_kernels_so.py b/tensorflow/contrib/quantization/kernels/load_quantized_kernels_so.py new file mode 100644 index 0000000000..3b7fd57a93 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/load_quantized_kernels_so.py @@ -0,0 +1,48 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Ops for quantized evaluation.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import threading + +import tensorflow as tf + +QUANTIZED_KERNELS_FILE = '_quantized_kernels.so' + +_quantized_kernels = None +_kernels_lock = threading.Lock() + + +# Workaround for the fact that importing tensorflow imports contrib +# (even if a user isn't using this or any other contrib op), but +# there's not yet any guarantee that the shared object exists. +# In which case, "import tensorflow" will always crash, even for users that +# never use contrib. +def Load(library_base_dir=''): + """Load the quantized ops library and return the loaded module.""" + with _kernels_lock: + global _quantized_kernels + if not _quantized_kernels: + data_files_path = os.path.join(library_base_dir, + tf.resource_loader.get_data_files_path()) + tf.logging.info('data path: %s', data_files_path) + _quantized_kernels = tf.load_op_library(os.path.join( + data_files_path, QUANTIZED_KERNELS_FILE)) + + assert _quantized_kernels, 'Could not load _quantized_kernels.so' + return _quantized_kernels diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils.cc b/tensorflow/contrib/quantization/kernels/quantization_utils.cc new file mode 100644 index 0000000000..72651f96b0 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantization_utils.cc @@ -0,0 +1,42 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" + +namespace tensorflow { + +void GetOutputMinAndMaxForQuantizedAdd(float input_min, float input_max, + float smaller_input_min, + float smaller_input_max, + float* output_min, float* output_max) { + // We need to have a good range to add our two arguments together in. This + // is surprisingly tricky, since it has to satisfy a few different needs: + // - Must be symmetrical around zero, so that 0 + 0 = 0. + // - Must hold the largest of the argument ranges. + // - Should have enough range that the bits of the lowest and highest + // arguments overlap if possible without the lower getting truncated. + // - Should have some headroom so that there's no overflow. + // - Needs to be signed. + // This leads us to use a scheme where we (assuming the inputs are eight bit + // and the output is 32-bit) use the bottom 32 - 17 = 15 bits to store the + // accumulated results. This gives us all the properties we need. 
+ *output_max = + std::max(input_max, std::max(-input_min, std::max(smaller_input_max, + -smaller_input_min))) * + (1 << 17); + *output_min = -(*output_max); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils.h b/tensorflow/contrib/quantization/kernels/quantization_utils.h new file mode 100644 index 0000000000..3b6a4901ba --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantization_utils.h @@ -0,0 +1,555 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_ + +#define EIGEN_USE_THREADS + +// This is a set of functions that standardizes how quantized values are +// interpreted as float numbers. +// All of the current implementations are for reference and have not been +// optimized. They should be implementable using fixed point representations +// to avoid a dependency on floating-point hardware. + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "public/gemmlowp.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/threadpool.h" + +namespace tensorflow { + +// We have to be able to detect and handle overflows in int32, so this function +// uses doubles and int64's to make sure we have enough room. +template <class T> +int64 FloatToQuantizedUnclamped(float input, float range_min, float range_max) { + const int64 lowest_quantized = + static_cast<double>(Eigen::NumTraits<T>::lowest()); + if (range_min == range_max) { + return lowest_quantized; + } + const int number_of_bits = sizeof(T) * 8; + const int64 number_of_steps = static_cast<int64>(1) << number_of_bits; + const double range_adjust = (number_of_steps / (number_of_steps - 1.0)); + const double range = ((range_max - range_min) * range_adjust); + const double range_scale = (number_of_steps / range); + int64 quantized = + (round(input * range_scale) - round(range_min * range_scale)); + quantized += lowest_quantized; + return quantized; +} + +// This converts the float into the final quantized type, clamping/saturating +// any over or underflows. 
+template <class T> +T FloatToQuantized(float input, float range_min, float range_max) { + int64 quantized = FloatToQuantizedUnclamped<T>(input, range_min, range_max); + const int64 lowest_quantized = + static_cast<int64>(Eigen::NumTraits<T>::lowest()); + const int64 highest_quantized = + static_cast<int64>(Eigen::NumTraits<T>::highest()); + quantized = std::max(quantized, lowest_quantized); + quantized = std::min(quantized, highest_quantized); + return static_cast<T>(static_cast<int32>(quantized)); +} + +template <class T> +float QuantizedToFloat(T input, float range_min, float range_max) { + if (range_min == range_max) { + return range_min; + } + const int number_of_bits = sizeof(T) * 8; + const int64 number_of_steps = static_cast<int64>(1) << number_of_bits; + const double range_adjust = (number_of_steps / (number_of_steps - 1.0)); + const double range = ((range_max - range_min) * range_adjust); + const double range_scale = (range / number_of_steps); + const int64 lowest_quantized = + static_cast<int64>(Eigen::NumTraits<T>::lowest()); + const double offset_input = static_cast<double>(input) - lowest_quantized; + const double result = range_min + (offset_input * range_scale); + return static_cast<float>(result); +} + +template <class T> +float FloatForOneQuantizedLevel(float range_min, float range_max) { + const int64 highest = static_cast<int64>(Eigen::NumTraits<T>::highest()); + const int64 lowest = static_cast<int64>(Eigen::NumTraits<T>::lowest()); + const float float_for_one_quantized_level = + (range_max - range_min) / (highest - lowest); + return float_for_one_quantized_level; +} + +template <class T1, class T2, class T3> +void QuantizationRangeForMultiplication(float min_a, float max_a, float min_b, + float max_b, float* min_c, + float* max_c) { + const float a_float_for_one_quant_level = + FloatForOneQuantizedLevel<T1>(min_a, max_a); + const float b_float_for_one_quant_level = + FloatForOneQuantizedLevel<T2>(min_b, max_b); + + const int64 c_highest = static_cast<int64>(Eigen::NumTraits<T3>::highest()); + const int64 c_lowest = static_cast<int64>(Eigen::NumTraits<T3>::lowest()); + const float c_float_for_one_quant_level = + a_float_for_one_quant_level * b_float_for_one_quant_level; + + *min_c = c_float_for_one_quant_level * c_lowest; + *max_c = c_float_for_one_quant_level * c_highest; +} + +// input_array is an eigen Tensor. q2f is a QuantizedToFloatStruct. +// This evaluates to an eigen tensor expression, to be used like: +// auto tensor = DEQUANTIZE_WITH_EIGEN(input_tensor, q2f); +#define DEQUANTIZE_WITH_EIGEN(input_array, q2f) \ + (q2f.range_min + \ + (((input_array.template cast<float>() - q2f.lowest_quantized())) * \ + q2f.range_scale)); + +// input_array is an eigen Tensor. f2q is a FloatToQuantizedStruct. +// OutputType is the type of output (e.g. quint8). +// This evaluates to an eigen tensor expression, to be used like: +// auto tensor = QUANTIZE_WITH_EIGEN(input_tensor, f2q, T); +#define QUANTIZE_WITH_EIGEN(input_array, f2q, OutputType) \ + ((input_array * f2q.range_scale).round() - \ + (f2q.range_min_scaled - f2q.lowest_quantized())) \ + .cwiseMax(f2q.lower_bound_float()) \ + .cwiseMin(f2q.upper_bound_float()) \ + .template cast<int32>() \ + .template cast<OutputType>() + +// For use with DEQUANTIZE_WITH_EIGEN. 
+template <typename T> +struct QuantizedToFloatStruct { + static constexpr int number_of_bits = sizeof(T) * 8; + static constexpr int64 number_of_steps = static_cast<int64>(1) + << number_of_bits; + + static float lowest_quantized() { + return static_cast<float>(Eigen::NumTraits<T>::lowest()); + } + + QuantizedToFloatStruct(float range_min, float range_max) + : range_min(range_min), + range_scale((range_max - range_min) / (number_of_steps - 1.0)) {} + + const float range_min; + const float range_scale; +}; + +// For use with QUANTIZE_WITH_EIGEN. +template <typename T> +struct FloatToQuantizedStruct { + static constexpr int number_of_bits = sizeof(T) * 8; + static constexpr int64 number_of_steps = static_cast<int64>(1) + << number_of_bits; + static constexpr double range_adjust = + (number_of_steps / (number_of_steps - 1.0)); + + // Casting QInt32's lowest or highest to a float gives a float that can't be + // cast back to int32 or QInt32. Instead, use bounds that can be converted + // back to int32 without going outside the range of an int32. + static float lower_bound_float() { + return Eigen::numext::maxi( + static_cast<float>(Eigen::NumTraits<T>::lowest()), -2.147483648e+09f); + } + static float upper_bound_float() { + return Eigen::numext::mini( + static_cast<float>(Eigen::NumTraits<T>::highest()), +2.147483520e+09f); + } + + static float lowest_quantized() { + return static_cast<float>(Eigen::NumTraits<T>::lowest()); + } + + FloatToQuantizedStruct(float range_min, float range_max) + : range_min(range_min), + range_scale(range_max == range_min + ? 0.0 + : (number_of_steps - 1.0) / (range_max - range_min)), + range_min_scaled(round(range_min * range_scale)) {} + + const float range_min; + const float range_scale; + const float range_min_scaled; +}; + +template <class T1, class T2> +inline T2 RequantizeInNewRange(T1 input, float min_input, float max_input, + float min_new, float max_new) { + const float input_float = QuantizedToFloat<T1>(input, min_input, max_input); + return FloatToQuantized<T2>(input_float, min_new, max_new); +} + +template <class T1, class T2> +inline void RequantizeManyInNewRange(const T1* input, size_t count, + float min_input, float max_input, + float min_output, float max_output, + T2* output) { + for (size_t index = 0; index < count; ++index) { + const float input_float = + QuantizedToFloat<T1>(input[index], min_input, max_input); + output[index] = FloatToQuantized<T2>(input_float, min_output, max_output); + } +} + +// Because converting 32-bit accumulated results down to eight bit is a common +// case, we have a specialized code path to handle it as efficiently as +// possible using only fixed-point math for the inner loop. +template <> +inline void RequantizeManyInNewRange<qint32, quint8>( + const qint32* input, size_t count, float min_input, float max_input, + float min_output, float max_output, quint8* output) { + // Initially we calculate all the constants we need once, before we go into + // the inner loop. If this is updated, also update the Eigen version. + const int fp_shift = 16; + const float input_range = max_input - min_input; + const float output_range = max_output - min_output; + const float recip_output_range = + output_range == 0.0 ? 0.0 : (255.0 / output_range); + const float input_rezero = (min_input + max_input) / 2.0; + const int64 range_scale_fp = + output_range == 0.0 ? 
0.0 + : static_cast<int64>(255.0 * (1 << fp_shift) * + input_range / output_range); + const int64 input_offset_fp = + static_cast<int64>(input_rezero * recip_output_range * (1 << fp_shift)); + const int64 output_offset_fp = + output_range == 0.0 ? 0 : static_cast<int64>((1 << fp_shift) * + (min_output * 255.0) / + output_range); + const int64 rounding_delta = 1 << (fp_shift - 1); + + // Inside this loop we just do minimal adds, multiplies, and shifts, in a way + // that could be easily adapted for a SIMD implementation. It should also be + // possible to perform all the calculations in 32-bit rather than 64, but + // that's not been implemented yet. + for (size_t index = 0; index < count; ++index) { + const int64 input_value = static_cast<int64>(input[index]); + const int64 fp_value = + ((input_value * range_scale_fp) >> 32) + input_offset_fp; + const int64 offset_intermediate = fp_value - output_offset_fp; + const int64 round_intermediate = offset_intermediate + rounding_delta; + int64 quantized_int64 = round_intermediate >> fp_shift; + quantized_int64 = std::max(quantized_int64, 0LL); + quantized_int64 = std::min(quantized_int64, 255LL); + output[index] = static_cast<quint8>(static_cast<int32>(quantized_int64)); + } +} + +template <int shift> +struct int64_right_shift_op { + EIGEN_EMPTY_STRUCT_CTOR(int64_right_shift_op) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const int64 operator()(const int64& a) const { + return a >> shift; + } +}; + +// See RequantizeManyInNewRange() for a non-eigen reference implementation. +template <class T1, class T2> +inline void RequantizeManyInNewRangeUsingEigen( + const Eigen::ThreadPoolDevice& device, const Tensor& input, float min_input, + float max_input, float min_output, float max_output, Tensor* output) { + auto input_array = input.flat<T1>(); + QuantizedToFloatStruct<T1> q2f(min_input, max_input); + auto input_float = DEQUANTIZE_WITH_EIGEN(input_array, q2f); + FloatToQuantizedStruct<T2> f2q(min_output, max_output); + auto input_requantized = QUANTIZE_WITH_EIGEN(input_float, f2q, T2); + + output->flat<T2>().device(device) = input_requantized; +} + +// See RequantizeManyInNewRange() for a non-eigen reference implementation. +// +// Because converting 32-bit accumulated results down to eight bit is a common +// case, we have a specialized code path to handle it as efficiently as +// possible using only fixed-point math for the inner loop. +template <> +inline void RequantizeManyInNewRangeUsingEigen<qint32, quint8>( + const Eigen::ThreadPoolDevice& device, const Tensor& input, float min_input, + float max_input, float min_output, float max_output, Tensor* output) { + // Initially we calculate all the constants we need once, before we go into + // the inner loop. If this is updated, also update the non-Eigen version. + const int fp_shift = 16; + const float input_range = max_input - min_input; + const float output_range = max_output - min_output; + const float recip_output_range = + output_range == 0.0 ? 0.0 : (255.0 / output_range); + const float input_rezero = (min_input + max_input) / 2.0; + const int64 range_scale_fp = + output_range == 0.0 ? 0.0 + : static_cast<int64>(255.0 * (1 << fp_shift) * + input_range / output_range); + const int64 input_offset_fp = + static_cast<int64>(input_rezero * recip_output_range * (1 << fp_shift)); + const int64 output_offset_fp = + output_range == 0.0 ? 
0 : static_cast<int64>((1 << fp_shift) * + (min_output * 255.0) / + output_range); + const int64 rounding_delta = 1 << (fp_shift - 1); + + // Inside this eigen expression we just do minimal adds, multiplies, and + // shifts. It should be possible to perform all the calculations in 32-bit + // rather than 64, but that's not been implemented yet. + auto input_array = input.flat<qint32>(); + auto fp_value = ((input_array.template cast<int64>() * range_scale_fp) + .unaryExpr(int64_right_shift_op<32>())) + + (input_offset_fp - output_offset_fp + rounding_delta); + auto intermediate = fp_value.unaryExpr(int64_right_shift_op<fp_shift>()); + auto input_requantized = intermediate.cwiseMax(0LL) + .cwiseMin(255LL) + .template cast<int32>() + .template cast<quint8>(); + output->flat<quint8>().device(device) = input_requantized; +} + +// REQUIRES: 'result->NumElements() == input.NumElements()' +template <class T> +void FloatTensorToQuantizedInPlaceUsingEigen( + const Eigen::ThreadPoolDevice& device, const Tensor& input, float min, + float max, Tensor* result) { + DCHECK_EQ(DataTypeToEnum<T>::v(), result->dtype()); + auto flat_input = input.flat<float>(); + auto flat_result = result->flat<T>(); + DCHECK_EQ(flat_input.size(), flat_result.size()); + + FloatToQuantizedStruct<T> f2q(min, max); + flat_result.device(device) = QUANTIZE_WITH_EIGEN(flat_input, f2q, T); +} + +template <class T> +void FloatTensorToQuantizedInPlace(const Tensor& input, float min, float max, + Tensor* result) { + DCHECK_EQ(DataTypeToEnum<T>::v(), result->dtype()); + auto flat_input = input.flat<float>(); + auto flat_result = result->flat<T>(); + const int data_size = flat_input.size(); + DCHECK(data_size == flat_result.size()); + for (int i = 0; i < data_size; ++i) { + flat_result(i) = FloatToQuantized<T>(flat_input(i), min, max); + } +} + +template <class T> +Tensor FloatTensorToQuantized(const Tensor& input, float min, float max) { + Tensor result(DataTypeToEnum<T>::v(), input.shape()); + FloatTensorToQuantizedInPlace<T>(input, min, max, &result); + return result; +} + +// REQUIRES: 'result->NumElements() == input.NumElements()' +template <class T> +void QuantizedTensorToFloatInPlaceUsingEigen( + const Eigen::ThreadPoolDevice& device, const Tensor& input, float min, + float max, Tensor* result) { + DCHECK_EQ(DataTypeToEnum<T>::v(), input.dtype()); + auto flat_input = input.flat<T>(); + auto flat_result = result->flat<float>(); + const int data_size = flat_input.size(); + DCHECK(data_size == flat_result.size()); + + QuantizedToFloatStruct<T> q2f(min, max); + flat_result.device(device) = DEQUANTIZE_WITH_EIGEN(flat_input, q2f); +} + +// REQUIRES: 'result->NumElements() == input.NumElements()' +template <class T> +void QuantizedTensorToFloatInPlace(const Tensor& input, float min, float max, + Tensor* result) { + DCHECK_EQ(DataTypeToEnum<T>::v(), input.dtype()); + auto flat_input = input.flat<T>(); + auto flat_result = result->flat<float>(); + const int data_size = flat_input.size(); + DCHECK(data_size == flat_result.size()); + for (int i = 0; i < data_size; ++i) { + flat_result(i) = QuantizedToFloat<T>(flat_input(i), min, max); + } +} + +template <class T> +Tensor QuantizedTensorToFloat(const Tensor& input, float min, float max) { + Tensor result(DT_FLOAT, input.shape()); + QuantizedTensorToFloatInPlace<T>(input, min, max, &result); + return result; +} + +void GetOutputMinAndMaxForQuantizedAdd(float input_min, float input_max, + float smaller_input_min, + float smaller_input_max, + float* output_min, float* output_max); + +// Add 
<input> and <smaller_input>. If <smaller_input> has fewer elements than +// <input>, then it is broadcast onto <input>. +template <typename T1, typename T2, typename T3> +void QuantizedAddUsingEigen(const Eigen::ThreadPoolDevice& device, + const Tensor& input, float input_min, + float input_max, const Tensor& smaller_input, + float smaller_input_min, float smaller_input_max, + Tensor* output, float* output_min, + float* output_max) { + const auto& input_flat = input.flat<T1>(); + const auto& smaller_input_flat = smaller_input.flat<T2>(); + auto output_flat = output->flat<T3>(); + + GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, smaller_input_min, + smaller_input_max, output_min, output_max); + // To do addition properly, we need to compensate for a possibly unbalanced + // zero point in the total representation. The quantized value that + // represents the real number zero needs to be subtracted before addition to + // make sure that the identity of zero + zero = zero holds. + const T3 zero_in_total_space = + FloatToQuantized<T3>(0.0f, *output_min, *output_max); + + const int64 input_element_count = input.NumElements(); + const int64 smaller_input_element_count = smaller_input.NumElements(); + + QuantizedToFloatStruct<T1> smaller_input_q2f(smaller_input_min, + smaller_input_max); + QuantizedToFloatStruct<T2> input_q2f(input_min, input_max); + FloatToQuantizedStruct<T3> f2q(*output_min, *output_max); + + auto smaller_input_float = + DEQUANTIZE_WITH_EIGEN(smaller_input_flat, smaller_input_q2f); + auto smaller_input_in_total_space = + QUANTIZE_WITH_EIGEN(smaller_input_float, f2q, T3); + + auto input_float = DEQUANTIZE_WITH_EIGEN(input_flat, input_q2f); + auto input_in_total_space = QUANTIZE_WITH_EIGEN(input_float, f2q, T3); + + Eigen::array<Eigen::DenseIndex, 1> bcast; + bcast[0] = input_element_count / smaller_input_element_count; + output_flat.device(device) = + input_in_total_space + + (smaller_input_in_total_space.broadcast(bcast) + zero_in_total_space); +} + +// This is a reference implementation of the bias addition for quantized +// buffers, designed to provide a clear specification for the result we +// want. We'll want to specialize this for particular hardware, and +// probably even fuse it with matrix multiplications in a lot of cases. It's +// important to show the clamping behavior we want in particular. +template <typename T1, typename T2, typename T3> +void QuantizedAdd(const Eigen::ThreadPoolDevice& device, const Tensor& input, + float input_min, float input_max, const Tensor& smaller_input, + float smaller_input_min, float smaller_input_max, + Tensor* output, float* output_min, float* output_max) { + const auto& input_flat = input.flat<T1>(); + const auto& smaller_input_flat = smaller_input.flat<T2>(); + auto output_flat = output->flat<T3>(); + + GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, smaller_input_min, + smaller_input_max, output_min, output_max); + // To do addition properly, we need to compensate for a possibly unbalanced + // zero point in the total representation. The quantized value that + // represents the real number zero needs to be subtracted before addition to + // make sure that the identity of zero + zero = zero holds. 
+ const T3 zero_in_total_space = + FloatToQuantized<T3>(0.0f, *output_min, *output_max); + + const int64 input_element_count = input.NumElements(); + const int64 smaller_input_element_count = smaller_input.NumElements(); + + float total_min = *output_min; + float total_max = *output_max; + const size_t how_many_iterations = + (input_element_count / smaller_input_element_count); + for (size_t iteration = 0; iteration < how_many_iterations; ++iteration) { + const size_t offset = iteration * smaller_input_element_count; + for (int c = 0; c < smaller_input_element_count; ++c) { + const int index = (offset + c); + // The two numbers we're going to add can each be in very different + // ranges (e.g. the quantized value '127' may represent very different + // real numbers in both) so we need to convert them to a common range + // before we sum them. + const T1 input_value = input_flat(index); + const T3 input_in_total_space = RequantizeInNewRange<T1, T3>( + input_value, input_min, input_max, total_min, total_max); + const T2 smaller_input_value = smaller_input_flat(c); + const T3 smaller_input_in_total_space = + RequantizeInNewRange<T2, T3>(smaller_input_value, smaller_input_min, + smaller_input_max, total_min, total_max); + const T3 total_pre = input_in_total_space + smaller_input_in_total_space; + // As noted above, we need to compensate for the offset of the actual + // zero point in the space we're operating in. + const T3 total = total_pre + zero_in_total_space; + output_flat(index) = total; + } + } +} + +// See gemmlowp/internal/multi_thread_gemm.h for definitions of +// Prepare, Wait, StartWorker, and CreateWorkers. +class TensorflowGemmlowpWorkersPool { + public: + TensorflowGemmlowpWorkersPool(thread::ThreadPool* workers) + : workers_(workers) {} + + ~TensorflowGemmlowpWorkersPool() { + // This workaround ensures that all worker tasks have exited methods in the + // BlockingCounter. Without this, there is a race where the context is torn + // down while the counter is in use. + counter_to_decrement_when_ready_.Reset(0); + } + + void Prepare(int workers_count) { + counter_to_decrement_when_ready_.Reset(workers_count); + } + + void Wait() { counter_to_decrement_when_ready_.Wait(); } + + void StartWorker(int index, gemmlowp::Task* task) { + CHECK(workers_ != nullptr); + // <index> is ignored - the tensorflow threadpool does not support assigning + // to a specific thread. + workers_->Schedule([this, task]() { + // TODO(cwhipkey): get a local_allocator from a thread local. + gemmlowp::Allocator local_allocator; + CHECK(task != nullptr); + task->local_allocator = &local_allocator; + task->Run(); + delete task; + counter_to_decrement_when_ready_.DecrementCount(); + }); + } + + void CreateWorkers(std::size_t workers_count) {} + + private: + thread::ThreadPool* const workers_; + + // The BlockingCounter used to wait for the workers. 
+ gemmlowp::BlockingCounter counter_to_decrement_when_ready_; + + TF_DISALLOW_COPY_AND_ASSIGN(TensorflowGemmlowpWorkersPool); +}; + +class TensorflowGemmContext : public gemmlowp::MultiThreadGemmContextBase { + public: + TensorflowGemmContext(int num_threads, thread::ThreadPool* workers) + : workers_pool_(workers) { + set_max_num_threads(num_threads); + } + + TensorflowGemmlowpWorkersPool* workers_pool() { return &workers_pool_; } + + private: + TensorflowGemmlowpWorkersPool workers_pool_; + + TF_DISALLOW_COPY_AND_ASSIGN(TensorflowGemmContext); +}; + +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_ diff --git a/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc b/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc new file mode 100644 index 0000000000..d62610b2ca --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc @@ -0,0 +1,550 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define EIGEN_USE_THREADS + +#include <limits> + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/common_runtime/eigen_thread_pool.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/lib/random/simple_philox.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +class QuantizationUtilsTest : public ::testing::Test { + protected: + void TestRequantizeMany(Eigen::ThreadPoolDevice* eigen_device, + float input_min, float input_max, float output_min, + float output_max, + const std::vector<qint32>& values_quantized, + int tolerance = 1) { + const int values_count = values_quantized.size(); + std::vector<quint8> expected_values; + for (int value_index = 0; value_index < values_count; ++value_index) { + expected_values.push_back(FloatToQuantized<quint8>( + QuantizedToFloat(values_quantized[value_index], input_min, input_max), + output_min, output_max)); + } + + Tensor i_tensor = + tensorflow::test::AsTensor(gtl::ArraySlice<qint32>(values_quantized)); + Tensor o_tensor(DT_QUINT8, TensorShape{values_count}); + auto output_values = o_tensor.flat<quint8>(); + + if (eigen_device == nullptr) { + auto input_array = i_tensor.flat<qint32>(); + RequantizeManyInNewRange(input_array.data(), input_array.size(), + input_min, input_max, output_min, output_max, + output_values.data()); + } else { + RequantizeManyInNewRangeUsingEigen<qint32, quint8>( + *eigen_device, i_tensor, input_min, input_max, output_min, output_max, + &o_tensor); + } + + const string tolerance_str = strings::StrCat("+-", tolerance); + for (size_t value_index 
= 0; value_index < values_count; ++value_index) { + int e = expected_values[value_index]; + int v = output_values(value_index); + ASSERT_TRUE(std::abs(e - v) <= tolerance) + << "actual=" << v << ", expected=" << e << tolerance_str + << ", values_quantized[" << value_index + << "]=" << values_quantized[value_index] + << ", input_min=" << input_min << ", input_max=" << input_max + << ", output_min=" << output_min << ", output_max=" << output_max + << ", value_index=" << value_index; + } + } + + // If eigen_device is NULL, then the reference implementation is tested. + void TestRequantizeManyInNewRange32To8Bit( + Eigen::ThreadPoolDevice* eigen_device) { + // These are the float values we're going to test the conversions on. + const size_t values_count = 6; + const float values[values_count] = {0.0f, 0.45f, 1.0f, + -1.0f, 127.0f, 255.0f}; + // These are the input and output ranges we'll test. + const size_t ranges_count = 6; + const float ranges[ranges_count][4] = { + {0.0f, 255.0f, 0.0f, 255.0f}, // + {0.0f, 1.0f, 0.0f, 1.0f}, // + {-1.0f, 1.0f, -1.0f, 1.0f}, // + {-1.0f, 1.0f, -255.0f, 255.0f}, // + {3.0f, 3.0f, 0.0f, 255.0f}, // input min == max + {0.0f, 255.0f, 5.0f, 5.0f}, // output min == max + }; + for (int i = 0; i < ranges_count; ++i) { + const auto& r = ranges[i]; + std::vector<qint32> values_quantized; + for (int value_index = 0; value_index < values_count; ++value_index) { + const float v = values[value_index]; + values_quantized.push_back(FloatToQuantized<qint32>(v, r[0], r[1])); + } + TestRequantizeMany(eigen_device, r[0], r[1], r[2], r[3], + values_quantized); + } + + // Test with many different values in the input quantized range. + qint32 low = Eigen::NumTraits<qint32>::lowest(); + qint32 high = Eigen::NumTraits<qint32>::highest(); + std::vector<qint32> vals{low, high}; + int num_steps = 14419; + qint32 step = static_cast<int32>((1L << 32) / num_steps); + qint32 v = low + static_cast<qint32>(1); + for (int i = 0; i < num_steps; ++i) { + vals.push_back(v); + v += step; + } + TestRequantizeMany(eigen_device, -1.0f, 1.0f, -1.0f, 1.0f, vals); + TestRequantizeMany(eigen_device, -255.0f, 255.0f, -255.0f, 255.0f, vals); + TestRequantizeMany(eigen_device, -1.0f, 1.0f, -12345678.0f, 12345678.0f, + vals); + TestRequantizeMany(eigen_device, -1.0f, 12345678.0f, -12345678.0f, + 12345678.0f, vals); + + // Test when the input range is large and output range is small. + // Use all quantized values where the float is in the output range. + const float out_min = -29.1234; + const float out_max = 23.1234; + const float in_min = -1e6; + const float in_max = 1e6; + + low = FloatToQuantized<qint32>(out_min, in_min, in_max); + high = FloatToQuantized<qint32>(out_max, in_min, in_max); + vals.clear(); + for (int32 i = low; i <= high; ++i) vals.push_back(i); + TestRequantizeMany(eigen_device, in_min, in_max, out_min, out_max, vals); + } + + template <typename InputType, typename OutputType> + void TestRequantizeManyInNewRangeEigenVsNonEigen() { + thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */); + EigenThreadPoolWrapper wrapper(&threadpool); + Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */); + + const size_t ranges_count = 6; + const float ranges[ranges_count][4] = { + {0.0f, 255.0f, 0.0f, 255.0f}, // + {0.0f, 1.0f, 0.0f, 1.0f}, // + {-1.0f, 1.0f, -1.0f, 1.0f}, // + {-1.0f, 1.0f, -255.0f, 255.0f}, // + {3.0f, 3.0f, 0.0f, 255.0f}, // input min == max + {0.0f, 255.0f, 5.0f, 5.0f}, // output min == max + }; + + // Random values. 
+ for (size_t range_index = 0; range_index < ranges_count; ++range_index) { + const float input_min = ranges[range_index][0]; + const float input_max = ranges[range_index][1]; + const float output_min = ranges[range_index][2]; + const float output_max = ranges[range_index][3]; + const int values_count = 10000; + random::PhiloxRandom philox(testing::RandomSeed(), 17); + random::SimplePhilox rnd(&philox); + std::vector<InputType> values_quantized; + for (int i = 0; i < values_count; ++i) { + float v = (rnd.RandFloat() * (input_max - input_min)) + input_min; + values_quantized.push_back( + FloatToQuantized<InputType>(v, input_min, input_max)); + } + + Tensor i_tensor = tensorflow::test::AsTensor( + gtl::ArraySlice<InputType>(values_quantized)); + const auto i_array = i_tensor.flat<InputType>(); + Tensor o_tensor_eigen(DataTypeToEnum<OutputType>::v(), + TensorShape{values_count}); + auto output_values_eigen = o_tensor_eigen.flat<OutputType>(); + Tensor o_tensor_ref(DataTypeToEnum<OutputType>::v(), + TensorShape{values_count}); + auto output_values_ref = o_tensor_ref.flat<OutputType>(); + + RequantizeManyInNewRange(i_array.data(), i_array.size(), input_min, + input_max, output_min, output_max, + output_values_ref.data()); + RequantizeManyInNewRangeUsingEigen<InputType, OutputType>( + eigen_device, i_tensor, input_min, input_max, output_min, output_max, + &o_tensor_eigen); + + const int tolerance = 1; + for (int i = 0; i < values_quantized.size(); ++i) { + auto expected = output_values_ref(i); + auto actual = output_values_eigen(i); + // The eigen computation uses float for constants and computation + // instead of doubles, so can be different by 1 or 2 in some cases + // (e.g., input value 144.062744140625, min -1, max 255, type quint8). + ASSERT_TRUE(std::abs(expected - actual) <= tolerance) + << "expected=" << expected << " actual=" << actual + << " tolerance=" << tolerance << " v=" << values_quantized[i] + << " i=" << i << " input_min=" << input_min + << " input_max=" << input_max + << " input_type=" << DataTypeString(DataTypeToEnum<InputType>::v()) + << " output_type=" + << DataTypeString(DataTypeToEnum<OutputType>::v()); + } + } + } + + template <typename T> + void TestFloatToQuantizedInPlaceUsingEigen( + Eigen::ThreadPoolDevice* eigen_device) { + // These are the float values we're going to test the conversions on. + typedef std::pair<float, float> FPair; + for (FPair min_and_max : std::vector<FPair>{FPair(-255.0f, 255.0f), // + FPair(-1.0f, 1.0f), // + FPair(-1.0f, 255.0f), // + FPair(0.0f, 1e6), // + FPair(0.0f, 1.0f), // + FPair(-31.0f, 13.0f)}) { + const float f_min = min_and_max.first; + const float f_max = min_and_max.second; + const float f_range = f_max - f_min; + const int values_count = 50000; + Tensor input(DT_FLOAT, TensorShape{values_count}); + auto input_array = input.flat<float>(); + for (int i = 0; i < values_count; ++i) { + input_array(i) = f_min + f_range * i / (values_count - 1); + } + + Tensor output(DataTypeToEnum<T>::v(), TensorShape{values_count}); + FloatTensorToQuantizedInPlaceUsingEigen<T>(*eigen_device, input, f_min, + f_max, &output); + auto output_array = output.flat<T>(); + + const int tolerance = 1; + for (int i = 0; i < values_count; ++i) { + int32 expected = FloatToQuantized<T>(input_array(i), f_min, f_max); + int32 actual = output_array(i); + + // The eigen computation uses float for constants and computation + // instead + // of doubles, so can be different by 1 or 2 in some cases (e.g., input + // value 144.062744140625, min -1, max 255, type quint8). 
+ ASSERT_TRUE(std::abs(expected - actual) <= tolerance) + << "expected=" << expected << " actual=" << actual + << " tolerance=" << tolerance << " v=" << input_array(i) + << " i=" << i << " f_min=" << f_min << " f_max=" << f_max + << " type=" << DataTypeString(DataTypeToEnum<T>::v()); + } + } + } + + template <typename T> + void TestQuantizedToFloatInPlaceUsingEigen( + Eigen::ThreadPoolDevice* eigen_device) { + // These are the float values we're going to test the conversions on. + typedef std::pair<float, float> FPair; + for (FPair min_and_max : std::vector<FPair>{FPair(-255.0f, 255.0f), // + FPair(-1.0f, 1.0f), // + FPair(-1.0f, 255.0f), // + FPair(0.0f, 1e6), // + FPair(0.0f, 1.0f), // + FPair(-31.0f, 13.0f)}) { + const float f_min = min_and_max.first; + const float f_max = min_and_max.second; + const int values_count = sizeof(T) == 1 ? 256 : 50000; + Tensor input(DataTypeToEnum<T>::v(), TensorShape{values_count}); + auto input_array = input.flat<T>(); + const double q_range = + static_cast<double>(Eigen::NumTraits<T>::highest()) - + Eigen::NumTraits<T>::lowest(); + for (int i = 0; i < values_count; ++i) { + if (sizeof(T) == 1) { + input_array(i) = Eigen::NumTraits<T>::lowest() + i; + } else { + int64 offset = static_cast<int64>(q_range / values_count * i); + input_array(i) = static_cast<int32>( + Eigen::NumTraits<T>::lowest() + + std::min<int64>(Eigen::NumTraits<T>::highest(), offset)); + } + } + + Tensor output(DT_FLOAT, TensorShape{values_count}); + QuantizedTensorToFloatInPlaceUsingEigen<T>(*eigen_device, input, f_min, + f_max, &output); + auto output_array = output.flat<float>(); + const double range = static_cast<double>(f_max) - f_min; + for (int i = 0; i < values_count; ++i) { + float expected = QuantizedToFloat<T>(input_array(i), f_min, f_max); + float actual = output_array(i); + ASSERT_NEAR(expected, actual, range * 1e-6) + << "expected=" << expected << " actual=" << actual + << " v=" << input_array(i) << " i=" << i << " f_min=" << f_min + << " f_max=" << f_max + << " type=" << DataTypeString(DataTypeToEnum<T>::v()); + } + } + } +}; + +TEST_F(QuantizationUtilsTest, FloatToQuantized) { + EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(0.0f, 0.0f, 1.0f)); + EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(0.0f, 0.0f, 2.0f)); + EXPECT_EQ(quint8(128), FloatToQuantized<quint8>(0.5f, 0.0f, 1.0f)); + EXPECT_EQ(quint8(128), FloatToQuantized<quint8>(1.0f, 0.0f, 2.0f)); + EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(1.0f, 0.0f, 1.0f)); + EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(2.0f, 0.0f, 2.0f)); + EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(-128.0f, -128.0f, 127.0f)); + EXPECT_EQ(quint8(128), FloatToQuantized<quint8>(0.0f, -128.0f, 127.0f)); + EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(127.0f, -128.0f, 127.0f)); + EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(1.0f, 1.0f, 256.0f)); + EXPECT_EQ(quint8(127), FloatToQuantized<quint8>(128.0f, 1.0f, 256.0f)); + EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(256.0f, 1.0f, 256.0f)); + + const int int32_min = std::numeric_limits<int>::min(); + const int int32_max = std::numeric_limits<int>::max(); + + EXPECT_EQ(qint32(int32_min), + FloatToQuantized<qint32>(-128.0f, -128.0f, 128.0f)); + EXPECT_EQ(qint32(0), FloatToQuantized<qint32>(0.0f, -128.0f, 128.0f)); + EXPECT_EQ(qint32(int32_max), + FloatToQuantized<qint32>(128.0f, -128.0f, 128.0f)); +} + +TEST_F(QuantizationUtilsTest, QuantizedToFloat) { + EXPECT_LT(fabsf(0.0f - QuantizedToFloat<quint8>(0, 0.0f, 1.0f)), 1 / 255.0f); + EXPECT_LT(fabsf(0.0f - QuantizedToFloat<quint8>(0, 0.0f, 
2.0f)), 1 / 255.0f); + EXPECT_LT(fabsf(0.5f - QuantizedToFloat<quint8>(127, 0.0f, 1.0f)), + 1 / 255.0f); + EXPECT_LT(fabsf(1.0f - QuantizedToFloat<quint8>(127, 0.0f, 2.0f)), + 1 / 255.0f); + EXPECT_LT(fabsf(1.0f - QuantizedToFloat<quint8>(255, 0.0f, 1.0f)), + 1 / 255.0f); + EXPECT_LT(fabsf(2.0f - QuantizedToFloat<quint8>(255, 0.0f, 2.0f)), + 1 / 255.0f); + EXPECT_LT(fabsf(1.0f - QuantizedToFloat<quint8>(0, 1.0f, 256.0f)), + 1 / 255.0f); + EXPECT_LT(fabsf(128.0f - QuantizedToFloat<quint8>(127, 1.0f, 256.0f)), + 1 / 255.0f); + EXPECT_LT(fabsf(256.0f - QuantizedToFloat<quint8>(255, 1.0f, 256.0f)), + 1 / 255.0f); + + const int int32_min = std::numeric_limits<int>::min(); + const int int32_max = std::numeric_limits<int>::max(); + + EXPECT_LT( + fabsf(-1.0f - QuantizedToFloat<qint32>(qint32(int32_min), -1.0f, 1.0f)), + 1e-5f); + EXPECT_LT(fabsf(0.0f - QuantizedToFloat<qint32>(qint32(0), -1.0f, 1.0f)), + 1e-5f); + EXPECT_LT( + fabsf(1.0f - QuantizedToFloat<qint32>(qint32(int32_max), -1.0f, 1.0f)), + 1e-5f); +} + +TEST_F(QuantizationUtilsTest, AvoidBias) { + for (int i = 0; i < 256; ++i) { + const float as_float = QuantizedToFloat<quint8>(i, 0.0f, 2.0f); + const int back_to_int = FloatToQuantized<quint8>(as_float, 0.0f, 2.0f); + EXPECT_EQ(i, back_to_int); + } +} + +TEST_F(QuantizationUtilsTest, RequantizeInNewRange) { + // These are the float values we're going to test the conversions on. + const size_t values_count = 6; + const float values[values_count] = {0.0f, 0.5f, 1.0f, -1.0f, 127.0f, 255.0f}; + // These are the input and output ranges we'll test. + const size_t ranges_count = 4; + const float ranges[ranges_count][4] = { + {0.0f, 255.0f, 0.0f, 255.0f}, + {0.0f, 1.0f, 0.0f, 1.0f}, + {-1.0f, 1.0f, -1.0f, 1.0f}, + {-1.0f, 1.0f, -255.0f, 255.0f}, + }; + for (size_t value_index = 0; value_index < values_count; ++value_index) { + const float value_float = values[value_index]; + for (size_t range_index = 0; range_index < ranges_count; ++range_index) { + const float input_min = ranges[range_index][0]; + const float input_max = ranges[range_index][1]; + const float output_min = ranges[range_index][2]; + const float output_max = ranges[range_index][3]; + const quint8 input_value = + FloatToQuantized<quint8>(value_float, input_min, input_max); + // Here we convert the quantized input value to what we expect + // to get in the output range. 
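      // A sketch of the affine mapping these expectations assume (the
      // kernel's implementation details may differ): for quint8,
      //   q = round((v - range_min) * 255 / (range_max - range_min)),
      // so FloatToQuantized<quint8>(0.5f, 0.0f, 1.0f) scales to
      // 0.5 * 255 = 127.5 and rounds to 128, matching the FloatToQuantized
      // expectations earlier in this file. The expected_value below simply
      // composes that mapping with QuantizedToFloat over the input range.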
+ const qint32 expected_value = FloatToQuantized<qint32>( + QuantizedToFloat(input_value, input_min, input_max), output_min, + output_max); + EXPECT_EQ(expected_value, + (RequantizeInNewRange<quint8, qint32>( + input_value, input_min, input_max, output_min, output_max))) + << "value_float=" << value_float << ", input_min=" << input_min + << ", input_max=" << input_max << ", output_min=" << output_min + << ", output_max=" << output_max; + } + } +} + +TEST_F(QuantizationUtilsTest, RequantizeInNewRangeRealData) { + const float value_as_float = -0.290169f; + const float input_min = -0.739539f; + const float input_max = 0.641057f; + const float output_min = -2381.49f; + const float output_max = 2207.6f; + const quint8 value_as_quint8 = + FloatToQuantized<quint8>(value_as_float, input_min, input_max); + EXPECT_EQ(quint8(83), value_as_quint8); + const qint32 actual_output = RequantizeInNewRange<quint8, qint32>( + value_as_quint8, input_min, input_max, output_min, output_max); + const qint32 value_as_qint32 = + FloatToQuantized<qint32>(value_as_float, output_min, output_max); + EXPECT_LT(std::abs(value_as_qint32 - actual_output), 10); +} + +TEST_F(QuantizationUtilsTest, RequantizeInNewRange32To8Bit) { + // These are the float values we're going to test the conversions on. + const size_t values_count = 6; + const float values[values_count] = {0.0f, 0.45f, 1.0f, -1.0f, 127.0f, 255.0f}; + // These are the input and output ranges we'll test. + const size_t ranges_count = 4; + const float ranges[ranges_count][4] = { + {0.0f, 255.0f, 0.0f, 255.0f}, + {0.0f, 1.0f, 0.0f, 1.0f}, + {-1.0f, 1.0f, -1.0f, 1.0f}, + {-1.0f, 1.0f, -255.0f, 255.0f}, + }; + for (size_t value_index = 0; value_index < values_count; ++value_index) { + const float value_float = values[value_index]; + for (size_t range_index = 0; range_index < ranges_count; ++range_index) { + const float input_min = ranges[range_index][0]; + const float input_max = ranges[range_index][1]; + const float output_min = ranges[range_index][2]; + const float output_max = ranges[range_index][3]; + const qint32 input_value = + FloatToQuantized<qint32>(value_float, input_min, input_max); + // Here we convert the quantized input value to what we expect + // to get in the output range. 
+ const quint8 expected_value = FloatToQuantized<quint8>( + QuantizedToFloat(input_value, input_min, input_max), output_min, + output_max); + EXPECT_EQ(expected_value, + (RequantizeInNewRange<qint32, quint8>( + input_value, input_min, input_max, output_min, output_max))) + << "input_value=" << input_value << ", value_float=" << value_float + << ", input_min=" << input_min << ", input_max=" << input_max + << ", output_min=" << output_min << ", output_max=" << output_max; + } + } +} + +TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8Bit) { + TestRequantizeManyInNewRange32To8Bit(nullptr /* eigen_device */); +} + +TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8BitUsingEigen) { + thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */); + EigenThreadPoolWrapper wrapper(&threadpool); + Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */); + TestRequantizeManyInNewRange32To8Bit(&eigen_device); +} + +TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8BitEigenVsNonEigen) { + TestRequantizeManyInNewRangeEigenVsNonEigen<qint32, quint8>(); +} + +TEST_F(QuantizationUtilsTest, + RequantizeManyInNewRange32To8BitSignedEigenVsNonEigen) { + TestRequantizeManyInNewRangeEigenVsNonEigen<qint32, qint8>(); +} + +TEST_F(QuantizationUtilsTest, FloatTensorToQuantized) { + const int input_width = 3; + const int input_height = 3; + const float input_min = 0.0f; + const float input_max = 255.0f; + Tensor input(DT_FLOAT, TensorShape({input_height, input_width})); + test::FillValues<float>(&input, {1.0f, -1.0f, 10.0f, 10.25f, 127.0f, 255.0f, + 512.0f, 0.0f, 23.0f}); + Tensor expected(DT_QUINT8, TensorShape({input_height, input_width})); + test::FillValues<quint8>(&expected, {1, 0, 10, 10, 127, 255, 255, 0, 23}); + Tensor output = FloatTensorToQuantized<quint8>(input, input_min, input_max); + test::ExpectTensorEqual<quint8>(expected, output); +} + +// Verify that FloatToQuantizedInPlaceUsingEigen is same result as +// FloatToQuantized. +TEST_F(QuantizationUtilsTest, FloatToQuantizedInPlaceUsingEigen) { + thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */); + EigenThreadPoolWrapper wrapper(&threadpool); + Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */); + + TestFloatToQuantizedInPlaceUsingEigen<quint8>(&eigen_device); + TestFloatToQuantizedInPlaceUsingEigen<qint8>(&eigen_device); + TestFloatToQuantizedInPlaceUsingEigen<quint16>(&eigen_device); + TestFloatToQuantizedInPlaceUsingEigen<qint16>(&eigen_device); +} + +TEST_F(QuantizationUtilsTest, OverflowWithEigen) { + thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */); + EigenThreadPoolWrapper wrapper(&threadpool); + Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */); + + const int num_vals = 4; + const float input_min = 0.0f; + const float input_max = 2400.0f; + TensorShape shape({num_vals}); + Tensor input(DT_FLOAT, shape); + test::FillValues<float>(&input, {-100.f, 0.f, 2400.0f, 2400.0f}); + Tensor expected(DT_QINT32, shape); + // Note that the positive expected values are not the highest int32 value, + // because the implementation does a bounds check using float, not int32. 
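  // Concretely: the int32 maximum, 2147483647, is not representable as a
  // float; the largest float below 2^31 is 2147483520 (2^31 - 128), which is
  // why the saturated positive entries below are 2147483520. -2^31 is exactly
  // representable, so the underflowing entries saturate to -2147483648.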
+ test::FillValues<qint32>( + &expected, + {static_cast<int32>(-2147483648), static_cast<int32>(-2147483648), + static_cast<int32>(2147483520), static_cast<int32>(2147483520)}); + + FloatToQuantizedStruct<qint32> f2q(input_min, input_max); + Tensor output(DT_QINT32, shape); + auto input_array = input.flat<float>(); + output.flat<qint32>() = QUANTIZE_WITH_EIGEN(input_array, f2q, qint32); + test::ExpectTensorEqual<qint32>(expected, output); +} + +TEST_F(QuantizationUtilsTest, QuantizedTensorToFloat) { + const int input_width = 3; + const int input_height = 3; + const float input_min = -128.0f; + const float input_max = 127.0f; + Tensor input(DT_QUINT8, TensorShape({input_height, input_width})); + test::FillValues<quint8>(&input, {0, 128, 255, 23, 24, 25, 243, 244, 245}); + Tensor expected(DT_FLOAT, TensorShape({input_height, input_width})); + test::FillValues<float>(&expected, {-128.0f, 0.0f, 127.0f, -105.0f, -104.0f, + -103.0f, 115.0f, 116.0f, 117.0f}); + Tensor output = QuantizedTensorToFloat<quint8>(input, input_min, input_max); + test::ExpectTensorEqual<float>(expected, output); +} + +// Verify that QuantizedToFloatInPlaceUsingEigen is same result as +// QuantizedToFloat. +TEST_F(QuantizationUtilsTest, QuantizedToFloatInPlaceUsingEigen) { + thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */); + EigenThreadPoolWrapper wrapper(&threadpool); + Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */); + + TestQuantizedToFloatInPlaceUsingEigen<quint8>(&eigen_device); + TestQuantizedToFloatInPlaceUsingEigen<qint8>(&eigen_device); + TestQuantizedToFloatInPlaceUsingEigen<quint16>(&eigen_device); + TestQuantizedToFloatInPlaceUsingEigen<qint16>(&eigen_device); + TestQuantizedToFloatInPlaceUsingEigen<qint32>(&eigen_device); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc new file mode 100644 index 0000000000..18dffd1dc6 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc @@ -0,0 +1,97 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// See docs in ../ops/array_ops.cc. 
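// Summary of the kernel below: it scans a qint32 tensor for the smallest and
// largest values actually present, converts them to floats within the
// incoming min/max range (clamping the minimum to be no larger than zero),
// then requantizes every element into the narrower output type over that
// tighter range and emits the new range as two scalar outputs. In the
// hand-crafted test later in this change, values spanning only -1.0f to 1.0f
// inside a theoretical -256.0f..256.0f range come out filling the full
// quint8 range, with output min/max of -1.0f and 1.0f.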
+ +#define EIGEN_USE_THREADS + +#include <math.h> + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/type_traits.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +template <class T1, class T2> +class QuantizeDownAndShrinkRangeOp : public OpKernel { + public: + explicit QuantizeDownAndShrinkRangeOp(OpKernelConstruction* ctx) + : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor& input = ctx->input(0); + const float input_min_float = ctx->input(1).flat<float>()(0); + const float input_max_float = ctx->input(2).flat<float>()(0); + Tensor* output = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); + Tensor* output_min = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(1, TensorShape({}), &output_min)); + Tensor* output_max = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(2, TensorShape({}), &output_max)); + + auto input_array = input.flat<T1>(); + const int32 input_lowest_quantized = + static_cast<int32>(Eigen::NumTraits<T1>::lowest()); + const int32 input_highest_quantized = + static_cast<int32>(Eigen::NumTraits<T1>::highest()); + T1 actual_min_quantized = input_highest_quantized; + T1 actual_max_quantized = input_lowest_quantized; + for (int i = 0; i < input_array.size(); ++i) { + const T1 value = input_array(i); + actual_min_quantized = std::min(actual_min_quantized, value); + actual_max_quantized = std::max(actual_max_quantized, value); + } + // We want to make sure that the minimum is no larger than zero, so that the + // convolution operation can run efficiently. + const float actual_min_float = + std::min(0.0f, QuantizedToFloat(actual_min_quantized, input_min_float, + input_max_float)); + const float actual_max_float = QuantizedToFloat( + actual_max_quantized, input_min_float, input_max_float); + +#if 0 + // This is the reference, non-eigen implementation: + auto output_array = output->flat<T2>(); + RequantizeManyInNewRange<T1, T2>(input_array.data(), input_array.size(), + input_min_float, input_max_float, + actual_min_float, actual_max_float, + output_array.data()); +#endif + + if (input_array.size() > 0) { + RequantizeManyInNewRangeUsingEigen<T1, T2>( + ctx->eigen_device<CPUDevice>(), input, input_min_float, + input_max_float, actual_min_float, actual_max_float, output); + } + + output_min->flat<float>().setConstant(actual_min_float); + output_max->flat<float>().setConstant(actual_max_float); + } +}; + +REGISTER_KERNEL_BUILDER(Name("QuantizeDownAndShrinkRange") + .Device(DEVICE_CPU) + .TypeConstraint<qint32>("Tinput") + .TypeConstraint<quint8>("out_type"), + QuantizeDownAndShrinkRangeOp<qint32, quint8>); + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range_op_test.cc b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range_op_test.cc new file mode 100644 index 0000000000..73a50aad26 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range_op_test.cc @@ -0,0 +1,71 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +class QuantizeDownAndShrinkRangeTest : public OpsTestBase { + protected: +}; + +// Runs a manually generated array through the operator, and makes sure that the +// results match the expected hand-calculated values. +TEST_F(QuantizeDownAndShrinkRangeTest, HandCrafted) { + TF_ASSERT_OK(NodeDefBuilder("quantize_down_and_shrink_range_op", + "QuantizeDownAndShrinkRange") + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Tinput", DataTypeToEnum<qint32>::v()) + .Attr("out_type", DataTypeToEnum<quint8>::v()) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + // For this test we have an input that has the theoretical range of -256.0f to + // +256.0f, but the actual values present only span -1.0f to 1.0f. We expect + // the operator to take advantage of this, and rescale the output to fill up + // the available range in the lower bit depth, and update to the true min and + // max ranges. + const int value_count = 3; + AddInputFromArray<qint32>(TensorShape({value_count}), + {-(1 << 23), 0, (1 << 23)}); + AddInputFromArray<float>(TensorShape({1}), {-256.0f}); + AddInputFromArray<float>(TensorShape({1}), {256.0f}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_QUINT8, TensorShape({value_count})); + test::FillValues<quint8>(&expected, {0, 127, 255}); + test::ExpectTensorEqual<quint8>(expected, *GetOutput(0)); + Tensor expected_min(allocator(), DT_FLOAT, TensorShape({})); + test::FillValues<float>(&expected_min, {-1.0f}); + test::ExpectTensorEqual<float>(expected_min, *GetOutput(1)); + Tensor expected_max(allocator(), DT_FLOAT, TensorShape({})); + test::FillValues<float>(&expected_max, {1.0f}); + test::ExpectTensorEqual<float>(expected_max, *GetOutput(2)); +} + +} // end namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantize_op.cc b/tensorflow/contrib/quantization/kernels/quantize_op.cc new file mode 100644 index 0000000000..2bab8ad447 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantize_op.cc @@ -0,0 +1,159 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// See docs in ../ops/math_ops.cc. + +#define EIGEN_USE_THREADS + +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/type_traits.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace { +enum { QUANTIZE_MODE_MIN_COMBINED, QUANTIZE_MODE_MIN_FIRST }; +} // namespace + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +// Quantize a tensor from float to T, with user-specified min_range and +// max_range. +// TODO(xbing): Add a new QuantizeOp just taking scale, +// rather than min_range and max_range. +template <typename Device, typename T> +class QuantizeV2Op : public OpKernel { + public: + explicit QuantizeV2Op(OpKernelConstruction* ctx) : OpKernel(ctx) { + half_range_ = !std::is_signed<T>::value + ? 0.0f + : (std::numeric_limits<T>::max() - + std::numeric_limits<T>::min() + 1) / + 2.0f; + string mode_string; + OP_REQUIRES_OK(ctx, ctx->GetAttr("mode", &mode_string)); + OP_REQUIRES(ctx, + (mode_string == "MIN_COMBINED" || mode_string == "MIN_FIRST"), + errors::InvalidArgument("Mode string must be 'MIN_COMBINED' or" + " 'MIN_FIRST', is '" + + mode_string + "'")); + if (mode_string == "MIN_COMBINED") { + mode_ = QUANTIZE_MODE_MIN_COMBINED; + } else if (mode_string == "MIN_FIRST") { + mode_ = QUANTIZE_MODE_MIN_FIRST; + } + } + + void Compute(OpKernelContext* ctx) override { + const Tensor& input = ctx->input(0); + const float input_min_range = ctx->input(1).flat<float>()(0); + const float input_max_range = ctx->input(2).flat<float>()(0); + + float min_range; + float max_range; + OP_REQUIRES(ctx, !(input_max_range < input_min_range), + errors::InvalidArgument( + "input_max_range must be larger than input_min_range.")); + + // When the minimum and maximum ranges are too close together, nudge them + // apart by a small value so that they are slightly different. This helps + // us avoid creating ill-formed buffers where all quantized values map to + // the same float number. These kinds of buffers cause problems for + // downstream ops when they need to do calculations on them. + // We pick the value by making sure that zero is not more than 100x the + // overall range from the maximum, so that the value can be easily + // represented when we promote the quantized value to a higher + // intermediate bit depth, since that's a common requirement. 
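    // Worked example (a sketch): with input_min_range == input_max_range ==
    // 1.0f, as in the QuantizeV2EqualRange test in this change,
    //   epsilon   = max(1.0f, max(|1.0f|, |1.0f|)) / 100.0f = 0.01f
    //   max_range = max(1.0f, 1.0f + 0.01f)                 = 1.01f
    // so the buffer gets a non-degenerate range and the reported output_max
    // comes back strictly greater than the requested 1.0f.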
+ min_range = input_min_range; + const float epsilon = std::max(1.0f, std::max(fabsf(input_min_range), + fabsf(input_max_range))) / + 100.0f; + max_range = std::max(input_max_range, input_min_range + epsilon); + + Tensor* output = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); + if (mode_ == QUANTIZE_MODE_MIN_COMBINED) { + const float scale_factor = + (std::numeric_limits<T>::max() - std::numeric_limits<T>::min()) / + (max_range - min_range); + + // Quantize: + // Make input in range of [min_range, max_range], then + // subtract min_range to be in range of [0, max_range - min_range] + // Divide by (max_range - min_range) to get to [0, 1.0] + // Multiply by range of T, after that shift left 1/2 range of T if + // T is signed. + // Note that std::round is used to round the number before the cast. + // std::round implements "round-half-away-zero", + // e.g., -5.5 gets rounded to -6, -5.4 goes to -5, 5.4 goes to 5, + // and 5.5 goes to 6. + auto o = output->template flat<T>(); + bool is_signed = std::is_signed<T>::value; + if (is_signed) { + // The slow path. + // TODO(xbing,yonghui): Speedup this path as well. + o.device(ctx->template eigen_device<Device>()) = + ((input.flat<float>().cwiseMin(max_range).cwiseMax(min_range) - + min_range) * + scale_factor - + half_range_) + .unaryExpr(std::function<float(float)>(round)) + .template cast<T>(); + } else { + // The fast path that avoids unaryExpr + // According to the micro-benchmark, adding device here doesn't help. + o = ((input.flat<float>().cwiseMin(max_range).cwiseMax(min_range) - + min_range) * + scale_factor + + 0.5f) + .template cast<T>(); + } + } else if (mode_ == QUANTIZE_MODE_MIN_FIRST) { + FloatTensorToQuantizedInPlaceUsingEigen<T>( + ctx->template eigen_device<Device>(), input, min_range, max_range, + output); + } + + Tensor* output_min_tensor = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(1, {}, &output_min_tensor)); + output_min_tensor->flat<float>()(0) = min_range; + + Tensor* output_max_tensor = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(2, {}, &output_max_tensor)); + output_max_tensor->flat<float>()(0) = max_range; + } + + private: + float half_range_; + int mode_; +}; + +REGISTER_KERNEL_BUILDER( + Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<quint8>("T"), + QuantizeV2Op<CPUDevice, quint8>); +REGISTER_KERNEL_BUILDER( + Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<qint8>("T"), + QuantizeV2Op<CPUDevice, qint8>); +REGISTER_KERNEL_BUILDER( + Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<quint16>("T"), + QuantizeV2Op<CPUDevice, quint16>); +REGISTER_KERNEL_BUILDER( + Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<qint16>("T"), + QuantizeV2Op<CPUDevice, qint16>); + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantize_op_test.cc b/tensorflow/contrib/quantization/kernels/quantize_op_test.cc new file mode 100644 index 0000000000..d3ac7d3f7c --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantize_op_test.cc @@ -0,0 +1,113 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" + +namespace tensorflow { + +class QuantizedOpTest : public OpsTestBase { + protected: +}; + +TEST_F(QuantizedOpTest, QuantizeV2) { + TF_ASSERT_OK(NodeDefBuilder("quantize_op", "QuantizeV2") + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum<quint8>::v()) + .Attr("mode", "MIN_FIRST") + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + AddInputFromArray<float>(TensorShape({6}), + {1.0, 1.25, 1.75, 127.0, 255.0, 500.0}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_QUINT8, TensorShape({6})); + test::FillValues<quint8>(&expected, {1, 1, 2, 127, 255, 255}); + test::ExpectTensorEqual<quint8>(expected, *GetOutput(0)); +} + +TEST_F(QuantizedOpTest, QuantizeV2Ports) { + TF_ASSERT_OK(NodeDefBuilder("quantize_op", "QuantizeV2") + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum<quint8>::v()) + .Attr("mode", "MIN_FIRST") + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + AddInputFromArray<float>(TensorShape({6}), + {1.0, 1.25, 1.75, 127.0, 255.0, 500.0}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_QUINT8, TensorShape({6})); + test::FillValues<quint8>(&expected, {1, 1, 2, 127, 255, 255}); + test::ExpectTensorEqual<quint8>(expected, *GetOutput(0)); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + EXPECT_NEAR(0.0f, output_min, 1e-5f); + EXPECT_NEAR(255.0f, output_max, 1e-5f); +} + +TEST_F(QuantizedOpTest, QuantizeV2EqualRange) { + TF_ASSERT_OK(NodeDefBuilder("quantize_op", "QuantizeV2") + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum<quint8>::v()) + .Attr("mode", "MIN_FIRST") + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + AddInputFromArray<float>(TensorShape({6}), {1.0, 1.0, 1.0, 1.0, 1.0, 1.0}); + AddInputFromArray<float>(TensorShape({1}), {1.0f}); + AddInputFromArray<float>(TensorShape({1}), {1.0f}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_QUINT8, TensorShape({6})); + test::FillValues<quint8>(&expected, {0, 0, 0, 0, 0, 0}); + test::ExpectTensorEqual<quint8>(expected, *GetOutput(0)); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + EXPECT_NEAR(1.0f, output_min, 1e-5f); + EXPECT_LT(1.0f, output_max); +} + +TEST_F(QuantizedOpTest, Dequantize) { + TF_ASSERT_OK(NodeDefBuilder("dequantize_op", "Dequantize") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum<quint8>::v()) + .Attr("mode", "MIN_FIRST") + .Finalize(node_def())); + 
TF_ASSERT_OK(InitOp()); + AddInputFromArray<quint8>(TensorShape({6}), {1, 2, 4, 8, 16, 255}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_FLOAT, TensorShape({6})); + test::FillValues<float>(&expected, {1.0, 2.0, 4.0, 8.0, 16.0, 255.0}); + test::ExpectTensorNear<float>(expected, *GetOutput(0), 0.5); +} + +} // end namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantized_activation_ops.cc b/tensorflow/contrib/quantization/kernels/quantized_activation_ops.cc new file mode 100644 index 0000000000..a86b611ad6 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantized_activation_ops.cc @@ -0,0 +1,101 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Implements a quantized version of the Relu6 operation. +#define EIGEN_USE_THREADS + +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/framework/numeric_op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { + +template <typename T> +class QuantizedReluOp : public OpKernel { + public: + explicit QuantizedReluOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + const Tensor& input = context->input(0); + const float min_input = context->input(1).flat<float>()(0); + const float max_input = context->input(2).flat<float>()(0); + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, input.shape(), &output)); + const T min_as_quantized = FloatToQuantized<T>(0.0f, min_input, max_input); + output->flat<T>().device(context->eigen_cpu_device()) = + input.flat<T>().cwiseMax(min_as_quantized).template cast<T>(); + Tensor* output_min = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); + output_min->flat<float>()(0) = min_input; + Tensor* output_max = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max)); + output_max->flat<float>()(0) = max_input; + } +}; + +template <typename T> +class QuantizedRelu6Op : public OpKernel { + public: + explicit QuantizedRelu6Op(OpKernelConstruction* context) + : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + const Tensor& input = context->input(0); + const float min_input = context->input(1).flat<float>()(0); + const float max_input = context->input(2).flat<float>()(0); + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, input.shape(), &output)); + const T min_as_quantized = FloatToQuantized<T>(0.0f, min_input, max_input); + const T max_as_quantized = FloatToQuantized<T>(6.0f, min_input, max_input); + output->flat<T>().device(context->eigen_cpu_device()) = + 
input.flat<T>() + .cwiseMax(min_as_quantized) + .cwiseMin(max_as_quantized) + .template cast<T>(); + Tensor* output_min = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); + output_min->flat<float>()(0) = min_input; + Tensor* output_max = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max)); + output_max->flat<float>()(0) = max_input; + } +}; + +REGISTER_KERNEL_BUILDER(Name("QuantizedRelu") + .Device(DEVICE_CPU) + .TypeConstraint<qint32>("Tinput") + .TypeConstraint<qint32>("out_type"), + QuantizedReluOp<qint32>); +REGISTER_KERNEL_BUILDER(Name("QuantizedRelu") + .Device(DEVICE_CPU) + .TypeConstraint<quint8>("Tinput") + .TypeConstraint<quint8>("out_type"), + QuantizedReluOp<quint8>); + +REGISTER_KERNEL_BUILDER(Name("QuantizedRelu6") + .Device(DEVICE_CPU) + .TypeConstraint<qint32>("Tinput") + .TypeConstraint<qint32>("out_type"), + QuantizedRelu6Op<qint32>); +REGISTER_KERNEL_BUILDER(Name("QuantizedRelu6") + .Device(DEVICE_CPU) + .TypeConstraint<quint8>("Tinput") + .TypeConstraint<quint8>("out_type"), + QuantizedRelu6Op<quint8>); +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantized_activation_ops_test.cc b/tensorflow/contrib/quantization/kernels/quantized_activation_ops_test.cc new file mode 100644 index 0000000000..19efe6093e --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantized_activation_ops_test.cc @@ -0,0 +1,99 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +class QuantizedActivationsTest : public OpsTestBase { + protected: +}; + +TEST_F(QuantizedActivationsTest, TestRelu) { + TF_ASSERT_OK(NodeDefBuilder("quantized_relu_op", "QuantizedRelu") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const float input_min = -128.0f; + const float input_max = 127.0f; + const int input_width = 2; + const int input_height = 4; + Tensor input_float(DT_FLOAT, {input_height, input_width}); + test::FillValues<float>(&input_float, {-100, -1, 0, 1, 3, 6, 7, 100}); + Tensor input_quantized = + FloatTensorToQuantized<quint8>(input_float, input_min, input_max); + Tensor expected_float(DT_FLOAT, {input_height, input_width}); + test::FillValues<float>(&expected_float, {0, 0, 0, 1, 3, 6, 7, 100}); + + AddInputFromArray<quint8>(input_quantized.shape(), + input_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {input_min}); + AddInputFromArray<float>(TensorShape({1}), {input_max}); + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, output_float, 0.2); +} + +TEST_F(QuantizedActivationsTest, TestRelu6) { + TF_ASSERT_OK(NodeDefBuilder("quantized_relu6_op", "QuantizedRelu6") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const float input_min = -128.0f; + const float input_max = 127.0f; + const int input_width = 2; + const int input_height = 4; + Tensor input_float(DT_FLOAT, {input_height, input_width}); + test::FillValues<float>(&input_float, {-100, -1, 0, 1, 3, 6, 7, 100}); + Tensor input_quantized = + FloatTensorToQuantized<quint8>(input_float, input_min, input_max); + Tensor expected_float(DT_FLOAT, {input_height, input_width}); + test::FillValues<float>(&expected_float, {0, 0, 0, 1, 3, 6, 6, 6}); + + AddInputFromArray<quint8>(input_quantized.shape(), + input_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {input_min}); + AddInputFromArray<float>(TensorShape({1}), {input_max}); + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, 
output_float, 0.2); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op.cc b/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op.cc new file mode 100644 index 0000000000..2a684824d3 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op.cc @@ -0,0 +1,240 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define EIGEN_USE_THREADS + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/framework/numeric_op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" + +namespace tensorflow { + +namespace { + +// A slow but straightforward implementation of batch normalization. +template <typename T1, typename T2> +void ReferenceBatchNorm(const Tensor& input, const float input_min, + const float input_max, const Tensor& mean, + float mean_min, float mean_max, const Tensor& var, + float var_min, float var_max, const Tensor& beta, + float beta_min, float beta_max, const Tensor& gamma, + float gamma_min, float gamma_max, + float variance_epsilon, bool scale_after_normalization, + Tensor* output, float* output_min, float* output_max) { + auto input_flat = input.flat<T1>(); + auto mean_flat = mean.flat<T1>(); + auto var_flat = var.flat<T1>(); + auto beta_flat = beta.flat<T1>(); + auto gamma_flat = gamma.flat<T1>(); + auto output_flat = output->flat<T2>(); + + const int depth = mean.dim_size(0); + const int row_count = input_flat.size() / depth; + + *output_min = std::numeric_limits<float>::max(); + *output_max = std::numeric_limits<float>::lowest(); + for (int pass = 0; pass < 2; ++pass) { + const bool is_range_pass = (pass == 0); + for (int row_index = 0; row_index < row_count; ++row_index) { + for (int channel = 0; channel < depth; ++channel) { + const int input_index = (row_index * depth) + channel; + const float input_value = + QuantizedToFloat(input_flat(input_index), input_min, input_max); + const float mean_value = + QuantizedToFloat(mean_flat(channel), mean_min, mean_max); + const float var_value = + QuantizedToFloat(var_flat(channel), var_min, var_max); + const float beta_value = + QuantizedToFloat(beta_flat(channel), beta_min, beta_max); + const float gamma_value = + QuantizedToFloat(gamma_flat(channel), gamma_min, gamma_max); + float output_value; + if (scale_after_normalization) { + output_value = (((input_value - mean_value) / + sqrtf(var_value + variance_epsilon)) * + gamma_value) + + beta_value; + } else { + output_value = ((input_value - mean_value) / + sqrtf(var_value + variance_epsilon)) + + beta_value; + } + if (is_range_pass) { + *output_min = std::min(output_value, *output_min); + *output_max = std::max(output_value, *output_max); + } else { + 
output_flat(input_index) = + FloatToQuantized<T2>(output_value, *output_min, *output_max); + } + } + } + } +} + +// An implementation of batch normalization that does the main calculations +// using only fixed-point arithmetic. There's a prologue with some floating +// calculations, but assuming the weights are constant these could be hoisted to +// an offline process, or baked into the weights. +template <typename T1, typename T2> +void FixedPointBatchNorm(const Tensor& input, const float input_min, + const float input_max, const Tensor& mean, + float mean_min, float mean_max, const Tensor& var, + float var_min, float var_max, const Tensor& beta, + float beta_min, float beta_max, const Tensor& gamma, + float gamma_min, float gamma_max, + float variance_epsilon, bool scale_after_normalization, + Tensor* output, float* output_min, float* output_max) { + auto input_flat = input.flat<T1>(); + auto mean_flat = mean.flat<T1>(); + auto var_flat = var.flat<T1>(); + auto beta_flat = beta.flat<T1>(); + auto gamma_flat = gamma.flat<T1>(); + auto output_flat = output->flat<T2>(); + + const int depth = mean.dim_size(0); + const int row_count = input_flat.size() / depth; + + // The range here is chosen so that typical input values fit in without any + // overflow or loss of precision, going from +1m to -1m with 10 bits of fixed + // point precision. + *output_min = -(1 << 20); + *output_max = (1 << 20); + + Tensor scale_tensor(DataTypeToEnum<T2>::v(), {depth}); + auto scale_flat = scale_tensor.flat<T2>(); + Tensor offset_tensor(DataTypeToEnum<T2>::v(), {depth}); + auto offset_flat = offset_tensor.flat<T2>(); + for (int channel = 0; channel < depth; ++channel) { + const float mean_value = + QuantizedToFloat(mean_flat(channel), mean_min, mean_max); + const float var_value = + QuantizedToFloat(var_flat(channel), var_min, var_max); + const float beta_value = + QuantizedToFloat(beta_flat(channel), beta_min, beta_max); + const float gamma_value = + QuantizedToFloat(gamma_flat(channel), gamma_min, gamma_max); + float scale_value; + if (scale_after_normalization) { + scale_value = (1.0f / sqrtf(var_value + variance_epsilon)) * gamma_value; + } else { + scale_value = (1.0f / sqrtf(var_value + variance_epsilon)); + } + const float offset_value = (-mean_value * scale_value) + beta_value; + scale_flat(channel) = + FloatToQuantized<T2>(scale_value, *output_min, *output_max); + offset_flat(channel) = + FloatToQuantized<T2>(offset_value, *output_min, *output_max); + } + + const T2 one_in_output_space = + FloatToQuantized<T2>(1.0f, *output_min, *output_max); + for (int row_index = 0; row_index < row_count; ++row_index) { + for (int channel = 0; channel < depth; ++channel) { + const int input_index = (row_index * depth) + channel; + const T2 input_value = + RequantizeInNewRange<T1, T2>(input_flat(input_index), input_min, + input_max, *output_min, *output_max); + const T2 scale_value = scale_flat(channel); + const T2 offset_value = offset_flat(channel); + const T2 output_value = + ((input_value * scale_value) / one_in_output_space) + offset_value; + output_flat(input_index) = output_value; + } + } +} + +} // namespace + +template <typename T1, typename T2> +class QuantizedBatchNormOp : public OpKernel { + public: + explicit QuantizedBatchNormOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES_OK(context, + context->GetAttr("variance_epsilon", &variance_epsilon_)); + OP_REQUIRES_OK(context, context->GetAttr("scale_after_normalization", + &scale_after_normalization_)); + } + + void 
Compute(OpKernelContext* context) override { + const Tensor& input = context->input(0); + const float input_min = context->input(1).flat<float>()(0); + const float input_max = context->input(2).flat<float>()(0); + const Tensor& mean = context->input(3); + const float mean_min = context->input(4).flat<float>()(0); + const float mean_max = context->input(5).flat<float>()(0); + const Tensor& var = context->input(6); + const float var_min = context->input(7).flat<float>()(0); + const float var_max = context->input(8).flat<float>()(0); + const Tensor& beta = context->input(9); + const float beta_min = context->input(10).flat<float>()(0); + const float beta_max = context->input(11).flat<float>()(0); + const Tensor& gamma = context->input(12); + const float gamma_min = context->input(13).flat<float>()(0); + const float gamma_max = context->input(14).flat<float>()(0); + + OP_REQUIRES(context, input.dims() == 4, + errors::InvalidArgument("input must be 4-dimensional", + input.shape().DebugString())); + OP_REQUIRES(context, mean.dims() == 1, + errors::InvalidArgument("mean must be 1-dimensional", + mean.shape().DebugString())); + OP_REQUIRES(context, var.dims() == 1, + errors::InvalidArgument("var must be 1-dimensional", + var.shape().DebugString())); + OP_REQUIRES(context, beta.dims() == 1, + errors::InvalidArgument("beta must be 1-dimensional", + beta.shape().DebugString())); + OP_REQUIRES(context, gamma.dims() == 1, + errors::InvalidArgument("gamma must be 1-dimensional", + gamma.shape().DebugString())); + + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, input.shape(), &output)); + float output_min; + float output_max; + FixedPointBatchNorm<T1, T2>(input, input_min, input_max, mean, mean_min, + mean_max, var, var_min, var_max, beta, beta_min, + beta_max, gamma, gamma_min, gamma_max, + variance_epsilon_, scale_after_normalization_, + output, &output_min, &output_max); + + Tensor* output_min_tensor = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(1, {}, &output_min_tensor)); + output_min_tensor->flat<float>()(0) = output_min; + + Tensor* output_max_tensor = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(2, {}, &output_max_tensor)); + output_max_tensor->flat<float>()(0) = output_max; + } + + private: + float variance_epsilon_; + bool scale_after_normalization_; +}; + +REGISTER_KERNEL_BUILDER(Name("QuantizedBatchNormWithGlobalNormalization") + .Device(DEVICE_CPU) + .TypeConstraint<quint8>("Tinput") + .TypeConstraint<qint32>("out_type"), + QuantizedBatchNormOp<quint8, qint32>); + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op_test.cc b/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op_test.cc new file mode 100644 index 0000000000..ccb6a59ecf --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op_test.cc @@ -0,0 +1,242 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#define EIGEN_USE_THREADS + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/common_runtime/eigen_thread_pool.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/batch_norm_op.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +class QuantizedBatchNormOpTest : public OpsTestBase {}; + +TEST_F(QuantizedBatchNormOpTest, Simple) { + TF_EXPECT_OK(NodeDefBuilder("quantized_batch_norm_op", + "QuantizedBatchNormWithGlobalNormalization") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("scale_after_normalization", false) + .Attr("variance_epsilon", 0.001) + .Attr("Tinput", DT_QUINT8) + .Attr("out_type", DT_QINT32) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const float input_min = -128.0f; + const float input_max = 127.0f; + const int input_batch = 1; + const int input_height = 1; + const int input_width = 6; + const int input_depth = 2; + Tensor input_float(DT_FLOAT, + {input_batch, input_height, input_width, input_depth}); + test::FillValues<float>(&input_float, + {1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6}); + Tensor input_quantized = + FloatTensorToQuantized<quint8>(input_float, input_min, input_max); + const float mean_min = 0.0f; + const float mean_max = 20.0f; + Tensor mean_float(DT_FLOAT, {input_depth}); + test::FillValues<float>(&mean_float, {10, 20}); + Tensor mean_quantized = + FloatTensorToQuantized<quint8>(mean_float, mean_min, mean_max); + const float variance_min = 0.0f; + const float variance_max = 1.0f; + Tensor variance_float(DT_FLOAT, {input_depth}); + test::FillValues<float>(&variance_float, {0.25, 0.5}); + Tensor variance_quantized = FloatTensorToQuantized<quint8>( + variance_float, variance_min, variance_max); + const float beta_min = 0.0f; + const float beta_max = 1.0f; + Tensor beta_float(DT_FLOAT, {input_depth}); + test::FillValues<float>(&beta_float, {0.1, 0.6}); + Tensor beta_quantized = + FloatTensorToQuantized<quint8>(beta_float, beta_min, beta_max); + const float gamma_min = 0.0f; + const float gamma_max = 1.0f; + Tensor gamma_float(DT_FLOAT, {input_depth}); + test::FillValues<float>(&gamma_float, {0.0, 0.0}); + Tensor gamma_quantized = + FloatTensorToQuantized<quint8>(gamma_float, gamma_min, gamma_max); + + AddInputFromArray<quint8>(input_quantized.shape(), + input_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {input_min}); + AddInputFromArray<float>(TensorShape({1}), {input_max}); + AddInputFromArray<quint8>(mean_quantized.shape(), + mean_quantized.flat<quint8>()); + 
AddInputFromArray<float>(TensorShape({1}), {mean_min}); + AddInputFromArray<float>(TensorShape({1}), {mean_max}); + AddInputFromArray<quint8>(variance_quantized.shape(), + variance_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {variance_min}); + AddInputFromArray<float>(TensorShape({1}), {variance_max}); + AddInputFromArray<quint8>(beta_quantized.shape(), + beta_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {beta_min}); + AddInputFromArray<float>(TensorShape({1}), {beta_max}); + AddInputFromArray<quint8>(gamma_quantized.shape(), + gamma_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {gamma_min}); + AddInputFromArray<float>(TensorShape({1}), {gamma_max}); + TF_ASSERT_OK(RunOpKernel()); + + Tensor expected_float( + allocator(), DT_FLOAT, + TensorShape({input_batch, input_height, input_width, input_depth})); + test::FillValues<float>( + &expected_float, {-17.86, -22.00, -15.87, -20.59, -13.87, -19.18, -21.86, + -33.31, -23.85, -34.72, -25.85, -36.13}); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, output_float, 0.1); +} + +TEST_F(QuantizedBatchNormOpTest, SameAsFloat) { + TF_EXPECT_OK(NodeDefBuilder("quantized_batch_norm_op", + "QuantizedBatchNormWithGlobalNormalization") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("scale_after_normalization", false) + .Attr("variance_epsilon", 0.001) + .Attr("Tinput", DT_QUINT8) + .Attr("out_type", DT_QINT32) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const float input_min = -128.0f; + const float input_max = 127.0f; + const int input_batch = 1; + const int input_height = 1; + const int input_width = 6; + const int input_depth = 2; + Tensor input_float(DT_FLOAT, + {input_batch, input_height, input_width, input_depth}); + test::FillValues<float>(&input_float, + {1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6}); + Tensor input_quantized = + FloatTensorToQuantized<quint8>(input_float, input_min, input_max); + const float mean_min = 0.0f; + const float mean_max = 20.0f; + Tensor mean_float(DT_FLOAT, {input_depth}); + test::FillValues<float>(&mean_float, {10, 20}); + Tensor mean_quantized = + FloatTensorToQuantized<quint8>(mean_float, mean_min, mean_max); + const float variance_min = 0.0f; + const float variance_max = 1.0f; + Tensor variance_float(DT_FLOAT, {input_depth}); + test::FillValues<float>(&variance_float, {0.25, 0.5}); + Tensor variance_quantized = FloatTensorToQuantized<quint8>( + variance_float, variance_min, variance_max); + const float beta_min = 0.0f; + const float beta_max = 1.0f; + Tensor beta_float(DT_FLOAT, {input_depth}); + test::FillValues<float>(&beta_float, {0.1, 0.6}); + Tensor beta_quantized = + FloatTensorToQuantized<quint8>(beta_float, beta_min, beta_max); + const float gamma_min = 0.0f; + const float gamma_max = 1.0f; + Tensor gamma_float(DT_FLOAT, {input_depth}); + 
test::FillValues<float>(&gamma_float, {0.0, 0.0}); + Tensor gamma_quantized = + FloatTensorToQuantized<quint8>(gamma_float, gamma_min, gamma_max); + + AddInputFromArray<quint8>(input_quantized.shape(), + input_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {input_min}); + AddInputFromArray<float>(TensorShape({1}), {input_max}); + AddInputFromArray<quint8>(mean_quantized.shape(), + mean_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {mean_min}); + AddInputFromArray<float>(TensorShape({1}), {mean_max}); + AddInputFromArray<quint8>(variance_quantized.shape(), + variance_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {variance_min}); + AddInputFromArray<float>(TensorShape({1}), {variance_max}); + AddInputFromArray<quint8>(beta_quantized.shape(), + beta_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {beta_min}); + AddInputFromArray<float>(TensorShape({1}), {beta_max}); + AddInputFromArray<quint8>(gamma_quantized.shape(), + gamma_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {gamma_min}); + AddInputFromArray<float>(TensorShape({1}), {gamma_max}); + TF_ASSERT_OK(RunOpKernel()); + + Tensor expected_float( + allocator(), DT_FLOAT, + TensorShape({input_batch, input_height, input_width, input_depth})); + thread::ThreadPool threadpool(Env::Default(), "test", 1); + EigenThreadPoolWrapper wrapper(&threadpool); + Eigen::ThreadPoolDevice eigen_cpu_device(&wrapper, 1); + const Tensor& const_input_float = input_float; + const Tensor& const_mean_float = mean_float; + const Tensor& const_variance_float = variance_float; + const Tensor& const_beta_float = beta_float; + const Tensor& const_gamma_float = gamma_float; + functor::BatchNorm<Eigen::ThreadPoolDevice, float>()( + eigen_cpu_device, const_input_float.tensor<float, 4>(), + const_mean_float.vec<float>(), const_variance_float.vec<float>(), + const_beta_float.vec<float>(), const_gamma_float.vec<float>(), 0.001, + false, expected_float.tensor<float, 4>()); + + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, output_float, 0.1); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc new file mode 100644 index 0000000000..c319eb97da --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc @@ -0,0 +1,89 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Implements a quantized eight-bit version of the bias addition operation. 
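For orientation, a minimal float-reference sketch of what this bias-add op computes (illustration only, not part of this change): both quantized operands are mapped back to floats, the bias is broadcast-added along the last dimension, and the sums are requantized into a 32-bit output range. The helper names and the choice of output range below are assumptions made for the sketch; the kernel itself calls QuantizedAddUsingEigen from quantization_utils.h to do this work.

#include <cstdint>
#include <vector>

// Hypothetical linear (de)quantization helpers, standing in for the
// QuantizedToFloat/FloatToQuantized templates used elsewhere in this change.
static float DequantizeU8(uint8_t q, float min, float max) {
  return min + (static_cast<float>(q) / 255.0f) * (max - min);
}
static int32_t QuantizeS32(float v, float min, float max) {
  // Map [min, max] linearly onto the full signed 32-bit span.
  const double scale = 4294967295.0 / (static_cast<double>(max) - min);
  return static_cast<int32_t>((static_cast<double>(v) - min) * scale -
                              2147483648.0);
}

// Adds a depth-sized bias vector to every row of a (rows x depth) input.
std::vector<int32_t> QuantizedBiasAddSketch(
    const std::vector<uint8_t>& input, float input_min, float input_max,
    const std::vector<uint8_t>& bias, float bias_min, float bias_max,
    int rows, int depth, float* output_min, float* output_max) {
  // One plausible output range: wide enough to hold any input plus any bias.
  *output_min = input_min + bias_min;
  *output_max = input_max + bias_max;
  std::vector<int32_t> output(rows * depth);
  for (int r = 0; r < rows; ++r) {
    for (int d = 0; d < depth; ++d) {
      const float sum =
          DequantizeU8(input[r * depth + d], input_min, input_max) +
          DequantizeU8(bias[d], bias_min, bias_max);
      output[r * depth + d] = QuantizeS32(sum, *output_min, *output_max);
    }
  }
  return output;
}

With the values used by the Small test below (inputs 10..60 in a 0..60 range, biases 1..3 in a 0..3 range), this reference yields approximately 11, 22, 33, 41, 52, 63, which is what the expected_float tensor in quantized_bias_add_op_test.cc checks against.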
+ +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/framework/numeric_op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +template <class T1, class T2, class T3> +class QuantizedBiasAddOp : public OpKernel { + public: + explicit QuantizedBiasAddOp(OpKernelConstruction* context) + : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + const Tensor& input = context->input(0); + const Tensor& bias = context->input(1); + const float input_min = context->input(2).flat<float>()(0); + const float input_max = context->input(3).flat<float>()(0); + const float bias_min = context->input(4).flat<float>()(0); + const float bias_max = context->input(5).flat<float>()(0); + + OP_REQUIRES(context, TensorShapeUtils::IsMatrixOrHigher(input.shape()), + errors::InvalidArgument("Input tensor must be at least 2D: ", + input.shape().DebugString())); + OP_REQUIRES(context, TensorShapeUtils::IsVector(bias.shape()), + errors::InvalidArgument("Biases must be 1D: ", + bias.shape().DebugString())); + const auto last_dim = input.shape().dims() - 1; + OP_REQUIRES( + context, bias.shape().dim_size(0) == input.shape().dim_size(last_dim), + errors::InvalidArgument( + "Must provide as many biases as the last dimension " + "of the input tensor: ", + bias.shape().DebugString(), " vs. ", input.shape().DebugString())); + + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, input.shape(), &output)); + + float total_min; + float total_max; + QuantizedAddUsingEigen<T1, T2, T3>( + context->template eigen_device<CPUDevice>(), input, input_min, + input_max, bias, bias_min, bias_max, output, &total_min, &total_max); + + Tensor* output_min = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); + output_min->flat<float>()(0) = total_min; + + Tensor* output_max = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max)); + output_max->flat<float>()(0) = total_max; + } +}; + +REGISTER_KERNEL_BUILDER(Name("QuantizedBiasAdd") + .Device(DEVICE_CPU) + .TypeConstraint<quint8>("T1") + .TypeConstraint<quint8>("T2") + .TypeConstraint<qint32>("out_type"), + QuantizedBiasAddOp<quint8, quint8, qint32>); +REGISTER_KERNEL_BUILDER(Name("QuantizedBiasAdd") + .Device(DEVICE_CPU) + .TypeConstraint<qint8>("T1") + .TypeConstraint<qint8>("T2") + .TypeConstraint<qint32>("out_type"), + QuantizedBiasAddOp<qint8, qint8, qint32>); +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantized_bias_add_op_test.cc b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op_test.cc new file mode 100644 index 0000000000..56535029b5 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op_test.cc @@ -0,0 +1,171 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <functional> + +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +class QuantizedBiasAddTest : public OpsTestBase { + protected: +}; + +TEST_F(QuantizedBiasAddTest, Small) { + TF_ASSERT_OK(NodeDefBuilder("quantized_bias_add_op", "QuantizedBiasAdd") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("out_type", DataTypeToEnum<qint32>::v()) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const float input_min = 0.0f; + const float input_max = 60.0f; + const int input_height = 2; + const int input_width = 3; + Tensor input_float(DT_FLOAT, {input_height, input_width}); + test::FillValues<float>(&input_float, + {10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f}); + Tensor input_quantized = + FloatTensorToQuantized<quint8>(input_float, input_min, input_max); + + const float bias_min = 0.0f; + const float bias_max = 3.0f; + const int bias_width = 3; + Tensor bias_float(DT_FLOAT, {bias_width}); + test::FillValues<float>(&bias_float, {1.0f, 2.0f, 3.0f}); + Tensor bias_quantized = + FloatTensorToQuantized<quint8>(bias_float, bias_min, bias_max); + + Tensor expected_float(DT_FLOAT, {input_height, input_width}); + test::FillValues<float>(&expected_float, + {11.0f, 22.0f, 33.0f, 41.0f, 52.0f, 63.0f}); + + AddInputFromArray<quint8>(input_quantized.shape(), + input_quantized.flat<quint8>()); + AddInputFromArray<quint8>(bias_quantized.shape(), + bias_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {input_min}); + AddInputFromArray<float>(TensorShape({1}), {input_max}); + AddInputFromArray<float>(TensorShape({1}), {bias_min}); + AddInputFromArray<float>(TensorShape({1}), {bias_max}); + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, output_float, 0.2); +} + +TEST_F(QuantizedBiasAddTest, RealData) { + TF_ASSERT_OK(NodeDefBuilder("quantized_bias_add_op", "QuantizedBiasAdd") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("out_type", DataTypeToEnum<qint32>::v()) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const float input_min = -2164.25f; + const float input_max = 2006.27f; + const int input_height = 1; + const int input_width = 
64; + Tensor input_float(DT_FLOAT, {input_height, input_width}); + test::FillValues<float>( + &input_float, + {-1014.12, -157.382, -810.17, 1435.28, 1016.37, 219.684, -316.054, + -2164.25, 2006.27, -547.444, 857.376, 404.376, 9.72115, 332.588, + 194.385, -286.57, 26.062, 23.1125, 110.436, 247.055, -127.683, + -376.275, -124.81, -846.826, -77.1507, 305.581, -202.747, 12.9528, + 9.64886, 872.686, 40.9069, 197.816, 44.16, -306.768, -1457.52, + -368.939, -1049.42, -486.353, 1745.87, 95.7695, 395.773, -254.333, + -404.27, 787.16, -2.44114, 199.37, -1024.08, 784.901, 235.055, + -42.7295, 241.498, -245.365, 470.763, 186.159, 186.579, -220.163, + 1304.58, 386.272, -358.853, -755.996, 360.109, -866.007, 55.2828, + -508.801}); + Tensor input_quantized = + FloatTensorToQuantized<quint8>(input_float, input_min, input_max); + + const float bias_min = -0.739539f; + const float bias_max = 0.641057f; + const int bias_width = 64; + Tensor bias_float(DT_FLOAT, {bias_width}); + test::FillValues<float>( + &bias_float, + {-0.294619, -0.0670519, 0.261507, -0.126274, 0.127229, -0.176945, + -0.251223, 0.231086, 0.453694, 0.415666, -0.288733, 0.508717, + 0.211551, 0.0435907, -0.582383, -0.308779, 0.0696883, -0.438122, + 0.114, 0.433964, 0.109883, 0.284931, -0.149661, 0.108657, + 0.458333, -0.130231, -0.35805, -0.123206, -0.437968, 0.0282411, + 0.628818, -0.0522173, -0.0233403, 0.124863, 0.217165, 0.262294, + -0.171005, -0.254693, -0.200433, -0.287354, 0.488166, -0.0354688, + -0.118091, -0.590444, 0.491537, -0.739539, 0.083117, 0.282482, + 0.275269, -0.36574, 0.107476, 0.0511428, -0.136887, -0.0149852, + -0.259694, 0.641057, 0.264054, -0.295126, -0.0218791, 0.361211, + 0.012448, 0.0709718, -0.392394, -0.434215}); + Tensor bias_quantized = + FloatTensorToQuantized<quint8>(bias_float, bias_min, bias_max); + + Tensor expected_float(DT_FLOAT, {input_height, input_width}); + test::FillValues<float>( + &expected_float, + {-1014.42, -157.449, -809.908, 1435.16, 1016.5, 219.507, -316.305, + -2164.02, 2006.73, -547.028, 857.088, 404.885, 9.9327, 332.632, + 193.803, -286.878, 26.1317, 22.6744, 110.55, 247.489, -127.573, + -375.99, -124.959, -846.717, -76.6923, 305.451, -203.105, 12.8296, + 9.21089, 872.714, 41.5357, 197.764, 44.1367, -306.643, -1457.3, + -368.677, -1049.6, -486.608, 1745.67, 95.4821, 396.261, -254.368, + -404.388, 786.57, -1.94961, 198.63, -1024.0, 785.183, 235.33, + -43.0953, 241.605, -245.314, 470.627, 186.144, 186.319, -219.522, + 1304.84, 385.977, -358.874, -755.635, 360.122, -865.936, 54.8904, + -509.235}); + + AddInputFromArray<quint8>(input_quantized.shape(), + input_quantized.flat<quint8>()); + AddInputFromArray<quint8>(bias_quantized.shape(), + bias_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {input_min}); + AddInputFromArray<float>(TensorShape({1}), {input_max}); + AddInputFromArray<float>(TensorShape({1}), {bias_min}); + AddInputFromArray<float>(TensorShape({1}), {bias_max}); + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, output_float, 20.0); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantized_concat_op.cc b/tensorflow/contrib/quantization/kernels/quantized_concat_op.cc new file mode 100644 index 0000000000..abe8c9138d --- /dev/null 
+++ b/tensorflow/contrib/quantization/kernels/quantized_concat_op.cc @@ -0,0 +1,246 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define EIGEN_USE_THREADS + +#include <vector> + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/concat_lib_cpu.h" + +namespace tensorflow { + +namespace { +template <typename T> +struct RequantizeCopier { + RequantizeCopier( + const std::vector<std::pair<float, float>>* input_min_and_max, + float output_min, float output_max) + : output_min(output_min), + output_max(output_max), + input_min_and_max(input_min_and_max) {} + + inline void Copy(T* dst, const T* src, int input_index, size_t n) { + const float input_min = (*input_min_and_max)[input_index].first; + const float input_max = (*input_min_and_max)[input_index].second; + if (input_min == output_min && input_max == output_max) { + DCHECK(DataTypeCanUseMemcpy(DataTypeToEnum<T>::v())); + memcpy(dst, src, n * sizeof(T)); + } else { + Eigen::array<Eigen::DenseIndex, 1> dims; + dims[0] = n; + typename TTypes<T, 1>::UnalignedConstTensor input_array(src, dims); + typename TTypes<T, 1>::UnalignedTensor output_array(dst, dims); + + QuantizedToFloatStruct<T> q2f(input_min, input_max); + auto input_float = DEQUANTIZE_WITH_EIGEN(input_array, q2f); + FloatToQuantizedStruct<T> f2q(output_min, output_max); + auto input_requantized = QUANTIZE_WITH_EIGEN(input_float, f2q, T); + + // RequantizeCopier::Copy is called from within a shard of computation, so + // don't use the threadpool device here, simply assign with default CPU + // device. 
+ output_array = input_requantized; + } + } + + float output_min; + float output_max; + const std::vector<std::pair<float, float>>* input_min_and_max; +}; +} // namespace + +template <typename T> +class QuantizedConcatOp : public OpKernel { + public: + typedef std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>> + ConstMatrixVector; + + explicit QuantizedConcatOp(OpKernelConstruction* c) : OpKernel(c) {} + + void CalculateInputAndOutputRange( + const OpInputList& input_mins, const OpInputList& input_maxes, + const size_t N, + std::vector<std::pair<float, float>>* input_mins_and_maxes, + float* output_min, float* output_max) { + input_mins_and_maxes->reserve(N); + float overall_min = std::numeric_limits<float>::max(); + float overall_max = std::numeric_limits<float>::lowest(); + for (int i = 0; i < N; ++i) { + const float input_min = input_mins[i].flat<float>()(0); + const float input_max = input_maxes[i].flat<float>()(0); + input_mins_and_maxes->emplace_back(input_min, input_max); + overall_min = std::min(overall_min, input_min); + overall_max = std::max(overall_max, input_max); + } + if (std::is_signed<T>::value) { + // For signed, we want a symmetrical distribution including zero for the + // output, so pick a range that meets that need. + const float largest_value = + std::max(std::abs(overall_min), std::abs(overall_max)); + *output_min = -largest_value; + *output_max = largest_value; + } else { + *output_min = overall_min; + *output_max = overall_max; + } + } + + int64 CalculateInputsDim(const TensorShape& input_shape, + const int32 concat_dim) { + int64 inputs_flat_dim0 = 1; + for (int d = 0; d < concat_dim; ++d) { + inputs_flat_dim0 *= input_shape.dim_size(d); + } + return inputs_flat_dim0; + } + + void CalculateConcatDims(const size_t N, const TensorShape& input_shape, + int input_dims, const OpInputList& values, + OpKernelContext* context, const int32 concat_dim, + const int64 inputs_flat_dim0, + ConstMatrixVector* inputs_flat, + int* output_concat_dim) { + // Note that we reduce the concat of n-dimensional tensors into a two + // dimensional concat. Assuming the dimensions of any input/output + // tensor are {x0, x1,...,xn-1, y0, y1,...,ym-1}, where the concat is along + // the dimension indicated with size y0, we flatten it to {x, y}, where y = + // Prod_i(yi) and x = ((n > 0) ? Prod_i(xi) : 1). + inputs_flat->reserve(N); + *output_concat_dim = 0; + const bool input_is_scalar = IsLegacyScalar(input_shape); + for (int i = 0; i < N; ++i) { + const auto in = values[i]; + const bool in_is_scalar = IsLegacyScalar(in.shape()); + OP_REQUIRES( + context, in.dims() == input_dims || (input_is_scalar && in_is_scalar), + errors::InvalidArgument( + "ConcatOp : Ranks of all input tensors should match: shape[0] = ", + input_shape.DebugString(), " vs. shape[", i, "] = ", + in.shape().DebugString())); + for (int j = 0; j < input_dims; ++j) { + if (j == concat_dim) { + continue; + } + OP_REQUIRES( + context, in.dim_size(j) == input_shape.dim_size(j), + errors::InvalidArgument( + "ConcatOp : Dimensions of inputs should match: shape[0] = ", + input_shape.DebugString(), " vs. shape[", i, "] = ", + in.shape().DebugString())); + } + if (in.NumElements() > 0) { + int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0; + inputs_flat->emplace_back(new typename TTypes<T, 2>::ConstMatrix( + in.shaped<T, 2>({inputs_flat_dim0, inputs_flat_dim1}))); + } + *output_concat_dim += in.dims() > 0 ? 
in.dim_size(concat_dim) : 1; + } + } + + void Compute(OpKernelContext* context) override { + const Tensor* concat_dim_tensor = nullptr; + OP_REQUIRES_OK(context, context->input("concat_dim", &concat_dim_tensor)); + OP_REQUIRES( + context, IsLegacyScalar(concat_dim_tensor->shape()), + errors::InvalidArgument( + "Concat dim tensor should be a scalar integer, but got shape ", + concat_dim_tensor->shape().DebugString())); + const int32 concat_dim = concat_dim_tensor->scalar<int32>()(); + OpInputList values; + OP_REQUIRES_OK(context, context->input_list("values", &values)); + const size_t N = values.size(); + OpInputList input_mins; + OP_REQUIRES_OK(context, context->input_list("input_mins", &input_mins)); + OP_REQUIRES(context, (input_mins.size() == N), + errors::InvalidArgument( + "QuantizedConcatOp : Expected mins input list length ", + input_mins.size(), " to equal values length ", N)) + OpInputList input_maxes; + OP_REQUIRES_OK(context, context->input_list("input_maxes", &input_maxes)); + OP_REQUIRES(context, (input_maxes.size() == N), + errors::InvalidArgument( + "QuantizedConcatOp : Expected maxes input list length ", + input_maxes.size(), " to equal values length ", N)) + const int input_dims = values[0].dims(); + const TensorShape& input_shape = values[0].shape(); + OP_REQUIRES( + context, (0 <= concat_dim && concat_dim < input_dims) || + (allow_legacy_scalars() && concat_dim == 0), + errors::InvalidArgument( + "ConcatOp : Expected concatenating dimensions in the range [", 0, + ", ", input_dims, "), but got ", concat_dim)); + + float output_min = std::numeric_limits<float>::max(); + float output_max = std::numeric_limits<float>::lowest(); + std::vector<std::pair<float, float>> input_mins_and_maxes; + CalculateInputAndOutputRange(input_mins, input_maxes, N, + &input_mins_and_maxes, &output_min, + &output_max); + const int64 inputs_flat_dim0 = CalculateInputsDim(input_shape, concat_dim); + ConstMatrixVector inputs_flat; + int output_concat_dim; + CalculateConcatDims(N, input_shape, input_dims, values, context, concat_dim, + inputs_flat_dim0, &inputs_flat, &output_concat_dim); + + TensorShape output_shape(input_shape); + // TODO(irving): Remove rank 0 case once !kAllowLegacyScalars + if (output_shape.dims() == 0) { + output_shape.AddDim(output_concat_dim); + } else { + output_shape.set_dim(concat_dim, output_concat_dim); + } + Tensor* output = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); + + if (output->NumElements() > 0) { + int64 output_dim1 = output->NumElements() / inputs_flat_dim0; + auto output_flat = output->shaped<T, 2>({inputs_flat_dim0, output_dim1}); + ConcatCPUImpl<T>( + context->device(), inputs_flat, sizeof(T) /* cost_per_unit */, + RequantizeCopier<T>(&input_mins_and_maxes, output_min, output_max), + &output_flat); + } + + Tensor* output_min_tensor = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(1, {}, &output_min_tensor)); + output_min_tensor->flat<float>()(0) = output_min; + + Tensor* output_max_tensor = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(2, {}, &output_max_tensor)); + output_max_tensor->flat<float>()(0) = output_max; + } +}; + +#define REGISTER_QUANTIZED_CONCAT(type) \ + REGISTER_KERNEL_BUILDER(Name("QuantizedConcat") \ + .Device(DEVICE_CPU) \ + .TypeConstraint<type>("T") \ + .HostMemory("concat_dim"), \ + QuantizedConcatOp<type>) + +REGISTER_QUANTIZED_CONCAT(quint8); +REGISTER_QUANTIZED_CONCAT(qint32); + +#undef REGISTER_QUANTIZED_CONCAT + +} // namespace tensorflow diff --git 
a/tensorflow/contrib/quantization/kernels/quantized_concat_op_test.cc b/tensorflow/contrib/quantization/kernels/quantized_concat_op_test.cc new file mode 100644 index 0000000000..1301259fdd --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantized_concat_op_test.cc @@ -0,0 +1,337 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <functional> +#include <memory> +#include <vector> + +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" + +namespace tensorflow { + +using test::graph::Constant; + +class QuantizedConcatTest : public OpsTestBase { + protected: + QuantizedConcatTest() {} + + void TestSmall8Bit(float first_min, float first_max, float second_min, + float second_max); + void TestSmall32Bit(float first_min, float first_max, float second_min, + float second_max); + void TestSecondDim8Bit(float first_min, float first_max, float second_min, + float second_max); +}; + +TEST_F(QuantizedConcatTest, Small8Bit) { + TestSmall8Bit(0.0f, 255.0f, 0.0f, 25.0f); +} + +TEST_F(QuantizedConcatTest, Small8BitSameRange) { + // Range for both is the same, so impl can use memcpy. 
+ TestSmall8Bit(0.0f, 255.0f, 0.0f, 255.0f); +} + +void QuantizedConcatTest::TestSmall8Bit(float first_min, float first_max, + float second_min, float second_max) { + TF_ASSERT_OK(NodeDefBuilder("quantized_concat_op", "QuantizedConcat") + .Input(FakeInput(DT_INT32)) + .Input(FakeInput(2, DT_QUINT8)) + .Input(FakeInput(2, DT_FLOAT)) + .Input(FakeInput(2, DT_FLOAT)) + .Attr("N", 2) + .Attr("T", DataTypeToEnum<quint8>::v()) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const int first_batch = 2; + const int first_height = 2; + const int first_width = 3; + Tensor first_float(DT_FLOAT, {first_batch, first_height, first_width}); + test::FillValues<float>(&first_float, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + Tensor first_quantized = + FloatTensorToQuantized<quint8>(first_float, first_min, first_max); + + const int second_batch = 2; + const int second_height = 2; + const int second_width = 3; + Tensor second_float(DT_FLOAT, {second_batch, second_height, second_width}); + test::FillValues<float>(&second_float, + {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); + Tensor second_quantized = + FloatTensorToQuantized<quint8>(second_float, second_min, second_max); + + const int expected_batch = first_batch + second_batch; + Tensor expected_float(DT_FLOAT, {expected_batch, first_height, first_width}); + test::FillValues<float>(&expected_float, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); + + AddInputFromArray<int32>(TensorShape({}), {0}); + AddInputFromArray<quint8>(first_quantized.shape(), + first_quantized.flat<quint8>()); + AddInputFromArray<quint8>(second_quantized.shape(), + second_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({}), {first_min}); + AddInputFromArray<float>(TensorShape({}), {second_min}); + AddInputFromArray<float>(TensorShape({}), {first_max}); + AddInputFromArray<float>(TensorShape({}), {second_max}); + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, output_float, 0.2); +} + +TEST_F(QuantizedConcatTest, Small32Bit) { + TestSmall32Bit(0.0f, 1200.0f, 0.0f, 2400.0f); +} + +TEST_F(QuantizedConcatTest, Small32BitSameRange) { + TestSmall32Bit(-2400.0f, 2400.0f, -2400.0f, 2400.0f); +} + +TEST_F(QuantizedConcatTest, Small32BitOneDimSameRangeAsOutput) { + TestSmall32Bit(-2400.0f, 2400.0f, -1200.0f, 2400.0f); +} + +void QuantizedConcatTest::TestSmall32Bit(float first_min, float first_max, + float second_min, float second_max) { + TF_ASSERT_OK(NodeDefBuilder("quantized_concat_op", "QuantizedConcat") + .Input(FakeInput(DT_INT32)) + .Input(FakeInput(2, DT_QINT32)) + .Input(FakeInput(2, DT_FLOAT)) + .Input(FakeInput(2, DT_FLOAT)) + .Attr("N", 2) + .Attr("T", DataTypeToEnum<qint32>::v()) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const int first_batch = 2; + const int first_height = 2; + const int first_width = 3; + Tensor first_float(DT_FLOAT, {first_batch, first_height, first_width}); + test::FillValues<float>(&first_float, {100, 200, 300, 400, 500, 600, 700, 800, + 900, 1000, 1100, 1200}); + Tensor first_quantized = + FloatTensorToQuantized<qint32>(first_float, first_min, first_max); + + const int second_batch = 2; + const int second_height = 2; + const int second_width = 3; + Tensor second_float(DT_FLOAT, 
{second_batch, second_height, second_width}); + test::FillValues<float>(&second_float, {1300, 1400, 1500, 1600, 1700, 1800, + 1900, 2000, 2100, 2200, 2300, 2400}); + Tensor second_quantized = + FloatTensorToQuantized<qint32>(second_float, second_min, second_max); + + const int expected_batch = first_batch + second_batch; + Tensor expected_float(DT_FLOAT, {expected_batch, first_height, first_width}); + test::FillValues<float>( + &expected_float, + {100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, + 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000, 2100, 2200, 2300, 2400}); + + AddInputFromArray<int32>(TensorShape({}), {0}); + AddInputFromArray<qint32>(first_quantized.shape(), + first_quantized.flat<qint32>()); + AddInputFromArray<qint32>(second_quantized.shape(), + second_quantized.flat<qint32>()); + AddInputFromArray<float>(TensorShape({}), {first_min}); + AddInputFromArray<float>(TensorShape({}), {second_min}); + AddInputFromArray<float>(TensorShape({}), {first_max}); + AddInputFromArray<float>(TensorShape({}), {second_max}); + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, output_float, 0.2); +} + +TEST_F(QuantizedConcatTest, SecondDim8Bit) { + TestSecondDim8Bit(-10.0f, 150.0f, 0.0f, 200.0f); +} + +TEST_F(QuantizedConcatTest, SecondDim8BitSameRange) { + TestSecondDim8Bit(-10.0f, 150.0f, -10.0f, 150.0f); +} + +void QuantizedConcatTest::TestSecondDim8Bit(float first_min, float first_max, + float second_min, + float second_max) { + TF_ASSERT_OK(NodeDefBuilder("quantized_concat_op", "QuantizedConcat") + .Input(FakeInput(DT_INT32)) + .Input(FakeInput(2, DT_QUINT8)) + .Input(FakeInput(2, DT_FLOAT)) + .Input(FakeInput(2, DT_FLOAT)) + .Attr("N", 2) + .Attr("T", DataTypeToEnum<quint8>::v()) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const int first_batch = 2; + const int first_height = 2; + const int first_width = 3; + Tensor first_float(DT_FLOAT, {first_batch, first_height, first_width}); + test::FillValues<float>(&first_float, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + Tensor first_quantized = + FloatTensorToQuantized<quint8>(first_float, first_min, first_max); + + const int second_batch = 2; + const int second_height = 2; + const int second_width = 3; + Tensor second_float(DT_FLOAT, {second_batch, second_height, second_width}); + test::FillValues<float>(&second_float, + {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); + Tensor second_quantized = + FloatTensorToQuantized<quint8>(second_float, second_min, second_max); + + const int expected_height = first_height + second_height; + Tensor expected_float(DT_FLOAT, {first_batch, expected_height, first_width}); + test::FillValues<float>(&expected_float, + {1, 2, 3, 4, 5, 6, 13, 14, 15, 16, 17, 18, + 7, 8, 9, 10, 11, 12, 19, 20, 21, 22, 23, 24}); + + AddInputFromArray<int32>(TensorShape({}), {1}); + AddInputFromArray<quint8>(first_quantized.shape(), + first_quantized.flat<quint8>()); + AddInputFromArray<quint8>(second_quantized.shape(), + second_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({}), {first_min}); + AddInputFromArray<float>(TensorShape({}), {second_min}); + AddInputFromArray<float>(TensorShape({}), {first_max}); + AddInputFromArray<float>(TensorShape({}), {second_max}); + 
TF_ASSERT_OK(RunOpKernel()); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, output_float, 1.0); +} + +// For the benchmark, we set up two 2-dimensional tensors, each kDim1 x 'dim' +// in size, and concat them together along "concat_dimension". +// If <same_limits> is true, then both concatenated dimensions have the same +// quantized range; otherwise, they are set to different values. +template <typename T> +static void ConcatHelper(int iters, int concat_dimension, bool same_limits, + int dim2) { + testing::StopTiming(); + Graph* g = new Graph(OpRegistry::Global()); + + DataType dt = DataTypeToEnum<T>::v(); + const int kDim1 = 100; + TensorShape shape({kDim1, dim2}); + + Tensor concat_dim = test::AsScalar<int32>(concat_dimension); + Tensor in0(dt, shape); + in0.flat<T>().setRandom(); + Tensor in1(dt, shape); + in1.flat<T>().setRandom(); + + Tensor mins0 = test::AsScalar<float>(-1.0); + Tensor maxes0 = test::AsScalar<float>(1.0); + Tensor mins1 = test::AsScalar<float>(same_limits ? -1.0 : -255.0); + Tensor maxes1 = test::AsScalar<float>(same_limits ? 1.0 : 255.0); + + Node* node; + TF_CHECK_OK(NodeBuilder(g->NewName("n"), "QuantizedConcat") + .Input(Constant(g, concat_dim)) + .Input({Constant(g, in0), Constant(g, in1)}) + .Input({Constant(g, mins0), Constant(g, mins1)}) + .Input({Constant(g, maxes0), Constant(g, maxes1)}) + .Attr("N", 2) + .Attr("T", dt) + .Finalize(g, &node)); + + testing::BytesProcessed(static_cast<int64>(iters) * + ((kDim1 * dim2) + (kDim1 * dim2)) * sizeof(T)); + testing::StartTiming(); + test::Benchmark("cpu", g).Run(iters); + testing::UseRealTime(); +} + +static void BM_QConcatDim0SameLimitQInt32(int iters, int dim2) { + ConcatHelper<qint32>(iters, 0 /* concat_dimension */, true /* same_limits */, + dim2); +} + +static void BM_QConcatDim1SameLimitQInt32(int iters, int dim2) { + ConcatHelper<qint32>(iters, 1 /* concat_dimension */, true /* same_limits */, + dim2); +} + +static void BM_QConcatDim0DifferLimitQInt32(int iters, int dim2) { + ConcatHelper<qint32>(iters, 0 /* concat_dimension */, false /* same_limits */, + dim2); +} + +static void BM_QConcatDim1DifferLimitQInt32(int iters, int dim2) { + ConcatHelper<qint32>(iters, 1 /* concat_dimension */, false /* same_limits */, + dim2); +} + +BENCHMARK(BM_QConcatDim0SameLimitQInt32)->Arg(1000)->Arg(20000)->Arg(100000); +BENCHMARK(BM_QConcatDim1SameLimitQInt32)->Arg(1000)->Arg(20000)->Arg(100000); +BENCHMARK(BM_QConcatDim0DifferLimitQInt32)->Arg(1000)->Arg(20000)->Arg(100000); +BENCHMARK(BM_QConcatDim1DifferLimitQInt32)->Arg(1000)->Arg(20000)->Arg(100000); + +static void BM_QConcatDim0SameLimitQUint8(int iters, int dim2) { + ConcatHelper<qint32>(iters, 0 /* concat_dimension */, true /* same_limits */, + dim2); +} + +static void BM_QConcatDim1SameLimitQUint8(int iters, int dim2) { + ConcatHelper<qint32>(iters, 1 /* concat_dimension */, true /* same_limits */, + dim2); +} + +static void BM_QConcatDim0DifferLimitQUint8(int iters, int dim2) { + ConcatHelper<qint32>(iters, 0 /* concat_dimension */, false /* same_limits */, + dim2); +} + +static void BM_QConcatDim1DifferLimitQUint8(int iters, int dim2) { + ConcatHelper<qint32>(iters, 1 /* concat_dimension */, false /* same_limits */, + dim2); +} + 
+BENCHMARK(BM_QConcatDim0SameLimitQUint8)->Arg(1000)->Arg(20000)->Arg(100000); +BENCHMARK(BM_QConcatDim1SameLimitQUint8)->Arg(1000)->Arg(20000)->Arg(100000); +BENCHMARK(BM_QConcatDim0DifferLimitQUint8)->Arg(1000)->Arg(20000)->Arg(100000); +BENCHMARK(BM_QConcatDim1DifferLimitQUint8)->Arg(1000)->Arg(20000)->Arg(100000); + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc b/tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc new file mode 100644 index 0000000000..b25bff45a1 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc @@ -0,0 +1,526 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Implements quantized eight-bit versions of the convolution operations. + +#include <algorithm> +#include <vector> + +#include "public/gemmlowp.h" +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/contrib/quantization/kernels/reference_gemm.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/padding.h" + +namespace tensorflow { + +// This functor implements the convolution operation in as simple a form as +// possible. It won't give great performance, but it is very useful for +// stepping through and instrumenting for debugging, creating minimal benchmarks +// to prototype with, and sharing with teams that want to run this outside of +// our environment. +// With that in mind, I've avoided using anything except pretty standard C++ +// types. This is especially noticeable in the data access through raw array +// indexing. It's deliberate in this case though, since it makes the underlying +// memory order very explicit, which is important for both inspecting memory +// contents during debugging and for specifying what we expect to others. +// The memory layout of the data is, from biggest stride to smallest: +// input_data = [input_batches, input_height, input_width, input_depth] +// filter_data = [filter_height, filter_width, input_depth, filter_count] +// output_data = [input_batches, output_height, output_width, filter_count] +template <class T1, class T2, class T3> +class ReferenceConvFunctor { + public: + void operator()(OpKernelContext* op_context, const T1* input_data, + int input_batches, int input_height, int input_width, + int input_depth, int input_offset, const T2* filter_data, + int filter_height, int filter_width, int filter_count, + int filter_offset, int stride, Padding padding, + T3* output_data, int output_height, int output_width, + int output_shift, int output_offset, int output_mult) { + // Set up some constants we need for the output down-shifting and + // saturation. 
+ const int32 highest = static_cast<int32>(Eigen::NumTraits<T3>::highest()); + const int32 lowest = static_cast<int32>(Eigen::NumTraits<T3>::lowest()); + + // When we're converting the 32 bit accumulator to a lower bit depth, we + // need to add on 0.5 in fixed-point terms to make the operation round half + // up towards positive infinity, rather than a floor. + // We also need to watch out for the case when there's no down shift, + // because a left shift by a negative number gives undefined results. + const int32 rounding = (output_shift < 1) ? 0 : (1 << (output_shift - 1)); + + // The two different padding modes we support can be a bit confusing. SAME + // means we're trying to produce an output image that's the same size as the + // input. It's complicated by stride, which shrinks the output image by a + // a factor, but it means we end up sampling from outside the borders of the + // input. These out-of-bounds values are read as zeroes. VALID means only + // produce output values where the filters can read all their values from + // within the input image. It effectively removes the margins of the output + // image compared to the one produced by SAME. Stride complicates this + // definition though, because it can result in the right and bottom filter + // patches sampling from outside the borders if it's greater than 1. + // Most of the logic for sorting this all out is done before this function, + // when we calculate the output size, but the positioning of the origin of + // the filters is different between the two modes, since SAME positions the + // first filter off the edge of the input. + int filter_left_offset; + int filter_top_offset; + if (padding == VALID) { + filter_left_offset = + ((output_width - 1) * stride + filter_width - input_width) / 2; + filter_top_offset = + ((output_height - 1) * stride + filter_height - input_height) / 2; + } else { + filter_left_offset = + ((output_width - 1) * stride + filter_width - input_width) / 2; + filter_top_offset = + ((output_height - 1) * stride + filter_height - input_height) / 2; + } + + // If we've got multiple images in our input, work through each of them. + for (int batch = 0; batch < input_batches; ++batch) { + // Walk through all the output image values, sliding the filter to + // different + // positions in the input. + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + // Each filter kernel produces one output channel. + for (int out_channel = 0; out_channel < filter_count; ++out_channel) { + // We're going to calculate a single output value, which means we + // need to multiply a three dimensional kernel of weights against + // the current location within the input image. + /* + *-------------------------------... + |\ ^ + | \in_depth + | \ v + | *-------------------------------... + | | ^ + | | in_y_origin + | | v \ + | |<in_x_origin>*---*^ + | | \| |filter_height + . | *---*v + . | <---> + . filter_width + . + */ + const int in_x_origin = (out_x * stride) - filter_left_offset; + const int in_y_origin = (out_y * stride) - filter_top_offset; + int32 total = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + for (int in_channel = 0; in_channel < input_depth; + ++in_channel) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + int32 input_value; + // If the location is outside the bounds of the input image, + // use zero as a default value. 
+ if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height)) { + const T1 input_source_value = + input_data[(batch * input_height * input_width * + input_depth) + + (in_y * input_width * input_depth) + + (in_x * input_depth) + in_channel]; + // We're promoting the T1 type to a higher bit depth here as + // we do the subtraction. + input_value = + static_cast<int32>(input_source_value) - input_offset; + } else { + input_value = 0; + } + const T2 filter_source_value = + filter_data[(filter_y * filter_width * input_depth * + filter_count) + + (filter_x * input_depth * filter_count) + + (in_channel * filter_count) + out_channel]; + // Another promotion to 32 bit, as above. + const int32 filter_value = + static_cast<int32>(filter_source_value) - filter_offset; + total += (input_value * filter_value); + } + } + } + // Here we're applying scale factors to compress the 32 bit + // accumulated total to a potentially lower bit depth. + const int32_t output = + ((((total + output_offset) * output_mult) + rounding) >> + output_shift); + // We need to saturate the results against the largest and smallest + // values that can be represented in this type. + const int32 top_clamped_output = std::min(output, highest); + const int32 clamped_output = std::max(top_clamped_output, lowest); + output_data[(batch * output_height * output_width * filter_count) + + (out_y * output_width * filter_count) + + (out_x * filter_count) + out_channel] = clamped_output; + } + } + } + } + } +}; + +// Implements convolution as a two stage process, first packing the patches of +// the input image into columns (im2col) and then running GEMM to produce the +// final result. +// TODO(petewarden) - We need to update gemmlowp to support 32-bit outputs +// before we can re-enable this path. +template <class T1, class T2, class T3> +class Im2ColConvFunctor { + public: + void operator()(OpKernelContext* op_context, const T1* input_data, + int input_batches, int input_height, int input_width, + int input_depth, int input_offset, const T2* filter_data, + int filter_height, int filter_width, int filter_count, + int filter_offset, int stride, Padding padding, + T3* output_data, int output_height, int output_width, + int output_shift, int output_offset, int output_mult) { + if (input_offset < 0) { + // Only log the first few occurrences of this warning. + static int warning_count = 0; + if (warning_count < 10) { + ++warning_count; + LOG(WARNING) + << "Zero is not representable in the quantized range used by the" + << " input. This means QuantizedConv2d has to fall back to a slow" + << " implementation, since the border of zero values can't be" + << " represented easily. 
You should try to construct graphs that" + << " avoid this situation."; + } + ReferenceConvFunctor<T1, T2, T3> conv_functor; + conv_functor(op_context, input_data, input_batches, input_height, + input_width, input_depth, input_offset, filter_data, + filter_height, filter_width, filter_count, filter_offset, + stride, padding, output_data, output_height, output_width, + output_shift, output_offset, output_mult); + return; + } + + CHECK_GT(output_width, 0); + CHECK_GT(output_height, 0); + int filter_left_offset; + int filter_top_offset; + if (padding == VALID) { + filter_left_offset = + ((output_width - 1) * stride + filter_width - input_width) / 2; + filter_top_offset = + ((output_height - 1) * stride + filter_height - input_height) / 2; + } else { + filter_left_offset = + ((output_width - 1) * stride + filter_width - input_width) / 2; + filter_top_offset = + ((output_height - 1) * stride + filter_height - input_height) / 2; + } + + // The im2col buffer has # of patches rows, and # of filters cols. + // It's laid out like this, in row major order in memory: + // < filter value count > + // ^ +---------------------+ + // patch | | + // count | | + // v +---------------------+ + // Each patch row contains a filter_width x filter_height patch of the + // input, with the depth channel as the most contiguous in memory, followed + // by the width, then the height. This is the standard memory order in the + // image world if it helps to visualize it. + const int filter_value_count = filter_width * filter_height * input_depth; + const int patch_count = input_batches * output_width * output_height; + const int im2col_size = patch_count * filter_value_count; + // TODO(petewarden) - Memory allocation can be very slow on Android. Can we + // optimize this by keeping the scratch buffer around? + std::unique_ptr<T1[]> im2col_buffer(new T1[im2col_size]); + + for (int batch = 0; batch < input_batches; ++batch) { + const T1* input_batch_start = + input_data + (batch * input_height * input_width * input_depth); + for (int out_y = 0; out_y < output_height; ++out_y) { + const int in_y_origin = (out_y * stride) - filter_top_offset; + for (int out_x = 0; out_x < output_width; ++out_x) { + const int in_x_origin = (out_x * stride) - filter_left_offset; + const int patch_index = (batch * output_width * output_height) + + (out_y * output_width) + out_x; + T1* im2col_patch_start = + im2col_buffer.get() + (patch_index * filter_value_count); + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + const int in_y = in_y_origin + filter_y; + T1* im2col_row_start = + im2col_patch_start + (filter_y * filter_width * input_depth); + // If we're off the top or the bottom of the input, fill the whole + // row with zeroes. + if ((in_y < 0) || (in_y >= input_height)) { + T1* im2col_row_end = + im2col_row_start + (filter_width * input_depth); + // We'll be subtracting this offset during the calculations + // so to get an actual zero after that bias we need to set + // it to input_offset here. + std::fill(im2col_row_start, im2col_row_end, input_offset); + } else { + // What we're doing here is trying to copy and fill the im2col + // buffer as efficiently as possible, using functions to set or + // duplicate values en masse. We know we don't have to worry about + // vertical edges because we dealt with that case above, so we + // just need to handle filters that overlap the left or right + // edges. 
Here's what that looks like: + // + // < left_zero_count > < center_copy_count > < right_zero_count > + // +------------------+---------------------+--------------------+ + // | (filter) | (image) | (filter) | + // +------------------+---------------------+--------------------+ + // in_x_origin 0 input_width in_x_end + // + // In reality it's unlikely that a filter patch will be wider + // than an input, but this shows all the edge cases. + // We use std::fill() to set the left and right sections to zeroes + // and std::copy() to copy over the input data for the center. + const int in_x_end = in_x_origin + filter_width; + const int left_zero_count = std::max(0, 0 - in_x_origin); + const int right_zero_count = std::max(0, in_x_end - input_width); + const int center_copy_count = + filter_width - (left_zero_count + right_zero_count); + if (left_zero_count > 0) { + T1* im2col_left_start = im2col_row_start; + T1* im2col_left_end = + im2col_left_start + (left_zero_count * input_depth); + std::fill(im2col_left_start, im2col_left_end, input_offset); + } + if (center_copy_count > 0) { + const T1* input_row_start = + input_batch_start + (in_y * input_width * input_depth) + + (std::max(0, in_x_origin) * input_depth); + const T1* input_row_end = + input_row_start + (center_copy_count * input_depth); + T1* im2col_center_start = + im2col_row_start + (left_zero_count * input_depth); + std::copy(input_row_start, input_row_end, im2col_center_start); + } + if (right_zero_count > 0) { + T1* im2col_right_start = + im2col_row_start + + ((left_zero_count + center_copy_count) * input_depth); + T1* im2col_right_end = + im2col_right_start + (right_zero_count * input_depth); + std::fill(im2col_right_start, im2col_right_end, input_offset); + } + } + } + } + } + } + + CHECK_GT(patch_count, 0); + CHECK_GT(filter_count, 0); + CHECK_GT(filter_value_count, 0); + + const bool transpose_a = false; + const bool transpose_b = false; + const bool transpose_c = false; + const int m = patch_count; + const int n = filter_count; + const int k = filter_value_count; + const int lda = filter_value_count; + const int ldb = filter_count; + const int ldc = filter_count; + // The gemmlowp optimized library only works for a particular set of data + // types, so check if we meet those requirements and + // fall back to a slower reference implementation if not. + if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() && + std::is_same<T3, qint32>() && (output_offset == 0) && + (output_mult == 1) && (output_shift == 0)) { + const uint8* im2col_data_as_uint8 = &(im2col_buffer.get()->value); + const uint8* filter_data_as_uint8 = &(filter_data->value); + int32* output_data_as_int32 = &(output_data->value); + // All of the transpose_* variables are currently compile-time consts, so + // we could just hard-code these values too, but that would break if + // anybody changed those values in the future (e.g. to match the ability + // of MatMul to specify them as attributes). We're using a verbose + // approach of deriving the order values from the transpose variables to + // be able to catch any changes like that. + static const gemmlowp::MapOrder ResultOrder = + !transpose_c ? gemmlowp::MapOrder::RowMajor + : gemmlowp::MapOrder::ColMajor; + static const gemmlowp::MapOrder LhsOrder = + !transpose_a ? gemmlowp::MapOrder::RowMajor + : gemmlowp::MapOrder::ColMajor; + static const gemmlowp::MapOrder RhsOrder = + !transpose_b ? 
gemmlowp::MapOrder::RowMajor + : gemmlowp::MapOrder::ColMajor; + gemmlowp::MatrixMap<const std::uint8_t, LhsOrder> lhs( + im2col_data_as_uint8, m, k, lda); + gemmlowp::MatrixMap<const std::uint8_t, RhsOrder> rhs( + filter_data_as_uint8, k, n, ldb); + gemmlowp::MatrixMap<std::int32_t, ResultOrder> result( + output_data_as_int32, m, n, ldc); + const std::tuple<> empty_pipeline = {}; + + auto& worker_threads = + *(op_context->device()->tensorflow_cpu_worker_threads()); + TensorflowGemmContext context(worker_threads.num_threads, + worker_threads.workers); + gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::int32_t, + gemmlowp::DefaultL8R8BitDepthParams>( + &context, lhs, rhs, &result, -input_offset, -filter_offset, + empty_pipeline); + } else { + ReferenceGemm<T1, T2, T3>(transpose_a, transpose_b, transpose_c, m, n, k, + im2col_buffer.get(), input_offset, lda, + filter_data, filter_offset, ldb, output_data, + output_shift, output_offset, output_mult, ldc); + } + } +}; + +template <class T1, class T2, class T3, + template <class TF1, class TF2, class TF3> class ConvFunctor> +class QuantizedConv2DOp : public OpKernel { + public: + explicit QuantizedConv2DOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_)); + OP_REQUIRES(context, strides_.size() == 4, + errors::InvalidArgument("Sliding window strides field must " + "specify 4 dimensions")); + OP_REQUIRES(context, strides_[1] == strides_[2], + errors::InvalidArgument( + "Current implementation only supports equal length " + "strides in the row and column dimensions.")); + OP_REQUIRES( + context, (strides_[0] == 1 && strides_[3] == 1), + errors::InvalidArgument("Current implementation does not yet support " + "strides in the batch and depth dimensions.")); + OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + } + + void Compute(OpKernelContext* context) override { + // Input tensor is of the following dimensions: + // [ batch, in_rows, in_cols, in_depth ] + const Tensor& input = context->input(0); + + // Input filter is of the following dimensions: + // [ filter_rows, filter_cols, in_depth, out_depth] + const Tensor& filter = context->input(1); + + // For 2D convolution, there should be 4 dimensions. + OP_REQUIRES(context, input.dims() == 4, + errors::InvalidArgument("input must be 4-dimensional", + input.shape().DebugString())); + OP_REQUIRES(context, filter.dims() == 4, + errors::InvalidArgument("filter must be 4-dimensional: ", + filter.shape().DebugString())); + + const float min_input = context->input(2).flat<float>()(0); + const float max_input = context->input(3).flat<float>()(0); + const float min_filter = context->input(4).flat<float>()(0); + const float max_filter = context->input(5).flat<float>()(0); + const int32 offset_input = + FloatToQuantizedUnclamped<T1>(0.0f, min_input, max_input); + const int32 offset_filter = + FloatToQuantizedUnclamped<T2>(0.0f, min_filter, max_filter); + const int32 offset_output = 0; + const int32 mult_output = 1; + const int32 shift_output = 0; + + // The last dimension for input is in_depth. It must be the same as the + // filter's in_depth. + const int64 in_depth = input.dim_size(3); + OP_REQUIRES( + context, in_depth == filter.dim_size(2), + errors::InvalidArgument("input and filter must have the same depth: ", + in_depth, " vs ", filter.dim_size(2))); + + // The last dimension for filter is out_depth. + const int64 out_depth = filter.dim_size(3); + + // The second dimension for input is rows/height. 
+ // The first dimension for filter is rows/height. + const int64 input_rows = input.dim_size(1); + const int64 filter_rows = filter.dim_size(0); + + // The third dimension for input is columns/width. + // The second dimension for filter is columns/width. + const int64 input_cols = input.dim_size(2); + const int64 filter_cols = filter.dim_size(1); + + // The first dimension for input is batch. + const int64 batch = input.dim_size(0); + + // For now we take the stride from the second dimension only (we + // assume row = col stride, and do not support striding on the + // batch or depth dimension). + const int stride = strides_[1]; + + int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0; + OP_REQUIRES_OK(context, + GetWindowedOutputSize(input_rows, filter_rows, stride, + padding_, &out_rows, &pad_rows)); + OP_REQUIRES_OK(context, + GetWindowedOutputSize(input_cols, filter_cols, stride, + padding_, &out_cols, &pad_cols)); + CHECK_GT(batch, 0); + CHECK_GT(out_rows, 0); + CHECK_GT(out_cols, 0); + CHECK_GT(out_depth, 0); + TensorShape out_shape({batch, out_rows, out_cols, out_depth}); + + // Output tensor is of the following dimensions: + // [ in_batch, out_rows, out_cols, out_depth ] + Tensor* output = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); + + // This will call different implementations (e.g. reference or optimized) + // depending on the template parameter. + ConvFunctor<T1, T2, T3> conv_functor; + conv_functor(context, input.flat<T1>().data(), batch, input_rows, + input_cols, in_depth, offset_input, filter.flat<T2>().data(), + filter_rows, filter_cols, out_depth, offset_filter, stride, + padding_, output->flat<T3>().data(), out_rows, out_cols, + shift_output, offset_output, mult_output); + + float min_output_value; + float max_output_value; + QuantizationRangeForMultiplication<T1, T2, T3>( + min_input, max_input, min_filter, max_filter, &min_output_value, + &max_output_value); + + Tensor* output_min = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); + output_min->flat<float>()(0) = min_output_value; + + Tensor* output_max = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max)); + output_max->flat<float>()(0) = max_output_value; + } + + private: + std::vector<int32> strides_; + Padding padding_; +}; + +// Right now we only support taking two eight bit inputs, and returning the +// results as signed 32-bit integers. +REGISTER_KERNEL_BUILDER( + Name("QuantizedConv2D") + .Device(DEVICE_CPU) + .TypeConstraint<quint8>("Tinput") + .TypeConstraint<quint8>("Tfilter") + .TypeConstraint<qint32>("out_type"), + QuantizedConv2DOp<quint8, quint8, qint32, Im2ColConvFunctor>); + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantized_conv_ops_test.cc b/tensorflow/contrib/quantization/kernels/quantized_conv_ops_test.cc new file mode 100644 index 0000000000..6a07004a92 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantized_conv_ops_test.cc @@ -0,0 +1,324 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <functional> +#include <memory> +#include <vector> + +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +class QuantizedConv2DTest : public OpsTestBase { + protected: +}; + +TEST_F(QuantizedConv2DTest, Small) { + const int stride = 1; + TF_ASSERT_OK(NodeDefBuilder("quantized_conv_op", "QuantizedConv2D") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("out_type", DataTypeToEnum<qint32>::v()) + .Attr("strides", {1, stride, stride, 1}) + .Attr("padding", "SAME") + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + const int depth = 1; + const int image_width = 4; + const int image_height = 3; + const int image_batch_count = 1; + // The image data should always be able to represent zero, to allow a fast + // implementation of border padding, so we set the min value to 0. + const float image_min = 0.0f; + const float image_max = 12.0f; + // The image matrix is: + // | 1 | 2 | 3 | 4 | + // | 5 | 6 | 7 | 8 | + // | 9 | 10 | 11 | 12 | + Tensor image_float(DT_FLOAT, + {image_batch_count, image_height, image_width, depth}); + test::FillValues<float>(&image_float, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + Tensor image_quantized = + FloatTensorToQuantized<quint8>(image_float, image_min, image_max); + + // The filter matrix is: + // | 1 | 4 | 7 | + // | 2 | 5 | 8 | + // | 3 | 6 | 9 | + const int filter_size = 3; + const int filter_count = 1; + const float filter_min = 1.0f; + const float filter_max = 9.0f; + Tensor filter_float(DT_FLOAT, + {filter_size, filter_size, depth, filter_count}); + test::FillValues<float>(&filter_float, {1, 4, 7, 2, 5, 8, 3, 6, 9}); + Tensor filter_quantized = + FloatTensorToQuantized<quint8>(filter_float, filter_min, filter_max); + + AddInputFromArray<quint8>(image_quantized.shape(), + image_quantized.flat<quint8>()); + AddInputFromArray<quint8>(filter_quantized.shape(), + filter_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {image_min}); + AddInputFromArray<float>(TensorShape({1}), {image_max}); + AddInputFromArray<float>(TensorShape({1}), {filter_min}); + AddInputFromArray<float>(TensorShape({1}), {filter_max}); + TF_ASSERT_OK(RunOpKernel()); + + // We're sliding the 3x3 filter across the 3x4 image, with accesses outside + // the input set to zero because we're using the 'SAME' padding mode. 
+ // The calculations behind the expected output are: + // (1*0)+(4*0)+(7*0)+(2*0)+(5*1)+(8*2)+(3*0)+(6*5)+(9*6)=105 + // (1*0)+(4*0)+(7*0)+(2*1)+(5*2)+(8*3)+(3*5)+(6*6)+(9*7)=150 + // (1*0)+(4*0)+(7*0)+(2*2)+(5*3)+(8*4)+(3*6)+(6*7)+(9*8)=183 + // (1*0)+(4*0)+(7*0)+(2*3)+(5*4)+(8*0)+(3*7)+(6*8)+(9*0)=95 + // (1*0)+(4*1)+(7*2)+(2*0)+(5*5)+(8*6)+(3*0)+(6*9)+(9*10)=235 + // (1*1)+(4*2)+(7*3)+(2*5)+(5*6)+(8*7)+(3*9)+(6*10)+(9*11)=312 + // (1*2)+(4*3)+(7*4)+(2*6)+(5*7)+(8*8)+(3*10)+(6*11)+(9*12)=357 + // (1*3)+(4*4)+(7*0)+(2*7)+(5*8)+(8*0)+(3*11)+(6*12)+(9*0)=178 + // (1*0)+(4*5)+(7*6)+(2*0)+(5*9)+(8*10)+(3*0)+(6*0)+(9*0)=187 + // (1*5)+(4*6)+(7*7)+(2*9)+(5*10)+(8*11)+(3*0)+(6*0)+(9*0)=234 + // (1*6)+(4*7)+(7*8)+(2*10)+(5*11)+(8*12)+(3*0)+(6*0)+(9*0)=261 + // (1*7)+(4*11)+(7*0)+(2*8)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)=121 + // This means we should end up with this matrix: + // | 105 | 150 | 183 | 95 | + // | 235 | 312 | 357 | 178 | + // | 187 | 234 | 261 | 121 | + const int expected_width = image_width; + const int expected_height = image_height * filter_count; + Tensor expected_float( + DT_FLOAT, TensorShape({image_batch_count, expected_height, expected_width, + filter_count})); + test::FillValues<float>(&expected_float, {105, 150, 183, 95, 235, 312, 357, + 178, 187, 234, 261, 121}); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, output_float, 1.0); +} + +TEST_F(QuantizedConv2DTest, Small32Bit) { + const int stride = 1; + TF_ASSERT_OK(NodeDefBuilder("quantized_conv_op", "QuantizedConv2D") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("out_type", DataTypeToEnum<qint32>::v()) + .Attr("strides", {1, stride, stride, 1}) + .Attr("padding", "SAME") + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + const int depth = 1; + const int image_width = 4; + const int image_height = 3; + const int image_batch_count = 1; + AddInputFromArray<quint8>( + TensorShape({image_batch_count, image_height, image_width, depth}), + {10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120}); + const int filter_size = 3; + const int filter_count = 1; + AddInputFromArray<quint8>( + TensorShape({filter_size, filter_size, depth, filter_count}), + {10, 40, 70, 20, 50, 80, 30, 60, 90}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + + TF_ASSERT_OK(RunOpKernel()); + const int expected_width = image_width; + const int expected_height = image_height * filter_count; + Tensor expected(DT_QINT32, TensorShape({image_batch_count, expected_height, + expected_width, filter_count})); + test::FillValues<qint32>( + &expected, {10500, 15000, 18300, 9500, 23500, 31200, 35700, 17800, 18700, + 23400, 26100, 12100}); + test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); +} + +TEST_F(QuantizedConv2DTest, OddPadding) { + const int stride = 2; + TF_ASSERT_OK(NodeDefBuilder("quantized_conv_op", "QuantizedConv2D") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + 
.Input(FakeInput(DT_FLOAT)) + .Attr("out_type", DataTypeToEnum<qint32>::v()) + .Attr("strides", {1, stride, stride, 1}) + .Attr("padding", "SAME") + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + const int depth = 1; + const int image_width = 4; + const int image_height = 4; + const int image_batch_count = 1; + AddInputFromArray<quint8>( + TensorShape({image_batch_count, image_height, image_width, depth}), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); + const int filter_size = 3; + const int filter_count = 1; + AddInputFromArray<quint8>( + TensorShape({filter_size, filter_size, depth, filter_count}), + {1, 2, 3, 4, 5, 6, 7, 8, 9}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + + TF_ASSERT_OK(RunOpKernel()); + const int expected_width = image_width / stride; + const int expected_height = (image_height * filter_count) / stride; + Tensor expected(DT_QINT32, TensorShape({image_batch_count, expected_height, + expected_width, filter_count})); + test::FillValues<qint32>(&expected, {348, 252, 274, 175}); + test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); +} + +TEST_F(QuantizedConv2DTest, OddPaddingBatch) { + const int stride = 2; + TF_ASSERT_OK(NodeDefBuilder("quantized_conv_op", "QuantizedConv2D") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("out_type", DataTypeToEnum<qint32>::v()) + .Attr("strides", {1, stride, stride, 1}) + .Attr("padding", "SAME") + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + const int depth = 1; + const int image_width = 4; + const int image_height = 4; + const int image_batch_count = 3; + AddInputFromArray<quint8>( + TensorShape({image_batch_count, image_height, image_width, depth}), + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); + const int filter_size = 3; + const int filter_count = 1; + AddInputFromArray<quint8>( + TensorShape({filter_size, filter_size, depth, filter_count}), + {1, 2, 3, 4, 5, 6, 7, 8, 9}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + + TF_ASSERT_OK(RunOpKernel()); + const int expected_width = image_width / stride; + const int expected_height = (image_height * filter_count) / stride; + Tensor expected(DT_QINT32, TensorShape({image_batch_count, expected_height, + expected_width, filter_count})); + test::FillValues<qint32>(&expected, {348, 252, 274, 175, // + 348, 252, 274, 175, // + 348, 252, 274, 175}); + test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); +} + +TEST_F(QuantizedConv2DTest, SmallWithNoZero) { + const int stride = 1; + TF_ASSERT_OK(NodeDefBuilder("quantized_conv_op", "QuantizedConv2D") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("out_type", DataTypeToEnum<qint32>::v()) + .Attr("strides", {1, stride, stride, 1}) + .Attr("padding", "SAME") + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const int depth = 1; + const int image_width = 4; + const int 
image_height = 3; + const int image_batch_count = 1; + // Here we're testing a slow implementation path, where zero is not + // representable in the image data and so simple border padding is not + // possible, so we have a min value greater than 0. + const float image_min = 1.0f; + const float image_max = 12.0f; + Tensor image_float(DT_FLOAT, + {image_batch_count, image_height, image_width, depth}); + test::FillValues<float>(&image_float, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + Tensor image_quantized = + FloatTensorToQuantized<quint8>(image_float, image_min, image_max); + const int filter_size = 3; + const int filter_count = 1; + const float filter_min = 1.0f; + const float filter_max = 9.0f; + Tensor filter_float(DT_FLOAT, + {filter_size, filter_size, depth, filter_count}); + test::FillValues<float>(&filter_float, {1, 4, 7, 2, 5, 8, 3, 6, 9}); + Tensor filter_quantized = + FloatTensorToQuantized<quint8>(filter_float, filter_min, filter_max); + AddInputFromArray<quint8>(image_quantized.shape(), + image_quantized.flat<quint8>()); + AddInputFromArray<quint8>(filter_quantized.shape(), + filter_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {image_min}); + AddInputFromArray<float>(TensorShape({1}), {image_max}); + AddInputFromArray<float>(TensorShape({1}), {filter_min}); + AddInputFromArray<float>(TensorShape({1}), {filter_max}); + TF_ASSERT_OK(RunOpKernel()); + const int expected_width = image_width; + const int expected_height = image_height * filter_count; + Tensor expected_float( + DT_FLOAT, TensorShape({image_batch_count, expected_height, expected_width, + filter_count})); + test::FillValues<float>(&expected_float, {105, 150, 183, 95, 235, 312, 357, + 178, 187, 234, 261, 121}); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, output_float, 1.0); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc b/tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc new file mode 100644 index 0000000000..18de2d1d97 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc @@ -0,0 +1,186 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Implements a quantized eight-bit version of the matmul operation. 
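
Both the convolution kernel above and the matmul kernel that follows rely on the same convention: each quantized buffer carries a float [min, max] range, the integer code that represents real zero is computed up front, and that offset is subtracted as values are promoted to 32 bits, so the accumulation effectively runs on signed numbers. The standalone sketch below illustrates that arithmetic for a quint8 range; `zero_offset_for_range` is a hypothetical helper written only for this illustration, not the `FloatToQuantizedUnclamped` function the kernels actually call.

```
#include <cmath>
#include <cstdint>
#include <cstdio>

// Hypothetical helper: the integer code that represents 0.0f in a quint8
// buffer whose codes 0..255 span [min, max]. Illustrative only; it mirrors
// the idea behind FloatToQuantizedUnclamped rather than reproducing it.
int32_t zero_offset_for_range(float min, float max) {
  const float scale = 255.0f / (max - min);
  return static_cast<int32_t>(std::round(-min * scale));
}

int main() {
  // A buffer quantized over [-12.0, 243.0] stores real 0.0 as code 12, so the
  // real value -1.0 is stored as code 11 (the convention the matmul tests use).
  const int32_t offset = zero_offset_for_range(-12.0f, 243.0f);
  const int32_t stored_code = 11;
  // Subtracting the offset during promotion to 32 bits recovers the signed
  // value that the accumulation loops actually multiply.
  std::printf("offset=%d, effective value=%d\n", offset, stored_code - offset);
  return 0;
}
```
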
+ +#include "public/gemmlowp.h" +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/contrib/quantization/kernels/reference_gemm.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { + +// We have to break this out as a separate function because there are multiple +// combinations of transpose attributes we need to support, and they have to be +// compile-time constants to work with the templates used internally. +template <bool TransposeA, bool TransposeB, bool TransposeC> +void GemmlowpMultiply(OpKernelContext* op_context, const quint8* a_data, + const quint8* b_data, qint32* c_data, int m, int n, int k, + int offset_a, int offset_b, int lda, int ldb, int ldc) { + const uint8* a_data_as_uint8 = &(a_data->value); + const uint8* b_data_as_uint8 = &(b_data->value); + int32* c_data_as_int32 = &(c_data->value); + static const gemmlowp::MapOrder ResultOrder = + !TransposeC ? gemmlowp::MapOrder::RowMajor : gemmlowp::MapOrder::ColMajor; + static const gemmlowp::MapOrder LhsOrder = + !TransposeA ? gemmlowp::MapOrder::RowMajor : gemmlowp::MapOrder::ColMajor; + static const gemmlowp::MapOrder RhsOrder = + !TransposeB ? gemmlowp::MapOrder::RowMajor : gemmlowp::MapOrder::ColMajor; + gemmlowp::MatrixMap<const std::uint8_t, LhsOrder> lhs(a_data_as_uint8, m, k, + lda); + gemmlowp::MatrixMap<const std::uint8_t, RhsOrder> rhs(b_data_as_uint8, k, n, + ldb); + gemmlowp::MatrixMap<std::int32_t, ResultOrder> result(c_data_as_int32, m, n, + ldc); + const std::tuple<> empty_pipeline = {}; + auto& worker_threads = + *(op_context->device()->tensorflow_cpu_worker_threads()); + TensorflowGemmContext context(worker_threads.num_threads, + worker_threads.workers); + gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::int32_t, + gemmlowp::DefaultL8R8BitDepthParams>( + &context, lhs, rhs, &result, -offset_a, -offset_b, empty_pipeline); +} + +template <class T1, class T2, class Toutput> +class QuantizedMatMulOp : public OpKernel { + public: + explicit QuantizedMatMulOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("transpose_a", &transpose_a_)); + OP_REQUIRES_OK(context, context->GetAttr("transpose_b", &transpose_b_)); + } + + void Compute(OpKernelContext* context) override { + const Tensor& a = context->input(0); + const Tensor& b = context->input(1); + const float min_a = context->input(2).flat<float>()(0); + const float max_a = context->input(3).flat<float>()(0); + const float min_b = context->input(4).flat<float>()(0); + const float max_b = context->input(5).flat<float>()(0); + + // Make sure that we have valid quantization ranges for the input buffers. + // If the difference between the min and max is negative or zero, it makes + // it hard to do meaningful intermediate operations on the values. + OP_REQUIRES(context, (max_a > min_a), + errors::InvalidArgument("max_a must be larger than min_a.")); + OP_REQUIRES(context, (max_b > min_b), + errors::InvalidArgument("max_b must be larger than min_b.")); + const int32 offset_a = FloatToQuantizedUnclamped<T1>(0.0f, min_a, max_a); + const int32 offset_b = FloatToQuantizedUnclamped<T2>(0.0f, min_b, max_b); + const int32 offset_c = 0; + const int32 mult_c = 1; + const int32 shift_c = 0; + + // Check that the dimensions of the two matrices are valid. 
+ OP_REQUIRES(context, TensorShapeUtils::IsMatrix(a.shape()), + errors::InvalidArgument("In[0] is not a matrix")); + OP_REQUIRES(context, TensorShapeUtils::IsMatrix(b.shape()), + errors::InvalidArgument("In[1] is not a matrix")); + Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair; + dim_pair[0].first = transpose_a_ ? 0 : 1; + dim_pair[0].second = transpose_b_ ? 1 : 0; + + OP_REQUIRES(context, + a.dim_size(dim_pair[0].first) == b.dim_size(dim_pair[0].second), + errors::InvalidArgument("Matrix size-compatible: In[0]: ", + a.shape().DebugString(), ", In[1]: ", + b.shape().DebugString())); + + OP_REQUIRES(context, ((shift_c >= 0) && (shift_c <= 31)), + errors::InvalidArgument("shift_c must be between 0 and 31, " + "inclusive.")); + + int a_dim_remaining = 1 - dim_pair[0].first; + int b_dim_remaining = 1 - dim_pair[0].second; + TensorShape out_shape( + {a.dim_size(a_dim_remaining), b.dim_size(b_dim_remaining)}); + Tensor* c = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &c)); + CHECK(c); + + const T1* a_data = a.flat<T1>().data(); + const T2* b_data = b.flat<T2>().data(); + Toutput* c_data = c->flat<Toutput>().data(); + + const bool transpose_c = false; + const size_t m = a.dim_size(a_dim_remaining); + const size_t n = b.dim_size(b_dim_remaining); + const size_t k = a.dim_size(dim_pair[0].first); + const size_t lda = a.dim_size(1); + const size_t ldb = b.dim_size(1); + const size_t ldc = n; + + // The gemmlowp optimized library only works for a particular set of data + // types, so check if we meet those requirements and + // fall back to a slower reference implementation if not. + if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() && + std::is_same<Toutput, qint32>() && (offset_c == 0) && (mult_c == 1) && + (shift_c == 0) && (transpose_c == false)) { + if (transpose_a_) { + if (transpose_b_) { + GemmlowpMultiply<true, true, false>(context, a_data, b_data, c_data, + m, n, k, offset_a, offset_b, lda, + ldb, ldc); + } else { + GemmlowpMultiply<true, false, false>(context, a_data, b_data, c_data, + m, n, k, offset_a, offset_b, lda, + ldb, ldc); + } + } else { + if (transpose_b_) { + GemmlowpMultiply<false, true, false>(context, a_data, b_data, c_data, + m, n, k, offset_a, offset_b, lda, + ldb, ldc); + } else { + GemmlowpMultiply<false, false, false>(context, a_data, b_data, c_data, + m, n, k, offset_a, offset_b, + lda, ldb, ldc); + } + } + } else { + ReferenceGemm<T1, T2, Toutput>( + transpose_a_, transpose_b_, transpose_c, m, n, k, a_data, offset_a, + lda, b_data, offset_b, ldb, c_data, shift_c, offset_c, mult_c, ldc); + } + + float min_c_value; + float max_c_value; + QuantizationRangeForMultiplication<T1, T2, Toutput>( + min_a, max_a, min_b, max_b, &min_c_value, &max_c_value); + Tensor* c_min = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(1, {}, &c_min)); + c_min->flat<float>()(0) = min_c_value; + + Tensor* c_max = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(2, {}, &c_max)); + c_max->flat<float>()(0) = max_c_value; + } + + private: + bool transpose_a_; + bool transpose_b_; +}; + +REGISTER_KERNEL_BUILDER(Name("QuantizedMatMul") + .Device(DEVICE_CPU) + .TypeConstraint<quint8>("T1") + .TypeConstraint<quint8>("T2") + .TypeConstraint<qint32>("Toutput"), + QuantizedMatMulOp<quint8, quint8, qint32>); + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc b/tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc new file mode 100644 index 
0000000000..3eea751818 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc @@ -0,0 +1,336 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <functional> +#include <memory> +#include <vector> + +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +class QuantizedMatMulTest : public OpsTestBase { + protected: +}; + +// Runs two small matrices through the operator, and leaves all the parameters +// at their default values. +TEST_F(QuantizedMatMulTest, Small_NoParams) { + TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum<qint32>::v()) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + // A matrix is: + // | 1 | 2 | 3 | + // | 4 | 5 | 6 | + AddInputFromArray<quint8>(TensorShape({2, 3}), {1, 2, 3, 4, 5, 6}); + // B matrix is: + // | 7 | 8 | 9 | 10 | + // | 11 | 12 | 13 | 14 | + // | 15 | 16 | 17 | 18 | + AddInputFromArray<quint8>(TensorShape({3, 4}), + {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + + TF_ASSERT_OK(RunOpKernel()); + // Here are the results we expect, from hand calculations: + // (1 * 7) + (2 * 11) + (3 * 15) = 74 + // (1 * 8) + (2 * 12) + (3 * 16) = 80 + // (1 * 9) + (2 * 13) + (3 * 17) = 86 + // (1 * 10) + (2 * 14) + (3 * 18) = 92 + // (4 * 7) + (5 * 11) + (6 * 15) = 173 + // (4 * 8) + (5 * 12) + (6 * 16) = 188 + // (4 * 9) + (5 * 13) + (6 * 17) = 203 + // (4 * 10) + (5 * 14) + (6 * 18) = 218 + Tensor expected(allocator(), DT_QINT32, TensorShape({2, 4})); + test::FillValues<qint32>(&expected, {74, 80, 86, 92, 173, 188, 203, 218}); + test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); +} + +// This test multiplies two 1x1 8bit matrices, and compares the +// results with hand-calculated expectations. 
+TEST_F(QuantizedMatMulTest, VerySmall_WithParams) { + // These parameters reflect a typical production usage of eight-bit matmuls + // in an Inception-style network. + const bool transpose_a = true; + const int a_rows = 1; + const int a_cols = 1; + const int b_rows = 1; + const int b_cols = 1; + const bool transpose_b = false; + TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum<qint32>::v()) + .Attr("transpose_a", transpose_a) + .Attr("transpose_b", transpose_b) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + // The A matrix is: + // | -1 | + // The input array only contains unsigned bytes, so we specify the actual + // values as n+a_offset, where a_offset is 12 above. For example that means -1 + // is represented as -1 + 12, or 11. + // We have set the transpose_a flag to true, so the matrix is transposed, and + // for filling the the values the in-memory storage order is effectively + // column major, rather than the default row-major. + AddInputFromArray<quint8>(TensorShape({a_rows, a_cols}), {11}); + + // The B matrix is: + // | 1 | + AddInputFromArray<quint8>(TensorShape({b_rows, b_cols}), {0}); + AddInputFromArray<float>(TensorShape({1}), {-12.0f}); + AddInputFromArray<float>(TensorShape({1}), {243.0f}); + AddInputFromArray<float>(TensorShape({1}), {1.0f}); + AddInputFromArray<float>(TensorShape({1}), {256.0f}); + TF_ASSERT_OK(RunOpKernel()); + // We're requesting C = A.transposed() * B, + // so we expect to get these results: + // 1*-1 = -1 + // | -1 | + Tensor expected(allocator(), DT_QINT32, TensorShape({a_cols, b_cols})); + test::FillValues<qint32>(&expected, {-1}); + test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); +} + +// This test multiplies two 1x1 8bit matrices, but sets an invalid quantization +// range, so we expect to get an error +TEST_F(QuantizedMatMulTest, VerySmall_BadRange) { + // These parameters reflect a typical production usage of eight-bit matmuls + // in an Inception-style network. + const bool transpose_a = true; + const int a_rows = 1; + const int a_cols = 1; + const int b_rows = 1; + const int b_cols = 1; + const bool transpose_b = false; + TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum<qint32>::v()) + .Attr("transpose_a", transpose_a) + .Attr("transpose_b", transpose_b) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + // The A matrix is: + // | -1 | + AddInputFromArray<quint8>(TensorShape({a_rows, a_cols}), {11}); + + // The B matrix is: + // | 1 | + AddInputFromArray<quint8>(TensorShape({b_rows, b_cols}), {0}); + AddInputFromArray<float>(TensorShape({1}), {-12.0f}); + AddInputFromArray<float>(TensorShape({1}), {243.0f}); + // Here we set the range so that the min and max are equal, so we expect to + // see an error when we run. + AddInputFromArray<float>(TensorShape({1}), {1.0f}); + AddInputFromArray<float>(TensorShape({1}), {1.0f}); + EXPECT_EQ(::tensorflow::error::INVALID_ARGUMENT, RunOpKernel().code()); +} + +// This test multiplies a couple of small 8-bit matrices, and compares the +// results with hand-calculated expectations. 
It uses shifts and offsets to +// control the range of the outputs. +TEST_F(QuantizedMatMulTest, Small_WithParams) { + // These parameters reflect a typical production usage of eight-bit matmuls + // in an Inception-style network. + const bool transpose_a = true; + const int a_rows = 3; + const int a_cols = 4; + const int b_rows = 3; + const int b_cols = 2; + const bool transpose_b = false; + TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum<qint32>::v()) + .Attr("transpose_a", transpose_a) + .Attr("transpose_b", transpose_b) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + // The A matrix is: + // | -1 | -5 | -9 | + // | -2 | -6 | -10 | + // | -3 | -7 | -11 | + // | -4 | -8 | -12 | + // The input array only contains unsigned bytes, so we specify the actual + // values as n+a_offset, where a_offset is 12 above. For example that means -1 + // is represented as -1 + 12, or 11. + // We have set the transpose_a flag to true, so the matrix is transposed, and + // for filling the the values the in-memory storage order is effectively + // column major, rather than the default row-major. + AddInputFromArray<quint8>(TensorShape({a_rows, a_cols}), + { + 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + }); + + // The B matrix is: + // | 1 | 4| + // | 2 | 5| + // | 3 | 6| + AddInputFromArray<quint8>(TensorShape({b_rows, b_cols}), { + 1, 4, 2, 5, 3, 6, + }); + AddInputFromArray<float>(TensorShape({1}), {-12.0f}); + AddInputFromArray<float>(TensorShape({1}), {243.0f}); + AddInputFromArray<float>(TensorShape({1}), {0}); + AddInputFromArray<float>(TensorShape({1}), {255.0f}); + TF_ASSERT_OK(RunOpKernel()); + // We're requesting C = A.transposed() * B, + // so we expect to get these results: + // 1*-1 + 2*-5 + 3*-9 = -38 + // 4*-1 + 5*-5 + 6*-9 = -83 + // 1*-2 + 2*-6 + 3*-10 = -44 + // 4*-2 + 5*-6 + 6*-10 = -98 + // 1*-3 + 2*-7 + 3*-11 = -50 + // 4*-3 + 5*-7 + 6*-11 = -113 + // 1*-4 + 2*-8 + 3*-12 = -56 + // 4*-4 + 5*-8 + 6*-12 = -128 + // | -38 | -83 | + // | -44 | -98 | + // | -50 | -113 | + // | -56 | -128 | + Tensor expected(allocator(), DT_QINT32, TensorShape({a_cols, b_cols})); + test::FillValues<qint32>(&expected, + { + -38, -83, -44, -98, -50, -113, -56, -128, + }); + test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); +} + +// This test multiplies a couple of medium-sized 8-bit matrices, and tests the +// results against what we saw from running a float MatMul with equivalent +// inputs. 
+TEST_F(QuantizedMatMulTest, Medium_WithParams) { + const bool transpose_a = true; + const bool transpose_b = false; + TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum<qint32>::v()) + .Attr("transpose_a", transpose_a) + .Attr("transpose_b", transpose_b) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + const int a_rows = 8; + const int a_cols = 8; + const float a_min = -2164.25f; + const float a_max = 2006.27f; + Tensor a_float(DT_FLOAT, {a_rows, a_cols}); + test::FillValues<float>( + &a_float, + {-1014.12, -157.382, -810.17, 1435.28, 1016.37, 219.684, -316.054, + -2164.25, 2006.27, -547.444, 857.376, 404.376, 9.72115, 332.588, + 194.385, -286.57, 26.062, 23.1125, 110.436, 247.055, -127.683, + -376.275, -124.81, -846.826, -77.1507, 305.581, -202.747, 12.9528, + 9.64886, 872.686, 40.9069, 197.816, 44.16, -306.768, -1457.52, + -368.939, -1049.42, -486.353, 1745.87, 95.7695, 395.773, -254.333, + -404.27, 787.16, -2.44114, 199.37, -1024.08, 784.901, 235.055, + -42.7295, 241.498, -245.365, 470.763, 186.159, 186.579, -220.163, + 1304.58, 386.272, -358.853, -755.996, 360.109, -866.007, 55.2828, + -508.801}); + Tensor a_quantized = FloatTensorToQuantized<quint8>(a_float, a_min, a_max); + + const int b_rows = 8; + const int b_cols = 8; + const float b_min = -0.739539f; + const float b_max = 0.641057f; + Tensor b_float(DT_FLOAT, {b_rows, b_cols}); + test::FillValues<float>( + &b_float, + {-0.294619, -0.0670519, 0.261507, -0.126274, 0.127229, -0.176945, + -0.251223, 0.231086, 0.453694, 0.415666, -0.288733, 0.508717, + 0.211551, 0.0435907, -0.582383, -0.308779, 0.0696883, -0.438122, + 0.114, 0.433964, 0.109883, 0.284931, -0.149661, 0.108657, + 0.458333, -0.130231, -0.35805, -0.123206, -0.437968, 0.0282411, + 0.628818, -0.0522173, -0.0233403, 0.124863, 0.217165, 0.262294, + -0.171005, -0.254693, -0.200433, -0.287354, 0.488166, -0.0354688, + -0.118091, -0.590444, 0.491537, -0.739539, 0.083117, 0.282482, + 0.275269, -0.36574, 0.107476, 0.0511428, -0.136887, -0.0149852, + -0.259694, 0.641057, 0.264054, -0.295126, -0.0218791, 0.361211, + 0.012448, 0.0709718, -0.392394, -0.434215}); + Tensor b_quantized = FloatTensorToQuantized<quint8>(b_float, b_min, b_max); + + AddInputFromArray<quint8>(a_quantized.shape(), a_quantized.flat<quint8>()); + AddInputFromArray<quint8>(b_quantized.shape(), b_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {a_min}); + AddInputFromArray<float>(TensorShape({1}), {a_max}); + AddInputFromArray<float>(TensorShape({1}), {b_min}); + AddInputFromArray<float>(TensorShape({1}), {b_max}); + TF_ASSERT_OK(RunOpKernel()); + + Tensor expected_float(DT_FLOAT, {a_cols, b_cols}); + test::FillValues<float>( + &expected_float, + {1776.82f, 421.058f, -854.308f, 1430.65f, 503.105f, 57.2744f, + -1514.97f, -1163.66f, -87.0979f, -394.577f, -39.4983f, -79.1938f, + -329.029f, 313.475f, 446.929f, -59.5855f, 350.837f, 238.655f, + -609.21f, 350.499f, 192.238f, 847.576f, -103.177f, 185.886f, + -90.5335f, 200.787f, 99.1981f, -717.076f, 763.815f, -703.726f, + -125.164f, 732.325f, -51.5303f, -418.826f, 60.0783f, -299.658f, + 231.41f, 72.0622f, -289.244f, 663.776f, 391.177f, 294.415f, + -484.148f, -677.932f, -180.342f, -194.764f, 761.715f, 553.061f, + -283.355f, 321.109f, 351.269f, 1171.7f, -857.497f, 343.804f, + -494.599f, -844.119f, 725.237f, 
586.052f, -735.013f, -897.723f, + -122.434f, -502.907f, 1264.6f, -239.991f}); + + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, output_float, 15.0); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantized_pooling_ops.cc b/tensorflow/contrib/quantization/kernels/quantized_pooling_ops.cc new file mode 100644 index 0000000000..33a12c4746 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantized_pooling_ops.cc @@ -0,0 +1,135 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// See docs in ../ops/nn_ops.cc. + +#define EIGEN_USE_THREADS + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/numeric_op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/kernels/pooling_ops_common.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/padding.h" +#include "tensorflow/core/util/tensor_format.h" + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +template <typename Device, typename T> +class QuantizedAvgPoolingOp : public OpKernel { + public: + explicit QuantizedAvgPoolingOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_)); + OP_REQUIRES(context, ksize_.size() == 4, + errors::InvalidArgument("Sliding window ksize field must " + "specify 4 dimensions")); + OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_)); + OP_REQUIRES(context, stride_.size() == 4, + errors::InvalidArgument("Sliding window strides field must " + "specify 4 dimensions")); + OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1, + errors::Unimplemented( + "Pooling is not yet supported on the batch dimension.")); + } + + void Compute(OpKernelContext* context) override { + const Tensor& tensor_in = context->input(0); + PoolParameters params{context, ksize_, stride_, + padding_, FORMAT_NHWC, tensor_in.shape()}; + if (!context->status().ok()) { + return; + } + + const float min_input = context->input(1).flat<float>()(0); + const float max_input = context->input(2).flat<float>()(0); + + OP_REQUIRES(context, params.depth_window == 1, + errors::Unimplemented("Non-spatial pooling is not " + "yet supported. Volunteers? 
:)")); + + OP_REQUIRES(context, tensor_in.dims() == 4, + errors::InvalidArgument("tensor_in must be 4-dimensional")); + + Tensor* output = nullptr; + OP_REQUIRES_OK(context, context->allocate_output( + 0, params.forward_output_shape(), &output)); + const int32 highest = static_cast<int32>(Eigen::NumTraits<T>::highest()); + const int32 lowest = static_cast<int32>(Eigen::NumTraits<T>::lowest()); + + // TODO(vrv): Switch this to the Eigen::Tensor version of + // SpatialAvgPooling once that version is running quickly. + Tensor int32_output(DT_INT32, params.forward_output_shape()); + // Cast input to int32 tensor and call SpatialAvgPool. + Tensor int32_input(DT_INT32, tensor_in.shape()); + int32_input.flat<int32>() = tensor_in.flat<T>().template cast<int32>(); + SpatialAvgPool<Device, int32>(context, &int32_output, int32_input, params, + padding_); + + // Clamp the int32 output back into quantized space. + output->flat<T>() = int32_output.flat<int32>() + .cwiseMax(lowest) + .cwiseMin(highest) + .template cast<T>(); + + Tensor* output_min = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); + output_min->flat<float>()(0) = min_input; + Tensor* output_max = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max)); + output_max->flat<float>()(0) = max_input; + } + + private: + std::vector<int32> ksize_; + std::vector<int32> stride_; + Padding padding_; +}; + +template <typename Device, typename T> +class QuantizedMaxPoolingOp : public MaxPoolingOp<Device, T> { + public: + explicit QuantizedMaxPoolingOp(OpKernelConstruction* context) + : MaxPoolingOp<Device, T>(context) {} + + void Compute(OpKernelContext* context) override { + const float min_input = context->input(1).flat<float>()(0); + const float max_input = context->input(2).flat<float>()(0); + MaxPoolingOp<Device, T>::Compute(context); + Tensor* output_min = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); + output_min->flat<float>()(0) = min_input; + Tensor* output_max = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max)); + output_max->flat<float>()(0) = max_input; + } +}; + +REGISTER_KERNEL_BUILDER( + Name("QuantizedAvgPool").Device(DEVICE_CPU).TypeConstraint<quint8>("T"), + QuantizedAvgPoolingOp<CPUDevice, quint8>); + +REGISTER_KERNEL_BUILDER( + Name("QuantizedMaxPool").Device(DEVICE_CPU).TypeConstraint<quint8>("T"), + QuantizedMaxPoolingOp<CPUDevice, quint8>); + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/quantized_pooling_ops_test.cc b/tensorflow/contrib/quantization/kernels/quantized_pooling_ops_test.cc new file mode 100644 index 0000000000..3bc05ed455 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/quantized_pooling_ops_test.cc @@ -0,0 +1,127 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +class QuantizedPoolingTest : public OpsTestBase { + protected: +}; + +TEST_F(QuantizedPoolingTest, SmallAveragePooling) { + const int ksize = 2; + const int stride = 2; + TF_ASSERT_OK(NodeDefBuilder("quantized_avg_pool_op", "QuantizedAvgPool") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum<quint8>::v()) + .Attr("ksize", {1, ksize, ksize, 1}) + .Attr("strides", {1, stride, stride, 1}) + .Attr("padding", "SAME") + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const float input_min = 0.0f; + const float input_max = 255.0f; + const int input_height = 4; + const int input_width = 4; + const int input_channels = 2; + Tensor input_float(DT_FLOAT, {1, input_height, input_width, input_channels}); + test::FillValues<float>( + &input_float, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}); + Tensor input_quantized = + FloatTensorToQuantized<quint8>(input_float, input_min, input_max); + + const int expected_width = input_width / stride; + const int expected_height = input_height / stride; + Tensor expected_float(DT_FLOAT, + {1, expected_height, expected_width, input_channels}); + test::FillValues<float>(&expected_float, {6, 7, 10, 11, 22, 23, 26, 27}); + + AddInputFromArray<quint8>(input_quantized.shape(), + input_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {input_min}); + AddInputFromArray<float>(TensorShape({1}), {input_max}); + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, output_float, 0.2); +} + +TEST_F(QuantizedPoolingTest, SmallMaxPooling) { + const int ksize = 2; + const int stride = 2; + TF_ASSERT_OK(NodeDefBuilder("quantized_max_pool_op", "QuantizedMaxPool") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum<quint8>::v()) + .Attr("ksize", {1, ksize, ksize, 1}) + .Attr("strides", {1, stride, stride, 1}) + .Attr("padding", "SAME") + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const float input_min = 0.0f; + const float input_max = 255.0f; + const int input_height = 4; + const int input_width = 4; + const int input_channels = 2; + Tensor input_float(DT_FLOAT, {1, input_height, input_width, input_channels}); + test::FillValues<float>( + &input_float, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}); + Tensor input_quantized = + FloatTensorToQuantized<quint8>(input_float, input_min, input_max); + + const int expected_width = input_width / stride; + const int expected_height = input_height / stride; + Tensor expected_float(DT_FLOAT, + {1, expected_height, expected_width, input_channels}); + test::FillValues<float>(&expected_float, {11, 12, 15, 16, 27, 28, 31, 32}); + + AddInputFromArray<quint8>(input_quantized.shape(), + input_quantized.flat<quint8>()); + AddInputFromArray<float>(TensorShape({1}), {input_min}); + AddInputFromArray<float>(TensorShape({1}), {input_max}); + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat<float>()(0); + const float output_max = GetOutput(2)->flat<float>()(0); + Tensor output_float = + QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max); + test::ExpectTensorNear<float>(expected_float, output_float, 0.2); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/reference_gemm.h b/tensorflow/contrib/quantization/kernels/reference_gemm.h new file mode 100644 index 0000000000..5af3a77128 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/reference_gemm.h @@ -0,0 +1,90 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_ + +// This is an unoptimized but debuggable implementation of the GEMM matrix +// multiply function, used to compare to faster but more opaque versions, or +// for bit depths or argument combinations that aren't supported by optimized +// code. +// It assumes the row-major convention used by TensorFlow, and implements +// C = A * B, like the standard BLAS GEMM interface. If the tranpose flags are +// true, then the relevant matrix is treated as stored in column-major order. 
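
The per-element requantization that ReferenceGemm below (and the reference convolution functor earlier in this change) applies is `output = (((total + offset_c) * mult_c) + rounding) >> shift_c`, followed by clamping to the output type's range. Here is a minimal sketch of just that step, with hypothetical input values chosen purely for illustration; they are not taken from any of the kernels or tests in this change.

```
#include <algorithm>
#include <cstdint>
#include <cstdio>

// Requantize a 32-bit accumulator: add the output offset, scale by an integer
// multiplier, add half of the shift step to round to nearest, shift right,
// then saturate to the representable range of the output type.
int32_t requantize(int32_t total, int32_t offset, int32_t mult, int32_t shift,
                   int32_t lowest, int32_t highest) {
  const int32_t rounding = (shift < 1) ? 0 : (1 << (shift - 1));
  const int32_t output = (((total + offset) * mult) + rounding) >> shift;
  return std::max(lowest, std::min(highest, output));
}

int main() {
  // Hypothetical example: compress an accumulator of 1000 down by 4 bits,
  // i.e. divide by 16 with rounding: (1000 + 8) >> 4 = 63.
  std::printf("%d\n", requantize(1000, /*offset=*/0, /*mult=*/1, /*shift=*/4,
                                 /*lowest=*/-128, /*highest=*/127));
  return 0;
}
```
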
+ +namespace tensorflow { +template <class T1, class T2, class T3> +void ReferenceGemm(bool transpose_a, bool transpose_b, bool transpose_c, + size_t m, size_t n, size_t k, const T1* a, int32 offset_a, + size_t lda, const T2* b, int32 offset_b, size_t ldb, T3* c, + int32 shift_c, int32 offset_c, int32 mult_c, size_t ldc) { + int a_i_stride; + int a_l_stride; + if (transpose_a) { + a_i_stride = 1; + a_l_stride = lda; + } else { + a_i_stride = lda; + a_l_stride = 1; + } + int b_j_stride; + int b_l_stride; + if (transpose_b) { + b_j_stride = ldb; + b_l_stride = 1; + } else { + b_j_stride = 1; + b_l_stride = ldb; + } + int c_i_stride; + int c_j_stride; + if (transpose_c) { + c_i_stride = 1; + c_j_stride = ldc; + } else { + c_i_stride = ldc; + c_j_stride = 1; + } + + const int32 highest = static_cast<int32>(Eigen::NumTraits<T3>::highest()); + const int32 lowest = static_cast<int32>(Eigen::NumTraits<T3>::lowest()); + const int32 rounding = (shift_c < 1) ? 0 : (1 << (shift_c - 1)); + + int i, j, l; + for (j = 0; j < n; j++) { + for (i = 0; i < m; i++) { + int32 total = 0; + for (l = 0; l < k; l++) { + const size_t a_index = ((i * a_i_stride) + (l * a_l_stride)); + const int32 a_value = static_cast<int32>(a[a_index]) - offset_a; + const size_t b_index = ((j * b_j_stride) + (l * b_l_stride)); + const int32 b_value = static_cast<int32>(b[b_index]) - offset_b; + total += (a_value * b_value); + } + const size_t c_index = ((i * c_i_stride) + (j * c_j_stride)); + int32_t output = ((((total + offset_c) * mult_c) + rounding) >> shift_c); + if (output > highest) { + output = highest; + } + if (output < lowest) { + output = lowest; + } + c[c_index] = static_cast<T3>(output); + } + } +} +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_ diff --git a/tensorflow/contrib/quantization/load_quantized_ops_so.py b/tensorflow/contrib/quantization/load_quantized_ops_so.py new file mode 100644 index 0000000000..6eb424e534 --- /dev/null +++ b/tensorflow/contrib/quantization/load_quantized_ops_so.py @@ -0,0 +1,48 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Ops for quantized evaluation.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import threading + +import tensorflow as tf + +QUANTIZED_OPS_FILE = '_quantized_ops.so' + +_quantized_ops = None +_ops_lock = threading.Lock() + + +# Workaround for the fact that importing tensorflow imports contrib +# (even if a user isn't using this or any other contrib op), but +# there's not yet any guarantee that the shared object exists. +# In which case, "import tensorflow" will always crash, even for users that +# never use contrib. 
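The tests and tools added in this change load both the op and kernel libraries before building graphs that use the quantized ops; a minimal usage sketch (module paths as in this change):

```python
from tensorflow.contrib.quantization import load_quantized_ops_so
from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so

# Safe to call more than once: the lock and module-level cache mean each
# shared object is only loaded the first time.
load_quantized_ops_so.Load()
load_quantized_kernels_so.Load()
```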
+def Load(library_base_dir=''): + """Load the quantized ops library and return the loaded module.""" + with _ops_lock: + global _quantized_ops + if not _quantized_ops: + data_files_path = os.path.join(library_base_dir, + tf.resource_loader.get_data_files_path()) + tf.logging.info('q:data path: %s', data_files_path) + _quantized_ops = tf.load_op_library(os.path.join( + data_files_path, QUANTIZED_OPS_FILE)) + + assert _quantized_ops, 'Could not load quantized_ops.so' + return _quantized_ops diff --git a/tensorflow/contrib/quantization/ops/array_ops.cc b/tensorflow/contrib/quantization/ops/array_ops.cc new file mode 100644 index 0000000000..ff636c7957 --- /dev/null +++ b/tensorflow/contrib/quantization/ops/array_ops.cc @@ -0,0 +1,195 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; + +REGISTER_OP("QuantizeV2") + .Input("input: float") + .Input("min_range: float") + .Input("max_range: float") + .Output("output: T") + .Output("output_min: float") + .Output("output_max: float") + .Attr("T: quantizedtype") + .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'") + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }) + .Doc(R"doc( +Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. + +[min_range, max_range] are scalar floats that specify the range for +the 'input' data. The 'mode' attribute controls exactly which calculations are +used to convert the float values to their quantized equivalents. + +In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: + +``` +out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) +if T == qint8, out[i] -= (range(T) + 1) / 2.0 +``` +here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()` + +*MIN_COMBINED Mode Example* + +Assume the input is type float and has a possible range of [0.0, 6.0] and the +output type is quint8 ([0, 255]). The min_range and max_range values should be +specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each +value of the input by 255/6 and cast to quint8. + +If the output type was qint8 ([-128, 127]), the operation will additionally +subtract each value by 128 prior to casting, so that the range of values aligns +with the range of qint8. 
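As a concrete illustration of the MIN_COMBINED formula above, here is a NumPy sketch (not the kernel itself; the real op's rounding and clamping details may differ):

```python
import numpy as np

def quantize_min_combined(values, min_range, max_range, dtype=np.uint8):
    """Illustrative NumPy version of the MIN_COMBINED quantize formula."""
    info = np.iinfo(dtype)
    type_range = float(info.max - info.min)        # range(T)
    scaled = (values - min_range) * type_range / (max_range - min_range)
    if info.min < 0:                               # signed types, e.g. qint8
        scaled -= (type_range + 1) / 2.0
    return np.round(scaled).clip(info.min, info.max).astype(dtype)

# Floats in [0.0, 6.0] mapped onto quint8's [0, 255], as in the example above.
print(quantize_min_combined(np.array([0.0, 3.0, 6.0]), 0.0, 6.0, np.uint8))
# -> [  0 128 255]   (3.0 * 255 / 6 = 127.5, rounded to 128)
```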
+ +If the mode is 'MIN_FIRST', then this approach is used: + +``` +number_of_steps = 1 << (# of bits in T) +range_adjust = number_of_steps / (number_of_steps - 1) +range = (range_max - range_min) * range_adjust +range_scale = number_of_steps / range +quantized = round(input * range_scale) - round(range_min * range_scale) + + numeric_limits<T>::min() +quantized = max(quantized, numeric_limits<T>::min()) +quantized = min(quantized, numeric_limits<T>::max()) +``` + +The biggest difference between this and MIN_COMBINED is that the minimum range +is rounded first, before it's subtracted from the rounded value. With +MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing +and dequantizing will introduce a larger and larger error. + +One thing to watch out for is that the operator may choose to adjust the +requested minimum and maximum values slightly during the quantization process, +so you should always use the output ports as the range for further calculations. +For example, if the requested minimum and maximum values are close to equal, +they will be separated by a small epsilon value to prevent ill-formed quantized +buffers from being created. Otherwise, you can end up with buffers where all the +quantized values map to the same float value, which causes problems for +operations that have to perform further calculations on them. + +min_range: The minimum scalar value possibly produced for the input. +max_range: The maximum scalar value possibly produced for the input. +output: The quantized data produced from the float input. +output_min: The actual minimum scalar value used for the output. +output_max: The actual maximum scalar value used for the output. + +)doc"); + +REGISTER_OP("Dequantize") + .Input("input: T") + .Input("min_range: float") + .Input("max_range: float") + .Output("output: float") + .Attr("T: quantizedtype") + .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'") + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + return Status::OK(); + }) + .Doc(R"doc( +Dequantize the 'input' tensor into a float Tensor. + +[min_range, max_range] are scalar floats that specify the range for +the 'input' data. The 'mode' attribute controls exactly which calculations are +used to convert the float values to their quantized equivalents. + +In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: + +``` +if T == qint8, in[i] += (range(T) + 1)/ 2.0 +out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) +``` +here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()` + +*MIN_COMBINED Mode Example* + +If the input comes from a QuantizedRelu6, the output type is +quint8 (range of 0-255) but the possible range of QuantizedRelu6 is +0-6. The min_range and max_range values are therefore 0.0 and 6.0. +Dequantize on quint8 will take each value, cast to float, and multiply +by 6 / 255. +Note that if quantizedtype is qint8, the operation will additionally add +each value by 128 prior to casting. 
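The same mapping in the reverse direction, as a NumPy sketch of the MIN_COMBINED dequantize formula (illustrative only):

```python
import numpy as np

def dequantize_min_combined(values, min_range, max_range, dtype=np.uint8):
    """Illustrative NumPy version of the MIN_COMBINED dequantize formula."""
    info = np.iinfo(dtype)
    type_range = float(info.max - info.min)        # range(T)
    shifted = values.astype(np.float32)
    if info.min < 0:                               # signed types, e.g. qint8
        shifted += (type_range + 1) / 2.0
    return min_range + shifted * (max_range - min_range) / type_range

# The QuantizedRelu6-style example: quint8 values interpreted over [0.0, 6.0].
print(dequantize_min_combined(np.array([0, 128, 255], dtype=np.uint8), 0.0, 6.0))
# -> roughly [0.0, 3.01, 6.0]
```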
+ +If the mode is 'MIN_FIRST', then this approach is used: + +``` +number_of_steps = 1 << (# of bits in T) +range_adjust = number_of_steps / (number_of_steps - 1) +range = (range_max - range_min) * range_adjust +range_scale = range / number_of_steps +const double offset_input = static_cast<double>(input) - lowest_quantized; +result = range_min + ((input - numeric_limits<T>::min()) * range_scale) +``` + +min_range: The minimum scalar value possibly produced for the input. +max_range: The maximum scalar value possibly produced for the input. + +)doc"); + +REGISTER_OP("QuantizedConcat") + .Input("concat_dim: int32") + .Input("values: N * T") + .Input("input_mins: N * float32") + .Input("input_maxes: N * float32") + .Output("output: T") + .Output("output_min: float") + .Output("output_max: float") + .Attr("N: int >= 2") + .Attr("T: type") + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::ConcatShape(c)); + ShapeHandle unused; + for (int i = 2; i < c->num_inputs(); ++i) { + TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused)); + } + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }) + .Doc(R"doc( +Concatenates quantized tensors along one dimension. + +concat_dim: 0-D. The dimension along which to concatenate. Must be in the + range [0, rank(values)). +values: The `N` Tensors to concatenate. Their ranks and types must match, + and their sizes must match in all dimensions except `concat_dim`. +input_mins: The minimum scalar values for each of the input tensors. +input_maxes: The maximum scalar values for each of the input tensors. +output_min: The float value that the minimum quantized output value represents. +output_max: The float value that the maximum quantized output value represents. +output: A `Tensor` with the concatenation of values stacked along the + `concat_dim` dimension. This tensor's shape matches that of `values` except + in `concat_dim` where it has the sum of the sizes. +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/ops/math_ops.cc b/tensorflow/contrib/quantization/ops/math_ops.cc new file mode 100644 index 0000000000..93bb283630 --- /dev/null +++ b/tensorflow/contrib/quantization/ops/math_ops.cc @@ -0,0 +1,126 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/numeric_op.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; + +REGISTER_OP("QuantizedMatMul") + .Input("a: T1") + .Input("b: T2") + .Input("min_a: float") + .Input("max_a: float") + .Input("min_b: float") + .Input("max_b: float") + .Output("out: Toutput") + .Output("min_out: float") + .Output("max_out: float") + .Attr("T1: quantizedtype") + .Attr("T2: quantizedtype") + .Attr("Toutput: quantizedtype = DT_QINT32") + .Attr("transpose_a: bool = false") + .Attr("transpose_b: bool = false") + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::MatMulShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); + + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }) + .Doc(R"doc( +Perform a quantized matrix multiplication of `a` by the matrix `b`. + +The inputs must be two-dimensional matrices and the inner dimension of +`a` (after being transposed if `transpose_a` is non-zero) must match the +outer dimension of `b` (after being transposed if `transposed_b` is +non-zero). + +a: Must be a two-dimensional tensor. +b: Must be a two-dimensional tensor. +transpose_a: If true, `a` is transposed before multiplication. +transpose_b: If true, `b` is transposed before multiplication. +min_a: The float value that the lowest quantized `a` value represents. +max_a: The float value that the highest quantized `a` value represents. +min_b: The float value that the lowest quantized `b` value represents. +max_b: The float value that the highest quantized `b` value represents. +min_out: The float value that the lowest quantized output value represents. +max_out: The float value that the highest quantized output value represents. + +)doc"); + +REGISTER_OP("QuantizeDownAndShrinkRange") + .Input("input: Tinput") + .Input("input_min: float") + .Input("input_max: float") + .Output("output: out_type") + .Output("output_min: float") + .Output("output_max: float") + .Attr("Tinput: quantizedtype") + .Attr("out_type: quantizedtype") + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }) + .Doc(R"doc( +Convert the quantized 'input' tensor into a lower-precision 'output', using the +actual distribution of the values to maximize the usage of the lower bit depth +and adjusting the output min and max ranges accordingly. + +[input_min, input_max] are scalar floats that specify the range for the float +interpretation of the 'input' data. For example, if input_min is -1.0f and +input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 +value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. 
+ +This operator tries to squeeze as much precision as possible into an output with +a lower bit depth by calculating the actual min and max values found in the +data. For example, maybe that quint16 input has no values lower than 16,384 and +none higher than 49,152. That means only half the range is actually needed, all +the float interpretations are between -0.5f and 0.5f, so if we want to compress +the data into a quint8 output, we can use that range rather than the theoretical +-1.0f to 1.0f that is suggested by the input min and max. + +In practice, this is most useful for taking output from operations like +QuantizedMatMul that can produce higher bit-depth outputs than their inputs and +may have large potential output ranges, but in practice have a distribution of +input values that only uses a small fraction of the possible range. By feeding +that output into this operator, we can reduce it from 32 bits down to 8 with +minimal loss of accuracy. + +input_min: The float value that the minimum quantized input value represents. +input_max: The float value that the maximum quantized input value represents. +Tinput: The type of the input. +output_min: The float value that the minimum quantized output value represents. +output_max: The float value that the maximum quantized output value represents. +out_type: The type of the output. Should be a lower bit depth than Tinput. + +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/ops/nn_ops.cc b/tensorflow/contrib/quantization/ops/nn_ops.cc new file mode 100644 index 0000000000..720377043d --- /dev/null +++ b/tensorflow/contrib/quantization/ops/nn_ops.cc @@ -0,0 +1,348 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/numeric_op.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/util/padding.h" + +namespace tensorflow { + +using shape_inference::DimensionHandle; +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; + +REGISTER_OP("QuantizedAvgPool") + .Input("input: T") + .Input("min_input: float") + .Input("max_input: float") + .Output("output: T") + .Output("min_output: float") + .Output("max_output: float") + .Attr("T: quantizedtype") + .Attr("ksize: list(int)") + .Attr("strides: list(int)") + .Attr(GetPaddingAttrString()) + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::AvgPoolShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }) + .Doc(R"doc( +Produces the average pool of the input tensor for quantized types. + +input: 4-D with shape `[batch, height, width, channels]`. 
+ksize: The size of the window for each dimension of the input tensor. + The length must be 4 to match the number of dimensions of the input. +strides: The stride of the sliding window for each dimension of the input + tensor. The length must be 4 to match the number of dimensions of the input. +padding: The type of padding algorithm to use. +min_input: The float value that the lowest quantized input value represents. +max_input: The float value that the highest quantized input value represents. +min_output: The float value that the lowest quantized output value represents. +max_output: The float value that the highest quantized output value represents. + +)doc"); + +REGISTER_OP("QuantizedBiasAdd") + .Input("input: T1") + .Input("bias: T2") + .Input("min_input: float") + .Input("max_input: float") + .Input("min_bias: float") + .Input("max_bias: float") + .Output("output: out_type") + .Output("min_out: float") + .Output("max_out: float") + .Attr("T1: quantizedtype") + .Attr("T2: quantizedtype") + .Attr("out_type: quantizedtype") + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::BiasAddShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }) + .Doc(R"doc( +Adds Tensor 'bias' to Tensor 'input' for Quantized types. + +Broadcasts the values of bias on dimensions 0..N-2 of 'input'. + +bias: A 1D bias Tensor with size matching the last dimension of 'input'. +min_input: The float value that the lowest quantized input value represents. +max_input: The float value that the highest quantized input value represents. +min_bias: The float value that the lowest quantized bias value represents. +max_bias: The float value that the highest quantized bias value represents. +min_out: The float value that the lowest quantized output value represents. +max_out: The float value that the highest quantized output value represents. + +)doc"); + +REGISTER_OP("QuantizedConv2D") + .Input("input: Tinput") + .Input("filter: Tfilter") + .Input("min_input: float") + .Input("max_input: float") + .Input("min_filter: float") + .Input("max_filter: float") + .Output("output: out_type") + .Output("min_output: float") + .Output("max_output: float") + .Attr("Tinput: quantizedtype") + .Attr("Tfilter: quantizedtype") + .Attr("out_type: quantizedtype = DT_QINT32") + .Attr("strides: list(int)") + .Attr(GetPaddingAttrString()) + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }) + .Doc(R"doc( +Computes a 2D convolution given quantized 4D input and filter tensors. +The inputs are quantized tensors where the lowest value represents the real +number of the associated minimum, and the highest represents the maximum. +This means that you can only interpret the quantized output in the same way, by +taking the returned minimum and maximum values into account. 
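A minimal sketch of consuming those three outputs from Python, patterned on quantized_conv_ops_test.py later in this change (shapes, values, and ranges here are illustrative):

```python
import numpy as np
import tensorflow as tf

from tensorflow.contrib.quantization import load_quantized_ops_so
from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so

load_quantized_ops_so.Load()
load_quantized_kernels_so.Load()

x = np.arange(1, 19, dtype=np.uint8).reshape(1, 2, 3, 3)   # input image
f = np.arange(1, 10, dtype=np.uint8).reshape(1, 1, 3, 3)   # 1x1 filter
with tf.Session() as sess:
    t_in = tf.constant(x, shape=x.shape, dtype=tf.quint8)
    t_filt = tf.constant(f, shape=f.shape, dtype=tf.quint8)
    conv = tf.contrib.quantization.quantized_conv2d(
        t_in, t_filt, out_type=tf.qint32, strides=[1, 1, 1, 1],
        padding="VALID", min_input=0.0, max_input=255.0,
        min_filter=0.0, max_filter=255.0)
    value = sess.run(conv)
    quantized, out_min, out_max = value[0], value[1], value[2]
    # The qint32 values in `quantized` are only meaningful relative to
    # [out_min, out_max]; convert back to floats before comparing.
```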
+ +filter: filter's input_depth dimension must match input's depth dimensions. +strides: The stride of the sliding window for each dimension of the input + tensor. +padding: The type of padding algorithm to use. +min_input: The float value that the lowest quantized input value represents. +max_input: The float value that the highest quantized input value represents. +min_filter: The float value that the lowest quantized filter value represents. +max_filter: The float value that the highest quantized filter value represents. +min_output: The float value that the lowest quantized output value represents. +max_output: The float value that the highest quantized output value represents. + +)doc"); + +REGISTER_OP("QuantizedMaxPool") + .Input("input: T") + .Input("min_input: float") + .Input("max_input: float") + .Output("output: T") + .Output("min_output: float") + .Output("max_output: float") + .Attr("T: quantizedtype") + .Attr("ksize: list(int)") + .Attr("strides: list(int)") + .Attr(GetPaddingAttrString()) + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }) + .Doc(R"doc( +Produces the max pool of the input tensor for quantized types. + +input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. +ksize: The size of the window for each dimension of the input tensor. + The length must be 4 to match the number of dimensions of the input. +strides: The stride of the sliding window for each dimension of the input + tensor. The length must be 4 to match the number of dimensions of the input. +padding: The type of padding algorithm to use. +min_input: The float value that the lowest quantized input value represents. +max_input: The float value that the highest quantized input value represents. +min_output: The float value that the lowest quantized output value represents. +max_output: The float value that the highest quantized output value represents. + +)doc"); + +REGISTER_OP("QuantizedRelu") + .Input("features: Tinput") + .Input("min_features: float") + .Input("max_features: float") + .Output("activations: out_type") + .Output("min_activations: float") + .Output("max_activations: float") + .Attr("Tinput: quantizedtype") + .Attr("out_type: quantizedtype = DT_QUINT8") + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }) + .Doc(R"doc( +Computes Quantized Rectified Linear: `max(features, 0)` + +activations: Has the same output shape as "features". +min_features: The float value that the lowest quantized value represents. +max_features: The float value that the highest quantized value represents. +min_activations: The float value that the lowest quantized value represents. +max_activations: The float value that the highest quantized value represents. 
+ +)doc"); + +REGISTER_OP("QuantizedRelu6") + .Input("features: Tinput") + .Input("min_features: float") + .Input("max_features: float") + .Output("activations: out_type") + .Output("min_activations: float") + .Output("max_activations: float") + .Attr("Tinput: quantizedtype") + .Attr("out_type: quantizedtype = DT_QUINT8") + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }) + .Doc(R"doc( +Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)` + +activations: Has the same output shape as "features". +min_features: The float value that the lowest quantized value represents. +max_features: The float value that the highest quantized value represents. +min_activations: The float value that the lowest quantized value represents. +max_activations: The float value that the highest quantized value represents. + +)doc"); + +REGISTER_OP("QuantizedReluX") + .Input("features: Tinput") + .Input("max_value: float") + .Input("min_features: float") + .Input("max_features: float") + .Output("activations: out_type") + .Output("min_activations: float") + .Output("max_activations: float") + .Attr("Tinput: quantizedtype") + .Attr("out_type: quantizedtype = DT_QUINT8") + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }) + .Doc(R"doc( +Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)` + +activations: Has the same output shape as "features". +min_features: The float value that the lowest quantized value represents. +max_features: The float value that the highest quantized value represents. +min_activations: The float value that the lowest quantized value represents. +max_activations: The float value that the highest quantized value represents. 
+ +)doc"); + +REGISTER_OP("QuantizedBatchNormWithGlobalNormalization") + .Input("t: Tinput") + .Input("t_min: float") + .Input("t_max: float") + .Input("m: Tinput") + .Input("m_min: float") + .Input("m_max: float") + .Input("v: Tinput") + .Input("v_min: float") + .Input("v_max: float") + .Input("beta: Tinput") + .Input("beta_min: float") + .Input("beta_max: float") + .Input("gamma: Tinput") + .Input("gamma_min: float") + .Input("gamma_max: float") + .Output("result: out_type") + .Output("result_min: float") + .Output("result_max: float") + .Attr("Tinput: quantizedtype") + .Attr("out_type: quantizedtype") + .Attr("variance_epsilon: float") + .Attr("scale_after_normalization: bool") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle input; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input)); + + DimensionHandle last_dim = c->Dim(input, 3); + for (int i = 1; i < 5; ++i) { // covers m, v, beta, gamma + ShapeHandle vec; + TF_RETURN_IF_ERROR(c->WithRank(c->input(i * 3), 1, &vec)); + TF_RETURN_IF_ERROR(c->Merge(last_dim, c->Dim(vec, 0), &last_dim)); + } + + ShapeHandle out; + TF_RETURN_IF_ERROR(c->ReplaceDim(input, 3, last_dim, &out)); + c->set_output(0, out); + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + + return Status::OK(); + }) + .Doc(R"doc( +Quantized Batch normalization. + +This op is deprecated and will be removed in the future. Prefer +`tf.nn.batch_normalization`. + +t: A 4D input Tensor. +t_min: The value represented by the lowest quantized input. +t_max: The value represented by the highest quantized input. +m: A 1D mean Tensor with size matching the last dimension of t. + This is the first output from tf.nn.moments, + or a saved moving average thereof. +m_min: The value represented by the lowest quantized mean. +m_max: The value represented by the highest quantized mean. +v: A 1D variance Tensor with size matching the last dimension of t. + This is the second output from tf.nn.moments, + or a saved moving average thereof. +v_min: The value represented by the lowest quantized variance. +v_max: The value represented by the highest quantized variance. +beta: A 1D beta Tensor with size matching the last dimension of t. + An offset to be added to the normalized tensor. +beta_min: The value represented by the lowest quantized offset. +beta_max: The value represented by the highest quantized offset. +gamma: A 1D gamma Tensor with size matching the last dimension of t. + If "scale_after_normalization" is true, this tensor will be multiplied + with the normalized tensor. +gamma_min: The value represented by the lowest quantized gamma. +gamma_max: The value represented by the highest quantized gamma. +variance_epsilon: A small float number to avoid dividing by 0. +scale_after_normalization: A bool indicating whether the resulted tensor + needs to be multiplied with gamma. 
+)doc"); + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/python/array_ops.py b/tensorflow/contrib/quantization/python/array_ops.py index b873d4df14..2ab65e903e 100644 --- a/tensorflow/contrib/quantization/python/array_ops.py +++ b/tensorflow/contrib/quantization/python/array_ops.py @@ -19,7 +19,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,wildcard-import -from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops -from tensorflow.python.ops.gen_array_ops import dequantize -from tensorflow.python.ops.gen_array_ops import quantize_v2 -from tensorflow.python.ops.gen_array_ops import quantized_concat +from tensorflow.contrib.quantization.ops import gen_array_ops as quantized_gen_array_ops +from tensorflow.contrib.quantization.ops.gen_array_ops import dequantize +from tensorflow.contrib.quantization.ops.gen_array_ops import quantize_v2 +from tensorflow.contrib.quantization.ops.gen_array_ops import quantized_concat diff --git a/tensorflow/contrib/quantization/python/dequantize_op_test.py b/tensorflow/contrib/quantization/python/dequantize_op_test.py new file mode 100644 index 0000000000..b1d47cc4a2 --- /dev/null +++ b/tensorflow/contrib/quantization/python/dequantize_op_test.py @@ -0,0 +1,85 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for Dequantize Operations.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf + +# TODO(petewarden) - Remove this ugly hack to get around Python linking problems +# with Bazel. +# pylint: disable=g-bad-import-order +from tensorflow.contrib.quantization import load_quantized_ops_so +from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so + + +class DequantizeOpTest(tf.test.TestCase): + + def __init__(self, method_name="runTest"): + super(DequantizeOpTest, self).__init__(method_name) + load_quantized_ops_so.Load() + load_quantized_kernels_so.Load() + + def _testDequantizeOp(self, inputs, min_range, max_range, dtype): + with self.test_session(): + input_op = tf.constant(inputs, shape=[len(inputs)], dtype=dtype) + dequantized = tf.contrib.quantization.dequantize( + input_op, min_range, max_range) + tf_ans = dequantized.eval() + + # TODO(vrv): Add support for DT_QINT32 quantization if needed. 
+ type_dict = { + tf.quint8: np.uint8, + tf.qint8: np.int8, + tf.quint16: np.uint16, + tf.qint16: np.int16 + } + self.assertTrue(dtype in type_dict.keys()) + v_max = np.iinfo(type_dict[dtype]).max + v_min = np.iinfo(type_dict[dtype]).min + self.assertTrue(min_range >= v_min) + self.assertTrue(max_range <= v_max) + type_range = v_max - v_min + if v_min < 0: + half_range = (type_range + 1) / 2 + else: + half_range = 0.0 + + np_ans = ((inputs.astype(np.float32) + half_range) * + (max_range - min_range) / type_range) + min_range + self.assertAllClose(tf_ans, np_ans) + + def testBasicQuint8(self): + self._testDequantizeOp(np.array([0, 128, 255]), + 0.0, 6.0, tf.quint8) + self._testDequantizeOp(np.array([0, 128, 255]), + 0.0, 123.456, tf.quint8) + self._testDequantizeOp(np.array([0, 4, 42, 108, 243]), + 5.0, 200.2, tf.quint8) + + def testBasicQint8(self): + self._testDequantizeOp(np.array([-128, 0, 127]), + -1.0, 2.0, tf.qint8) + self._testDequantizeOp(np.array([-2, 4, -17]), + -5.0, -3.0, tf.qint8) + self._testDequantizeOp(np.array([0, -4, 42, -108]), + 5.0, 40.0, tf.qint8) + + +if __name__ == "__main__": + tf.test.main() diff --git a/tensorflow/contrib/quantization/python/math_ops.py b/tensorflow/contrib/quantization/python/math_ops.py index d863cdad26..d4fabbd36b 100644 --- a/tensorflow/contrib/quantization/python/math_ops.py +++ b/tensorflow/contrib/quantization/python/math_ops.py @@ -19,7 +19,10 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,wildcard-import +from tensorflow.contrib.quantization.ops import gen_math_ops +from tensorflow.contrib.quantization.ops.gen_math_ops import * from tensorflow.python.framework import common_shapes from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_math_ops -from tensorflow.python.ops.gen_math_ops import * + + +ops.RegisterShape("QuantizedMatMul")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/contrib/quantization/python/nn_ops.py b/tensorflow/contrib/quantization/python/nn_ops.py index fd28423317..d31f1d4e68 100644 --- a/tensorflow/contrib/quantization/python/nn_ops.py +++ b/tensorflow/contrib/quantization/python/nn_ops.py @@ -19,7 +19,17 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,wildcard-import +from tensorflow.contrib.quantization.ops import gen_nn_ops +from tensorflow.contrib.quantization.ops.gen_nn_ops import * from tensorflow.python.framework import common_shapes from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_nn_ops -from tensorflow.python.ops.gen_nn_ops import * + + +ops.RegisterShape("QuantizedAvgPool")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedBiasAdd")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedConv2D")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedMaxPool")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedRelu")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedRelu6")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedReluX")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizeDownAndShrinkRange")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/contrib/quantization/python/quantized_conv_ops_test.py b/tensorflow/contrib/quantization/python/quantized_conv_ops_test.py new file mode 100644 index 0000000000..9b24d4129d --- /dev/null +++ b/tensorflow/contrib/quantization/python/quantized_conv_ops_test.py @@ -0,0 +1,198 @@ +# Copyright 2015 The 
TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Functional tests for quantized convolutional operations.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf + +# TODO(petewarden) - Remove this ugly hack to get around Python linking problems +# with Bazel. +# pylint: disable=g-bad-import-order +from tensorflow.contrib.quantization import load_quantized_ops_so +from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so + + +class Conv2DTest(tf.test.TestCase): + + def __init__(self, method_name="runTest"): + super(Conv2DTest, self).__init__(method_name) + load_quantized_ops_so.Load() + load_quantized_kernels_so.Load() + + def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, + expected): + """Verifies the output values of the convolution function. + + Args: + tensor_in_sizes: Input tensor dimensions in + [batch, input_rows, input_cols, input_depth]. + filter_in_sizes: Filter tensor dimensions in + [kernel_rows, kernel_cols, input_depth, output_depth]. + stride: Stride. + padding: Padding type. + expected: An array containing the expected operation outputs. + """ + total_size_1 = 1 + total_size_2 = 1 + for s in tensor_in_sizes: + total_size_1 *= s + for s in filter_in_sizes: + total_size_2 *= s + # Initializes the input tensor with array containing incrementing + # numbers from 1. 
+ x1 = np.array([f for f in range(1, total_size_1 + 1)]) + x1 = x1.astype(np.uint8).reshape(tensor_in_sizes) + x1_min = 0.0 + x1_max = 255.0 + x2 = np.array([f for f in range(1, total_size_2 + 1)]).astype(np.uint8) + x2 = x2.astype(np.uint8).reshape(filter_in_sizes) + x2_min = 0.0 + x2_max = 255.0 + with self.test_session(use_gpu=False) as sess: + t1 = tf.constant(x1, shape=tensor_in_sizes, dtype=tf.quint8) + t2 = tf.constant(x2, shape=filter_in_sizes, dtype=tf.quint8) + conv = tf.contrib.quantization.quantized_conv2d(t1, + t2, + out_type=tf.qint32, + strides=[1, stride, + stride, 1], + padding=padding, + min_input=x1_min, + max_input=x1_max, + min_filter=x2_min, + max_filter=x2_max) + value = sess.run(conv) + quantized_output = value[0] + output_min = value[1] + output_max = value[2] + float_output = self._QuantizedOutputToFloat(quantized_output, output_min, + output_max) + self.assertArrayNear(expected, float_output.flatten(), 1.0) + self.assertEqual(value[0].shape, conv[0].get_shape()) + + def _assertQuantizedArrayEquals(self, iarray1, iarray2): + for i1, i2 in zip(iarray1, iarray2): + self.assertTrue(i1 == i2) + + def _QuantizedOutputToFloat(self, quantized, quantized_min, quantized_max): + number_of_bits = 32 + number_of_steps = 1 << number_of_bits + range_adjust = (number_of_steps / (number_of_steps - 1.0)) + quantized_range = ((quantized_max - quantized_min) * range_adjust) + range_scale = (quantized_range / number_of_steps) + lowest_quantized = -(1 << (number_of_bits - 1)) + result = np.array([(quantized_min + ((x - lowest_quantized) * range_scale)) + for x in quantized.flatten()]) + return result + + def testConv2D1x1Filter(self): + # Our generated input is [batch, rows, cols, depth], and looks like this: + # (1,2,3) (4,5,6) (7,8,9) + # (10,11,12) (13,14,15) (16,17,18) + # The filter data is: + # (1,4,7) (2,5,8) (3,6,9) + # That means the calculations are: + # 1*1+2*4+3*7=30 + # 1*2+2*5+3*8=36 + # 1*3+2*6+3*9=42 + # 4*1+5*4+6*7=66 + # 4*2+5*5+6*8=81 + # 4*3+5*6+6*9=96 + # 7*1+5*8+6*9=102 + # 7*2+8*5+9*8=126 + # 7*3+8*6+9*9=150 + # 10*1+11*4+12*7=138 + # 10*2+11*5+12*8=171 + # 10*3+11*6+12*9=204 + # 13*1+14*4+15*7=174 + # 13*2+14*5+15*8=216 + # 13*3+14*6+15*9=258, clamped to 255 + # 16*1+17*4+18*7=210 + # 16*2+17*5+18*8=261, clamped to 255 + # 16*3+17*6+18*9=312, clamped to 255 + # Because the output shift is zero, we call the non-optimized reference + # path for the convolution. 
+ expected_output = [30, 36, 42, 66, 81, 96, 102, 126, 150, 138, 171, 204, + 174, 216, 258, 210, 261, 312] + self._VerifyValues(tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[1, 1, 3, 3], + stride=1, + padding="VALID", + expected=expected_output) + + def testConv2D2x2Filter(self): + # Our generated input is [batch, rows, cols, depth], and looks like this: + # (1,2,3) (4,5,6) (7,8,9) + # (10,11,12) (13,14,15) (16,17,18) + # The filter data is [filter_height, filter_width, depth, filter_count]: + # ( 1, 4, 7) (10, 13, 16) + # (19,22,25) (28, 31, 34) + # - + # ( 2, 5, 8) (11, 14, 17) + # (20,23,26) (29, 32, 35) + # - + # ( 3, 6, 9) (12, 15, 18) + # (21,24,27) (30, 33, 36) + # The raw accumulated totals are: + # 1*1+2*4+3*7+4*10+5*13+6*16+10*19+11*22+12*25+13*28+14*31+15*34=2271 + # 1*2+2*5+3*8+4*11+5*14+6*17+10*20+11*23+12*26+13*29+14*32+15*35=2367 + # 1*3+2*6+3*9+4*12+5*15+6*18+10*21+11*24+12*27+13*30+14*33+15*36=2463 + # 4*1+5*4+6*7+7*10+8*13+9*16+13*19+14*22+15*25+16*28+17*31+18*34=2901 + # 4*2+5*5+6*8+7*11+8*14+9*17+13*20+14*23+15*26+16*29+17*32+18*35=3033 + # 4*3+5*6+6*9+7*12+8*15+9*18+13*21+14*24+15*27+16*30+17*33+18*36=3165 + # The expected values are taken from the raw totals and rescaled to fit into + # eight bits. + expected_output = [2271.0, 2367.0, 2463.0, 2901.0, 3033.0, 3165.0] + self._VerifyValues(tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[2, 2, 3, 3], + stride=1, + padding="VALID", + expected=expected_output) + + def testConv2D1x2Filter(self): + # The outputs are computed using third_party/py/IPython/notebook. + # With a shift of 21, we should execute the optimized path here. + expected_output = [231.0, 252.0, 273.0, 384.0, 423.0, 462.0, 690.0, 765.0, + 840.0, 843.0, 936.0, 1029.0] + self._VerifyValues(tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[1, 2, 3, 3], + stride=1, + padding="VALID", + expected=expected_output) + + def testConv2D2x2FilterStride2(self): + # With a shift of 21, we should execute the optimized path here. + expected_output = [2271.0, 2367.0, 2463.0] + self._VerifyValues(tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[2, 2, 3, 3], + stride=2, + padding="VALID", + expected=expected_output) + + def testConv2D2x2FilterStride2Same(self): + # With a shift of 21, we should execute the optimized path here. + expected_output = [2271.0, 2367.0, 2463.0, 1230.0, 1305.0, 1380.0] + self._VerifyValues(tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[2, 2, 3, 3], + stride=2, + padding="SAME", + expected=expected_output) + +if __name__ == "__main__": + tf.test.main() diff --git a/tensorflow/contrib/quantization/tools/BUILD b/tensorflow/contrib/quantization/tools/BUILD new file mode 100644 index 0000000000..82a13e04d6 --- /dev/null +++ b/tensorflow/contrib/quantization/tools/BUILD @@ -0,0 +1,72 @@ +# Description: +# Utilities for quantizing TensorFlow graphs to lower bit depths. 
+ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +py_library( + name = "quantize_graph_lib", + srcs = ["quantize_graph.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/quantization:ops", + "//tensorflow/contrib/quantization:quantized_ops_py", + "//tensorflow/contrib/quantization/kernels:quantized_kernels_py", + "//tensorflow/python:platform", + ], +) + +py_binary( + name = "quantize_graph", + srcs = ["quantize_graph.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/quantization:ops", + "//tensorflow/contrib/quantization:quantized_ops_py", + "//tensorflow/contrib/quantization/kernels:quantized_kernels_py", + "//tensorflow/python:platform", + ], +) + +py_test( + name = "quantize_graph_test", + size = "small", + srcs = [ + "quantize_graph_test.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":quantize_graph", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + ], +) + +py_binary( + name = "graph_to_dot", + srcs = [ + "graph_to_dot.py", + ], + main = "graph_to_dot.py", + srcs_version = "PY2AND3", + deps = [ + "//tensorflow:tensorflow_py", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/quantization/tools/graph_to_dot.py b/tensorflow/contrib/quantization/tools/graph_to_dot.py new file mode 100644 index 0000000000..c1ee4ea9d3 --- /dev/null +++ b/tensorflow/contrib/quantization/tools/graph_to_dot.py @@ -0,0 +1,69 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Converts a GraphDef file into a DOT format suitable for visualization. + +This script takes a GraphDef representing a network, and produces a DOT file +that can then be visualized by GraphViz tools like dot and xdot. 
+ +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import re + +import tensorflow as tf + +from google.protobuf import text_format + +from tensorflow.core.framework import graph_pb2 +from tensorflow.python.platform import gfile + + +FLAGS = tf.flags.FLAGS + +tf.flags.DEFINE_string("graph", "", """TensorFlow 'GraphDef' file to load.""") +tf.flags.DEFINE_bool("input_binary", True, + """Whether the input files are in binary format.""") +tf.flags.DEFINE_string("dot_output", "", """Where to write the DOT output.""") + + +def main(unused_args): + if not gfile.Exists(FLAGS.graph): + print("Input graph file '" + FLAGS.graph + "' does not exist!") + return -1 + + graph = graph_pb2.GraphDef() + with open(FLAGS.graph, "rb") as f: + if FLAGS.input_binary: + graph.ParseFromString(f.read()) + else: + text_format.Merge(f.read(), graph) + + with open(FLAGS.dot_output, "wb") as f: + print("digraph graphname {", file=f) + for node in graph.node: + output_name = node.name + print(" \"" + output_name + "\" [label=\"" + node.op + "\"];", file=f) + for input_full_name in node.input: + parts = input_full_name.split(":") + input_name = re.sub(r"^\^", "", parts[0]) + print(" \"" + input_name + "\" -> \"" + output_name + "\";", file=f) + print("}", file=f) + print("Created DOT file '" + FLAGS.dot_output + "'.") + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensorflow/contrib/quantization/tools/quantize_graph.py b/tensorflow/contrib/quantization/tools/quantize_graph.py new file mode 100644 index 0000000000..5ded556691 --- /dev/null +++ b/tensorflow/contrib/quantization/tools/quantize_graph.py @@ -0,0 +1,1003 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Transforms a float-trained graph into an equivalent quantized version. + +An example of command-line usage is: +bazel build tensorflow/contrib/quantization/tools:quantize_graph \ +&& bazel-bin/tensorflow/contrib/quantization/tools/quantize_graph \ +--input=tensorflow_inception_graph.pb +--output_node_names="softmax2" --print_nodes --output=/tmp/quantized_graph.pb \ +--mode=eightbit --logtostderr + +""" + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import re +import numpy as np +import tensorflow as tf + +from tensorflow.python.framework import graph_util +from tensorflow.python.framework import tensor_util + +# TODO(petewarden) - Remove this ugly hack to get around Python linking problems +# with Bazel. 
+# pylint: disable=g-bad-import-order +from tensorflow.contrib.quantization import load_quantized_ops_so +from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so + + +flags = tf.app.flags +FLAGS = flags.FLAGS + +flags.DEFINE_boolean("print_nodes", False, """Lists all nodes in the model.""") +flags.DEFINE_string("input", "", """TensorFlow 'GraphDef' file to load.""") +flags.DEFINE_string("output_node_names", "", + """Output node names, comma separated.""") +flags.DEFINE_string("output", "", """File to save the output graph to.""") +flags.DEFINE_integer("bitdepth", 8, + """How many bits to quantize the graph to.""") +flags.DEFINE_string("mode", "round", + """What transformation to apply (round, quantize,""" + """ eightbit, weights, or weights_rounded).""") +flags.DEFINE_string("test_input_dims", "1,224,224,3", + """The size of the input tensor to use when testing a""" + """ graph loaded from a file.""") +flags.DEFINE_boolean("strip_redundant_quantization", True, + """Removes redundant dequantize/quantize pairs.""") +flags.DEFINE_boolean("load_quantization_so", True, + """Explicitly load the quantization ops library""") + + +def print_input_nodes(current_node, nodes_map, indent, already_visited): + print(" " * indent + current_node.op + ":" + current_node.name) + already_visited[current_node.name] = True + for input_node_name in current_node.input: + if input_node_name in already_visited: + continue + input_node = nodes_map[input_node_name] + print_input_nodes(input_node, nodes_map, indent + 1, already_visited) + + +def create_node(op, name, inputs): + new_node = tf.NodeDef() + new_node.op = op + new_node.name = name + for input_name in inputs: + new_node.input.extend([input_name]) + return new_node + + +def create_constant_node(name, value, dtype, shape=None): + node = create_node("Const", name, []) + set_attr_dtype(node, "dtype", dtype) + set_attr_tensor(node, "value", value, dtype, shape) + return node + + +def copy_attr(node, key, attr_value): + try: + node.attr[key].CopyFrom(attr_value) + except KeyError: + pass + + +def set_attr_dtype(node, key, value): + try: + node.attr[key].CopyFrom(tf.AttrValue(type=value.as_datatype_enum)) + except KeyError: + pass + + +def set_attr_tensor(node, key, value, dtype, shape=None): + try: + node.attr[key].CopyFrom(tf.AttrValue( + tensor=tensor_util.make_tensor_proto(value, + dtype=dtype, + shape=shape))) + except KeyError: + pass + + +def set_attr_string(node, key, value): + try: + node.attr[key].CopyFrom(tf.AttrValue(s=value)) + except KeyError: + pass + + +def set_attr_int_list(node, key, value): + list_value = tf.AttrValue.ListValue(i=value) + try: + node.attr[key].CopyFrom(tf.AttrValue(list=list_value)) + except KeyError: + pass + + +def set_attr_bool(node, key, value): + try: + node.attr[key].CopyFrom(tf.AttrValue(b=value)) + except KeyError: + pass + + +def set_attr_int(node, key, value): + try: + node.attr[key].CopyFrom(tf.AttrValue(i=value)) + except KeyError: + pass + + +def set_attr_float(node, key, value): + try: + node.attr[key].CopyFrom(tf.AttrValue(f=value)) + except KeyError: + pass + + +def node_name_from_input(node_name): + """Strips off ports and other decorations to get the underlying node name.""" + if node_name.startswith("^"): + node_name = node_name[1:] + m = re.search(r"(.*):\d+$", node_name) + if m: + node_name = m.group(1) + return node_name + + +def ensure_tensor_name_has_port(node_name): + """Makes sure that a tensor name has :0 if no explicit port exists.""" + m = re.search(r"(.*):\d+$", node_name) + if 
m: + name_with_port = node_name + else: + name_with_port = node_name + ":0" + return name_with_port + + +def unique_node_name_from_input(node_name): + """Replaces invalid characters in input names to get a unique node name.""" + return node_name.replace(":", "__port__").replace("^", "__hat__") + + +def quantize_array(arr, num_buckets): + """Quantizes a numpy array. + + This function maps each scalar in arr to the center of one of num_buckets + buckets. For instance, + quantize_array([0, 0.3, 0.6, 1], 2) => [0.25, 0.25, 0.75, 0.75] + + Args: + arr: The numpy array to quantize. + num_buckets: The number of buckets to map "var" to. + Returns: + The quantized numpy array. + Raises: + ValueError: when num_buckets < 1. + """ + if num_buckets < 1: + raise ValueError("num_buckets must be >= 1") + arr_max = arr.max() + arr_min = arr.min() + if arr_max == arr_min: + return arr + bucket_width = (arr_max - arr_min) / num_buckets + # Map scalars to bucket indices. Take special care of max(arr). + bucket_indices = np.floor((arr - arr_min) / bucket_width) + bucket_indices[bucket_indices == num_buckets] = num_buckets - 1 + # Map each scalar to the center of a bucket. + arr = arr_min + bucket_width * (bucket_indices + 0.5) + return arr + + +def quantize_weight_rounded(input_node): + """Returns a replacement node for input_node containing bucketed floats.""" + input_tensor = input_node.attr["value"].tensor + tensor_value = tensor_util.MakeNdarray(input_tensor) + tensor_shape = input_tensor.tensor_shape + # Currently, the parameter FLAGS.bitdepth is used to compute the + # number of buckets as 1 << FLAGS.bitdepth, meaning the number of + # buckets can only be a power of 2. + # This could be fixed by intorducing a new parameter, num_buckets, + # which would allow for more flexibility in chosing the right model + # size/accuracy tradeoff. But I didn't want to add more parameters + # to this script than absolutely necessary. + num_buckets = 1 << FLAGS.bitdepth + tensor_value_rounded = quantize_array(tensor_value, num_buckets) + tensor_shape_list = tensor_util.TensorShapeProtoToList(tensor_shape) + return [create_constant_node(input_node.name, tensor_value_rounded, + tf.float32, shape=tensor_shape_list)] + + +def quantize_weight_eightbit(input_node, quantization_mode): + """Returns replacement nodes for input_node using the Dequantize op.""" + base_name = input_node.name + "_" + quint8_const_name = base_name + "quint8_const" + min_name = base_name + "min" + max_name = base_name + "max" + float_tensor = tensor_util.MakeNdarray( + input_node.attr["value"].tensor) + min_value = np.min(float_tensor.flatten()) + max_value = np.max(float_tensor.flatten()) + # min_value == max_value is a tricky case. It can occur for general + # tensors, and of course for scalars. The quantized ops cannot deal + # with this case, so we set max_value to something else. + # It's a tricky question what is the numerically best solution to + # deal with this degeneracy. + # TODO(petewarden): Better use a tolerance than a hard comparison? 
+ if min_value == max_value: + if abs(min_value) < 0.000001: + max_value = min_value + 1.0 + elif min_value > 0: + max_value = 2 * min_value + else: + max_value = min_value / 2.0 + + sess = tf.Session() + with sess.as_default(): + quantize_op = tf.contrib.quantization.python.quantize_v2( + float_tensor, + min_value, + max_value, + tf.quint8, + mode=quantization_mode) + quint8_tensor = quantize_op[0].eval() + shape = tensor_util.TensorShapeProtoToList(input_node.attr[ + "value"].tensor.tensor_shape) + quint8_const_node = create_constant_node(quint8_const_name, + quint8_tensor, + tf.quint8, + shape=shape) + min_node = create_constant_node(min_name, min_value, tf.float32) + max_node = create_constant_node(max_name, max_value, tf.float32) + dequantize_node = create_node("Dequantize", input_node.name, + [quint8_const_name, min_name, max_name]) + set_attr_dtype(dequantize_node, "T", tf.quint8) + set_attr_string(dequantize_node, "mode", quantization_mode) + return [quint8_const_node, min_node, max_node, dequantize_node] + + +class GraphRewriter(object): + """Takes a float graph, and rewrites it in quantized form.""" + + def __init__(self, input_graph, mode): + """Sets up the class to rewrite a float graph. + + Args: + input_graph: A float graph to transform. + mode: A string controlling how quantization is performed - + round, quantize, eightbit, or weights. + + Raises: + ValueError: Two nodes with the same name were found in the graph. + """ + self.input_graph = input_graph + self.nodes_map = self.create_nodes_map(input_graph) + self.output_graph = None + self.mode = mode + if FLAGS.load_quantization_so: + load_quantized_ops_so.Load() + load_quantized_kernels_so.Load() + + def create_nodes_map(self, graph): + """Builds a mapping of node names to their defs from the graph.""" + nodes_map = {} + for node in graph.node: + if node.name not in nodes_map.keys(): + nodes_map[node.name] = node + else: + raise ValueError("Duplicate node names detected.") + return nodes_map + + def rewrite(self, output_node_names): + """Triggers rewriting of the float graph. + + Args: + output_node_names: A list of names of the nodes that produce the final + results. + + Returns: + A quantized version of the float graph. 
+ """ + self.output_graph = tf.GraphDef() + output_nodes = [self.nodes_map[output_node_name] + for output_node_name in output_node_names] + if self.mode == "round": + self.already_visited = {} + for output_node in output_nodes: + self.round_nodes_recursively(output_node) + elif self.mode == "quantize": + self.already_visited = {} + self.already_quantized = {} + for output_node in output_nodes: + self.quantize_nodes_recursively(output_node) + elif self.mode == "eightbit": + self.set_input_graph(graph_util.remove_training_nodes(self.input_graph)) + self.already_visited = {} + self.layers_eightbitized = [] + for output_node in output_nodes: + self.eightbitize_nodes_recursively(output_node) + self.output_graph = self.quantize_weights(self.output_graph, b"MIN_FIRST") + if FLAGS.strip_redundant_quantization: + self.output_graph = self.remove_redundant_quantization( + self.output_graph) + self.remove_dead_nodes(output_node_names) + elif self.mode == "weights": + self.output_graph = self.quantize_weights(self.input_graph, + b"MIN_COMBINED") + self.remove_dead_nodes(output_node_names) + elif self.mode == "weights_rounded": + self.output_graph = self.quantize_weights(self.input_graph, self.mode) + self.remove_dead_nodes(output_node_names) + else: + print("Bad mode - " + self.mode + ".") + return self.output_graph + + def round_nodes_recursively(self, current_node): + """The entry point for simple rounding quantization.""" + self.already_visited[current_node.name] = True + for input_node_name in current_node.input: + input_node_name = node_name_from_input(input_node_name) + if input_node_name in self.already_visited: + continue + input_node = self.nodes_map[input_node_name] + self.round_nodes_recursively(input_node) + nodes_to_quantize = ["Conv2D", "BiasAdd", "MatMul"] + if any(current_node.op in s for s in nodes_to_quantize): + new_node = tf.NodeDef() + new_node.CopyFrom(current_node) + new_node.name = current_node.name + "_original" + self.add_output_graph_node(new_node) + levels = 1 << FLAGS.bitdepth + constant_name = current_node.name + "_round_depth" + constant_tensor = tf.constant(levels, dtype=tf.int32, name=constant_name) + constant_node = constant_tensor.op.node_def + self.add_output_graph_node(constant_node) + quantize_node = tf.NodeDef() + quantize_node.op = "RoundToSteps" + quantize_node.name = current_node.name + quantize_node.input.extend([current_node.name + "_original"]) + quantize_node.input.extend([constant_node.name]) + self.add_output_graph_node(quantize_node) + else: + new_node = tf.NodeDef() + new_node.CopyFrom(current_node) + self.add_output_graph_node(new_node) + + def quantize_nodes_recursively(self, current_node): + """The entry point for quantizing nodes to eight bit and back.""" + self.already_visited[current_node.name] = True + for input_node_name in current_node.input: + input_node_name = node_name_from_input(input_node_name) + if input_node_name in self.already_visited: + continue + input_node = self.nodes_map[input_node_name] + self.quantize_nodes_recursively(input_node) + nodes_to_quantize = ["Conv2D", "BiasAdd", "MatMul"] + if any(current_node.op in s for s in nodes_to_quantize): + for input_name in current_node.input: + input_name = node_name_from_input(input_name) + input_node = self.nodes_map[input_name] + self.quantize_node(input_node) + self.quantize_node(current_node) + else: + new_node = tf.NodeDef() + new_node.CopyFrom(current_node) + self.add_output_graph_node(new_node) + + def quantize_node(self, input_node): + """Handles quantizing a single node.""" + 
input_name = input_node.name + if input_name in self.already_quantized: + return + self.already_quantized[input_name] = True + original_input_name = input_name + "_original" + reshape_name = input_name + "_reshape" + reshape_dims_name = input_name + "_reshape_dims" + max_name = input_name + "_max" + min_name = input_name + "_min" + dims_name = input_name + "_dims" + quantize_name = input_name + "_quantize" + dequantize_name = input_name + original_input_node = tf.NodeDef() + original_input_node.CopyFrom(input_node) + original_input_node.name = original_input_name + self.add_output_graph_node(original_input_node) + reshape_dims_node = create_constant_node(reshape_dims_name, -1, tf.int32, + [1]) + self.add_output_graph_node(reshape_dims_node) + reshape_node = create_node("Reshape", reshape_name, [original_input_name, + reshape_dims_name]) + set_attr_dtype(reshape_node, "T", tf.float32) + self.add_output_graph_node(reshape_node) + dims_node = create_constant_node(dims_name, 0, tf.int32, [1]) + self.add_output_graph_node(dims_node) + max_node = create_node("Max", max_name, [reshape_name, dims_name]) + set_attr_dtype(max_node, "T", tf.float32) + set_attr_bool(max_node, "keep_dims", False) + self.add_output_graph_node(max_node) + min_node = create_node("Min", min_name, [reshape_name, dims_name]) + set_attr_dtype(min_node, "T", tf.float32) + set_attr_bool(min_node, "keep_dims", False) + self.add_output_graph_node(min_node) + quantize_node = create_node("Quantize", quantize_name, [original_input_name, + min_name, max_name]) + set_attr_dtype(quantize_node, "T", tf.quint8) + set_attr_string(quantize_node, "mode", b"MIN_FIRST") + self.add_output_graph_node(quantize_node) + dequantize_node = create_node("Dequantize", dequantize_name, + [quantize_name, min_name, max_name]) + set_attr_dtype(dequantize_node, "T", tf.quint8) + set_attr_string(dequantize_node, "mode", b"MIN_FIRST") + self.add_output_graph_node(dequantize_node) + + def eightbitize_nodes_recursively(self, current_node): + """The entry point for transforming a graph into full eight bit.""" + self.already_visited[current_node.name] = True + for input_node_name in current_node.input: + input_node_name = node_name_from_input(input_node_name) + if input_node_name in self.already_visited: + continue + input_node = self.nodes_map[input_node_name] + self.eightbitize_nodes_recursively(input_node) + if current_node.op == "MatMul": + self.eightbitize_mat_mul_node(current_node) + elif current_node.op == "Conv2D": + self.eightbitize_conv_node(current_node) + self.layers_eightbitized.append(current_node.name) + elif current_node.op == "BiasAdd": + self.eightbitize_bias_add_node(current_node) + elif current_node.op == "MaxPool" or current_node.op == "AvgPool": + self.eightbitize_single_input_tensor_node(current_node, + self.add_pool_function) + elif current_node.op == "Relu" or current_node.op == "Relu6": + self.eightbitize_single_input_tensor_node(current_node, + self.add_relu_function) + elif current_node.op == "Concat": + self.eightbitize_concat_node(current_node) + elif current_node.op == "BatchNormWithGlobalNormalization": + self.eightbitize_batch_norm_node(current_node) + else: + new_node = tf.NodeDef() + new_node.CopyFrom(current_node) + self.add_output_graph_node(new_node) + + def add_eightbit_prologue_nodes(self, original_node): + """Adds input conversion nodes to handle quantizing the underlying node.""" + namespace_prefix = original_node.name + "_eightbit" + reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes( + 
namespace_prefix) + input_names = [] + min_max_names = [] + for original_input_name in original_node.input: + quantize_input_name, min_input_name, max_input_name = ( + self.eightbitize_input_to_node(namespace_prefix, original_input_name, + reshape_dims_name, + reduction_dims_name)) + input_names.append(quantize_input_name) + min_max_names.append(min_input_name) + min_max_names.append(max_input_name) + all_input_names = [] + all_input_names.extend(input_names) + all_input_names.extend(min_max_names) + return all_input_names + + def add_common_quantization_nodes(self, namespace_prefix): + """Builds constant nodes needed for quantization of inputs.""" + reshape_dims_name = namespace_prefix + "_reshape_dims" + reduction_dims_name = namespace_prefix + "_reduction_dims" + + reshape_dims_node = create_constant_node(reshape_dims_name, -1, tf.int32, + [1]) + self.add_output_graph_node(reshape_dims_node) + reduction_dims_node = create_constant_node(reduction_dims_name, 0, tf.int32, + [1]) + self.add_output_graph_node(reduction_dims_node) + return reshape_dims_name, reduction_dims_name + + def eightbitize_input_to_node(self, namespace_prefix, original_input_name, + reshape_dims_name, reduction_dims_name): + """Takes one float input to an op, and converts it to quantized form.""" + unique_input_name = unique_node_name_from_input(original_input_name) + reshape_input_name = namespace_prefix + "_reshape_" + unique_input_name + min_input_name = namespace_prefix + "_min_" + unique_input_name + max_input_name = namespace_prefix + "_max_" + unique_input_name + quantize_input_name = namespace_prefix + "_quantize_" + unique_input_name + reshape_input_node = create_node("Reshape", reshape_input_name, + [original_input_name, reshape_dims_name]) + set_attr_dtype(reshape_input_node, "T", tf.float32) + self.add_output_graph_node(reshape_input_node) + min_input_node = create_node("Min", min_input_name, [reshape_input_name, + reduction_dims_name]) + set_attr_dtype(min_input_node, "T", tf.float32) + set_attr_bool(min_input_node, "keep_dims", False) + self.add_output_graph_node(min_input_node) + max_input_node = create_node("Max", max_input_name, [reshape_input_name, + reduction_dims_name]) + set_attr_dtype(max_input_node, "T", tf.float32) + set_attr_bool(max_input_node, "keep_dims", False) + self.add_output_graph_node(max_input_node) + quantize_input_node = create_node("QuantizeV2", quantize_input_name, + [original_input_name, min_input_name, + max_input_name]) + set_attr_dtype(quantize_input_node, "T", tf.quint8) + set_attr_string(quantize_input_node, "mode", b"MIN_FIRST") + self.add_output_graph_node(quantize_input_node) + min_output_name = quantize_input_name + ":1" + max_output_name = quantize_input_name + ":2" + return quantize_input_name, min_output_name, max_output_name + + def add_quantize_down_node(self, original_node, quantized_output_name): + quantize_down_name = original_node.name + "_eightbit_quantize_down" + quantize_down_node = create_node( + "QuantizeDownAndShrinkRange", quantize_down_name, + [quantized_output_name, quantized_output_name + ":1", + quantized_output_name + ":2"]) + set_attr_dtype(quantize_down_node, "Tinput", tf.qint32) + set_attr_dtype(quantize_down_node, "out_type", tf.quint8) + self.add_output_graph_node(quantize_down_node) + return quantize_down_name + + def add_dequantize_result_node(self, quantized_output_name, + original_node_name): + dequantize_name = original_node_name + dequantize_node = create_node("Dequantize", dequantize_name, + [quantized_output_name, + 
quantized_output_name + ":1", + quantized_output_name + ":2"]) + set_attr_dtype(dequantize_node, "T", tf.quint8) + set_attr_string(dequantize_node, "mode", b"MIN_FIRST") + self.add_output_graph_node(dequantize_node) + + def eightbitize_mat_mul_node(self, original_node): + """Replaces a MatMul node with the eight bit equivalent sub-graph.""" + quantized_mat_mul_name = original_node.name + "_eightbit_quantized_bias_add" + all_input_names = self.add_eightbit_prologue_nodes(original_node) + quantized_mat_mul_node = create_node( + "QuantizedMatMul", quantized_mat_mul_name, + all_input_names) + set_attr_dtype(quantized_mat_mul_node, "T1", tf.quint8) + set_attr_dtype(quantized_mat_mul_node, "T2", tf.quint8) + set_attr_dtype(quantized_mat_mul_node, "Toutput", tf.qint32) + copy_attr(quantized_mat_mul_node, "transpose_a", + original_node.attr["transpose_a"]) + copy_attr(quantized_mat_mul_node, "transpose_b", + original_node.attr["transpose_b"]) + self.add_output_graph_node(quantized_mat_mul_node) + quantize_down_name = self.add_quantize_down_node(original_node, + quantized_mat_mul_name) + self.add_dequantize_result_node(quantize_down_name, original_node.name) + + def eightbitize_conv_node(self, original_node): + """Replaces a Conv2D node with the eight bit equivalent sub-graph.""" + all_input_names = self.add_eightbit_prologue_nodes(original_node) + quantized_conv_name = original_node.name + "_eightbit_quantized_conv" + quantized_conv_node = create_node("QuantizedConv2D", quantized_conv_name, + all_input_names) + copy_attr(quantized_conv_node, "strides", original_node.attr["strides"]) + copy_attr(quantized_conv_node, "padding", original_node.attr["padding"]) + set_attr_dtype(quantized_conv_node, "Tinput", tf.quint8) + set_attr_dtype(quantized_conv_node, "Tfilter", tf.quint8) + set_attr_dtype(quantized_conv_node, "out_type", tf.qint32) + self.add_output_graph_node(quantized_conv_node) + quantize_down_name = self.add_quantize_down_node(original_node, + quantized_conv_name) + self.add_dequantize_result_node(quantize_down_name, original_node.name) + + def eightbitize_bias_add_node(self, original_node): + """Replaces a BiasAdd node with the eight bit equivalent sub-graph.""" + quantized_bias_add_name = (original_node.name + + "_eightbit_quantized_bias_add") + all_input_names = self.add_eightbit_prologue_nodes(original_node) + quantized_bias_add_node = create_node( + "QuantizedBiasAdd", quantized_bias_add_name, + all_input_names) + set_attr_dtype(quantized_bias_add_node, "T1", tf.quint8) + set_attr_dtype(quantized_bias_add_node, "T2", tf.quint8) + set_attr_dtype(quantized_bias_add_node, "out_type", tf.qint32) + self.add_output_graph_node(quantized_bias_add_node) + quantize_down_name = self.add_quantize_down_node(original_node, + quantized_bias_add_name) + self.add_dequantize_result_node(quantize_down_name, original_node.name) + + def eightbitize_single_input_tensor_node(self, original_node, + add_op_function): + """Replaces a single-tensor node with the eight bit equivalent sub-graph. + + Converts a node like this: + + Shape(f) Input(f) + | | + +--------v v + Operation + | + v + (f) + + Into a quantized equivalent: + + Input(f) ReshapeDims + +------v v-------------+ + | Reshape + | | + | | ReductionDims + | +-----+ | + | | +---c---------+ + | v v v v-------+ + | Min Max + | +----+ | + v v v--------+ + Quantize + | + v + QuantizedOperation + | | | + v v v + Dequantize + | + v + (f) + + + Args: + original_node: Float node to be converted. + add_op_function: Function to create the actual node. 
+ + Returns: + Subgraph representing the quantized version of the original node. + + """ + quantized_op_name = original_node.name + "_eightbit_quantized" + quantized_op_type = "Quantized" + original_node.op + all_input_names = self.add_eightbit_prologue_nodes(original_node) + quantized_op_node = create_node( + quantized_op_type, quantized_op_name, all_input_names) + add_op_function(original_node, quantized_op_node) + self.add_output_graph_node(quantized_op_node) + self.add_dequantize_result_node(quantized_op_name, original_node.name) + + def add_pool_function(self, original_node, quantized_op_node): + set_attr_dtype(quantized_op_node, "T", tf.quint8) + copy_attr(quantized_op_node, "ksize", original_node.attr["ksize"]) + copy_attr(quantized_op_node, "strides", original_node.attr["strides"]) + copy_attr(quantized_op_node, "padding", original_node.attr["padding"]) + + def add_relu_function(self, unused_arg_node, quantized_op_node): + set_attr_dtype(quantized_op_node, "Tinput", tf.quint8) + + def eightbitize_concat_node(self, original_node): + """Replaces a Concat node with the eight bit equivalent sub-graph. + + Converts a node like this: + + Shape(f) Input0(f) Input1(f) + | | | + +--------v v v----------+ + Concat + | + v + (f) + + Into a quantized equivalent: + + Shape(f) Input0(f) ReshapeDims Input1(f) + | +------v v--------------+------------------v v------+ + | | Reshape Reshape | + | | | | | + | | | ReductionDims | | + | | +------+ | +--------+ | + | | | +---c---------+-----------c-----+ | | + | | +v v v v-------+---------v v v v+ | + | | Min Max Min Max | + | | +----+ | | +-----+ | + | v v v--------+ +----------v v v + | Quantize Quantize + | +------------------+ +----------------------+ + +-------------------------------+ | | + v v v + QuantizedConcat + | | | + v v v + Dequantize + | + v + (f) + Args: + original_node: Float node to be converted. + + Returns: + Subgraph representing the quantized version of the original node. 
+ + """ + namespace_prefix = original_node.name + "_eightbit" + quantized_concat_name = namespace_prefix + "_quantized_concat" + reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes( + namespace_prefix) + shape_input_name = original_node.input[0] + original_inputs = original_node.input[1:] + input_names = [] + min_names = [] + max_names = [] + for original_input_name in original_inputs: + quantize_input_name, min_input_name, max_input_name = ( + self.eightbitize_input_to_node(namespace_prefix, original_input_name, + reshape_dims_name, + reduction_dims_name)) + input_names.append(quantize_input_name) + min_names.append(min_input_name) + max_names.append(max_input_name) + all_input_names = [shape_input_name] + all_input_names.extend(input_names) + all_input_names.extend(min_names) + all_input_names.extend(max_names) + quantized_concat_node = create_node( + "QuantizedConcat", quantized_concat_name, all_input_names) + set_attr_int(quantized_concat_node, "N", len(original_inputs)) + set_attr_dtype(quantized_concat_node, "T", tf.quint8) + self.add_output_graph_node(quantized_concat_node) + self.add_dequantize_result_node(quantized_concat_name, original_node.name) + + def eightbitize_batch_norm_node(self, original_node): + """Replaces a MatMul node with the eight bit equivalent sub-graph.""" + namespace_prefix = original_node.name + "_eightbit" + original_input_name = original_node.input[0] + original_mean_name = original_node.input[1] + original_variance_name = original_node.input[2] + original_beta_name = original_node.input[3] + original_gamma_name = original_node.input[4] + quantized_batch_norm_name = namespace_prefix + "_quantized_batch_norm" + + reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes( + namespace_prefix) + quantize_input_name, min_input_name, max_input_name = ( + self.eightbitize_input_to_node(namespace_prefix, original_input_name, + reshape_dims_name, reduction_dims_name)) + quantize_mean_name, min_mean_name, max_mean_name = ( + self.eightbitize_input_to_node(namespace_prefix, original_mean_name, + reshape_dims_name, reduction_dims_name)) + quantize_variance_name, min_variance_name, max_variance_name = ( + self.eightbitize_input_to_node(namespace_prefix, original_variance_name, + reshape_dims_name, reduction_dims_name)) + quantize_beta_name, min_beta_name, max_beta_name = ( + self.eightbitize_input_to_node(namespace_prefix, original_beta_name, + reshape_dims_name, reduction_dims_name)) + quantize_gamma_name, min_gamma_name, max_gamma_name = ( + self.eightbitize_input_to_node(namespace_prefix, original_gamma_name, + reshape_dims_name, reduction_dims_name)) + quantized_batch_norm_node = create_node( + "QuantizedBatchNormWithGlobalNormalization", quantized_batch_norm_name, + [quantize_input_name, min_input_name, max_input_name, + quantize_mean_name, min_mean_name, max_mean_name, + quantize_variance_name, min_variance_name, max_variance_name, + quantize_beta_name, min_beta_name, max_beta_name, quantize_gamma_name, + min_gamma_name, max_gamma_name]) + set_attr_dtype(quantized_batch_norm_node, "Tinput", tf.quint8) + set_attr_dtype(quantized_batch_norm_node, "out_type", tf.qint32) + copy_attr(quantized_batch_norm_node, "scale_after_normalization", + original_node.attr["scale_after_normalization"]) + copy_attr(quantized_batch_norm_node, "variance_epsilon", + original_node.attr["variance_epsilon"]) + self.add_output_graph_node(quantized_batch_norm_node) + quantize_down_name = self.add_quantize_down_node(original_node, + 
quantized_batch_norm_name) + self.add_dequantize_result_node(quantize_down_name, original_node.name) + + def add_output_graph_node(self, output_node): + """Inserts one node into the new graph.""" + self.output_graph.node.extend([output_node]) + + def remove_redundant_quantization(self, old_graph): + """Removes unneeded pairs of quantize/dequantize ops from the graph. + + This is a bit of a tricky function, because it's attempting to spot the + pattern of dequantizing from eight-bit up to float, and then immediately + quantizing back down to eight bits again, that's introduced by previous + passes that do 'key-hole' conversions of individual nodes but have to + convert back to float to match the previous output interface, since they + don't know that the next op can handle quantized tensors. + It works by: + - Looking for Quantize nodes. + - Checking to see if their first input is a Dequantize node. + - Seeing if their min/max inputs come from Min/Max nodes. + - Making sure those Min/Max nodes are being fed from the same Dequantize. + - Or that the Min is indirectly being fed from the same Dequantize as Max. + - Making sure the Dequantize is going through a Reshape (which we add + during the previous pass when we create the quantize sub-graph). + - Looking for the dims Const op for the Min/Max dims. + If all of these conditions are met, then it's a sub-graph pattern that + we know how to optimize out (and is likely the common one we've introduced). + We then rewire the graph to skip it entirely, and then rely on the dead node + removal pass to get rid of any nodes that are no longer needed. + + Args: + old_graph: The model we'll be stripping redundant nodes from. + + Returns: + A graph with the unnecessary nodes removed. + + Raises: + ValueError: Two nodes with the same name were found in the graph. + """ + old_nodes_map = self.create_nodes_map(old_graph) + self.output_graph = tf.GraphDef() + inputs_to_rename = {} + # We go through all the nodes, looking for any that match the patterns we + # know how to optimize away. + for node in old_graph.node: + # We always start with a Quantize node, and examine its inputs to see if + # they are in a form that can be removed. + if node.op not in ["Quantize", "QuantizeV2"]: + continue + dequantize_node_name = node_name_from_input(node.input[0]) + if dequantize_node_name not in old_nodes_map: + raise ValueError("Input node name '" + dequantize_node_name + + "' not found in node '" + node.name + "'") + dequantize_node = old_nodes_map[dequantize_node_name] + # Do we have a Dequantize feeding in, with the same type as the Quantize? + if dequantize_node.op != "Dequantize": + continue + if node.attr["T"] != dequantize_node.attr["T"]: + continue + # Now look at the other inputs, and ensure they're Min/Max nodes. + min_node_name = node_name_from_input(node.input[1]) + max_node_name = node_name_from_input(node.input[2]) + min_node = old_nodes_map[min_node_name] + max_node = old_nodes_map[max_node_name] + is_min_right_type = (min_node.op in ["Min", "Dequantize"]) + is_max_right_type = (max_node.op in ["Max", "Dequantize"]) + if not is_min_right_type or not is_max_right_type: + print("Didn't find expected types on inputs : %s, %s." 
% ( + min_node.op, max_node.op)) + continue + min_node_input_name = node_name_from_input(min_node.input[0]) + max_node_input_name = node_name_from_input(max_node.input[0]) + # There are two different patterns for Min nodes we can recognize, one + # where the input comes directly from the same one as the Max, and + # another where we run it through another Min first, so check for both. + is_same_input = False + if min_node_input_name == max_node_input_name: + is_same_input = True + else: + first_min_node_input = old_nodes_map[min_node_input_name] + if first_min_node_input.op == "Concat": + second_min_node_name = node_name_from_input( + first_min_node_input.input[1]) + second_min_node = old_nodes_map[second_min_node_name] + if second_min_node.op == "Min": + second_min_node_input_name = node_name_from_input( + second_min_node.input[0]) + is_same_input = (second_min_node_input_name == max_node_input_name) + if not is_same_input: + print("Different min/max inputs: " + min_node_input_name) + continue + # We recognize this pattern, so mark the graph edges to be rewired to + # route around it entirely, since we know it's a no-op. + dequantize_source_name = node_name_from_input(dequantize_node.input[0]) + node_tensor_name = ensure_tensor_name_has_port(node.name) + min_tensor_name = node.name + ":1" + max_tensor_name = node.name + ":2" + inputs_to_rename[node_tensor_name] = dequantize_source_name + inputs_to_rename[min_tensor_name] = dequantize_node.input[1] + inputs_to_rename[max_tensor_name] = dequantize_node.input[2] + # Finally we apply all the rewiring we've marked to the graph. + for node in old_graph.node: + for index, input_full_name in enumerate(node.input): + input_name = ensure_tensor_name_has_port(input_full_name) + if input_name in inputs_to_rename: + node.input[index] = inputs_to_rename[input_name] + self.add_output_graph_node(node) + return self.output_graph + + def remove_dead_nodes(self, output_names): + """Removes nodes that are no longer needed for inference from the graph.""" + old_output_graph = self.output_graph + self.output_graph = graph_util.extract_sub_graph(old_output_graph, + output_names) + + def quantize_weights(self, input_graph, quantization_mode): + """Quantize float Const ops. + + There are two modes of operations, both replace float Const ops with + quantized values. + 1. If quantization_mode is "weights_rounded", this function replaces float + Const ops with quantized float Const ops - same as the original op, but + float values being mapped to the center of one of 1<<FLAGS.bitdepth buckets. + This does not change the raw model size, but compression algorithms such as + zip (as used for compressing apks) or bzip2 will achieve a very good + compression ratio. + 2. For other quantization modes ("MIN_COMBINED" or "MIN_FIRST"), float + Const ops are quantized and replaced by a tuple of four ops to perform + the dequantization at runtime: + * eight-bit Const (bucket indices, same shape as original float Const op + * two float Const ops (min and max value of original float Const op) + * Dequantize op to convert the eight-bit consts to float tensors. + The quantization mode is important because we see accuracy problems when + quantizing weights for different situations depending on the algorithm + used. We haven't figured out exactly what the underlying cause is yet, + unfortunately. + + Args: + input_graph: A GraphDef of the model containing float Const ops. + quantization_mode: How to quantize and dequantize the values. + + Returns: + A GraphDef of the converted graph. 
+ + Raises: + ValueError: If quantization_mode is unsupported. + """ + output_graph = tf.GraphDef() + for input_node in input_graph.node: + should_quantize = False + if input_node.op == "Const": + dtype = tf.as_dtype(input_node.attr["dtype"].type) + if dtype == tf.float32: + should_quantize = True + if should_quantize: + if quantization_mode == "weights_rounded": + output_graph.node.extend(quantize_weight_rounded(input_node)) + elif quantization_mode in (b"MIN_COMBINED", b"MIN_FIRST"): + output_graph.node.extend(quantize_weight_eightbit(input_node, + quantization_mode)) + else: + raise ValueError("Unsupported quantization mode %s." % + quantization_mode) + else: + output_node = tf.NodeDef() + output_node.CopyFrom(input_node) + output_graph.node.extend([output_node]) + return output_graph + + def set_input_graph(self, new_input_graph): + self.input_graph = new_input_graph + self.nodes_map = self.create_nodes_map(self.input_graph) + + +def main(unused_args): + if not tf.gfile.Exists(FLAGS.input): + print("Input graph file '" + FLAGS.input + "' does not exist!") + return -1 + + known_modes = ["round", "quantize", "eightbit", "weights", "test", + "weights_rounded"] + if not any(FLAGS.mode in s for s in known_modes): + print("mode is '" + FLAGS.mode + "', not in " + ", ".join(known_modes) + + ".") + return -1 + + tf_graph = tf.GraphDef() + with tf.gfile.Open(FLAGS.input, "rb") as f: + data = f.read() + tf_graph.ParseFromString(data) + + graph = tf.Graph() + with graph.as_default(): + tf.import_graph_def(tf_graph, input_map={}, name="") + + rewriter = GraphRewriter(tf_graph, FLAGS.mode) + + output_graph = rewriter.rewrite(FLAGS.output_node_names.split(",")) + + f = tf.gfile.FastGFile(FLAGS.output, "wb") + f.write(output_graph.SerializeToString()) + + return 0 + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensorflow/contrib/quantization/tools/quantize_graph_test.py b/tensorflow/contrib/quantization/tools/quantize_graph_test.py new file mode 100644 index 0000000000..4826ea2689 --- /dev/null +++ b/tensorflow/contrib/quantization/tools/quantize_graph_test.py @@ -0,0 +1,698 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests the graph quantization script. 
+ +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +import tensorflow as tf +from tensorflow.contrib.quantization.tools import quantize_graph +from tensorflow.python.framework import graph_util + +flags = tf.app.flags +FLAGS = flags.FLAGS + + +def run_graph_def(graph_def, input_map, outputs): + graph = tf.Graph() + with graph.as_default(): + tf.import_graph_def(graph_def, input_map={}, name="") + with tf.Session(graph=graph) as sess: + results = sess.run(outputs, feed_dict=input_map) + return results + + +def test_mat_mul(m, n, k, a, b): + """Tests a MatMul replacement.""" + a_constant_name = "a_constant" + b_constant_name = "b_constant" + mat_mul_name = "mat_mul" + + float_graph_def = tf.GraphDef() + a_constant = quantize_graph.create_constant_node(a_constant_name, + value=a, + dtype=tf.float32, + shape=[m, k]) + float_graph_def.node.extend([a_constant]) + b_constant = quantize_graph.create_constant_node(b_constant_name, + value=b, + dtype=tf.float32, + shape=[k, n]) + float_graph_def.node.extend([b_constant]) + mat_mul_node = quantize_graph.create_node("MatMul", mat_mul_name, + [a_constant_name, b_constant_name]) + quantize_graph.set_attr_dtype(mat_mul_node, "T", tf.float32) + quantize_graph.set_attr_bool(mat_mul_node, "transpose_a", False) + quantize_graph.set_attr_bool(mat_mul_node, "transpose_b", False) + float_graph_def.node.extend([mat_mul_node]) + + test_graph(float_graph_def, {}, [mat_mul_name]) + + +def test_conv(depth, image_width, image_height, image_batch_count, filter_size, + filter_count, stride, padding, input_values, filter_values): + """Tests a Conv replacement.""" + input_constant_name = "input_constant" + filter_constant_name = "filter_constant" + conv_name = "conv" + + float_graph_def = tf.GraphDef() + input_constant = quantize_graph.create_constant_node( + input_constant_name, + value=input_values, + dtype=tf.float32, + shape=[ + image_batch_count, image_height, image_width, depth + ]) + float_graph_def.node.extend([input_constant]) + filter_constant = quantize_graph.create_constant_node( + filter_constant_name, + value=filter_values, + dtype=tf.float32, + shape=[ + filter_size, filter_size, depth, filter_count + ]) + float_graph_def.node.extend([filter_constant]) + conv_node = quantize_graph.create_node("Conv2D", conv_name, + [input_constant_name, + filter_constant_name]) + quantize_graph.set_attr_dtype(conv_node, "T", tf.float32) + quantize_graph.set_attr_int_list(conv_node, "strides", [1, stride, stride, 1]) + quantize_graph.set_attr_string(conv_node, "padding", padding) + float_graph_def.node.extend([conv_node]) + + test_graph(float_graph_def, {}, [conv_name]) + + +def are_tensors_near(a, b, tolerance): + """Tests whether two tensors are nearly identical. + + This is a specialized comparison function designed to help debug problems with + quantization. It prints out information about the differences between tensors + on failure, paying special attention to possible biases by looking at the mean + and absolute average errors. + + Args: + a: First comparison tensor. + b: Second comparison tensor. + tolerance: Float value indicating how large an error between values is ok. + + Returns: + Boolean indicating whether the two inputs were close enough. 
+  """
+  flat_a = a.flatten()
+  flat_b = b.flatten()
+  if len(flat_a) != len(flat_b):
+    print("Tensors are different sizes: " + str(len(flat_a)) + " vs " +
+          str(len(flat_b)))
+    return False
+  value_count = len(flat_a)
+  how_many_different = 0
+  total_difference = 0
+  total_abs_difference = 0
+  for index in range(value_count):
+    a_value = flat_a[index]
+    b_value = flat_b[index]
+    difference = a_value - b_value
+    total_difference += difference
+    total_abs_difference += abs(difference)
+    if abs(difference) > tolerance:
+      how_many_different += 1
+  mean_difference = total_difference / value_count
+  mean_abs_difference = total_abs_difference / value_count
+  proportion_different = (how_many_different * 1.0) / value_count
+  if how_many_different == 0:
+    return True
+  else:
+    print("Tensors have {0} different values ({1}%), with mean difference"
+          " {2} and mean absolute difference {3}".format(
+              how_many_different, proportion_different * 100, mean_difference,
+              mean_abs_difference))
+    return False
+
+
+def get_top_value(input_values):
+  max_value = None
+  max_index = None
+  for index, value in enumerate(input_values.flatten()):
+    if max_value is None or value > max_value:
+      max_value = value
+      max_index = index
+  return max_index, max_value
+
+
+def test_graph(float_graph_def, input_map, output_names):
+  """Runs the float graph through the rewriter and tests the results."""
+  float_results = run_graph_def(float_graph_def, input_map,
+                                [output_name + ":0"
+                                 for output_name in output_names])
+  # TODO(petewarden): round test is currently failing because there is no
+  # RoundToSteps op available.
+  # round_rewriter = quantize_graph.GraphRewriter(float_graph_def, "round")
+  # round_graph_def = round_rewriter.rewrite(output_name)
+  # round_results = run_graph_def(round_graph_def, input_map,
+  #                               [output_name + ":0"])
+  # assert are_tensors_near(expected, round_results[0], 1.0)
+  #
+  # TODO(petewarden): Add test for "quantize" mode.
+
+  eightbit_rewriter = quantize_graph.GraphRewriter(float_graph_def, "eightbit")
+  eightbit_graph_def = eightbit_rewriter.rewrite(output_names)
+  eightbit_results = run_graph_def(eightbit_graph_def, input_map,
+                                   [output_name + ":0"
+                                    for output_name in output_names])
+  for expected, result in zip(float_results, eightbit_results):
+    assert are_tensors_near(expected, result, 1.0)
+
+  # Test the weights_rounded mode. This uses the default FLAGS.bitdepth.
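+  # weights_rounded leaves the Const ops as float32 but snaps every value to
+  # the center of one of 1 << FLAGS.bitdepth buckets, so the rewritten graph
+  # should still match the float results within the same 1.0 tolerance.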
+  weights_rounded_rewriter = quantize_graph.GraphRewriter(
+      float_graph_def, "weights_rounded")
+  weights_rounded_graph_def = weights_rounded_rewriter.rewrite(output_names)
+  weights_rounded_results = run_graph_def(weights_rounded_graph_def, input_map,
+                                          [output_name + ":0"
+                                           for output_name in output_names])
+  for expected, result in zip(float_results, weights_rounded_results):
+    assert are_tensors_near(expected, result, 1.0)
+
+
+class QuantizeGraphTest(tf.test.TestCase):
+
+  def test_negative_const_problem(self):
+    shape_constant_name = "shape_constant"
+    shape_constant = quantize_graph.create_constant_node(
+        shape_constant_name, value=-0.8, dtype=tf.float32, shape=[1])
+    quantization_result = quantize_graph.quantize_weight_eightbit(
+        shape_constant, b"MIN_COMBINED")
+    self.assertEqual(4, len(quantization_result))
+
+  def test_odd_padding_problem(self):
+    """Tests one error case we ran into in a real graph."""
+    test_conv(1, 4, 4, 1, 3, 1, 2, b"SAME",
+              [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+              [1, 2, 3, 4, 5, 6, 7, 8, 9])
+
+  def test_mat_mul_tiny(self):
+    # These tests are added to test the degenerate case where
+    # min(matrix) == max(matrix), which used to cause problems.
+    test_mat_mul(1, 1, 1, [2], [3])
+    test_mat_mul(1, 2, 1, [1], [2, 3])
+    test_mat_mul(1, 1, 2, [1, 1], [1, 1])
+    test_mat_mul(1, 1, 2, [0, 0], [1, 1])
+    # The general case.
+    test_mat_mul(1, 1, 2, [1, 2], [1, 2])
+
+  def test_mat_mul_small(self):
+    test_mat_mul(2, 4, 3, [1, 2, 3, 4, 5, 6],
+                 [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18])
+
+  def test_conv(self):
+    test_conv(1, 4, 3, 1, 3, 1, 1, b"SAME",
+              [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+              [1, 4, 7, 2, 5, 8, 3, 6, 9])
+
+  def test_quantize_array(self):
+    # Test invalid parameters (empty array, or 0 buckets).
+    self.assertRaises(ValueError, quantize_graph.quantize_array,
+                      np.array([]), 2)
+    self.assertRaises(ValueError, quantize_graph.quantize_array,
+                      np.array([1, 2]), 0)
+    # Test input array of length 1.
+    arr = np.array([1])
+    qarr = quantize_graph.quantize_array(arr, 1)
+    self.assertEqual(arr, qarr)
+    qarr = quantize_graph.quantize_array(arr, 2)
+    self.assertEqual(arr, qarr)
+    # Test input array with all elements equal.
+    arr = np.array([1, 1, 1])
+    qarr = quantize_graph.quantize_array(arr, 10)
+    self.assertTrue((np.array([1, 1, 1]) == qarr).all())
+    # Test "normal" input arrays.
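+    # For example, with num_buckets=2 the range [0, 1] splits into buckets of
+    # width 0.5 whose centers are 0.25 and 0.75, which is what the assertions
+    # below expect.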
+ arr = np.array([0, 0.3, 0.6, 1]) + qarr = quantize_graph.quantize_array(arr, 1) + self.assertTrue((np.array([0.5, 0.5, 0.5, 0.5]) == qarr).all()) + qarr = quantize_graph.quantize_array(arr, 2) + self.assertTrue((np.array([0.25, 0.25, 0.75, 0.75]) == qarr).all()) + qarr = quantize_graph.quantize_array(arr.reshape((2, 2)), 2) + self.assertTrue((np.array([[0.25, 0.25], [0.75, 0.75]]) == qarr).all()) + + def test_concat(self): + shape_constant_name = "shape_constant" + a_constant_name = "a_constant" + b_constant_name = "b_constant" + concat_name = "concat" + + float_graph_def = tf.GraphDef() + shape_constant = quantize_graph.create_constant_node(shape_constant_name, + value=0, + dtype=tf.int32, + shape=[]) + float_graph_def.node.extend([shape_constant]) + a_constant = quantize_graph.create_constant_node(a_constant_name, + value=[1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12], + dtype=tf.float32, + shape=[2, 2, 3]) + float_graph_def.node.extend([a_constant]) + b_constant = quantize_graph.create_constant_node(b_constant_name, + value=[13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, + 23, 24], + dtype=tf.float32, + shape=[2, 2, 3]) + float_graph_def.node.extend([b_constant]) + concat_node = quantize_graph.create_node("Concat", concat_name, + [shape_constant_name, + a_constant_name, b_constant_name]) + quantize_graph.set_attr_int(concat_node, "N", 2) + quantize_graph.set_attr_dtype(concat_node, "T", tf.float32) + float_graph_def.node.extend([concat_node]) + + test_graph(float_graph_def, {}, [concat_name]) + + def test_multiple_outputs(self): + input_constant_name = "input_constant" + split_constant_name = "split_constant" + split_name = "split" + concat_constant_name = "concat_constant" + concat_name = "concat" + + float_graph_def = tf.GraphDef() + input_constant = quantize_graph.create_constant_node(input_constant_name, + value=[1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12], + dtype=tf.float32, + shape=[2, 6]) + float_graph_def.node.extend([input_constant]) + split_constant = quantize_graph.create_constant_node(split_constant_name, + value=1, + dtype=tf.int32, + shape=[]) + float_graph_def.node.extend([split_constant]) + split_node = quantize_graph.create_node("Split", split_name, + [split_constant_name, + input_constant_name]) + quantize_graph.set_attr_int(split_node, "num_split", 2) + quantize_graph.set_attr_dtype(split_node, "T", tf.float32) + float_graph_def.node.extend([split_node]) + concat_constant = quantize_graph.create_constant_node(concat_constant_name, + value=1, + dtype=tf.int32, + shape=[]) + float_graph_def.node.extend([concat_constant]) + concat_node = quantize_graph.create_node("Concat", concat_name, + [concat_constant_name, + split_name + ":0", + split_name + ":1"]) + quantize_graph.set_attr_int(concat_node, "N", 2) + quantize_graph.set_attr_dtype(concat_node, "T", tf.float32) + float_graph_def.node.extend([concat_node]) + + test_graph(float_graph_def, {}, [concat_name]) + + def test_node_name_from_input(self): + self.assertEqual("SomeName", + quantize_graph.node_name_from_input("^SomeName:2")) + + def test_unique_node_name_from_input(self): + self.assertEqual("__hat__SomeName__port__2", + quantize_graph.unique_node_name_from_input("^SomeName:2")) + + def test_identity(self): + input_constant_name = "input_constant" + identity_name = "identity" + float_graph_def = tf.GraphDef() + input_constant = quantize_graph.create_constant_node(input_constant_name, + value=[1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12], + dtype=tf.float32, + shape=[2, 6]) + float_graph_def.node.extend([input_constant]) + 
identity_node = quantize_graph.create_node("Identity", identity_name, + [input_constant_name]) + quantize_graph.set_attr_dtype(identity_node, "T", tf.float32) + float_graph_def.node.extend([identity_node]) + test_graph(float_graph_def, {}, [identity_name]) + + def test_keep_control_edges(self): + no_op_name = "no_op" + a_constant_name = "a_constant" + b_constant_name = "b_constant" + a_check_name = "a_check" + b_check_name = "b_check" + a_identity_name = "a_identity" + b_identity_name = "b_identity" + add_name = "add" + graph_def = tf.GraphDef() + no_op = quantize_graph.create_node("NoOp", no_op_name, []) + graph_def.node.extend([no_op]) + a_constant = quantize_graph.create_constant_node(a_constant_name, + value=1, + dtype=tf.float32, + shape=[]) + graph_def.node.extend([a_constant]) + a_check_node = quantize_graph.create_node("CheckNumerics", a_check_name, + [a_constant_name]) + graph_def.node.extend([a_check_node]) + a_identity_node = quantize_graph.create_node("Identity", a_identity_name, + [a_constant_name, + "^" + a_check_name, + "^" + no_op_name]) + graph_def.node.extend([a_identity_node]) + b_constant = quantize_graph.create_constant_node(b_constant_name, + value=1, + dtype=tf.float32, + shape=[]) + graph_def.node.extend([b_constant]) + b_check_node = quantize_graph.create_node("CheckNumerics", b_check_name, + [b_constant_name]) + graph_def.node.extend([b_check_node]) + b_identity_node = quantize_graph.create_node("Identity", b_identity_name, + [b_constant_name, + "^" + b_check_name]) + graph_def.node.extend([b_identity_node]) + add_node = quantize_graph.create_node("Add", add_name, + [a_identity_name, + b_identity_name]) + quantize_graph.set_attr_dtype(add_node, "T", tf.float32) + graph_def.node.extend([add_node]) + + expected_output = tf.GraphDef() + no_op = quantize_graph.create_node("NoOp", no_op_name, []) + expected_output.node.extend([no_op]) + a_constant = quantize_graph.create_constant_node(a_constant_name, + value=1, + dtype=tf.float32, + shape=[]) + expected_output.node.extend([a_constant]) + a_identity_node = quantize_graph.create_node("Identity", a_identity_name, + [a_constant_name, + "^" + no_op_name]) + expected_output.node.extend([a_identity_node]) + b_constant = quantize_graph.create_constant_node(b_constant_name, + value=1, + dtype=tf.float32, + shape=[]) + expected_output.node.extend([b_constant]) + add_node = quantize_graph.create_node("Add", add_name, + [a_identity_name, + b_constant_name]) + quantize_graph.set_attr_dtype(add_node, "T", tf.float32) + expected_output.node.extend([add_node]) + + output = graph_util.remove_training_nodes(graph_def) + stripped_output = graph_util.extract_sub_graph(output, [add_name]) + self.assertProtoEquals(expected_output, stripped_output) + + def test_batch_norm(self): + input_constant_name = "input_constant" + mean_constant_name = "mean_constant" + variance_constant_name = "variance_constant" + beta_constant_name = "beta_constant" + gamma_constant_name = "gamma_constant" + batch_norm_name = "batch_norm" + float_graph_def = tf.GraphDef() + input_constant = quantize_graph.create_constant_node(input_constant_name, + value=[1, 4, 2, 5, 3, + 6, -1, -4, -2, + -5, -3, -6], + dtype=tf.float32, + shape=[1, 1, 6, 2]) + float_graph_def.node.extend([input_constant]) + mean_constant = quantize_graph.create_constant_node(mean_constant_name, + value=[10, 20], + dtype=tf.float32, + shape=[2]) + float_graph_def.node.extend([mean_constant]) + variance_constant = quantize_graph.create_constant_node( + variance_constant_name, value=[0.25, 0.5], 
dtype=tf.float32, shape=[2]) + float_graph_def.node.extend([variance_constant]) + beta_constant = quantize_graph.create_constant_node(beta_constant_name, + value=[0.1, 0.6], + dtype=tf.float32, + shape=[2]) + float_graph_def.node.extend([beta_constant]) + gamma_constant = quantize_graph.create_constant_node(gamma_constant_name, + value=[0, 0], + dtype=tf.float32, + shape=[2]) + float_graph_def.node.extend([gamma_constant]) + batch_norm_node = quantize_graph.create_node( + "BatchNormWithGlobalNormalization", batch_norm_name, + [input_constant_name, mean_constant_name, variance_constant_name, + beta_constant_name, gamma_constant_name]) + quantize_graph.set_attr_dtype(batch_norm_node, "T", tf.float32) + quantize_graph.set_attr_bool(batch_norm_node, "scale_after_normalization", + False) + quantize_graph.set_attr_float(batch_norm_node, "variance_epsilon", 0.001) + float_graph_def.node.extend([batch_norm_node]) + test_graph(float_graph_def, {}, [batch_norm_name]) + + def test_max_pool(self): + input_constant_name = "input_constant" + max_pool_name = "max_pool" + float_graph_def = tf.GraphDef() + input_constant = quantize_graph.create_constant_node(input_constant_name, + value=[1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12], + dtype=tf.float32, + shape=[1, 2, 6, 1]) + float_graph_def.node.extend([input_constant]) + max_pool_node = quantize_graph.create_node("MaxPool", max_pool_name, + [input_constant_name]) + quantize_graph.set_attr_int_list(max_pool_node, "ksize", [1, 2, 2, 1]) + quantize_graph.set_attr_int_list(max_pool_node, "strides", [1, 1, 1, 1]) + quantize_graph.set_attr_string(max_pool_node, "padding", b"SAME") + float_graph_def.node.extend([max_pool_node]) + test_graph(float_graph_def, {}, [max_pool_name]) + + def test_avg_pool(self): + input_constant_name = "input_constant" + avg_pool_name = "avg_pool" + float_graph_def = tf.GraphDef() + input_constant = quantize_graph.create_constant_node(input_constant_name, + value=[1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12], + dtype=tf.float32, + shape=[1, 2, 6, 1]) + float_graph_def.node.extend([input_constant]) + avg_pool_node = quantize_graph.create_node("AvgPool", avg_pool_name, + [input_constant_name]) + quantize_graph.set_attr_dtype(avg_pool_node, "T", tf.float32) + quantize_graph.set_attr_int_list(avg_pool_node, "ksize", [1, 2, 2, 1]) + quantize_graph.set_attr_int_list(avg_pool_node, "strides", [1, 1, 1, 1]) + quantize_graph.set_attr_string(avg_pool_node, "padding", b"SAME") + float_graph_def.node.extend([avg_pool_node]) + test_graph(float_graph_def, {}, [avg_pool_name]) + + def test_relu(self): + input_constant_name = "input_constant" + relu_name = "relu" + float_graph_def = tf.GraphDef() + input_constant = quantize_graph.create_constant_node(input_constant_name, + value=[1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12], + dtype=tf.float32, + shape=[1, 2, 6, 1]) + float_graph_def.node.extend([input_constant]) + relu_node = quantize_graph.create_node("Relu", relu_name, + [input_constant_name]) + quantize_graph.set_attr_dtype(relu_node, "T", tf.float32) + float_graph_def.node.extend([relu_node]) + test_graph(float_graph_def, {}, [relu_name]) + + def test_relu6(self): + input_constant_name = "input_constant" + relu6_name = "relu6" + float_graph_def = tf.GraphDef() + input_constant = quantize_graph.create_constant_node(input_constant_name, + value=[1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12], + dtype=tf.float32, + shape=[1, 2, 6, 1]) + float_graph_def.node.extend([input_constant]) + relu6_node = quantize_graph.create_node("Relu6", relu6_name, + 
[input_constant_name]) + quantize_graph.set_attr_dtype(relu6_node, "T", tf.float32) + float_graph_def.node.extend([relu6_node]) + test_graph(float_graph_def, {}, [relu6_name]) + + def test_bias_add(self): + input_constant_name = "input_constant" + offset_constant_name = "offset_constant" + bias_add_name = "bias_add" + float_graph_def = tf.GraphDef() + input_constant = quantize_graph.create_constant_node(input_constant_name, + value=[1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12], + dtype=tf.float32, + shape=[1, 1, 2, 6]) + float_graph_def.node.extend([input_constant]) + offset_constant = quantize_graph.create_constant_node(offset_constant_name, + value=[1, 2, 3, 4, 5, + 6], + dtype=tf.float32, + shape=[6]) + float_graph_def.node.extend([offset_constant]) + bias_add_node = quantize_graph.create_node("BiasAdd", bias_add_name, + [input_constant_name, + offset_constant_name]) + quantize_graph.set_attr_dtype(bias_add_node, "T", tf.float32) + float_graph_def.node.extend([bias_add_node]) + test_graph(float_graph_def, {}, [bias_add_name]) + + def test_remove_redundant_quantization(self): + a_constant_name = "a_constant" + a_constant_min_name = "a_constant_min" + a_constant_max_name = "a_constant_max" + a_dequantize_name = "a_dequantize" + a_quantize_name = "a_quantize" + b_constant_name = "b_constant" + b_constant_min_name = "b_constant_min" + b_constant_max_name = "b_constant_max" + b_dequantize_name = "b_dequantize" + b_quantize_name = "b_quantize" + mat_mul_name = "mat_mul" + graph_def = tf.GraphDef() + a_constant = quantize_graph.create_constant_node(a_constant_name, + value=(0,), + dtype=tf.quint8, + shape=[]) + graph_def.node.extend([a_constant]) + a_constant_min = quantize_graph.create_constant_node(a_constant_min_name, + value=2, + dtype=tf.float32, + shape=[]) + graph_def.node.extend([a_constant_min]) + a_constant_max = quantize_graph.create_constant_node(a_constant_max_name, + value=2, + dtype=tf.float32, + shape=[]) + graph_def.node.extend([a_constant_max]) + a_dequantize_node = quantize_graph.create_node("Dequantize", + a_dequantize_name, + [a_constant_name, + a_constant_min_name, + a_constant_max_name]) + quantize_graph.set_attr_dtype(a_dequantize_node, "T", tf.uint8) + graph_def.node.extend([a_dequantize_node]) + a_quantize_node = quantize_graph.create_node("QuantizeV2", + a_quantize_name, + [a_dequantize_name, + a_dequantize_name + ":1", + a_dequantize_name + ":2"]) + quantize_graph.set_attr_dtype(a_quantize_node, "T", tf.uint8) + graph_def.node.extend([a_quantize_node]) + b_constant = quantize_graph.create_constant_node(b_constant_name, + value=(0,), + dtype=tf.quint8, + shape=[]) + graph_def.node.extend([b_constant]) + b_constant_min = quantize_graph.create_constant_node(b_constant_min_name, + value=3, + dtype=tf.float32, + shape=[]) + graph_def.node.extend([b_constant_min]) + b_constant_max = quantize_graph.create_constant_node(b_constant_max_name, + value=3, + dtype=tf.float32, + shape=[]) + graph_def.node.extend([b_constant_max]) + b_dequantize_node = quantize_graph.create_node("Dequantize", + b_dequantize_name, + [b_constant_name, + b_constant_min_name, + b_constant_max_name]) + quantize_graph.set_attr_dtype(b_dequantize_node, "T", tf.uint8) + graph_def.node.extend([b_dequantize_node]) + b_quantize_node = quantize_graph.create_node("QuantizeV2", + b_quantize_name, + [b_dequantize_name, + b_dequantize_name + ":1", + b_dequantize_name + ":2"]) + quantize_graph.set_attr_dtype(b_quantize_node, "T", tf.uint8) + graph_def.node.extend([b_quantize_node]) + mat_mul_node = 
quantize_graph.create_node("QuantizedMatMul", mat_mul_name, + [a_quantize_name, + b_quantize_name, + a_quantize_name + ":1", + a_quantize_name + ":2", + b_quantize_name + ":1", + b_quantize_name + ":2"]) + quantize_graph.set_attr_dtype(mat_mul_node, "T1", tf.uint8) + quantize_graph.set_attr_dtype(mat_mul_node, "T2", tf.int32) + graph_def.node.extend([mat_mul_node]) + + expected_output = tf.GraphDef() + a_constant = quantize_graph.create_constant_node(a_constant_name, + value=(0,), + dtype=tf.quint8, + shape=[]) + expected_output.node.extend([a_constant]) + a_constant_min = quantize_graph.create_constant_node(a_constant_min_name, + value=2, + dtype=tf.float32, + shape=[]) + expected_output.node.extend([a_constant_min]) + a_constant_max = quantize_graph.create_constant_node(a_constant_max_name, + value=2, + dtype=tf.float32, + shape=[]) + expected_output.node.extend([a_constant_max]) + b_constant = quantize_graph.create_constant_node(b_constant_name, + value=(0,), + dtype=tf.quint8, + shape=[]) + expected_output.node.extend([b_constant]) + b_constant_min = quantize_graph.create_constant_node(b_constant_min_name, + value=3, + dtype=tf.float32, + shape=[]) + expected_output.node.extend([b_constant_min]) + b_constant_max = quantize_graph.create_constant_node(b_constant_max_name, + value=3, + dtype=tf.float32, + shape=[]) + expected_output.node.extend([b_constant_max]) + mat_mul_node = quantize_graph.create_node("QuantizedMatMul", mat_mul_name, + [a_constant_name, + b_constant_name, + a_constant_min_name, + a_constant_max_name, + b_constant_min_name, + b_constant_max_name]) + quantize_graph.set_attr_dtype(mat_mul_node, "T1", tf.uint8) + quantize_graph.set_attr_dtype(mat_mul_node, "T2", tf.int32) + expected_output.node.extend([mat_mul_node]) + + rewriter = quantize_graph.GraphRewriter(graph_def, [mat_mul_name]) + output = rewriter.remove_redundant_quantization(graph_def) + stripped_output = graph_util.extract_sub_graph(output, [mat_mul_name]) + self.assertProtoEquals(expected_output, stripped_output) + + +if __name__ == "__main__": + tf.test.main() |
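For reference, a minimal sketch of driving the rewriter added in this commit from Python rather than through the command-line flags. The model path "model.pb" and the output node name "softmax" are placeholder values, not part of the commit.

import tensorflow as tf
from tensorflow.contrib.quantization.tools import quantize_graph

# Equivalent command-line use (flags defined at the top of quantize_graph.py):
#   python quantize_graph.py --input=model.pb --output=quantized_model.pb \
#     --output_node_names=softmax --mode=eightbit

# Load a frozen float GraphDef from disk (placeholder path).
float_graph_def = tf.GraphDef()
with tf.gfile.Open("model.pb", "rb") as f:
  float_graph_def.ParseFromString(f.read())

# "eightbit" rewrites supported ops (Conv2D, MatMul, BiasAdd, pools, Relu,
# Concat, BatchNormWithGlobalNormalization) into their Quantized* forms and
# quantizes the float Const weights; "weights" and "weights_rounded" only
# rewrite the Const ops.
rewriter = quantize_graph.GraphRewriter(float_graph_def, "eightbit")
output_graph_def = rewriter.rewrite(["softmax"])

# Serialize the quantized graph (placeholder output path).
f = tf.gfile.FastGFile("quantized_model.pb", "wb")
f.write(output_graph_def.SerializeToString())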