author:    A. Unique TensorFlower <gardener@tensorflow.org>   2016-09-28 00:15:58 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org>    2016-09-28 01:35:32 -0700
commit:    419d5d072375ee0044fecb94e4bfe21a7b3b0b9e
tree:      cb66e6e7238bf2e7938b58f3638bd31f65d542c2 /tensorflow/core/kernels
parent:    c1e4f0f6a1078fd6715e8145fbef874e4d447ab8
Automated rollback of change 134501895
Change: 134506649
Diffstat (limited to 'tensorflow/core/kernels')
26 files changed, 0 insertions, 5406 deletions
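For context on what this rollback removes: the deleted files below are TensorFlow's reference eight-bit quantization kernels, and at their core is the affine float-to-quantized mapping in quantization_utils.h (deleted further down). What follows is a minimal standalone sketch of that mapping, not TensorFlow code: plain uint8_t stands in for TensorFlow's quint8, std::numeric_limits replaces Eigen::NumTraits, and the function names are borrowed from the deleted header purely for readability.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

// Widen to int64 so qint32-sized types cannot overflow mid-calculation,
// mirroring the deleted FloatToQuantizedUnclamped.
template <class T>
int64_t FloatToQuantizedUnclamped(float input, float range_min,
                                  float range_max) {
  const int64_t lowest = static_cast<int64_t>(std::numeric_limits<T>::lowest());
  if (range_min == range_max) return lowest;
  const int64_t steps = static_cast<int64_t>(1) << (sizeof(T) * 8);
  // Stretch the nominal range slightly so the step count divides it exactly.
  const double range = (range_max - range_min) * (steps / (steps - 1.0));
  const double range_scale = steps / range;
  return static_cast<int64_t>(std::round(input * range_scale) -
                              std::round(range_min * range_scale)) +
         lowest;
}

// Clamp/saturate into the target type, as the deleted FloatToQuantized does.
template <class T>
T FloatToQuantized(float input, float range_min, float range_max) {
  int64_t q = FloatToQuantizedUnclamped<T>(input, range_min, range_max);
  q = std::max<int64_t>(q, std::numeric_limits<T>::lowest());
  q = std::min<int64_t>(q, std::numeric_limits<T>::max());
  return static_cast<T>(q);
}

// Inverse mapping: quantized value back to a float in [range_min, range_max].
template <class T>
float QuantizedToFloat(T input, float range_min, float range_max) {
  if (range_min == range_max) return range_min;
  const int64_t steps = static_cast<int64_t>(1) << (sizeof(T) * 8);
  const double range = (range_max - range_min) * (steps / (steps - 1.0));
  const double range_scale = range / steps;
  const int64_t lowest = static_cast<int64_t>(std::numeric_limits<T>::lowest());
  return static_cast<float>(
      range_min + (static_cast<double>(input) - lowest) * range_scale);
}

int main() {
  // Mirrors an expectation in the deleted quantization_utils_test.cc:
  // 0.5f in [0.0, 1.0] quantizes to 128 as an unsigned 8-bit value.
  std::printf("%d\n", FloatToQuantized<uint8_t>(0.5f, 0.0f, 1.0f));  // 128
  // The round trip lands within one quantization step of the input.
  std::printf("%f\n", QuantizedToFloat<uint8_t>(128, 0.0f, 1.0f));   // ~0.502
}

The deleted files below carry both the reference implementations of these routines and their fixed-point fast paths, plus the tests that pin the exact values down.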
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 306f61e5ec..f391aa9e3f 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -29,7 +29,6 @@ load( "tf_cc_tests", "tf_copts", "tf_cuda_library", - "tf_custom_op_library", "tf_opts_nortti_if_android", "tf_kernel_libraries", "tf_kernel_library", @@ -2163,7 +2162,6 @@ filegroup( srcs = [ ":android_extended_ops_group1", ":android_extended_ops_group2", - ":android_quantized_ops", ], visibility = ["//visibility:public"], ) @@ -2293,26 +2291,6 @@ filegroup( ], ) -filegroup( - name = "android_quantized_ops", - srcs = [ - "dequantize_op.cc", - "quantization_utils.cc", - "quantization_utils.h", - "quantize_down_and_shrink_range.cc", - "quantize_op.cc", - "quantized_activation_ops.cc", - "quantized_batch_norm_op.cc", - "quantized_bias_add_op.cc", - "quantized_concat_op.cc", - "quantized_conv_ops.cc", - "quantized_matmul_op.cc", - "quantized_pooling_ops.cc", - "reference_gemm.h", - ], - visibility = ["//visibility:public"], -) - # A file group which contains nearly all available operators which # may work on Android. This is intended to be used with selective # registration. @@ -2370,7 +2348,6 @@ cc_library( "//tensorflow:android": [ "//tensorflow/core/kernels:android_core_ops", "//tensorflow/core/kernels:android_extended_ops", - "@gemmlowp//:eight_bit_int_gemm_sources", ], "//conditions:default": [], }), @@ -2388,264 +2365,6 @@ cc_library( alwayslink = 1, ) -# Quantization-specific OpKernels - -tf_kernel_library( - name = "quantized_ops", - srcs = [ - "dequantize_op.cc", - "quantization_utils.cc", - "quantize_down_and_shrink_range.cc", - "quantize_op.cc", - "quantized_activation_ops.cc", - "quantized_batch_norm_op.cc", - "quantized_bias_add_op.cc", - "quantized_concat_op.cc", - "quantized_conv_ops.cc", - "quantized_matmul_op.cc", - "quantized_pooling_ops.cc", - ], - hdrs = [ - "quantization_utils.h", - "reference_gemm.h", - ], - deps = [ - "//tensorflow/core", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core/kernels:concat_lib_hdrs", - "//tensorflow/core/kernels:conv_ops", - "//tensorflow/core/kernels:eigen_helpers", - "//tensorflow/core/kernels:ops_util", - "//tensorflow/core/kernels:pooling_ops", - "//third_party/eigen3", - "@gemmlowp//:eight_bit_int_gemm", - ], -) - -tf_custom_op_library( - name = "_quantized_kernels.so", - srcs = [ - "dequantize_op.cc", - "quantization_utils.cc", - "quantization_utils.h", - "quantize_down_and_shrink_range.cc", - "quantize_op.cc", - "quantized_activation_ops.cc", - "quantized_batch_norm_op.cc", - "quantized_bias_add_op.cc", - "quantized_concat_op.cc", - "quantized_conv_ops.cc", - "quantized_matmul_op.cc", - "quantized_pooling_ops.cc", - "reference_gemm.h", - ], - deps = [ - "//tensorflow/core/kernels:concat_lib_hdrs", - "//tensorflow/core/kernels:ops_util_hdrs", - "//tensorflow/core/kernels:pooling_ops_hdrs", - "@gemmlowp//:eight_bit_int_gemm", - ], -) - -tf_cc_test( - name = "quantize_down_and_shrink_range_op_test", - size = "small", - srcs = ["quantize_down_and_shrink_range_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - 
"//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantization_utils_test", - srcs = ["quantization_utils_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:core_cpu", - "//tensorflow/core:core_cpu_internal", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//third_party/eigen3", - ], -) - -tf_cc_test( - name = "quantized_activation_ops_test", - srcs = ["quantized_activation_ops_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_bias_add_op_test", - size = "small", - srcs = ["quantized_bias_add_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_conv_ops_test", - size = "small", - srcs = ["quantized_conv_ops_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantize_op_test", - size = "small", - srcs = ["quantize_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_matmul_op_test", - size = "small", - srcs = ["quantized_matmul_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_pooling_ops_test", - size = "small", - srcs = ["quantized_pooling_ops_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - 
"//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_concat_op_test", - size = "small", - srcs = ["quantized_concat_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_batch_norm_op_test", - size = "small", - srcs = ["quantized_batch_norm_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:core_cpu_internal", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:batch_norm_op", - "//tensorflow/core/kernels:ops_testutil", - "//third_party/eigen3", - ], -) - # ----------------------------------------------------------------------------- # Google-internal targets. These must be at the end for syncrepo. diff --git a/tensorflow/core/kernels/dequantize_op.cc b/tensorflow/core/kernels/dequantize_op.cc deleted file mode 100644 index 375287000e..0000000000 --- a/tensorflow/core/kernels/dequantize_op.cc +++ /dev/null @@ -1,106 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// See docs in ../ops/math_ops.cc. - -#define EIGEN_USE_THREADS - -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/type_traits.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/errors.h" - -namespace { -enum { QUANTIZE_MODE_MIN_COMBINED, QUANTIZE_MODE_MIN_FIRST }; -} // namespace - -namespace tensorflow { - -typedef Eigen::ThreadPoolDevice CPUDevice; - -template <typename Device, typename T> -class DequantizeOp : public OpKernel { - public: - explicit DequantizeOp(OpKernelConstruction* ctx) : OpKernel(ctx) { - half_range_ = !std::is_signed<T>::value - ? 
0.0f - : (static_cast<float>(std::numeric_limits<T>::max()) - - std::numeric_limits<T>::min() + 1) / - 2.0f; - string mode_string; - OP_REQUIRES_OK(ctx, ctx->GetAttr("mode", &mode_string)); - OP_REQUIRES(ctx, - (mode_string == "MIN_COMBINED" || mode_string == "MIN_FIRST"), - errors::InvalidArgument("Mode string must be 'MIN_COMBINED' or" - " 'MIN_FIRST', is '" + - mode_string + "'")); - if (mode_string == "MIN_COMBINED") { - mode_ = QUANTIZE_MODE_MIN_COMBINED; - } else if (mode_string == "MIN_FIRST") { - mode_ = QUANTIZE_MODE_MIN_FIRST; - } - } - - void Compute(OpKernelContext* ctx) override { - const Tensor& input = ctx->input(0); - const float min_range = ctx->input(1).flat<float>()(0); - const float max_range = ctx->input(2).flat<float>()(0); - - Tensor* output = nullptr; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); - if (mode_ == QUANTIZE_MODE_MIN_COMBINED) { - const float scale_factor = - (max_range - min_range) / - (static_cast<float>(std::numeric_limits<T>::max()) - - std::numeric_limits<T>::min()); - - // Multiply by scale factor and add min_range. - output->flat<float>() = - ((input.flat<T>().template cast<int>().template cast<float>() + - half_range_) * - scale_factor) + - min_range; - } else if (mode_ == QUANTIZE_MODE_MIN_FIRST) { - QuantizedTensorToFloatInPlaceUsingEigen<T>( - ctx->template eigen_device<Device>(), input, min_range, max_range, - output); - } - } - - private: - float half_range_; - int mode_; -}; - -REGISTER_KERNEL_BUILDER( - Name("Dequantize").Device(DEVICE_CPU).TypeConstraint<quint8>("T"), - DequantizeOp<CPUDevice, quint8>); -REGISTER_KERNEL_BUILDER( - Name("Dequantize").Device(DEVICE_CPU).TypeConstraint<qint8>("T"), - DequantizeOp<CPUDevice, qint8>); -REGISTER_KERNEL_BUILDER( - Name("Dequantize").Device(DEVICE_CPU).TypeConstraint<quint16>("T"), - DequantizeOp<CPUDevice, quint16>); -REGISTER_KERNEL_BUILDER( - Name("Dequantize").Device(DEVICE_CPU).TypeConstraint<qint16>("T"), - DequantizeOp<CPUDevice, qint16>); - -REGISTER_KERNEL_BUILDER( - Name("Dequantize").Device(DEVICE_CPU).TypeConstraint<qint32>("T"), - DequantizeOp<CPUDevice, qint32>); - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/core/kernels/hexagon/BUILD deleted file mode 100644 index c31b5c06d7..0000000000 --- a/tensorflow/core/kernels/hexagon/BUILD +++ /dev/null @@ -1,45 +0,0 @@ -# Description: -# quantization-specific OpKernels for hexagon - -package( - default_visibility = ["//visibility:public"], - features = ["-parse_headers"], -) - -licenses(["notice"]) # Apache 2.0 - -load( - "//tensorflow:tensorflow.bzl", - "tf_cc_test", -) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - -tf_cc_test( - name = "quantized_matmul_op_for_hexagon_test", - size = "small", - srcs = ["quantized_matmul_op_for_hexagon_test.cc"], - deps = [ - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - "//tensorflow/core/kernels:quantized_ops", - ], -) diff --git a/tensorflow/core/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc b/tensorflow/core/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc deleted file mode 
100644 index 7b3fdd19a5..0000000000 --- a/tensorflow/core/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc +++ /dev/null @@ -1,138 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -// Tests in this file are designed to evaluate hexagon DSP operations. - -#define EIGEN_USE_THREADS - -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" - -#ifdef USE_HEXAGON_LIBS -#include "tensorflow/core/platform/hexagon/gemm_wrapper.h" -#include "tensorflow/core/platform/profile_utils/cpu_utils.h" -#endif - -namespace tensorflow { - -class QuantizedMatMulOpForHexagonTest : public OpsTestBase { - protected: - void SetUp() final { -#ifdef USE_HEXAGON_LIBS - profile_utils::CpuUtils::EnableClockCycleProfiling(true); - LOG(INFO) << "Hexagon libs are linked (wrapper version = " - << hexagon_gemm_wrapper_GetWrapperVersion() - << ", hexagon binary version = " - << hexagon_gemm_wrapper_GetHexagonBinaryVersion() << ")"; - LOG(INFO) << "Cpu frequency = " - << profile_utils::CpuUtils::GetCycleCounterFrequency(); -#else - LOG(WARNING) << "Hexagon libs are not linked."; -#endif - } -}; - -// Shows some statistics of hexagon dsp using hexagon specific APIs -#ifdef USE_HEXAGON_LIBS -TEST_F(QuantizedMatMulOpForHexagonTest, EvaluateSharedLibOverhead) { - const uint64 overhead_shared_lib_start = - profile_utils::CpuUtils::GetCurrentClockCycle(); - const int wrapper_version = hexagon_gemm_wrapper_GetWrapperVersion(); - const uint64 overhead_shared_lib_end = - profile_utils::CpuUtils::GetCurrentClockCycle(); - const uint64 overhead_shared_lib_diff = - (overhead_shared_lib_end - overhead_shared_lib_start); - const uint64 overhead_hexagon_rpc_start = - profile_utils::CpuUtils::GetCurrentClockCycle(); - const int hexagon_binary_version = - hexagon_gemm_wrapper_GetHexagonBinaryVersion(); - const uint64 overhead_hexagon_rpc_end = - profile_utils::CpuUtils::GetCurrentClockCycle(); - const uint64 overhead_hexagon_rpc_diff = - (overhead_hexagon_rpc_end - overhead_hexagon_rpc_start); - LOG(INFO) << "Shared lib (ver = " << wrapper_version << ") overhead is " - << overhead_shared_lib_diff << " cycles, time = " - << std::chrono::duration_cast<std::chrono::microseconds>( - profile_utils::CpuUtils::ConvertClockCycleToTime( - overhead_shared_lib_diff)) - .count() 
- << " usec"; - LOG(INFO) << "hexagon rpc (ver = " << hexagon_binary_version - << ") overhead is " << overhead_hexagon_rpc_diff - << " cycles, time = " - << std::chrono::duration_cast<std::chrono::microseconds>( - profile_utils::CpuUtils::ConvertClockCycleToTime( - overhead_hexagon_rpc_diff)) - .count() - << " usec"; -} -#endif - -// Runs two small matrices through the operator, and leaves all the parameters -// at their default values. -// This test is a sample to execute matmul on hexagon. -TEST_F(QuantizedMatMulOpForHexagonTest, Small_NoParams) { - TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum<qint32>::v()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - // A matrix is: - // | 1 | 2 | 3 | - // | 4 | 5 | 6 | - AddInputFromArray<quint8>(TensorShape({2, 3}), {1, 2, 3, 4, 5, 6}); - // B matrix is: - // | 7 | 8 | 9 | 10 | - // | 11 | 12 | 13 | 14 | - // | 15 | 16 | 17 | 18 | - AddInputFromArray<quint8>(TensorShape({3, 4}), - {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - - TF_ASSERT_OK(RunOpKernel()); - // Here are the results we expect, from hand calculations: - // (1 * 7) + (2 * 11) + (3 * 15) = 74 - // (1 * 8) + (2 * 12) + (3 * 16) = 80 - // (1 * 9) + (2 * 13) + (3 * 17) = 86 - // (1 * 10) + (2 * 14) + (3 * 18) = 92 - // (4 * 7) + (5 * 11) + (6 * 15) = 173 - // (4 * 8) + (5 * 12) + (6 * 16) = 188 - // (4 * 9) + (5 * 13) + (6 * 17) = 203 - // (4 * 10) + (5 * 14) + (6 * 18) = 218 - Tensor expected(allocator(), DT_QINT32, TensorShape({2, 4})); - test::FillValues<qint32>(&expected, {74, 80, 86, 92, 173, 188, 203, 218}); - test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); -} - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantization_utils.cc b/tensorflow/core/kernels/quantization_utils.cc deleted file mode 100644 index 6f36c0d482..0000000000 --- a/tensorflow/core/kernels/quantization_utils.cc +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/kernels/quantization_utils.h" - -namespace tensorflow { - -void GetOutputMinAndMaxForQuantizedAdd(float input_min, float input_max, - float smaller_input_min, - float smaller_input_max, - float* output_min, float* output_max) { - // We need to have a good range to add our two arguments together in. This - // is surprisingly tricky, since it has to satisfy a few different needs: - // - Must be symmetrical around zero, so that 0 + 0 = 0. - // - Must hold the largest of the argument ranges. 
- // - Should have enough range that the bits of the lowest and highest - // arguments overlap if possible without the lower getting truncated. - // - Should have some headroom so that there's no overflow. - // - Needs to be signed. - // This leads us to use a scheme where we (assuming the inputs are eight bit - // and the output is 32-bit) use the bottom 32 - 17 = 15 bits to store the - // accumulated results. This gives us all the properties we need. - *output_max = - std::max(input_max, std::max(-input_min, std::max(smaller_input_max, - -smaller_input_min))) * - (1 << 17); - *output_min = -(*output_max); -} - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantization_utils.h b/tensorflow/core/kernels/quantization_utils.h deleted file mode 100644 index a098179034..0000000000 --- a/tensorflow/core/kernels/quantization_utils.h +++ /dev/null @@ -1,555 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_ -#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_ - -#define EIGEN_USE_THREADS - -// This is a set of functions that standardizes how quantized values are -// interpreted as float numbers. -// All of the current implementations are for reference and have not been -// optimized. They should be implementable using fixed point representations -// to avoid a dependency on floating-point hardware. - -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "public/gemmlowp.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/lib/core/threadpool.h" - -namespace tensorflow { - -// We have to be able to detect and handle overflows in int32, so this function -// uses doubles and int64's to make sure we have enough room. -template <class T> -int64 FloatToQuantizedUnclamped(float input, float range_min, float range_max) { - const int64 lowest_quantized = - static_cast<double>(Eigen::NumTraits<T>::lowest()); - if (range_min == range_max) { - return lowest_quantized; - } - const int number_of_bits = sizeof(T) * 8; - const int64 number_of_steps = static_cast<int64>(1) << number_of_bits; - const double range_adjust = (number_of_steps / (number_of_steps - 1.0)); - const double range = ((range_max - range_min) * range_adjust); - const double range_scale = (number_of_steps / range); - int64 quantized = - (round(input * range_scale) - round(range_min * range_scale)); - quantized += lowest_quantized; - return quantized; -} - -// This converts the float into the final quantized type, clamping/saturating -// any over or underflows. 
-template <class T> -T FloatToQuantized(float input, float range_min, float range_max) { - int64 quantized = FloatToQuantizedUnclamped<T>(input, range_min, range_max); - const int64 lowest_quantized = - static_cast<int64>(Eigen::NumTraits<T>::lowest()); - const int64 highest_quantized = - static_cast<int64>(Eigen::NumTraits<T>::highest()); - quantized = std::max(quantized, lowest_quantized); - quantized = std::min(quantized, highest_quantized); - return static_cast<T>(static_cast<int32>(quantized)); -} - -template <class T> -float QuantizedToFloat(T input, float range_min, float range_max) { - if (range_min == range_max) { - return range_min; - } - const int number_of_bits = sizeof(T) * 8; - const int64 number_of_steps = static_cast<int64>(1) << number_of_bits; - const double range_adjust = (number_of_steps / (number_of_steps - 1.0)); - const double range = ((range_max - range_min) * range_adjust); - const double range_scale = (range / number_of_steps); - const int64 lowest_quantized = - static_cast<int64>(Eigen::NumTraits<T>::lowest()); - const double offset_input = static_cast<double>(input) - lowest_quantized; - const double result = range_min + (offset_input * range_scale); - return static_cast<float>(result); -} - -template <class T> -float FloatForOneQuantizedLevel(float range_min, float range_max) { - const int64 highest = static_cast<int64>(Eigen::NumTraits<T>::highest()); - const int64 lowest = static_cast<int64>(Eigen::NumTraits<T>::lowest()); - const float float_for_one_quantized_level = - (range_max - range_min) / (highest - lowest); - return float_for_one_quantized_level; -} - -template <class T1, class T2, class T3> -void QuantizationRangeForMultiplication(float min_a, float max_a, float min_b, - float max_b, float* min_c, - float* max_c) { - const float a_float_for_one_quant_level = - FloatForOneQuantizedLevel<T1>(min_a, max_a); - const float b_float_for_one_quant_level = - FloatForOneQuantizedLevel<T2>(min_b, max_b); - - const int64 c_highest = static_cast<int64>(Eigen::NumTraits<T3>::highest()); - const int64 c_lowest = static_cast<int64>(Eigen::NumTraits<T3>::lowest()); - const float c_float_for_one_quant_level = - a_float_for_one_quant_level * b_float_for_one_quant_level; - - *min_c = c_float_for_one_quant_level * c_lowest; - *max_c = c_float_for_one_quant_level * c_highest; -} - -// input_array is an eigen Tensor. q2f is a QuantizedToFloatStruct. -// This evaluates to an eigen tensor expression, to be used like: -// auto tensor = DEQUANTIZE_WITH_EIGEN(input_tensor, q2f); -#define DEQUANTIZE_WITH_EIGEN(input_array, q2f) \ - (q2f.range_min + \ - (((input_array.template cast<float>() - q2f.lowest_quantized())) * \ - q2f.range_scale)); - -// input_array is an eigen Tensor. f2q is a FloatToQuantizedStruct. -// OutputType is the type of output (e.g. quint8). -// This evaluates to an eigen tensor expression, to be used like: -// auto tensor = QUANTIZE_WITH_EIGEN(input_tensor, f2q, T); -#define QUANTIZE_WITH_EIGEN(input_array, f2q, OutputType) \ - ((input_array * f2q.range_scale).round() - \ - (f2q.range_min_scaled - f2q.lowest_quantized())) \ - .cwiseMax(f2q.lower_bound_float()) \ - .cwiseMin(f2q.upper_bound_float()) \ - .template cast<int32>() \ - .template cast<OutputType>() - -// For use with DEQUANTIZE_WITH_EIGEN. 
-template <typename T> -struct QuantizedToFloatStruct { - static constexpr int number_of_bits = sizeof(T) * 8; - static constexpr int64 number_of_steps = static_cast<int64>(1) - << number_of_bits; - - static float lowest_quantized() { - return static_cast<float>(Eigen::NumTraits<T>::lowest()); - } - - QuantizedToFloatStruct(float range_min, float range_max) - : range_min(range_min), - range_scale((range_max - range_min) / (number_of_steps - 1.0)) {} - - const float range_min; - const float range_scale; -}; - -// For use with QUANTIZE_WITH_EIGEN. -template <typename T> -struct FloatToQuantizedStruct { - static constexpr int number_of_bits = sizeof(T) * 8; - static constexpr int64 number_of_steps = static_cast<int64>(1) - << number_of_bits; - static constexpr double range_adjust = - (number_of_steps / (number_of_steps - 1.0)); - - // Casting QInt32's lowest or highest to a float gives a float that can't be - // cast back to int32 or QInt32. Instead, use bounds that can be converted - // back to int32 without going outside the range of an int32. - static float lower_bound_float() { - return Eigen::numext::maxi( - static_cast<float>(Eigen::NumTraits<T>::lowest()), -2.147483648e+09f); - } - static float upper_bound_float() { - return Eigen::numext::mini( - static_cast<float>(Eigen::NumTraits<T>::highest()), +2.147483520e+09f); - } - - static float lowest_quantized() { - return static_cast<float>(Eigen::NumTraits<T>::lowest()); - } - - FloatToQuantizedStruct(float range_min, float range_max) - : range_min(range_min), - range_scale(range_max == range_min - ? 0.0 - : (number_of_steps - 1.0) / (range_max - range_min)), - range_min_scaled(round(range_min * range_scale)) {} - - const float range_min; - const float range_scale; - const float range_min_scaled; -}; - -template <class T1, class T2> -inline T2 RequantizeInNewRange(T1 input, float min_input, float max_input, - float min_new, float max_new) { - const float input_float = QuantizedToFloat<T1>(input, min_input, max_input); - return FloatToQuantized<T2>(input_float, min_new, max_new); -} - -template <class T1, class T2> -inline void RequantizeManyInNewRange(const T1* input, size_t count, - float min_input, float max_input, - float min_output, float max_output, - T2* output) { - for (size_t index = 0; index < count; ++index) { - const float input_float = - QuantizedToFloat<T1>(input[index], min_input, max_input); - output[index] = FloatToQuantized<T2>(input_float, min_output, max_output); - } -} - -// Because converting 32-bit accumulated results down to eight bit is a common -// case, we have a specialized code path to handle it as efficiently as -// possible using only fixed-point math for the inner loop. -template <> -inline void RequantizeManyInNewRange<qint32, quint8>( - const qint32* input, size_t count, float min_input, float max_input, - float min_output, float max_output, quint8* output) { - // Initially we calculate all the constants we need once, before we go into - // the inner loop. If this is updated, also update the Eigen version. - const int fp_shift = 16; - const float input_range = max_input - min_input; - const float output_range = max_output - min_output; - const float recip_output_range = - output_range == 0.0 ? 0.0 : (255.0 / output_range); - const float input_rezero = (min_input + max_input) / 2.0; - const int64 range_scale_fp = - output_range == 0.0 ? 
0.0 - : static_cast<int64>(255.0 * (1 << fp_shift) * - input_range / output_range); - const int64 input_offset_fp = - static_cast<int64>(input_rezero * recip_output_range * (1 << fp_shift)); - const int64 output_offset_fp = - output_range == 0.0 ? 0 : static_cast<int64>((1 << fp_shift) * - (min_output * 255.0) / - output_range); - const int64 rounding_delta = 1 << (fp_shift - 1); - - // Inside this loop we just do minimal adds, multiplies, and shifts, in a way - // that could be easily adapted for a SIMD implementation. It should also be - // possible to perform all the calculations in 32-bit rather than 64, but - // that's not been implemented yet. - for (size_t index = 0; index < count; ++index) { - const int64 input_value = static_cast<int64>(input[index]); - const int64 fp_value = - ((input_value * range_scale_fp) >> 32) + input_offset_fp; - const int64 offset_intermediate = fp_value - output_offset_fp; - const int64 round_intermediate = offset_intermediate + rounding_delta; - int64 quantized_int64 = round_intermediate >> fp_shift; - quantized_int64 = std::max(quantized_int64, 0LL); - quantized_int64 = std::min(quantized_int64, 255LL); - output[index] = static_cast<quint8>(static_cast<int32>(quantized_int64)); - } -} - -template <int shift> -struct int64_right_shift_op { - EIGEN_EMPTY_STRUCT_CTOR(int64_right_shift_op) - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const int64 operator()(const int64& a) const { - return a >> shift; - } -}; - -// See RequantizeManyInNewRange() for a non-eigen reference implementation. -template <class T1, class T2> -inline void RequantizeManyInNewRangeUsingEigen( - const Eigen::ThreadPoolDevice& device, const Tensor& input, float min_input, - float max_input, float min_output, float max_output, Tensor* output) { - auto input_array = input.flat<T1>(); - QuantizedToFloatStruct<T1> q2f(min_input, max_input); - auto input_float = DEQUANTIZE_WITH_EIGEN(input_array, q2f); - FloatToQuantizedStruct<T2> f2q(min_output, max_output); - auto input_requantized = QUANTIZE_WITH_EIGEN(input_float, f2q, T2); - - output->flat<T2>().device(device) = input_requantized; -} - -// See RequantizeManyInNewRange() for a non-eigen reference implementation. -// -// Because converting 32-bit accumulated results down to eight bit is a common -// case, we have a specialized code path to handle it as efficiently as -// possible using only fixed-point math for the inner loop. -template <> -inline void RequantizeManyInNewRangeUsingEigen<qint32, quint8>( - const Eigen::ThreadPoolDevice& device, const Tensor& input, float min_input, - float max_input, float min_output, float max_output, Tensor* output) { - // Initially we calculate all the constants we need once, before we go into - // the inner loop. If this is updated, also update the non-Eigen version. - const int fp_shift = 16; - const float input_range = max_input - min_input; - const float output_range = max_output - min_output; - const float recip_output_range = - output_range == 0.0 ? 0.0 : (255.0 / output_range); - const float input_rezero = (min_input + max_input) / 2.0; - const int64 range_scale_fp = - output_range == 0.0 ? 0.0 - : static_cast<int64>(255.0 * (1 << fp_shift) * - input_range / output_range); - const int64 input_offset_fp = - static_cast<int64>(input_rezero * recip_output_range * (1 << fp_shift)); - const int64 output_offset_fp = - output_range == 0.0 ? 
0 : static_cast<int64>((1 << fp_shift) * - (min_output * 255.0) / - output_range); - const int64 rounding_delta = 1 << (fp_shift - 1); - - // Inside this eigen expression we just do minimal adds, multiplies, and - // shifts. It should be possible to perform all the calculations in 32-bit - // rather than 64, but that's not been implemented yet. - auto input_array = input.flat<qint32>(); - auto fp_value = ((input_array.template cast<int64>() * range_scale_fp) - .unaryExpr(int64_right_shift_op<32>())) + - (input_offset_fp - output_offset_fp + rounding_delta); - auto intermediate = fp_value.unaryExpr(int64_right_shift_op<fp_shift>()); - auto input_requantized = intermediate.cwiseMax(0LL) - .cwiseMin(255LL) - .template cast<int32>() - .template cast<quint8>(); - output->flat<quint8>().device(device) = input_requantized; -} - -// REQUIRES: 'result->NumElements() == input.NumElements()' -template <class T> -void FloatTensorToQuantizedInPlaceUsingEigen( - const Eigen::ThreadPoolDevice& device, const Tensor& input, float min, - float max, Tensor* result) { - DCHECK_EQ(DataTypeToEnum<T>::v(), result->dtype()); - auto flat_input = input.flat<float>(); - auto flat_result = result->flat<T>(); - DCHECK_EQ(flat_input.size(), flat_result.size()); - - FloatToQuantizedStruct<T> f2q(min, max); - flat_result.device(device) = QUANTIZE_WITH_EIGEN(flat_input, f2q, T); -} - -template <class T> -void FloatTensorToQuantizedInPlace(const Tensor& input, float min, float max, - Tensor* result) { - DCHECK_EQ(DataTypeToEnum<T>::v(), result->dtype()); - auto flat_input = input.flat<float>(); - auto flat_result = result->flat<T>(); - const int data_size = flat_input.size(); - DCHECK(data_size == flat_result.size()); - for (int i = 0; i < data_size; ++i) { - flat_result(i) = FloatToQuantized<T>(flat_input(i), min, max); - } -} - -template <class T> -Tensor FloatTensorToQuantized(const Tensor& input, float min, float max) { - Tensor result(DataTypeToEnum<T>::v(), input.shape()); - FloatTensorToQuantizedInPlace<T>(input, min, max, &result); - return result; -} - -// REQUIRES: 'result->NumElements() == input.NumElements()' -template <class T> -void QuantizedTensorToFloatInPlaceUsingEigen( - const Eigen::ThreadPoolDevice& device, const Tensor& input, float min, - float max, Tensor* result) { - DCHECK_EQ(DataTypeToEnum<T>::v(), input.dtype()); - auto flat_input = input.flat<T>(); - auto flat_result = result->flat<float>(); - const int data_size = flat_input.size(); - DCHECK(data_size == flat_result.size()); - - QuantizedToFloatStruct<T> q2f(min, max); - flat_result.device(device) = DEQUANTIZE_WITH_EIGEN(flat_input, q2f); -} - -// REQUIRES: 'result->NumElements() == input.NumElements()' -template <class T> -void QuantizedTensorToFloatInPlace(const Tensor& input, float min, float max, - Tensor* result) { - DCHECK_EQ(DataTypeToEnum<T>::v(), input.dtype()); - auto flat_input = input.flat<T>(); - auto flat_result = result->flat<float>(); - const int data_size = flat_input.size(); - DCHECK(data_size == flat_result.size()); - for (int i = 0; i < data_size; ++i) { - flat_result(i) = QuantizedToFloat<T>(flat_input(i), min, max); - } -} - -template <class T> -Tensor QuantizedTensorToFloat(const Tensor& input, float min, float max) { - Tensor result(DT_FLOAT, input.shape()); - QuantizedTensorToFloatInPlace<T>(input, min, max, &result); - return result; -} - -void GetOutputMinAndMaxForQuantizedAdd(float input_min, float input_max, - float smaller_input_min, - float smaller_input_max, - float* output_min, float* output_max); - -// Add 
<input> and <smaller_input>. If <smaller_input> has fewer elements than -// <input>, then it is broadcast onto <input>. -template <typename T1, typename T2, typename T3> -void QuantizedAddUsingEigen(const Eigen::ThreadPoolDevice& device, - const Tensor& input, float input_min, - float input_max, const Tensor& smaller_input, - float smaller_input_min, float smaller_input_max, - Tensor* output, float* output_min, - float* output_max) { - const auto& input_flat = input.flat<T1>(); - const auto& smaller_input_flat = smaller_input.flat<T2>(); - auto output_flat = output->flat<T3>(); - - GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, smaller_input_min, - smaller_input_max, output_min, output_max); - // To do addition properly, we need to compensate for a possibly unbalanced - // zero point in the total representation. The quantized value that - // represents the real number zero needs to be subtracted before addition to - // make sure that the identity of zero + zero = zero holds. - const T3 zero_in_total_space = - FloatToQuantized<T3>(0.0f, *output_min, *output_max); - - const int64 input_element_count = input.NumElements(); - const int64 smaller_input_element_count = smaller_input.NumElements(); - - QuantizedToFloatStruct<T1> smaller_input_q2f(smaller_input_min, - smaller_input_max); - QuantizedToFloatStruct<T2> input_q2f(input_min, input_max); - FloatToQuantizedStruct<T3> f2q(*output_min, *output_max); - - auto smaller_input_float = - DEQUANTIZE_WITH_EIGEN(smaller_input_flat, smaller_input_q2f); - auto smaller_input_in_total_space = - QUANTIZE_WITH_EIGEN(smaller_input_float, f2q, T3); - - auto input_float = DEQUANTIZE_WITH_EIGEN(input_flat, input_q2f); - auto input_in_total_space = QUANTIZE_WITH_EIGEN(input_float, f2q, T3); - - Eigen::array<Eigen::DenseIndex, 1> bcast; - bcast[0] = input_element_count / smaller_input_element_count; - output_flat.device(device) = - input_in_total_space + - (smaller_input_in_total_space.broadcast(bcast) + zero_in_total_space); -} - -// This is a reference implementation of the bias addition for quantized -// buffers, designed to provide a clear specification for the result we -// want. We'll want to specialize this for particular hardware, and -// probably even fuse it with matrix multiplications in a lot of cases. It's -// important to show the clamping behavior we want in particular. -template <typename T1, typename T2, typename T3> -void QuantizedAdd(const Eigen::ThreadPoolDevice& device, const Tensor& input, - float input_min, float input_max, const Tensor& smaller_input, - float smaller_input_min, float smaller_input_max, - Tensor* output, float* output_min, float* output_max) { - const auto& input_flat = input.flat<T1>(); - const auto& smaller_input_flat = smaller_input.flat<T2>(); - auto output_flat = output->flat<T3>(); - - GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, smaller_input_min, - smaller_input_max, output_min, output_max); - // To do addition properly, we need to compensate for a possibly unbalanced - // zero point in the total representation. The quantized value that - // represents the real number zero needs to be subtracted before addition to - // make sure that the identity of zero + zero = zero holds. 
- const T3 zero_in_total_space = - FloatToQuantized<T3>(0.0f, *output_min, *output_max); - - const int64 input_element_count = input.NumElements(); - const int64 smaller_input_element_count = smaller_input.NumElements(); - - float total_min = *output_min; - float total_max = *output_max; - const size_t how_many_iterations = - (input_element_count / smaller_input_element_count); - for (size_t iteration = 0; iteration < how_many_iterations; ++iteration) { - const size_t offset = iteration * smaller_input_element_count; - for (int c = 0; c < smaller_input_element_count; ++c) { - const int index = (offset + c); - // The two numbers we're going to add can each be in very different - // ranges (e.g. the quantized value '127' may represent very different - // real numbers in both) so we need to convert them to a common range - // before we sum them. - const T1 input_value = input_flat(index); - const T3 input_in_total_space = RequantizeInNewRange<T1, T3>( - input_value, input_min, input_max, total_min, total_max); - const T2 smaller_input_value = smaller_input_flat(c); - const T3 smaller_input_in_total_space = - RequantizeInNewRange<T2, T3>(smaller_input_value, smaller_input_min, - smaller_input_max, total_min, total_max); - const T3 total_pre = input_in_total_space + smaller_input_in_total_space; - // As noted above, we need to compensate for the offset of the actual - // zero point in the space we're operating in. - const T3 total = total_pre + zero_in_total_space; - output_flat(index) = total; - } - } -} - -// See gemmlowp/internal/multi_thread_gemm.h for definitions of -// Prepare, Wait, StartWorker, and CreateWorkers. -class TensorflowGemmlowpWorkersPool { - public: - TensorflowGemmlowpWorkersPool(thread::ThreadPool* workers) - : workers_(workers) {} - - ~TensorflowGemmlowpWorkersPool() { - // This workaround ensures that all worker tasks have exited methods in the - // BlockingCounter. Without this, there is a race where the context is torn - // down while the counter is in use. - counter_to_decrement_when_ready_.Reset(0); - } - - void Prepare(int workers_count) { - counter_to_decrement_when_ready_.Reset(workers_count); - } - - void Wait() { counter_to_decrement_when_ready_.Wait(); } - - void StartWorker(int index, gemmlowp::Task* task) { - CHECK(workers_ != nullptr); - // <index> is ignored - the tensorflow threadpool does not support assigning - // to a specific thread. - workers_->Schedule([this, task]() { - // TODO(cwhipkey): get a local_allocator from a thread local. - gemmlowp::Allocator local_allocator; - CHECK(task != nullptr); - task->local_allocator = &local_allocator; - task->Run(); - delete task; - counter_to_decrement_when_ready_.DecrementCount(); - }); - } - - void CreateWorkers(std::size_t workers_count) {} - - private: - thread::ThreadPool* const workers_; - - // The BlockingCounter used to wait for the workers. 
- gemmlowp::BlockingCounter counter_to_decrement_when_ready_; - - TF_DISALLOW_COPY_AND_ASSIGN(TensorflowGemmlowpWorkersPool); -}; - -class TensorflowGemmContext : public gemmlowp::MultiThreadGemmContextBase { - public: - TensorflowGemmContext(int num_threads, thread::ThreadPool* workers) - : workers_pool_(workers) { - set_max_num_threads(num_threads); - } - - TensorflowGemmlowpWorkersPool* workers_pool() { return &workers_pool_; } - - private: - TensorflowGemmlowpWorkersPool workers_pool_; - - TF_DISALLOW_COPY_AND_ASSIGN(TensorflowGemmContext); -}; - -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_ diff --git a/tensorflow/core/kernels/quantization_utils_test.cc b/tensorflow/core/kernels/quantization_utils_test.cc deleted file mode 100644 index 55b5193ce1..0000000000 --- a/tensorflow/core/kernels/quantization_utils_test.cc +++ /dev/null @@ -1,550 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#define EIGEN_USE_THREADS - -#include <limits> - -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/common_runtime/eigen_thread_pool.h" -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/lib/random/simple_philox.h" -#include "tensorflow/core/lib/strings/strcat.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { - -class QuantizationUtilsTest : public ::testing::Test { - protected: - void TestRequantizeMany(Eigen::ThreadPoolDevice* eigen_device, - float input_min, float input_max, float output_min, - float output_max, - const std::vector<qint32>& values_quantized, - int tolerance = 1) { - const int values_count = values_quantized.size(); - std::vector<quint8> expected_values; - for (int value_index = 0; value_index < values_count; ++value_index) { - expected_values.push_back(FloatToQuantized<quint8>( - QuantizedToFloat(values_quantized[value_index], input_min, input_max), - output_min, output_max)); - } - - Tensor i_tensor = - tensorflow::test::AsTensor(gtl::ArraySlice<qint32>(values_quantized)); - Tensor o_tensor(DT_QUINT8, TensorShape{values_count}); - auto output_values = o_tensor.flat<quint8>(); - - if (eigen_device == nullptr) { - auto input_array = i_tensor.flat<qint32>(); - RequantizeManyInNewRange(input_array.data(), input_array.size(), - input_min, input_max, output_min, output_max, - output_values.data()); - } else { - RequantizeManyInNewRangeUsingEigen<qint32, quint8>( - *eigen_device, i_tensor, input_min, input_max, output_min, output_max, - &o_tensor); - } - - const string tolerance_str = strings::StrCat("+-", tolerance); - for (size_t value_index = 0; value_index < values_count; ++value_index) { - int e = 
expected_values[value_index]; - int v = output_values(value_index); - ASSERT_TRUE(std::abs(e - v) <= tolerance) - << "actual=" << v << ", expected=" << e << tolerance_str - << ", values_quantized[" << value_index - << "]=" << values_quantized[value_index] - << ", input_min=" << input_min << ", input_max=" << input_max - << ", output_min=" << output_min << ", output_max=" << output_max - << ", value_index=" << value_index; - } - } - - // If eigen_device is NULL, then the reference implementation is tested. - void TestRequantizeManyInNewRange32To8Bit( - Eigen::ThreadPoolDevice* eigen_device) { - // These are the float values we're going to test the conversions on. - const size_t values_count = 6; - const float values[values_count] = {0.0f, 0.45f, 1.0f, - -1.0f, 127.0f, 255.0f}; - // These are the input and output ranges we'll test. - const size_t ranges_count = 6; - const float ranges[ranges_count][4] = { - {0.0f, 255.0f, 0.0f, 255.0f}, // - {0.0f, 1.0f, 0.0f, 1.0f}, // - {-1.0f, 1.0f, -1.0f, 1.0f}, // - {-1.0f, 1.0f, -255.0f, 255.0f}, // - {3.0f, 3.0f, 0.0f, 255.0f}, // input min == max - {0.0f, 255.0f, 5.0f, 5.0f}, // output min == max - }; - for (int i = 0; i < ranges_count; ++i) { - const auto& r = ranges[i]; - std::vector<qint32> values_quantized; - for (int value_index = 0; value_index < values_count; ++value_index) { - const float v = values[value_index]; - values_quantized.push_back(FloatToQuantized<qint32>(v, r[0], r[1])); - } - TestRequantizeMany(eigen_device, r[0], r[1], r[2], r[3], - values_quantized); - } - - // Test with many different values in the input quantized range. - qint32 low = Eigen::NumTraits<qint32>::lowest(); - qint32 high = Eigen::NumTraits<qint32>::highest(); - std::vector<qint32> vals{low, high}; - int num_steps = 14419; - qint32 step = static_cast<int32>((1L << 32) / num_steps); - qint32 v = low + static_cast<qint32>(1); - for (int i = 0; i < num_steps; ++i) { - vals.push_back(v); - v += step; - } - TestRequantizeMany(eigen_device, -1.0f, 1.0f, -1.0f, 1.0f, vals); - TestRequantizeMany(eigen_device, -255.0f, 255.0f, -255.0f, 255.0f, vals); - TestRequantizeMany(eigen_device, -1.0f, 1.0f, -12345678.0f, 12345678.0f, - vals); - TestRequantizeMany(eigen_device, -1.0f, 12345678.0f, -12345678.0f, - 12345678.0f, vals); - - // Test when the input range is large and output range is small. - // Use all quantized values where the float is in the output range. - const float out_min = -29.1234; - const float out_max = 23.1234; - const float in_min = -1e6; - const float in_max = 1e6; - - low = FloatToQuantized<qint32>(out_min, in_min, in_max); - high = FloatToQuantized<qint32>(out_max, in_min, in_max); - vals.clear(); - for (int32 i = low; i <= high; ++i) vals.push_back(i); - TestRequantizeMany(eigen_device, in_min, in_max, out_min, out_max, vals); - } - - template <typename InputType, typename OutputType> - void TestRequantizeManyInNewRangeEigenVsNonEigen() { - thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */); - EigenThreadPoolWrapper wrapper(&threadpool); - Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */); - - const size_t ranges_count = 6; - const float ranges[ranges_count][4] = { - {0.0f, 255.0f, 0.0f, 255.0f}, // - {0.0f, 1.0f, 0.0f, 1.0f}, // - {-1.0f, 1.0f, -1.0f, 1.0f}, // - {-1.0f, 1.0f, -255.0f, 255.0f}, // - {3.0f, 3.0f, 0.0f, 255.0f}, // input min == max - {0.0f, 255.0f, 5.0f, 5.0f}, // output min == max - }; - - // Random values. 
- for (size_t range_index = 0; range_index < ranges_count; ++range_index) { - const float input_min = ranges[range_index][0]; - const float input_max = ranges[range_index][1]; - const float output_min = ranges[range_index][2]; - const float output_max = ranges[range_index][3]; - const int values_count = 10000; - random::PhiloxRandom philox(testing::RandomSeed(), 17); - random::SimplePhilox rnd(&philox); - std::vector<InputType> values_quantized; - for (int i = 0; i < values_count; ++i) { - float v = (rnd.RandFloat() * (input_max - input_min)) + input_min; - values_quantized.push_back( - FloatToQuantized<InputType>(v, input_min, input_max)); - } - - Tensor i_tensor = tensorflow::test::AsTensor( - gtl::ArraySlice<InputType>(values_quantized)); - const auto i_array = i_tensor.flat<InputType>(); - Tensor o_tensor_eigen(DataTypeToEnum<OutputType>::v(), - TensorShape{values_count}); - auto output_values_eigen = o_tensor_eigen.flat<OutputType>(); - Tensor o_tensor_ref(DataTypeToEnum<OutputType>::v(), - TensorShape{values_count}); - auto output_values_ref = o_tensor_ref.flat<OutputType>(); - - RequantizeManyInNewRange(i_array.data(), i_array.size(), input_min, - input_max, output_min, output_max, - output_values_ref.data()); - RequantizeManyInNewRangeUsingEigen<InputType, OutputType>( - eigen_device, i_tensor, input_min, input_max, output_min, output_max, - &o_tensor_eigen); - - const int tolerance = 1; - for (int i = 0; i < values_quantized.size(); ++i) { - auto expected = output_values_ref(i); - auto actual = output_values_eigen(i); - // The eigen computation uses float for constants and computation - // instead of doubles, so can be different by 1 or 2 in some cases - // (e.g., input value 144.062744140625, min -1, max 255, type quint8). - ASSERT_TRUE(std::abs(expected - actual) <= tolerance) - << "expected=" << expected << " actual=" << actual - << " tolerance=" << tolerance << " v=" << values_quantized[i] - << " i=" << i << " input_min=" << input_min - << " input_max=" << input_max - << " input_type=" << DataTypeString(DataTypeToEnum<InputType>::v()) - << " output_type=" - << DataTypeString(DataTypeToEnum<OutputType>::v()); - } - } - } - - template <typename T> - void TestFloatToQuantizedInPlaceUsingEigen( - Eigen::ThreadPoolDevice* eigen_device) { - // These are the float values we're going to test the conversions on. - typedef std::pair<float, float> FPair; - for (FPair min_and_max : std::vector<FPair>{FPair(-255.0f, 255.0f), // - FPair(-1.0f, 1.0f), // - FPair(-1.0f, 255.0f), // - FPair(0.0f, 1e6), // - FPair(0.0f, 1.0f), // - FPair(-31.0f, 13.0f)}) { - const float f_min = min_and_max.first; - const float f_max = min_and_max.second; - const float f_range = f_max - f_min; - const int values_count = 50000; - Tensor input(DT_FLOAT, TensorShape{values_count}); - auto input_array = input.flat<float>(); - for (int i = 0; i < values_count; ++i) { - input_array(i) = f_min + f_range * i / (values_count - 1); - } - - Tensor output(DataTypeToEnum<T>::v(), TensorShape{values_count}); - FloatTensorToQuantizedInPlaceUsingEigen<T>(*eigen_device, input, f_min, - f_max, &output); - auto output_array = output.flat<T>(); - - const int tolerance = 1; - for (int i = 0; i < values_count; ++i) { - int32 expected = FloatToQuantized<T>(input_array(i), f_min, f_max); - int32 actual = output_array(i); - - // The eigen computation uses float for constants and computation - // instead - // of doubles, so can be different by 1 or 2 in some cases (e.g., input - // value 144.062744140625, min -1, max 255, type quint8). 
- ASSERT_TRUE(std::abs(expected - actual) <= tolerance) - << "expected=" << expected << " actual=" << actual - << " tolerance=" << tolerance << " v=" << input_array(i) - << " i=" << i << " f_min=" << f_min << " f_max=" << f_max - << " type=" << DataTypeString(DataTypeToEnum<T>::v()); - } - } - } - - template <typename T> - void TestQuantizedToFloatInPlaceUsingEigen( - Eigen::ThreadPoolDevice* eigen_device) { - // These are the float values we're going to test the conversions on. - typedef std::pair<float, float> FPair; - for (FPair min_and_max : std::vector<FPair>{FPair(-255.0f, 255.0f), // - FPair(-1.0f, 1.0f), // - FPair(-1.0f, 255.0f), // - FPair(0.0f, 1e6), // - FPair(0.0f, 1.0f), // - FPair(-31.0f, 13.0f)}) { - const float f_min = min_and_max.first; - const float f_max = min_and_max.second; - const int values_count = sizeof(T) == 1 ? 256 : 50000; - Tensor input(DataTypeToEnum<T>::v(), TensorShape{values_count}); - auto input_array = input.flat<T>(); - const double q_range = - static_cast<double>(Eigen::NumTraits<T>::highest()) - - Eigen::NumTraits<T>::lowest(); - for (int i = 0; i < values_count; ++i) { - if (sizeof(T) == 1) { - input_array(i) = Eigen::NumTraits<T>::lowest() + i; - } else { - int64 offset = static_cast<int64>(q_range / values_count * i); - input_array(i) = static_cast<int32>( - Eigen::NumTraits<T>::lowest() + - std::min<int64>(Eigen::NumTraits<T>::highest(), offset)); - } - } - - Tensor output(DT_FLOAT, TensorShape{values_count}); - QuantizedTensorToFloatInPlaceUsingEigen<T>(*eigen_device, input, f_min, - f_max, &output); - auto output_array = output.flat<float>(); - const double range = static_cast<double>(f_max) - f_min; - for (int i = 0; i < values_count; ++i) { - float expected = QuantizedToFloat<T>(input_array(i), f_min, f_max); - float actual = output_array(i); - ASSERT_NEAR(expected, actual, range * 1e-6) - << "expected=" << expected << " actual=" << actual - << " v=" << input_array(i) << " i=" << i << " f_min=" << f_min - << " f_max=" << f_max - << " type=" << DataTypeString(DataTypeToEnum<T>::v()); - } - } - } -}; - -TEST_F(QuantizationUtilsTest, FloatToQuantized) { - EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(0.0f, 0.0f, 1.0f)); - EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(0.0f, 0.0f, 2.0f)); - EXPECT_EQ(quint8(128), FloatToQuantized<quint8>(0.5f, 0.0f, 1.0f)); - EXPECT_EQ(quint8(128), FloatToQuantized<quint8>(1.0f, 0.0f, 2.0f)); - EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(1.0f, 0.0f, 1.0f)); - EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(2.0f, 0.0f, 2.0f)); - EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(-128.0f, -128.0f, 127.0f)); - EXPECT_EQ(quint8(128), FloatToQuantized<quint8>(0.0f, -128.0f, 127.0f)); - EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(127.0f, -128.0f, 127.0f)); - EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(1.0f, 1.0f, 256.0f)); - EXPECT_EQ(quint8(127), FloatToQuantized<quint8>(128.0f, 1.0f, 256.0f)); - EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(256.0f, 1.0f, 256.0f)); - - const int int32_min = std::numeric_limits<int>::min(); - const int int32_max = std::numeric_limits<int>::max(); - - EXPECT_EQ(qint32(int32_min), - FloatToQuantized<qint32>(-128.0f, -128.0f, 128.0f)); - EXPECT_EQ(qint32(0), FloatToQuantized<qint32>(0.0f, -128.0f, 128.0f)); - EXPECT_EQ(qint32(int32_max), - FloatToQuantized<qint32>(128.0f, -128.0f, 128.0f)); -} - -TEST_F(QuantizationUtilsTest, QuantizedToFloat) { - EXPECT_LT(fabsf(0.0f - QuantizedToFloat<quint8>(0, 0.0f, 1.0f)), 1 / 255.0f); - EXPECT_LT(fabsf(0.0f - QuantizedToFloat<quint8>(0, 0.0f, 
2.0f)), 1 / 255.0f); - EXPECT_LT(fabsf(0.5f - QuantizedToFloat<quint8>(127, 0.0f, 1.0f)), - 1 / 255.0f); - EXPECT_LT(fabsf(1.0f - QuantizedToFloat<quint8>(127, 0.0f, 2.0f)), - 1 / 255.0f); - EXPECT_LT(fabsf(1.0f - QuantizedToFloat<quint8>(255, 0.0f, 1.0f)), - 1 / 255.0f); - EXPECT_LT(fabsf(2.0f - QuantizedToFloat<quint8>(255, 0.0f, 2.0f)), - 1 / 255.0f); - EXPECT_LT(fabsf(1.0f - QuantizedToFloat<quint8>(0, 1.0f, 256.0f)), - 1 / 255.0f); - EXPECT_LT(fabsf(128.0f - QuantizedToFloat<quint8>(127, 1.0f, 256.0f)), - 1 / 255.0f); - EXPECT_LT(fabsf(256.0f - QuantizedToFloat<quint8>(255, 1.0f, 256.0f)), - 1 / 255.0f); - - const int int32_min = std::numeric_limits<int>::min(); - const int int32_max = std::numeric_limits<int>::max(); - - EXPECT_LT( - fabsf(-1.0f - QuantizedToFloat<qint32>(qint32(int32_min), -1.0f, 1.0f)), - 1e-5f); - EXPECT_LT(fabsf(0.0f - QuantizedToFloat<qint32>(qint32(0), -1.0f, 1.0f)), - 1e-5f); - EXPECT_LT( - fabsf(1.0f - QuantizedToFloat<qint32>(qint32(int32_max), -1.0f, 1.0f)), - 1e-5f); -} - -TEST_F(QuantizationUtilsTest, AvoidBias) { - for (int i = 0; i < 256; ++i) { - const float as_float = QuantizedToFloat<quint8>(i, 0.0f, 2.0f); - const int back_to_int = FloatToQuantized<quint8>(as_float, 0.0f, 2.0f); - EXPECT_EQ(i, back_to_int); - } -} - -TEST_F(QuantizationUtilsTest, RequantizeInNewRange) { - // These are the float values we're going to test the conversions on. - const size_t values_count = 6; - const float values[values_count] = {0.0f, 0.5f, 1.0f, -1.0f, 127.0f, 255.0f}; - // These are the input and output ranges we'll test. - const size_t ranges_count = 4; - const float ranges[ranges_count][4] = { - {0.0f, 255.0f, 0.0f, 255.0f}, - {0.0f, 1.0f, 0.0f, 1.0f}, - {-1.0f, 1.0f, -1.0f, 1.0f}, - {-1.0f, 1.0f, -255.0f, 255.0f}, - }; - for (size_t value_index = 0; value_index < values_count; ++value_index) { - const float value_float = values[value_index]; - for (size_t range_index = 0; range_index < ranges_count; ++range_index) { - const float input_min = ranges[range_index][0]; - const float input_max = ranges[range_index][1]; - const float output_min = ranges[range_index][2]; - const float output_max = ranges[range_index][3]; - const quint8 input_value = - FloatToQuantized<quint8>(value_float, input_min, input_max); - // Here we convert the quantized input value to what we expect - // to get in the output range. 
- const qint32 expected_value = FloatToQuantized<qint32>( - QuantizedToFloat(input_value, input_min, input_max), output_min, - output_max); - EXPECT_EQ(expected_value, - (RequantizeInNewRange<quint8, qint32>( - input_value, input_min, input_max, output_min, output_max))) - << "value_float=" << value_float << ", input_min=" << input_min - << ", input_max=" << input_max << ", output_min=" << output_min - << ", output_max=" << output_max; - } - } -} - -TEST_F(QuantizationUtilsTest, RequantizeInNewRangeRealData) { - const float value_as_float = -0.290169f; - const float input_min = -0.739539f; - const float input_max = 0.641057f; - const float output_min = -2381.49f; - const float output_max = 2207.6f; - const quint8 value_as_quint8 = - FloatToQuantized<quint8>(value_as_float, input_min, input_max); - EXPECT_EQ(quint8(83), value_as_quint8); - const qint32 actual_output = RequantizeInNewRange<quint8, qint32>( - value_as_quint8, input_min, input_max, output_min, output_max); - const qint32 value_as_qint32 = - FloatToQuantized<qint32>(value_as_float, output_min, output_max); - EXPECT_LT(std::abs(value_as_qint32 - actual_output), 10); -} - -TEST_F(QuantizationUtilsTest, RequantizeInNewRange32To8Bit) { - // These are the float values we're going to test the conversions on. - const size_t values_count = 6; - const float values[values_count] = {0.0f, 0.45f, 1.0f, -1.0f, 127.0f, 255.0f}; - // These are the input and output ranges we'll test. - const size_t ranges_count = 4; - const float ranges[ranges_count][4] = { - {0.0f, 255.0f, 0.0f, 255.0f}, - {0.0f, 1.0f, 0.0f, 1.0f}, - {-1.0f, 1.0f, -1.0f, 1.0f}, - {-1.0f, 1.0f, -255.0f, 255.0f}, - }; - for (size_t value_index = 0; value_index < values_count; ++value_index) { - const float value_float = values[value_index]; - for (size_t range_index = 0; range_index < ranges_count; ++range_index) { - const float input_min = ranges[range_index][0]; - const float input_max = ranges[range_index][1]; - const float output_min = ranges[range_index][2]; - const float output_max = ranges[range_index][3]; - const qint32 input_value = - FloatToQuantized<qint32>(value_float, input_min, input_max); - // Here we convert the quantized input value to what we expect - // to get in the output range. 
- const quint8 expected_value = FloatToQuantized<quint8>( - QuantizedToFloat(input_value, input_min, input_max), output_min, - output_max); - EXPECT_EQ(expected_value, - (RequantizeInNewRange<qint32, quint8>( - input_value, input_min, input_max, output_min, output_max))) - << "input_value=" << input_value << ", value_float=" << value_float - << ", input_min=" << input_min << ", input_max=" << input_max - << ", output_min=" << output_min << ", output_max=" << output_max; - } - } -} - -TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8Bit) { - TestRequantizeManyInNewRange32To8Bit(nullptr /* eigen_device */); -} - -TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8BitUsingEigen) { - thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */); - EigenThreadPoolWrapper wrapper(&threadpool); - Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */); - TestRequantizeManyInNewRange32To8Bit(&eigen_device); -} - -TEST_F(QuantizationUtilsTest, RequantizeManyInNewRange32To8BitEigenVsNonEigen) { - TestRequantizeManyInNewRangeEigenVsNonEigen<qint32, quint8>(); -} - -TEST_F(QuantizationUtilsTest, - RequantizeManyInNewRange32To8BitSignedEigenVsNonEigen) { - TestRequantizeManyInNewRangeEigenVsNonEigen<qint32, qint8>(); -} - -TEST_F(QuantizationUtilsTest, FloatTensorToQuantized) { - const int input_width = 3; - const int input_height = 3; - const float input_min = 0.0f; - const float input_max = 255.0f; - Tensor input(DT_FLOAT, TensorShape({input_height, input_width})); - test::FillValues<float>(&input, {1.0f, -1.0f, 10.0f, 10.25f, 127.0f, 255.0f, - 512.0f, 0.0f, 23.0f}); - Tensor expected(DT_QUINT8, TensorShape({input_height, input_width})); - test::FillValues<quint8>(&expected, {1, 0, 10, 10, 127, 255, 255, 0, 23}); - Tensor output = FloatTensorToQuantized<quint8>(input, input_min, input_max); - test::ExpectTensorEqual<quint8>(expected, output); -} - -// Verify that FloatToQuantizedInPlaceUsingEigen is same result as -// FloatToQuantized. -TEST_F(QuantizationUtilsTest, FloatToQuantizedInPlaceUsingEigen) { - thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */); - EigenThreadPoolWrapper wrapper(&threadpool); - Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */); - - TestFloatToQuantizedInPlaceUsingEigen<quint8>(&eigen_device); - TestFloatToQuantizedInPlaceUsingEigen<qint8>(&eigen_device); - TestFloatToQuantizedInPlaceUsingEigen<quint16>(&eigen_device); - TestFloatToQuantizedInPlaceUsingEigen<qint16>(&eigen_device); -} - -TEST_F(QuantizationUtilsTest, OverflowWithEigen) { - thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */); - EigenThreadPoolWrapper wrapper(&threadpool); - Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */); - - const int num_vals = 4; - const float input_min = 0.0f; - const float input_max = 2400.0f; - TensorShape shape({num_vals}); - Tensor input(DT_FLOAT, shape); - test::FillValues<float>(&input, {-100.f, 0.f, 2400.0f, 2400.0f}); - Tensor expected(DT_QINT32, shape); - // Note that the positive expected values are not the highest int32 value, - // because the implementation does a bounds check using float, not int32. 
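-  // Hand-derived, assuming IEEE-754 single precision: near 2^31 the spacing
-  // between adjacent floats is 128, so int32 max (2147483647) has no exact
-  // float representation. Clamping in float therefore saturates at the
-  // largest representable value below 2^31, which is 2^31 - 128 = 2147483520,
-  // matching the positive values expected below.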
- test::FillValues<qint32>( - &expected, - {static_cast<int32>(-2147483648), static_cast<int32>(-2147483648), - static_cast<int32>(2147483520), static_cast<int32>(2147483520)}); - - FloatToQuantizedStruct<qint32> f2q(input_min, input_max); - Tensor output(DT_QINT32, shape); - auto input_array = input.flat<float>(); - output.flat<qint32>() = QUANTIZE_WITH_EIGEN(input_array, f2q, qint32); - test::ExpectTensorEqual<qint32>(expected, output); -} - -TEST_F(QuantizationUtilsTest, QuantizedTensorToFloat) { - const int input_width = 3; - const int input_height = 3; - const float input_min = -128.0f; - const float input_max = 127.0f; - Tensor input(DT_QUINT8, TensorShape({input_height, input_width})); - test::FillValues<quint8>(&input, {0, 128, 255, 23, 24, 25, 243, 244, 245}); - Tensor expected(DT_FLOAT, TensorShape({input_height, input_width})); - test::FillValues<float>(&expected, {-128.0f, 0.0f, 127.0f, -105.0f, -104.0f, - -103.0f, 115.0f, 116.0f, 117.0f}); - Tensor output = QuantizedTensorToFloat<quint8>(input, input_min, input_max); - test::ExpectTensorEqual<float>(expected, output); -} - -// Verify that QuantizedToFloatInPlaceUsingEigen is same result as -// QuantizedToFloat. -TEST_F(QuantizationUtilsTest, QuantizedToFloatInPlaceUsingEigen) { - thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */); - EigenThreadPoolWrapper wrapper(&threadpool); - Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */); - - TestQuantizedToFloatInPlaceUsingEigen<quint8>(&eigen_device); - TestQuantizedToFloatInPlaceUsingEigen<qint8>(&eigen_device); - TestQuantizedToFloatInPlaceUsingEigen<quint16>(&eigen_device); - TestQuantizedToFloatInPlaceUsingEigen<qint16>(&eigen_device); - TestQuantizedToFloatInPlaceUsingEigen<qint32>(&eigen_device); -} - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc deleted file mode 100644 index aef5f0b6a3..0000000000 --- a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// See docs in ../ops/array_ops.cc. 
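-//
-// In rough terms (a reader's summary, not an op contract): the kernel scans a
-// qint32 tensor for the actual minimum and maximum values present, then
-// requantizes into the 8-bit output over that tightened range. For example,
-// under a nominal float range of [-256, 256], qint32 values of +/-(1 << 23)
-// represent +/-1.0, so the output is rescaled to span 0..255 over [-1, 1],
-// which is exactly what the HandCrafted test in the companion _test.cc checks.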
- -#define EIGEN_USE_THREADS - -#include <math.h> - -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/type_traits.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/errors.h" - -namespace tensorflow { - -typedef Eigen::ThreadPoolDevice CPUDevice; - -template <class T1, class T2> -class QuantizeDownAndShrinkRangeOp : public OpKernel { - public: - explicit QuantizeDownAndShrinkRangeOp(OpKernelConstruction* ctx) - : OpKernel(ctx) {} - - void Compute(OpKernelContext* ctx) override { - const Tensor& input = ctx->input(0); - const float input_min_float = ctx->input(1).flat<float>()(0); - const float input_max_float = ctx->input(2).flat<float>()(0); - Tensor* output = nullptr; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); - Tensor* output_min = nullptr; - OP_REQUIRES_OK(ctx, ctx->allocate_output(1, TensorShape({}), &output_min)); - Tensor* output_max = nullptr; - OP_REQUIRES_OK(ctx, ctx->allocate_output(2, TensorShape({}), &output_max)); - - auto input_array = input.flat<T1>(); - const int32 input_lowest_quantized = - static_cast<int32>(Eigen::NumTraits<T1>::lowest()); - const int32 input_highest_quantized = - static_cast<int32>(Eigen::NumTraits<T1>::highest()); - T1 actual_min_quantized = input_highest_quantized; - T1 actual_max_quantized = input_lowest_quantized; - for (int i = 0; i < input_array.size(); ++i) { - const T1 value = input_array(i); - actual_min_quantized = std::min(actual_min_quantized, value); - actual_max_quantized = std::max(actual_max_quantized, value); - } - // We want to make sure that the minimum is no larger than zero, so that the - // convolution operation can run efficiently. - const float actual_min_float = - std::min(0.0f, QuantizedToFloat(actual_min_quantized, input_min_float, - input_max_float)); - const float actual_max_float = QuantizedToFloat( - actual_max_quantized, input_min_float, input_max_float); - -#if 0 - // This is the reference, non-eigen implementation: - auto output_array = output->flat<T2>(); - RequantizeManyInNewRange<T1, T2>(input_array.data(), input_array.size(), - input_min_float, input_max_float, - actual_min_float, actual_max_float, - output_array.data()); -#endif - - if (input_array.size() > 0) { - RequantizeManyInNewRangeUsingEigen<T1, T2>( - ctx->eigen_device<CPUDevice>(), input, input_min_float, - input_max_float, actual_min_float, actual_max_float, output); - } - - output_min->flat<float>().setConstant(actual_min_float); - output_max->flat<float>().setConstant(actual_max_float); - } -}; - -REGISTER_KERNEL_BUILDER(Name("QuantizeDownAndShrinkRange") - .Device(DEVICE_CPU) - .TypeConstraint<qint32>("Tinput") - .TypeConstraint<quint8>("out_type"), - QuantizeDownAndShrinkRangeOp<qint32, quint8>); - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc b/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc deleted file mode 100644 index 73a50aad26..0000000000 --- a/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { - -class QuantizeDownAndShrinkRangeTest : public OpsTestBase { - protected: -}; - -// Runs a manually generated array through the operator, and makes sure that the -// results match the expected hand-calculated values. -TEST_F(QuantizeDownAndShrinkRangeTest, HandCrafted) { - TF_ASSERT_OK(NodeDefBuilder("quantize_down_and_shrink_range_op", - "QuantizeDownAndShrinkRange") - .Input(FakeInput(DT_QINT32)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Tinput", DataTypeToEnum<qint32>::v()) - .Attr("out_type", DataTypeToEnum<quint8>::v()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - - // For this test we have an input that has the theoretical range of -256.0f to - // +256.0f, but the actual values present only span -1.0f to 1.0f. We expect - // the operator to take advantage of this, and rescale the output to fill up - // the available range in the lower bit depth, and update to the true min and - // max ranges. - const int value_count = 3; - AddInputFromArray<qint32>(TensorShape({value_count}), - {-(1 << 23), 0, (1 << 23)}); - AddInputFromArray<float>(TensorShape({1}), {-256.0f}); - AddInputFromArray<float>(TensorShape({1}), {256.0f}); - TF_ASSERT_OK(RunOpKernel()); - Tensor expected(allocator(), DT_QUINT8, TensorShape({value_count})); - test::FillValues<quint8>(&expected, {0, 127, 255}); - test::ExpectTensorEqual<quint8>(expected, *GetOutput(0)); - Tensor expected_min(allocator(), DT_FLOAT, TensorShape({})); - test::FillValues<float>(&expected_min, {-1.0f}); - test::ExpectTensorEqual<float>(expected_min, *GetOutput(1)); - Tensor expected_max(allocator(), DT_FLOAT, TensorShape({})); - test::FillValues<float>(&expected_max, {1.0f}); - test::ExpectTensorEqual<float>(expected_max, *GetOutput(2)); -} - -} // end namespace tensorflow diff --git a/tensorflow/core/kernels/quantize_op.cc b/tensorflow/core/kernels/quantize_op.cc deleted file mode 100644 index 003654c1b0..0000000000 --- a/tensorflow/core/kernels/quantize_op.cc +++ /dev/null @@ -1,159 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// See docs in ../ops/math_ops.cc. - -#define EIGEN_USE_THREADS - -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/type_traits.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/errors.h" - -namespace { -enum { QUANTIZE_MODE_MIN_COMBINED, QUANTIZE_MODE_MIN_FIRST }; -} // namespace - -namespace tensorflow { - -typedef Eigen::ThreadPoolDevice CPUDevice; - -// Quantize a tensor from float to T, with user-specified min_range and -// max_range. -// TODO(xbing): Add a new QuantizeOp just taking scale, -// rather than min_range and max_range. -template <typename Device, typename T> -class QuantizeV2Op : public OpKernel { - public: - explicit QuantizeV2Op(OpKernelConstruction* ctx) : OpKernel(ctx) { - half_range_ = !std::is_signed<T>::value - ? 0.0f - : (std::numeric_limits<T>::max() - - std::numeric_limits<T>::min() + 1) / - 2.0f; - string mode_string; - OP_REQUIRES_OK(ctx, ctx->GetAttr("mode", &mode_string)); - OP_REQUIRES(ctx, - (mode_string == "MIN_COMBINED" || mode_string == "MIN_FIRST"), - errors::InvalidArgument("Mode string must be 'MIN_COMBINED' or" - " 'MIN_FIRST', is '" + - mode_string + "'")); - if (mode_string == "MIN_COMBINED") { - mode_ = QUANTIZE_MODE_MIN_COMBINED; - } else if (mode_string == "MIN_FIRST") { - mode_ = QUANTIZE_MODE_MIN_FIRST; - } - } - - void Compute(OpKernelContext* ctx) override { - const Tensor& input = ctx->input(0); - const float input_min_range = ctx->input(1).flat<float>()(0); - const float input_max_range = ctx->input(2).flat<float>()(0); - - float min_range; - float max_range; - OP_REQUIRES(ctx, !(input_max_range < input_min_range), - errors::InvalidArgument( - "input_max_range must be larger than input_min_range.")); - - // When the minimum and maximum ranges are too close together, nudge them - // apart by a small value so that they are slightly different. This helps - // us avoid creating ill-formed buffers where all quantized values map to - // the same float number. These kinds of buffers cause problems for - // downstream ops when they need to do calculations on them. - // We pick the value by making sure that zero is not more than 100x the - // overall range from the maximum, so that the value can be easily - // represented when we promote the quantized value to a higher - // intermediate bit depth, since that's a common requirement. 
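-    // Worked example of the nudge: if input_min_range == input_max_range ==
-    // 1.0f, then epsilon = max(1.0f, 1.0f) / 100.0f = 0.01f and max_range
-    // becomes 1.01f, which is why the QuantizeV2EqualRange test expects an
-    // output_max strictly greater than 1.0f.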
-    min_range = input_min_range;
-    const float epsilon = std::max(1.0f, std::max(fabsf(input_min_range),
-                                                  fabsf(input_max_range))) /
-                          100.0f;
-    max_range = std::max(input_max_range, input_min_range + epsilon);
-
-    Tensor* output = nullptr;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output));
-    if (mode_ == QUANTIZE_MODE_MIN_COMBINED) {
-      const float scale_factor =
-          (std::numeric_limits<T>::max() - std::numeric_limits<T>::min()) /
-          (max_range - min_range);
-
-      // Quantize:
-      // Make input in range of [min_range, max_range], then
-      // subtract min_range to be in range of [0, max_range - min_range].
-      // Divide by (max_range - min_range) to get to [0, 1.0].
-      // Multiply by the range of T, then shift down by half the range of T
-      // if T is signed.
-      // Note that std::round is used to round the number before the cast.
-      // std::round implements "round-half-away-from-zero",
-      // e.g., -5.5 gets rounded to -6, -5.4 goes to -5, 5.4 goes to 5,
-      // and 5.5 goes to 6.
-      auto o = output->template flat<T>();
-      bool is_signed = std::is_signed<T>::value;
-      if (is_signed) {
-        // The slow path.
-        // TODO(xbing,yonghui): Speed up this path as well.
-        o.device(ctx->template eigen_device<Device>()) =
-            ((input.flat<float>().cwiseMin(max_range).cwiseMax(min_range) -
-              min_range) *
-                 scale_factor -
-             half_range_)
-                .unaryExpr(std::function<float(float)>(round))
-                .template cast<T>();
-      } else {
-        // The fast path that avoids unaryExpr.
-        // According to the micro-benchmark, adding device here doesn't help.
-        o = ((input.flat<float>().cwiseMin(max_range).cwiseMax(min_range) -
-              min_range) *
-                 scale_factor +
-             0.5f)
-                .template cast<T>();
-      }
-    } else if (mode_ == QUANTIZE_MODE_MIN_FIRST) {
-      FloatTensorToQuantizedInPlaceUsingEigen<T>(
-          ctx->template eigen_device<Device>(), input, min_range, max_range,
-          output);
-    }
-
-    Tensor* output_min_tensor = nullptr;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output(1, {}, &output_min_tensor));
-    output_min_tensor->flat<float>()(0) = min_range;
-
-    Tensor* output_max_tensor = nullptr;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output(2, {}, &output_max_tensor));
-    output_max_tensor->flat<float>()(0) = max_range;
-  }
-
- private:
-  float half_range_;
-  int mode_;
-};
-
-REGISTER_KERNEL_BUILDER(
-    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<quint8>("T"),
-    QuantizeV2Op<CPUDevice, quint8>);
-REGISTER_KERNEL_BUILDER(
-    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<qint8>("T"),
-    QuantizeV2Op<CPUDevice, qint8>);
-REGISTER_KERNEL_BUILDER(
-    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<quint16>("T"),
-    QuantizeV2Op<CPUDevice, quint16>);
-REGISTER_KERNEL_BUILDER(
-    Name("QuantizeV2").Device(DEVICE_CPU).TypeConstraint<qint16>("T"),
-    QuantizeV2Op<CPUDevice, qint16>);
-
-} // namespace tensorflow
diff --git a/tensorflow/core/kernels/quantize_op_test.cc b/tensorflow/core/kernels/quantize_op_test.cc
deleted file mode 100644
index d3ac7d3f7c..0000000000
--- a/tensorflow/core/kernels/quantize_op_test.cc
+++ /dev/null
@@ -1,113 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" -#include "tensorflow/core/platform/test_benchmark.h" - -namespace tensorflow { - -class QuantizedOpTest : public OpsTestBase { - protected: -}; - -TEST_F(QuantizedOpTest, QuantizeV2) { - TF_ASSERT_OK(NodeDefBuilder("quantize_op", "QuantizeV2") - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("T", DataTypeToEnum<quint8>::v()) - .Attr("mode", "MIN_FIRST") - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - AddInputFromArray<float>(TensorShape({6}), - {1.0, 1.25, 1.75, 127.0, 255.0, 500.0}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - TF_ASSERT_OK(RunOpKernel()); - Tensor expected(allocator(), DT_QUINT8, TensorShape({6})); - test::FillValues<quint8>(&expected, {1, 1, 2, 127, 255, 255}); - test::ExpectTensorEqual<quint8>(expected, *GetOutput(0)); -} - -TEST_F(QuantizedOpTest, QuantizeV2Ports) { - TF_ASSERT_OK(NodeDefBuilder("quantize_op", "QuantizeV2") - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("T", DataTypeToEnum<quint8>::v()) - .Attr("mode", "MIN_FIRST") - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - AddInputFromArray<float>(TensorShape({6}), - {1.0, 1.25, 1.75, 127.0, 255.0, 500.0}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - TF_ASSERT_OK(RunOpKernel()); - Tensor expected(allocator(), DT_QUINT8, TensorShape({6})); - test::FillValues<quint8>(&expected, {1, 1, 2, 127, 255, 255}); - test::ExpectTensorEqual<quint8>(expected, *GetOutput(0)); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - EXPECT_NEAR(0.0f, output_min, 1e-5f); - EXPECT_NEAR(255.0f, output_max, 1e-5f); -} - -TEST_F(QuantizedOpTest, QuantizeV2EqualRange) { - TF_ASSERT_OK(NodeDefBuilder("quantize_op", "QuantizeV2") - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("T", DataTypeToEnum<quint8>::v()) - .Attr("mode", "MIN_FIRST") - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - AddInputFromArray<float>(TensorShape({6}), {1.0, 1.0, 1.0, 1.0, 1.0, 1.0}); - AddInputFromArray<float>(TensorShape({1}), {1.0f}); - AddInputFromArray<float>(TensorShape({1}), {1.0f}); - TF_ASSERT_OK(RunOpKernel()); - Tensor expected(allocator(), DT_QUINT8, TensorShape({6})); - test::FillValues<quint8>(&expected, {0, 0, 0, 0, 0, 0}); - test::ExpectTensorEqual<quint8>(expected, *GetOutput(0)); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - EXPECT_NEAR(1.0f, output_min, 1e-5f); - EXPECT_LT(1.0f, output_max); -} - -TEST_F(QuantizedOpTest, Dequantize) { - TF_ASSERT_OK(NodeDefBuilder("dequantize_op", "Dequantize") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("T", DataTypeToEnum<quint8>::v()) - .Attr("mode", "MIN_FIRST") - .Finalize(node_def())); - 
TF_ASSERT_OK(InitOp()); - AddInputFromArray<quint8>(TensorShape({6}), {1, 2, 4, 8, 16, 255}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - TF_ASSERT_OK(RunOpKernel()); - Tensor expected(allocator(), DT_FLOAT, TensorShape({6})); - test::FillValues<float>(&expected, {1.0, 2.0, 4.0, 8.0, 16.0, 255.0}); - test::ExpectTensorNear<float>(expected, *GetOutput(0), 0.5); -} - -} // end namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_activation_ops.cc b/tensorflow/core/kernels/quantized_activation_ops.cc deleted file mode 100644 index ea1cf15f7b..0000000000 --- a/tensorflow/core/kernels/quantized_activation_ops.cc +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Implements a quantized version of the Relu6 operation. -#define EIGEN_USE_THREADS - -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/framework/numeric_op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/lib/core/errors.h" - -namespace tensorflow { - -template <typename T> -class QuantizedReluOp : public OpKernel { - public: - explicit QuantizedReluOp(OpKernelConstruction* context) : OpKernel(context) {} - - void Compute(OpKernelContext* context) override { - const Tensor& input = context->input(0); - const float min_input = context->input(1).flat<float>()(0); - const float max_input = context->input(2).flat<float>()(0); - Tensor* output = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(0, input.shape(), &output)); - const T min_as_quantized = FloatToQuantized<T>(0.0f, min_input, max_input); - output->flat<T>().device(context->eigen_cpu_device()) = - input.flat<T>().cwiseMax(min_as_quantized).template cast<T>(); - Tensor* output_min = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); - output_min->flat<float>()(0) = min_input; - Tensor* output_max = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max)); - output_max->flat<float>()(0) = max_input; - } -}; - -template <typename T> -class QuantizedRelu6Op : public OpKernel { - public: - explicit QuantizedRelu6Op(OpKernelConstruction* context) - : OpKernel(context) {} - - void Compute(OpKernelContext* context) override { - const Tensor& input = context->input(0); - const float min_input = context->input(1).flat<float>()(0); - const float max_input = context->input(2).flat<float>()(0); - Tensor* output = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(0, input.shape(), &output)); - const T min_as_quantized = FloatToQuantized<T>(0.0f, min_input, max_input); - const T max_as_quantized = FloatToQuantized<T>(6.0f, min_input, max_input); - output->flat<T>().device(context->eigen_cpu_device()) = - input.flat<T>() - .cwiseMax(min_as_quantized) - 
.cwiseMin(max_as_quantized) - .template cast<T>(); - Tensor* output_min = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); - output_min->flat<float>()(0) = min_input; - Tensor* output_max = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max)); - output_max->flat<float>()(0) = max_input; - } -}; - -REGISTER_KERNEL_BUILDER(Name("QuantizedRelu") - .Device(DEVICE_CPU) - .TypeConstraint<qint32>("Tinput") - .TypeConstraint<qint32>("out_type"), - QuantizedReluOp<qint32>); -REGISTER_KERNEL_BUILDER(Name("QuantizedRelu") - .Device(DEVICE_CPU) - .TypeConstraint<quint8>("Tinput") - .TypeConstraint<quint8>("out_type"), - QuantizedReluOp<quint8>); - -REGISTER_KERNEL_BUILDER(Name("QuantizedRelu6") - .Device(DEVICE_CPU) - .TypeConstraint<qint32>("Tinput") - .TypeConstraint<qint32>("out_type"), - QuantizedRelu6Op<qint32>); -REGISTER_KERNEL_BUILDER(Name("QuantizedRelu6") - .Device(DEVICE_CPU) - .TypeConstraint<quint8>("Tinput") - .TypeConstraint<quint8>("out_type"), - QuantizedRelu6Op<quint8>); -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_activation_ops_test.cc b/tensorflow/core/kernels/quantized_activation_ops_test.cc deleted file mode 100644 index 38c7d4ffef..0000000000 --- a/tensorflow/core/kernels/quantized_activation_ops_test.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { - -class QuantizedActivationsTest : public OpsTestBase { - protected: -}; - -TEST_F(QuantizedActivationsTest, TestRelu) { - TF_ASSERT_OK(NodeDefBuilder("quantized_relu_op", "QuantizedRelu") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - const float input_min = -128.0f; - const float input_max = 127.0f; - const int input_width = 2; - const int input_height = 4; - Tensor input_float(DT_FLOAT, {input_height, input_width}); - test::FillValues<float>(&input_float, {-100, -1, 0, 1, 3, 6, 7, 100}); - Tensor input_quantized = - FloatTensorToQuantized<quint8>(input_float, input_min, input_max); - Tensor expected_float(DT_FLOAT, {input_height, input_width}); - test::FillValues<float>(&expected_float, {0, 0, 0, 1, 3, 6, 7, 100}); - - AddInputFromArray<quint8>(input_quantized.shape(), - input_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {input_min}); - AddInputFromArray<float>(TensorShape({1}), {input_max}); - TF_ASSERT_OK(RunOpKernel()); - const Tensor& output_quantized = *GetOutput(0); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - Tensor output_float = - QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max); - test::ExpectTensorNear<float>(expected_float, output_float, 0.2); -} - -TEST_F(QuantizedActivationsTest, TestRelu6) { - TF_ASSERT_OK(NodeDefBuilder("quantized_relu6_op", "QuantizedRelu6") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - const float input_min = -128.0f; - const float input_max = 127.0f; - const int input_width = 2; - const int input_height = 4; - Tensor input_float(DT_FLOAT, {input_height, input_width}); - test::FillValues<float>(&input_float, {-100, -1, 0, 1, 3, 6, 7, 100}); - Tensor input_quantized = - FloatTensorToQuantized<quint8>(input_float, input_min, input_max); - Tensor expected_float(DT_FLOAT, {input_height, input_width}); - test::FillValues<float>(&expected_float, {0, 0, 0, 1, 3, 6, 6, 6}); - - AddInputFromArray<quint8>(input_quantized.shape(), - input_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {input_min}); - AddInputFromArray<float>(TensorShape({1}), {input_max}); - TF_ASSERT_OK(RunOpKernel()); - const Tensor& output_quantized = *GetOutput(0); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - Tensor output_float = - QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max); - test::ExpectTensorNear<float>(expected_float, output_float, 0.2); 
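-  // For reference (hand-computed): with the [-128, 127] range used here,
-  // FloatToQuantized<quint8>(0.0f) is 128 and FloatToQuantized<quint8>(6.0f)
-  // is about 134, so the Relu6 clamp runs entirely in the quantized domain
-  // with no dequantize/requantize round trip.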
-} - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_batch_norm_op.cc b/tensorflow/core/kernels/quantized_batch_norm_op.cc deleted file mode 100644 index 18d83b4149..0000000000 --- a/tensorflow/core/kernels/quantized_batch_norm_op.cc +++ /dev/null @@ -1,240 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#define EIGEN_USE_THREADS - -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/framework/numeric_op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/framework/tensor.h" - -namespace tensorflow { - -namespace { - -// A slow but straightforward implementation of batch normalization. -template <typename T1, typename T2> -void ReferenceBatchNorm(const Tensor& input, const float input_min, - const float input_max, const Tensor& mean, - float mean_min, float mean_max, const Tensor& var, - float var_min, float var_max, const Tensor& beta, - float beta_min, float beta_max, const Tensor& gamma, - float gamma_min, float gamma_max, - float variance_epsilon, bool scale_after_normalization, - Tensor* output, float* output_min, float* output_max) { - auto input_flat = input.flat<T1>(); - auto mean_flat = mean.flat<T1>(); - auto var_flat = var.flat<T1>(); - auto beta_flat = beta.flat<T1>(); - auto gamma_flat = gamma.flat<T1>(); - auto output_flat = output->flat<T2>(); - - const int depth = mean.dim_size(0); - const int row_count = input_flat.size() / depth; - - *output_min = std::numeric_limits<float>::max(); - *output_max = std::numeric_limits<float>::lowest(); - for (int pass = 0; pass < 2; ++pass) { - const bool is_range_pass = (pass == 0); - for (int row_index = 0; row_index < row_count; ++row_index) { - for (int channel = 0; channel < depth; ++channel) { - const int input_index = (row_index * depth) + channel; - const float input_value = - QuantizedToFloat(input_flat(input_index), input_min, input_max); - const float mean_value = - QuantizedToFloat(mean_flat(channel), mean_min, mean_max); - const float var_value = - QuantizedToFloat(var_flat(channel), var_min, var_max); - const float beta_value = - QuantizedToFloat(beta_flat(channel), beta_min, beta_max); - const float gamma_value = - QuantizedToFloat(gamma_flat(channel), gamma_min, gamma_max); - float output_value; - if (scale_after_normalization) { - output_value = (((input_value - mean_value) / - sqrtf(var_value + variance_epsilon)) * - gamma_value) + - beta_value; - } else { - output_value = ((input_value - mean_value) / - sqrtf(var_value + variance_epsilon)) + - beta_value; - } - if (is_range_pass) { - *output_min = std::min(output_value, *output_min); - *output_max = std::max(output_value, *output_max); - } else { - output_flat(input_index) = - FloatToQuantized<T2>(output_value, *output_min, *output_max); - } 
-      }
-    }
-  }
-}
-
-// An implementation of batch normalization that does the main calculations
-// using only fixed-point arithmetic. There's a prologue with some
-// floating-point calculations but, assuming the weights are constant, these
-// could be hoisted to an offline process or baked into the weights.
-template <typename T1, typename T2>
-void FixedPointBatchNorm(const Tensor& input, const float input_min,
-                         const float input_max, const Tensor& mean,
-                         float mean_min, float mean_max, const Tensor& var,
-                         float var_min, float var_max, const Tensor& beta,
-                         float beta_min, float beta_max, const Tensor& gamma,
-                         float gamma_min, float gamma_max,
-                         float variance_epsilon, bool scale_after_normalization,
-                         Tensor* output, float* output_min, float* output_max) {
-  auto input_flat = input.flat<T1>();
-  auto mean_flat = mean.flat<T1>();
-  auto var_flat = var.flat<T1>();
-  auto beta_flat = beta.flat<T1>();
-  auto gamma_flat = gamma.flat<T1>();
-  auto output_flat = output->flat<T2>();
-
-  const int depth = mean.dim_size(0);
-  const int row_count = input_flat.size() / depth;
-
-  // The range here is chosen so that typical input values fit in without any
-  // overflow or loss of precision, going from -1M to +1M with 10 bits of
-  // fixed-point precision.
-  *output_min = -(1 << 20);
-  *output_max = (1 << 20);
-
-  Tensor scale_tensor(DataTypeToEnum<T2>::v(), {depth});
-  auto scale_flat = scale_tensor.flat<T2>();
-  Tensor offset_tensor(DataTypeToEnum<T2>::v(), {depth});
-  auto offset_flat = offset_tensor.flat<T2>();
-  for (int channel = 0; channel < depth; ++channel) {
-    const float mean_value =
-        QuantizedToFloat(mean_flat(channel), mean_min, mean_max);
-    const float var_value =
-        QuantizedToFloat(var_flat(channel), var_min, var_max);
-    const float beta_value =
-        QuantizedToFloat(beta_flat(channel), beta_min, beta_max);
-    const float gamma_value =
-        QuantizedToFloat(gamma_flat(channel), gamma_min, gamma_max);
-    float scale_value;
-    if (scale_after_normalization) {
-      scale_value = (1.0f / sqrtf(var_value + variance_epsilon)) * gamma_value;
-    } else {
-      scale_value = (1.0f / sqrtf(var_value + variance_epsilon));
-    }
-    const float offset_value = (-mean_value * scale_value) + beta_value;
-    scale_flat(channel) =
-        FloatToQuantized<T2>(scale_value, *output_min, *output_max);
-    offset_flat(channel) =
-        FloatToQuantized<T2>(offset_value, *output_min, *output_max);
-  }
-
-  const T2 one_in_output_space =
-      FloatToQuantized<T2>(1.0f, *output_min, *output_max);
-  for (int row_index = 0; row_index < row_count; ++row_index) {
-    for (int channel = 0; channel < depth; ++channel) {
-      const int input_index = (row_index * depth) + channel;
-      const T2 input_value =
-          RequantizeInNewRange<T1, T2>(input_flat(input_index), input_min,
-                                       input_max, *output_min, *output_max);
-      const T2 scale_value = scale_flat(channel);
-      const T2 offset_value = offset_flat(channel);
-      const T2 output_value =
-          ((input_value * scale_value) / one_in_output_space) + offset_value;
-      output_flat(input_index) = output_value;
-    }
-  }
-}
-
-} // namespace
-
-template <typename T1, typename T2>
-class QuantizedBatchNormOp : public OpKernel {
- public:
-  explicit QuantizedBatchNormOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    OP_REQUIRES_OK(context,
-                   context->GetAttr("variance_epsilon", &variance_epsilon_));
-    OP_REQUIRES_OK(context, context->GetAttr("scale_after_normalization",
-                                             &scale_after_normalization_));
-  }
-
-  void Compute(OpKernelContext* context) override {
-    const Tensor& input = context->input(0);
-    const float input_min =
context->input(1).flat<float>()(0); - const float input_max = context->input(2).flat<float>()(0); - const Tensor& mean = context->input(3); - const float mean_min = context->input(4).flat<float>()(0); - const float mean_max = context->input(5).flat<float>()(0); - const Tensor& var = context->input(6); - const float var_min = context->input(7).flat<float>()(0); - const float var_max = context->input(8).flat<float>()(0); - const Tensor& beta = context->input(9); - const float beta_min = context->input(10).flat<float>()(0); - const float beta_max = context->input(11).flat<float>()(0); - const Tensor& gamma = context->input(12); - const float gamma_min = context->input(13).flat<float>()(0); - const float gamma_max = context->input(14).flat<float>()(0); - - OP_REQUIRES(context, input.dims() == 4, - errors::InvalidArgument("input must be 4-dimensional", - input.shape().DebugString())); - OP_REQUIRES(context, mean.dims() == 1, - errors::InvalidArgument("mean must be 1-dimensional", - mean.shape().DebugString())); - OP_REQUIRES(context, var.dims() == 1, - errors::InvalidArgument("var must be 1-dimensional", - var.shape().DebugString())); - OP_REQUIRES(context, beta.dims() == 1, - errors::InvalidArgument("beta must be 1-dimensional", - beta.shape().DebugString())); - OP_REQUIRES(context, gamma.dims() == 1, - errors::InvalidArgument("gamma must be 1-dimensional", - gamma.shape().DebugString())); - - Tensor* output = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(0, input.shape(), &output)); - float output_min; - float output_max; - FixedPointBatchNorm<T1, T2>(input, input_min, input_max, mean, mean_min, - mean_max, var, var_min, var_max, beta, beta_min, - beta_max, gamma, gamma_min, gamma_max, - variance_epsilon_, scale_after_normalization_, - output, &output_min, &output_max); - - Tensor* output_min_tensor = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(1, {}, &output_min_tensor)); - output_min_tensor->flat<float>()(0) = output_min; - - Tensor* output_max_tensor = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(2, {}, &output_max_tensor)); - output_max_tensor->flat<float>()(0) = output_max; - } - - private: - float variance_epsilon_; - bool scale_after_normalization_; -}; - -REGISTER_KERNEL_BUILDER(Name("QuantizedBatchNormWithGlobalNormalization") - .Device(DEVICE_CPU) - .TypeConstraint<quint8>("Tinput") - .TypeConstraint<qint32>("out_type"), - QuantizedBatchNormOp<quint8, qint32>); - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_batch_norm_op_test.cc b/tensorflow/core/kernels/quantized_batch_norm_op_test.cc deleted file mode 100644 index 9880d972cd..0000000000 --- a/tensorflow/core/kernels/quantized_batch_norm_op_test.cc +++ /dev/null @@ -1,242 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#define EIGEN_USE_THREADS - -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/common_runtime/eigen_thread_pool.h" -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/batch_norm_op.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { - -class QuantizedBatchNormOpTest : public OpsTestBase {}; - -TEST_F(QuantizedBatchNormOpTest, Simple) { - TF_EXPECT_OK(NodeDefBuilder("quantized_batch_norm_op", - "QuantizedBatchNormWithGlobalNormalization") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("scale_after_normalization", false) - .Attr("variance_epsilon", 0.001) - .Attr("Tinput", DT_QUINT8) - .Attr("out_type", DT_QINT32) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - const float input_min = -128.0f; - const float input_max = 127.0f; - const int input_batch = 1; - const int input_height = 1; - const int input_width = 6; - const int input_depth = 2; - Tensor input_float(DT_FLOAT, - {input_batch, input_height, input_width, input_depth}); - test::FillValues<float>(&input_float, - {1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6}); - Tensor input_quantized = - FloatTensorToQuantized<quint8>(input_float, input_min, input_max); - const float mean_min = 0.0f; - const float mean_max = 20.0f; - Tensor mean_float(DT_FLOAT, {input_depth}); - test::FillValues<float>(&mean_float, {10, 20}); - Tensor mean_quantized = - FloatTensorToQuantized<quint8>(mean_float, mean_min, mean_max); - const float variance_min = 0.0f; - const float variance_max = 1.0f; - Tensor variance_float(DT_FLOAT, {input_depth}); - test::FillValues<float>(&variance_float, {0.25, 0.5}); - Tensor variance_quantized = FloatTensorToQuantized<quint8>( - variance_float, variance_min, variance_max); - const float beta_min = 0.0f; - const float beta_max = 1.0f; - Tensor beta_float(DT_FLOAT, {input_depth}); - test::FillValues<float>(&beta_float, {0.1, 0.6}); - Tensor beta_quantized = - FloatTensorToQuantized<quint8>(beta_float, beta_min, beta_max); - const float gamma_min = 0.0f; - const float gamma_max = 1.0f; - Tensor gamma_float(DT_FLOAT, {input_depth}); - test::FillValues<float>(&gamma_float, {0.0, 0.0}); - Tensor gamma_quantized = - FloatTensorToQuantized<quint8>(gamma_float, gamma_min, gamma_max); - - AddInputFromArray<quint8>(input_quantized.shape(), - input_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {input_min}); - AddInputFromArray<float>(TensorShape({1}), {input_max}); - AddInputFromArray<quint8>(mean_quantized.shape(), - mean_quantized.flat<quint8>()); - 
AddInputFromArray<float>(TensorShape({1}), {mean_min}); - AddInputFromArray<float>(TensorShape({1}), {mean_max}); - AddInputFromArray<quint8>(variance_quantized.shape(), - variance_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {variance_min}); - AddInputFromArray<float>(TensorShape({1}), {variance_max}); - AddInputFromArray<quint8>(beta_quantized.shape(), - beta_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {beta_min}); - AddInputFromArray<float>(TensorShape({1}), {beta_max}); - AddInputFromArray<quint8>(gamma_quantized.shape(), - gamma_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {gamma_min}); - AddInputFromArray<float>(TensorShape({1}), {gamma_max}); - TF_ASSERT_OK(RunOpKernel()); - - Tensor expected_float( - allocator(), DT_FLOAT, - TensorShape({input_batch, input_height, input_width, input_depth})); - test::FillValues<float>( - &expected_float, {-17.86, -22.00, -15.87, -20.59, -13.87, -19.18, -21.86, - -33.31, -23.85, -34.72, -25.85, -36.13}); - const Tensor& output_quantized = *GetOutput(0); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - Tensor output_float = - QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); - test::ExpectTensorNear<float>(expected_float, output_float, 0.1); -} - -TEST_F(QuantizedBatchNormOpTest, SameAsFloat) { - TF_EXPECT_OK(NodeDefBuilder("quantized_batch_norm_op", - "QuantizedBatchNormWithGlobalNormalization") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("scale_after_normalization", false) - .Attr("variance_epsilon", 0.001) - .Attr("Tinput", DT_QUINT8) - .Attr("out_type", DT_QINT32) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - const float input_min = -128.0f; - const float input_max = 127.0f; - const int input_batch = 1; - const int input_height = 1; - const int input_width = 6; - const int input_depth = 2; - Tensor input_float(DT_FLOAT, - {input_batch, input_height, input_width, input_depth}); - test::FillValues<float>(&input_float, - {1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6}); - Tensor input_quantized = - FloatTensorToQuantized<quint8>(input_float, input_min, input_max); - const float mean_min = 0.0f; - const float mean_max = 20.0f; - Tensor mean_float(DT_FLOAT, {input_depth}); - test::FillValues<float>(&mean_float, {10, 20}); - Tensor mean_quantized = - FloatTensorToQuantized<quint8>(mean_float, mean_min, mean_max); - const float variance_min = 0.0f; - const float variance_max = 1.0f; - Tensor variance_float(DT_FLOAT, {input_depth}); - test::FillValues<float>(&variance_float, {0.25, 0.5}); - Tensor variance_quantized = FloatTensorToQuantized<quint8>( - variance_float, variance_min, variance_max); - const float beta_min = 0.0f; - const float beta_max = 1.0f; - Tensor beta_float(DT_FLOAT, {input_depth}); - test::FillValues<float>(&beta_float, {0.1, 0.6}); - Tensor beta_quantized = - FloatTensorToQuantized<quint8>(beta_float, beta_min, beta_max); - const float gamma_min = 0.0f; - const float gamma_max = 1.0f; - Tensor gamma_float(DT_FLOAT, {input_depth}); - 
test::FillValues<float>(&gamma_float, {0.0, 0.0}); - Tensor gamma_quantized = - FloatTensorToQuantized<quint8>(gamma_float, gamma_min, gamma_max); - - AddInputFromArray<quint8>(input_quantized.shape(), - input_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {input_min}); - AddInputFromArray<float>(TensorShape({1}), {input_max}); - AddInputFromArray<quint8>(mean_quantized.shape(), - mean_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {mean_min}); - AddInputFromArray<float>(TensorShape({1}), {mean_max}); - AddInputFromArray<quint8>(variance_quantized.shape(), - variance_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {variance_min}); - AddInputFromArray<float>(TensorShape({1}), {variance_max}); - AddInputFromArray<quint8>(beta_quantized.shape(), - beta_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {beta_min}); - AddInputFromArray<float>(TensorShape({1}), {beta_max}); - AddInputFromArray<quint8>(gamma_quantized.shape(), - gamma_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {gamma_min}); - AddInputFromArray<float>(TensorShape({1}), {gamma_max}); - TF_ASSERT_OK(RunOpKernel()); - - Tensor expected_float( - allocator(), DT_FLOAT, - TensorShape({input_batch, input_height, input_width, input_depth})); - thread::ThreadPool threadpool(Env::Default(), "test", 1); - EigenThreadPoolWrapper wrapper(&threadpool); - Eigen::ThreadPoolDevice eigen_cpu_device(&wrapper, 1); - const Tensor& const_input_float = input_float; - const Tensor& const_mean_float = mean_float; - const Tensor& const_variance_float = variance_float; - const Tensor& const_beta_float = beta_float; - const Tensor& const_gamma_float = gamma_float; - functor::BatchNorm<Eigen::ThreadPoolDevice, float>()( - eigen_cpu_device, const_input_float.tensor<float, 4>(), - const_mean_float.vec<float>(), const_variance_float.vec<float>(), - const_beta_float.vec<float>(), const_gamma_float.vec<float>(), 0.001, - false, expected_float.tensor<float, 4>()); - - const Tensor& output_quantized = *GetOutput(0); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - Tensor output_float = - QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); - test::ExpectTensorNear<float>(expected_float, output_float, 0.1); -} - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_bias_add_op.cc b/tensorflow/core/kernels/quantized_bias_add_op.cc deleted file mode 100644 index 0b34bfcad8..0000000000 --- a/tensorflow/core/kernels/quantized_bias_add_op.cc +++ /dev/null @@ -1,89 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Implements a quantized eight-bit version of the bias addition operation. 
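-//
-// Broadly (a reader's summary): QuantizedAddUsingEigen chooses an output range
-// wide enough to hold any sum of the input and bias ranges, promotes both
-// operands into that range as qint32, and adds elementwise; the chosen min and
-// max are reported through the second and third outputs. See the Small test in
-// the companion _test.cc, which checks sums such as 10 + 1 = 11 within a loose
-// tolerance.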
- -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/framework/numeric_op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/lib/core/errors.h" - -namespace tensorflow { - -typedef Eigen::ThreadPoolDevice CPUDevice; - -template <class T1, class T2, class T3> -class QuantizedBiasAddOp : public OpKernel { - public: - explicit QuantizedBiasAddOp(OpKernelConstruction* context) - : OpKernel(context) {} - - void Compute(OpKernelContext* context) override { - const Tensor& input = context->input(0); - const Tensor& bias = context->input(1); - const float input_min = context->input(2).flat<float>()(0); - const float input_max = context->input(3).flat<float>()(0); - const float bias_min = context->input(4).flat<float>()(0); - const float bias_max = context->input(5).flat<float>()(0); - - OP_REQUIRES(context, TensorShapeUtils::IsMatrixOrHigher(input.shape()), - errors::InvalidArgument("Input tensor must be at least 2D: ", - input.shape().DebugString())); - OP_REQUIRES(context, TensorShapeUtils::IsVector(bias.shape()), - errors::InvalidArgument("Biases must be 1D: ", - bias.shape().DebugString())); - const auto last_dim = input.shape().dims() - 1; - OP_REQUIRES( - context, bias.shape().dim_size(0) == input.shape().dim_size(last_dim), - errors::InvalidArgument( - "Must provide as many biases as the last dimension " - "of the input tensor: ", - bias.shape().DebugString(), " vs. ", input.shape().DebugString())); - - Tensor* output = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(0, input.shape(), &output)); - - float total_min; - float total_max; - QuantizedAddUsingEigen<T1, T2, T3>( - context->template eigen_device<CPUDevice>(), input, input_min, - input_max, bias, bias_min, bias_max, output, &total_min, &total_max); - - Tensor* output_min = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); - output_min->flat<float>()(0) = total_min; - - Tensor* output_max = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max)); - output_max->flat<float>()(0) = total_max; - } -}; - -REGISTER_KERNEL_BUILDER(Name("QuantizedBiasAdd") - .Device(DEVICE_CPU) - .TypeConstraint<quint8>("T1") - .TypeConstraint<quint8>("T2") - .TypeConstraint<qint32>("out_type"), - QuantizedBiasAddOp<quint8, quint8, qint32>); -REGISTER_KERNEL_BUILDER(Name("QuantizedBiasAdd") - .Device(DEVICE_CPU) - .TypeConstraint<qint8>("T1") - .TypeConstraint<qint8>("T2") - .TypeConstraint<qint32>("out_type"), - QuantizedBiasAddOp<qint8, qint8, qint32>); -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_bias_add_op_test.cc b/tensorflow/core/kernels/quantized_bias_add_op_test.cc deleted file mode 100644 index 3fd0eaa981..0000000000 --- a/tensorflow/core/kernels/quantized_bias_add_op_test.cc +++ /dev/null @@ -1,171 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include <functional> - -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { - -class QuantizedBiasAddTest : public OpsTestBase { - protected: -}; - -TEST_F(QuantizedBiasAddTest, Small) { - TF_ASSERT_OK(NodeDefBuilder("quantized_bias_add_op", "QuantizedBiasAdd") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("out_type", DataTypeToEnum<qint32>::v()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - const float input_min = 0.0f; - const float input_max = 60.0f; - const int input_height = 2; - const int input_width = 3; - Tensor input_float(DT_FLOAT, {input_height, input_width}); - test::FillValues<float>(&input_float, - {10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f}); - Tensor input_quantized = - FloatTensorToQuantized<quint8>(input_float, input_min, input_max); - - const float bias_min = 0.0f; - const float bias_max = 3.0f; - const int bias_width = 3; - Tensor bias_float(DT_FLOAT, {bias_width}); - test::FillValues<float>(&bias_float, {1.0f, 2.0f, 3.0f}); - Tensor bias_quantized = - FloatTensorToQuantized<quint8>(bias_float, bias_min, bias_max); - - Tensor expected_float(DT_FLOAT, {input_height, input_width}); - test::FillValues<float>(&expected_float, - {11.0f, 22.0f, 33.0f, 41.0f, 52.0f, 63.0f}); - - AddInputFromArray<quint8>(input_quantized.shape(), - input_quantized.flat<quint8>()); - AddInputFromArray<quint8>(bias_quantized.shape(), - bias_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {input_min}); - AddInputFromArray<float>(TensorShape({1}), {input_max}); - AddInputFromArray<float>(TensorShape({1}), {bias_min}); - AddInputFromArray<float>(TensorShape({1}), {bias_max}); - TF_ASSERT_OK(RunOpKernel()); - const Tensor& output_quantized = *GetOutput(0); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - Tensor output_float = - QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); - test::ExpectTensorNear<float>(expected_float, output_float, 0.2); -} - -TEST_F(QuantizedBiasAddTest, RealData) { - TF_ASSERT_OK(NodeDefBuilder("quantized_bias_add_op", "QuantizedBiasAdd") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("out_type", DataTypeToEnum<qint32>::v()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - const float input_min = -2164.25f; - const float input_max = 2006.27f; - const int input_height = 1; - const int input_width = 64; - Tensor input_float(DT_FLOAT, {input_height, input_width}); - test::FillValues<float>( - &input_float, - 
{-1014.12, -157.382, -810.17, 1435.28, 1016.37, 219.684, -316.054, - -2164.25, 2006.27, -547.444, 857.376, 404.376, 9.72115, 332.588, - 194.385, -286.57, 26.062, 23.1125, 110.436, 247.055, -127.683, - -376.275, -124.81, -846.826, -77.1507, 305.581, -202.747, 12.9528, - 9.64886, 872.686, 40.9069, 197.816, 44.16, -306.768, -1457.52, - -368.939, -1049.42, -486.353, 1745.87, 95.7695, 395.773, -254.333, - -404.27, 787.16, -2.44114, 199.37, -1024.08, 784.901, 235.055, - -42.7295, 241.498, -245.365, 470.763, 186.159, 186.579, -220.163, - 1304.58, 386.272, -358.853, -755.996, 360.109, -866.007, 55.2828, - -508.801}); - Tensor input_quantized = - FloatTensorToQuantized<quint8>(input_float, input_min, input_max); - - const float bias_min = -0.739539f; - const float bias_max = 0.641057f; - const int bias_width = 64; - Tensor bias_float(DT_FLOAT, {bias_width}); - test::FillValues<float>( - &bias_float, - {-0.294619, -0.0670519, 0.261507, -0.126274, 0.127229, -0.176945, - -0.251223, 0.231086, 0.453694, 0.415666, -0.288733, 0.508717, - 0.211551, 0.0435907, -0.582383, -0.308779, 0.0696883, -0.438122, - 0.114, 0.433964, 0.109883, 0.284931, -0.149661, 0.108657, - 0.458333, -0.130231, -0.35805, -0.123206, -0.437968, 0.0282411, - 0.628818, -0.0522173, -0.0233403, 0.124863, 0.217165, 0.262294, - -0.171005, -0.254693, -0.200433, -0.287354, 0.488166, -0.0354688, - -0.118091, -0.590444, 0.491537, -0.739539, 0.083117, 0.282482, - 0.275269, -0.36574, 0.107476, 0.0511428, -0.136887, -0.0149852, - -0.259694, 0.641057, 0.264054, -0.295126, -0.0218791, 0.361211, - 0.012448, 0.0709718, -0.392394, -0.434215}); - Tensor bias_quantized = - FloatTensorToQuantized<quint8>(bias_float, bias_min, bias_max); - - Tensor expected_float(DT_FLOAT, {input_height, input_width}); - test::FillValues<float>( - &expected_float, - {-1014.42, -157.449, -809.908, 1435.16, 1016.5, 219.507, -316.305, - -2164.02, 2006.73, -547.028, 857.088, 404.885, 9.9327, 332.632, - 193.803, -286.878, 26.1317, 22.6744, 110.55, 247.489, -127.573, - -375.99, -124.959, -846.717, -76.6923, 305.451, -203.105, 12.8296, - 9.21089, 872.714, 41.5357, 197.764, 44.1367, -306.643, -1457.3, - -368.677, -1049.6, -486.608, 1745.67, 95.4821, 396.261, -254.368, - -404.388, 786.57, -1.94961, 198.63, -1024.0, 785.183, 235.33, - -43.0953, 241.605, -245.314, 470.627, 186.144, 186.319, -219.522, - 1304.84, 385.977, -358.874, -755.635, 360.122, -865.936, 54.8904, - -509.235}); - - AddInputFromArray<quint8>(input_quantized.shape(), - input_quantized.flat<quint8>()); - AddInputFromArray<quint8>(bias_quantized.shape(), - bias_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {input_min}); - AddInputFromArray<float>(TensorShape({1}), {input_max}); - AddInputFromArray<float>(TensorShape({1}), {bias_min}); - AddInputFromArray<float>(TensorShape({1}), {bias_max}); - TF_ASSERT_OK(RunOpKernel()); - const Tensor& output_quantized = *GetOutput(0); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - Tensor output_float = - QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); - test::ExpectTensorNear<float>(expected_float, output_float, 20.0); -} - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_concat_op.cc b/tensorflow/core/kernels/quantized_concat_op.cc deleted file mode 100644 index f929dd61cb..0000000000 --- a/tensorflow/core/kernels/quantized_concat_op.cc +++ /dev/null @@ -1,246 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#define EIGEN_USE_THREADS - -#include <vector> - -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/concat_lib_cpu.h" - -namespace tensorflow { - -namespace { -template <typename T> -struct RequantizeCopier { - RequantizeCopier( - const std::vector<std::pair<float, float>>* input_min_and_max, - float output_min, float output_max) - : output_min(output_min), - output_max(output_max), - input_min_and_max(input_min_and_max) {} - - inline void Copy(T* dst, const T* src, int input_index, size_t n) { - const float input_min = (*input_min_and_max)[input_index].first; - const float input_max = (*input_min_and_max)[input_index].second; - if (input_min == output_min && input_max == output_max) { - DCHECK(DataTypeCanUseMemcpy(DataTypeToEnum<T>::v())); - memcpy(dst, src, n * sizeof(T)); - } else { - Eigen::array<Eigen::DenseIndex, 1> dims; - dims[0] = n; - typename TTypes<T, 1>::UnalignedConstTensor input_array(src, dims); - typename TTypes<T, 1>::UnalignedTensor output_array(dst, dims); - - QuantizedToFloatStruct<T> q2f(input_min, input_max); - auto input_float = DEQUANTIZE_WITH_EIGEN(input_array, q2f); - FloatToQuantizedStruct<T> f2q(output_min, output_max); - auto input_requantized = QUANTIZE_WITH_EIGEN(input_float, f2q, T); - - // RequantizeCopier::Copy is called from within a shard of computation, so - // don't use the threadpool device here, simply assign with default CPU - // device. 
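- // Concretely, for an unsigned type each code v is mapped to the float
- // value input_min + v * (input_max - input_min) / max_code and then
- // re-encoded into the output range; when the two ranges are equal that
- // round trip is the identity, which is what makes the memcpy fast path
- // above safe.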
- output_array = input_requantized; - } - } - - float output_min; - float output_max; - const std::vector<std::pair<float, float>>* input_min_and_max; -}; -} // namespace - -template <typename T> -class QuantizedConcatOp : public OpKernel { - public: - typedef std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>> - ConstMatrixVector; - - explicit QuantizedConcatOp(OpKernelConstruction* c) : OpKernel(c) {} - - void CalculateInputAndOutputRange( - const OpInputList& input_mins, const OpInputList& input_maxes, - const size_t N, - std::vector<std::pair<float, float>>* input_mins_and_maxes, - float* output_min, float* output_max) { - input_mins_and_maxes->reserve(N); - float overall_min = std::numeric_limits<float>::max(); - float overall_max = std::numeric_limits<float>::lowest(); - for (int i = 0; i < N; ++i) { - const float input_min = input_mins[i].flat<float>()(0); - const float input_max = input_maxes[i].flat<float>()(0); - input_mins_and_maxes->emplace_back(input_min, input_max); - overall_min = std::min(overall_min, input_min); - overall_max = std::max(overall_max, input_max); - } - if (std::is_signed<T>::value) { - // For signed, we want a symmetrical distribution including zero for the - // output, so pick a range that meets that need. - const float largest_value = - std::max(std::abs(overall_min), std::abs(overall_max)); - *output_min = -largest_value; - *output_max = largest_value; - } else { - *output_min = overall_min; - *output_max = overall_max; - } - } - - int64 CalculateInputsDim(const TensorShape& input_shape, - const int32 concat_dim) { - int64 inputs_flat_dim0 = 1; - for (int d = 0; d < concat_dim; ++d) { - inputs_flat_dim0 *= input_shape.dim_size(d); - } - return inputs_flat_dim0; - } - - void CalculateConcatDims(const size_t N, const TensorShape& input_shape, - int input_dims, const OpInputList& values, - OpKernelContext* context, const int32 concat_dim, - const int64 inputs_flat_dim0, - ConstMatrixVector* inputs_flat, - int* output_concat_dim) { - // Note that we reduce the concat of n-dimensional tensors into a two - // dimensional concat. Assuming the dimensions of any input/output - // tensor are {x0, x1,...,xn-1, y0, y1,...,ym-1}, where the concat is along - // the dimension indicated with size y0, we flatten it to {x, y}, where y = - // Prod_i(yi) and x = ((n > 0) ? Prod_i(xi) : 1). - inputs_flat->reserve(N); - *output_concat_dim = 0; - const bool input_is_scalar = IsLegacyScalar(input_shape); - for (int i = 0; i < N; ++i) { - const auto in = values[i]; - const bool in_is_scalar = IsLegacyScalar(in.shape()); - OP_REQUIRES( - context, in.dims() == input_dims || (input_is_scalar && in_is_scalar), - errors::InvalidArgument( - "ConcatOp : Ranks of all input tensors should match: shape[0] = ", - input_shape.DebugString(), " vs. shape[", i, "] = ", - in.shape().DebugString())); - for (int j = 0; j < input_dims; ++j) { - if (j == concat_dim) { - continue; - } - OP_REQUIRES( - context, in.dim_size(j) == input_shape.dim_size(j), - errors::InvalidArgument( - "ConcatOp : Dimensions of inputs should match: shape[0] = ", - input_shape.DebugString(), " vs. shape[", i, "] = ", - in.shape().DebugString())); - } - if (in.NumElements() > 0) { - int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0; - inputs_flat->emplace_back(new typename TTypes<T, 2>::ConstMatrix( - in.shaped<T, 2>({inputs_flat_dim0, inputs_flat_dim1}))); - } - *output_concat_dim += in.dims() > 0 ? 
in.dim_size(concat_dim) : 1; - } - } - - void Compute(OpKernelContext* context) override { - const Tensor* concat_dim_tensor = nullptr; - OP_REQUIRES_OK(context, context->input("concat_dim", &concat_dim_tensor)); - OP_REQUIRES( - context, IsLegacyScalar(concat_dim_tensor->shape()), - errors::InvalidArgument( - "Concat dim tensor should be a scalar integer, but got shape ", - concat_dim_tensor->shape().DebugString())); - const int32 concat_dim = concat_dim_tensor->scalar<int32>()(); - OpInputList values; - OP_REQUIRES_OK(context, context->input_list("values", &values)); - const size_t N = values.size(); - OpInputList input_mins; - OP_REQUIRES_OK(context, context->input_list("input_mins", &input_mins)); - OP_REQUIRES(context, (input_mins.size() == N), - errors::InvalidArgument( - "QuantizedConcatOp : Expected mins input list length ", - input_mins.size(), " to equal values length ", N)) - OpInputList input_maxes; - OP_REQUIRES_OK(context, context->input_list("input_maxes", &input_maxes)); - OP_REQUIRES(context, (input_maxes.size() == N), - errors::InvalidArgument( - "QuantizedConcatOp : Expected maxes input list length ", - input_maxes.size(), " to equal values length ", N)) - const int input_dims = values[0].dims(); - const TensorShape& input_shape = values[0].shape(); - OP_REQUIRES( - context, (0 <= concat_dim && concat_dim < input_dims) || - (allow_legacy_scalars() && concat_dim == 0), - errors::InvalidArgument( - "ConcatOp : Expected concatenating dimensions in the range [", 0, - ", ", input_dims, "), but got ", concat_dim)); - - float output_min = std::numeric_limits<float>::max(); - float output_max = std::numeric_limits<float>::lowest(); - std::vector<std::pair<float, float>> input_mins_and_maxes; - CalculateInputAndOutputRange(input_mins, input_maxes, N, - &input_mins_and_maxes, &output_min, - &output_max); - const int64 inputs_flat_dim0 = CalculateInputsDim(input_shape, concat_dim); - ConstMatrixVector inputs_flat; - int output_concat_dim; - CalculateConcatDims(N, input_shape, input_dims, values, context, concat_dim, - inputs_flat_dim0, &inputs_flat, &output_concat_dim); - - TensorShape output_shape(input_shape); - // TODO(irving): Remove rank 0 case once !kAllowLegacyScalars - if (output_shape.dims() == 0) { - output_shape.AddDim(output_concat_dim); - } else { - output_shape.set_dim(concat_dim, output_concat_dim); - } - Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); - - if (output->NumElements() > 0) { - int64 output_dim1 = output->NumElements() / inputs_flat_dim0; - auto output_flat = output->shaped<T, 2>({inputs_flat_dim0, output_dim1}); - ConcatCPUImpl<T>( - context->device(), inputs_flat, sizeof(T) /* cost_per_unit */, - RequantizeCopier<T>(&input_mins_and_maxes, output_min, output_max), - &output_flat); - } - - Tensor* output_min_tensor = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(1, {}, &output_min_tensor)); - output_min_tensor->flat<float>()(0) = output_min; - - Tensor* output_max_tensor = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(2, {}, &output_max_tensor)); - output_max_tensor->flat<float>()(0) = output_max; - } -}; - -#define REGISTER_QUANTIZED_CONCAT(type) \ - REGISTER_KERNEL_BUILDER(Name("QuantizedConcat") \ - .Device(DEVICE_CPU) \ - .TypeConstraint<type>("T") \ - .HostMemory("concat_dim"), \ - QuantizedConcatOp<type>) - -REGISTER_QUANTIZED_CONCAT(quint8); -REGISTER_QUANTIZED_CONCAT(qint32); - -#undef REGISTER_QUANTIZED_CONCAT - -} // namespace tensorflow diff --git 
a/tensorflow/core/kernels/quantized_concat_op_test.cc b/tensorflow/core/kernels/quantized_concat_op_test.cc deleted file mode 100644 index dc1439279e..0000000000 --- a/tensorflow/core/kernels/quantized_concat_op_test.cc +++ /dev/null @@ -1,337 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include <functional> -#include <memory> -#include <vector> - -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/graph/node_builder.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" -#include "tensorflow/core/platform/test_benchmark.h" - -namespace tensorflow { - -using test::graph::Constant; - -class QuantizedConcatTest : public OpsTestBase { - protected: - QuantizedConcatTest() {} - - void TestSmall8Bit(float first_min, float first_max, float second_min, - float second_max); - void TestSmall32Bit(float first_min, float first_max, float second_min, - float second_max); - void TestSecondDim8Bit(float first_min, float first_max, float second_min, - float second_max); -}; - -TEST_F(QuantizedConcatTest, Small8Bit) { - TestSmall8Bit(0.0f, 255.0f, 0.0f, 25.0f); -} - -TEST_F(QuantizedConcatTest, Small8BitSameRange) { - // Range for both is the same, so impl can use memcpy. 
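- // (Equal input and output ranges make the requantization in
- // RequantizeCopier the identity mapping, so values can be copied through
- // untouched.)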
- TestSmall8Bit(0.0f, 255.0f, 0.0f, 255.0f); -} - -void QuantizedConcatTest::TestSmall8Bit(float first_min, float first_max, - float second_min, float second_max) { - TF_ASSERT_OK(NodeDefBuilder("quantized_concat_op", "QuantizedConcat") - .Input(FakeInput(DT_INT32)) - .Input(FakeInput(2, DT_QUINT8)) - .Input(FakeInput(2, DT_FLOAT)) - .Input(FakeInput(2, DT_FLOAT)) - .Attr("N", 2) - .Attr("T", DataTypeToEnum<quint8>::v()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - const int first_batch = 2; - const int first_height = 2; - const int first_width = 3; - Tensor first_float(DT_FLOAT, {first_batch, first_height, first_width}); - test::FillValues<float>(&first_float, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - Tensor first_quantized = - FloatTensorToQuantized<quint8>(first_float, first_min, first_max); - - const int second_batch = 2; - const int second_height = 2; - const int second_width = 3; - Tensor second_float(DT_FLOAT, {second_batch, second_height, second_width}); - test::FillValues<float>(&second_float, - {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); - Tensor second_quantized = - FloatTensorToQuantized<quint8>(second_float, second_min, second_max); - - const int expected_batch = first_batch + second_batch; - Tensor expected_float(DT_FLOAT, {expected_batch, first_height, first_width}); - test::FillValues<float>(&expected_float, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); - - AddInputFromArray<int32>(TensorShape({}), {0}); - AddInputFromArray<quint8>(first_quantized.shape(), - first_quantized.flat<quint8>()); - AddInputFromArray<quint8>(second_quantized.shape(), - second_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({}), {first_min}); - AddInputFromArray<float>(TensorShape({}), {second_min}); - AddInputFromArray<float>(TensorShape({}), {first_max}); - AddInputFromArray<float>(TensorShape({}), {second_max}); - TF_ASSERT_OK(RunOpKernel()); - const Tensor& output_quantized = *GetOutput(0); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - Tensor output_float = - QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max); - test::ExpectTensorNear<float>(expected_float, output_float, 0.2); -} - -TEST_F(QuantizedConcatTest, Small32Bit) { - TestSmall32Bit(0.0f, 1200.0f, 0.0f, 2400.0f); -} - -TEST_F(QuantizedConcatTest, Small32BitSameRange) { - TestSmall32Bit(-2400.0f, 2400.0f, -2400.0f, 2400.0f); -} - -TEST_F(QuantizedConcatTest, Small32BitOneDimSameRangeAsOutput) { - TestSmall32Bit(-2400.0f, 2400.0f, -1200.0f, 2400.0f); -} - -void QuantizedConcatTest::TestSmall32Bit(float first_min, float first_max, - float second_min, float second_max) { - TF_ASSERT_OK(NodeDefBuilder("quantized_concat_op", "QuantizedConcat") - .Input(FakeInput(DT_INT32)) - .Input(FakeInput(2, DT_QINT32)) - .Input(FakeInput(2, DT_FLOAT)) - .Input(FakeInput(2, DT_FLOAT)) - .Attr("N", 2) - .Attr("T", DataTypeToEnum<qint32>::v()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - const int first_batch = 2; - const int first_height = 2; - const int first_width = 3; - Tensor first_float(DT_FLOAT, {first_batch, first_height, first_width}); - test::FillValues<float>(&first_float, {100, 200, 300, 400, 500, 600, 700, 800, - 900, 1000, 1100, 1200}); - Tensor first_quantized = - FloatTensorToQuantized<qint32>(first_float, first_min, first_max); - - const int second_batch = 2; - const int second_height = 2; - const int second_width = 3; - Tensor second_float(DT_FLOAT, 
{second_batch, second_height, second_width}); - test::FillValues<float>(&second_float, {1300, 1400, 1500, 1600, 1700, 1800, - 1900, 2000, 2100, 2200, 2300, 2400}); - Tensor second_quantized = - FloatTensorToQuantized<qint32>(second_float, second_min, second_max); - - const int expected_batch = first_batch + second_batch; - Tensor expected_float(DT_FLOAT, {expected_batch, first_height, first_width}); - test::FillValues<float>( - &expected_float, - {100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, - 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000, 2100, 2200, 2300, 2400}); - - AddInputFromArray<int32>(TensorShape({}), {0}); - AddInputFromArray<qint32>(first_quantized.shape(), - first_quantized.flat<qint32>()); - AddInputFromArray<qint32>(second_quantized.shape(), - second_quantized.flat<qint32>()); - AddInputFromArray<float>(TensorShape({}), {first_min}); - AddInputFromArray<float>(TensorShape({}), {second_min}); - AddInputFromArray<float>(TensorShape({}), {first_max}); - AddInputFromArray<float>(TensorShape({}), {second_max}); - TF_ASSERT_OK(RunOpKernel()); - const Tensor& output_quantized = *GetOutput(0); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - Tensor output_float = - QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); - test::ExpectTensorNear<float>(expected_float, output_float, 0.2); -} - -TEST_F(QuantizedConcatTest, SecondDim8Bit) { - TestSecondDim8Bit(-10.0f, 150.0f, 0.0f, 200.0f); -} - -TEST_F(QuantizedConcatTest, SecondDim8BitSameRange) { - TestSecondDim8Bit(-10.0f, 150.0f, -10.0f, 150.0f); -} - -void QuantizedConcatTest::TestSecondDim8Bit(float first_min, float first_max, - float second_min, - float second_max) { - TF_ASSERT_OK(NodeDefBuilder("quantized_concat_op", "QuantizedConcat") - .Input(FakeInput(DT_INT32)) - .Input(FakeInput(2, DT_QUINT8)) - .Input(FakeInput(2, DT_FLOAT)) - .Input(FakeInput(2, DT_FLOAT)) - .Attr("N", 2) - .Attr("T", DataTypeToEnum<quint8>::v()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - const int first_batch = 2; - const int first_height = 2; - const int first_width = 3; - Tensor first_float(DT_FLOAT, {first_batch, first_height, first_width}); - test::FillValues<float>(&first_float, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - Tensor first_quantized = - FloatTensorToQuantized<quint8>(first_float, first_min, first_max); - - const int second_batch = 2; - const int second_height = 2; - const int second_width = 3; - Tensor second_float(DT_FLOAT, {second_batch, second_height, second_width}); - test::FillValues<float>(&second_float, - {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); - Tensor second_quantized = - FloatTensorToQuantized<quint8>(second_float, second_min, second_max); - - const int expected_height = first_height + second_height; - Tensor expected_float(DT_FLOAT, {first_batch, expected_height, first_width}); - test::FillValues<float>(&expected_float, - {1, 2, 3, 4, 5, 6, 13, 14, 15, 16, 17, 18, - 7, 8, 9, 10, 11, 12, 19, 20, 21, 22, 23, 24}); - - AddInputFromArray<int32>(TensorShape({}), {1}); - AddInputFromArray<quint8>(first_quantized.shape(), - first_quantized.flat<quint8>()); - AddInputFromArray<quint8>(second_quantized.shape(), - second_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({}), {first_min}); - AddInputFromArray<float>(TensorShape({}), {second_min}); - AddInputFromArray<float>(TensorShape({}), {first_max}); - AddInputFromArray<float>(TensorShape({}), {second_max}); - 
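- // Note the input ordering: both tensors first, then both mins, then both
- // maxes, matching the op's "values", "input_mins", and "input_maxes" lists.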
TF_ASSERT_OK(RunOpKernel());
-  const Tensor& output_quantized = *GetOutput(0);
-  const float output_min = GetOutput(1)->flat<float>()(0);
-  const float output_max = GetOutput(2)->flat<float>()(0);
-  Tensor output_float =
-      QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max);
-  test::ExpectTensorNear<float>(expected_float, output_float, 1.0);
-}
-
-// For the benchmark, we set up two 2-dimensional tensors, each kDim1 x 'dim'
-// in size, and concat them together along "concat_dimension".
-// If <same_limits> is true, then both concatenated dimensions have the same
-// quantized range; otherwise, they are set to different values.
-template <typename T>
-static void ConcatHelper(int iters, int concat_dimension, bool same_limits,
-                         int dim2) {
-  testing::StopTiming();
-  Graph* g = new Graph(OpRegistry::Global());
-
-  DataType dt = DataTypeToEnum<T>::v();
-  const int kDim1 = 100;
-  TensorShape shape({kDim1, dim2});
-
-  Tensor concat_dim = test::AsScalar<int32>(concat_dimension);
-  Tensor in0(dt, shape);
-  in0.flat<T>().setRandom();
-  Tensor in1(dt, shape);
-  in1.flat<T>().setRandom();
-
-  Tensor mins0 = test::AsScalar<float>(-1.0);
-  Tensor maxes0 = test::AsScalar<float>(1.0);
-  Tensor mins1 = test::AsScalar<float>(same_limits ? -1.0 : -255.0);
-  Tensor maxes1 = test::AsScalar<float>(same_limits ? 1.0 : 255.0);
-
-  Node* node;
-  TF_CHECK_OK(NodeBuilder(g->NewName("n"), "QuantizedConcat")
-                  .Input(Constant(g, concat_dim))
-                  .Input({Constant(g, in0), Constant(g, in1)})
-                  .Input({Constant(g, mins0), Constant(g, mins1)})
-                  .Input({Constant(g, maxes0), Constant(g, maxes1)})
-                  .Attr("N", 2)
-                  .Attr("T", dt)
-                  .Finalize(g, &node));
-
-  testing::BytesProcessed(static_cast<int64>(iters) *
-                          ((kDim1 * dim2) + (kDim1 * dim2)) * sizeof(T));
-  testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
-  testing::UseRealTime();
-}
-
-static void BM_QConcatDim0SameLimitQInt32(int iters, int dim2) {
-  ConcatHelper<qint32>(iters, 0 /* concat_dimension */, true /* same_limits */,
-                       dim2);
-}
-
-static void BM_QConcatDim1SameLimitQInt32(int iters, int dim2) {
-  ConcatHelper<qint32>(iters, 1 /* concat_dimension */, true /* same_limits */,
-                       dim2);
-}
-
-static void BM_QConcatDim0DifferLimitQInt32(int iters, int dim2) {
-  ConcatHelper<qint32>(iters, 0 /* concat_dimension */, false /* same_limits */,
-                       dim2);
-}
-
-static void BM_QConcatDim1DifferLimitQInt32(int iters, int dim2) {
-  ConcatHelper<qint32>(iters, 1 /* concat_dimension */, false /* same_limits */,
-                       dim2);
-}
-
-BENCHMARK(BM_QConcatDim0SameLimitQInt32)->Arg(1000)->Arg(20000)->Arg(100000);
-BENCHMARK(BM_QConcatDim1SameLimitQInt32)->Arg(1000)->Arg(20000)->Arg(100000);
-BENCHMARK(BM_QConcatDim0DifferLimitQInt32)->Arg(1000)->Arg(20000)->Arg(100000);
-BENCHMARK(BM_QConcatDim1DifferLimitQInt32)->Arg(1000)->Arg(20000)->Arg(100000);
-
-static void BM_QConcatDim0SameLimitQUint8(int iters, int dim2) {
-  ConcatHelper<quint8>(iters, 0 /* concat_dimension */, true /* same_limits */,
-                       dim2);
-}
-
-static void BM_QConcatDim1SameLimitQUint8(int iters, int dim2) {
-  ConcatHelper<quint8>(iters, 1 /* concat_dimension */, true /* same_limits */,
-                       dim2);
-}
-
-static void BM_QConcatDim0DifferLimitQUint8(int iters, int dim2) {
-  ConcatHelper<quint8>(iters, 0 /* concat_dimension */, false /* same_limits */,
-                       dim2);
-}
-
-static void BM_QConcatDim1DifferLimitQUint8(int iters, int dim2) {
-  ConcatHelper<quint8>(iters, 1 /* concat_dimension */, false /* same_limits */,
-                       dim2);
-}
-
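The output range these benchmarks feed through comes from CalculateInputAndOutputRange in the kernel above: the union of all input ranges, symmetrized around zero for signed types so that zero stays exactly representable. A rough stand-alone sketch of that policy, with invented names, might look like this:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <limits>

// Invented stand-alone version of the range-merging policy described above.
void MergeRanges(const float mins[], const float maxes[], int n,
                 bool is_signed, float* out_min, float* out_max) {
  float overall_min = std::numeric_limits<float>::max();
  float overall_max = std::numeric_limits<float>::lowest();
  for (int i = 0; i < n; ++i) {
    overall_min = std::min(overall_min, mins[i]);
    overall_max = std::max(overall_max, maxes[i]);
  }
  if (is_signed) {
    // A symmetrical range keeps zero exactly representable for signed types.
    const float largest =
        std::max(std::fabs(overall_min), std::fabs(overall_max));
    *out_min = -largest;
    *out_max = largest;
  } else {
    *out_min = overall_min;
    *out_max = overall_max;
  }
}

int main() {
  const float mins[] = {-10.0f, 0.0f};
  const float maxes[] = {150.0f, 200.0f};
  float out_min, out_max;
  MergeRanges(mins, maxes, 2, /*is_signed=*/true, &out_min, &out_max);
  std::printf("[%f, %f]\n", out_min, out_max);  // prints [-200, 200]
  return 0;
}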
-BENCHMARK(BM_QConcatDim0SameLimitQUint8)->Arg(1000)->Arg(20000)->Arg(100000); -BENCHMARK(BM_QConcatDim1SameLimitQUint8)->Arg(1000)->Arg(20000)->Arg(100000); -BENCHMARK(BM_QConcatDim0DifferLimitQUint8)->Arg(1000)->Arg(20000)->Arg(100000); -BENCHMARK(BM_QConcatDim1DifferLimitQUint8)->Arg(1000)->Arg(20000)->Arg(100000); - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc deleted file mode 100644 index fb69d770c0..0000000000 --- a/tensorflow/core/kernels/quantized_conv_ops.cc +++ /dev/null @@ -1,526 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Implements quantized eight-bit versions of the convolution operations. - -#include <algorithm> -#include <vector> - -#include "public/gemmlowp.h" -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/kernels/reference_gemm.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/util/padding.h" - -namespace tensorflow { - -// This functor implements the convolution operation in as simple a form as -// possible. It won't give great performance, but it is very useful for -// stepping through and instrumenting for debugging, creating minimal benchmarks -// to prototype with, and sharing with teams that want to run this outside of -// our environment. -// With that in mind, I've avoided using anything except pretty standard C++ -// types. This is especially noticeable in the data access through raw array -// indexing. It's deliberate in this case though, since it makes the underlying -// memory order very explicit, which is important for both inspecting memory -// contents during debugging and for specifying what we expect to others. -// The memory layout of the data is, from biggest stride to smallest: -// input_data = [input_batches, input_height, input_width, input_depth] -// filter_data = [filter_height, filter_width, input_depth, filter_count] -// output_data = [input_batches, output_height, output_width, filter_count] -template <class T1, class T2, class T3> -class ReferenceConvFunctor { - public: - void operator()(OpKernelContext* op_context, const T1* input_data, - int input_batches, int input_height, int input_width, - int input_depth, int input_offset, const T2* filter_data, - int filter_height, int filter_width, int filter_count, - int filter_offset, int stride, Padding padding, - T3* output_data, int output_height, int output_width, - int output_shift, int output_offset, int output_mult) { - // Set up some constants we need for the output down-shifting and - // saturation. 
-    const int32 highest = static_cast<int32>(Eigen::NumTraits<T3>::highest());
-    const int32 lowest = static_cast<int32>(Eigen::NumTraits<T3>::lowest());
-
-    // When we're converting the 32 bit accumulator to a lower bit depth, we
-    // need to add on 0.5 in fixed-point terms to make the operation round half
-    // up towards positive infinity, rather than a floor. For example, with an
-    // output_shift of 2, a total of 11 becomes (11 + 2) >> 2, which rounds to
-    // 3 instead of the floored 2.
-    // We also need to watch out for the case when there's no down shift,
-    // because a left shift by a negative number gives undefined results.
-    const int32 rounding = (output_shift < 1) ? 0 : (1 << (output_shift - 1));
-
-    // The two different padding modes we support can be a bit confusing. SAME
-    // means we're trying to produce an output image that's the same size as
-    // the input. It's complicated by stride, which shrinks the output image by
-    // a factor, but it means we end up sampling from outside the borders of
-    // the input. These out-of-bounds values are read as zeroes. VALID means
-    // only produce output values where the filters can read all their values
-    // from within the input image. It effectively removes the margins of the
-    // output image compared to the one produced by SAME. Stride complicates
-    // this definition though, because it can result in the right and bottom
-    // filter patches sampling from outside the borders if it's greater than 1.
-    // Most of the logic for sorting this all out is done before this function,
-    // when we calculate the output size, but the positioning of the origin of
-    // the filters is different between the two modes, since SAME positions the
-    // first filter off the edge of the input.
-    int filter_left_offset;
-    int filter_top_offset;
-    if (padding == VALID) {
-      filter_left_offset =
-          ((output_width - 1) * stride + filter_width - input_width + 1) / 2;
-      filter_top_offset =
-          ((output_height - 1) * stride + filter_height - input_height + 1) /
-          2;
-    } else {
-      filter_left_offset =
-          ((output_width - 1) * stride + filter_width - input_width) / 2;
-      filter_top_offset =
-          ((output_height - 1) * stride + filter_height - input_height) / 2;
-    }
-
-    // If we've got multiple images in our input, work through each of them.
-    for (int batch = 0; batch < input_batches; ++batch) {
-      // Walk through all the output image values, sliding the filter to
-      // different positions in the input.
-      for (int out_y = 0; out_y < output_height; ++out_y) {
-        for (int out_x = 0; out_x < output_width; ++out_x) {
-          // Each filter kernel produces one output channel.
-          for (int out_channel = 0; out_channel < filter_count; ++out_channel) {
-            // We're going to calculate a single output value, which means we
-            // need to multiply a three dimensional kernel of weights against
-            // the current location within the input image.
-            /*
-             *-------------------------------...
-             |\ ^
-             | \in_depth
-             |  \ v
-             |   *-------------------------------...
-             |   |          ^
-             |   |       in_y_origin
-             |   |          v   \
-             |   |<in_x_origin>*---*^
-             |   |            \|   |filter_height
-             .   |             *---*v
-             .   |             <--->
-                 .         filter_width
-                 .
-            */
-            const int in_x_origin = (out_x * stride) - filter_left_offset;
-            const int in_y_origin = (out_y * stride) - filter_top_offset;
-            int32 total = 0;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                for (int in_channel = 0; in_channel < input_depth;
-                     ++in_channel) {
-                  const int in_x = in_x_origin + filter_x;
-                  const int in_y = in_y_origin + filter_y;
-                  int32 input_value;
-                  // If the location is outside the bounds of the input image,
-                  // use zero as a default value.
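-                  // (input_value holds the offset-subtracted code, so a zero
-                  // here corresponds to a real input value of 0.0f.)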
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && - (in_y < input_height)) { - const T1 input_source_value = - input_data[(batch * input_height * input_width * - input_depth) + - (in_y * input_width * input_depth) + - (in_x * input_depth) + in_channel]; - // We're promoting the T1 type to a higher bit depth here as - // we do the subtraction. - input_value = - static_cast<int32>(input_source_value) - input_offset; - } else { - input_value = 0; - } - const T2 filter_source_value = - filter_data[(filter_y * filter_width * input_depth * - filter_count) + - (filter_x * input_depth * filter_count) + - (in_channel * filter_count) + out_channel]; - // Another promotion to 32 bit, as above. - const int32 filter_value = - static_cast<int32>(filter_source_value) - filter_offset; - total += (input_value * filter_value); - } - } - } - // Here we're applying scale factors to compress the 32 bit - // accumulated total to a potentially lower bit depth. - const int32_t output = - ((((total + output_offset) * output_mult) + rounding) >> - output_shift); - // We need to saturate the results against the largest and smallest - // values that can be represented in this type. - const int32 top_clamped_output = std::min(output, highest); - const int32 clamped_output = std::max(top_clamped_output, lowest); - output_data[(batch * output_height * output_width * filter_count) + - (out_y * output_width * filter_count) + - (out_x * filter_count) + out_channel] = clamped_output; - } - } - } - } - } -}; - -// Implements convolution as a two stage process, first packing the patches of -// the input image into columns (im2col) and then running GEMM to produce the -// final result. -// TODO(petewarden) - We need to update gemmlowp to support 32-bit outputs -// before we can re-enable this path. -template <class T1, class T2, class T3> -class Im2ColConvFunctor { - public: - void operator()(OpKernelContext* op_context, const T1* input_data, - int input_batches, int input_height, int input_width, - int input_depth, int input_offset, const T2* filter_data, - int filter_height, int filter_width, int filter_count, - int filter_offset, int stride, Padding padding, - T3* output_data, int output_height, int output_width, - int output_shift, int output_offset, int output_mult) { - if (input_offset < 0) { - // Only log the first few occurrences of this warning. - static int warning_count = 0; - if (warning_count < 10) { - ++warning_count; - LOG(WARNING) - << "Zero is not representable in the quantized range used by the" - << " input. This means QuantizedConv2d has to fall back to a slow" - << " implementation, since the border of zero values can't be" - << " represented easily. 
You should try to construct graphs that"
-            << " avoid this situation.";
-      }
-      ReferenceConvFunctor<T1, T2, T3> conv_functor;
-      conv_functor(op_context, input_data, input_batches, input_height,
-                   input_width, input_depth, input_offset, filter_data,
-                   filter_height, filter_width, filter_count, filter_offset,
-                   stride, padding, output_data, output_height, output_width,
-                   output_shift, output_offset, output_mult);
-      return;
-    }
-
-    CHECK_GT(output_width, 0);
-    CHECK_GT(output_height, 0);
-    int filter_left_offset;
-    int filter_top_offset;
-    if (padding == VALID) {
-      filter_left_offset =
-          ((output_width - 1) * stride + filter_width - input_width + 1) / 2;
-      filter_top_offset =
-          ((output_height - 1) * stride + filter_height - input_height + 1) /
-          2;
-    } else {
-      filter_left_offset =
-          ((output_width - 1) * stride + filter_width - input_width) / 2;
-      filter_top_offset =
-          ((output_height - 1) * stride + filter_height - input_height) / 2;
-    }
-
-    // The im2col buffer has # of patches rows, and # of filters cols.
-    // It's laid out like this, in row major order in memory:
-    //        < filter value count >
-    //   ^   +---------------------+
-    // patch |                     |
-    // count |                     |
-    //   v   +---------------------+
-    // Each patch row contains a filter_width x filter_height patch of the
-    // input, with the depth channel as the most contiguous in memory, followed
-    // by the width, then the height. This is the standard memory order in the
-    // image world if it helps to visualize it.
-    const int filter_value_count = filter_width * filter_height * input_depth;
-    const int patch_count = input_batches * output_width * output_height;
-    const int im2col_size = patch_count * filter_value_count;
-    // TODO(petewarden) - Memory allocation can be very slow on Android. Can we
-    // optimize this by keeping the scratch buffer around?
-    std::unique_ptr<T1[]> im2col_buffer(new T1[im2col_size]);
-
-    for (int batch = 0; batch < input_batches; ++batch) {
-      const T1* input_batch_start =
-          input_data + (batch * input_height * input_width * input_depth);
-      for (int out_y = 0; out_y < output_height; ++out_y) {
-        const int in_y_origin = (out_y * stride) - filter_top_offset;
-        for (int out_x = 0; out_x < output_width; ++out_x) {
-          const int in_x_origin = (out_x * stride) - filter_left_offset;
-          const int patch_index = (batch * output_width * output_height) +
-                                  (out_y * output_width) + out_x;
-          T1* im2col_patch_start =
-              im2col_buffer.get() + (patch_index * filter_value_count);
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            const int in_y = in_y_origin + filter_y;
-            T1* im2col_row_start =
-                im2col_patch_start + (filter_y * filter_width * input_depth);
-            // If we're off the top or the bottom of the input, fill the whole
-            // row with zeroes.
-            if ((in_y < 0) || (in_y >= input_height)) {
-              T1* im2col_row_end =
-                  im2col_row_start + (filter_width * input_depth);
-              // We'll be subtracting this offset during the calculations, so
-              // to get an actual zero after that bias we need to set it to
-              // input_offset here.
-              std::fill(im2col_row_start, im2col_row_end, input_offset);
-            } else {
-              // What we're doing here is trying to copy and fill the im2col
-              // buffer as efficiently as possible, using functions to set or
-              // duplicate values en masse. We know we don't have to worry
-              // about vertical edges because we dealt with that case above,
-              // so we just need to handle filters that overlap the left or
-              // right edges.
Here's what that looks like: - // - // < left_zero_count > < center_copy_count > < right_zero_count > - // +------------------+---------------------+--------------------+ - // | (filter) | (image) | (filter) | - // +------------------+---------------------+--------------------+ - // in_x_origin 0 input_width in_x_end - // - // In reality it's unlikely that a filter patch will be wider - // than an input, but this shows all the edge cases. - // We use std::fill() to set the left and right sections to zeroes - // and std::copy() to copy over the input data for the center. - const int in_x_end = in_x_origin + filter_width; - const int left_zero_count = std::max(0, 0 - in_x_origin); - const int right_zero_count = std::max(0, in_x_end - input_width); - const int center_copy_count = - filter_width - (left_zero_count + right_zero_count); - if (left_zero_count > 0) { - T1* im2col_left_start = im2col_row_start; - T1* im2col_left_end = - im2col_left_start + (left_zero_count * input_depth); - std::fill(im2col_left_start, im2col_left_end, input_offset); - } - if (center_copy_count > 0) { - const T1* input_row_start = - input_batch_start + (in_y * input_width * input_depth) + - (std::max(0, in_x_origin) * input_depth); - const T1* input_row_end = - input_row_start + (center_copy_count * input_depth); - T1* im2col_center_start = - im2col_row_start + (left_zero_count * input_depth); - std::copy(input_row_start, input_row_end, im2col_center_start); - } - if (right_zero_count > 0) { - T1* im2col_right_start = - im2col_row_start + - ((left_zero_count + center_copy_count) * input_depth); - T1* im2col_right_end = - im2col_right_start + (right_zero_count * input_depth); - std::fill(im2col_right_start, im2col_right_end, input_offset); - } - } - } - } - } - } - - CHECK_GT(patch_count, 0); - CHECK_GT(filter_count, 0); - CHECK_GT(filter_value_count, 0); - - const bool transpose_a = false; - const bool transpose_b = false; - const bool transpose_c = false; - const int m = patch_count; - const int n = filter_count; - const int k = filter_value_count; - const int lda = filter_value_count; - const int ldb = filter_count; - const int ldc = filter_count; - // The gemmlowp optimized library only works for a particular set of data - // types, so check if we meet those requirements and - // fall back to a slower reference implementation if not. - if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() && - std::is_same<T3, qint32>() && (output_offset == 0) && - (output_mult == 1) && (output_shift == 0)) { - const uint8* im2col_data_as_uint8 = &(im2col_buffer.get()->value); - const uint8* filter_data_as_uint8 = &(filter_data->value); - int32* output_data_as_int32 = &(output_data->value); - // All of the transpose_* variables are currently compile-time consts, so - // we could just hard-code these values too, but that would break if - // anybody changed those values in the future (e.g. to match the ability - // of MatMul to specify them as attributes). We're using a verbose - // approach of deriving the order values from the transpose variables to - // be able to catch any changes like that. - static const gemmlowp::MapOrder ResultOrder = - !transpose_c ? gemmlowp::MapOrder::RowMajor - : gemmlowp::MapOrder::ColMajor; - static const gemmlowp::MapOrder LhsOrder = - !transpose_a ? gemmlowp::MapOrder::RowMajor - : gemmlowp::MapOrder::ColMajor; - static const gemmlowp::MapOrder RhsOrder = - !transpose_b ? 
gemmlowp::MapOrder::RowMajor - : gemmlowp::MapOrder::ColMajor; - gemmlowp::MatrixMap<const std::uint8_t, LhsOrder> lhs( - im2col_data_as_uint8, m, k, lda); - gemmlowp::MatrixMap<const std::uint8_t, RhsOrder> rhs( - filter_data_as_uint8, k, n, ldb); - gemmlowp::MatrixMap<std::int32_t, ResultOrder> result( - output_data_as_int32, m, n, ldc); - const std::tuple<> empty_pipeline = {}; - - auto& worker_threads = - *(op_context->device()->tensorflow_cpu_worker_threads()); - TensorflowGemmContext context(worker_threads.num_threads, - worker_threads.workers); - gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::int32_t, - gemmlowp::DefaultL8R8BitDepthParams>( - &context, lhs, rhs, &result, -input_offset, -filter_offset, - empty_pipeline); - } else { - ReferenceGemm<T1, T2, T3>(transpose_a, transpose_b, transpose_c, m, n, k, - im2col_buffer.get(), input_offset, lda, - filter_data, filter_offset, ldb, output_data, - output_shift, output_offset, output_mult, ldc); - } - } -}; - -template <class T1, class T2, class T3, - template <class TF1, class TF2, class TF3> class ConvFunctor> -class QuantizedConv2DOp : public OpKernel { - public: - explicit QuantizedConv2DOp(OpKernelConstruction* context) - : OpKernel(context) { - OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_)); - OP_REQUIRES(context, strides_.size() == 4, - errors::InvalidArgument("Sliding window strides field must " - "specify 4 dimensions")); - OP_REQUIRES(context, strides_[1] == strides_[2], - errors::InvalidArgument( - "Current implementation only supports equal length " - "strides in the row and column dimensions.")); - OP_REQUIRES( - context, (strides_[0] == 1 && strides_[3] == 1), - errors::InvalidArgument("Current implementation does not yet support " - "strides in the batch and depth dimensions.")); - OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); - } - - void Compute(OpKernelContext* context) override { - // Input tensor is of the following dimensions: - // [ batch, in_rows, in_cols, in_depth ] - const Tensor& input = context->input(0); - - // Input filter is of the following dimensions: - // [ filter_rows, filter_cols, in_depth, out_depth] - const Tensor& filter = context->input(1); - - // For 2D convolution, there should be 4 dimensions. - OP_REQUIRES(context, input.dims() == 4, - errors::InvalidArgument("input must be 4-dimensional", - input.shape().DebugString())); - OP_REQUIRES(context, filter.dims() == 4, - errors::InvalidArgument("filter must be 4-dimensional: ", - filter.shape().DebugString())); - - const float min_input = context->input(2).flat<float>()(0); - const float max_input = context->input(3).flat<float>()(0); - const float min_filter = context->input(4).flat<float>()(0); - const float max_filter = context->input(5).flat<float>()(0); - const int32 offset_input = - FloatToQuantizedUnclamped<T1>(0.0f, min_input, max_input); - const int32 offset_filter = - FloatToQuantizedUnclamped<T2>(0.0f, min_filter, max_filter); - const int32 offset_output = 0; - const int32 mult_output = 1; - const int32 shift_output = 0; - - // The last dimension for input is in_depth. It must be the same as the - // filter's in_depth. - const int64 in_depth = input.dim_size(3); - OP_REQUIRES( - context, in_depth == filter.dim_size(2), - errors::InvalidArgument("input and filter must have the same depth: ", - in_depth, " vs ", filter.dim_size(2))); - - // The last dimension for filter is out_depth. - const int64 out_depth = filter.dim_size(3); - - // The second dimension for input is rows/height. 
- // The first dimension for filter is rows/height. - const int64 input_rows = input.dim_size(1); - const int64 filter_rows = filter.dim_size(0); - - // The third dimension for input is columns/width. - // The second dimension for filter is columns/width. - const int64 input_cols = input.dim_size(2); - const int64 filter_cols = filter.dim_size(1); - - // The first dimension for input is batch. - const int64 batch = input.dim_size(0); - - // For now we take the stride from the second dimension only (we - // assume row = col stride, and do not support striding on the - // batch or depth dimension). - const int stride = strides_[1]; - - int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0; - OP_REQUIRES_OK(context, - GetWindowedOutputSize(input_rows, filter_rows, stride, - padding_, &out_rows, &pad_rows)); - OP_REQUIRES_OK(context, - GetWindowedOutputSize(input_cols, filter_cols, stride, - padding_, &out_cols, &pad_cols)); - CHECK_GT(batch, 0); - CHECK_GT(out_rows, 0); - CHECK_GT(out_cols, 0); - CHECK_GT(out_depth, 0); - TensorShape out_shape({batch, out_rows, out_cols, out_depth}); - - // Output tensor is of the following dimensions: - // [ in_batch, out_rows, out_cols, out_depth ] - Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); - - // This will call different implementations (e.g. reference or optimized) - // depending on the template parameter. - ConvFunctor<T1, T2, T3> conv_functor; - conv_functor(context, input.flat<T1>().data(), batch, input_rows, - input_cols, in_depth, offset_input, filter.flat<T2>().data(), - filter_rows, filter_cols, out_depth, offset_filter, stride, - padding_, output->flat<T3>().data(), out_rows, out_cols, - shift_output, offset_output, mult_output); - - float min_output_value; - float max_output_value; - QuantizationRangeForMultiplication<T1, T2, T3>( - min_input, max_input, min_filter, max_filter, &min_output_value, - &max_output_value); - - Tensor* output_min = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); - output_min->flat<float>()(0) = min_output_value; - - Tensor* output_max = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max)); - output_max->flat<float>()(0) = max_output_value; - } - - private: - std::vector<int32> strides_; - Padding padding_; -}; - -// Right now we only support taking two eight bit inputs, and returning the -// results as signed 32-bit integers. -REGISTER_KERNEL_BUILDER( - Name("QuantizedConv2D") - .Device(DEVICE_CPU) - .TypeConstraint<quint8>("Tinput") - .TypeConstraint<quint8>("Tfilter") - .TypeConstraint<qint32>("out_type"), - QuantizedConv2DOp<quint8, quint8, qint32, Im2ColConvFunctor>); - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_conv_ops_test.cc b/tensorflow/core/kernels/quantized_conv_ops_test.cc deleted file mode 100644 index 01e55f8593..0000000000 --- a/tensorflow/core/kernels/quantized_conv_ops_test.cc +++ /dev/null @@ -1,324 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include <functional> -#include <memory> -#include <vector> - -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { - -class QuantizedConv2DTest : public OpsTestBase { - protected: -}; - -TEST_F(QuantizedConv2DTest, Small) { - const int stride = 1; - TF_ASSERT_OK(NodeDefBuilder("quantized_conv_op", "QuantizedConv2D") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("out_type", DataTypeToEnum<qint32>::v()) - .Attr("strides", {1, stride, stride, 1}) - .Attr("padding", "SAME") - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - - const int depth = 1; - const int image_width = 4; - const int image_height = 3; - const int image_batch_count = 1; - // The image data should always be able to represent zero, to allow a fast - // implementation of border padding, so we set the min value to 0. - const float image_min = 0.0f; - const float image_max = 12.0f; - // The image matrix is: - // | 1 | 2 | 3 | 4 | - // | 5 | 6 | 7 | 8 | - // | 9 | 10 | 11 | 12 | - Tensor image_float(DT_FLOAT, - {image_batch_count, image_height, image_width, depth}); - test::FillValues<float>(&image_float, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - Tensor image_quantized = - FloatTensorToQuantized<quint8>(image_float, image_min, image_max); - - // The filter matrix is: - // | 1 | 4 | 7 | - // | 2 | 5 | 8 | - // | 3 | 6 | 9 | - const int filter_size = 3; - const int filter_count = 1; - const float filter_min = 1.0f; - const float filter_max = 9.0f; - Tensor filter_float(DT_FLOAT, - {filter_size, filter_size, depth, filter_count}); - test::FillValues<float>(&filter_float, {1, 4, 7, 2, 5, 8, 3, 6, 9}); - Tensor filter_quantized = - FloatTensorToQuantized<quint8>(filter_float, filter_min, filter_max); - - AddInputFromArray<quint8>(image_quantized.shape(), - image_quantized.flat<quint8>()); - AddInputFromArray<quint8>(filter_quantized.shape(), - filter_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {image_min}); - AddInputFromArray<float>(TensorShape({1}), {image_max}); - AddInputFromArray<float>(TensorShape({1}), {filter_min}); - AddInputFromArray<float>(TensorShape({1}), {filter_max}); - TF_ASSERT_OK(RunOpKernel()); - - // We're sliding the 3x3 filter across the 3x4 image, with accesses outside - // the input set to zero because we're using the 'SAME' padding mode. 
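- // (With SAME padding and stride 1, the 3x3 filter is centered on each
- // input pixel, so the output keeps the input's 3x4 size.)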
- // The calculations behind the expected output are: - // (1*0)+(4*0)+(7*0)+(2*0)+(5*1)+(8*2)+(3*0)+(6*5)+(9*6)=105 - // (1*0)+(4*0)+(7*0)+(2*1)+(5*2)+(8*3)+(3*5)+(6*6)+(9*7)=150 - // (1*0)+(4*0)+(7*0)+(2*2)+(5*3)+(8*4)+(3*6)+(6*7)+(9*8)=183 - // (1*0)+(4*0)+(7*0)+(2*3)+(5*4)+(8*0)+(3*7)+(6*8)+(9*0)=95 - // (1*0)+(4*1)+(7*2)+(2*0)+(5*5)+(8*6)+(3*0)+(6*9)+(9*10)=235 - // (1*1)+(4*2)+(7*3)+(2*5)+(5*6)+(8*7)+(3*9)+(6*10)+(9*11)=312 - // (1*2)+(4*3)+(7*4)+(2*6)+(5*7)+(8*8)+(3*10)+(6*11)+(9*12)=357 - // (1*3)+(4*4)+(7*0)+(2*7)+(5*8)+(8*0)+(3*11)+(6*12)+(9*0)=178 - // (1*0)+(4*5)+(7*6)+(2*0)+(5*9)+(8*10)+(3*0)+(6*0)+(9*0)=187 - // (1*5)+(4*6)+(7*7)+(2*9)+(5*10)+(8*11)+(3*0)+(6*0)+(9*0)=234 - // (1*6)+(4*7)+(7*8)+(2*10)+(5*11)+(8*12)+(3*0)+(6*0)+(9*0)=261 - // (1*7)+(4*8)+(7*0)+(2*11)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)=121 - // This means we should end up with this matrix: - // | 105 | 150 | 183 | 95 | - // | 235 | 312 | 357 | 178 | - // | 187 | 234 | 261 | 121 | - const int expected_width = image_width; - const int expected_height = image_height * filter_count; - Tensor expected_float( - DT_FLOAT, TensorShape({image_batch_count, expected_height, expected_width, - filter_count})); - test::FillValues<float>(&expected_float, {105, 150, 183, 95, 235, 312, 357, - 178, 187, 234, 261, 121}); - const Tensor& output_quantized = *GetOutput(0); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - Tensor output_float = - QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); - test::ExpectTensorNear<float>(expected_float, output_float, 1.0); -} - -TEST_F(QuantizedConv2DTest, Small32Bit) { - const int stride = 1; - TF_ASSERT_OK(NodeDefBuilder("quantized_conv_op", "QuantizedConv2D") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("out_type", DataTypeToEnum<qint32>::v()) - .Attr("strides", {1, stride, stride, 1}) - .Attr("padding", "SAME") - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - - const int depth = 1; - const int image_width = 4; - const int image_height = 3; - const int image_batch_count = 1; - AddInputFromArray<quint8>( - TensorShape({image_batch_count, image_height, image_width, depth}), - {10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120}); - const int filter_size = 3; - const int filter_count = 1; - AddInputFromArray<quint8>( - TensorShape({filter_size, filter_size, depth, filter_count}), - {10, 40, 70, 20, 50, 80, 30, 60, 90}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - - TF_ASSERT_OK(RunOpKernel()); - const int expected_width = image_width; - const int expected_height = image_height * filter_count; - Tensor expected(DT_QINT32, TensorShape({image_batch_count, expected_height, - expected_width, filter_count})); - test::FillValues<qint32>( - &expected, {10500, 15000, 18300, 9500, 23500, 31200, 35700, 17800, 18700, - 23400, 26100, 12100}); - test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); -} - -TEST_F(QuantizedConv2DTest, OddPadding) { - const int stride = 2; - TF_ASSERT_OK(NodeDefBuilder("quantized_conv_op", "QuantizedConv2D") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) -
.Input(FakeInput(DT_FLOAT)) - .Attr("out_type", DataTypeToEnum<qint32>::v()) - .Attr("strides", {1, stride, stride, 1}) - .Attr("padding", "SAME") - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - - const int depth = 1; - const int image_width = 4; - const int image_height = 4; - const int image_batch_count = 1; - AddInputFromArray<quint8>( - TensorShape({image_batch_count, image_height, image_width, depth}), - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - const int filter_size = 3; - const int filter_count = 1; - AddInputFromArray<quint8>( - TensorShape({filter_size, filter_size, depth, filter_count}), - {1, 2, 3, 4, 5, 6, 7, 8, 9}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - - TF_ASSERT_OK(RunOpKernel()); - const int expected_width = image_width / stride; - const int expected_height = (image_height * filter_count) / stride; - Tensor expected(DT_QINT32, TensorShape({image_batch_count, expected_height, - expected_width, filter_count})); - test::FillValues<qint32>(&expected, {348, 252, 274, 175}); - test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); -} - -TEST_F(QuantizedConv2DTest, OddPaddingBatch) { - const int stride = 2; - TF_ASSERT_OK(NodeDefBuilder("quantized_conv_op", "QuantizedConv2D") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("out_type", DataTypeToEnum<qint32>::v()) - .Attr("strides", {1, stride, stride, 1}) - .Attr("padding", "SAME") - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - - const int depth = 1; - const int image_width = 4; - const int image_height = 4; - const int image_batch_count = 3; - AddInputFromArray<quint8>( - TensorShape({image_batch_count, image_height, image_width, depth}), - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - const int filter_size = 3; - const int filter_count = 1; - AddInputFromArray<quint8>( - TensorShape({filter_size, filter_size, depth, filter_count}), - {1, 2, 3, 4, 5, 6, 7, 8, 9}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - - TF_ASSERT_OK(RunOpKernel()); - const int expected_width = image_width / stride; - const int expected_height = (image_height * filter_count) / stride; - Tensor expected(DT_QINT32, TensorShape({image_batch_count, expected_height, - expected_width, filter_count})); - test::FillValues<qint32>(&expected, {348, 252, 274, 175, // - 348, 252, 274, 175, // - 348, 252, 274, 175}); - test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); -} - -TEST_F(QuantizedConv2DTest, SmallWithNoZero) { - const int stride = 1; - TF_ASSERT_OK(NodeDefBuilder("quantized_conv_op", "QuantizedConv2D") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("out_type", DataTypeToEnum<qint32>::v()) - .Attr("strides", {1, stride, stride, 1}) - .Attr("padding", "SAME") - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - const int depth = 1; - const int image_width = 4; - const int 
image_height = 3; - const int image_batch_count = 1; - // Here we're testing a slow implementation path, where zero is not - // representable in the image data and so simple border padding is not - // possible, so we have a min value greater than 0. - const float image_min = 1.0f; - const float image_max = 12.0f; - Tensor image_float(DT_FLOAT, - {image_batch_count, image_height, image_width, depth}); - test::FillValues<float>(&image_float, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - Tensor image_quantized = - FloatTensorToQuantized<quint8>(image_float, image_min, image_max); - const int filter_size = 3; - const int filter_count = 1; - const float filter_min = 1.0f; - const float filter_max = 9.0f; - Tensor filter_float(DT_FLOAT, - {filter_size, filter_size, depth, filter_count}); - test::FillValues<float>(&filter_float, {1, 4, 7, 2, 5, 8, 3, 6, 9}); - Tensor filter_quantized = - FloatTensorToQuantized<quint8>(filter_float, filter_min, filter_max); - AddInputFromArray<quint8>(image_quantized.shape(), - image_quantized.flat<quint8>()); - AddInputFromArray<quint8>(filter_quantized.shape(), - filter_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {image_min}); - AddInputFromArray<float>(TensorShape({1}), {image_max}); - AddInputFromArray<float>(TensorShape({1}), {filter_min}); - AddInputFromArray<float>(TensorShape({1}), {filter_max}); - TF_ASSERT_OK(RunOpKernel()); - const int expected_width = image_width; - const int expected_height = image_height * filter_count; - Tensor expected_float( - DT_FLOAT, TensorShape({image_batch_count, expected_height, expected_width, - filter_count})); - test::FillValues<float>(&expected_float, {105, 150, 183, 95, 235, 312, 357, - 178, 187, 234, 261, 121}); - const Tensor& output_quantized = *GetOutput(0); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - Tensor output_float = - QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); - test::ExpectTensorNear<float>(expected_float, output_float, 1.0); -} - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_matmul_op.cc b/tensorflow/core/kernels/quantized_matmul_op.cc deleted file mode 100644 index 0ce9e37642..0000000000 --- a/tensorflow/core/kernels/quantized_matmul_op.cc +++ /dev/null @@ -1,186 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Implements a quantized eight-bit version of the matmul operation. 
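Throughout these deleted kernels an eight-bit tensor travels with two float scalars, min and max, that pin the 256 byte values onto a real-valued interval. A minimal sketch of that mapping, with illustrative helper names rather than the actual functions from quantization_utils.h:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // One quantized step, in float units, for an eight-bit range.
    inline float StepForRange(float min, float max) { return (max - min) / 255.0f; }

    // Recover the real value a byte represents.
    inline float ByteToFloat(uint8_t q, float min, float max) {
      return min + static_cast<float>(q) * StepForRange(min, max);
    }

    // Store a real value as a byte, clamping into the representable range.
    inline uint8_t FloatToByte(float v, float min, float max) {
      const float clamped = std::min(max, std::max(min, v));
      return static_cast<uint8_t>(std::lround((clamped - min) / StepForRange(min, max)));
    }

With the range [0.0f, 255.0f] one step is exactly 1.0f and byte 0 stands for 0.0f, which is why several of the tests in this change can feed raw byte values and assert exact integer outputs.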
- -#include "public/gemmlowp.h" -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/kernels/reference_gemm.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/lib/core/errors.h" - -namespace tensorflow { - -// We have to break this out as a separate function because there are multiple -// combinations of transpose attributes we need to support, and they have to be -// compile-time constants to work with the templates used internally. -template <bool TransposeA, bool TransposeB, bool TransposeC> -void GemmlowpMultiply(OpKernelContext* op_context, const quint8* a_data, - const quint8* b_data, qint32* c_data, int m, int n, int k, - int offset_a, int offset_b, int lda, int ldb, int ldc) { - const uint8* a_data_as_uint8 = &(a_data->value); - const uint8* b_data_as_uint8 = &(b_data->value); - int32* c_data_as_int32 = &(c_data->value); - static const gemmlowp::MapOrder ResultOrder = - !TransposeC ? gemmlowp::MapOrder::RowMajor : gemmlowp::MapOrder::ColMajor; - static const gemmlowp::MapOrder LhsOrder = - !TransposeA ? gemmlowp::MapOrder::RowMajor : gemmlowp::MapOrder::ColMajor; - static const gemmlowp::MapOrder RhsOrder = - !TransposeB ? gemmlowp::MapOrder::RowMajor : gemmlowp::MapOrder::ColMajor; - gemmlowp::MatrixMap<const std::uint8_t, LhsOrder> lhs(a_data_as_uint8, m, k, - lda); - gemmlowp::MatrixMap<const std::uint8_t, RhsOrder> rhs(b_data_as_uint8, k, n, - ldb); - gemmlowp::MatrixMap<std::int32_t, ResultOrder> result(c_data_as_int32, m, n, - ldc); - const std::tuple<> empty_pipeline = {}; - auto& worker_threads = - *(op_context->device()->tensorflow_cpu_worker_threads()); - TensorflowGemmContext context(worker_threads.num_threads, - worker_threads.workers); - gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::int32_t, - gemmlowp::DefaultL8R8BitDepthParams>( - &context, lhs, rhs, &result, -offset_a, -offset_b, empty_pipeline); -} - -template <class T1, class T2, class Toutput> -class QuantizedMatMulOp : public OpKernel { - public: - explicit QuantizedMatMulOp(OpKernelConstruction* context) - : OpKernel(context) { - OP_REQUIRES_OK(context, context->GetAttr("transpose_a", &transpose_a_)); - OP_REQUIRES_OK(context, context->GetAttr("transpose_b", &transpose_b_)); - } - - void Compute(OpKernelContext* context) override { - const Tensor& a = context->input(0); - const Tensor& b = context->input(1); - const float min_a = context->input(2).flat<float>()(0); - const float max_a = context->input(3).flat<float>()(0); - const float min_b = context->input(4).flat<float>()(0); - const float max_b = context->input(5).flat<float>()(0); - - // Make sure that we have valid quantization ranges for the input buffers. - // If the difference between the min and max is negative or zero, it makes - // it hard to do meaningful intermediate operations on the values. - OP_REQUIRES(context, (max_a > min_a), - errors::InvalidArgument("max_a must be larger than min_a.")); - OP_REQUIRES(context, (max_b > min_b), - errors::InvalidArgument("max_b must be larger than min_b.")); - const int32 offset_a = FloatToQuantizedUnclamped<T1>(0.0f, min_a, max_a); - const int32 offset_b = FloatToQuantizedUnclamped<T2>(0.0f, min_b, max_b); - const int32 offset_c = 0; - const int32 mult_c = 1; - const int32 shift_c = 0; - - // Check that the dimensions of the two matrices are valid. 
- OP_REQUIRES(context, TensorShapeUtils::IsMatrix(a.shape()), - errors::InvalidArgument("In[0] is not a matrix")); - OP_REQUIRES(context, TensorShapeUtils::IsMatrix(b.shape()), - errors::InvalidArgument("In[1] is not a matrix")); - Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair; - dim_pair[0].first = transpose_a_ ? 0 : 1; - dim_pair[0].second = transpose_b_ ? 1 : 0; - - OP_REQUIRES(context, - a.dim_size(dim_pair[0].first) == b.dim_size(dim_pair[0].second), - errors::InvalidArgument("Matrix size-compatible: In[0]: ", - a.shape().DebugString(), ", In[1]: ", - b.shape().DebugString())); - - OP_REQUIRES(context, ((shift_c >= 0) && (shift_c <= 31)), - errors::InvalidArgument("shift_c must be between 0 and 31, " - "inclusive.")); - - int a_dim_remaining = 1 - dim_pair[0].first; - int b_dim_remaining = 1 - dim_pair[0].second; - TensorShape out_shape( - {a.dim_size(a_dim_remaining), b.dim_size(b_dim_remaining)}); - Tensor* c = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &c)); - CHECK(c); - - const T1* a_data = a.flat<T1>().data(); - const T2* b_data = b.flat<T2>().data(); - Toutput* c_data = c->flat<Toutput>().data(); - - const bool transpose_c = false; - const size_t m = a.dim_size(a_dim_remaining); - const size_t n = b.dim_size(b_dim_remaining); - const size_t k = a.dim_size(dim_pair[0].first); - const size_t lda = a.dim_size(1); - const size_t ldb = b.dim_size(1); - const size_t ldc = n; - - // The gemmlowp optimized library only works for a particular set of data - // types, so check if we meet those requirements and - // fall back to a slower reference implementation if not. - if (std::is_same<T1, quint8>() && std::is_same<T2, quint8>() && - std::is_same<Toutput, qint32>() && (offset_c == 0) && (mult_c == 1) && - (shift_c == 0) && (transpose_c == false)) { - if (transpose_a_) { - if (transpose_b_) { - GemmlowpMultiply<true, true, false>(context, a_data, b_data, c_data, - m, n, k, offset_a, offset_b, lda, - ldb, ldc); - } else { - GemmlowpMultiply<true, false, false>(context, a_data, b_data, c_data, - m, n, k, offset_a, offset_b, lda, - ldb, ldc); - } - } else { - if (transpose_b_) { - GemmlowpMultiply<false, true, false>(context, a_data, b_data, c_data, - m, n, k, offset_a, offset_b, lda, - ldb, ldc); - } else { - GemmlowpMultiply<false, false, false>(context, a_data, b_data, c_data, - m, n, k, offset_a, offset_b, - lda, ldb, ldc); - } - } - } else { - ReferenceGemm<T1, T2, Toutput>( - transpose_a_, transpose_b_, transpose_c, m, n, k, a_data, offset_a, - lda, b_data, offset_b, ldb, c_data, shift_c, offset_c, mult_c, ldc); - } - - float min_c_value; - float max_c_value; - QuantizationRangeForMultiplication<T1, T2, Toutput>( - min_a, max_a, min_b, max_b, &min_c_value, &max_c_value); - Tensor* c_min = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(1, {}, &c_min)); - c_min->flat<float>()(0) = min_c_value; - - Tensor* c_max = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(2, {}, &c_max)); - c_max->flat<float>()(0) = max_c_value; - } - - private: - bool transpose_a_; - bool transpose_b_; -}; - -REGISTER_KERNEL_BUILDER(Name("QuantizedMatMul") - .Device(DEVICE_CPU) - .TypeConstraint<quint8>("T1") - .TypeConstraint<quint8>("T2") - .TypeConstraint<qint32>("Toutput"), - QuantizedMatMulOp<quint8, quint8, qint32>); - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_matmul_op_test.cc b/tensorflow/core/kernels/quantized_matmul_op_test.cc deleted file mode 100644 index e82464d4e7..0000000000 --- 
a/tensorflow/core/kernels/quantized_matmul_op_test.cc +++ /dev/null @@ -1,336 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include <functional> -#include <memory> -#include <vector> - -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { - -class QuantizedMatMulTest : public OpsTestBase { - protected: -}; - -// Runs two small matrices through the operator, and leaves all the parameters -// at their default values. -TEST_F(QuantizedMatMulTest, Small_NoParams) { - TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum<qint32>::v()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - // A matrix is: - // | 1 | 2 | 3 | - // | 4 | 5 | 6 | - AddInputFromArray<quint8>(TensorShape({2, 3}), {1, 2, 3, 4, 5, 6}); - // B matrix is: - // | 7 | 8 | 9 | 10 | - // | 11 | 12 | 13 | 14 | - // | 15 | 16 | 17 | 18 | - AddInputFromArray<quint8>(TensorShape({3, 4}), - {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - - TF_ASSERT_OK(RunOpKernel()); - // Here are the results we expect, from hand calculations: - // (1 * 7) + (2 * 11) + (3 * 15) = 74 - // (1 * 8) + (2 * 12) + (3 * 16) = 80 - // (1 * 9) + (2 * 13) + (3 * 17) = 86 - // (1 * 10) + (2 * 14) + (3 * 18) = 92 - // (4 * 7) + (5 * 11) + (6 * 15) = 173 - // (4 * 8) + (5 * 12) + (6 * 16) = 188 - // (4 * 9) + (5 * 13) + (6 * 17) = 203 - // (4 * 10) + (5 * 14) + (6 * 18) = 218 - Tensor expected(allocator(), DT_QINT32, TensorShape({2, 4})); - test::FillValues<qint32>(&expected, {74, 80, 86, 92, 173, 188, 203, 218}); - test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); -} - -// This test multiplies two 1x1 8bit matrices, and compares the -// results with hand-calculated expectations. 
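The next pair of tests uses ranges whose zero point is not byte zero, which the comments inside spell out as "n+a_offset". A small sketch of where that offset comes from, under the same 255-step assumption as above; the op itself derives it with FloatToQuantizedUnclamped:

    #include <cmath>
    #include <cstdint>

    // The byte that represents the real value 0.0f for a given range.
    inline int32_t ZeroPointForRange(float min, float max) {
      const float step = (max - min) / 255.0f;  // one quantized step
      return static_cast<int32_t>(std::lround(-min / step));
    }

For the range [-12.0f, 243.0f] used below, the step is 1.0f and the zero point is byte 12, so -1.0f is stored as byte 11 and 0.0f as byte 12, matching the values the test feeds in.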
-TEST_F(QuantizedMatMulTest, VerySmall_WithParams) { - // These parameters reflect a typical production usage of eight-bit matmuls - // in an Inception-style network. - const bool transpose_a = true; - const int a_rows = 1; - const int a_cols = 1; - const int b_rows = 1; - const int b_cols = 1; - const bool transpose_b = false; - TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum<qint32>::v()) - .Attr("transpose_a", transpose_a) - .Attr("transpose_b", transpose_b) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - // The A matrix is: - // | -1 | - // The input array only contains unsigned bytes, so we specify the actual - // values as n+a_offset, where a_offset is 12 above. For example, that means -1 - // is represented as -1 + 12, or 11. - // We have set the transpose_a flag to true, so the matrix is transposed, and - // for filling in the values the in-memory storage order is effectively - // column major, rather than the default row-major. - AddInputFromArray<quint8>(TensorShape({a_rows, a_cols}), {11}); - - // The B matrix is: - // | 1 | - AddInputFromArray<quint8>(TensorShape({b_rows, b_cols}), {0}); - AddInputFromArray<float>(TensorShape({1}), {-12.0f}); - AddInputFromArray<float>(TensorShape({1}), {243.0f}); - AddInputFromArray<float>(TensorShape({1}), {1.0f}); - AddInputFromArray<float>(TensorShape({1}), {256.0f}); - TF_ASSERT_OK(RunOpKernel()); - // We're requesting C = A.transposed() * B, - // so we expect to get these results: - // 1*-1 = -1 - // | -1 | - Tensor expected(allocator(), DT_QINT32, TensorShape({a_cols, b_cols})); - test::FillValues<qint32>(&expected, {-1}); - test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); -} - -// This test multiplies two 1x1 8bit matrices, but sets an invalid quantization -// range, so we expect to get an error. -TEST_F(QuantizedMatMulTest, VerySmall_BadRange) { - // These parameters reflect a typical production usage of eight-bit matmuls - // in an Inception-style network. - const bool transpose_a = true; - const int a_rows = 1; - const int a_cols = 1; - const int b_rows = 1; - const int b_cols = 1; - const bool transpose_b = false; - TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum<qint32>::v()) - .Attr("transpose_a", transpose_a) - .Attr("transpose_b", transpose_b) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - // The A matrix is: - // | -1 | - AddInputFromArray<quint8>(TensorShape({a_rows, a_cols}), {11}); - - // The B matrix is: - // | 1 | - AddInputFromArray<quint8>(TensorShape({b_rows, b_cols}), {0}); - AddInputFromArray<float>(TensorShape({1}), {-12.0f}); - AddInputFromArray<float>(TensorShape({1}), {243.0f}); - // Here we set the range so that the min and max are equal, so we expect to - // see an error when we run. - AddInputFromArray<float>(TensorShape({1}), {1.0f}); - AddInputFromArray<float>(TensorShape({1}), {1.0f}); - EXPECT_EQ(::tensorflow::error::INVALID_ARGUMENT, RunOpKernel().code()); -} - -// This test multiplies a couple of small 8-bit matrices, and compares the -// results with hand-calculated expectations.
It uses shifts and offsets to -// control the range of the outputs. -TEST_F(QuantizedMatMulTest, Small_WithParams) { - // These parameters reflect a typical production usage of eight-bit matmuls - // in an Inception-style network. - const bool transpose_a = true; - const int a_rows = 3; - const int a_cols = 4; - const int b_rows = 3; - const int b_cols = 2; - const bool transpose_b = false; - TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum<qint32>::v()) - .Attr("transpose_a", transpose_a) - .Attr("transpose_b", transpose_b) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - // The A matrix is: - // | -1 | -5 | -9 | - // | -2 | -6 | -10 | - // | -3 | -7 | -11 | - // | -4 | -8 | -12 | - // The input array only contains unsigned bytes, so we specify the actual - // values as n+a_offset, where a_offset is 12 above. For example, that means -1 - // is represented as -1 + 12, or 11. - // We have set the transpose_a flag to true, so the matrix is transposed, and - // for filling in the values the in-memory storage order is effectively - // column major, rather than the default row-major. - AddInputFromArray<quint8>(TensorShape({a_rows, a_cols}), - { - 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, - }); - - // The B matrix is: - // | 1 | 4 | - // | 2 | 5 | - // | 3 | 6 | - AddInputFromArray<quint8>(TensorShape({b_rows, b_cols}), { - 1, 4, 2, 5, 3, 6, - }); - AddInputFromArray<float>(TensorShape({1}), {-12.0f}); - AddInputFromArray<float>(TensorShape({1}), {243.0f}); - AddInputFromArray<float>(TensorShape({1}), {0}); - AddInputFromArray<float>(TensorShape({1}), {255.0f}); - TF_ASSERT_OK(RunOpKernel()); - // We're requesting C = A.transposed() * B, - // so we expect to get these results: - // 1*-1 + 2*-5 + 3*-9 = -38 - // 4*-1 + 5*-5 + 6*-9 = -83 - // 1*-2 + 2*-6 + 3*-10 = -44 - // 4*-2 + 5*-6 + 6*-10 = -98 - // 1*-3 + 2*-7 + 3*-11 = -50 - // 4*-3 + 5*-7 + 6*-11 = -113 - // 1*-4 + 2*-8 + 3*-12 = -56 - // 4*-4 + 5*-8 + 6*-12 = -128 - // | -38 | -83 | - // | -44 | -98 | - // | -50 | -113 | - // | -56 | -128 | - Tensor expected(allocator(), DT_QINT32, TensorShape({a_cols, b_cols})); - test::FillValues<qint32>(&expected, - { - -38, -83, -44, -98, -50, -113, -56, -128, - }); - test::ExpectTensorEqual<qint32>(expected, *GetOutput(0)); -} - -// This test multiplies a couple of medium-sized 8-bit matrices, and tests the -// results against what we saw from running a float MatMul with equivalent -// inputs.
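Before the test itself, a note on the tolerance it uses. The op reports an output range via outputs 1 and 2, computed by QuantizationRangeForMultiplication; the idea is roughly that one step of the qint32 output corresponds to the product of one step of each eight-bit input. A sketch under that assumption, with an illustrative name rather than the real helper:

    #include <cstdint>
    #include <limits>

    void RangeForMultiply(float min_a, float max_a, float min_b, float max_b,
                          float* min_c, float* max_c) {
      const float a_step = (max_a - min_a) / 255.0f;  // one quint8 step of A
      const float b_step = (max_b - min_b) / 255.0f;  // one quint8 step of B
      const float c_step = a_step * b_step;           // one qint32 step of C
      *min_c = c_step * static_cast<float>(std::numeric_limits<int32_t>::min());
      *max_c = c_step * static_cast<float>(std::numeric_limits<int32_t>::max());
    }

The inputs below were themselves quantized from floats, so each of the eight products in an output entry carries up to half an input step of error; that accumulated error is presumably why the comparison allows differences up to 15.0 rather than the 1.0 used by the convolution tests earlier in this change.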
-TEST_F(QuantizedMatMulTest, Medium_WithParams) { - const bool transpose_a = true; - const bool transpose_b = false; - TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum<qint32>::v()) - .Attr("transpose_a", transpose_a) - .Attr("transpose_b", transpose_b) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - - const int a_rows = 8; - const int a_cols = 8; - const float a_min = -2164.25f; - const float a_max = 2006.27f; - Tensor a_float(DT_FLOAT, {a_rows, a_cols}); - test::FillValues<float>( - &a_float, - {-1014.12, -157.382, -810.17, 1435.28, 1016.37, 219.684, -316.054, - -2164.25, 2006.27, -547.444, 857.376, 404.376, 9.72115, 332.588, - 194.385, -286.57, 26.062, 23.1125, 110.436, 247.055, -127.683, - -376.275, -124.81, -846.826, -77.1507, 305.581, -202.747, 12.9528, - 9.64886, 872.686, 40.9069, 197.816, 44.16, -306.768, -1457.52, - -368.939, -1049.42, -486.353, 1745.87, 95.7695, 395.773, -254.333, - -404.27, 787.16, -2.44114, 199.37, -1024.08, 784.901, 235.055, - -42.7295, 241.498, -245.365, 470.763, 186.159, 186.579, -220.163, - 1304.58, 386.272, -358.853, -755.996, 360.109, -866.007, 55.2828, - -508.801}); - Tensor a_quantized = FloatTensorToQuantized<quint8>(a_float, a_min, a_max); - - const int b_rows = 8; - const int b_cols = 8; - const float b_min = -0.739539f; - const float b_max = 0.641057f; - Tensor b_float(DT_FLOAT, {b_rows, b_cols}); - test::FillValues<float>( - &b_float, - {-0.294619, -0.0670519, 0.261507, -0.126274, 0.127229, -0.176945, - -0.251223, 0.231086, 0.453694, 0.415666, -0.288733, 0.508717, - 0.211551, 0.0435907, -0.582383, -0.308779, 0.0696883, -0.438122, - 0.114, 0.433964, 0.109883, 0.284931, -0.149661, 0.108657, - 0.458333, -0.130231, -0.35805, -0.123206, -0.437968, 0.0282411, - 0.628818, -0.0522173, -0.0233403, 0.124863, 0.217165, 0.262294, - -0.171005, -0.254693, -0.200433, -0.287354, 0.488166, -0.0354688, - -0.118091, -0.590444, 0.491537, -0.739539, 0.083117, 0.282482, - 0.275269, -0.36574, 0.107476, 0.0511428, -0.136887, -0.0149852, - -0.259694, 0.641057, 0.264054, -0.295126, -0.0218791, 0.361211, - 0.012448, 0.0709718, -0.392394, -0.434215}); - Tensor b_quantized = FloatTensorToQuantized<quint8>(b_float, b_min, b_max); - - AddInputFromArray<quint8>(a_quantized.shape(), a_quantized.flat<quint8>()); - AddInputFromArray<quint8>(b_quantized.shape(), b_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {a_min}); - AddInputFromArray<float>(TensorShape({1}), {a_max}); - AddInputFromArray<float>(TensorShape({1}), {b_min}); - AddInputFromArray<float>(TensorShape({1}), {b_max}); - TF_ASSERT_OK(RunOpKernel()); - - Tensor expected_float(DT_FLOAT, {a_cols, b_cols}); - test::FillValues<float>( - &expected_float, - {1776.82f, 421.058f, -854.308f, 1430.65f, 503.105f, 57.2744f, - -1514.97f, -1163.66f, -87.0979f, -394.577f, -39.4983f, -79.1938f, - -329.029f, 313.475f, 446.929f, -59.5855f, 350.837f, 238.655f, - -609.21f, 350.499f, 192.238f, 847.576f, -103.177f, 185.886f, - -90.5335f, 200.787f, 99.1981f, -717.076f, 763.815f, -703.726f, - -125.164f, 732.325f, -51.5303f, -418.826f, 60.0783f, -299.658f, - 231.41f, 72.0622f, -289.244f, 663.776f, 391.177f, 294.415f, - -484.148f, -677.932f, -180.342f, -194.764f, 761.715f, 553.061f, - -283.355f, 321.109f, 351.269f, 1171.7f, -857.497f, 343.804f, - -494.599f, -844.119f, 725.237f, 
586.052f, -735.013f, -897.723f, - -122.434f, -502.907f, 1264.6f, -239.991f}); - - const Tensor& output_quantized = *GetOutput(0); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - Tensor output_float = - QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max); - test::ExpectTensorNear<float>(expected_float, output_float, 15.0); -} - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_pooling_ops.cc b/tensorflow/core/kernels/quantized_pooling_ops.cc deleted file mode 100644 index 33a12c4746..0000000000 --- a/tensorflow/core/kernels/quantized_pooling_ops.cc +++ /dev/null @@ -1,135 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// See docs in ../ops/nn_ops.cc. - -#define EIGEN_USE_THREADS - -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/framework/numeric_op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/kernels/pooling_ops_common.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/util/padding.h" -#include "tensorflow/core/util/tensor_format.h" - -namespace tensorflow { - -typedef Eigen::ThreadPoolDevice CPUDevice; - -template <typename Device, typename T> -class QuantizedAvgPoolingOp : public OpKernel { - public: - explicit QuantizedAvgPoolingOp(OpKernelConstruction* context) - : OpKernel(context) { - OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_)); - OP_REQUIRES(context, ksize_.size() == 4, - errors::InvalidArgument("Sliding window ksize field must " - "specify 4 dimensions")); - OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_)); - OP_REQUIRES(context, stride_.size() == 4, - errors::InvalidArgument("Sliding window strides field must " - "specify 4 dimensions")); - OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); - OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1, - errors::Unimplemented( - "Pooling is not yet supported on the batch dimension.")); - } - - void Compute(OpKernelContext* context) override { - const Tensor& tensor_in = context->input(0); - PoolParameters params{context, ksize_, stride_, - padding_, FORMAT_NHWC, tensor_in.shape()}; - if (!context->status().ok()) { - return; - } - - const float min_input = context->input(1).flat<float>()(0); - const float max_input = context->input(2).flat<float>()(0); - - OP_REQUIRES(context, params.depth_window == 1, - errors::Unimplemented("Non-spatial pooling is not " - "yet supported. Volunteers? 
:)")); - - OP_REQUIRES(context, tensor_in.dims() == 4, - errors::InvalidArgument("tensor_in must be 4-dimensional")); - - Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output( - 0, params.forward_output_shape(), &output)); - const int32 highest = static_cast<int32>(Eigen::NumTraits<T>::highest()); - const int32 lowest = static_cast<int32>(Eigen::NumTraits<T>::lowest()); - - // TODO(vrv): Switch this to the Eigen::Tensor version of - // SpatialAvgPooling once that version is running quickly. - Tensor int32_output(DT_INT32, params.forward_output_shape()); - // Cast input to int32 tensor and call SpatialAvgPool. - Tensor int32_input(DT_INT32, tensor_in.shape()); - int32_input.flat<int32>() = tensor_in.flat<T>().template cast<int32>(); - SpatialAvgPool<Device, int32>(context, &int32_output, int32_input, params, - padding_); - - // Clamp the int32 output back into quantized space. - output->flat<T>() = int32_output.flat<int32>() - .cwiseMax(lowest) - .cwiseMin(highest) - .template cast<T>(); - - Tensor* output_min = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); - output_min->flat<float>()(0) = min_input; - Tensor* output_max = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max)); - output_max->flat<float>()(0) = max_input; - } - - private: - std::vector<int32> ksize_; - std::vector<int32> stride_; - Padding padding_; -}; - -template <typename Device, typename T> -class QuantizedMaxPoolingOp : public MaxPoolingOp<Device, T> { - public: - explicit QuantizedMaxPoolingOp(OpKernelConstruction* context) - : MaxPoolingOp<Device, T>(context) {} - - void Compute(OpKernelContext* context) override { - const float min_input = context->input(1).flat<float>()(0); - const float max_input = context->input(2).flat<float>()(0); - MaxPoolingOp<Device, T>::Compute(context); - Tensor* output_min = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); - output_min->flat<float>()(0) = min_input; - Tensor* output_max = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max)); - output_max->flat<float>()(0) = max_input; - } -}; - -REGISTER_KERNEL_BUILDER( - Name("QuantizedAvgPool").Device(DEVICE_CPU).TypeConstraint<quint8>("T"), - QuantizedAvgPoolingOp<CPUDevice, quint8>); - -REGISTER_KERNEL_BUILDER( - Name("QuantizedMaxPool").Device(DEVICE_CPU).TypeConstraint<quint8>("T"), - QuantizedMaxPoolingOp<CPUDevice, quint8>); - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_pooling_ops_test.cc b/tensorflow/core/kernels/quantized_pooling_ops_test.cc deleted file mode 100644 index d3247d15d6..0000000000 --- a/tensorflow/core/kernels/quantized_pooling_ops_test.cc +++ /dev/null @@ -1,127 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { - -class QuantizedPoolingTest : public OpsTestBase { - protected: -}; - -TEST_F(QuantizedPoolingTest, SmallAveragePooling) { - const int ksize = 2; - const int stride = 2; - TF_ASSERT_OK(NodeDefBuilder("quantized_avg_pool_op", "QuantizedAvgPool") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("T", DataTypeToEnum<quint8>::v()) - .Attr("ksize", {1, ksize, ksize, 1}) - .Attr("strides", {1, stride, stride, 1}) - .Attr("padding", "SAME") - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - const float input_min = 0.0f; - const float input_max = 255.0f; - const int input_height = 4; - const int input_width = 4; - const int input_channels = 2; - Tensor input_float(DT_FLOAT, {1, input_height, input_width, input_channels}); - test::FillValues<float>( - &input_float, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}); - Tensor input_quantized = - FloatTensorToQuantized<quint8>(input_float, input_min, input_max); - - const int expected_width = input_width / stride; - const int expected_height = input_height / stride; - Tensor expected_float(DT_FLOAT, - {1, expected_height, expected_width, input_channels}); - test::FillValues<float>(&expected_float, {6, 7, 10, 11, 22, 23, 26, 27}); - - AddInputFromArray<quint8>(input_quantized.shape(), - input_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {input_min}); - AddInputFromArray<float>(TensorShape({1}), {input_max}); - TF_ASSERT_OK(RunOpKernel()); - const Tensor& output_quantized = *GetOutput(0); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - Tensor output_float = - QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max); - test::ExpectTensorNear<float>(expected_float, output_float, 0.2); -} - -TEST_F(QuantizedPoolingTest, SmallMaxPooling) { - const int ksize = 2; - const int stride = 2; - TF_ASSERT_OK(NodeDefBuilder("quantized_max_pool_op", "QuantizedMaxPool") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("T", DataTypeToEnum<quint8>::v()) - .Attr("ksize", {1, ksize, ksize, 1}) - .Attr("strides", {1, stride, stride, 1}) - .Attr("padding", "SAME") - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - const float input_min = 0.0f; - const float input_max = 255.0f; - const int input_height = 4; - const int input_width = 4; - const int input_channels = 2; - Tensor input_float(DT_FLOAT, {1, input_height, input_width, input_channels}); - test::FillValues<float>( - &input_float, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 
25, 26, 27, 28, 29, 30, 31, 32}); - Tensor input_quantized = - FloatTensorToQuantized<quint8>(input_float, input_min, input_max); - - const int expected_width = input_width / stride; - const int expected_height = input_height / stride; - Tensor expected_float(DT_FLOAT, - {1, expected_height, expected_width, input_channels}); - test::FillValues<float>(&expected_float, {11, 12, 15, 16, 27, 28, 31, 32}); - - AddInputFromArray<quint8>(input_quantized.shape(), - input_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {input_min}); - AddInputFromArray<float>(TensorShape({1}), {input_max}); - TF_ASSERT_OK(RunOpKernel()); - const Tensor& output_quantized = *GetOutput(0); - const float output_min = GetOutput(1)->flat<float>()(0); - const float output_max = GetOutput(2)->flat<float>()(0); - Tensor output_float = - QuantizedTensorToFloat<quint8>(output_quantized, output_min, output_max); - test::ExpectTensorNear<float>(expected_float, output_float, 0.2); -} - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/reference_gemm.h b/tensorflow/core/kernels/reference_gemm.h deleted file mode 100644 index 5e4cde07d7..0000000000 --- a/tensorflow/core/kernels/reference_gemm.h +++ /dev/null @@ -1,90 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_ -#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_ - -// This is an unoptimized but debuggable implementation of the GEMM matrix -// multiply function, used to compare to faster but more opaque versions, or -// for bit depths or argument combinations that aren't supported by optimized -// code. -// It assumes the row-major convention used by TensorFlow, and implements -// C = A * B, like the standard BLAS GEMM interface. If the transpose flags are -// true, then the relevant matrix is treated as stored in column-major order. - -namespace tensorflow { -template <class T1, class T2, class T3> -void ReferenceGemm(bool transpose_a, bool transpose_b, bool transpose_c, - size_t m, size_t n, size_t k, const T1* a, int32 offset_a, - size_t lda, const T2* b, int32 offset_b, size_t ldb, T3* c, - int32 shift_c, int32 offset_c, int32 mult_c, size_t ldc) { - int a_i_stride; - int a_l_stride; - if (transpose_a) { - a_i_stride = 1; - a_l_stride = lda; - } else { - a_i_stride = lda; - a_l_stride = 1; - } - int b_j_stride; - int b_l_stride; - if (transpose_b) { - b_j_stride = ldb; - b_l_stride = 1; - } else { - b_j_stride = 1; - b_l_stride = ldb; - } - int c_i_stride; - int c_j_stride; - if (transpose_c) { - c_i_stride = 1; - c_j_stride = ldc; - } else { - c_i_stride = ldc; - c_j_stride = 1; - } - - const int32 highest = static_cast<int32>(Eigen::NumTraits<T3>::highest()); - const int32 lowest = static_cast<int32>(Eigen::NumTraits<T3>::lowest()); - const int32 rounding = (shift_c < 1) ? 0 : (1 << (shift_c - 1)); - - int i, j, l; - for (j = 0; j < n; j++) { - for (i = 0; i < m; i++) { - int32 total = 0; - for (l = 0; l < k; l++) { - const size_t a_index = ((i * a_i_stride) + (l * a_l_stride)); - const int32 a_value = static_cast<int32>(a[a_index]) - offset_a; - const size_t b_index = ((j * b_j_stride) + (l * b_l_stride)); - const int32 b_value = static_cast<int32>(b[b_index]) - offset_b; - total += (a_value * b_value); - } - const size_t c_index = ((i * c_i_stride) + (j * c_j_stride)); - int32_t output = ((((total + offset_c) * mult_c) + rounding) >> shift_c); - if (output > highest) { - output = highest; - } - if (output < lowest) { - output = lowest; - } - c[c_index] = static_cast<T3>(output); - } - } -} -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
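To close out, a minimal driver for the ReferenceGemm declared above, exercising the no-transpose, identity post-processing path. The values are made up for illustration, and the call assumes TensorFlow's int32 typedef and Eigen are in scope, just as the header itself does:

    // A is 2x3 and B is 3x2, both row-major, so m = 2, n = 2, k = 3,
    // lda = 3 (columns of A), ldb = 2 (columns of B), ldc = 2 (columns of C).
    const uint8_t a[6] = {1, 2, 3, 4, 5, 6};
    const uint8_t b[6] = {7, 8, 9, 10, 11, 12};
    int32_t c[4];
    ReferenceGemm<uint8_t, uint8_t, int32_t>(
        false, false, false, 2, 2, 3,
        a, 0, 3,         // zero offset, row stride of 3
        b, 0, 2,         // zero offset, row stride of 2
        c, 0, 0, 1, 2);  // shift_c = 0, offset_c = 0, mult_c = 1
    // c now holds {58, 64, 139, 154}, the plain integer product A * B.

With a positive shift_c, each accumulated total is instead offset by offset_c, scaled by mult_c, rounded to nearest via the rounding term above, shifted right by shift_c, and finally clamped to the range of T3.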