diff options
author | 2018-09-17 15:11:22 -0700 | |
---|---|---|
committer | 2018-09-17 15:15:50 -0700 | |
commit | bb30dfce198341b2ec80d0aa22b49eaa5eac533b (patch) | |
tree | 498f8ecd7751ebcfaf966993efa40e3ccebbae65 | |
parent | 55581a5bed7108c2d39ab603db8c916b6d624648 (diff) |
Add benchmarks comparing Mkl vs Default Conv2D ops.
PiperOrigin-RevId: 213346439
-rw-r--r-- | tensorflow/core/kernels/BUILD | 21 | ||||
-rw-r--r-- | tensorflow/core/kernels/mkl_conv_ops_test.cc | 407 | ||||
-rw-r--r-- | tensorflow/tensorflow.bzl | 2 |
3 files changed, 430 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 94d3ab4467..ef176a7de6 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -30,6 +30,7 @@ load( "//tensorflow:tensorflow.bzl", "if_android", "tf_cc_test", + "tf_cc_test_mkl", "tf_cc_tests", "tf_cc_binary", "tf_copts", @@ -6228,6 +6229,26 @@ tf_mkl_kernel_library( ] + mkl_deps(), ) +tf_cc_test_mkl( + name = "mkl_conv_ops_test", + size = "small", + srcs = ["mkl_conv_ops_test.cc"], + deps = [ + ":ops_testutil", + ":ops_util", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:tensorflow", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_mkl_kernel_library( name = "mkl_tfconv_op", prefix = "mkl_tfconv", diff --git a/tensorflow/core/kernels/mkl_conv_ops_test.cc b/tensorflow/core/kernels/mkl_conv_ops_test.cc new file mode 100644 index 0000000000..a055351337 --- /dev/null +++ b/tensorflow/core/kernels/mkl_conv_ops_test.cc @@ -0,0 +1,407 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/nn_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/public/session.h" + +#if defined(INTEL_MKL_DNN_ONLY) +#include "third_party/intel_mkl_dnn/include/mkldnn.h" +#include "tensorflow/core/util/mkl_util.h" +#endif + +// TODO(ezhulenev): Add numerical tests that will compare results of default +// (aka Eigen) convolutions with MKL convolutions. + +// -------------------------------------------------------------------------- // +// Performance Benchmarks. // +// -------------------------------------------------------------------------- // + +// Compare performance of default Tensorflow convolution kernels (Eigen) with +// MKL kernels on CPU. 
+
+// Before running these benchmarks configure OpenMP environment variables:
+//   export KMP_BLOCKTIME=0
+//   export OMP_NUM_THREADS=${num_threads}
+
+namespace tensorflow {
+
+// Sizes describing one Conv2D benchmark case. The helpers below build the
+// input tensor as {input_batches, input_height, input_width, input_depth} and
+// the filter tensor as {filter_height, filter_width, input_depth,
+// filter_count}.
+//
+// Constructor argument order: n, h, w, c, fc, fh, fw (filter height comes
+// before filter width).
+struct Conv2DDimensions {
+  Conv2DDimensions(int n, int h, int w, int c, int fc, int fh, int fw)
+      : input_batches(n),
+        input_height(h),
+        input_width(w),
+        input_depth(c),
+        filter_count(fc),
+        filter_height(fh),
+        filter_width(fw) {}
+
+  int input_batches;
+  int input_height;
+  int input_width;
+  int input_depth;
+  int filter_count;
+  int filter_height;
+  int filter_width;
+};
+
+// Returns a DT_FLOAT tensor of the given shape with randomized contents.
+static Tensor GetRandomTensor(const TensorShape& shape) {
+  Tensor tensor(DT_FLOAT, shape);
+  // setRandom() fills the flat view in place; assigning its result back to
+  // the same view would only copy the buffer onto itself.
+  tensor.flat<float>().setRandom();
+  return tensor;
+}
+
+// Get a random Tensor for the Conv2D input.
+static Tensor GetRandomInputTensor(const Conv2DDimensions& dims) {
+  return GetRandomTensor({dims.input_batches, dims.input_height,
+                          dims.input_width, dims.input_depth});
+}
+
+// Get a random Tensor for the Conv2D filter.
+static Tensor GetRandomFilterTensor(const Conv2DDimensions& dims) {
+  return GetRandomTensor({dims.filter_height, dims.filter_width,
+                          dims.input_depth, dims.filter_count});
+}
+
+// Get a random Tensor for the Conv2D output (assuming SAME padding).
+// With the unit strides used by every graph in this file the output keeps the
+// input's batch/height/width and has filter_count channels.
+static Tensor GetRandomOutputTensor(const Conv2DDimensions& dims) {
+  return GetRandomTensor({dims.input_batches, dims.input_height,
+                          dims.input_width, dims.filter_count});
+}
+
+// Get a Tensor encoding Conv2D input shape.
+static Tensor GetInputSizesTensor(const Conv2DDimensions& dims) {
+  return test::AsTensor<int32>({dims.input_batches, dims.input_height,
+                                dims.input_width, dims.input_depth});
+}
+
+// Get a Tensor encoding Conv2D filter shape.
+static Tensor GetFilterSizesTensor(const Conv2DDimensions& dims) {
+  return test::AsTensor<int32>({dims.filter_height, dims.filter_width,
+                                dims.input_depth, dims.filter_count});
+}
+
+#if defined(INTEL_MKL_DNN_ONLY)
+// Returns a DT_UINT8 tensor holding a serialized MklDnnShape that is flagged
+// as "not an MKL tensor". The _Mkl* graphs below feed it as the extra
+// shape input that accompanies each data input.
+static Tensor NonMklTensor() {
+  MklDnnShape non_mkl_shape;
+  non_mkl_shape.SetMklTensor(false);
+
+  auto size = static_cast<int64>(non_mkl_shape.GetSerializeBufferSize());
+  Tensor tensor(DT_UINT8, {size});
+
+  non_mkl_shape.SerializeMklDnnShape(tensor.flat<uint8>().data(),
+                                     size * sizeof(uint8));
+  return tensor;
+}
+#endif
+
+// Builds a graph with a single float "Conv2D" node (strides {1, 1, 1, 1},
+// SAME padding) fed by constant random input and filter tensors.
+// Returns a heap-allocated Graph.
+// NOTE(review): presumably test::Benchmark takes ownership -- confirm.
+static Graph* DefaultConv2D(const Conv2DDimensions& dims) {
+  auto* graph = new Graph(OpRegistry::Global());
+
+  Tensor input_t = GetRandomInputTensor(dims);
+  Tensor filter_t = GetRandomFilterTensor(dims);
+
+  Node* input = test::graph::Constant(graph, input_t, "input");
+  Node* filter = test::graph::Constant(graph, filter_t, "filter");
+
+  Node* conv2d;
+  TF_CHECK_OK(NodeBuilder(graph->NewName("conv_2d"), "Conv2D")
+                  .Input(input)
+                  .Input(filter)
+                  .Attr("T", DT_FLOAT)
+                  .Attr("strides", {1, 1, 1, 1})
+                  .Attr("padding", "SAME")
+                  .Finalize(graph, &conv2d));
+
+  return graph;
+}
+
+#if defined(INTEL_MKL_DNN_ONLY)
+// Same as DefaultConv2D, but builds a "_MklConv2D" node. Each data input is
+// paired with a NonMklTensor() shape input, and the "_kernel" attribute is
+// set to "MklOp".
+static Graph* MklConv2D(const Conv2DDimensions& dims) {
+  auto* graph = new Graph(OpRegistry::Global());
+
+  Tensor input_t = GetRandomInputTensor(dims);
+  Tensor filter_t = GetRandomFilterTensor(dims);
+
+  Node* input = test::graph::Constant(graph, input_t, "input");
+  Node* filter = test::graph::Constant(graph, filter_t, "filter");
+
+  Node* not_mkl_shape = test::graph::Constant(graph, NonMklTensor(), "not_mkl");
+
+  Node* conv2d;
+  TF_CHECK_OK(NodeBuilder(graph->NewName("mkl_conv_2d"), "_MklConv2D")
+                  .Input(input)
+                  .Input(filter)
+                  .Input(not_mkl_shape)
+                  .Input(not_mkl_shape)
+                  .Attr("T", DT_FLOAT)
+                  .Attr("strides", {1, 1, 1, 1})
+                  .Attr("padding", "SAME")
+                  .Attr("_kernel", "MklOp")
+                  .Finalize(graph, &conv2d));
+
+  return graph;
+}
+#endif
+
+// Builds a graph with a single float "Conv2DBackpropInput" node (unit
+// strides, SAME padding) fed by the input sizes, a random filter and a random
+// output gradient.
+static Graph* DefaultConv2DBwdInput(const Conv2DDimensions& dims) {
+  auto* graph = new Graph(OpRegistry::Global());
+
+  Tensor input_sizes_t = GetInputSizesTensor(dims);
+  Tensor filter_t = GetRandomFilterTensor(dims);
+  Tensor out_backprop_t = GetRandomOutputTensor(dims);  // assuming SAME padding
+
+  Node* input_sizes =
+      test::graph::Constant(graph, input_sizes_t, "input_sizes");
+  Node* filter = test::graph::Constant(graph, filter_t, "filter");
+  Node* out_backprop =
+      test::graph::Constant(graph, out_backprop_t, "out_backprop");
+
+  Node* conv2d_bwd_input;
+  TF_CHECK_OK(
+      NodeBuilder(graph->NewName("conv_2d_bwd_input"), "Conv2DBackpropInput")
+          .Input(input_sizes)
+          .Input(filter)
+          .Input(out_backprop)
+          .Attr("T", DT_FLOAT)
+          .Attr("strides", {1, 1, 1, 1})
+          .Attr("padding", "SAME")
+          .Finalize(graph, &conv2d_bwd_input));
+
+  return graph;
+}
+
+#if defined(INTEL_MKL_DNN_ONLY)
+// Same as DefaultConv2DBwdInput, but builds a "_MklConv2DBackpropInput" node
+// with one NonMklTensor() shape input per data input.
+static Graph* MklConv2DBwdInput(const Conv2DDimensions& dims) {
+  auto* graph = new Graph(OpRegistry::Global());
+
+  Tensor input_sizes_t = GetInputSizesTensor(dims);
+  Tensor filter_t = GetRandomFilterTensor(dims);
+  Tensor out_backprop_t = GetRandomOutputTensor(dims);  // assuming SAME padding
+
+  Node* input_sizes =
+      test::graph::Constant(graph, input_sizes_t, "input_sizes");
+  Node* filter = test::graph::Constant(graph, filter_t, "filter");
+  Node* out_backprop =
+      test::graph::Constant(graph, out_backprop_t, "out_backprop");
+
+  Node* not_mkl_shape = test::graph::Constant(graph, NonMklTensor(), "not_mkl");
+
+  Node* conv2d_bwd_input;
+  TF_CHECK_OK(NodeBuilder(graph->NewName("conv_2d_bwd_input"),
+                          "_MklConv2DBackpropInput")
+                  .Input(input_sizes)
+                  .Input(filter)
+                  .Input(out_backprop)
+                  .Input(not_mkl_shape)
+                  .Input(not_mkl_shape)
+                  .Input(not_mkl_shape)
+                  .Attr("T", DT_FLOAT)
+                  .Attr("strides", {1, 1, 1, 1})
+                  .Attr("padding", "SAME")
+                  .Attr("_kernel", "MklOp")
+                  .Finalize(graph, &conv2d_bwd_input));
+
+  return graph;
+}
+#endif
+
+// Builds a graph with a single float "Conv2DBackpropFilter" node (unit
+// strides, SAME padding) fed by a random input, the filter sizes and a random
+// output gradient. No filter values are needed: the op only takes the filter
+// shape.
+static Graph* DefaultConv2DBwdFilter(const Conv2DDimensions& dims) {
+  auto* graph = new Graph(OpRegistry::Global());
+
+  Tensor input_t = GetRandomInputTensor(dims);
+  Tensor filter_sizes_t = GetFilterSizesTensor(dims);
+  Tensor out_backprop_t = GetRandomOutputTensor(dims);  // assuming SAME padding
+
+  Node* input = test::graph::Constant(graph, input_t, "input");
+  Node* filter_sizes =
+      test::graph::Constant(graph, filter_sizes_t, "filter_sizes");
+  Node* out_backprop =
+      test::graph::Constant(graph, out_backprop_t, "out_backprop");
+
+  Node* conv2d_bwd_filter;
+  TF_CHECK_OK(
+      NodeBuilder(graph->NewName("conv_2d_bwd_filter"), "Conv2DBackpropFilter")
+          .Input(input)
+          .Input(filter_sizes)
+          .Input(out_backprop)
+          .Attr("T", DT_FLOAT)
+          .Attr("strides", {1, 1, 1, 1})
+          .Attr("padding", "SAME")
+          .Finalize(graph, &conv2d_bwd_filter));
+
+  return graph;
+}
+
+#if defined(INTEL_MKL_DNN_ONLY)
+// Same as DefaultConv2DBwdFilter, but builds a "_MklConv2DBackpropFilter" node
+// with one NonMklTensor() shape input per data input.
+static Graph* MklConv2DBwdFilter(const Conv2DDimensions& dims) {
+  auto* graph = new Graph(OpRegistry::Global());
+
+  Tensor input_t = GetRandomInputTensor(dims);
+  Tensor filter_sizes_t = GetFilterSizesTensor(dims);
+  Tensor out_backprop_t = GetRandomOutputTensor(dims);  // assuming SAME padding
+
+  Node* input = test::graph::Constant(graph, input_t, "input");
+  Node* filter_sizes =
+      test::graph::Constant(graph, filter_sizes_t, "filter_sizes");
+  Node* out_backprop =
+      test::graph::Constant(graph, out_backprop_t, "out_backprop");
+
+  Node* not_mkl_shape = test::graph::Constant(graph, NonMklTensor(), "not_mkl");
+
+  Node* conv2d_bwd_filter;
+  TF_CHECK_OK(NodeBuilder(graph->NewName("conv_2d_bwd_filter"),
+                          "_MklConv2DBackpropFilter")
+                  .Input(input)
+                  .Input(filter_sizes)
+                  .Input(out_backprop)
+                  .Input(not_mkl_shape)
+                  .Input(not_mkl_shape)
+                  .Input(not_mkl_shape)
+                  .Attr("T", DT_FLOAT)
+                  .Attr("strides", {1, 1, 1, 1})
+                  .Attr("padding", "SAME")
+                  .Attr("_kernel", "MklOp")
+                  .Finalize(graph, &conv2d_bwd_filter));
+
+  return graph;
+}
+#endif
+
+// Macro arguments
names: --------------------------------------------------- //
+// N: batch size
+// H: height
+// W: width
+// C: channels
+// FC: filter count
+// FH: filter height
+// FW: filter width
+
+#define BM_CONCAT(a, b) a##b
+
+// Builds the benchmark function name:
+//   BM_<p>_<type>_in_<N>_<H>_<W>_<C>_f_<FC>_<FH>_<FW>
+#define BM_NAME(p, type, N, H, W, C, FC, FH, FW) \
+  BM_CONCAT(BM_##p##_##type##_in_##N##_##H##_##W##_##C, _f_##FC##_##FH##_##FW)
+
+// Flops computation in these benchmarks are the same as in
+// eigen_benchmark_cpu_test.cc.
+
+// Defines and registers a forward Conv2D benchmark. `kind` selects the graph
+// builder (<kind>Conv2D) and `type` is passed to test::Benchmark as the device
+// name. Each of the N*H*W*FC output elements is counted as C*FH*FW items.
+// Conv2DDimensions takes filter height before filter width, so FH and FW are
+// forwarded in that order. The first factor is widened to int64 so the whole
+// product is computed in 64 bits.
+#define BM_Conv2DT(kind, N, H, W, C, FC, FH, FW, type, LABEL)                  \
+  static void BM_NAME(Conv2D_##kind, type, N, H, W, C, FC, FH,                 \
+                      FW)(int iters) {                                         \
+    testing::SetLabel(LABEL);                                                  \
+                                                                               \
+    int64 num_computed_elements = static_cast<int64>(N) * (H) * (W) * (FC);    \
+    int64 flops_per_iter = num_computed_elements * ((C) * (FH) * (FW));        \
+    testing::ItemsProcessed(static_cast<int64>(iters) * flops_per_iter);       \
+                                                                               \
+    Conv2DDimensions dims(N, H, W, C, FC, FH, FW);                             \
+    test::Benchmark(#type, BM_CONCAT(kind, Conv2D)(dims)).Run(iters);          \
+  }                                                                            \
+  BENCHMARK(BM_NAME(Conv2D_##kind, type, N, H, W, C, FC, FH, FW))
+
+// Registers the Default benchmark and, in MKL-DNN builds, the Mkl one too.
+#if defined(INTEL_MKL_DNN_ONLY)
+#define BM_Conv2D(N, H, W, C, FC, FH, FW, type, LABEL)      \
+  BM_Conv2DT(Default, N, H, W, C, FC, FH, FW, type, LABEL); \
+  BM_Conv2DT(Mkl, N, H, W, C, FC, FH, FW, type, LABEL);
+#else
+#define BM_Conv2D(N, H, W, C, FC, FH, FW, type, LABEL) \
+  BM_Conv2DT(Default, N, H, W, C, FC, FH, FW, type, LABEL);
+#endif
+
+// Defines and registers a Conv2DBackpropInput benchmark. The op produces
+// N*H*W*C input-gradient elements, and each one accumulates over
+// FC*FH*FW (output channel, filter row, filter column) terms, so the
+// per-element factor uses FC rather than C. This keeps the total equal to
+// the forward and filter-gradient benchmarks: N*H*W*C*FC*FH*FW.
+// NOTE(review): confirm against eigen_benchmark_cpu_test.cc.
+#define BM_Conv2DBwdInputT(kind, N, H, W, C, FC, FH, FW, type, LABEL)          \
+  static void BM_NAME(Conv2DBwdInput_##kind, type, N, H, W, C, FC, FH,         \
+                      FW)(int iters) {                                         \
+    testing::SetLabel(LABEL);                                                  \
+                                                                               \
+    int64 num_computed_elements = static_cast<int64>(N) * (H) * (W) * (C);     \
+    int64 flops_per_iter = num_computed_elements * ((FC) * (FH) * (FW));       \
+    testing::ItemsProcessed(static_cast<int64>(iters) * flops_per_iter);       \
+                                                                               \
+    Conv2DDimensions dims(N, H, W, C, FC, FH, FW);                             \
+    test::Benchmark(#type, BM_CONCAT(kind, Conv2DBwdInput)(dims)).Run(iters);  \
+  }                                                                            \
+  BENCHMARK(BM_NAME(Conv2DBwdInput_##kind, type, N, H, W, C, FC, FH, FW))
+
+#if defined(INTEL_MKL_DNN_ONLY)
+#define BM_Conv2DBwdInput(N, H, W, C, FC, FH, FW, type, LABEL)      \
+  BM_Conv2DBwdInputT(Default, N, H, W, C, FC, FH, FW, type, LABEL); \
+  BM_Conv2DBwdInputT(Mkl, N, H, W, C, FC, FH, FW, type, LABEL);
+#else
+#define BM_Conv2DBwdInput(N, H, W, C, FC, FH, FW, type, LABEL) \
+  BM_Conv2DBwdInputT(Default, N, H, W, C, FC, FH, FW, type, LABEL);
+#endif
+
+// Defines and registers a Conv2DBackpropFilter benchmark. `kind` selects the
+// graph builder (<kind>Conv2DBwdFilter) and `type` is passed to
+// test::Benchmark as the device name. Each of the FH*FW*C*FC filter-gradient
+// elements is counted as N*H*W items. Conv2DDimensions takes filter height
+// before filter width, so FH and FW are forwarded in that order. The first
+// factor is widened to int64 so the whole product is computed in 64 bits.
+#define BM_Conv2DBwdFilterT(kind, N, H, W, C, FC, FH, FW, type, LABEL)         \
+  static void BM_NAME(Conv2DBwdFilter_##kind, type, N, H, W, C, FC, FH,        \
+                      FW)(int iters) {                                         \
+    testing::SetLabel(LABEL);                                                  \
+                                                                               \
+    int64 num_computed_elements = static_cast<int64>(FH) * (FW) * (C) * (FC);  \
+    int64 flops_per_iter = num_computed_elements * ((N) * (H) * (W));          \
+    testing::ItemsProcessed(static_cast<int64>(iters) * flops_per_iter);       \
+                                                                               \
+    Conv2DDimensions dims(N, H, W, C, FC, FH, FW);                             \
+    test::Benchmark(#type, BM_CONCAT(kind, Conv2DBwdFilter)(dims)).Run(iters); \
+  }                                                                            \
+  BENCHMARK(BM_NAME(Conv2DBwdFilter_##kind, type, N, H, W, C, FC, FH, FW))
+
+#if defined(INTEL_MKL_DNN_ONLY)
+#define BM_Conv2DBwdFilter(N, H, W, C, FC, FH, FW, type, LABEL)      \
+  BM_Conv2DBwdFilterT(Default, N, H, W, C, FC, FH, FW, type, LABEL); \
+  BM_Conv2DBwdFilterT(Mkl, N, H, W, C, FC, FH, FW, type, LABEL);
+#else
+#define BM_Conv2DBwdFilter(N, H, W, C, FC, FH, FW, type, LABEL) \
+  BM_Conv2DBwdFilterT(Default, N, H, W, C, FC, FH, FW, type, LABEL);
+#endif
+
+// ImageNet Convolutions ---------------------------------------------------- //
+
+BM_Conv2D(32, 28, 28, 96, 128, 3, 3, cpu, "conv3a_00_3x3");
+BM_Conv2D(32, 28, 28, 16, 32, 5, 5, cpu, "conv3a_00_5x5");
+BM_Conv2D(32, 28, 28, 128, 192, 3, 3, cpu, "conv3_00_3x3");
+BM_Conv2D(32, 28, 28, 32, 96, 5, 5, cpu, "conv3_00_5x5");
+BM_Conv2D(32, 14, 14, 96, 204, 3, 3, cpu, "conv4a_00_3x3");
+BM_Conv2D(32, 14, 14, 16, 48, 5, 5, cpu, "conv4a_00_5x5");
+BM_Conv2D(32, 14, 14, 112, 224, 3, 3, cpu, "conv4b_00_3x3");
+
+BM_Conv2DBwdInput(32, 28, 28, 96, 128, 3, 3, cpu, "conv3a_00_3x3");
+BM_Conv2DBwdInput(32, 28, 28, 16, 32, 5, 5, cpu, "conv3a_00_5x5");
+BM_Conv2DBwdInput(32, 28, 28, 128, 192, 3, 3, cpu, "conv3_00_3x3");
+BM_Conv2DBwdInput(32, 28, 28, 32, 96, 5, 5, cpu, "conv3_00_5x5");
+BM_Conv2DBwdInput(32, 14, 14, 96, 204, 3, 3, cpu, "conv4a_00_3x3");
+BM_Conv2DBwdInput(32, 14, 14, 16, 48, 5, 5, cpu, "conv4a_00_5x5");
+BM_Conv2DBwdInput(32, 14, 14, 112, 224, 3, 3, cpu, "conv4b_00_3x3");
+
+BM_Conv2DBwdFilter(32, 28, 28, 96, 128, 3, 3, cpu, "conv3a_00_3x3");
+BM_Conv2DBwdFilter(32, 28, 28, 16, 32, 5, 5, cpu, "conv3a_00_5x5");
+BM_Conv2DBwdFilter(32, 28, 28, 128, 192, 3, 3, cpu, "conv3_00_3x3");
+BM_Conv2DBwdFilter(32, 28, 28, 32, 96, 5, 5, cpu, "conv3_00_5x5");
+BM_Conv2DBwdFilter(32, 14, 14, 96, 204, 3, 3, cpu, "conv4a_00_3x3");
+BM_Conv2DBwdFilter(32, 14, 14, 16, 48, 5, 5, cpu, "conv4a_00_5x5");
+BM_Conv2DBwdFilter(32, 14, 14, 112, 224, 3, 3, cpu, "conv4b_00_3x3");
+
+}  // namespace tensorflow
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 16f7b217b4..689679c838 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1216,9 +1216,11 @@ def tf_mkl_kernel_library( if prefix: srcs = srcs + native.glob( [prefix + "*.cc"], + exclude = [prefix + "*test*"], ) hdrs = hdrs + native.glob( [prefix + "*.h"], + exclude = [prefix + "*test*"], ) # -fno-exceptions in nocopts breaks compilation if header modules are enabled. |