aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/kernels/nn_ops_test.cc
diff options
context:
space:
mode:
author: Jianmin Chen <goog.jmchen@gmail.com> 2016-03-18 08:25:28 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-03-18 09:28:02 -0700
commit a8d61ab8162b56a950b2b5ee8310d2d6c569ebac (patch)
tree 0dd1787091024f12e1fc1fb048ea0c0d3bfa6118 /tensorflow/core/kernels/nn_ops_test.cc
parent eb289f5171c97bfec011488ba4b7f24f2d7f5a38 (diff)
Add benchmark tests for depthwise conv forward gpu kernels
Benchmark                          Time(ns)   CPU(ns)  Iterations
BM_ConvFloatDepthwiseFwdGPU_conv0   4800416   4937895         141  32.7G items/s  32_112_112_3_8_24_3_3_1_2_gpu
BM_ConvFloatDepthwiseFwdGPU_conv1  13550072  13922813         100  30.9G items/s  32_112_112_64_1_64_3_3_1_2_gpu
BM_ConvFloatDepthwiseFwdGPU_conv2   7032385   7324553         100  29.4G items/s  32_56_56_128_1_128_3_3_1_2_gpu
BM_ConvFloatDepthwiseFwdGPU_conv3   2285033   2425335         228  22.2G items/s  32_56_56_128_1_128_3_3_2_2_gpu
BM_ConvFloatDepthwiseFwdGPU_conv4   1743948   1858093         359  29.0G items/s  32_28_28_128_1_128_3_3_1_2_gpu
BM_ConvFloatDepthwiseFwdGPU_conv5   1784560   1897147         320  28.4G items/s  32_14_14_512_1_512_3_3_1_2_gpu
BM_ConvFloatDepthwiseFwdGPU_conv6    971179   1044185         562  25.8G items/s  32_7_7_1024_1_1024_3_3_1_2_gpu

Change: 117553964
Diffstat (limited to 'tensorflow/core/kernels/nn_ops_test.cc')
-rw-r--r-- tensorflow/core/kernels/nn_ops_test.cc 19
1 files changed, 17 insertions, 2 deletions
diff --git a/tensorflow/core/kernels/nn_ops_test.cc b/tensorflow/core/kernels/nn_ops_test.cc
index 05808840f9..dddb8bbb4b 100644
--- a/tensorflow/core/kernels/nn_ops_test.cc
+++ b/tensorflow/core/kernels/nn_ops_test.cc
@@ -492,6 +492,8 @@ static void BM_ConvFloatDepthwise(int iters, int batch, int rows, int cols,
// OD: output_depth
// KR: kernel_rows
// KC: kernel_cols
+// STR: stride
+// PAD: padding
#define BM_ConvFloatDepthwiseFwd(BS, R, C, ID, DM, OD, KR, KC, STR, PAD, \
LABEL) \
@@ -509,12 +511,25 @@ static void BM_ConvFloatDepthwise(int iters, int batch, int rows, int cols,
strings::StrCat(BS, "_", R, "_", C, "_", ID, "_", DM, "_", OD, "_", \
KR, "_", KC, "_", STR, "_", PAD, "_cpu4")); \
} \
+ static void BM_ConvFloatDepthwiseFwdGPU_##LABEL(int iters) { \
+ BM_ConvFloatDepthwise( \
+ iters, BS, R, C, ID, DM, OD, KR, KC, DEPTHWISE_CONV_OP_FWD, 1, STR, \
+ PAD, true, \
+ strings::StrCat(BS, "_", R, "_", C, "_", ID, "_", DM, "_", OD, "_", \
+ KR, "_", KC, "_", STR, "_", PAD, "_gpu")); \
+ } \
BENCHMARK(BM_ConvFloatDepthwiseFwdCPU1_##LABEL); \
- BENCHMARK(BM_ConvFloatDepthwiseFwdCPU4_##LABEL)
+ BENCHMARK(BM_ConvFloatDepthwiseFwdCPU4_##LABEL); \
+ BENCHMARK(BM_ConvFloatDepthwiseFwdGPU_##LABEL);
-// TODO(andydavis,jmchen) Add more benchmarks.
+// The configurations below are mostly from mobilenet models.
BM_ConvFloatDepthwiseFwd(32, 112, 112, 3, 8, 24, 3, 3, 1, SAME, conv0);
BM_ConvFloatDepthwiseFwd(32, 112, 112, 64, 1, 64, 3, 3, 1, SAME, conv1);
+BM_ConvFloatDepthwiseFwd(32, 56, 56, 128, 1, 128, 3, 3, 1, SAME, conv2);
+BM_ConvFloatDepthwiseFwd(32, 56, 56, 128, 1, 128, 3, 3, 2, SAME, conv3);
+BM_ConvFloatDepthwiseFwd(32, 28, 28, 128, 1, 128, 3, 3, 1, SAME, conv4);
+BM_ConvFloatDepthwiseFwd(32, 14, 14, 512, 1, 512, 3, 3, 1, SAME, conv5);
+BM_ConvFloatDepthwiseFwd(32, 7, 7, 1024, 1, 1024, 3, 3, 1, SAME, conv6);
static void BM_LRNFloat(int iters, int depth, int cols, int rows,
int batch_size, int range, int num_threads,