diff options
author | 2018-08-31 17:12:05 -0700 | |
---|---|---|
committer | 2018-08-31 17:21:42 -0700 | |
commit | a8476c15454a8310b9c5e9386fdb094c515b1269 (patch) | |
tree | 01d562f9e6b00bce03877fda79e65f0d64f1b03d /tensorflow/core/kernels/eigen_benchmark_cpu_test.cc | |
parent | 918d671f1bc6c50c7dae116237d0dd9e17b24453 (diff) |
Add benchmarks for Spatial/Cuboid backward-kernel convolutions.
PiperOrigin-RevId: 211167699
Diffstat (limited to 'tensorflow/core/kernels/eigen_benchmark_cpu_test.cc')
-rw-r--r-- | tensorflow/core/kernels/eigen_benchmark_cpu_test.cc | 124 |
1 files changed, 118 insertions, 6 deletions
diff --git a/tensorflow/core/kernels/eigen_benchmark_cpu_test.cc b/tensorflow/core/kernels/eigen_benchmark_cpu_test.cc index fde406ba31..2a8308ef9a 100644 --- a/tensorflow/core/kernels/eigen_benchmark_cpu_test.cc +++ b/tensorflow/core/kernels/eigen_benchmark_cpu_test.cc @@ -80,6 +80,33 @@ void SpatialConvolutionBackwardInput(int iters, int num_threads, ::tensorflow::testing::ItemsProcessed(flops * iters); } +void SpatialConvolutionBackwardKernel(int iters, int num_threads, + /* Input dimensions: */ + int input_batches, int input_height, + int input_width, int input_depth, + /* Filter (kernel) dimensions: */ + int filter_count, int filter_height, + int filter_width) { + ::tensorflow::testing::StopTiming(); + + CREATE_THREAD_POOL(num_threads); + + using Benchmark = + SpatialConvolutionBenchmarksSuite<float, Eigen::ThreadPoolDevice>; + auto benchmark = Benchmark(iters, device); + + typename Benchmark::Dimensions input_dims(input_batches, input_height, + input_width, input_depth); + typename Benchmark::Dimensions filter_dims(filter_height, filter_width, + input_depth, filter_count); + + benchmark.SpatialConvolutionBackwardKernel(input_dims, filter_dims); + + auto filter_size = filter_dims.TotalSize(); + auto flops = filter_size * (input_batches * input_height * input_width); + ::tensorflow::testing::ItemsProcessed(flops * iters); +} + // Macro arguments names: --------------------------------------------------- // // NT: num threads // N: batch size @@ -108,6 +135,14 @@ void SpatialConvolutionBackwardInput(int iters, int num_threads, BENCHMARK( \ BM_SPATIAL_NAME(SpatialConvolutionBwdInput, NT, N, H, W, C, FC, FH, FW)) +#define BM_SpatialConvolutionBwdKernel(NT, N, H, W, C, FC, FH, FW, LABEL) \ + static void BM_SPATIAL_NAME(SpatialConvolutionBwdKernel, NT, N, H, W, C, FC, \ + FH, FW)(int iters) { \ + SpatialConvolutionBackwardKernel(iters, NT, N, H, W, C, FC, FH, FW); \ + } \ + BENCHMARK(BM_SPATIAL_NAME(SpatialConvolutionBwdKernel, NT, N, H, W, C, FC, \ + FH, FW)) + #define BM_SpatialConvolutions(N, H, W, C, FC, FH, FW, LABEL) \ BM_SpatialConvolution(2, N, H, W, C, FC, FH, FW, LABEL); \ BM_SpatialConvolution(4, N, H, W, C, FC, FH, FW, LABEL); \ @@ -120,6 +155,12 @@ void SpatialConvolutionBackwardInput(int iters, int num_threads, BM_SpatialConvolutionBwdInput(8, N, H, W, C, FC, FH, FW, LABEL); \ BM_SpatialConvolutionBwdInput(16, N, H, W, C, FC, FH, FW, LABEL); +#define BM_SpatialConvolutionsBwdKernel(N, H, W, C, FC, FH, FW, LABEL) \ + BM_SpatialConvolutionBwdKernel(2, N, H, W, C, FC, FH, FW, LABEL); \ + BM_SpatialConvolutionBwdKernel(4, N, H, W, C, FC, FH, FW, LABEL); \ + BM_SpatialConvolutionBwdKernel(8, N, H, W, C, FC, FH, FW, LABEL); \ + BM_SpatialConvolutionBwdKernel(16, N, H, W, C, FC, FH, FW, LABEL); + // ImageNet Forward Convolutions -------------------------------------------- // BM_SpatialConvolutions(32, // batch size @@ -174,6 +215,28 @@ BM_SpatialConvolutionsBwdInput(32, 7, 7, 48, 128, 5, 5, "conv5a_00_5x5 / conv5_00_5x5"); BM_SpatialConvolutionsBwdInput(32, 7, 7, 192, 384, 3, 3, "conv5_00_3x3"); +// ImageNet BackwardKernel Convolutions ------------------------------------- // + +BM_SpatialConvolutionsBwdKernel(32, 56, 56, 64, 192, 3, 3, "conv2_00"); +BM_SpatialConvolutionsBwdKernel(32, 28, 28, 96, 128, 3, 3, "conv3a_00_3x3"); +BM_SpatialConvolutionsBwdKernel(32, 28, 28, 16, 32, 5, 5, "conv3a_00_5x5"); +BM_SpatialConvolutionsBwdKernel(32, 28, 28, 128, 192, 3, 3, "conv3_00_3x3"); +BM_SpatialConvolutionsBwdKernel(32, 28, 28, 32, 96, 5, 5, "conv3_00_5x5"); +BM_SpatialConvolutionsBwdKernel(32, 14, 14, 96, 204, 3, 3, "conv4a_00_3x3"); +BM_SpatialConvolutionsBwdKernel(32, 14, 14, 16, 48, 5, 5, "conv4a_00_5x5"); +BM_SpatialConvolutionsBwdKernel(32, 14, 14, 112, 224, 3, 3, "conv4b_00_3x3"); +BM_SpatialConvolutionsBwdKernel(32, 14, 14, 24, 64, 5, 5, + "conv4b_00_5x5 / conv4c_00_5x5"); +BM_SpatialConvolutionsBwdKernel(32, 14, 14, 128, 256, 3, 3, "conv4c_00_3x3"); +BM_SpatialConvolutionsBwdKernel(32, 14, 14, 144, 288, 3, 3, "conv4d_00_3x3"); +BM_SpatialConvolutionsBwdKernel(32, 14, 14, 32, 64, 5, 5, "conv4d_00_5x5"); +BM_SpatialConvolutionsBwdKernel(32, 14, 14, 160, 320, 3, 3, "conv4_00_3x3"); +BM_SpatialConvolutionsBwdKernel(32, 14, 14, 32, 128, 5, 5, "conv4_00_5x5"); +BM_SpatialConvolutionsBwdKernel(32, 7, 7, 160, 320, 3, 3, "conv5a_00_3x3"); +BM_SpatialConvolutionsBwdKernel(32, 7, 7, 48, 128, 5, 5, + "conv5a_00_5x5 / conv5_00_5x5"); +BM_SpatialConvolutionsBwdKernel(32, 7, 7, 192, 384, 3, 3, "conv5_00_3x3"); + // -------------------------------------------------------------------------- // // Cuboid Convolutions // // -------------------------------------------------------------------------- // @@ -235,6 +298,35 @@ void CuboidConvolutionBackwardInput(int iters, int num_threads, ::tensorflow::testing::ItemsProcessed(flops * iters); } +void CuboidConvolutionBackwardKernel(int iters, int num_threads, + /* Input dimensions: */ + int input_batches, int input_height, + int input_width, int input_planes, + int input_depth, + /* Filter (kernel) dimensions: */ + int filter_count, int filter_height, + int filter_width, int filter_planes) { + ::tensorflow::testing::StopTiming(); + + CREATE_THREAD_POOL(num_threads); + + using Benchmark = + CuboidConvolutionBenchmarksSuite<float, Eigen::ThreadPoolDevice>; + auto benchmark = Benchmark(iters, device); + + typename Benchmark::Dimensions input_dims( + input_batches, input_height, input_width, input_planes, input_depth); + typename Benchmark::Dimensions filter_dims( + filter_height, filter_width, filter_planes, input_depth, filter_count); + + benchmark.CuboidConvolutionBackwardKernel(input_dims, filter_dims); + + auto filter_size = filter_dims.TotalSize(); + auto flops = + filter_size * (input_batches * input_height * input_width * input_planes); + ::tensorflow::testing::ItemsProcessed(flops * iters); +} + // Macro arguments names: --------------------------------------------------- // // NT: num threads // N: batch size @@ -247,8 +339,11 @@ void CuboidConvolutionBackwardInput(int iters, int num_threads, // FW: filter width // FP: filter panes -#define BM_CUBOID_NAME(p, NT, N, H, W, P, C, FC, FH, FW, FP) \ - BM_##p##_CPU_##NT##T_in_##N##_##H##_##W##_##P##_##_##C##_f_##FC##_##FH##_##FW +#define BM_CONCAT(a, b) a##b + +#define BM_CUBOID_NAME(p, NT, N, H, W, P, C, FC, FH, FW, FP) \ + BM_CONCAT(BM_##p##_CPU_##NT##T_in_##N##_##H##_##W##_##P##_##C, \ + _f_##FC##_##FH##_##FW##_##FP) #define BM_CuboidConvolution(NT, N, H, W, P, C, FC, FH, FW, FP, LABEL) \ static void BM_CUBOID_NAME(CuboidConvolution, NT, N, H, W, P, C, FC, FH, FW, \ @@ -266,6 +361,15 @@ void CuboidConvolutionBackwardInput(int iters, int num_threads, BENCHMARK(BM_CUBOID_NAME(CuboidConvolutionBwdInput, NT, N, H, W, P, C, FC, \ FH, FW, FP)) +#define BM_CuboidConvolutionBwdKernel(NT, N, H, W, P, C, FC, FH, FW, FP, \ + LABEL) \ + static void BM_CUBOID_NAME(CuboidConvolutionBwdKernel, NT, N, H, W, P, C, \ + FC, FH, FW, FP)(int iters) { \ + CuboidConvolutionBackwardKernel(iters, NT, N, H, W, P, C, FC, FH, FW, FP); \ + } \ + BENCHMARK(BM_CUBOID_NAME(CuboidConvolutionBwdKernel, NT, N, H, W, P, C, FC, \ + FH, FW, FP)) + #define BM_CuboidConvolutions(N, H, W, P, C, FC, FH, FW, FP, LABEL) \ BM_CuboidConvolution(2, N, H, W, P, C, FC, FH, FW, FP, LABEL); \ BM_CuboidConvolution(4, N, H, W, P, C, FC, FH, FW, FP, LABEL); \ @@ -278,13 +382,21 @@ void CuboidConvolutionBackwardInput(int iters, int num_threads, BM_CuboidConvolutionBwdInput(8, N, H, W, P, C, FC, FH, FW, FP, LABEL); \ BM_CuboidConvolutionBwdInput(16, N, H, W, P, C, FC, FH, FW, FP, LABEL); +#define BM_CuboidConvolutionsBwdKernel(N, H, W, P, C, FC, FH, FW, FP, LABEL) \ + BM_CuboidConvolutionBwdKernel(2, N, H, W, P, C, FC, FH, FW, FP, LABEL); \ + BM_CuboidConvolutionBwdKernel(4, N, H, W, P, C, FC, FH, FW, FP, LABEL); \ + BM_CuboidConvolutionBwdKernel(8, N, H, W, P, C, FC, FH, FW, FP, LABEL); \ + BM_CuboidConvolutionBwdKernel(16, N, H, W, P, C, FC, FH, FW, FP, LABEL); + // Random Cuboid Convolutions ----------------------------------------------- // // TODO(ezhulenev): find representative dims for cuboid convolutions (find // models using Conv3D ops). -BM_CuboidConvolutions(16, // batch size - 25, 25, 25, 8, // input: height, width, panes, depth - 32, 5, 5, 5, // filter: count, height, width, panes +BM_CuboidConvolutions(8, // batch size + 25, 25, 25, 4, // input: height, width, panes, depth + 16, 5, 5, 5, // filter: count, height, width, panes "conv3d"); -BM_CuboidConvolutionsBwdInput(16, 25, 25, 25, 8, 32, 5, 5, 5, "conv3d"); +BM_CuboidConvolutionsBwdInput(8, 25, 25, 25, 4, 16, 5, 5, 5, "conv3d"); + +BM_CuboidConvolutionsBwdKernel(8, 25, 25, 25, 4, 16, 5, 5, 5, "conv3d"); |