diff options
Diffstat (limited to 'bench/tensors/tensor_benchmarks_cpu.cc')
-rw-r--r-- | bench/tensors/tensor_benchmarks_cpu.cc | 90 |
1 files changed, 51 insertions, 39 deletions
diff --git a/bench/tensors/tensor_benchmarks_cpu.cc b/bench/tensors/tensor_benchmarks_cpu.cc index 68653ba15..8947f4b7f 100644 --- a/bench/tensors/tensor_benchmarks_cpu.cc +++ b/bench/tensors/tensor_benchmarks_cpu.cc @@ -1,35 +1,31 @@ #define EIGEN_USE_THREADS -#include "base/sysinfo.h" -#include "strings/strcat.h" -#include "third_party/eigen3/tensor_benchmarks.h" -#include "thread/threadpool.h" +#include <string> + +#include "tensor_benchmarks.h" -#ifdef __ANDROID__ -#define CREATE_THREAD_POOL(threads) \ -Eigen::ThreadPoolDevice device(threads); -#else #define CREATE_THREAD_POOL(threads) \ -ThreadPool tp(threads); \ -tp.StartWorkers(); \ -Eigen::ThreadPoolDevice device(&tp, threads); -#endif +Eigen::ThreadPool pool(threads); \ +Eigen::ThreadPoolDevice device(&pool, threads); // Simple functions -#define BM_FuncCPU(FUNC, THREADS) \ - static void BM_##FUNC##_##THREADS##T(int iters, int N) { \ - StopBenchmarkTiming(); \ - CREATE_THREAD_POOL(THREADS); \ - BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, N); \ - suite.FUNC(iters); \ - SetBenchmarkLabel(StrCat("using ", THREADS, " threads")); \ - } \ +#define BM_FuncCPU(FUNC, THREADS) \ + static void BM_##FUNC##_##THREADS##T(int iters, int N) { \ + StopBenchmarkTiming(); \ + CREATE_THREAD_POOL(THREADS); \ + BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, N); \ + suite.FUNC(iters); \ + } \ BENCHMARK_RANGE(BM_##FUNC##_##THREADS##T, 10, 5000); BM_FuncCPU(memcpy, 4); BM_FuncCPU(memcpy, 8); BM_FuncCPU(memcpy, 12); +BM_FuncCPU(typeCasting, 4); +BM_FuncCPU(typeCasting, 8); +BM_FuncCPU(typeCasting, 12); + BM_FuncCPU(random, 4); BM_FuncCPU(random, 8); BM_FuncCPU(random, 12); @@ -38,6 +34,14 @@ BM_FuncCPU(slicing, 4); BM_FuncCPU(slicing, 8); BM_FuncCPU(slicing, 12); +BM_FuncCPU(rowChip, 4); +BM_FuncCPU(rowChip, 8); +BM_FuncCPU(rowChip, 12); + +BM_FuncCPU(colChip, 4); +BM_FuncCPU(colChip, 8); +BM_FuncCPU(colChip, 12); + BM_FuncCPU(shuffling, 4); BM_FuncCPU(shuffling, 8); BM_FuncCPU(shuffling, 12); @@ -66,26 +70,29 @@ BM_FuncCPU(transcendentalFunc, 4); BM_FuncCPU(transcendentalFunc, 8); BM_FuncCPU(transcendentalFunc, 12); -BM_FuncCPU(reduction, 4); -BM_FuncCPU(reduction, 8); -BM_FuncCPU(reduction, 12); +BM_FuncCPU(rowReduction, 4); +BM_FuncCPU(rowReduction, 8); +BM_FuncCPU(rowReduction, 12); + +BM_FuncCPU(colReduction, 4); +BM_FuncCPU(colReduction, 8); +BM_FuncCPU(colReduction, 12); // Contractions -#define BM_FuncWithInputDimsCPU(FUNC, D1, D2, D3, THREADS) \ - static void BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T(int iters, int N) {\ - StopBenchmarkTiming(); \ - if (THREADS == 1) { \ - Eigen::DefaultDevice device; \ - BenchmarkSuite<Eigen::DefaultDevice> suite(device, D1, D2, D3); \ - suite.FUNC(iters); \ - } else { \ - CREATE_THREAD_POOL(THREADS); \ - BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, D1, D2, D3); \ - suite.FUNC(iters); \ - } \ - SetBenchmarkLabel(StrCat("using ", THREADS, " threads")); \ - } \ +#define BM_FuncWithInputDimsCPU(FUNC, D1, D2, D3, THREADS) \ + static void BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T(int iters, int N) { \ + StopBenchmarkTiming(); \ + if (THREADS == 1) { \ + Eigen::DefaultDevice device; \ + BenchmarkSuite<Eigen::DefaultDevice, float> suite(device, D1, D2, D3); \ + suite.FUNC(iters); \ + } else { \ + CREATE_THREAD_POOL(THREADS); \ + BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, D1, D2, D3); \ + suite.FUNC(iters); \ + } \ + } \ BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T, 10, 5000); @@ -107,6 +114,12 @@ BM_FuncWithInputDimsCPU(contraction, N, 64, N, 8); BM_FuncWithInputDimsCPU(contraction, N, 64, N, 12); BM_FuncWithInputDimsCPU(contraction, N, 64, N, 16); +BM_FuncWithInputDimsCPU(contraction, N, N, 64, 1); +BM_FuncWithInputDimsCPU(contraction, N, N, 64, 4); +BM_FuncWithInputDimsCPU(contraction, N, N, 64, 8); +BM_FuncWithInputDimsCPU(contraction, N, N, 64, 12); +BM_FuncWithInputDimsCPU(contraction, N, N, 64, 16); + BM_FuncWithInputDimsCPU(contraction, 1, N, N, 1); BM_FuncWithInputDimsCPU(contraction, 1, N, N, 4); BM_FuncWithInputDimsCPU(contraction, 1, N, N, 8); @@ -125,9 +138,8 @@ BM_FuncWithInputDimsCPU(contraction, N, N, 1, 16); static void BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T(int iters, int N) { \ StopBenchmarkTiming(); \ CREATE_THREAD_POOL(THREADS); \ - BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, N); \ + BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, N); \ suite.FUNC(iters, DIM1, DIM2); \ - SetBenchmarkLabel(StrCat("using ", THREADS, " threads")); \ } \ BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T, 128, 5000); |