aboutsummaryrefslogtreecommitdiffhomepage
path: root/bench/tensors/tensor_benchmarks_cpu.cc
diff options
context:
space:
mode:
Diffstat (limited to 'bench/tensors/tensor_benchmarks_cpu.cc')
-rw-r--r--bench/tensors/tensor_benchmarks_cpu.cc90
1 files changed, 51 insertions, 39 deletions
diff --git a/bench/tensors/tensor_benchmarks_cpu.cc b/bench/tensors/tensor_benchmarks_cpu.cc
index 68653ba15..8947f4b7f 100644
--- a/bench/tensors/tensor_benchmarks_cpu.cc
+++ b/bench/tensors/tensor_benchmarks_cpu.cc
@@ -1,35 +1,31 @@
#define EIGEN_USE_THREADS
-#include "base/sysinfo.h"
-#include "strings/strcat.h"
-#include "third_party/eigen3/tensor_benchmarks.h"
-#include "thread/threadpool.h"
+#include <string>
+
+#include "tensor_benchmarks.h"
-#ifdef __ANDROID__
-#define CREATE_THREAD_POOL(threads) \
-Eigen::ThreadPoolDevice device(threads);
-#else
#define CREATE_THREAD_POOL(threads) \
-ThreadPool tp(threads); \
-tp.StartWorkers(); \
-Eigen::ThreadPoolDevice device(&tp, threads);
-#endif
+Eigen::ThreadPool pool(threads); \
+Eigen::ThreadPoolDevice device(&pool, threads);
// Simple functions
-#define BM_FuncCPU(FUNC, THREADS) \
- static void BM_##FUNC##_##THREADS##T(int iters, int N) { \
- StopBenchmarkTiming(); \
- CREATE_THREAD_POOL(THREADS); \
- BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, N); \
- suite.FUNC(iters); \
- SetBenchmarkLabel(StrCat("using ", THREADS, " threads")); \
- } \
+#define BM_FuncCPU(FUNC, THREADS) \
+ static void BM_##FUNC##_##THREADS##T(int iters, int N) { \
+ StopBenchmarkTiming(); \
+ CREATE_THREAD_POOL(THREADS); \
+ BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, N); \
+ suite.FUNC(iters); \
+ } \
BENCHMARK_RANGE(BM_##FUNC##_##THREADS##T, 10, 5000);
BM_FuncCPU(memcpy, 4);
BM_FuncCPU(memcpy, 8);
BM_FuncCPU(memcpy, 12);
+BM_FuncCPU(typeCasting, 4);
+BM_FuncCPU(typeCasting, 8);
+BM_FuncCPU(typeCasting, 12);
+
BM_FuncCPU(random, 4);
BM_FuncCPU(random, 8);
BM_FuncCPU(random, 12);
@@ -38,6 +34,14 @@ BM_FuncCPU(slicing, 4);
BM_FuncCPU(slicing, 8);
BM_FuncCPU(slicing, 12);
+BM_FuncCPU(rowChip, 4);
+BM_FuncCPU(rowChip, 8);
+BM_FuncCPU(rowChip, 12);
+
+BM_FuncCPU(colChip, 4);
+BM_FuncCPU(colChip, 8);
+BM_FuncCPU(colChip, 12);
+
BM_FuncCPU(shuffling, 4);
BM_FuncCPU(shuffling, 8);
BM_FuncCPU(shuffling, 12);
@@ -66,26 +70,29 @@ BM_FuncCPU(transcendentalFunc, 4);
BM_FuncCPU(transcendentalFunc, 8);
BM_FuncCPU(transcendentalFunc, 12);
-BM_FuncCPU(reduction, 4);
-BM_FuncCPU(reduction, 8);
-BM_FuncCPU(reduction, 12);
+BM_FuncCPU(rowReduction, 4);
+BM_FuncCPU(rowReduction, 8);
+BM_FuncCPU(rowReduction, 12);
+
+BM_FuncCPU(colReduction, 4);
+BM_FuncCPU(colReduction, 8);
+BM_FuncCPU(colReduction, 12);
// Contractions
-#define BM_FuncWithInputDimsCPU(FUNC, D1, D2, D3, THREADS) \
- static void BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T(int iters, int N) {\
- StopBenchmarkTiming(); \
- if (THREADS == 1) { \
- Eigen::DefaultDevice device; \
- BenchmarkSuite<Eigen::DefaultDevice> suite(device, D1, D2, D3); \
- suite.FUNC(iters); \
- } else { \
- CREATE_THREAD_POOL(THREADS); \
- BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, D1, D2, D3); \
- suite.FUNC(iters); \
- } \
- SetBenchmarkLabel(StrCat("using ", THREADS, " threads")); \
- } \
+#define BM_FuncWithInputDimsCPU(FUNC, D1, D2, D3, THREADS) \
+ static void BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T(int iters, int N) { \
+ StopBenchmarkTiming(); \
+ if (THREADS == 1) { \
+ Eigen::DefaultDevice device; \
+ BenchmarkSuite<Eigen::DefaultDevice, float> suite(device, D1, D2, D3); \
+ suite.FUNC(iters); \
+ } else { \
+ CREATE_THREAD_POOL(THREADS); \
+ BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, D1, D2, D3); \
+ suite.FUNC(iters); \
+ } \
+ } \
BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T, 10, 5000);
@@ -107,6 +114,12 @@ BM_FuncWithInputDimsCPU(contraction, N, 64, N, 8);
BM_FuncWithInputDimsCPU(contraction, N, 64, N, 12);
BM_FuncWithInputDimsCPU(contraction, N, 64, N, 16);
+BM_FuncWithInputDimsCPU(contraction, N, N, 64, 1);
+BM_FuncWithInputDimsCPU(contraction, N, N, 64, 4);
+BM_FuncWithInputDimsCPU(contraction, N, N, 64, 8);
+BM_FuncWithInputDimsCPU(contraction, N, N, 64, 12);
+BM_FuncWithInputDimsCPU(contraction, N, N, 64, 16);
+
BM_FuncWithInputDimsCPU(contraction, 1, N, N, 1);
BM_FuncWithInputDimsCPU(contraction, 1, N, N, 4);
BM_FuncWithInputDimsCPU(contraction, 1, N, N, 8);
@@ -125,9 +138,8 @@ BM_FuncWithInputDimsCPU(contraction, N, N, 1, 16);
static void BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T(int iters, int N) { \
StopBenchmarkTiming(); \
CREATE_THREAD_POOL(THREADS); \
- BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, N); \
+ BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, N); \
suite.FUNC(iters, DIM1, DIM2); \
- SetBenchmarkLabel(StrCat("using ", THREADS, " threads")); \
} \
BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T, 128, 5000);