diff options
Diffstat (limited to 'tensorflow/core/kernels/cast_op_test.cc')
-rw-r--r-- | tensorflow/core/kernels/cast_op_test.cc | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/cast_op_test.cc b/tensorflow/core/kernels/cast_op_test.cc index 5b7529bb8a..a106f287c1 100644 --- a/tensorflow/core/kernels/cast_op_test.cc +++ b/tensorflow/core/kernels/cast_op_test.cc @@ -105,7 +105,12 @@ static void BM_gpu_float_int64(int iters, int num) { testing::BytesProcessed(static_cast<int64>(iters) * num * (sizeof(float) + sizeof(int64))); testing::UseRealTime(); +#if GOOGLE_CUDA test::Benchmark("gpu", Cast<float, int64>(num)).Run(iters); +#endif // GOOGLE_CUDA +#ifdef TENSORFLOW_USE_SYCL + test::Benchmark("sycl", Cast<float, int64>(num)).Run(iters); +#endif // TENSORFLOW_USE_SYCL } BENCHMARK(BM_gpu_float_int64)->Arg(64 << 10)->Arg(32 << 20); @@ -123,7 +128,12 @@ static void BM_gpu_bool_float(int iters, int num) { testing::BytesProcessed(static_cast<int64>(iters) * num * (sizeof(bool) + sizeof(float))); testing::UseRealTime(); +#if GOOGLE_CUDA test::Benchmark("gpu", Cast<bool, float>(num)).Run(iters); +#endif // GOOGLE_CUDA +#ifdef TENSORFLOW_USE_SYCL + test::Benchmark("sycl", Cast<bool, float>(num)).Run(iters); +#endif // TENSORFLOW_USE_SYCL } BENCHMARK(BM_gpu_bool_float)->Arg(64 << 10)->Arg(32 << 20); @@ -168,7 +178,9 @@ static void BM_gpu_float_half(int iters, int num) { testing::BytesProcessed(static_cast<int64>(iters) * num * (sizeof(float) + sizeof(Eigen::half))); testing::UseRealTime(); +#if GOOGLE_CUDA test::Benchmark("gpu", Cast<float, Eigen::half>(num)).Run(iters); +#endif // GOOGLE_CUDA } BENCHMARK(BM_gpu_float_half)->Arg(64 << 10)->Arg(32 << 20); @@ -177,7 +189,9 @@ static void BM_gpu_half_float(int iters, int num) { testing::BytesProcessed(static_cast<int64>(iters) * num * (sizeof(float) + sizeof(Eigen::half))); testing::UseRealTime(); +#if GOOGLE_CUDA test::Benchmark("gpu", Cast<Eigen::half, float>(num)).Run(iters); +#endif // GOOGLE_CUDA } BENCHMARK(BM_gpu_half_float)->Arg(64 << 10)->Arg(32 << 20); |