diff options
Diffstat (limited to 'tensorflow/core/kernels/cast_op_test.cc')
-rw-r--r-- | tensorflow/core/kernels/cast_op_test.cc | 100 |
1 files changed, 100 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/cast_op_test.cc b/tensorflow/core/kernels/cast_op_test.cc new file mode 100644 index 0000000000..f774fbcfe8 --- /dev/null +++ b/tensorflow/core/kernels/cast_op_test.cc @@ -0,0 +1,100 @@ +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/public/tensor.h" +#include <gtest/gtest.h> + +namespace tensorflow { + +template <typename Src, typename Dst> +static Graph* Cast(int num) { + Graph* g = new Graph(OpRegistry::Global()); + Tensor data(DataTypeToEnum<Src>::value, + TensorShape({64, 64, num / (64 * 64)})); + data.flat<Src>().setRandom(); + test::graph::Cast(g, test::graph::Constant(g, data), + DataTypeToEnum<Dst>::value); + return g; +} + +class CastOpTest : public OpsTestBase { + protected: + void MakeOp(DataType src, DataType dst) { + RequireDefaultOps(); + EXPECT_OK(NodeDefBuilder("cast_op", "Cast") + .Input(FakeInput(DT_INT32)) + .Attr("SrcT", src) + .Attr("DstT", dst) + .Finalize(node_def())); + EXPECT_OK(InitOp()); + } +}; + +TEST_F(CastOpTest, Int32ToUint8) { + MakeOp(DT_INT32, DT_UINT8); + AddInputFromArray<int32>(TensorShape({1, 2, 2, 1}), {1, 2, 3, 4}); + ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_UINT8, TensorShape({1, 2, 2, 1})); + test::FillValues<uint8>(&expected, {1, 2, 3, 4}); + test::ExpectTensorEqual<uint8>(expected, *GetOutput(0)); +} + +static void BM_cpu_float_int64(int iters, int num) { + testing::ItemsProcessed(static_cast<int64>(iters) * num); + testing::BytesProcessed(static_cast<int64>(iters) * num * + (sizeof(float) + sizeof(int64))); + testing::UseRealTime(); + test::Benchmark("cpu", Cast<float, int64>(num)).Run(iters); +} +BENCHMARK(BM_cpu_float_int64)->Arg(64 << 10)->Arg(32 << 20); + +static void BM_gpu_float_int64(int iters, int num) { + testing::ItemsProcessed(static_cast<int64>(iters) * num); + testing::BytesProcessed(static_cast<int64>(iters) * num * + (sizeof(float) + sizeof(int64))); + testing::UseRealTime(); + test::Benchmark("gpu", Cast<float, int64>(num)).Run(iters); +} +BENCHMARK(BM_gpu_float_int64)->Arg(64 << 10)->Arg(32 << 20); + +static void BM_cpu_bool_float(int iters, int num) { + testing::ItemsProcessed(static_cast<int64>(iters) * num); + testing::BytesProcessed(static_cast<int64>(iters) * num * + (sizeof(bool) + sizeof(float))); + testing::UseRealTime(); + test::Benchmark("cpu", Cast<bool, float>(num)).Run(iters); +} +BENCHMARK(BM_cpu_bool_float)->Arg(64 << 10)->Arg(32 << 20); + +static void BM_gpu_bool_float(int iters, int num) { + testing::ItemsProcessed(static_cast<int64>(iters) * num); + testing::BytesProcessed(static_cast<int64>(iters) * num * + (sizeof(bool) + sizeof(float))); + testing::UseRealTime(); + test::Benchmark("gpu", Cast<bool, float>(num)).Run(iters); +} +BENCHMARK(BM_gpu_bool_float)->Arg(64 << 10)->Arg(32 << 20); + +static void BM_cpu_float_bfloat16(int iters, int num) { + testing::ItemsProcessed(static_cast<int64>(iters) * num); + testing::BytesProcessed(static_cast<int64>(iters) * num * + (sizeof(float) + sizeof(bfloat16))); + testing::UseRealTime(); + test::Benchmark("cpu", Cast<float, bfloat16>(num)).Run(iters); +} +BENCHMARK(BM_cpu_float_bfloat16)->Arg(64 << 10)->Arg(32 << 20); + +static void BM_cpu_bfloat16_float(int iters, int num) { + testing::ItemsProcessed(static_cast<int64>(iters) * num); + testing::BytesProcessed(static_cast<int64>(iters) * num * + (sizeof(float) + sizeof(bfloat16))); + testing::UseRealTime(); + test::Benchmark("cpu", Cast<bfloat16, float>(num)).Run(iters); +} +BENCHMARK(BM_cpu_bfloat16_float)->Arg(64 << 10)->Arg(32 << 20); + +} // end namespace tensorflow |