diff options
Diffstat (limited to 'tensorflow/compiler/xla/tests')
73 files changed, 21419 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD new file mode 100644 index 0000000000..93fe1fee4a --- /dev/null +++ b/tensorflow/compiler/xla/tests/BUILD @@ -0,0 +1,1436 @@ +# Description: +# Base testing infrastructure for XLA. + +licenses(["notice"]) # Apache 2.0 + +package( + default_visibility = [":friends"], + features = ["no_layering_check"], +) + +package_group( + name = "friends", + includes = [ + "//tensorflow/compiler/xla:friends", + ], +) + +# Filegroup used to collect source files for dependency checking. +filegroup( + name = "c_srcs", + data = glob([ + "**/*.cc", + "**/*.h", + ]), +) + +load("//tensorflow/compiler/xla:xla.bzl", "export_dynamic_linkopts") +load("//tensorflow/compiler/xla/tests:build_defs.bzl", "xla_test") +load("//tensorflow/compiler/xla/tests:build_defs.bzl", "generate_backend_suites") +load("//tensorflow/compiler/xla/tests:build_defs.bzl", "generate_backend_test_macros") + +# Generate test_suites for all backends, named "${backend}_tests". +generate_backend_suites() + +cc_library( + name = "test_macros_header", + testonly = True, + hdrs = ["test_macros.h"], + deps = [ + "//tensorflow/compiler/xla:types", + "//tensorflow/core:test", + ], +) + +# Generate a test_macros_${BACKEND} library per backend with the proper copts. 
+generate_backend_test_macros() + +cc_library( + name = "test_utils", + testonly = True, + hdrs = ["test_utils.h"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "literal_test_util", + testonly = True, + srcs = ["literal_test_util.cc"], + hdrs = ["literal_test_util.h"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +cc_library( + name = "hlo_test_base", + testonly = True, + srcs = ["hlo_test_base.cc"], + hdrs = ["hlo_test_base.h"], + deps = [ + ":literal_test_util", + "//tensorflow/compiler/xla:shape_layout", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/legacy_flags:hlo_test_base_flags", + "//tensorflow/compiler/xla/service", + "//tensorflow/compiler/xla/service:backend", + "//tensorflow/compiler/xla/service:compiler", + "//tensorflow/compiler/xla/service:computation_layout", + "//tensorflow/compiler/xla/service:executable", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_execution_profile", + "//tensorflow/compiler/xla/service:hlo_graph_dumper", + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/compiler/xla/service:transfer_manager", + "//tensorflow/core:lib", + 
"//tensorflow/core:stream_executor_no_cuda", + "//tensorflow/core:test", + "//third_party/eigen3", + ], +) + +cc_binary( + name = "local_client_aot_test_helper", + srcs = ["local_client_aot_test_helper.cc"], + deps = [ + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/core:lib", + ], +) + +genrule( + name = "local_client_aot_test_computation", + outs = ["local_client_aot_test_computation.o"], + cmd = "$(location :local_client_aot_test_helper) $(TARGET_CPU) > $(OUTS)", + local = 1, + tools = [":local_client_aot_test_helper"], +) + +cc_library( + name = "client_library_test_base", + testonly = True, + srcs = ["client_library_test_base.cc"], + hdrs = ["client_library_test_base.h"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:hlo_pass_pipeline_flags", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/core:lib", + "//tensorflow/core:stream_executor_no_cuda", + "//tensorflow/core:test", + ], +) + +cc_library( + name = "codegen_test_base", + testonly = True, + srcs = ["codegen_test_base.cc"], + hdrs = ["codegen_test_base.h"], + data = [ + "@llvm//:FileCheck", + ], + deps = [ + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:util", + 
"//tensorflow/compiler/xla/service:backend", + "//tensorflow/compiler/xla/service:compiler", + "//tensorflow/compiler/xla/service:executable", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +cc_library( + name = "local_client_test_base", + testonly = True, + srcs = ["local_client_test_base.cc"], + hdrs = ["local_client_test_base.h"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/service:device_memory_allocator", + "//tensorflow/compiler/xla/service:local_service", + "//tensorflow/compiler/xla/service:platform_util", + "//tensorflow/compiler/xla/service:shaped_buffer", + "//tensorflow/compiler/xla/service:transfer_manager", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/core:lib", + "//tensorflow/core:stream_executor_no_cuda", + ], +) + +xla_test( + name = "bad_rng_shape_validation_test", + srcs = ["bad_rng_shape_validation_test.cc"], + deps = [ + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "check_execution_arity_test", + srcs = 
["check_execution_arity_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "query_inferred_shape_test", + srcs = ["query_inferred_shape_test.cc"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "while_test", + srcs = ["while_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "axpy_simple_test", + srcs = ["axpy_simple_test.cc"], + deps = [ + "//tensorflow/compiler/xla/client:computation_builder", + 
"//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "map_test", + srcs = ["map_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla:xla_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/core:lib", + "//tensorflow/core:stream_executor_no_cuda", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "params_test", + srcs = ["params_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = 
"pred_test", + srcs = ["pred_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "select_test", + srcs = ["select_test.cc"], + deps = [ + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "unary_op_test", + srcs = ["unary_op_test.cc"], + deps = [ + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "scalar_computations_test", + srcs = ["scalar_computations_test.cc"], + shard_count = 16, + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + 
"//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/legacy_flags:llvm_backend_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "deallocation_test", + srcs = ["deallocation_test.cc"], + deps = [ + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "deconstruct_tuple_test", + srcs = ["deconstruct_tuple_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "array_elementwise_ops_test", + srcs = ["array_elementwise_ops_test.cc"], + # This test includes comparisons to NAN, so disable fast-math. 
+ backend_args = { + "cpu": ["--xla_fast_math=false"], + "cpu_parallel": ["--xla_fast_math=false"], + "gpu": ["--xla_fast_math=false"], + }, + shard_count = 25, + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/legacy_flags:llvm_backend_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "dot_operation_test", + srcs = ["dot_operation_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/legacy_flags:cpu_runtime_flags", + "//tensorflow/compiler/xla/legacy_flags:layout_util_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +# Tests the dot operation in some cases that can be performed via a +# runtime call on some backends - e.g. a runtime call to to Eigen. 
+xla_test( + name = "dot_operation_runtime_test", + srcs = ["dot_operation_test.cc"], + backend_args = { + "cpu": ["--xla_cpu_use_eigen"], + "cpu_parallel": ["--xla_cpu_use_eigen"], + }, + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/legacy_flags:cpu_runtime_flags", + "//tensorflow/compiler/xla/legacy_flags:layout_util_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +# Repeat dot_operation_runtime_test with single-threded eigen. +xla_test( + name = "dot_operation_single_threaded_runtime_test", + srcs = ["dot_operation_test.cc"], + backend_args = { + "cpu": [ + "--xla_cpu_use_eigen", + "--xla_cpu_multi_thread_eigen=false", + ], + "cpu_parallel": [ + "--xla_cpu_use_eigen", + "--xla_cpu_multi_thread_eigen=false", + ], + }, + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/legacy_flags:cpu_runtime_flags", + "//tensorflow/compiler/xla/legacy_flags:layout_util_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", 
+ "//tensorflow/core:test", + ], +) + +xla_test( + name = "dot_operation_rowmajor_runtime_test", + srcs = ["dot_operation_test.cc"], + backend_args = { + "cpu": [ + "--xla_cpu_use_eigen", + "--xla_default_layout=major2minor", + ], + "cpu_parallel": [ + "--xla_cpu_use_eigen", + "--xla_default_layout=major2minor", + ], + }, + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/legacy_flags:cpu_runtime_flags", + "//tensorflow/compiler/xla/legacy_flags:layout_util_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "transpose_test", + srcs = ["transpose_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "constants_test", + srcs = ["constants_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:xla_data_proto", + 
"//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "convolution_test", + timeout = "long", + srcs = ["convolution_test.cc"], + shard_count = 25, + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client:padding", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "convolution_variants_test", + timeout = "long", + srcs = ["convolution_variants_test.cc"], + backend_tags = { + # TODO(b/31436974): Fix msan failure. Failed on 2016-09-12. 
+ "cpu": ["nomsan"], + "cpu_parallel": ["nomsan"], + }, + shard_count = 30, + deps = [ + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client:padding", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "convolution_dimension_numbers_test", + timeout = "long", + srcs = ["convolution_dimension_numbers_test.cc"], + shard_count = 20, + deps = [ + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client:padding", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "batch_normalization_test", + srcs = ["batch_normalization_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/lib:arithmetic", + 
"//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "slice_test", + srcs = ["slice_test.cc"], + shard_count = 40, + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "multidimensional_slice_test", + srcs = ["multidimensional_slice_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "dynamic_ops_test", + srcs = ["dynamic_ops_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/service:device_memory_allocator", + "//tensorflow/compiler/xla/service:local_service", + "//tensorflow/compiler/xla/service:platform_util", + "//tensorflow/compiler/xla/service:shaped_buffer", + 
"//tensorflow/compiler/xla/service:transfer_manager", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:stream_executor_no_cuda", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "tuple_test", + srcs = ["tuple_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "vector_ops_reduce_test", + srcs = ["vector_ops_reduce_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "reduce_test", + srcs = ["reduce_test.cc"], + shard_count = 40, + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + 
"//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "reduce_window_test", + timeout = "long", + srcs = ["reduce_window_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client:padding", + "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "select_and_scatter_test", + timeout = "long", + srcs = ["select_and_scatter_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client:padding", + 
"//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "copy_test", + srcs = ["copy_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "call_test", + srcs = ["call_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "custom_call_test", + srcs = ["custom_call_test.cc"], + linkopts = export_dynamic_linkopts, + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "binop_scaling_test", + srcs = 
["binop_scaling_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "broadcast_simple_test", + srcs = ["broadcast_simple_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "pad_test", + srcs = ["pad_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "fmax_test", + srcs = ["fmax_test.cc"], + deps = [ + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + 
"//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "log_test", + srcs = ["log_test.cc"], + deps = [ + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "matrix_ops_simple_test", + srcs = ["matrix_ops_simple_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "prng_test", + srcs = ["prng_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( 
+ name = "reshape_test", + srcs = ["reshape_test.cc"], + shard_count = 30, + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "reverse_test", + srcs = ["reverse_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "vector_ops_simple_test", + srcs = ["vector_ops_simple_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + 
"//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:stream_executor_no_cuda", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "concat_test", + srcs = ["concat_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "convert_test", + srcs = ["convert_test.cc"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:stream_executor_no_cuda", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "compilation_cache_test", + srcs = ["compilation_cache_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla:xla_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + 
"//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "floor_ceil_test", + srcs = ["floor_ceil_test.cc"], + deps = [ + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "compute_constant_test", + srcs = ["compute_constant_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "client_test", + srcs = ["client_test.cc"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation_builder", + 
"//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "inprocess_service_test", + srcs = ["inprocess_service_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "replay_test", + srcs = ["replay_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:protobuf_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/service:session_proto", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "broadcast_test", + srcs = 
["broadcast_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "round_trip_packed_literal_test", + srcs = ["round_trip_packed_literal_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:packed_literal_reader", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "fusion_test", + srcs = ["fusion_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +cc_test( + name = "local_client_aot_test", + srcs = [ + "local_client_aot_test.cc", + ":local_client_aot_test_computation.o", + ], + deps = [ + "//tensorflow/compiler/xla:executable_run_options", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +xla_test( + name = 
"round_trip_transfer_test", + srcs = ["round_trip_transfer_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "set_return_value_test", + srcs = ["set_return_value_test.cc"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +xla_test( + name = "reshape_motion_test", + srcs = ["reshape_motion_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/legacy_flags:cpu_compiler_flags", + 
"//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + +cc_test( + name = "literal_test_util_test", + srcs = ["literal_test_util_test.cc"], + deps = [ + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +# ----------------------------------------------------------------------------- + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc new file mode 100644 index 0000000000..cf6f9a825c --- /dev/null +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -0,0 +1,1662 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
==============================================================================*/

// Tests of XLA elementwise array operations (Neg, Add, Sub, Div, Rem, Mul,
// logical ops, comparisons) built via ComputationBuilder and checked against
// expected result literals on the client test base.

#include <cmath>
#include <limits>
#include <memory>
#include <numeric>
#include <vector>

#include "tensorflow/compiler/xla/array2d.h"
#include "tensorflow/compiler/xla/array3d.h"
#include "tensorflow/compiler/xla/array4d.h"
#include "tensorflow/compiler/xla/client/computation_builder.h"
#include "tensorflow/compiler/xla/client/global_data.h"
#include "tensorflow/compiler/xla/client/local_client.h"
#include "tensorflow/compiler/xla/layout_util.h"
#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h"
#include "tensorflow/compiler/xla/legacy_flags/llvm_backend_flags.h"
#include "tensorflow/compiler/xla/literal_util.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
#include "tensorflow/compiler/xla/tests/literal_test_util.h"
#include "tensorflow/compiler/xla/tests/test_macros.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/types.h"

namespace xla {
namespace {

// Common fixture for the elementwise-op tests below. Exposes the default
// floating-point tolerance passed to the ComputeAndCompare* helpers.
class ArrayElementwiseOpTest : public ClientLibraryTestBase {
 public:
  ErrorSpec error_spec_{0.0001};
};

// Parameterized variant of the fixture; GetParam() supplies an element count
// (used e.g. by AddManyValues).
class ArrayElementwiseOpTestParamCount
    : public ArrayElementwiseOpTest,
      public ::testing::WithParamInterface<int> {};

// Neg of an empty (zero-element) F32 vector is an empty vector.
XLA_TEST_F(ArrayElementwiseOpTest, NegConstantZeroElementF32) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<float>({});
  auto result = builder.Neg(a);

  ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_);
}

// Elementwise negation of an F32 constant vector.
TEST_F(ArrayElementwiseOpTest, NegConstantF32) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<float>({-2.5f, 3.14f, 2.25f, -10.0f, 6.0f});
  auto result = builder.Neg(a);

  ComputeAndCompareR1<float>(&builder, {2.5f, -3.14f, -2.25f, 10.0f, -6.0f}, {},
                             error_spec_);
}
// Negation of S32 values, including both int32 extremes.
TEST_F(ArrayElementwiseOpTest, NegConstantS32) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<int32>({-1, 0, 1, 324,
                                      std::numeric_limits<int32>::min(),
                                      std::numeric_limits<int32>::max()});
  auto result = builder.Neg(a);

  // -min == min for int32 due to an overflow. In C++ it is undefined behavior
  // to do this calculation. For XLA we have not specified that, so it
  // ought to work.
  ComputeAndCompareR1<int32>(&builder,
                             {1, 0, -1, -324, std::numeric_limits<int32>::min(),
                              -std::numeric_limits<int32>::max()},
                             {});
}

// Elementwise addition of two F32 constant vectors.
TEST_F(ArrayElementwiseOpTest, AddTwoConstantF32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<float>({-2.5f, 3.14f, 2.25f, -10.0f, 6.0f});
  auto b = builder.ConstantR1<float>({100.0f, 3.13f, 2.75f, 10.5f, -999.0f});
  auto add = builder.Add(a, b);

  ComputeAndCompareR1<float>(&builder, {97.5f, 6.27f, 5.0f, 0.5f, -993.0f}, {},
                             error_spec_);
}

// Addition of two empty F32 vectors yields an empty vector.
XLA_TEST_F(ArrayElementwiseOpTest, AddTwoConstantZeroElementF32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<float>({});
  auto b = builder.ConstantR1<float>({});
  auto add = builder.Add(a, b);

  ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_);
}

// Adds all four pairings of {constant, parameter} x {constant, parameter}
// operands over GetParam()-sized vectors; expects 4 * (a[i] + b[i]).
TEST_P(ArrayElementwiseOpTestParamCount, AddManyValues) {
  const int count = GetParam();
  ComputationBuilder builder(client_, TestName());
  std::vector<float> a_values;
  std::vector<float> b_values;
  for (int i = 0; i < count; ++i) {
    a_values.push_back(i / static_cast<float>(count));
    b_values.push_back(2 * i / static_cast<float>(count + 2));
  }

  std::unique_ptr<Literal> a_literal = LiteralUtil::CreateR1<float>({a_values});
  std::unique_ptr<GlobalData> a_data =
      client_->TransferToServer(*a_literal).ConsumeValueOrDie();
  auto a_constant = builder.ConstantR1<float>(a_values);
  auto a_param = builder.Parameter(0, a_literal->shape(), "a_param");

  std::unique_ptr<Literal> b_literal = LiteralUtil::CreateR1<float>({b_values});
  std::unique_ptr<GlobalData> b_data =
      client_->TransferToServer(*b_literal).ConsumeValueOrDie();
  // NOTE(review): the next two names look swapped relative to the a_* pair:
  // b_constant holds the Parameter and b_param holds the constant. The test
  // result is unaffected only because all four sums below are added together
  // and addition commutes — but the names should be fixed for readability.
  auto b_constant = builder.Parameter(1, a_literal->shape(), "b_param");
  auto b_param = builder.ConstantR1<float>(b_values);

  auto sum1 = builder.Add(a_constant, b_constant);
  auto sum2 = builder.Add(a_constant, b_param);
  auto sum3 = builder.Add(a_param, b_constant);
  auto sum4 = builder.Add(a_param, b_param);

  auto sum = builder.Add(sum1, sum2);
  sum = builder.Add(sum, sum3);
  sum = builder.Add(sum, sum4);

  std::vector<float> expected;
  for (int64 i = 0; i < count; ++i) {
    expected.push_back(4 * (a_values[i] + b_values[i]));
  }

  ComputeAndCompareR1<float>(&builder, expected, {a_data.get(), b_data.get()},
                             error_spec_);
}

// Elementwise subtraction of two F32 constant vectors.
// (The local is named 'add' throughout the Sub/Div/Rem/Mul tests below —
// presumably copy-paste from the Add tests; harmless but misleading.)
TEST_F(ArrayElementwiseOpTest, SubTwoConstantF32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<float>({-2.5f, 3.14f, 2.25f, -10.0f, 6.0f});
  auto b = builder.ConstantR1<float>({100.0f, 3.13f, 2.75f, 10.5f, -999.0f});
  auto add = builder.Sub(a, b);

  ComputeAndCompareR1<float>(&builder, {-102.5f, 0.01f, -0.5f, -20.5f, 1005.0f},
                             {}, error_spec_);
}

// Subtraction of two empty F32 vectors.
XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantZeroElementF32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<float>({});
  auto b = builder.ConstantR1<float>({});
  auto add = builder.Sub(a, b);

  ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_);
}

// Elementwise subtraction of two S32 constant vectors.
TEST_F(ArrayElementwiseOpTest, SubTwoConstantS32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<int32>({-1, 0, 2, 1000000000});
  auto b = builder.ConstantR1<int32>({-1, 2, 1, -1});
  auto add = builder.Sub(a, b);

  ComputeAndCompareR1<int32>(&builder, {0, -2, 1, 1000000001}, {});
}

// Subtraction of two empty S32 vectors.
XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantZeroElementS32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<int32>({});
  auto b = builder.ConstantR1<int32>({});
  auto add = builder.Sub(a, b);

  ComputeAndCompareR1<int32>(&builder, {}, {});
}

// Elementwise division of two F32 constant vectors.
TEST_F(ArrayElementwiseOpTest, DivTwoConstantF32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<float>({-2.5f, 25.5f, 2.25f, -10.0f, 6.0f});
  auto b = builder.ConstantR1<float>({10.0f, 5.1f, 1.0f, 10.0f, -6.0f});
  auto add = builder.Div(a, b);

  ComputeAndCompareR1<float>(&builder, {-0.25f, 5.0f, 2.25f, -1.0f, -1.0f}, {},
                             error_spec_);
}

// Division of two empty F32 vectors.
XLA_TEST_F(ArrayElementwiseOpTest, DivTwoConstantZeroElementF32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<float>({});
  auto b = builder.ConstantR1<float>({});
  auto add = builder.Div(a, b);

  ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_);
}

// F32 remainder, covering mixed operand signs and signed-zero results.
XLA_TEST_F(ArrayElementwiseOpTest, RemF32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<float>(
      {-2.5f, 25.5f, 2.25f, -10.0f, 6.0f, 3.0f, 3.0f, -1.0f, -8.0f});
  auto b = builder.ConstantR1<float>(
      {10.0f, 5.1f, 1.0f, 10.0f, -6.0f, 2.0f, -2.0f, 7.0f, -4.0f});
  auto add = builder.Rem(a, b);

  ComputeAndCompareR1<float>(
      &builder, {-2.5f, 0.0f, 0.25f, 0.0f, -0.0f, 1.0f, 1.0f, -1.0f, -0.0f}, {},
      error_spec_);
}

// Remainder of two empty F32 vectors.
XLA_TEST_F(ArrayElementwiseOpTest, RemZeroElementF32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<float>({});
  auto b = builder.ConstantR1<float>({});
  auto add = builder.Rem(a, b);

  ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_);
}

// F64 remainder with the same inputs as RemF32s, in double precision.
XLA_TEST_F(ArrayElementwiseOpTest, RemF64s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<double>(
      {-2.5, 25.5, 2.25, -10.0, 6.0, 3.0, 3.0, -1.0, -8.0});
  auto b = builder.ConstantR1<double>(
      {10.0, 5.1, 1.0, 10.0, -6.0, 2.0, -2.0, 7.0, -4.0});
  auto add = builder.Rem(a, b);

  ComputeAndCompareR1<double>(
      &builder, {-2.5, 0.0, 0.25, 0.0, -0.0, 1.0, 1.0, -1.0, -0.0}, {},
      error_spec_);
}

// Elementwise multiplication of two F32 constant vectors.
TEST_F(ArrayElementwiseOpTest, MulTwoConstantF32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<float>({-2.5f, 25.5f, 2.25f, -10.0f, 6.0f});
  auto b = builder.ConstantR1<float>({10.0f, 5.0f, 1.0f, 10.0f, -6.0f});
  auto add = builder.Mul(a, b);

  ComputeAndCompareR1<float>(&builder, {-25.0f, 127.5f, 2.25f, -100.0f, -36.0f},
                             {}, error_spec_);
}

// Multiplication of two empty F32 vectors.
XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantZeroElementF32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<float>({});
  auto b = builder.ConstantR1<float>({});
  auto add = builder.Mul(a, b);

  ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_);
}

// S32 multiplication over the cross product of boundary values, including
// overflowing products.
TEST_F(ArrayElementwiseOpTest, MulTwoConstantS32s) {
  std::vector<int32> data = {0,
                             1,
                             -1,
                             1234,
                             0x1a243514,
                             std::numeric_limits<int32>::max(),
                             std::numeric_limits<int32>::min()};
  // Form the test data set using all products of 'data' with itself.
  std::vector<int32> a_data, b_data, expected;
  for (int32 a : data) {
    for (int32 b : data) {
      a_data.push_back(a);
      b_data.push_back(b);
      // Multiply as uint32 so the host-side expected value avoids signed
      // integer overflow (which is UB in C++); the wrapped bits are then
      // reinterpreted as int32.
      expected.push_back(static_cast<uint32>(a) * static_cast<uint32>(b));
    }
  }

  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<int32>(a_data);
  auto b = builder.ConstantR1<int32>(b_data);
  auto add = builder.Mul(a, b);

  ComputeAndCompareR1<int32>(&builder, expected, {});
}

// Multiplication of two empty S32 vectors.
XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantZeroElementS32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<int32>({});
  auto b = builder.ConstantR1<int32>({});
  auto add = builder.Mul(a, b);

  ComputeAndCompareR1<int32>(&builder, {}, {});
}

// U32 multiplication over the cross product of boundary values (unsigned
// wrap-around is well defined, so expected values are computed directly).
TEST_F(ArrayElementwiseOpTest, MulTwoConstantU32s) {
  std::vector<uint32> data = {0,          1,          0xDEADBEEF, 1234,
                              0x1a243514, 0xFFFFFFFF, 0x80808080};

  // Form the test data set using all products of 'data' with itself.
  std::vector<uint32> a_data, b_data, expected;
  for (uint32 a : data) {
    for (uint32 b : data) {
      a_data.push_back(a);
      b_data.push_back(b);
      expected.push_back(a * b);
    }
  }

  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<uint32>(a_data);
  auto b = builder.ConstantR1<uint32>(b_data);
  auto add = builder.Mul(a, b);

  ComputeAndCompareR1<uint32>(&builder, expected, {});
}

// Elementwise logical AND over the full bool truth table.
TEST_F(ArrayElementwiseOpTest, LogicalAnd) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<bool>({false, false, true, true});
  auto b = builder.ConstantR1<bool>({false, true, false, true});
  auto out = builder.LogicalAnd(a, b);

  ComputeAndCompareR1<bool>(&builder, {false, false, false, true}, {});
}

// Logical AND of two empty bool vectors.
XLA_TEST_F(ArrayElementwiseOpTest, LogicalAndZeroElement) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<bool>({});
  auto b = builder.ConstantR1<bool>({});
  auto out = builder.LogicalAnd(a, b);

  ComputeAndCompareR1<bool>(&builder, {}, {});
}

// Elementwise logical OR over the full bool truth table.
TEST_F(ArrayElementwiseOpTest, LogicalOr) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<bool>({false, false, true, true});
  auto b = builder.ConstantR1<bool>({false, true, false, true});
  auto out = builder.LogicalOr(a, b);

  ComputeAndCompareR1<bool>(&builder, {false, true, true, true}, {});
}

// Logical OR of two empty bool vectors.
XLA_TEST_F(ArrayElementwiseOpTest, LogicalOrZeroElement) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<bool>({});
  auto b = builder.ConstantR1<bool>({});
  auto out = builder.LogicalOr(a, b);

  ComputeAndCompareR1<bool>(&builder, {}, {});
}

// Elementwise logical NOT.
TEST_F(ArrayElementwiseOpTest, LogicalNot) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<bool>({false, true, true, false});
  auto out = builder.LogicalNot(a);

  ComputeAndCompareR1<bool>(&builder, {true, false, false, true}, {});
}

// Logical NOT of an empty bool vector.
XLA_TEST_F(ArrayElementwiseOpTest, LogicalNotZeroElement) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<bool>({});
  auto out = builder.LogicalNot(a);

  ComputeAndCompareR1<bool>(&builder, {}, {});
}

// F32 equality compare; NAN compares unequal to everything (IEEE-754).
TEST_F(ArrayElementwiseOpTest, CompareEqF32s) {
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<float>({-2.5f, 25.5f, 2.25f, NAN, 6.0f});
  auto rhs = builder.ConstantR1<float>({10.0f, 5.0f, 2.25f, 10.0f, NAN});
  auto compare = builder.Eq(lhs, rhs);

  ComputeAndCompareR1<bool>(&builder, {false, false, true, false, false}, {});
}

// Equality compare of two empty F32 vectors.
XLA_TEST_F(ArrayElementwiseOpTest, CompareEqZeroElementF32s) {
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<float>({});
  auto rhs = builder.ConstantR1<float>({});
  auto compare = builder.Eq(lhs, rhs);

  ComputeAndCompareR1<bool>(&builder, {}, {});
}

// F32 greater-or-equal; any comparison involving NAN is false.
TEST_F(ArrayElementwiseOpTest, CompareGeF32s) {
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<float>({-2.5f, 25.5f, 2.25f, NAN, 6.0f});
  auto rhs = builder.ConstantR1<float>({10.0f, 5.0f, 1.0f, 10.0f, NAN});
  auto compare = builder.Ge(lhs, rhs);

  ComputeAndCompareR1<bool>(&builder, {false, true, true, false, false}, {});
}

// F32 greater-than; any comparison involving NAN is false.
TEST_F(ArrayElementwiseOpTest, CompareGtF32s) {
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<float>({-2.5f, 25.5f, 2.25f, NAN, 6.0f});
  auto rhs = builder.ConstantR1<float>({10.0f, 5.0f, 1.0f, 10.0f, NAN});
  auto compare = builder.Gt(lhs, rhs);

  ComputeAndCompareR1<bool>(&builder, {false, true, true, false, false}, {});
}

// F32 less-or-equal; any comparison involving NAN is false.
TEST_F(ArrayElementwiseOpTest, CompareLeF32s) {
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<float>({-2.5f, 5.0f, 2.25f, NAN, 6.0f});
  auto rhs = builder.ConstantR1<float>({10.0f, 5.0f, 1.0f, 10.0f, NAN});
  auto compare = builder.Le(lhs, rhs);

  ComputeAndCompareR1<bool>(&builder, {true, true, false, false, false}, {});
}
// F32 less-than; any comparison involving NAN is false.
TEST_F(ArrayElementwiseOpTest, CompareLtF32s) {
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<float>({-2.5f, 25.5f, 2.25f, NAN, 6.0f});
  auto rhs = builder.ConstantR1<float>({10.0f, 5.0f, 1.0f, 10.0f, NAN});
  auto compare = builder.Lt(lhs, rhs);

  ComputeAndCompareR1<bool>(&builder, {true, false, false, false, false}, {});
}

// S32 comparisons below all use the same cross of {min, 0, max} against
// {min, -1, 0, 1, max} boundary values.
TEST_F(ArrayElementwiseOpTest, CompareEqS32s) {
  const int32 min = std::numeric_limits<int32>::min();
  const int32 max = std::numeric_limits<int32>::max();
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<int32>({min, min, min, 0, 0, 0, max, max, max});
  auto rhs = builder.ConstantR1<int32>({min, 0, max, -1, 0, 1, min, 0, max});
  auto compare = builder.Eq(lhs, rhs);

  ComputeAndCompareR1<bool>(
      &builder, {true, false, false, false, true, false, false, false, true},
      {});
}

// Equality compare of two empty S32 vectors.
XLA_TEST_F(ArrayElementwiseOpTest, CompareEqZeroElementS32s) {
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<int32>({});
  auto rhs = builder.ConstantR1<int32>({});
  auto compare = builder.Eq(lhs, rhs);

  ComputeAndCompareR1<bool>(&builder, {}, {});
}

// S32 not-equal over the boundary-value cross.
TEST_F(ArrayElementwiseOpTest, CompareNeS32s) {
  const int32 min = std::numeric_limits<int32>::min();
  const int32 max = std::numeric_limits<int32>::max();
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<int32>({min, min, min, 0, 0, 0, max, max, max});
  auto rhs = builder.ConstantR1<int32>({min, 0, max, -1, 0, 1, min, 0, max});
  auto compare = builder.Ne(lhs, rhs);

  ComputeAndCompareR1<bool>(
      &builder, {false, true, true, true, false, true, true, true, false}, {});
}

// S32 greater-or-equal over the boundary-value cross.
TEST_F(ArrayElementwiseOpTest, CompareGeS32s) {
  const int32 min = std::numeric_limits<int32>::min();
  const int32 max = std::numeric_limits<int32>::max();
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<int32>({min, min, min, 0, 0, 0, max, max, max});
  auto rhs = builder.ConstantR1<int32>({min, 0, max, -1, 0, 1, min, 0, max});
  auto compare = builder.Ge(lhs, rhs);

  ComputeAndCompareR1<bool>(
      &builder, {true, false, false, true, true, false, true, true, true}, {});
}

// S32 greater-than over the boundary-value cross.
TEST_F(ArrayElementwiseOpTest, CompareGtS32s) {
  const int32 min = std::numeric_limits<int32>::min();
  const int32 max = std::numeric_limits<int32>::max();
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<int32>({min, min, min, 0, 0, 0, max, max, max});
  auto rhs = builder.ConstantR1<int32>({min, 0, max, -1, 0, 1, min, 0, max});
  auto compare = builder.Gt(lhs, rhs);

  ComputeAndCompareR1<bool>(
      &builder, {false, false, false, true, false, false, true, true, false},
      {});
}

// S32 less-or-equal over the boundary-value cross.
TEST_F(ArrayElementwiseOpTest, CompareLeS32s) {
  const int32 min = std::numeric_limits<int32>::min();
  const int32 max = std::numeric_limits<int32>::max();
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<int32>({min, min, min, 0, 0, 0, max, max, max});
  auto rhs = builder.ConstantR1<int32>({min, 0, max, -1, 0, 1, min, 0, max});
  auto compare = builder.Le(lhs, rhs);

  ComputeAndCompareR1<bool>(
      &builder, {true, true, true, false, true, true, false, false, true}, {});
}

// S32 less-than over the boundary-value cross.
TEST_F(ArrayElementwiseOpTest, CompareLtS32s) {
  const int32 min = std::numeric_limits<int32>::min();
  const int32 max = std::numeric_limits<int32>::max();
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<int32>({min, min, min, 0, 0, 0, max, max, max});
  auto rhs = builder.ConstantR1<int32>({min, 0, max, -1, 0, 1, min, 0, max});
  auto compare = builder.Lt(lhs, rhs);

  ComputeAndCompareR1<bool>(
      &builder, {false, true, true, false, false, true, false, false, false},
      {});
}

// U32 comparisons below use a cross of {0, 5, max} against {0, 1, 4, 5, 6,
// max} boundary values.
TEST_F(ArrayElementwiseOpTest, CompareEqU32s) {
  const uint32 max = std::numeric_limits<uint32>::max();
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<uint32>({0, 0, 0, 5, 5, 5, max, max, max});
  auto rhs = builder.ConstantR1<uint32>({0, 1, max, 4, 5, 6, 0, 1, max});
  auto compare = builder.Eq(lhs, rhs);

  ComputeAndCompareR1<bool>(
      &builder, {true, false, false, false, true, false, false, false, true},
      {});
}

// U32 not-equal over the boundary-value cross.
TEST_F(ArrayElementwiseOpTest, CompareNeU32s) {
  const uint32 max = std::numeric_limits<uint32>::max();
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<uint32>({0, 0, 0, 5, 5, 5, max, max, max});
  auto rhs = builder.ConstantR1<uint32>({0, 1, max, 4, 5, 6, 0, 1, max});
  auto compare = builder.Ne(lhs, rhs);

  ComputeAndCompareR1<bool>(
      &builder, {false, true, true, true, false, true, true, true, false}, {});
}

// U32 greater-or-equal over the boundary-value cross.
TEST_F(ArrayElementwiseOpTest, CompareGeU32s) {
  const uint32 max = std::numeric_limits<uint32>::max();
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<uint32>({0, 0, 0, 5, 5, 5, max, max, max});
  auto rhs = builder.ConstantR1<uint32>({0, 1, max, 4, 5, 6, 0, 1, max});
  auto compare = builder.Ge(lhs, rhs);

  ComputeAndCompareR1<bool>(
      &builder, {true, false, false, true, true, false, true, true, true}, {});
}

// U32 greater-than over the boundary-value cross.
TEST_F(ArrayElementwiseOpTest, CompareGtU32s) {
  const uint32 max = std::numeric_limits<uint32>::max();
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<uint32>({0, 0, 0, 5, 5, 5, max, max, max});
  auto rhs = builder.ConstantR1<uint32>({0, 1, max, 4, 5, 6, 0, 1, max});
  auto compare = builder.Gt(lhs, rhs);

  ComputeAndCompareR1<bool>(
      &builder, {false, false, false, true, false, false, true, true, false},
      {});
}

// U32 less-or-equal over the boundary-value cross. (Continues past the end
// of this view; the partial statement below is completed in the next chunk.)
TEST_F(ArrayElementwiseOpTest, CompareLeU32s) {
  const uint32 max = std::numeric_limits<uint32>::max();
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<uint32>({0, 0, 0, 5, 5, 5, max, max, max});
  auto rhs = builder.ConstantR1<uint32>({0, 1, max, 4, 5, 6, 0, 1, max});
  auto compare = 
builder.Le(lhs, rhs); + + ComputeAndCompareR1<bool>( + &builder, {true, true, true, false, true, true, false, false, true}, {}); +} + +TEST_F(ArrayElementwiseOpTest, CompareLtU32s) { + const uint32 max = std::numeric_limits<uint32>::max(); + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR1<uint32>({0, 0, 0, 5, 5, 5, max, max, max}); + auto rhs = builder.ConstantR1<uint32>({0, 1, max, 4, 5, 6, 0, 1, max}); + auto compare = builder.Lt(lhs, rhs); + + ComputeAndCompareR1<bool>( + &builder, {false, true, true, false, false, true, false, false, false}, + {}); +} + +TEST_F(ArrayElementwiseOpTest, PowF32s) { + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR1<float>({4.0f, 2.0f, 2.0f, NAN, 6.0f}); + auto rhs = builder.ConstantR1<float>({2.0f, -2.0f, 3.0f, 10.0f, NAN}); + auto minimum = builder.Pow(lhs, rhs); + + ComputeAndCompareR1<float>(&builder, {16.0f, 0.25f, 8.0f, NAN, NAN}, {}, + error_spec_); +} + +XLA_TEST_F(ArrayElementwiseOpTest, PowZeroElementF32s) { + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR1<float>({}); + auto rhs = builder.ConstantR1<float>({}); + auto minimum = builder.Pow(lhs, rhs); + + ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_); +} + +// Some Pow cases that can be implemented more efficiently. 
// Sums pow(param, e) over a fixed set of special exponents (0, 1, 2, 0.5, -1,
// -0.5) that backends may lower to cheaper ops than a general pow, and checks
// the total against std::pow computed on the host.
TEST_F(ArrayElementwiseOpTest, PowSpecialF32) {
  ComputationBuilder b(client_, TestName());

  std::vector<float> values = {1.0f, 2.0f, 3.2f, -4.0f};
  std::vector<float> exponents = {0.0f, 1.0f, 2.0f, 0.5f, -1.0f, -0.5f};

  std::unique_ptr<Literal> param_literal = LiteralUtil::CreateR1<float>(values);
  std::unique_ptr<GlobalData> param_data =
      client_->TransferToServer(*param_literal).ConsumeValueOrDie();

  auto sum = b.ConstantR0<float>(0.0f);
  auto param = b.Parameter(0, param_literal->shape(), "param");
  for (float exponent : exponents) {
    sum = b.Add(sum, b.Pow(param, b.ConstantR0<float>(exponent)));
  }

  // Host-side reference: accumulate std::pow per input value. (The inner
  // 'float sum' intentionally shadows nothing live — the builder handle
  // 'sum' above is no longer updated here.)
  std::vector<float> expected;
  for (auto value : values) {
    float sum = 0.0f;
    for (float exponent : exponents) {
      sum += std::pow(value, exponent);
    }
    expected.push_back(sum);
  }

  ComputeAndCompareR1<float>(&b, expected, {param_data.get()}, error_spec_);
}

// Squares a large constant vector via Pow(x, 2); the element count is the
// test parameter, exercising different buffer sizes.
TEST_P(ArrayElementwiseOpTestParamCount, SquareManyValues) {
  const int count = GetParam();
  ComputationBuilder builder(client_, TestName());
  std::vector<float> values;
  for (int i = 0; i < count; ++i) {
    values.push_back(i / static_cast<float>(count));
  }
  auto x = builder.ConstantR1<float>(values);
  auto exp = builder.Pow(x, builder.ConstantR0<float>(2.0f));

  std::vector<float> expected;
  for (float value : values) {
    expected.push_back(value * value);
  }

  ComputeAndCompareR1<float>(&builder, expected, {}, error_spec_);
}

// Squares every element of a rank-4 array via Pow(x, 2).
TEST_F(ArrayElementwiseOpTest, SquareIn4D) {
  ComputationBuilder builder(client_, TestName());
  Array4D<float> values(2, 2, 2, 2);

  std::vector<float> values_vector;
  std::vector<float> expected_vector;
  for (int i = 0; i < values.num_elements(); ++i) {
    values_vector.push_back(static_cast<float>(i) / values.num_elements());
    expected_vector.push_back(values_vector.back() * values_vector.back());
  }
  values.SetValues(values_vector);

  Array4D<float> expected(2, 2, 2, 2, expected_vector);

  auto x = builder.ConstantR4FromArray4D<float>(values);
  auto exp = builder.Pow(x, builder.ConstantR0<float>(2.0f));

  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
}

// Pow over a rank-4 array with a zero-sized dimension (no elements).
XLA_TEST_F(ArrayElementwiseOpTest, SquareIn4DZeroElements) {
  ComputationBuilder builder(client_, TestName());
  Array4D<float> values(2, 2, 0, 2);
  Array4D<float> expected(2, 2, 0, 2);

  auto x = builder.ConstantR4FromArray4D<float>(values);
  auto exp = builder.Pow(x, builder.ConstantR0<float>(2.0f));

  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
}

// The GPU backend emits nvvm intrinsics for fmin and fmax whose semantics do
// NOT follow the IEEE rules
//   * fmin(NaN, x) = x
//   * fmax(NaN, x) = x
// so NaN inputs are only exercised on the CPU backend.
//
// TODO(b/28180546): Make this compile in a way that is consistent
// among backends.
TEST_F(ArrayElementwiseOpTest, MinF32s) {
  ComputationBuilder builder(client_, TestName());
#if !defined(XLA_TEST_BACKEND_CPU)
  auto lhs = builder.ConstantR1<float>({1.0f, 1.0f, 2.25f});
  auto rhs = builder.ConstantR1<float>({2.0f, -5.0f, 1.0f});
#else
  auto lhs = builder.ConstantR1<float>({1.0f, 1.0f, 2.25f, NAN, 6.0f});
  auto rhs = builder.ConstantR1<float>({2.0f, -5.0f, 1.0f, 10.0f, NAN});
#endif
  auto minimum = builder.Min(lhs, rhs);

  ComputeAndCompareR1<float>(&builder,
#if !defined(XLA_TEST_BACKEND_CPU)
                             {1.0f, -5.0f, 1.0f},
#else
                             {1.0f, -5.0f, 1.0f, 10.0f, 6.0f},
#endif
                             {}, error_spec_);
}

// Min on zero-element F32 vectors produces a zero-element result.
XLA_TEST_F(ArrayElementwiseOpTest, MinZeroElementF32s) {
  ComputationBuilder builder(client_, TestName());
  auto lhs = builder.ConstantR1<float>({});
  auto rhs = builder.ConstantR1<float>({});
  auto minimum = builder.Min(lhs, rhs);
  ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_);
}

// TODO(b/28180546): Make this compile in a way that is consistent
// among backends. See comment on MinF32s test above.
+XLA_TEST_F(ArrayElementwiseOpTest, MinF64s) { + ComputationBuilder builder(client_, TestName()); +#if !defined(XLA_TEST_BACKEND_CPU) + auto lhs = builder.ConstantR1<double>({1.0, 1.0, 2.25}); + auto rhs = builder.ConstantR1<double>({2.0, -5.0, 1.0}); +#else + auto lhs = builder.ConstantR1<double>({1.0, 1.0, 2.25, NAN, 6.0}); + auto rhs = builder.ConstantR1<double>({2.0, -5.0, 1.0, 10.0, NAN}); +#endif + auto minimum = builder.Min(lhs, rhs); + + ComputeAndCompareR1<double>(&builder, +#if !defined(XLA_TEST_BACKEND_CPU) + {1.0, -5.0, 1.0}, +#else + {1.0, -5.0, 1.0, 10.0, 6.0}, +#endif + {}, error_spec_); +} + +// TODO(b/28180546): Make this compile in a way that is consistent +// among backends. See comment on MinF32s test above. +TEST_F(ArrayElementwiseOpTest, MaxF32s) { + ComputationBuilder builder(client_, TestName()); +#if !defined(XLA_TEST_BACKEND_CPU) + auto lhs = builder.ConstantR1<float>({1.0f, 1.0f, 2.25f}); + auto rhs = builder.ConstantR1<float>({2.0f, -5.0f, 1.0f}); +#else + auto lhs = builder.ConstantR1<float>({1.0f, 1.0f, 2.25f, NAN, 6.0f}); + auto rhs = builder.ConstantR1<float>({2.0f, -5.0f, 1.0f, 10.0f, NAN}); +#endif + auto maximum = builder.Max(lhs, rhs); + + ComputeAndCompareR1<float>(&builder, +#if !defined(XLA_TEST_BACKEND_CPU) + {2.0f, 1.0f, 2.25f}, +#else + {2.0f, 1.0f, 2.25f, 10.0f, 6.0f}, +#endif + {}, error_spec_); +} + +XLA_TEST_F(ArrayElementwiseOpTest, MaxZeroElementF32s) { + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR1<float>({}); + auto rhs = builder.ConstantR1<float>({}); + auto minimum = builder.Max(lhs, rhs); + ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_); +} + +// TODO(b/28180546): Make this compile in a way that is consistent +// among backends. See comment on MinF32s test above. 
// Max on F64 vectors; NaN inputs only on CPU (see MinF32s comment above).
XLA_TEST_F(ArrayElementwiseOpTest, MaxF64s) {
  ComputationBuilder builder(client_, TestName());
#if !defined(XLA_TEST_BACKEND_CPU)
  auto lhs = builder.ConstantR1<double>({1.0, 1.0, 2.25});
  auto rhs = builder.ConstantR1<double>({2.0, -5.0, 1.0});
#else
  auto lhs = builder.ConstantR1<double>({1.0, 1.0, 2.25, NAN, 6.0});
  auto rhs = builder.ConstantR1<double>({2.0, -5.0, 1.0, 10.0, NAN});
#endif
  auto maximum = builder.Max(lhs, rhs);

  ComputeAndCompareR1<double>(&builder,
#if !defined(XLA_TEST_BACKEND_CPU)
                              {2.0, 1.0, 2.25},
#else
                              {2.0, 1.0, 2.25, 10.0, 6.0},
#endif
                              {}, error_spec_);
}

// Max on S32 vectors spanning INT32_MIN, -1, 0, 1 and INT32_MAX.
TEST_F(ArrayElementwiseOpTest, MaxS32s) {
  const int32 min = std::numeric_limits<int32>::min();
  const int32 max = std::numeric_limits<int32>::max();
  ComputationBuilder builder(client_, TestName());
  auto x = builder.ConstantR1<int32>(
      {min, min, min, -1, -1, 0, 0, 0, 1, 1, max, max, max});
  auto y = builder.ConstantR1<int32>(
      {min, max, 0, -10, 0, -1, 0, 1, 0, 10, 0, max, min});
  builder.Max(x, y);

  std::vector<int32> expected = {min, max, 0, -1, 0, 0, 0,
                                 1, 1, 10, max, max, max};
  ComputeAndCompareR1<int32>(&builder, expected, {});
}

// Min on S32 vectors spanning INT32_MIN, -1, 0, 1 and INT32_MAX.
TEST_F(ArrayElementwiseOpTest, MinS32s) {
  const int32 min = std::numeric_limits<int32>::min();
  const int32 max = std::numeric_limits<int32>::max();
  ComputationBuilder builder(client_, TestName());
  auto x = builder.ConstantR1<int32>(
      {min, min, min, -1, -1, 0, 0, 0, 1, 1, max, max, max});
  auto y = builder.ConstantR1<int32>(
      {min, max, 0, -10, 0, -1, 0, 1, 0, 10, 0, max, min});
  builder.Min(x, y);

  std::vector<int32> expected = {min, min, min, -10, -1, -1, 0,
                                 0, 0, 1, 0, max, min};
  ComputeAndCompareR1<int32>(&builder, expected, {});
}

// Max on U32 vectors including UINT32_MAX (unsigned ordering).
TEST_F(ArrayElementwiseOpTest, MaxU32s) {
  const uint32 max = std::numeric_limits<uint32>::max();
  ComputationBuilder builder(client_, TestName());
  auto x = builder.ConstantR1<uint32>({0, 0, 1, 1, 1, max, max, max});
  auto y = builder.ConstantR1<uint32>({0, 1, 0, 1, 10, 0, 234234, max});
  builder.Max(x, y);

  std::vector<uint32> expected = {0, 1, 1, 1, 10, max, max, max};
  ComputeAndCompareR1<uint32>(&builder, expected, {});
}

// Min on U32 vectors including UINT32_MAX (unsigned ordering).
TEST_F(ArrayElementwiseOpTest, MinU32s) {
  const uint32 max = std::numeric_limits<uint32>::max();
  ComputationBuilder builder(client_, TestName());
  auto x = builder.ConstantR1<uint32>({0, 0, 1, 1, 1, max, max, max});
  auto y = builder.ConstantR1<uint32>({0, 1, 0, 1, 10, 0, 234234, max});
  builder.Min(x, y);

  std::vector<uint32> expected = {0, 0, 0, 1, 1, 0, 234234, max};
  ComputeAndCompareR1<uint32>(&builder, expected, {});
}

// Max over a ten-element F32 vector with mirrored signs; first element also
// exercises max(-0.0, -0.0).
TEST_F(ArrayElementwiseOpTest, MaxTenF32s) {
  ComputationBuilder builder(client_, TestName());
  auto x = builder.ConstantR1<float>(
      {-0.0, 1.0, 2.0, -3.0, -4.0, 5.0, 6.0, -7.0, -8.0, 9.0});
  auto y = builder.ConstantR1<float>(
      {-0.0, -1.0, -2.0, 3.0, 4.0, -5.0, -6.0, 7.0, 8.0, -9.0});
  builder.Max(x, y);

  std::vector<float> expected = {-0.0, 1.0, 2.0, 3.0, 4.0,
                                 5.0, 6.0, 7.0, 8.0, 9.0};
  ComputeAndCompareR1<float>(&builder, expected, {}, error_spec_);
}

// Max of a one-element vector against a zero-element vector: the size-1
// operand broadcasts against size 0, yielding an empty result.
XLA_TEST_F(ArrayElementwiseOpTest, MaxR1S1AndR1S0F32s) {
  ComputationBuilder builder(client_, TestName());
  auto u = builder.ConstantR1<float>({3.5});
  auto v = builder.ConstantR1<float>({});
  builder.Max(u, v);

  ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_);
}

// Max of a size-1 R1 against a {0,2} R2, broadcasting over each dimension in
// turn; the zero-sized operand dominates the output shape.
XLA_TEST_F(ArrayElementwiseOpTest, MaxR1S0AndR2S0x2F32s) {
  for (int broadcast_dim : {0, 1}) {
    ComputationBuilder builder(client_, TestName());
    auto u = builder.ConstantR1<float>({3.5});
    auto v = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 2));
    builder.Max(u, v, /*broadcast_dimensions=*/{broadcast_dim});

    ComputeAndCompareR2<float>(&builder, Array2D<float>(0, 2), {}, error_spec_);
  }
}

// Max of an R1 broadcast along dimension 1 of an R2.
TEST_F(ArrayElementwiseOpTest, Max1DAnd2DF32s) {
  ComputationBuilder builder(client_, TestName());
  auto v = builder.ConstantR1<float>({2.0f, 3.0f, 4.0f});
  auto m =
      builder.ConstantR2<float>({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}});
  builder.Max(v, m, /*broadcast_dimensions=*/{1});

  Array2D<float> expected({{2.0f, 3.14f, 4.0f}, {2.25f, 3.0f, 4.0f}});
  ComputeAndCompareR2<float>(&builder, expected, {}, error_spec_);
}

// Same broadcast pattern as above but with zero-element operands.
XLA_TEST_F(ArrayElementwiseOpTest, Max1DAnd2DZeroElementF32s) {
  ComputationBuilder builder(client_, TestName());
  auto v = builder.ConstantR1<float>({});
  auto m = builder.ConstantR2<float>({{}, {}});
  builder.Max(v, m, /*broadcast_dimensions=*/{1});

  Array2D<float> expected({{}, {}});
  ComputeAndCompareR2<float>(&builder, expected, {}, error_spec_);
}

// Max of an R3 against a scalar (empty broadcast_dimensions).
XLA_TEST_F(ArrayElementwiseOpTest, Max3DAndScalarS32s) {
  ComputationBuilder builder(client_, TestName());
  auto scalar = builder.ConstantR0<int32>(2);
  Array3D<int32> a_3d({{{3, 9, -1}, {2, -10, 3}}, {{-2, 2, 8}, {12, 10, 4}}});
  auto array = builder.ConstantR3FromArray3D<int32>(a_3d);
  builder.Max(array, scalar, /*broadcast_dimensions=*/{});

  Array3D<int32> expected({{{3, 9, 2}, {2, 2, 3}}, {{2, 2, 8}, {12, 10, 4}}});
  ComputeAndCompareR3<int32>(&builder, expected, {});
}

// Max of a zero-element R3 against a scalar.
XLA_TEST_F(ArrayElementwiseOpTest, Max3DAndScalarZeroElementS32s) {
  ComputationBuilder builder(client_, TestName());
  auto scalar = builder.ConstantR0<int32>(2);
  Array3D<int32> a_3d(2, 0, 3);
  auto array = builder.ConstantR3FromArray3D<int32>(a_3d);
  builder.Max(array, scalar, /*broadcast_dimensions=*/{});

  Array3D<int32> expected(2, 0, 3);
  ComputeAndCompareR3<int32>(&builder, expected, {});
}

// Min of an R2 against an R1 broadcast along dimension 0.
TEST_F(ArrayElementwiseOpTest, Min2DTo1DF32s) {
  ComputationBuilder builder(client_, TestName());
  auto m =
      builder.ConstantR2<float>({{-10.4f, 64.0f, 6.0f}, {0.1f, 32.0f, 16.1f}});
  auto v = builder.ConstantR1<float>({-10.2f, 16.4f});
  builder.Min(m, v, /*broadcast_dimensions=*/{0});

  Array2D<float> expected({{-10.4f, -10.2f, -10.2f}, {0.1f, 16.4f, 16.1f}});
  ComputeAndCompareR2<float>(&builder, expected, {}, error_spec_);
}

// Min of a zero-element R2 against an R1 broadcast along dimension 0.
XLA_TEST_F(ArrayElementwiseOpTest, Min2DTo1DZeroElementF32s) {
  ComputationBuilder builder(client_, TestName());
  auto m = builder.ConstantR2<float>({{}, {}});
  auto v = builder.ConstantR1<float>({-10.2f, 16.4f});
  builder.Min(m, v, /*broadcast_dimensions=*/{0});

  Array2D<float> expected({{}, {}});
  ComputeAndCompareR2<float>(&builder, expected, {}, error_spec_);
}

// Min of an R2 broadcast into an R4 along dimensions {1, 3}.
XLA_TEST_F(ArrayElementwiseOpTest, Min2DTo4DF32s) {
  ComputationBuilder builder(client_, TestName());
  auto array2d =
      builder.ConstantR2<float>({{-12.2f, 64.3f, 6.1f}, {0.0f, 32.2f, 2.5f}});
  auto array4d = builder.ConstantR4FromArray4D<float>(
      {{{{-12.1f, 32.3f, 6.2f}}, {{0.0f, 32.5f, 3.0f}}},
       {{{-2.5f, 64.29f, 6.5f}}, {{-0.01f, 32.25f, 2.6f}}}});
  builder.Min(array2d, array4d, /*broadcast_dimensions=*/{1, 3});

  Array4D<float> expected(
      {{{{-12.2f, 32.3f, 6.1f}}, {{0.0f, 32.2f, 2.5f}}},
       {{{-12.2f, 64.29f, 6.1f}}, {{-0.01f, 32.2f, 2.5f}}}});
  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
}

// Same R2-into-R4 broadcast but with a zero-element R4 operand.
XLA_TEST_F(ArrayElementwiseOpTest, Min2DTo4DZeroElementF32s) {
  ComputationBuilder builder(client_, TestName());
  auto array2d =
      builder.ConstantR2<float>({{-12.2f, 64.3f, 6.1f}, {0.0f, 32.2f, 2.5f}});
  Array4D<float> arg(2, 2, 0, 3);
  auto array4d = builder.ConstantR4FromArray4D<float>(arg);
  builder.Min(array2d, array4d, /*broadcast_dimensions=*/{1, 3});

  Array4D<float> expected(2, 2, 0, 3);
  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
}

// Elementwise Min over ten S32 values.
XLA_TEST_F(ArrayElementwiseOpTest, MinTenS32s) {
  ComputationBuilder builder(client_, TestName());
  auto x = builder.ConstantR1<int32>({0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
  auto y = builder.ConstantR1<int32>({9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
  builder.Min(x, y);

  std::vector<int32> expected = {0, 1, 2, 3, 4, 4, 3, 2, 1, 0};
  ComputeAndCompareR1<int32>(&builder, expected, {});
}

// Elementwise Max over ten S32 values.
XLA_TEST_F(ArrayElementwiseOpTest, MaxTenS32s) {
  ComputationBuilder builder(client_, TestName());
  auto x = builder.ConstantR1<int32>({0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
  auto y = builder.ConstantR1<int32>({9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
  builder.Max(x, y);

  std::vector<int32> expected = {9, 8, 7, 6, 5, 5, 6, 7, 8, 9};
  ComputeAndCompareR1<int32>(&builder, expected, {});
}

// S32 remainder, including negative operands: the result takes the sign of
// the dividend (C-style truncated division).
// NOTE(review): the local is misnamed 'add' for a Rem op — copy-paste residue.
XLA_TEST_F(ArrayElementwiseOpTest, RemTwoConstantS32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<int32>({-3, 26, 2, -1, 1});
  auto b = builder.ConstantR1<int32>({10, 5, 1, 10, -10});
  auto add = builder.Rem(a, b);

  ComputeAndCompareR1<int32>(&builder, {-3, 1, 0, -1, 1}, {});
}

// Elementwise Clamp with vector min/arg/max, no NaN inputs.
TEST_F(ArrayElementwiseOpTest, NonNanClampF32) {
  ComputationBuilder builder(client_, TestName());
  auto minimum = builder.ConstantR1<float>({1.0f, -6.5f, 1.0f, 2.25f, 0.0f});
  auto argument = builder.ConstantR1<float>({2.0f, 10.0f, -5.0f, 1.0f, 10.0f});
  auto maximum = builder.ConstantR1<float>({3.0f, 0.5f, 25.5f, 5.0f, 123.0});
  auto clamp = builder.Clamp(minimum, argument, maximum);

  ComputeAndCompareR1<float>(&builder, {2.0f, 0.5f, 1.0f, 2.25f, 10.0f}, {},
                             error_spec_);
}

// Clamp with scalar min/max broadcast against a vector argument.
TEST_F(ArrayElementwiseOpTest, ClampF32Scalar) {
  ComputationBuilder builder(client_, TestName());
  auto minimum = builder.ConstantR0<float>(0.0f);
  auto argument = builder.ConstantR1<float>({2.0f, 10.0f, -5.0f, 1.0f, 4.0f});
  auto maximum = builder.ConstantR0<float>(5.0f);
  auto clamp = builder.Clamp(minimum, argument, maximum);

  ComputeAndCompareR1<float>(&builder, {2.0f, 5.0f, 0.0f, 1.0f, 4.0f}, {},
                             error_spec_);
}

// Clamp with every mix of scalar and vector operands, summed so one
// comparison checks all four combinations.
// NOTE(review): 'arg_scalar' is actually an R1 vector (same values as
// arg_vector) — the name looks like copy-paste residue; verify intent.
TEST_F(ArrayElementwiseOpTest, ClampF32ScalarVector) {
  ComputationBuilder builder(client_, TestName());
  auto min_scalar = builder.ConstantR0<float>(0.0f);
  auto min_vector = builder.ConstantR1<float>({1.0f, -6.5f, 1.0f, 2.25f, 0.0f});
  auto arg_vector = builder.ConstantR1<float>({2.0f, 10.0f, -5.0f, 1.0f, 4.0f});
  auto arg_scalar = builder.ConstantR1<float>({2.0f, 10.0f, -5.0f, 1.0f, 4.0f});
  auto max_scalar = builder.ConstantR0<float>(3.0f);
  auto max_vector = builder.ConstantR1<float>({3.0f, 0.5f, 25.5f, 5.0f, 123.0});
  // Perform clamp with broadcasted scalar and vector.
  auto clamp = builder.Add(
      builder.Add(builder.Clamp(min_vector, arg_vector, max_scalar),
                  builder.Clamp(min_scalar, arg_vector, max_vector)),
      builder.Add(builder.Clamp(min_vector, arg_scalar, max_vector),
                  builder.Clamp(min_scalar, arg_scalar, max_vector)));

  ComputeAndCompareR1<float>(&builder, {8.0f, 4.5f, 2.0f, 6.5f, 15.0f}, {},
                             error_spec_);
}

// Add of two server-side parameters (data transferred, not constants).
TEST_F(ArrayElementwiseOpTest, AddTwoParametersF32s) {
  ComputationBuilder builder(client_, TestName());

  std::unique_ptr<Literal> param0_literal =
      LiteralUtil::CreateR1<float>({1.1f, 2.2f, 3.3f, 5.5f});
  std::unique_ptr<GlobalData> param0_data =
      client_->TransferToServer(*param0_literal).ConsumeValueOrDie();

  std::unique_ptr<Literal> param1_literal =
      LiteralUtil::CreateR1<float>({7.2f, 2.3f, 3.4f, 5.6f});
  std::unique_ptr<GlobalData> param1_data =
      client_->TransferToServer(*param1_literal).ConsumeValueOrDie();

  auto p0 = builder.Parameter(0, param0_literal->shape(), "param0");
  auto p1 = builder.Parameter(1, param1_literal->shape(), "param1");
  auto add = builder.Add(p0, p1);

  ComputeAndCompareR1<float>(&builder, {8.3f, 4.5f, 6.7f, 11.1f},
                             {param0_data.get(), param1_data.get()},
                             error_spec_);
}

// Add of two zero-element R3 parameters.
XLA_TEST_F(ArrayElementwiseOpTest, AddTwoParametersZeroElementF32s) {
  ComputationBuilder builder(client_, TestName());

  std::unique_ptr<Literal> param0_literal =
      LiteralUtil::CreateR3FromArray3D<float>(Array3D<float>(0, 7, 0));
  std::unique_ptr<GlobalData> param0_data =
      client_->TransferToServer(*param0_literal).ConsumeValueOrDie();

  std::unique_ptr<Literal> param1_literal =
      LiteralUtil::CreateR3FromArray3D<float>(Array3D<float>(0, 7, 0));
  std::unique_ptr<GlobalData> param1_data =
      client_->TransferToServer(*param1_literal).ConsumeValueOrDie();

  auto p0 = builder.Parameter(0, param0_literal->shape(), "param0");
  auto p1 = builder.Parameter(1, param1_literal->shape(), "param1");
  auto add = builder.Add(p0, p1);

  Array3D<float> expected(0, 7, 0);
  ComputeAndCompareR3<float>(
      &builder, expected, {param0_data.get(), param1_data.get()}, error_spec_);
}

// Add of a constant to a server-side parameter.
TEST_F(ArrayElementwiseOpTest, AddParameterToConstantF32s) {
  ComputationBuilder builder(client_, TestName());

  std::unique_ptr<Literal> param0_literal =
      LiteralUtil::CreateR1<float>({1.1f, 2.2f, 3.3f, 5.5f});
  std::unique_ptr<GlobalData> param0_data =
      client_->TransferToServer(*param0_literal).ConsumeValueOrDie();

  auto a = builder.ConstantR1<float>({1.1f, 2.2f, 3.3f, 4.4f});
  auto p = builder.Parameter(0, param0_literal->shape(), "param0");
  auto add = builder.Add(a, p);

  ComputeAndCompareR1<float>(&builder, {2.2f, 4.4f, 6.6f, 9.9f},
                             {param0_data.get()}, error_spec_);
}

// Elementwise tanh, checked against precomputed values within error_spec_.
TEST_F(ArrayElementwiseOpTest, TanhF32s) {
  ComputationBuilder builder(client_, TestName());
  auto a = builder.ConstantR1<float>({-2.5f, 3.14f, 2.25f});
  auto result = builder.Tanh(a);

  ComputeAndCompareR1<float>(&builder, {-0.986614f, 0.996260f, 0.978026}, {},
                             error_spec_);
}

// Left-leaning add chain: (a + b) + c.
TEST_F(ArrayElementwiseOpTest, AddChainFoldLeft) {
  // a ------ (add) --------- (add)
  //         /               /
  // b -----/               /
  // c---------------------/
  ComputationBuilder builder(client_, TestName());

  auto a = builder.ConstantR1<float>({1.1f, 2.2f, 3.3f, 4.4f});
  auto b = builder.ConstantR1<float>({2.1f, 3.2f, 4.3f, 5.4f});
  auto c = builder.ConstantR1<float>({-3.3f, -15.5f, -7.7f, -29.9f});

  auto add = builder.Add(a, b);
  auto add2 = builder.Add(add, c);

  ComputeAndCompareR1<float>(&builder, {-0.1f, -10.1f, -0.1f, -20.1f}, {},
                             error_spec_);
}

// Right-leaning add chain: a + (b + c).
TEST_F(ArrayElementwiseOpTest, AddChainFoldRight) {
  // b ------ (add) --------- (add)
  //         /               /
  // c -----/               /
  // a---------------------/
  ComputationBuilder builder(client_, TestName());

  auto a = builder.ConstantR1<float>({91.1f, 2.2f, 3.3f, 4.4f});
  auto b = builder.ConstantR1<float>({2.1f, 3.2f, 4.3f, 5.4f});
  auto c = builder.ConstantR1<float>({-3.3f, -15.5f, -7.7f, -29.9f});

  auto add = builder.Add(b, c);
  auto add2 = builder.Add(a, add);

  ComputeAndCompareR1<float>(&builder, {89.9f, -10.1f, -0.1f, -20.1f}, {},
                             error_spec_);
}

// Add of two negations: (-a) + (-b).
TEST_F(ArrayElementwiseOpTest, AddWithNeg) {
  // a ----- (neg) ----- (add)
  //                    /
  // b ----- (neg) ----/
  ComputationBuilder builder(client_, TestName());

  auto a = builder.ConstantR1<float>({91.1f, 2.2f, 3.3f, 4.4f});
  auto b = builder.ConstantR1<float>({2.1f, 3.2f, 4.3f, 5.4f});

  auto neg_a = builder.Neg(a);
  auto neg_b = builder.Neg(b);
  auto result = builder.Add(neg_a, neg_b);

  ComputeAndCompareR1<float>(&builder, {-93.2f, -5.4f, -7.6f, -9.8f}, {},
                             error_spec_);
}

// Balanced add tree: (a + b) + (c + d).
TEST_F(ArrayElementwiseOpTest, AddChainTwoSide) {
  // a ------ (add) ------------\
  //         /                   \
  // b -----/                   (add)
  //                             /
  // c ------ (add) ------------/
  //         /
  // d -----/
  ComputationBuilder builder(client_, TestName());

  auto a = builder.ConstantR1<float>({91.1f, 2.2f, 3.3f, 4.4f});
  auto b = builder.ConstantR1<float>({2.1f, 3.2f, 4.3f, 5.4f});
  auto c = builder.ConstantR1<float>({-3.3f, -15.5f, -7.7f, -29.9f});
  auto d = builder.ConstantR1<float>({-19.0f, 10.0f, -40.0f, 20.2f});

  auto add_ab = builder.Add(a, b);
  auto add_cd = builder.Add(c, d);
  auto add_all = builder.Add(add_ab, add_cd);

  ComputeAndCompareR1<float>(&builder, {70.9f, -0.1f, -40.1f, 0.1f}, {},
                             error_spec_);
}

// Elementwise Add of two same-shape R2 operands.
TEST_F(ArrayElementwiseOpTest, 2DBinaryOpF32s) {
  ComputationBuilder builder(client_, TestName());
  auto a =
      builder.ConstantR2<float>({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}});
  auto b =
      builder.ConstantR2<float>({{-1.5f, 8.14f, 42.0}, {-1.0f, -4.0f, 5.55f}});
  auto add = builder.Add(a, b);

  Array2D<float> expected_array(
      {{-4.0f, 11.28f, 43.0f}, {1.25f, -14.0f, 8.88f}});
  ComputeAndCompareR2<float>(&builder, expected_array, {}, error_spec_);
}

XLA_TEST_F(ArrayElementwiseOpTest, ScalarPlus2DF32) {
  // Add a scalar + matrix.
  ComputationBuilder builder(client_, TestName());
  auto a =
      builder.ConstantR2<float>({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}});
  auto scalar = builder.ConstantR0<float>(3.0f);
  auto add = builder.Add(scalar, a);

  Array2D<float> expected_array({{0.5f, 6.14f, 4.0f}, {5.25f, -7.0f, 6.33f}});
  ComputeAndCompareR2<float>(&builder, expected_array, {}, error_spec_);
}

TEST_F(ArrayElementwiseOpTest, 2DPlusScalarF32) {
  // Add a matrix + scalar (scalar on the rhs this time).
  ComputationBuilder builder(client_, TestName());
  auto a =
      builder.ConstantR2<float>({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}});
  auto scalar = builder.ConstantR0<float>(3.0f);
  auto add = builder.Add(a, scalar);

  Array2D<float> expected_array({{0.5f, 6.14f, 4.0f}, {5.25f, -7.0f, 6.33f}});
  ComputeAndCompareR2<float>(&builder, expected_array, {}, error_spec_);
}

XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo2DF32) {
  // Test simple broadcasting of a R1F32 over R2F32. The vector's size matches
  // only dim 0 of the matrix.
  ComputationBuilder builder(client_, TestName());
  auto v = builder.ConstantR1<float>({20.0f, 40.0f, 60.0f});
  // clang-format off
  auto m = builder.ConstantR2<float>({
      {-2.5f, 3.14f, 1.0f},
      {2.25f, -10.0f, 3.33f}});
  // clang-format on
  auto add = builder.Add(v, m, /*broadcast_dimensions=*/{1});
  Array2D<float> expected_array(
      {{17.5f, 43.14f, 61.0f}, {22.25f, 30.0f, 63.33f}});
  ComputeAndCompareR2<float>(&builder, expected_array, {}, error_spec_);
}

XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Eq) {
  // Test broadcasting in Eq comparison.
  ComputationBuilder builder(client_, TestName());
  auto v = builder.ConstantR1<int32>({42, 73});
  auto m = builder.ConstantR2<int32>({{42, 73}, {42, 52}});

  // This test exercises both possible broadcast dimensions for a vector/matrix
  // comparison.
  auto cmp_dim_0 = builder.Eq(v, m, /*broadcast_dimensions=*/{1});
  auto cmp_dim_1 = builder.Eq(v, m, /*broadcast_dimensions=*/{0});
  auto result = builder.Tuple({cmp_dim_0, cmp_dim_1});

  auto expected = LiteralUtil::MakeTuple(
      {LiteralUtil::CreateR2<bool>({{true, true}, {true, false}}).get(),
       LiteralUtil::CreateR2<bool>({{true, false}, {false, false}}).get()});
  ComputeAndCompareTuple(&builder, *expected, {}, error_spec_);
}

XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Ne) {
  // Test broadcasting in Ne comparison. The result is checked against the
  // textual rendering of the PRED literal.
  ComputationBuilder builder(client_, TestName());
  auto v = builder.ConstantR1<int32>({42, 73});
  auto m = builder.ConstantR2<int32>({{42, 73}, {42, 52}});
  auto cmp = builder.Ne(v, m, /*broadcast_dimensions=*/{1});

  const string expected = R"(pred[2,2] {
  { 00 },
  { 01 },
})";
  EXPECT_EQ(expected, ExecuteToString(&builder, {}));
}

XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Ge) {
  // Test broadcasting in Ge comparison.
  ComputationBuilder builder(client_, TestName());
  auto v = builder.ConstantR1<int32>({1, 2, 3, 4});
  auto m = builder.ConstantR2<int32>({{1, 0, 5, 6}, {42, 52, 10, 4}});
  auto cmp = builder.Ge(v, m, /*broadcast_dimensions=*/{1});

  const string expected = R"(pred[2,4] {
  { 1100 },
  { 0001 },
})";
  EXPECT_EQ(expected, ExecuteToString(&builder, {}));
}

XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Gt) {
  // Test broadcasting in Gt comparison.
  ComputationBuilder builder(client_, TestName());
  auto v = builder.ConstantR1<int32>({1, 2, 3, 4});
  auto m = builder.ConstantR2<int32>({{1, 0, 5, 6}, {42, 52, 10, 4}});
  auto cmp = builder.Gt(v, m, /*broadcast_dimensions=*/{1});

  const string expected = R"(pred[2,4] {
  { 0100 },
  { 0000 },
})";
  EXPECT_EQ(expected, ExecuteToString(&builder, {}));
}

XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Le) {
  // Test broadcasting in Le comparison.
  ComputationBuilder builder(client_, TestName());
  auto v = builder.ConstantR1<int32>({1, 2, 3, 4});
  auto m = builder.ConstantR2<int32>({{1, 0, 5, 6}, {42, 52, 10, 4}});
  auto cmp = builder.Le(v, m, /*broadcast_dimensions=*/{1});

  const string expected = R"(pred[2,4] {
  { 1011 },
  { 1111 },
})";
  EXPECT_EQ(expected, ExecuteToString(&builder, {}));
}

XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Lt) {
  // Test broadcasting in Lt comparison.
  ComputationBuilder builder(client_, TestName());
  auto v = builder.ConstantR1<int32>({1, 2, 3, 4});
  auto m = builder.ConstantR2<int32>({{1, 0, 5, 6}, {42, 52, 10, 4}});
  auto cmp = builder.Lt(v, m, /*broadcast_dimensions=*/{1});

  const string expected = R"(pred[2,4] {
  { 0011 },
  { 1110 },
})";
  EXPECT_EQ(expected, ExecuteToString(&builder, {}));
}

TEST_F(ArrayElementwiseOpTest, Mul2Dby1DF32) {
  // Test simple broadcasting of a R1F32 over R2F32 when the order of binary op
  // arguments is reversed.
  // NOTE(review): the local is misnamed 'add' for a Mul op — copy-paste
  // residue.
  ComputationBuilder builder(client_, TestName());
  auto m = builder.ConstantR2<float>({{1.5f, 2.5f, 3.5f}, {4.5f, 5.5f, 6.5f}});
  auto v = builder.ConstantR1<float>({2.0f, 4.0f, 6.0f});
  auto add = builder.Mul(m, v, /*broadcast_dimensions=*/{1});
  Array2D<float> expected_array({{3.0f, 10.0f, 21.0f}, {9.0f, 22.0f, 39.0f}});
  ComputeAndCompareR2<float>(&builder, expected_array, {}, error_spec_);
}

TEST_F(ArrayElementwiseOpTest, Add2DTo2DWithDegenerateDim1) {
  // Tests broadcasting for arrays with degenerate (size == 1) dimensions.
  ComputationBuilder builder(client_, TestName());
  // m's shape in XLA notation is {3, 2}
  // md's shape in XLA notation is {3, 1}
  // The result has shape {3, 2}, where md is broadcast over m
  auto m =
      builder.ConstantR2<float>({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}});
  auto md = builder.ConstantR2<float>({{10.0f, 20.0f, 30.0f}});
  auto add = builder.Add(m, md);
  Array2D<float> expected_array(
      {{7.5f, 23.14f, 31.0f}, {12.25f, 10.0f, 33.33f}});
  ComputeAndCompareR2<float>(&builder, expected_array, {}, error_spec_);
}

XLA_TEST_F(ArrayElementwiseOpTest, Add2DTo2DWithDegenerateDim0) {
  // Tests broadcasting for arrays with degenerate (size == 1) dimensions.
  ComputationBuilder builder(client_, TestName());
  // m's shape in XLA notation is {3, 2}
  // md's shape in XLA notation is {1, 2}
  // The result has shape {3, 2}, where md is broadcast over m
  auto m =
      builder.ConstantR2<float>({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}});
  auto md = builder.ConstantR2<float>({{10.0f}, {20.0f}});
  auto add = builder.Add(m, md);
  Array2D<float> expected_array(
      {{7.5f, 13.14f, 11.0f}, {22.25f, 10.0f, 23.33f}});
  ComputeAndCompareR2<float>(&builder, expected_array, {}, error_spec_);
}

XLA_TEST_F(ArrayElementwiseOpTest, Add2DsWithDegenerateDimsOuterProduct) {
  // Tests broadcasting for two degenerate arrays. This kind of broadcasting
  // effectively creates an "outer product" operation.
  // This is taken from the Numpy docs example at:
  // http://docs.scipy.org/doc/numpy-1.10.1/user/basics.broadcasting.html
  ComputationBuilder builder(client_, TestName());
  // a's shape in XLA notation is {1, 4}
  // b's shape in XLA notation is {3, 1}
  // The result has shape {3, 4}.
  auto a = builder.ConstantR2<float>({{0.0f}, {10.0f}, {20.0f}, {30.0f}});
  auto b = builder.ConstantR2<float>({{1.0f, 2.0f, 3.0f}});
  auto add = builder.Add(a, b);
  Array2D<float> expected_array({{1.0f, 2.0f, 3.0f},
                                 {11.0f, 12.0f, 13.0f},
                                 {21.0f, 22.0f, 23.0f},
                                 {31.0f, 32.0f, 33.0f}});
  ComputeAndCompareR2<float>(&builder, expected_array, {}, error_spec_);
}

XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo2DF32TwoWaysOver1) {
  // Add together a (2,2) array and a (2) array, using dimension 0 for
  // broadcasting (though there are two ways to broadcast these shapes).
  ComputationBuilder builder(client_, TestName());
  auto v = builder.ConstantR1<float>({20.0f, 40.0f});
  auto m = builder.ConstantR2<float>({{10.0f, 50.0f}, {77.0f, 88.0f}});
  auto add = builder.Add(v, m, /*broadcast_dimensions=*/{1});
  Array2D<float> expected_array({{30.0f, 90.0f}, {97.0f, 128.0f}});
  ComputeAndCompareR2<float>(&builder, expected_array, {}, error_spec_);
}

TEST_F(ArrayElementwiseOpTest, Add1DTo2DF32TwoWaysOver0) {
  // Add together a (2,2) array and a (2) array, using dimension 1 for
  // broadcasting (though there are two ways to broadcast these shapes).
+ ComputationBuilder builder(client_, TestName()); + auto v = builder.ConstantR1<float>({20.0f, 40.0f}); + auto m = builder.ConstantR2<float>({{10.0f, 50.0f}, {77.0f, 88.0f}}); + auto add = builder.Add(v, m, /*broadcast_dimensions=*/{0}); + Array2D<float> expected_array({{30.0f, 70.0f}, {117.0f, 128.0f}}); + ComputeAndCompareR2<float>(&builder, expected_array, {}, error_spec_); +} + +TEST_F(ArrayElementwiseOpTest, 3DBinaryOpF32s) { + // Binary add of two R3s together + ComputationBuilder builder(client_, TestName()); + Array3D<float> a_3d({{{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}}, + {{7.0f, 8.0f}, {9.0f, 10.0f}, {11.0f, 12.0f}}}); + auto a = builder.ConstantR3FromArray3D<float>(a_3d); + + Array3D<float> b_3d({{{2.0f, 4.0f}, {6.0f, 8.0f}, {10.0f, 12.0f}}, + {{14.0f, 16.0f}, {18.0f, 20.0f}, {22.0f, 24.0f}}}); + auto b = builder.ConstantR3FromArray3D<float>(b_3d); + auto add = builder.Add(a, b); + + Array3D<float> expected_3d( + {{{3.0f, 6.0f}, {9.0f, 12.0f}, {15.0f, 18.0f}}, + {{21.0f, 24.0f}, {27.0f, 30.0f}, {33.0f, 36.0f}}}); + ComputeAndCompareR3<float>(&builder, expected_3d, {}, error_spec_); +} + +XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo3DTwoWaysOver2) { + // Add together a (2, 3, 2) array with a (2) array, using dimension 0 for + // broadcasting (though there are two ways to broadcast these shapes). 
+ ComputationBuilder builder(client_, TestName()); + // clang-format off + Array3D<float> a_3d({ + {{1.0f, 2.0f}, + {3.0f, 4.0f}, + {5.0f, 6.0f}}, + {{7.0f, 8.0f}, + {9.0f, 10.0f}, + {11.0f, 12.0f}}, + }); + // clang-format on + auto a = builder.ConstantR3FromArray3D<float>(a_3d); + auto v = builder.ConstantR1<float>({10.0f, 20.0f}); + auto add = builder.Add(a, v, /*broadcast_dimensions=*/{2}); + + Array3D<float> expected_3d( + {{{11.0f, 22.0f}, {13.0f, 24.0f}, {15.0f, 26.0f}}, + {{17.0f, 28.0f}, {19.0f, 30.0f}, {21.0f, 32.0f}}}); + ComputeAndCompareR3<float>(&builder, expected_3d, {}, error_spec_); +} + +XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo3DTwoWaysOver0) { + // Add together a (2, 3, 2) array with a (2) array, using dimension 2 for + // broadcasting (though there are two ways to broadcast these shapes). + ComputationBuilder builder(client_, TestName()); + // clang-format off + Array3D<float> a_3d({ + {{1.0f, 2.0f}, + {3.0f, 4.0f}, + {5.0f, 6.0f}}, + {{7.0f, 8.0f}, + {9.0f, 10.0f}, + {11.0f, 12.0f}}, + }); + // clang-format on + auto a = builder.ConstantR3FromArray3D<float>(a_3d); + auto v = builder.ConstantR1<float>({10.0f, 20.0f}); + auto add = builder.Add(a, v, /*broadcast_dimensions=*/{0}); + + // clang-format off + Array3D<float> expected_3d({ + {{11.0f, 12.0f}, + {13.0f, 14.0f}, + {15.0f, 16.0f}}, + {{27.0f, 28.0f}, + {29.0f, 30.0f}, + {31.0f, 32.0f}}, + }); + // clang-format on + ComputeAndCompareR3<float>(&builder, expected_3d, {}, error_spec_); +} + +XLA_TEST_F(ArrayElementwiseOpTest, Add2DTo3D) { + // Add together a (2, 3, 2) array with a (3, 2) array, using dimensions {1,2} + // for broadcasting. 
+ ComputationBuilder builder(client_, TestName()); + // clang-format off + Array3D<float> a_3d({ + {{1.0f, 2.0f}, + {3.0f, 4.0f}, + {5.0f, 6.0f}}, + {{7.0f, 8.0f}, + {9.0f, 10.0f}, + {11.0f, 12.0f}}, + }); + auto a = builder.ConstantR3FromArray3D<float>(a_3d); + auto m = builder.ConstantR2<float>({ + {10.0f, 20.0f, 30.0f}, + {40.0f, 50.0f, 60.0f}, + }); + auto add = builder.Add(a, m, /*broadcast_dimensions=*/{0, 1}); + + Array3D<float> expected_3d({ + {{11.0f, 12.0f}, + {23.0f, 24.0f}, + {35.0f, 36.0f}}, + {{47.0f, 48.0f}, + {59.0f, 60.0f}, + {71.0f, 72.0f}}, + }); + // clang-format on + ComputeAndCompareR3<float>(&builder, expected_3d, {}, error_spec_); +} + +XLA_TEST_F(ArrayElementwiseOpTest, CompareGtR3F32sWithDegenerateDim2) { + // Comparison between two 3D arrays of compatible shapes: + // (2, 3, 2) and (2, 3, 1): expected to produce a (2, 3, 2) shape of PREDs. + ComputationBuilder builder(client_, TestName()); + Array3D<float> a_3d({{{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}}, + {{7.0f, 8.0f}, {9.0f, 10.0f}, {11.0f, 12.0f}}}); + auto a = builder.ConstantR3FromArray3D<float>(a_3d); + + Array3D<float> b_3d({{{7.0f, 1.0f}, {3.0f, 10.0f}, {15.0f, 6.0f}}}); + auto b = builder.ConstantR3FromArray3D<float>(b_3d); + + auto compare = builder.Gt(a, b); + + Array3D<int> expected_3d( + {{{0, 1}, {0, 0}, {0, 0}}, {{0, 1}, {1, 0}, {0, 1}}}); + const string expected = R"(pred[2,3,2] { +{ { 01 }, + { 00 }, + { 00 } }, +{ { 01 }, + { 10 }, + { 01 } } +})"; + EXPECT_EQ(expected, ExecuteToString(&builder, {})); +} + +TEST_F(ArrayElementwiseOpTest, 4DBinaryOpF32s) { + ComputationBuilder builder(client_, TestName()); + + std::unique_ptr<Array4D<float>> operand_a_4d(new Array4D<float>(2, 3, 4, 5)); + std::unique_ptr<Array4D<float>> operand_b_4d(new Array4D<float>(2, 3, 4, 5)); + std::unique_ptr<Array4D<float>> expected_4d(new Array4D<float>(2, 3, 4, 5)); + float value = 0.0; + for (int64 p = 0; p < 2; ++p) { + for (int64 z = 0; z < 3; ++z) { + for (int64 y = 0; y < 4; ++y) { + for 
(int64 x = 0; x < 5; ++x) { + (*operand_a_4d)(p, z, y, x) = value; + (*operand_b_4d)(p, z, y, x) = 2.0 * value; + (*expected_4d)(p, z, y, x) = 3.0 * value; + value += 0.1; + } + } + } + } + + auto a = builder.ConstantR4FromArray4D<float>(*operand_a_4d); + auto b = builder.ConstantR4FromArray4D<float>(*operand_b_4d); + auto add = builder.Add(a, b); + + ComputeAndCompareR4<float>(&builder, *expected_4d, {}, error_spec_); +} + +TEST_F(ArrayElementwiseOpTest, R4PlusR1InDim1) { + ComputationBuilder builder(client_, TestName()); + + std::unique_ptr<Array4D<float>> operand_a_4d(new Array4D<float>(2, 3, 4, 5)); + std::unique_ptr<Array4D<float>> expected_4d(new Array4D<float>(2, 3, 4, 5)); + std::vector<float> operand_b_1d(3); + std::iota(operand_b_1d.begin(), operand_b_1d.end(), 1.0); + + float value = 0.0; + for (int64 p = 0; p < 2; ++p) { + for (int64 z = 0; z < 3; ++z) { + for (int64 y = 0; y < 4; ++y) { + for (int64 x = 0; x < 5; ++x) { + (*operand_a_4d)(p, z, y, x) = value; + (*expected_4d)(p, z, y, x) = value + operand_b_1d[z]; + value += 0.1; + } + } + } + } + + auto a = builder.ConstantR4FromArray4D<float>(*operand_a_4d); + auto b = builder.ConstantR1<float>(operand_b_1d); + auto add = builder.Add(a, b, {1}); + + ComputeAndCompareR4<float>(&builder, *expected_4d, {}, error_spec_); +} + +TEST_F(ArrayElementwiseOpTest, R4_32x64x2x2_Plus_R1_64) { + constexpr int d0 = 16; + constexpr int d1 = 16; + constexpr int d2 = 2; + constexpr int d3 = 2; + Array4D<float> r4(d0, d1, d2, d3); + r4.Fill(1.0); + std::vector<float> r1(d1); + std::iota(r1.begin(), r1.end(), 1.0); + + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> a_literal = LiteralUtil::CreateR4FromArray4D(r4); + *a_literal->mutable_shape()->mutable_layout() = + LayoutUtil::MakeLayout({0, 1, 2, 3}); + auto a = builder.ConstantLiteral(*a_literal); + auto b = builder.ConstantR1<float>(r1); + builder.Add(a, b, {1}); + + for (int i0 = 0; i0 < d0; ++i0) { + for (int i1 = 0; i1 < d1; ++i1) { + 
for (int i2 = 0; i2 < d2; ++i2) { + for (int i3 = 0; i3 < d3; ++i3) { + r4(i0, i1, i2, i3) += r1[i1]; + } + } + } + } + ComputeAndCompareR4<float>(&builder, r4, {}, error_spec_); +} + +// Show that we can't add two opaques. +TEST_F(ArrayElementwiseOpTest, CannotAddOpaques) { + ComputationBuilder builder(client_, TestName()); + auto shape = ShapeUtil::MakeOpaqueShape(); + auto x = builder.Parameter(0, shape, "x"); + auto concatenated = builder.Add(x, x); + StatusOr<Computation> computation_status = builder.Build(); + ASSERT_FALSE(computation_status.ok()); + EXPECT_MATCH(computation_status.status().ToString(), + testing::ContainsRegex( + "Expected non-opaque argument for lhs of binary operation")); +} + +// Regression test for b/31927799. "slice - y" is fused and requires implicit +// broadcast. +TEST_F(ArrayElementwiseOpTest, ImplictBroadcastInFusedExpressions) { + ComputationBuilder builder(client_, TestName()); + auto x_literal = LiteralUtil::CreateR1<float>({1, 2, 3}); + auto y_literal = LiteralUtil::CreateR1<float>({4, 5}); + auto x_data = client_->TransferToServer(*x_literal).ConsumeValueOrDie(); + auto y_data = client_->TransferToServer(*y_literal).ConsumeValueOrDie(); + + auto x = builder.Parameter(0, x_literal->shape(), "x"); + auto y = builder.Parameter(1, y_literal->shape(), "y"); + auto slice = builder.Slice(x, {1}, {2}); + builder.Sub(slice, y); + + ComputeAndCompareR1<float>(&builder, {-2, -3}, {x_data.get(), y_data.get()}, + error_spec_); +} + +INSTANTIATE_TEST_CASE_P(ArrayElementwiseOpTestParamCount, + ArrayElementwiseOpTestParamCount, + ::testing::Values(127, 128, 129, 17 * 4096)); + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::legacy_flags::AppendLlvmBackendFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, 
flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/axpy_simple_test.cc b/tensorflow/compiler/xla/tests/axpy_simple_test.cc new file mode 100644 index 0000000000..adffac09e3 --- /dev/null +++ b/tensorflow/compiler/xla/tests/axpy_simple_test.cc @@ -0,0 +1,90 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <vector> + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class AxpySimpleTest : public ClientLibraryTestBase {}; + +TEST_F(AxpySimpleTest, AxTenValues) { + ComputationBuilder builder(client_, "ax_10"); + auto alpha = builder.ConstantR0<float>(3.1415926535); + auto x = builder.ConstantR1<float>( + {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0}); + auto ax = builder.Mul(alpha, x); + + std::vector<float> expected = { + -3.14159265, 3.14159265, 6.28318531, -6.28318531, -9.42477796, + 9.42477796, 12.56637061, -12.56637061, -15.70796327, 15.70796327}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(AxpySimpleTest, AxpyZeroValues) { + ComputationBuilder builder(client_, "axpy_10"); + auto alpha = builder.ConstantR0<float>(3.1415926535); + auto x = builder.ConstantR1<float>({}); + auto y = builder.ConstantR1<float>({}); + auto ax = builder.Mul(alpha, x); + auto axpy = builder.Add(ax, y); + + std::vector<float> expected = {}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +TEST_F(AxpySimpleTest, AxpyTenValues) { + ComputationBuilder builder(client_, "axpy_10"); + auto alpha = builder.ConstantR0<float>(3.1415926535); + auto x = builder.ConstantR1<float>( + {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0}); + auto y = builder.ConstantR1<float>( + {5.0, -5.0, -4.0, 4.0, 3.0, -3.0, -2.0, 2.0, 1.0, -1.0}); + auto ax = builder.Mul(alpha, x); + auto axpy = builder.Add(ax, y); + + 
std::vector<float> expected = { + 1.85840735, -1.85840735, 2.28318531, -2.28318531, -6.42477796, + 6.42477796, 10.56637061, -10.56637061, -14.70796327, 14.70796327}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/bad_rng_shape_validation_test.cc b/tensorflow/compiler/xla/tests/bad_rng_shape_validation_test.cc new file mode 100644 index 0000000000..c7b533b80f --- /dev/null +++ b/tensorflow/compiler/xla/tests/bad_rng_shape_validation_test.cc @@ -0,0 +1,85 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Tests that passing a bad shape to RNG's output parameter causes a validation +// failure rather than causing a crash. 
+ +#include <memory> + +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class BadRngShapeValidationTest : public ClientLibraryTestBase {}; + +TEST_F(BadRngShapeValidationTest, DefaultConstructedShapeCreatesError) { + ComputationBuilder builder(client_, TestName()); + auto zero = builder.ConstantR0<float>(0.0); + auto one = builder.ConstantR0<float>(1.0); + Shape default_constructed; + builder.RngUniform(zero, one, default_constructed); + + StatusOr<Computation> computation = builder.Build(); + EXPECT_FALSE(computation.ok()); + LOG(INFO) << "status received: " << computation.status(); + EXPECT_MATCH(computation.status().error_message(), + testing::HasSubstr("shape has invalid")); +} + +TEST_F(BadRngShapeValidationTest, ShapeWithoutLayoutIsOk) { + ComputationBuilder builder(client_, TestName()); + auto zero = builder.ConstantR0<float>(0.0); + auto one = builder.ConstantR0<float>(1.0); + Shape sans_layout; + sans_layout.set_element_type(F32); + sans_layout.add_dimensions(1); + + builder.RngUniform(zero, one, sans_layout); + + StatusOr<Computation> computation = builder.Build(); + ASSERT_TRUE(computation.ok()); + LOG(INFO) << computation.status(); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + 
const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/batch_normalization_test.cc b/tensorflow/compiler/xla/tests/batch_normalization_test.cc new file mode 100644 index 0000000000..598fd69909 --- /dev/null +++ b/tensorflow/compiler/xla/tests/batch_normalization_test.cc @@ -0,0 +1,210 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <cmath> +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class BatchNormalizationTest : public ClientLibraryTestBase { + protected: + BatchNormalizationTest() : input_array_(kSamples, kZ, kY, kX) { + Array2D<float> pz({ + // z0 z1 + {-1.0f, 4.1f}, // p0 + {2.0f, 4.1f}, // p1 + {5.0f, 4.4f}, // p2 + }); + input_array_.FillWithPZ(pz); + input_literal_ = *LiteralUtil::CreateR4FromArray4D(input_array_); + CHECK_EQ(kSamples, input_array_.planes()); + CHECK_EQ(kZ, input_array_.depth()); + CHECK_EQ(kY, input_array_.height()); + CHECK_EQ(kY, input_array_.width()); + } + + static constexpr int64 kSamples = 3; + static constexpr int64 kX = 1; + static constexpr int64 kY = 1; + static constexpr int64 kZ = 2; + + Array4D<float> input_array_; + Literal input_literal_; + const ErrorSpec error_spec_{0.001, 0.001}; +}; + +TEST_F(BatchNormalizationTest, SubtractInZ) { + ComputationBuilder builder(client_, "subtract_in_z_one_sample"); + auto x = builder.ConstantLiteral(input_literal_); + auto y = 
builder.ConstantR1<float>({3.14, 4.25}); + builder.Sub(x, y, /*broadcast_dimensions=*/{1}); + + Array4D<float> expected(kSamples, kZ, kY, kX); + Array2D<float> pz({ + {-1.0f - 3.14f, 4.1f - 4.25f}, // p0 + {2.0f - 3.14f, 4.1f - 4.25f}, // p1 + {5.0f - 3.14f, 4.4f - 4.25f}, // p2 + }); + expected.FillWithPZ(pz); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(BatchNormalizationTest, SquareTesseractElementwise) { + ComputationBuilder builder(client_, "square_tesseract_elementwise"); + auto x = builder.ConstantLiteral(input_literal_); + builder.SquareF32(x); + + Array4D<float> expected(kSamples, kZ, kY, kX); + Array2D<float> expected_pz({ + {std::pow(-1.0f, 2.0f), std::pow(4.1f, 2.0f)}, + {std::pow(2.0f, 2.0f), std::pow(4.1f, 2.0f)}, + {std::pow(5.0f, 2.0f), std::pow(4.4f, 2.0f)}, + }); + expected.FillWithPZ(expected_pz); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(BatchNormalizationTest, SumToZ) { + ComputationBuilder builder(client_, "sum_to_z"); + auto input_activations = builder.ConstantLiteral(input_literal_); + Computation add = CreateScalarAddComputation(F32, &builder); + // Reduce all but the Z dimension. 
+ builder.Reduce(input_activations, builder.ConstantR0<float>(0.0f), add, + {0, 2, 3}); + + std::vector<float> expected = {6, 12.6}; + ComputeAndCompareR1<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(BatchNormalizationTest, SquareAndReduce) { + ComputationBuilder builder(client_, "square_and_reduce"); + auto input_activations = builder.ConstantLiteral(input_literal_); + auto set_means = builder.ConstantR1<float>({2.f, 4.2f}); + auto activation_deviations = builder.Sub(input_activations, set_means, + /*broadcast_dimensions=*/{1}); + Computation add = CreateScalarAddComputation(F32, &builder); + auto dev_squares = builder.SquareF32(activation_deviations); + auto sum_of_squares = builder.Reduce( + dev_squares, builder.ConstantR0<float>(0.0f), add, {0, 2, 3}); + + std::vector<float> expected = {18, 0.06}; + ComputeAndCompareR1<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(BatchNormalizationTest, VarianceToStddev) { + ComputationBuilder builder(client_, "variance_to_stddev"); + auto variance = builder.ConstantR1<float>({6.f, .02f}); + auto sqrt = builder.SqrtF32(variance); + + std::vector<float> expected = {2.44948974f, 0.14142136f}; + ComputeAndCompareR1<float>(&builder, expected, {}, error_spec_); +} + +// Compare against a forward batch normalization example in the NN spec +// reference. +TEST_F(BatchNormalizationTest, SpecComparisonForward) { + ComputationBuilder builder(client_, "batch_normalize_per_spec"); + auto input_activations = + builder.CheckShape(builder.ConstantLiteral(input_literal_), + ShapeUtil::MakeShape(F32, {3, 2, 1, 1})); + auto gamma = builder.ConstantR1<float>({1.0, 1.0}); + auto beta = builder.ConstantR1<float>({0.0, 0.0}); + Computation add = CreateScalarAddComputation(F32, &builder); + // Reduce all dimensions except dimension 1. 
+ Shape TwoElementVectorF32 = ShapeUtil::MakeShape(F32, {2}); + auto sum = builder.CheckShape( + builder.Reduce(input_activations, builder.ConstantR0<float>(0.0f), add, + /*dimensions_to_reduce=*/{0, 2, 3}), + TwoElementVectorF32); + auto input_shape = builder.GetShape(input_activations).ConsumeValueOrDie(); + auto sum_shape = builder.GetShape(sum).ConsumeValueOrDie(); + auto count = builder.ConstantR0<float>(ShapeUtil::ElementsIn(*input_shape) / + ShapeUtil::ElementsIn(*sum_shape)); + auto set_means = builder.Div(sum, count); + + const float kEpsilon = 1e-9f; + auto epsilon = builder.ConstantR0<float>(kEpsilon); + auto epsilon2 = builder.ConstantR1<float>({kEpsilon, kEpsilon}); + auto activation_deviations = builder.Sub(input_activations, set_means, + /*broadcast_dimensions=*/{1}); + auto dev_squares = builder.SquareF32(activation_deviations); + auto sum_of_squares = builder.CheckShape( + builder.Reduce(dev_squares, builder.ConstantR0<float>(0.0f), add, + /*dimensions_to_reduce=*/{0, 2, 3}), + TwoElementVectorF32); + auto variance = builder.Div(sum_of_squares, count); + auto standard_deviation = builder.SqrtF32(variance); + auto standard_deviation_above_epsilon = builder.CheckShape( + builder.Gt(standard_deviation, epsilon), ShapeUtil::MakeShape(PRED, {2})); + auto gt_eps = builder.Select(standard_deviation_above_epsilon, + standard_deviation, epsilon2); + auto normalization_factors = builder.ReciprocalF32(gt_eps); + auto normalized_input_activations = + builder.Mul(activation_deviations, normalization_factors, + /*broadcast_dimensions=*/{1}); + /* auto output_activations = */ builder.Add( + builder.Mul(normalized_input_activations, gamma, + /*broadcast_dimensions=*/{1}), + beta, /*broadcast_dimensions=*/{1}); + + Array4D<float> expected(kSamples, kZ, kY, kX); + Array2D<float> pz({ + {-3.f / std::sqrt(6.f), -.1f / std::sqrt(.02f)}, + {0.f, -.1f / std::sqrt(.02f)}, + {3.f / std::sqrt(6.f), .2f / std::sqrt(.02f)}, + }); + expected.FillWithPZ(pz); + + 
ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/binop_scaling_test.cc b/tensorflow/compiler/xla/tests/binop_scaling_test.cc new file mode 100644 index 0000000000..e825bd435b --- /dev/null +++ b/tensorflow/compiler/xla/tests/binop_scaling_test.cc @@ -0,0 +1,157 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class BinopScalingTest : public ClientLibraryTestBase {}; + +TEST_F(BinopScalingTest, MatrixPlusPseudoMatrixRowVector_32x4) { + auto alhs = MakeLinspaceArray2D(0.0, 1.0, 32, 4); + auto arhs = MakeLinspaceArray2D(0.0, 1.0, 1, 4); + + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR2FromArray2D<float>(*alhs); + auto rhs = builder.ConstantR2FromArray2D<float>(*arhs); + builder.Add(lhs, rhs); + + auto aexpected = ReferenceUtil::MapWithIndexArray2D( + *alhs, [&](float lhs_value, int64 row, int64 col) { + return lhs_value + (*arhs)(0, col); + }); + ComputeAndCompareR2<float>(&builder, *aexpected, {}, ErrorSpec(0.0001)); +} + +TEST_F(BinopScalingTest, MatrixPlusPseudoMatrixRowVector_129x129) { + auto alhs = MakeLinspaceArray2D(0.0, 1.0, 129, 129); + auto arhs = MakeLinspaceArray2D(0.0, 1.0, 1, 129); + + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR2FromArray2D<float>(*alhs); + auto rhs = builder.ConstantR2FromArray2D<float>(*arhs); + builder.Add(lhs, rhs); + + auto aexpected = ReferenceUtil::MapWithIndexArray2D( + *alhs, [&](float lhs_value, int64 row, int64 col) { + return lhs_value + (*arhs)(0, col); + }); + ComputeAndCompareR2<float>(&builder, *aexpected, {}, ErrorSpec(0.0001)); +} + +TEST_F(BinopScalingTest, 
MatrixPlusPseudoMatrixColVector_9x5) { + auto alhs = MakeLinspaceArray2D(0.0, 1.0, 9, 5); + auto arhs = MakeLinspaceArray2D(0.0, 1.0, 9, 1); + + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR2FromArray2D<float>(*alhs); + auto rhs = builder.ConstantR2FromArray2D<float>(*arhs); + builder.Add(lhs, rhs); + + auto aexpected = ReferenceUtil::MapWithIndexArray2D( + *alhs, [&](float lhs_value, int64 row, int64 col) { + return lhs_value + (*arhs)(row, 0); + }); + ComputeAndCompareR2<float>(&builder, *aexpected, {}, ErrorSpec(0.0001)); +} + +TEST_F(BinopScalingTest, MatrixPlusPseudoMatrixColVector_129x257) { + auto alhs = MakeLinspaceArray2D(0.0, 1.0, 129, 257); + auto arhs = MakeLinspaceArray2D(0.0, 1.0, 129, 1); + + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR2FromArray2D<float>(*alhs); + auto rhs = builder.ConstantR2FromArray2D<float>(*arhs); + builder.Add(lhs, rhs); + + auto aexpected = ReferenceUtil::MapWithIndexArray2D( + *alhs, [&](float lhs_value, int64 row, int64 col) { + return lhs_value + (*arhs)(row, 0); + }); + ComputeAndCompareR2<float>(&builder, *aexpected, {}, ErrorSpec(0.0001)); +} + +TEST_F(BinopScalingTest, R0PlusR2F32) { + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR0<float>(42.0); + auto rhs = builder.ConstantR2<float>({ + {1.0, 2.0}, {3.0, 4.0}, + }); + builder.Add(lhs, rhs); + + Array2D<float> expected(2, 2); + expected(0, 0) = 42.0 + 1.0; + expected(0, 1) = 42.0 + 2.0; + expected(1, 0) = 42.0 + 3.0; + expected(1, 1) = 42.0 + 4.0; + ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +TEST_F(BinopScalingTest, R4PlusR0S32) { + ComputationBuilder builder(client_, TestName()); + // clang-format off + Array4D<int> lhs_array({ + {{{1, 2}, + {3, 4}, + {5, 6}}}, + {{{7, 8}, + {9, 10}, + {11, 12}}}, + }); + Array4D<int> expected({ + {{{43, 44}, + {45, 46}, + {47, 48}}}, + {{{49, 50}, + {51, 52}, + {53, 54}}}, + }); + // clang-format on 
+ + auto lhs = builder.ConstantR4FromArray4D(lhs_array); + auto rhs = builder.ConstantR0<int>(42); + builder.Add(lhs, rhs); + ComputeAndCompareR4<int>(&builder, expected, {}); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/broadcast_simple_test.cc b/tensorflow/compiler/xla/tests/broadcast_simple_test.cc new file mode 100644 index 0000000000..200d4d4563 --- /dev/null +++ b/tensorflow/compiler/xla/tests/broadcast_simple_test.cc @@ -0,0 +1,179 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> +#include <numeric> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +using BroadcastSimpleTest = ClientLibraryTestBase; + +XLA_TEST_F(BroadcastSimpleTest, ScalarNoOpBroadcast) { + ComputationBuilder b(client_, TestName()); + b.Broadcast(b.ConstantR0<float>(1.5), {}); + ComputeAndCompareR0<float>(&b, 1.5, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(BroadcastSimpleTest, ScalarTo2D_2x3) { + ComputationBuilder b(client_, TestName()); + b.Broadcast(b.ConstantR0<float>(2.25), {2, 3}); + Array2D<float> expected(2, 3, 2.25); + ComputeAndCompareR2<float>(&b, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(BroadcastSimpleTest, ScalarTo2D_2x0) { + ComputationBuilder b(client_, TestName()); + b.Broadcast(b.ConstantR0<float>(2.25), {2, 0}); + Array2D<float> expected(2, 0); + ComputeAndCompareR2<float>(&b, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(BroadcastSimpleTest, ScalarTo2D_0x2) { + ComputationBuilder b(client_, TestName()); + b.Broadcast(b.ConstantR0<float>(2.25), {0, 2}); + Array2D<float> expected(0, 2); + ComputeAndCompareR2<float>(&b, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(BroadcastSimpleTest, 1DTo2D) { + ComputationBuilder b(client_, TestName()); + b.Broadcast(b.ConstantR1<float>({1, 2, 
3}), {2}); + + Array2D<float> expected(2, 3); + expected(0, 0) = 1; + expected(0, 1) = 2; + expected(0, 2) = 3; + expected(1, 0) = 1; + expected(1, 1) = 2; + expected(1, 2) = 3; + ComputeAndCompareR2<float>(&b, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(BroadcastSimpleTest, ZeroElement_1DTo2D) { + ComputationBuilder b(client_, TestName()); + b.Broadcast(b.ConstantR1<float>({}), {2}); + + Array2D<float> expected(2, 0); + ComputeAndCompareR2<float>(&b, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(BroadcastSimpleTest, 1DToZeroElement2D) { + ComputationBuilder b(client_, TestName()); + b.Broadcast(b.ConstantR1<float>({1, 2, 3}), {0}); + + Array2D<float> expected(0, 3); + ComputeAndCompareR2<float>(&b, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(BroadcastSimpleTest, InDimensionAndDegenerateBroadcasting) { + // Verify that binary op and degenerate dimension broadcast work together in + // the same operation. + // + // The lhs shape [1, 2] is first broadcast up to [2, 1, 2] using in-dimension + // broadcasting (broadcast_dimensions {1, 2}), then is added to the rhs shape + // [2, 3, 1]. Degenerate dimension broadcasting then broadcasts the size one + // dimensions. + ComputationBuilder b(client_, TestName()); + + b.Add(b.ConstantR2<float>({{1.0, 5.0}}), + b.ConstantLiteral(*LiteralUtil::CreateR3<float>( + {{{2.0}, {3.0}, {4.0}}, {{5.0}, {6.0}, {7.0}}})), + /*broadcast_dimensions=*/{1, 2}); + + auto expected = + LiteralUtil::CreateR3<float>({{{3.0, 7.0}, {4.0, 8.0}, {5.0, 9.0}}, + {{6.0, 10.0}, {7.0, 11.0}, {8.0, 12.0}}}); + + ComputeAndCompareLiteral(&b, *expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(BroadcastSimpleTest, InvalidBinaryAndDegenerateBroadcasting) { + // Binary dimension broadcasting of the smaller lhs ([2, 2] up to [2, 2, 2]) + // results in a shape incompatible with the lhs [2, 3, 1]. 
+ ComputationBuilder b(client_, TestName()); + + b.Add(b.ConstantR2<float>({{1.0, 5.0}, {1.0, 5.0}}), + b.ConstantLiteral(*LiteralUtil::CreateR3<float>( + {{{2.0}, {3.0}, {4.0}}, {{5.0}, {6.0}, {7.0}}})), + /*broadcast_dimensions=*/{1, 2}); + + auto result_status = Execute(&b, {}); + EXPECT_FALSE(result_status.ok()); + EXPECT_MATCH(result_status.status().error_message(), + testing::ContainsRegex("broadcast dimension 0 mismatch")); +} + +XLA_TEST_F(BroadcastSimpleTest, InvalidInDimensionBroadcasting) { + // Test invalid broadcasting with [1, 2] and [2, 3] inputs. + ComputationBuilder b(client_, TestName()); + + b.Add(b.ConstantR2<float>({{1.0, 2.0}}), + b.ConstantR2<float>({{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}})); + + auto result_status = Execute(&b, {}); + EXPECT_FALSE(result_status.ok()); + EXPECT_MATCH( + result_status.status().error_message(), + testing::ContainsRegex("binary op BINOP_ADD with incompatible shapes")); +} + +XLA_TEST_F(BroadcastSimpleTest, InvalidDegenerateBroadcasting) { + // Test invalid broadcasting with [1, 2] and [2, 3] inputs. 
+ ComputationBuilder b(client_, TestName()); + + b.Add(b.ConstantR2<float>({{1.0, 2.0}}), + b.ConstantR2<float>({{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}})); + + auto result_status = Execute(&b, {}); + EXPECT_FALSE(result_status.ok()); + EXPECT_MATCH( + result_status.status().error_message(), + testing::ContainsRegex("binary op BINOP_ADD with incompatible shapes")); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/broadcast_test.cc b/tensorflow/compiler/xla/tests/broadcast_test.cc new file mode 100644 index 0000000000..1796a732e5 --- /dev/null +++ b/tensorflow/compiler/xla/tests/broadcast_test.cc @@ -0,0 +1,286 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> +#include <utility> + +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class BroadcastTest : public HloTestBase {}; + +XLA_TEST_F(BroadcastTest, BroadcastScalarToScalar) { + // Test degenerate case of broadcasting a scalar into a scalar. + auto builder = HloComputation::Builder(TestName()); + auto input = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(42.0))); + builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {}), input, {})); + + // Create HLO module, compile, and execute. + auto hlo_module = MakeUnique<HloModule>(TestName()); + hlo_module->AddEntryComputation(builder.Build()); + auto result = ExecuteAndTransfer(std::move(hlo_module), {}); + + LiteralTestUtil::ExpectNear(*LiteralUtil::CreateR0<float>(42.0), *result, + error_spec_); +} + +XLA_TEST_F(BroadcastTest, BroadcastScalarTo2D) { + auto builder = HloComputation::Builder(TestName()); + auto input = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(42.0))); + builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {2, 2}), input, {})); + + // Create HLO module, compile, and execute. 
+ auto hlo_module = MakeUnique<HloModule>(TestName()); + hlo_module->AddEntryComputation(builder.Build()); + auto result = ExecuteAndTransfer(std::move(hlo_module), {}); + + LiteralTestUtil::ExpectNear( + *LiteralUtil::CreateR2<float>({{42.0, 42.0}, {42.0, 42.0}}), *result, + error_spec_); +} + +XLA_TEST_F(BroadcastTest, BroadcastVectorTo2D) { + auto builder = HloComputation::Builder(TestName()); + auto input = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1<float>({1.0, 2.0, 3.0}))); + + // Broadcast vector in both dimension 0 and dimension 1. Join them in a tuple + // to enable testing of the results. + auto element1 = builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {3, 2}), input, {0})); + auto element2 = builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {2, 3}), input, {1})); + builder.AddInstruction(HloInstruction::CreateTuple({element1, element2})); + + // Create HLO module, compile, and execute. + auto hlo_module = MakeUnique<HloModule>(TestName()); + hlo_module->AddEntryComputation(builder.Build()); + auto result = ExecuteAndTransfer(std::move(hlo_module), {}); + + LiteralTestUtil::ExpectNear( + *LiteralUtil::CreateR2<float>({{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}}), + result->tuple_literals(0), error_spec_); + + LiteralTestUtil::ExpectNear( + *LiteralUtil::CreateR2<float>({{1.0, 2.0, 3.0}, {1.0, 2.0, 3.0}}), + result->tuple_literals(1), error_spec_); +} + +XLA_TEST_F(BroadcastTest, Broadcast2DTo2D) { + auto builder = HloComputation::Builder(TestName()); + auto input = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}}))); + builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {2, 2}), input, {0, 1})); + + // Create HLO module, compile, and execute. 
+ auto hlo_module = MakeUnique<HloModule>(TestName()); + hlo_module->AddEntryComputation(builder.Build()); + auto result = ExecuteAndTransfer(std::move(hlo_module), {}); + + LiteralTestUtil::ExpectNear( + *LiteralUtil::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}}), *result, + error_spec_); +} + +XLA_TEST_F(BroadcastTest, Broadcast2DTo2DTranspose) { + // Degenerately broadcasting a shape into a shape of the same rank reorders + // the dimensions, ie transpose. + auto builder = HloComputation::Builder(TestName()); + auto input = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}}))); + builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {2, 2}), input, {1, 0})); + + // Create HLO module, compile, and execute. + auto hlo_module = MakeUnique<HloModule>(TestName()); + hlo_module->AddEntryComputation(builder.Build()); + auto result = ExecuteAndTransfer(std::move(hlo_module), {}); + + LiteralTestUtil::ExpectNear( + *LiteralUtil::CreateR2<float>({{1.0, 3.0}, {2.0, 4.0}}), *result, + error_spec_); +} + +XLA_TEST_F(BroadcastTest, Broadcast2DTo3D) { + auto builder = HloComputation::Builder(TestName()); + auto input = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}}))); + builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {2, 3, 2}), input, {0, 2})); + + // Create HLO module, compile, and execute. 
+ auto hlo_module = MakeUnique<HloModule>(TestName()); + hlo_module->AddEntryComputation(builder.Build()); + auto result = ExecuteAndTransfer(std::move(hlo_module), {}); + + LiteralTestUtil::ExpectNear( + *LiteralUtil::CreateR3<float>({{{1.0, 2.0}, {1.0, 2.0}, {1.0, 2.0}}, + {{3.0, 4.0}, {3.0, 4.0}, {3.0, 4.0}}}), + *result, error_spec_); +} + +TEST_F(BroadcastTest, Broadcast_R1_2_To_R4_2x2x3x3) { + auto builder = HloComputation::Builder(TestName()); + auto input = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR1<float>({1.0, 2.0}))); + + // Broadcast vector in dimension 1. + builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {2, 2, 3, 3}), input, {1})); + + // Create HLO module, compile, and execute. + auto hlo_module = MakeUnique<HloModule>(TestName()); + hlo_module->AddEntryComputation(builder.Build()); + auto result = ExecuteAndTransfer(std::move(hlo_module), {}); + + Array4D<float> expected(2, 2, 3, 3); + Array2D<float> pz({{1, 2}, {1, 2}}); + expected.FillWithPZ(pz); + + LiteralTestUtil::ExpectNear( + *LiteralUtil::CreateR4FromArray4D<float>(expected), *result, error_spec_); +} + +TEST_F(BroadcastTest, Broadcast_R1_1025_To_R4_3x3x3x1025) { + auto builder = HloComputation::Builder(TestName()); + std::vector<float> input_data(1025); + int64 r1_size = input_data.size(); + std::iota(input_data.begin(), input_data.end(), 0.0f); + auto input = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR1<float>(input_data))); + + // Broadcast vector in dimension 3. + builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {3, 3, 3, r1_size}), input, {3})); + + // Create HLO module, compile, and execute. 
+ auto hlo_module = MakeUnique<HloModule>(TestName()); + hlo_module->AddEntryComputation(builder.Build()); + auto result = ExecuteAndTransfer(std::move(hlo_module), {}); + + Array4D<float> expected(3, 3, 3, 1025); + Array2D<float> yx(/*height=*/3, /*width=*/r1_size); + for (int64 y = 0; y < 3; ++y) { + for (int64 x = 0; x < r1_size; ++x) { + yx(y, x) = input_data[x]; + } + } + expected.FillWithYX(yx); + + LiteralTestUtil::ExpectNear( + *LiteralUtil::CreateR4FromArray4D<float>(expected), *result, error_spec_); +} + +XLA_TEST_F(BroadcastTest, Broadcast_R1_64_To_R4_32x64x7x7) { + auto builder = HloComputation::Builder(TestName()); + Array4D<float> r4_array(32, 64, 7, 7); + r4_array.Fill(42.0); + std::vector<float> r1_array(64, 42.0); + + auto input = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR1<float>(r1_array))); + + // Broadcast vector in dimension 1. + builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {32, 64, 7, 7}), input, {1})); + + // Create HLO module, compile, and execute. + auto hlo_module = MakeUnique<HloModule>(TestName()); + hlo_module->AddEntryComputation(builder.Build()); + auto result = ExecuteAndTransfer(std::move(hlo_module), {}); + + LiteralTestUtil::ExpectNear(*LiteralUtil::CreateR4FromArray4D(r4_array), + *result, error_spec_); +} + +TEST_F(BroadcastTest, Broadcast_R0_to_R4_64x64x3x3) { + auto builder = HloComputation::Builder(TestName()); + auto input = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(1.0f))); + builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {64, 64, 3, 3}), input, {})); + + // Create HLO module, compile, and execute. 
+ auto hlo_module = MakeUnique<HloModule>(TestName()); + hlo_module->AddEntryComputation(builder.Build()); + LOG(INFO) << hlo_module->ToString(); + auto result = ExecuteAndTransfer(std::move(hlo_module), {}); + + Array4D<float> expected(64, 64, 3, 3); + expected.Fill(1.0f); + + LiteralTestUtil::ExpectNear( + *LiteralUtil::CreateR4FromArray4D<float>(expected), *result, error_spec_); +} + +TEST_F(BroadcastTest, Broadcast_R2_2x2_To_R4_3x3x2x2) { + auto builder = HloComputation::Builder(TestName()); + Array2D<float> to_broadcast({{1.0f, 2.0f}, {3.0f, 4.0f}}); + auto input = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2FromArray2D<float>(to_broadcast))); + + // Broadcast vector in dimensions 2 and 3. + builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {3, 3, 2, 2}), input, {2, 3})); + + // Create HLO module, compile, and execute. + auto hlo_module = MakeUnique<HloModule>(TestName()); + hlo_module->AddEntryComputation(builder.Build()); + auto result = ExecuteAndTransfer(std::move(hlo_module), {}); + + Array4D<float> expected(3, 3, 2, 2); + expected.FillWithYX(to_broadcast); + + LiteralTestUtil::ExpectNear( + *LiteralUtil::CreateR4FromArray4D<float>(expected), *result, error_spec_); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/build_defs.bzl b/tensorflow/compiler/xla/tests/build_defs.bzl new file mode 100644 index 0000000000..2c7eeb820d --- /dev/null +++ 
b/tensorflow/compiler/xla/tests/build_defs.bzl @@ -0,0 +1,149 @@ +"""Build rules for XLA testing.""" + +load("@local_config_cuda//cuda:build_defs.bzl", "cuda_is_configured") + +def all_backends(): + if cuda_is_configured(): + return ["cpu", "cpu_parallel", "gpu"] + else: + return ["cpu", "cpu_parallel"] + +def xla_test(name, + srcs, + deps, + backends=[], + args=[], + tags=[], + copts=[], + backend_tags={}, + backend_args={}, + **kwargs): + """Generates cc_test targets for the given XLA backends. + + This rule generates a cc_test target for one or more XLA backends and also + a platform-agnostic cc_library rule. The arguments are identical to cc_test + with two additions: 'backends' and 'backend_args'. 'backends' specifies the + backends to generate tests for ("cpu", "cpu_parallel", "gpu"), and + 'backend_args'/'backend_tags' specifies backend-specific args parameters to + use when generating the cc_test. + + The name of the cc_tests are the provided name argument with the backend name + appended, and the cc_library target name is the provided name argument with + "_lib" appended. For example, if name parameter is "foo_test", then the cpu + test target will be "foo_test_cpu" and the cc_library target is "foo_lib". + + The cc_library target can be used to link with other plugins outside of + xla_test. + + The build rule also defines a test suite ${name} which includes the tests for + each of the supported backends. + + Each generated cc_test target has a tag indicating which backend the test is + for. This tag is of the form "xla_${BACKEND}" (eg, "xla_cpu"). These + tags can be used to gather tests for a particular backend into a test_suite. + + Examples: + + # Generates the targets: foo_test_cpu and foo_test_gpu. + xla_test( + name = "foo_test", + srcs = ["foo_test.cc"], + backends = ["cpu", "gpu"], + deps = [...], + ) + + # Generates the targets: bar_test_cpu and bar_test_gpu. bar_test_cpu + # includes the additional arg "--special_cpu_flag". 
+ xla_test( + name = "bar_test", + srcs = ["bar_test.cc"], + backends = ["cpu", "gpu"], + backend_args = {"cpu": ["--special_cpu_flag"]} + deps = [...], + ) + + The build rule defines the preprocessor macro XLA_TEST_BACKEND_${BACKEND} + to the value 1 where ${BACKEND} is the uppercase name of the backend. + + Args: + name: Name of the target. + srcs: Sources for the target. + deps: Dependencies of the target. + backends: A list of backends to generate tests for. Supported + values: "cpu", "cpu_parallel", "gpu". If this list is empty, the test will + be generated for all supported backends. + args: Test arguments for the target. + tags: Tags for the target. + backend_args: A dict mapping backend name to list of additional args to + use for that target. + backend_tags: A dict mapping backend name to list of additional tags to + use for that target. + """ + test_names = [] + if not backends: + backends = all_backends() + + native.cc_library( + name="%s_lib" % name, + srcs=srcs, + copts=copts, + testonly=True, + deps=deps + ["//tensorflow/compiler/xla/tests:test_macros_header"], + ) + + for backend in backends: + test_name = "%s_%s" % (name, backend) + this_backend_tags = ["xla_%s" % backend] + this_backend_copts = [] + this_backend_args = backend_args.get(backend, []) + if backend == "cpu": + backend_deps = ["//tensorflow/compiler/xla/service:cpu_plugin"] + backend_deps += ["//tensorflow/compiler/xla/tests:test_macros_cpu"] + elif backend == "cpu_parallel": + backend_deps = ["//tensorflow/compiler/xla/service:cpu_plugin"] + backend_deps += ["//tensorflow/compiler/xla/tests:test_macros_cpu"] + this_backend_args += ["--xla_cpu_parallel=true"] + elif backend == "gpu": + backend_deps = ["//tensorflow/compiler/xla/service:gpu_plugin"] + backend_deps += ["//tensorflow/compiler/xla/tests:test_macros_gpu"] + this_backend_tags += ["requires-gpu-sm35"] + else: + fail("Unknown backend %s" % backend) + + native.cc_test( + name=test_name, + srcs=srcs, + tags=tags + 
backend_tags.get(backend, []) + this_backend_tags, + copts=copts + ["-DXLA_TEST_BACKEND_%s=1" % backend.upper()] + + this_backend_copts, + args=args + this_backend_args, + deps=deps + backend_deps, + **kwargs) + + test_names.append(test_name) + + native.test_suite(name=name, tests=test_names) + + +def generate_backend_suites(backends=[]): + if not backends: + backends = all_backends() + for backend in backends: + native.test_suite(name="%s_tests" % backend, + tags = ["xla_%s" % backend]) + + +def generate_backend_test_macros(backends=[]): + if not backends: + backends = all_backends() + for backend in backends: + native.cc_library( + name="test_macros_%s" % backend, + testonly = True, + hdrs = ["test_macros.h"], + copts = ["-DXLA_PLATFORM=\\\"%s\\\"" % backend.upper()], + deps = [ + "//tensorflow/compiler/xla:types", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ]) diff --git a/tensorflow/compiler/xla/tests/call_test.cc b/tensorflow/compiler/xla/tests/call_test.cc new file mode 100644 index 0000000000..1c96b73034 --- /dev/null +++ b/tensorflow/compiler/xla/tests/call_test.cc @@ -0,0 +1,115 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> +#include <utility> + +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class CallOpTest : public ClientLibraryTestBase { + protected: + Computation CreateR0F32IdentityComputation() { + ComputationBuilder builder(client_, "Identity"); + builder.Parameter(0, r0f32_, "x"); + auto build_status = builder.Build(); + EXPECT_IS_OK(build_status.status()); + return build_status.ConsumeValueOrDie(); + } + + Computation CreateR1S0F32AdditionComputation() { + ComputationBuilder builder(client_, "Addition"); + auto x = builder.Parameter(0, r1s0f32_, "x"); + auto y = builder.Parameter(1, r1s0f32_, "y"); + builder.Add(x, y); + auto build_status = builder.Build(); + EXPECT_IS_OK(build_status.status()); + return build_status.ConsumeValueOrDie(); + } + + Computation CreateR1S2F32AdditionComputation() { + ComputationBuilder builder(client_, "Addition"); + auto x = builder.Parameter(0, r1s2f32_, "x"); + auto y = builder.Parameter(1, r1s2f32_, "y"); + builder.Add(x, y); + auto build_status = builder.Build(); + EXPECT_IS_OK(build_status.status()); + return build_status.ConsumeValueOrDie(); + } + + Shape r0f32_ = ShapeUtil::MakeShape(F32, {}); + Shape r1s0f32_ = ShapeUtil::MakeShape(F32, {0}); + Shape r1s2f32_ = ShapeUtil::MakeShape(F32, {2}); +}; + +XLA_TEST_F(CallOpTest, DISABLED_ON_GPU(CallR0F32IdentityScalar)) { + ComputationBuilder 
builder(client_, TestName()); + Computation callee = CreateR0F32IdentityComputation(); + auto constant = builder.ConstantLiteral(*LiteralUtil::CreateR0<float>(42.0)); + builder.Call(callee, {constant}); + + ComputeAndCompareR0<float>(&builder, 42.0, {}, ErrorSpec(0.01f)); +} + +XLA_TEST_F(CallOpTest, DISABLED_ON_GPU(CallR1S0F32AddArray)) { + ComputationBuilder builder(client_, TestName()); + Computation callee = CreateR1S0F32AdditionComputation(); + auto x = builder.ConstantLiteral(*LiteralUtil::CreateR1<float>({})); + auto y = builder.ConstantLiteral(*LiteralUtil::CreateR1<float>({})); + builder.Call(callee, {x, y}); + + ComputeAndCompareR1<float>(&builder, {}, {}, ErrorSpec(0.01f)); +} + +XLA_TEST_F(CallOpTest, DISABLED_ON_GPU(CallR1S2F32AddArray)) { + ComputationBuilder builder(client_, TestName()); + Computation callee = CreateR1S2F32AdditionComputation(); + auto x = builder.ConstantLiteral(*LiteralUtil::CreateR1<float>({1.0f, 2.0f})); + auto y = builder.ConstantLiteral(*LiteralUtil::CreateR1<float>({2.0f, 3.0f})); + builder.Call(callee, {x, y}); + + ComputeAndCompareR1<float>(&builder, {3.0f, 5.0f}, {}, ErrorSpec(0.01f)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/check_execution_arity_test.cc b/tensorflow/compiler/xla/tests/check_execution_arity_test.cc new file mode 100644 index 0000000000..675c9fccb0 --- /dev/null +++ b/tensorflow/compiler/xla/tests/check_execution_arity_test.cc @@ -0,0 +1,138 @@ +/* 
Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <memory> + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class CheckExecutionArityTest : public ClientLibraryTestBase {}; + +TEST_F(CheckExecutionArityTest, TwoParamComputationNumArguments) { + ComputationBuilder builder(client_, "add_two_params"); + auto param_literal = LiteralUtil::CreateR1<float>({1.1f, 2.2f}); + + auto p0 = builder.Parameter(0, param_literal->shape(), "param0"); + auto p1 = builder.Parameter(1, param_literal->shape(), "param1"); + auto add = builder.Add(p0, p1); + + auto param0_data = + client_->TransferToServer(*param_literal).ConsumeValueOrDie(); + auto param1_data = + 
client_->TransferToServer(*param_literal).ConsumeValueOrDie(); + + auto computation_status = builder.Build(); + ASSERT_IS_OK(computation_status.status()); + auto computation = computation_status.ConsumeValueOrDie(); + + // The arity of the UserComputation is 2 arguments. Execution will succeed + // with 2 arguments, but fail with a different number. + auto result_two_args = + client_->Execute(computation, {param0_data.get(), param1_data.get()}); + ASSERT_IS_OK(result_two_args.status()); + + auto result_one_arg = client_->Execute(computation, {param0_data.get()}); + ASSERT_FALSE(result_one_arg.ok()); + ASSERT_EQ(result_one_arg.status().code(), + tensorflow::error::INVALID_ARGUMENT); + ASSERT_MATCH(result_one_arg.status().error_message(), + testing::ContainsRegex("takes 2")); + + auto result_zero_args = client_->Execute(computation, {}); + ASSERT_FALSE(result_zero_args.ok()); + ASSERT_EQ(result_zero_args.status().code(), + tensorflow::error::INVALID_ARGUMENT); + ASSERT_MATCH(result_zero_args.status().error_message(), + testing::ContainsRegex("takes 2")); +} + +XLA_TEST_F(CheckExecutionArityTest, CheckArgumentShapes) { + ComputationBuilder builder(client_, "add_two_params"); + + auto p0 = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "param0"); + auto p1 = builder.Parameter(1, ShapeUtil::MakeShape(F32, {4}), "param1"); + auto add = builder.Mul(p0, p1); + + auto computation_status = builder.Build(); + ASSERT_IS_OK(computation_status.status()); + auto computation = computation_status.ConsumeValueOrDie(); + + auto f32_literal = LiteralUtil::CreateR0<float>(1.1f); + auto f32_data = client_->TransferToServer(*f32_literal).ConsumeValueOrDie(); + auto f32_4_literal = LiteralUtil::CreateR1<float>({1.0f, 2.0f, 3.0f, 4.0f}); + auto f32_4_data = + client_->TransferToServer(*f32_4_literal).ConsumeValueOrDie(); + auto u8_4_literal = LiteralUtil::CreateR1U8("hola"); + auto u8_4_data = client_->TransferToServer(*u8_4_literal).ConsumeValueOrDie(); + + // Match + auto status = + 
client_->Execute(computation, {f32_data.get(), f32_4_data.get()}); + ASSERT_IS_OK(status.status()); + + // Shape mismatch in parameter 0 + status = client_->Execute(computation, {f32_4_data.get(), f32_4_data.get()}); + ASSERT_FALSE(status.ok()); + ASSERT_EQ(status.status().code(), tensorflow::error::INVALID_ARGUMENT); + ASSERT_MATCH(status.status().error_message(), + testing::ContainsRegex("expects parameter 0")); + + // Shape mismatch in parameter 1 (rank) + status = client_->Execute(computation, {f32_data.get(), f32_data.get()}); + ASSERT_FALSE(status.ok()); + ASSERT_EQ(status.status().code(), tensorflow::error::INVALID_ARGUMENT); + ASSERT_MATCH(status.status().error_message(), + testing::ContainsRegex("expects parameter 1")); + + // Shape mismatch in parameter 1 (element type) + status = client_->Execute(computation, {f32_data.get(), u8_4_data.get()}); + ASSERT_FALSE(status.ok()); + ASSERT_EQ(status.status().code(), tensorflow::error::INVALID_ARGUMENT); + ASSERT_MATCH(status.status().error_message(), + testing::ContainsRegex("expects parameter 1")); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc new file mode 100644 index 0000000000..d2a7def5d0 --- /dev/null +++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc @@ -0,0 +1,263 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" + +#include <string> + +#include "tensorflow/compiler/xla/client/client_library.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/hlo_pass_pipeline_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" + +namespace se = ::perftools::gputools; + +namespace xla { +namespace { +// Wrapper function that creates a nicer error message (than a bare +// ValueOrDie()) if the platform we intend to test is not available. 
+Client* GetOrCreateLocalClientOrDie(se::Platform* platform) { + StatusOr<Client*> result = ClientLibrary::GetOrCreateLocalClient(platform); + TF_CHECK_OK(result.status()) << "could not create local client for testing"; + return result.ValueOrDie(); +} +} // namespace + +ClientLibraryTestBase::ClientLibraryTestBase( + se::Platform* platform, + tensorflow::gtl::ArraySlice<string> disabled_pass_names) + : client_(GetOrCreateLocalClientOrDie(platform)) { + legacy_flags::HloPassPipelineFlags* flags = + legacy_flags::GetHloPassPipelineFlags(); + flags->xla_disable_hlo_passes = + tensorflow::str_util::Join(disabled_pass_names, ","); +} + +string ClientLibraryTestBase::TestName() const { + return ::testing::UnitTest::GetInstance()->current_test_info()->name(); +} + +StatusOr<std::unique_ptr<GlobalData>> ClientLibraryTestBase::Execute( + ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + // Build the computation, as a convenience. + TF_ASSIGN_OR_RETURN(auto computation, builder->Build()); + return client_->Execute(computation, arguments); +} + +StatusOr<std::unique_ptr<Literal>> ClientLibraryTestBase::ExecuteAndTransfer( + ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, + const Shape* shape_with_output_layout) { + // Build the computation, as a convenience. 
+ TF_ASSIGN_OR_RETURN(auto computation, builder->Build()); + return client_->ExecuteAndTransfer(computation, arguments, + shape_with_output_layout); +} + +std::unique_ptr<GlobalData> ClientLibraryTestBase::ExecuteOrDie( + ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + return Execute(builder, arguments).ConsumeValueOrDie(); +} + +std::unique_ptr<Literal> ClientLibraryTestBase::ExecuteAndTransferOrDie( + ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + return ExecuteAndTransfer(builder, arguments).ConsumeValueOrDie(); +} + +string ClientLibraryTestBase::ExecuteToString( + ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + StatusOr<Computation> computation_status = builder->Build(); + if (!computation_status.ok()) { + return computation_status.status().ToString(); + } + Computation computation = computation_status.ConsumeValueOrDie(); + + auto result = client_->ExecuteAndTransfer(computation, arguments); + if (!result.ok()) { + return result.status().ToString(); + } else { + return LiteralUtil::ToString(*result.ValueOrDie()); + } +} + +void ClientLibraryTestBase::ComputeAndCompareR1( + ComputationBuilder* builder, const tensorflow::core::Bitmap& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + std::unique_ptr<Literal> expected_literal = LiteralUtil::CreateR1(expected); + ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, + arguments); +} + +void ClientLibraryTestBase::ComputeAndCompareLiteral( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, + const Shape* shape_with_layout) { + EXPECT_IS_OK(ComputeAndCompareLiteralWithStatus(builder, expected, arguments, + shape_with_layout)); +} + +void ClientLibraryTestBase::ComputeAndCompareLiteral( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, 
ErrorSpec error, + const Shape* shape_with_layout) { + EXPECT_IS_OK(ComputeAndCompareLiteralWithStatus(builder, expected, arguments, + error, shape_with_layout)); +} + +tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, + const Shape* shape_with_layout) { + TF_ASSIGN_OR_RETURN( + auto actual, ExecuteAndTransfer(builder, arguments, shape_with_layout)); + if (ShapeUtil::ElementIsFloating(expected.shape())) { + LOG(WARNING) << "performing exact comparison of floating point numbers"; + } else { + TF_RET_CHECK(ShapeUtil::ElementIsIntegral(expected.shape()) || + expected.shape().element_type() == PRED); + } + LiteralTestUtil::ExpectEqual(expected, *actual); + return tensorflow::Status::OK(); +} + +tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error, + const Shape* shape_with_layout) { + TF_ASSIGN_OR_RETURN( + auto actual, ExecuteAndTransfer(builder, arguments, shape_with_layout)); + TF_RET_CHECK(ShapeUtil::ElementIsFloating(expected.shape())); + LiteralTestUtil::ExpectNear(expected, *actual, error); + return tensorflow::Status::OK(); +} + +void ClientLibraryTestBase::ComputeAndCompareR1U8( + ComputationBuilder* builder, tensorflow::StringPiece expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + auto actual_status = ExecuteAndTransfer(builder, arguments); + EXPECT_IS_OK(actual_status.status()); + if (!actual_status.ok()) { + return; + } + auto actual = actual_status.ConsumeValueOrDie(); + + // Turn the expected value into a literal. 
+ std::unique_ptr<Literal> expected_literal = LiteralUtil::CreateR1U8(expected); + + VLOG(1) << "expected: " << LiteralUtil::ToString(*expected_literal); + VLOG(1) << "actual: " << LiteralUtil::ToString(*actual); + + EXPECT_EQ(expected, actual->u8s()); +} + +void ClientLibraryTestBase::ComputeAndCompareTuple( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + auto actual_status = ExecuteAndTransfer(builder, arguments); + EXPECT_IS_OK(actual_status.status()); + if (!actual_status.ok()) { + return; + } + auto actual = actual_status.ConsumeValueOrDie(); + LiteralTestUtil::ExpectEqualTuple(expected, *actual); +} + +void ClientLibraryTestBase::ComputeAndCompareTuple( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error) { + auto actual_status = ExecuteAndTransfer(builder, arguments); + EXPECT_IS_OK(actual_status.status()); + if (!actual_status.ok()) { + return; + } + auto actual = actual_status.ConsumeValueOrDie(); + LiteralTestUtil::ExpectNearTuple(expected, *actual, error); +} + +Computation ClientLibraryTestBase::CreateScalarRelu() { + ComputationBuilder builder(client_, "relu"); + auto z_value = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "z_value"); + auto zero = builder.ConstantR0<float>(0.0); + builder.Max(z_value, zero); + auto computation_status = builder.Build(); + TF_CHECK_OK(computation_status.status()); + return computation_status.ConsumeValueOrDie(); +} + +Computation ClientLibraryTestBase::CreateScalarMax() { + ComputationBuilder builder(client_, "max"); + auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); + auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); + builder.Max(x, y); + auto computation_status = builder.Build(); + TF_CHECK_OK(computation_status.status()); + return computation_status.ConsumeValueOrDie(); +} + +Computation 
ClientLibraryTestBase::CreateScalarReluSensitivity() { + ComputationBuilder builder(client_, "relu_sensitivity"); + auto activation = + builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "activation"); + auto backprop = + builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "backprop"); + auto zero = builder.ConstantR0<float>(0.0); + auto activation_gtz = builder.Gt(activation, zero); + builder.Select(activation_gtz, /*on_true=*/backprop, /*on_false=*/zero); + + auto computation_status = builder.Build(); + TF_CHECK_OK(computation_status.status()); + return computation_status.ConsumeValueOrDie(); +} + +std::unique_ptr<Array2D<float>> ClientLibraryTestBase::CreatePatternedMatrix( + int rows, int cols, float offset) { + auto array = MakeUnique<Array2D<float>>(rows, cols); + for (int64 row = 0; row < rows; ++row) { + for (int64 col = 0; col < cols; ++col) { + (*array)(row, col) = col + (row * 1000.0f) + offset; + } + } + return array; +} + +std::unique_ptr<Array2D<float>> +ClientLibraryTestBase::CreatePatternedMatrixWithZeroPadding(int rows, int cols, + int rows_padded, + int cols_padded) { + CHECK_GE(rows_padded, rows); + CHECK_GE(cols_padded, cols); + auto array = MakeUnique<Array2D<float>>(rows_padded, cols_padded, 0.0); + for (int64 row = 0; row < rows; ++row) { + for (int64 col = 0; col < cols; ++col) { + (*array)(row, col) = col + (row * 1000.0f); + } + } + return array; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h new file mode 100644 index 0000000000..690fda3ffa --- /dev/null +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -0,0 +1,409 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_TESTS_CLIENT_LIBRARY_TEST_BASE_H_ +#define TENSORFLOW_COMPILER_XLA_TESTS_CLIENT_LIBRARY_TEST_BASE_H_ + +#include <memory> +#include <string> +#include <type_traits> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array3d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/bitmap.h" +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +// A client library test establishes an in-process XLA client connection. 
+class ClientLibraryTestBase : public ::testing::Test { + protected: + explicit ClientLibraryTestBase( + perftools::gputools::Platform* platform = nullptr, + tensorflow::gtl::ArraySlice<string> disabled_pass_names = {}); + + // Returns the name of the test currently being run. + string TestName() const; + + // TODO(b/25566808): Add helper that populates a literal from a testdata file. + + // Convenience methods for building and running a computation from a builder. + StatusOr<std::unique_ptr<GlobalData>> Execute( + ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<GlobalData*> arguments); + StatusOr<std::unique_ptr<Literal>> ExecuteAndTransfer( + ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, + const Shape* shape_with_output_layout = nullptr); + + // Convenience OrDie variants of above methods. + std::unique_ptr<GlobalData> ExecuteOrDie( + ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<GlobalData*> arguments); + std::unique_ptr<Literal> ExecuteAndTransferOrDie( + ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<GlobalData*> arguments); + + // Run a computation and return its value as a string. If an error + // occurs, then instead return the error as a string. + string ExecuteToString(ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<GlobalData*> arguments); + + // Convenience methods for building and running a computation, transferring + // the result, and comparing it to the expected value(s). Methods are + // templated on the native host type which maps to specific XLA types (See + // ComputationBuilder for details). For each rank, two forms are provided: one + // for floating point types with an ErrorSpec parameter, and one for integral + // types without the ErrorSpec parameter. 
+ template <typename NativeT> + void ComputeAndCompareR0(ComputationBuilder* builder, NativeT expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments); + template <typename NativeT> + void ComputeAndCompareR0(ComputationBuilder* builder, NativeT expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, + ErrorSpec error); + + template <typename NativeT> + void ComputeAndCompareR1(ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<NativeT> expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments); + template <typename NativeT> + void ComputeAndCompareR1(ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<NativeT> expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, + ErrorSpec error); + + // As above, but uses a bitmap to hold the predicate vector to avoid + // deficiencies of vector<bool>. + void ComputeAndCompareR1(ComputationBuilder* builder, + const tensorflow::core::Bitmap& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments); + + template <typename NativeT> + void ComputeAndCompareR2(ComputationBuilder* builder, + const Array2D<NativeT>& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments); + template <typename NativeT> + void ComputeAndCompareR2(ComputationBuilder* builder, + const Array2D<NativeT>& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, + ErrorSpec error); + + template <typename NativeT> + void ComputeAndCompareR3(ComputationBuilder* builder, + const Array3D<NativeT>& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments); + template <typename NativeT> + void ComputeAndCompareR3(ComputationBuilder* builder, + const Array3D<NativeT>& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, + ErrorSpec error); + + template <typename NativeT> + void ComputeAndCompareR4(ComputationBuilder* builder, + const Array4D<NativeT>& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments); + template <typename NativeT> + void 
ComputeAndCompareR4(ComputationBuilder* builder, + const Array4D<NativeT>& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, + ErrorSpec error); + + // Build and run the computation and compare the result with the given + // literal. shape_with_layout indicates the result layout to request when + // calling Execute. + void ComputeAndCompareLiteral( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, + const Shape* shape_with_layout = nullptr); + void ComputeAndCompareLiteral( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error, + const Shape* shape_with_layout = nullptr); + + // ComputeAndCompare variant which returns an error status. + tensorflow::Status ComputeAndCompareLiteralWithStatus( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, + const Shape* shape_with_layout = nullptr); + tensorflow::Status ComputeAndCompareLiteralWithStatus( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error, + const Shape* shape_with_layout = nullptr); + + // Compare the result of the computation to a string. In XLA strings are + // represented using rank-1 U8 shapes. + void ComputeAndCompareR1U8( + ComputationBuilder* builder, tensorflow::StringPiece expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments); + + // Convenience method for running a built computation, transferring the + // result, and comparing it to the expected tuple literal. + void ComputeAndCompareTuple( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments); + void ComputeAndCompareTuple( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec abs_error); + + // Create scalar operations for use in reductions. 
+ Computation CreateScalarRelu(); + Computation CreateScalarMax(); + Computation CreateScalarReluSensitivity(); + + // Special case convenience functions for creating filled arrays. + + // Creates an array of pseudorandom values lying between the given minimum and + // maximum values. + template <typename NativeT> + std::vector<NativeT> CreatePseudorandomR1(const int width, NativeT min_value, + NativeT max_value, uint32 seed); + template <typename NativeT> + std::unique_ptr<Array2D<NativeT>> CreatePseudorandomR2(const int rows, + const int cols, + NativeT min_value, + NativeT max_value, + uint32 seed); + + // Creates a (rows x cols) array filled in the following form: + // + // [ 0 1 ... cols-1] + // [ 1,000 1,001 ... 1000.0 + cols-1] + // [ ... ... ... ...] + // [(rows-1)*1000.0 ... ... (rows-1)*1000.0 + cols-1] + // + // If provided, offset is added uniformly to every element (e.g. an offset of + // 64 would cause 0 in the above to be 64, 1 to be 65, 1000 to be 1064, etc.) + std::unique_ptr<Array2D<float>> CreatePatternedMatrix(const int rows, + const int cols, + float offset = 0.0); + + // Creates a (rows x cols) array as above, padded out to + // (rows_padded x cols_padded) with zeroes. Requires rows_padded >= rows + // and cols_padded >= cols. + std::unique_ptr<Array2D<float>> CreatePatternedMatrixWithZeroPadding( + const int rows, const int cols, const int rows_padded, + const int cols_padded); + + // Create a parameter instruction that wraps the given values and then stores + // into "data_handle" the global handle for that parameter. + // + // "parameter_number" is the parameter number. + // "name" is the name of the parameter instruction. 
+ template <typename NativeT> + std::unique_ptr<GlobalData> CreateR1Parameter( + tensorflow::gtl::ArraySlice<NativeT> values, int64 parameter_number, + const string& name, ComputationBuilder* builder, + ComputationDataHandle* data_handle); + + // Create a parameter instruction that wraps the given constant array + // "array_2d" and then stores to "data_handle" the global handle for that + // parameter. + // + // "parameter_number" is the parameter number. + // "name" is the name of the parameter instruction. + template <typename NativeT> + std::unique_ptr<GlobalData> CreateR2Parameter( + const Array2D<NativeT>& array_2d, int64 parameter_number, + const string& name, ComputationBuilder* builder, + ComputationDataHandle* data_handle); + + Client* client_; +}; + +template <typename NativeT> +void ClientLibraryTestBase::ComputeAndCompareR0( + ComputationBuilder* builder, NativeT expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + std::unique_ptr<Literal> expected_literal = + LiteralUtil::CreateR0<NativeT>(expected); + ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, + arguments); +} + +template <typename NativeT> +void ClientLibraryTestBase::ComputeAndCompareR0( + ComputationBuilder* builder, NativeT expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error) { + static_assert(std::is_same<NativeT, float>::value || + std::is_same<NativeT, double>::value, + "Floating point type required when specifying an ErrorSpec"); + std::unique_ptr<Literal> expected_literal = + LiteralUtil::CreateR0<NativeT>(expected); + ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, + arguments, error); +} + +template <typename NativeT> +void ClientLibraryTestBase::ComputeAndCompareR1( + ComputationBuilder* builder, tensorflow::gtl::ArraySlice<NativeT> expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + std::unique_ptr<Literal> expected_literal = + LiteralUtil::CreateR1<NativeT>(expected); + 
ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, + arguments); +} + +template <typename NativeT> +void ClientLibraryTestBase::ComputeAndCompareR1( + ComputationBuilder* builder, tensorflow::gtl::ArraySlice<NativeT> expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error) { + static_assert(std::is_same<NativeT, float>::value || + std::is_same<NativeT, double>::value, + "Floating point type required when specifying an ErrorSpec"); + std::unique_ptr<Literal> expected_literal = + LiteralUtil::CreateR1<NativeT>(expected); + ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, + arguments, error); +} + +template <typename NativeT> +void ClientLibraryTestBase::ComputeAndCompareR2( + ComputationBuilder* builder, const Array2D<NativeT>& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + std::unique_ptr<Literal> expected_literal = + LiteralUtil::CreateR2FromArray2D<NativeT>(expected); + ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, + arguments); +} + +template <typename NativeT> +void ClientLibraryTestBase::ComputeAndCompareR2( + ComputationBuilder* builder, const Array2D<NativeT>& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error) { + static_assert(std::is_same<NativeT, float>::value || + std::is_same<NativeT, double>::value, + "Floating point type required when specifying an ErrorSpec"); + std::unique_ptr<Literal> expected_literal = + LiteralUtil::CreateR2FromArray2D<NativeT>(expected); + ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, + arguments, error); +} + +template <typename NativeT> +void ClientLibraryTestBase::ComputeAndCompareR3( + ComputationBuilder* builder, const Array3D<NativeT>& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + std::unique_ptr<Literal> expected_literal = + LiteralUtil::CreateR3FromArray3D<NativeT>(expected); + 
ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, + arguments); +} + +template <typename NativeT> +void ClientLibraryTestBase::ComputeAndCompareR3( + ComputationBuilder* builder, const Array3D<NativeT>& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error) { + static_assert(std::is_same<NativeT, float>::value || + std::is_same<NativeT, double>::value, + "Floating point type required when specifying an ErrorSpec"); + std::unique_ptr<Literal> expected_literal = + LiteralUtil::CreateR3FromArray3D<NativeT>(expected); + ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, + arguments, error); +} + +template <typename NativeT> +void ClientLibraryTestBase::ComputeAndCompareR4( + ComputationBuilder* builder, const Array4D<NativeT>& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + std::unique_ptr<Literal> expected_literal = + LiteralUtil::CreateR4FromArray4D<NativeT>(expected); + ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, + arguments); +} + +template <typename NativeT> +void ClientLibraryTestBase::ComputeAndCompareR4( + ComputationBuilder* builder, const Array4D<NativeT>& expected, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error) { + static_assert(std::is_same<NativeT, float>::value || + std::is_same<NativeT, double>::value, + "Floating point type required when specifying an ErrorSpec"); + std::unique_ptr<Literal> expected_literal = + LiteralUtil::CreateR4FromArray4D<NativeT>(expected); + ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, + arguments, error); +} + +template <typename NativeT> +std::unique_ptr<GlobalData> ClientLibraryTestBase::CreateR1Parameter( + tensorflow::gtl::ArraySlice<NativeT> values, int64 parameter_number, + const string& name, ComputationBuilder* builder, + ComputationDataHandle* data_handle) { + std::unique_ptr<Literal> literal = LiteralUtil::CreateR1(values); + 
std::unique_ptr<GlobalData> data = + client_->TransferToServer(*literal).ConsumeValueOrDie(); + *data_handle = builder->Parameter(parameter_number, literal->shape(), name); + return data; +} + +template <typename NativeT> +std::unique_ptr<GlobalData> ClientLibraryTestBase::CreateR2Parameter( + const Array2D<NativeT>& array_2d, int64 parameter_number, + const string& name, ComputationBuilder* builder, + ComputationDataHandle* data_handle) { + std::unique_ptr<Literal> literal = LiteralUtil::CreateR2FromArray2D(array_2d); + std::unique_ptr<GlobalData> data = + client_->TransferToServer(*literal).ConsumeValueOrDie(); + *data_handle = builder->Parameter(parameter_number, literal->shape(), name); + return data; +} + +template <typename NativeT> +std::vector<NativeT> ClientLibraryTestBase::CreatePseudorandomR1( + const int width, NativeT min_value, NativeT max_value, uint32 seed) { + std::vector<NativeT> result(width); + test_utils::PseudorandomGenerator<NativeT> generator(min_value, max_value, + seed); + for (int i = 0; i < width; ++i) { + result[i] = generator.get(); + } + return result; +} + +template <typename NativeT> +std::unique_ptr<Array2D<NativeT>> ClientLibraryTestBase::CreatePseudorandomR2( + const int rows, const int cols, NativeT min_value, NativeT max_value, + uint32 seed) { + auto result = MakeUnique<Array2D<NativeT>>(rows, cols); + test_utils::PseudorandomGenerator<NativeT> generator(min_value, max_value, + seed); + for (int y = 0; y < rows; ++y) { + for (int x = 0; x < cols; ++x) { + (*result)(y, x) = generator.get(); + } + } + return result; +} + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_TESTS_CLIENT_LIBRARY_TEST_BASE_H_ diff --git a/tensorflow/compiler/xla/tests/client_test.cc b/tensorflow/compiler/xla/tests/client_test.cc new file mode 100644 index 0000000000..77b85af83c --- /dev/null +++ b/tensorflow/compiler/xla/tests/client_test.cc @@ -0,0 +1,127 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class ClientTest : public ClientLibraryTestBase {}; + +TEST_F(ClientTest, ExecuteWithLayout) { + ComputationBuilder b(client_, TestName()); + + std::vector<std::vector<int64>> layouts = {{0, 1}, {1, 0}}; + for (const std::vector<int64>& execute_layout : layouts) { + for (const std::vector<int64>& transfer_layout : layouts) { + b.Add(b.ConstantR2<int32>({{1, 2}, {3, 4}}), + b.ConstantR2<int32>({{10, 20}, {30, 40}})); + auto computation = b.Build(); + ASSERT_TRUE(computation.ok()) << computation.status(); + + const Shape execute_shape_with_layout = ShapeUtil::MakeShapeWithLayout( 
+ S32, /*dimensions=*/{2, 2}, execute_layout); + std::unique_ptr<GlobalData> data = + client_ + ->Execute(computation.ValueOrDie(), {}, + &execute_shape_with_layout) + .ConsumeValueOrDie(); + + std::unique_ptr<Literal> expected_literal = + test_utils::CreateR2LiteralWithLayout<int32>({{11, 22}, {33, 44}}, + transfer_layout); + + auto computed = client_->Transfer(*data, &expected_literal->shape()); + + LiteralTestUtil::AssertEqualShapesAndLayouts( + expected_literal->shape(), computed.ValueOrDie()->shape()); + LiteralTestUtil::ExpectEqual(*expected_literal, *computed.ValueOrDie()); + } + } +} + +TEST_F(ClientTest, ExecuteWithTupleLayout) { + ComputationBuilder b(client_, TestName()); + + b.Tuple({b.ConstantR2<int32>({{1, 2}, {3, 4}}), + b.ConstantR2<int32>({{10, 20}, {30, 40}})}); + + auto computation = b.Build(); + ASSERT_TRUE(computation.ok()) << computation.status(); + + // Create a result shape with one element column major and the other row + // major. + Shape tuple_shape_with_layout = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShapeWithLayout(S32, /*dimensions=*/{2, 2}, + /*minor_to_major=*/{0, 1}), + ShapeUtil::MakeShapeWithLayout(S32, /*dimensions=*/{2, 2}, + /*minor_to_major=*/{1, 0})}); + + auto result = client_ + ->ExecuteAndTransfer(computation.ValueOrDie(), {}, + &tuple_shape_with_layout) + .ConsumeValueOrDie(); + LiteralTestUtil::ExpectR2Equal<int32>({{1, 2}, {3, 4}}, + result->tuple_literals(0)); + LiteralTestUtil::ExpectR2Equal<int32>({{10, 20}, {30, 40}}, + result->tuple_literals(1)); + + EXPECT_TRUE(ShapeUtil::IsTuple(result->shape())); + EXPECT_EQ(2, ShapeUtil::TupleElementCount(result->shape())); + + EXPECT_TRUE(ShapeUtil::Equal( + ShapeUtil::GetTupleElementShape(result->shape(), 0), + ShapeUtil::MakeShapeWithLayout(S32, /*dimensions=*/{2, 2}, + /*minor_to_major=*/{0, 1}))); + EXPECT_TRUE(ShapeUtil::Equal( + ShapeUtil::GetTupleElementShape(result->shape(), 1), + ShapeUtil::MakeShapeWithLayout(S32, /*dimensions=*/{2, 2}, + /*minor_to_major=*/{1, 
0}))); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/codegen_test_base.cc b/tensorflow/compiler/xla/tests/codegen_test_base.cc new file mode 100644 index 0000000000..fe4dff2109 --- /dev/null +++ b/tensorflow/compiler/xla/tests/codegen_test_base.cc @@ -0,0 +1,90 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/tests/codegen_test_base.h" + +#include <stdlib.h> +#include <utility> + +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/backend.h" +#include "tensorflow/compiler/xla/service/compiler.h" +#include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/subprocess.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { + +void CodegenTestBase::CompileAndVerifyIr(std::unique_ptr<HloModule> hlo_module, + const string& pattern) { + std::unique_ptr<Executable> executable = + CompileToExecutable(std::move(hlo_module)); + string ir_module_string = GetIrFromExecutable(*executable); + RunFileCheck(ir_module_string, pattern); +} + +std::unique_ptr<Executable> CodegenTestBase::CompileToExecutable( + std::unique_ptr<HloModule> hlo_module) { + auto module_config = MakeUnique<HloModuleConfig>( + MakeProgramShape(hlo_module->entry_computation())); + return backend_->compiler() + ->Compile(std::move(hlo_module), std::move(module_config), + test_hlo_dumper_, backend_->default_stream_executor()) + .ConsumeValueOrDie(); +} + +void CodegenTestBase::RunFileCheck(const string& input, const string& pattern) { + // Write input to a temporary file. 
+ char tempdir_template[] = "/tmp/ir_testXXXXXX"; + char* tempdir_name = mkdtemp(tempdir_template); + CHECK_NOTNULL(tempdir_name); + string pattern_path = + tensorflow::io::JoinPath(tempdir_name, "xla_hlo_test_ir_pattern"); + TF_CHECK_OK(tensorflow::WriteStringToFile(tensorflow::Env::Default(), + pattern_path, pattern)); + + // Invoke FileCheck to check whether input matches `pattern`. + tensorflow::SubProcess file_check_process; + const char* test_srcdir = getenv("TEST_SRCDIR"); + if (test_srcdir == nullptr) { + test_srcdir = "."; + } + string file_check_path = tensorflow::io::JoinPath( + test_srcdir, "external/llvm/FileCheck"); + file_check_process.SetProgram(file_check_path, + {file_check_path, pattern_path}); + file_check_process.SetChannelAction(tensorflow::CHAN_STDIN, + tensorflow::ACTION_PIPE); + file_check_process.SetChannelAction(tensorflow::CHAN_STDERR, + tensorflow::ACTION_PIPE); + CHECK(file_check_process.Start()); + string standard_error; + int exit_status = file_check_process.Communicate( + /*stdin_input=*/&input, /*stdout_output=*/nullptr, + /*stderr_output=*/&standard_error); + + // FileCheck returns 0 when the inputs match. If matching failed, we output + // the error message generated by FileCheck. + SCOPED_TRACE(tensorflow::strings::StrCat("Input to FileCheck:\n", input)); + EXPECT_EQ(0, exit_status) << standard_error; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/tests/codegen_test_base.h b/tensorflow/compiler/xla/tests/codegen_test_base.h new file mode 100644 index 0000000000..50c0453107 --- /dev/null +++ b/tensorflow/compiler/xla/tests/codegen_test_base.h @@ -0,0 +1,56 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_TESTS_CODEGEN_TEST_BASE_H_ +#define TENSORFLOW_COMPILER_XLA_TESTS_CODEGEN_TEST_BASE_H_ + +#include <memory> +#include <string> + +#include "tensorflow/compiler/xla/service/executable.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" + +namespace xla { + +// Tests that verify IR emitted by the CPU/GPU backend is as expected. +class CodegenTestBase : public HloTestBase { + protected: + CodegenTestBase() {} + + // Returns the embedded LLVM IR from the given executable. Codegen tests must + // override this method, but execution tests do not have to because they do + // not examine the embedded IR. + virtual string GetIrFromExecutable(const Executable& executable) = 0; + + // Compiles the given HLO module to LLVM IR and verifies the IR matches the + // given pattern. `pattern` is in the FileCheck pattern matching syntax + // (http://llvm.org/docs/CommandGuide/FileCheck.html). + void CompileAndVerifyIr(std::unique_ptr<HloModule> hlo_module, + const string& pattern); + + protected: + // Compiles hlo_module to an executable, CHECK-failing if this fails. + std::unique_ptr<Executable> CompileToExecutable( + std::unique_ptr<HloModule> hlo_module); + + // Runs FileCheck with the given pattern over the given string and EXPECTs + // that FileCheck succeeded in matching the input. 
+ void RunFileCheck(const string& input, const string& pattern); +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_TESTS_CODEGEN_TEST_BASE_H_ diff --git a/tensorflow/compiler/xla/tests/compilation_cache_test.cc b/tensorflow/compiler/xla/tests/compilation_cache_test.cc new file mode 100644 index 0000000000..38ce007cb0 --- /dev/null +++ b/tensorflow/compiler/xla/tests/compilation_cache_test.cc @@ -0,0 +1,218 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <initializer_list> +#include <memory> +#include <string> + +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" +#include "tensorflow/compiler/xla/xla.pb.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class CompilationCacheTest : public ClientLibraryTestBase { + public: + void ExecuteComputationR0F32( + const Computation& computation, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, float expected_result, + bool expect_cache_hit) { + ExecutionProfile execution_profile; + std::unique_ptr<Literal> result = + client_ + ->ExecuteAndTransfer(computation, arguments, + /*output_layout=*/nullptr, &execution_profile) + .ConsumeValueOrDie(); + LiteralTestUtil::ExpectNear(*LiteralUtil::CreateR0<float>(expected_result), + *result, error_spec_); + EXPECT_EQ(expect_cache_hit, execution_profile.compilation_cache_hit()); + } + + void ExecuteComputationR2F32( + const Computation& computation, + tensorflow::gtl::ArraySlice<GlobalData*> arguments, + std::initializer_list<std::initializer_list<float>> expected_result, + bool expect_cache_hit) { + ExecutionProfile execution_profile; + auto data_handle = + client_ + 
->Execute(computation, arguments, /*output_layout=*/nullptr, + &execution_profile) + .ConsumeValueOrDie(); + std::unique_ptr<Literal> result = + client_->Transfer(*data_handle).ConsumeValueOrDie(); + LiteralTestUtil::ExpectNear(*LiteralUtil::CreateR2<float>(expected_result), + *result, error_spec_); + EXPECT_EQ(expect_cache_hit, execution_profile.compilation_cache_hit()); + } + + ErrorSpec error_spec_{0.0001}; +}; + +XLA_TEST_F(CompilationCacheTest, ComputationCalledMultipleTimes) { + ComputationBuilder builder(client_, TestName()); + builder.Neg(builder.ConstantR0<float>(42.0)); + Computation computation = builder.Build().ConsumeValueOrDie(); + + ExecuteComputationR0F32(computation, {}, -42.0, /*expect_cache_hit=*/false); + ExecuteComputationR0F32(computation, {}, -42.0, /*expect_cache_hit=*/true); + ExecuteComputationR0F32(computation, {}, -42.0, /*expect_cache_hit=*/true); +} + +XLA_TEST_F(CompilationCacheTest, ComputationCalledWithDifferentParameters) { + std::unique_ptr<GlobalData> data_42 = + client_->TransferToServer(*LiteralUtil::CreateR0<float>(42.0f)) + .ConsumeValueOrDie(); + std::unique_ptr<GlobalData> data_123 = + client_->TransferToServer(*LiteralUtil::CreateR0<float>(123.0f)) + .ConsumeValueOrDie(); + std::unique_ptr<GlobalData> data_456 = + client_->TransferToServer(*LiteralUtil::CreateR0<float>(456.0f)) + .ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + builder.Neg(builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "param")); + Computation computation = builder.Build().ConsumeValueOrDie(); + + ExecuteComputationR0F32(computation, {data_42.get()}, -42.0, + /*expect_cache_hit=*/false); + ExecuteComputationR0F32(computation, {data_123.get()}, -123.0, + /*expect_cache_hit=*/true); + ExecuteComputationR0F32(computation, {data_456.get()}, -456.0, + /*expect_cache_hit=*/true); + ExecuteComputationR0F32(computation, {data_42.get()}, -42.0, + /*expect_cache_hit=*/true); +} + +XLA_TEST_F(CompilationCacheTest, 
MultipleComputations) { + ComputationBuilder builder_neg(client_, TestName() + "_neg"); + builder_neg.Neg(builder_neg.ConstantR0<float>(42.0)); + Computation computation_neg = builder_neg.Build().ConsumeValueOrDie(); + + ComputationBuilder builder_exp(client_, TestName() + "_exp"); + builder_exp.Exp(builder_exp.ConstantR0<float>(1.0)); + Computation computation_exp = builder_exp.Build().ConsumeValueOrDie(); + + ComputationBuilder builder_add(client_, TestName() + "_add"); + builder_add.Add(builder_add.ConstantR0<float>(2.0), + builder_add.ConstantR0<float>(3.0)); + Computation computation_add = builder_add.Build().ConsumeValueOrDie(); + + ExecuteComputationR0F32(computation_neg, {}, -42.0, + /*expect_cache_hit=*/false); + ExecuteComputationR0F32(computation_exp, {}, 2.7182817, + /*expect_cache_hit=*/false); + ExecuteComputationR0F32(computation_add, {}, 5.0, + /*expect_cache_hit=*/false); + ExecuteComputationR0F32(computation_neg, {}, -42.0, + /*expect_cache_hit=*/true); +} + +XLA_TEST_F(CompilationCacheTest, DifferentParameterLayouts) { + // Create two GlobalData arrays with the same shape but different + // layouts. Use these arrays as parameters to a simple computation. If the + // layout of the array changes then computation should be recompiled (cache + // miss). 
+ auto rowmaj_array = test_utils::CreateR2LiteralWithLayout( + {{1.0f, 2.0f}, {3.0f, 4.0f}}, /*minor_to_major=*/{1, 0}); + auto rowmaj_handle = + client_->TransferToServer(*rowmaj_array).ConsumeValueOrDie(); + + auto colmaj_array = test_utils::CreateR2LiteralWithLayout( + {{1.0f, 2.0f}, {3.0f, 4.0f}}, /*minor_to_major=*/{0, 1}); + auto colmaj_handle = + client_->TransferToServer(*colmaj_array).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + builder.Parameter(0, ShapeUtil::MakeShape(F32, {2, 2}), "param0"); + Computation computation = builder.Build().ConsumeValueOrDie(); + + ExecuteComputationR2F32(computation, {colmaj_handle.get()}, + {{1.0f, 2.0f}, {3.0f, 4.0f}}, + /*expect_cache_hit=*/false); + ExecuteComputationR2F32(computation, {colmaj_handle.get()}, + {{1.0f, 2.0f}, {3.0f, 4.0f}}, + /*expect_cache_hit=*/true); + ExecuteComputationR2F32(computation, {rowmaj_handle.get()}, + {{1.0f, 2.0f}, {3.0f, 4.0f}}, + /*expect_cache_hit=*/false); + ExecuteComputationR2F32(computation, {rowmaj_handle.get()}, + {{1.0f, 2.0f}, {3.0f, 4.0f}}, + /*expect_cache_hit=*/true); + ExecuteComputationR2F32(computation, {colmaj_handle.get()}, + {{1.0f, 2.0f}, {3.0f, 4.0f}}, + /*expect_cache_hit=*/true); +} + +XLA_TEST_F(CompilationCacheTest, MutatedComputation) { + // Build a computation, execute it, then mutate it. The mutated computation + // should not be in the cache until it is run once. This must be done through + // the stub interface because Computations built from ComputationBuilder are + // immutable. 
+ ComputationBuilder builder(client_, TestName()); + auto neg = builder.Neg(builder.ConstantR0<float>(42.0)); + Computation computation = builder.Build().ConsumeValueOrDie(); + + ExecuteComputationR0F32(computation, {}, -42.0, /*expect_cache_hit=*/false); + ExecuteComputationR0F32(computation, {}, -42.0, /*expect_cache_hit=*/true); + + BinaryOpRequest request; + request.set_binop(BINOP_ADD); + *request.mutable_lhs() = neg; + *request.mutable_rhs() = neg; + OpRequest op_request; + *op_request.mutable_computation() = computation.handle(); + *op_request.mutable_binary_op_request() = request; + OpResponse response; + tensorflow::Status s = client_->stub()->Op(&op_request, &response); + ASSERT_TRUE(s.ok()); + + ExecuteComputationR0F32(computation, {}, -84.0, /*expect_cache_hit=*/false); + ExecuteComputationR0F32(computation, {}, -84.0, /*expect_cache_hit=*/true); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/compute_constant_test.cc b/tensorflow/compiler/xla/tests/compute_constant_test.cc new file mode 100644 index 0000000000..709ce5029c --- /dev/null +++ b/tensorflow/compiler/xla/tests/compute_constant_test.cc @@ -0,0 +1,249 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <memory> +#include <utility> +#include <vector> + +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class ComputeConstantTest : public ClientLibraryTestBase { + public: + StatusOr<std::unique_ptr<Literal>> ComputeConstantLiteral( + ComputationDataHandle operand, ComputationBuilder* builder, + Layout* output_layout = nullptr) { + TF_ASSIGN_OR_RETURN(auto remote_computed, + builder->ComputeConstant(operand, output_layout)); + TF_ASSIGN_OR_RETURN(auto computed, client_->Transfer(*remote_computed)); + return std::move(computed); 
+  }
+
+  template <class Scalar>
+  StatusOr<Scalar> ComputeConstantScalar(ComputationDataHandle operand,
+                                         ComputationBuilder* builder) {
+    TF_ASSIGN_OR_RETURN(auto literal, ComputeConstantLiteral(operand, builder));
+    return LiteralUtil::Get<Scalar>(*literal, {});
+  }
+
+  bool IsConstant(const ComputationDataHandle& operand,
+                  ComputationBuilder* builder) {
+    StatusOr<bool> result = builder->IsConstant(operand);
+    EXPECT_TRUE(result.ok()) << result.status();
+    return result.ok() ? result.ValueOrDie() : false;
+  }
+
+  template <class Scalar>
+  void ExpectConstantComputedScalar(ComputationDataHandle operand,
+                                    Scalar expected,
+                                    ComputationBuilder* builder) {
+    StatusOr<Scalar> computed = ComputeConstantScalar<Scalar>(operand, builder);
+    ASSERT_TRUE(computed.ok()) << computed.status();
+    std::unique_ptr<Literal> expected_literal = LiteralUtil::CreateR0(expected);
+    LiteralTestUtil::ExpectEqual(*expected_literal, *LiteralUtil::CreateR0(computed.ValueOrDie()));
+  }
+};
+
+TEST_F(ComputeConstantTest, ScalarInt32Literal) {
+  ComputationBuilder b(client_, TestName());
+  auto computation = b.ConstantR0<int32>(42);
+  EXPECT_TRUE(IsConstant(computation, &b));
+
+  auto value = ComputeConstantScalar<int32>(computation, &b);
+  ASSERT_TRUE(value.ok()) << value.status();
+  EXPECT_EQ(value.ValueOrDie(), 42);
+}
+
+TEST_F(ComputeConstantTest, ScalarFloatAdd) {
+  ComputationBuilder b(client_, TestName());
+  auto computation =
+      b.Add(b.ConstantR0<float>(42.5f), b.ConstantR0<float>(1.5f));
+  EXPECT_TRUE(IsConstant(computation, &b));
+
+  auto value = ComputeConstantScalar<float>(computation, &b);
+  ASSERT_TRUE(value.ok()) << value.status();
+  EXPECT_EQ(value.ValueOrDie(), 44.0f);
+}
+
+TEST_F(ComputeConstantTest, ScalarRng) {
+  ComputationBuilder b(client_, TestName());
+  auto computation =
+      b.RngUniform(b.ConstantR0<float>(1.1f), b.ConstantR0<float>(2.1f),
+                   ShapeUtil::MakeShape(F32, {}));
+  EXPECT_FALSE(IsConstant(computation, &b));
+
+  auto value = ComputeConstantScalar<float>(computation, &b);
+  ASSERT_FALSE(value.ok())
+      << "computing a RNG value should not be considered a constant";
+}
+
+TEST_F(ComputeConstantTest, DirectParam) {
+  ComputationBuilder b(client_, TestName());
+  auto computation = b.Parameter(0, ShapeUtil::MakeShape(F32, {}), "param");
+  EXPECT_FALSE(IsConstant(computation, &b));
+
+  auto value = ComputeConstantScalar<float>(computation, &b);
+  EXPECT_TRUE(tensorflow::StringPiece(value.status().ToString())
+                  .contains("depends on parameter"))
+      << value.status();
+}
+
+TEST_F(ComputeConstantTest, IndirectParam) {
+  ComputationBuilder b(client_, TestName());
+  auto computation =
+      b.Add(b.ConstantR0<float>(1.0f),
+            b.Parameter(0, ShapeUtil::MakeShape(F32, {}), "param"));
+  EXPECT_FALSE(IsConstant(computation, &b));
+
+  auto value = ComputeConstantScalar<float>(computation, &b);
+  EXPECT_TRUE(tensorflow::StringPiece(value.status().ToString())
+                  .contains("depends on parameter"))
+      << value.status();
+}
+
+// Test computation of an expression interspersed with param nodes but
+// the expression does not depend on the param nodes.
+TEST_F(ComputeConstantTest, UnrelatedParam) { + ComputationBuilder b(client_, TestName()); + + auto param_a = b.Parameter(10, ShapeUtil::MakeShape(F32, {}), "param0"); + auto constant_4 = b.Add(b.ConstantR0<float>(2.5f), b.ConstantR0<float>(1.5f)); + auto not_constant_a = b.Add(constant_4, param_a); + + auto param_b = b.Parameter(1, ShapeUtil::MakeShape(F32, {}), "param1"); + auto constant_9 = b.Mul(b.ConstantR0<float>(2.0f), b.ConstantR0<float>(4.5f)); + auto not_constant_b = b.Add(param_b, constant_9); + + auto constant_13 = b.Add(constant_4, constant_9); + b.Add(not_constant_b, b.Add(constant_13, not_constant_a)); + + EXPECT_TRUE(IsConstant(constant_13, &b)); + + auto value = ComputeConstantScalar<float>(constant_13, &b); + ASSERT_TRUE(value.ok()) << value.status(); + EXPECT_EQ(value.ValueOrDie(), 13.0f); +} + +TEST_F(ComputeConstantTest, NonScalarAdd) { + ComputationBuilder b(client_, TestName()); + + auto computation = + b.Add(b.ConstantR1<int32>({1, 2}), b.ConstantR1<int32>({3, 4})); + EXPECT_TRUE(IsConstant(computation, &b)); + + auto computed = ComputeConstantLiteral(computation, &b); + ASSERT_TRUE(computed.ok()) << computed.status(); + std::unique_ptr<Literal> expected_literal = + LiteralUtil::CreateR1<int32>({4, 6}); + LiteralTestUtil::ExpectEqual(*expected_literal, *computed.ValueOrDie()); +} + +TEST_F(ComputeConstantTest, IntegerDivide) { + ComputationBuilder b(client_, TestName()); + auto computation = b.Div(b.ConstantR0<int32>(15), b.ConstantR0<int32>(3)); + EXPECT_TRUE(IsConstant(computation, &b)); + + auto computed = ComputeConstantLiteral(computation, &b); + ASSERT_TRUE(computed.ok()) << computed.status(); + std::unique_ptr<Literal> expected_literal = LiteralUtil::CreateR0<int32>(5); + LiteralTestUtil::ExpectEqual(*expected_literal, *computed.ValueOrDie()); +} + +XLA_TEST_F(ComputeConstantTest, Layout) { + ComputationBuilder b(client_, TestName()); + + std::vector<std::vector<int64>> layouts = {{0, 1}, {1, 0}}; + for (const std::vector<int64>& 
layout : layouts) { + auto layout_proto = LayoutUtil::MakeLayout(layout); + auto computed = + ComputeConstantLiteral(b.Add(b.ConstantR2<int32>({{1, 2}, {3, 4}}), + b.ConstantR2<int32>({{10, 20}, {30, 40}})), + &b, &layout_proto); + ASSERT_TRUE(computed.ok()) << computed.status(); + + std::unique_ptr<Literal> expected_literal = + test_utils::CreateR2LiteralWithLayout<int32>({{11, 22}, {33, 44}}, + layout); + LiteralTestUtil::AssertEqualShapesAndLayouts( + expected_literal->shape(), computed.ValueOrDie()->shape()); + LiteralTestUtil::ExpectEqual(*expected_literal, *computed.ValueOrDie()); + } +} + +// This test is permanently disabled on CPU because it requires that the +// backend used for execution is different than the backend used for +// ComputeConstant which is always cpu. +TEST_F(ComputeConstantTest, DISABLED_ON_CPU(ReuseComputedConstant)) { + // Compute a trivial constant, then try to use the value in an Execute + // call. This should fail because the constant resides on the CPU and the + // Execute call is executed on a different backend. + ComputationBuilder constant_b(client_, TestName()); + auto constant = constant_b.ConstantR0<int32>(42); + auto handle = constant_b.ComputeConstant(constant).ConsumeValueOrDie(); + auto literal = client_->Transfer(*handle).ConsumeValueOrDie(); + LiteralTestUtil::ExpectR0Equal(42, *literal); + + // Build trivial computation which takes one parameter. + ComputationBuilder b(client_, TestName()); + b.Neg(b.Parameter(0, ShapeUtil::MakeShape(S32, {}), "param0")); + auto computation = b.Build().ConsumeValueOrDie(); + + // Try to use value from ComputeConstant in Execute. 
+ auto execute_status = client_->Execute(computation, {handle.get()}); + EXPECT_FALSE(execute_status.ok()); + EXPECT_MATCH( + execute_status.status().error_message(), + testing::ContainsRegex("argument 0 is on device Host:0 but computation " + "will be executed on device")); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/concat_test.cc b/tensorflow/compiler/xla/tests/concat_test.cc new file mode 100644 index 0000000000..9a48b19b96 --- /dev/null +++ b/tensorflow/compiler/xla/tests/concat_test.cc @@ -0,0 +1,523 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array3d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +using ConcatTest = ClientLibraryTestBase; + +// Concatenate expects at least one argument. +XLA_TEST_F(ConcatTest, Concat_Nothing) { + ComputationBuilder builder(client_, TestName()); + auto concatenated = builder.ConcatInDim({}, 0); + StatusOr<Computation> computation_status = builder.Build(); + ASSERT_FALSE(computation_status.ok()); + EXPECT_MATCH( + computation_status.status().ToString(), + testing::ContainsRegex("Concatenate expects at least one argument")); +} + +// Concatenate with one argument works. +XLA_TEST_F(ConcatTest, Concat_R1_With_Nothing) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>({42.0, 64.0}); + auto concatenated = builder.ConcatInDim({a}, 0); + + std::vector<float> expected = {42, 64}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +// Show that we can't concatenate R0 with R0 because we can't name the dimension +// to concatenate on. 
+XLA_TEST_F(ConcatTest, CannotConcatR0WithR0) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR0<float>(42.0); + auto b = builder.ConstantR0<float>(64.0); + auto concatenated = builder.ConcatInDim({a, b}, 0); + StatusOr<Computation> computation_status = builder.Build(); + ASSERT_FALSE(computation_status.ok()); + EXPECT_MATCH(computation_status.status().ToString(), + testing::ContainsRegex( + "dimension to concatenate along out of bounds: 0")); +} + +XLA_TEST_F(ConcatTest, Concat_R1_L0_With_R1_L0) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>({}); + auto b = builder.ConstantR1<float>({}); + auto concatenated = builder.ConcatInDim({a, b}, 0); + + std::vector<float> expected = {}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat_R1_L0_With_R1_L1) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>({}); + auto b = builder.ConstantR1<float>({256.0}); + auto concatenated = builder.ConcatInDim({a, b}, 0); + + std::vector<float> expected = {256}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat_R1_L2_With_R1_L0) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>({42.0, 64.0}); + auto b = builder.ConstantR1<float>({}); + auto concatenated = builder.ConcatInDim({a, b}, 0); + + std::vector<float> expected = {42, 64}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat_R1_L2_With_R1_L1) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>({42.0, 64.0}); + auto b = builder.ConstantR1<float>({256.0}); + auto concatenated = builder.ConcatInDim({a, b}, 0); + + std::vector<float> expected = {42, 64, 256}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, 
Concat_R1_L253_With_R1_L7) { + std::vector<float> lhs(253); + std::vector<float> rhs(7); + std::vector<float> expected(253 + 7); + for (int i = 0; i < 253; ++i) { + expected[i] = lhs[i] = i + 1; + } + for (int i = 0; i < 7; ++i) { + expected[253 + i] = rhs[i] = 253 + i + 1; + } + + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>(lhs); + auto b = builder.ConstantR1<float>(rhs); + auto concatenated = builder.ConcatInDim({a, b}, 0); + + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat_0x0_With_0x0) { + for (int dim : {0, 1}) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2FromArray2D(Array2D<float>(0, 0)); + auto b = builder.ConstantR2FromArray2D(Array2D<float>(0, 0)); + auto concatenated = builder.ConcatInDim({a, b}, dim); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(0, 0), {}, + ErrorSpec(0.0001)); + } +} + +XLA_TEST_F(ConcatTest, Concat_1x1_With_1x1_InDim0) { + ComputationBuilder builder(client_, TestName()); + auto a_array = CreatePatternedMatrix(1, 1); + auto b_array = CreatePatternedMatrix(1, 1, /*offset=*/64.0); + auto a = builder.ConstantR2FromArray2D(*a_array); + auto b = builder.ConstantR2FromArray2D(*b_array); + auto concatenated = builder.ConcatInDim({a, b}, 0); + + Array2D<float> expected({ + {0}, {64}, + }); + ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat_1x1_With_1x1_InDim1) { + ComputationBuilder builder(client_, TestName()); + auto a_array = CreatePatternedMatrix(1, 1); + auto b_array = CreatePatternedMatrix(1, 1, /*offset=*/64.0); + auto a = builder.ConstantR2FromArray2D(*a_array); + auto b = builder.ConstantR2FromArray2D(*b_array); + auto concatenated = builder.ConcatInDim({a, b}, 1); + + Array2D<float> expected({ + {0, 64}, + }); + ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat2x0With2x5) { 
+ ComputationBuilder builder(client_, TestName()); + auto b_array = CreatePatternedMatrix(2, 5, /*offset=*/64.0); + auto a = builder.ConstantR2FromArray2D(Array2D<float>(2, 0)); + auto b = builder.ConstantR2FromArray2D(*b_array); + auto concatenated = builder.ConcatInDim({a, b}, 1); + + ComputeAndCompareR2<float>(&builder, *b_array, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat2x3With2x5) { + ComputationBuilder builder(client_, TestName()); + auto a_array = CreatePatternedMatrix(2, 3); + auto b_array = CreatePatternedMatrix(2, 5, /*offset=*/64.0); + auto a = builder.ConstantR2FromArray2D(*a_array); + auto b = builder.ConstantR2FromArray2D(*b_array); + auto concatenated = builder.ConcatInDim({a, b}, 1); + + Array2D<float> expected({ + {0, 1, 2, 64, 65, 66, 67, 68}, + {1000, 1001, 1002, 1064, 1065, 1066, 1067, 1068}, + }); + ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat3x2With0x2) { + ComputationBuilder builder(client_, TestName()); + auto a_array = CreatePatternedMatrix(3, 2); + auto a = builder.ConstantR2FromArray2D(*a_array); + auto b = builder.ConstantR2FromArray2D(Array2D<float>(0, 2)); + auto concatenated = builder.ConcatInDim({a, b}, 0); + + ComputeAndCompareR2<float>(&builder, *a_array, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat3x2With5x2) { + ComputationBuilder builder(client_, TestName()); + auto a_array = CreatePatternedMatrix(3, 2); + auto b_array = CreatePatternedMatrix(5, 2, /*offset=*/64.0); + auto a = builder.ConstantR2FromArray2D(*a_array); + auto b = builder.ConstantR2FromArray2D(*b_array); + auto concatenated = builder.ConcatInDim({a, b}, 0); + + Array2D<float> expected({ + {0, 1}, + {1000, 1001}, + {2000, 2001}, + {64, 65}, + {1064, 1065}, + {2064, 2065}, + {3064, 3065}, + {4064, 4065}, + }); + ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat_R3_3x0x2_3x0x1) { + ComputationBuilder builder(client_, 
TestName()); + auto a = builder.ConstantR3FromArray3D(Array3D<float>(3, 0, 2)); + auto b = builder.ConstantR3FromArray3D(Array3D<float>(3, 0, 1)); + auto concatenated = builder.ConcatInDim({a, b}, 2); + ComputeAndCompareR3<float>(&builder, Array3D<float>(3, 0, 3), {}, + ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat_R3_3x1x2_3x1x1) { + ComputationBuilder builder(client_, TestName()); + Array3D<float> a_array({ + // 3x1x2 + {{0, 1}}, + {{2, 3}}, + {{4, 5}}, + }); + Array3D<float> b_array({ + // 3x1x1 + {{6}}, + {{7}}, + {{8}}, + }); + auto a = builder.ConstantR3FromArray3D(a_array); + auto b = builder.ConstantR3FromArray3D(b_array); + auto concatenated = builder.ConcatInDim({a, b}, 2); + + Array3D<float> expected({ + {{0, 1, 6}}, {{2, 3, 7}}, {{4, 5, 8}}, + }); + ComputeAndCompareR3<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat_R1_1x1_1x1_1x1) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>({42.0}); + auto b = builder.ConstantR1<float>({64.0}); + auto c = builder.ConstantR1<float>({256.0}); + auto concatenated = builder.ConcatInDim({a, b, c}, 0); + + std::vector<float> expected = {42, 64, 256}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat_R3_3x1x2_3x1x1_3x1x1) { + ComputationBuilder builder(client_, TestName()); + Array3D<float> a_array({ + // 3x1x2 + {{0, 1}}, + {{4, 5}}, + {{8, 9}}, + }); + Array3D<float> b_array({ + // 3x1x1 + {{2}}, + {{6}}, + {{10}}, + }); + Array3D<float> c_array({ + // 3x1x1 + {{3}}, + {{7}}, + {{11}}, + }); + auto a = builder.ConstantR3FromArray3D(a_array); + auto b = builder.ConstantR3FromArray3D(b_array); + auto c = builder.ConstantR3FromArray3D(c_array); + auto concatenated = builder.ConcatInDim({a, b, c}, 2); + + Array3D<float> expected({ + {{0, 1, 2, 3}}, {{4, 5, 6, 7}}, {{8, 9, 10, 11}}, + }); + ComputeAndCompareR3<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + 
+XLA_TEST_F(ConcatTest, DoubleConcatLeftAssociative) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>({42.0}); + auto b = builder.ConstantR1<float>({64.0}); + auto c = builder.ConstantR1<float>({256.0}); + // concatenated = (a concat b) concat c + auto concatenated = + builder.ConcatInDim({builder.ConcatInDim({a, b}, 0), c}, 0); + + std::vector<float> expected = {42, 64, 256}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, DoubleConcatRightAssociative) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>({42.0}); + auto b = builder.ConstantR1<float>({64.0}); + auto c = builder.ConstantR1<float>({256.0}); + // concatenated = a concat (b concat c) + auto concatenated = + builder.ConcatInDim({a, builder.ConcatInDim({b, c}, 0)}, 0); + + std::vector<float> expected = {42, 64, 256}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat_1x1024_With_1x1024_InDim0) { + Array2D<float> lhs(1, 1024); + Array2D<float> rhs(1, 1024); + for (int i = 0; i < 1024; ++i) { + lhs(0, i) = i; + rhs(0, i) = i + 1024; + } + + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2FromArray2D<float>(lhs); + auto b = builder.ConstantR2FromArray2D<float>(rhs); + builder.ConcatInDim({a, b}, 0); + + Array2D<float> expected(2, 1024); + for (int i = 0; i < 1024; ++i) { + expected(0, i) = i; + expected(1, i) = i + 1024; + } + ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat_1x1024_With_1x1024_InDim1) { + Array2D<float> lhs(1, 1024); + Array2D<float> rhs(1, 1024); + for (int i = 0; i < 1024; ++i) { + lhs(0, i) = i; + rhs(0, i) = i + 1024; + } + + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2FromArray2D<float>(lhs); + auto b = builder.ConstantR2FromArray2D<float>(rhs); + builder.ConcatInDim({a, b}, 1); 
+ + Array2D<float> expected(1, 2048); + for (int i = 0; i < 1024; ++i) { + expected(0, i) = i; + expected(0, i + 1024) = i + 1024; + } + ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConcatTest, Concat_64x64_With_64x2) { + Array2D<float> lhs(64, 64); + Array2D<float> rhs(64, 2); + for (int i0 = 0; i0 < 64; ++i0) { + for (int i1 = 0; i1 < 64; ++i1) { + lhs(i0, i1) = (i0 << 10) | i1; + } + for (int i1 = 0; i1 < 2; ++i1) { + rhs(i0, i1) = (i0 << 10) | (i1 + 64); + } + } + + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2FromArray2D<float>(lhs); + auto b = builder.ConstantR2FromArray2D<float>(rhs); + builder.ConcatInDim({a, b}, 1); + + Array2D<float> expected(64, 66); + for (int i0 = 0; i0 < 64; ++i0) { + for (int i1 = 0; i1 < 66; ++i1) { + expected(i0, i1) = (i0 << 10) | i1; + } + } + ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +// Show that we can't concatenate with an opaques. +XLA_TEST_F(ConcatTest, CannotConcatOpaques) { + ComputationBuilder builder(client_, TestName()); + auto opaque_shape = ShapeUtil::MakeOpaqueShape(); + auto r1f32 = xla::ShapeUtil::MakeShape(xla::F32, {1}); + auto x = builder.Parameter(0, r1f32, "x"); + auto y = builder.Parameter(1, opaque_shape, "y"); + auto concatenated = builder.ConcatInDim({x, y}, 0); + StatusOr<Computation> computation_status = builder.Build(); + ASSERT_FALSE(computation_status.ok()); + EXPECT_MATCH( + computation_status.status().ToString(), + testing::ContainsRegex( + "Expected non-opaque argument for operand of concatenation")); +} + +XLA_TEST_F(ConcatTest, ConcatSeveralBoxedPredicates) { + ComputationBuilder builder(client_, TestName()); + auto p0 = builder.ConstantR1<bool>({true}); + auto p1 = builder.ConstantR1<bool>({false}); + auto p2 = builder.ConstantR1<bool>({true}); + auto concatenated = builder.ConcatInDim({p0, p1, p2}, 0); + + bool expected[] = {true, false, true}; + ComputeAndCompareR1<bool>(&builder, 
expected, {}); +} + +XLA_TEST_F(ConcatTest, ConcatSeveralR1S32s) { + ComputationBuilder builder(client_, TestName()); + auto a0 = builder.ConstantR1<int32>({1}); + auto a1 = builder.ConstantR1<int32>({2, 3}); + auto a2 = builder.ConstantR1<int32>({4, 5, 6}); + auto a3 = builder.ConstantR1<int32>({7, 8, 9, 10}); + auto concatenated = builder.ConcatInDim({a0, a1, a2, a3}, 0); + + std::vector<int32> expected(10); + std::iota(expected.begin(), expected.end(), 1); + ComputeAndCompareR1<int32>(&builder, expected, {}); +} + +// Describes a binary rank-2 concatenation test. +struct R2BinarySpec { + int64 lhs_dim0; + int64 lhs_dim1; + int64 rhs_dim0; + int64 rhs_dim1; + int64 concat_dimension; +}; + +// TEST_P harness for binary rank-2 concatenation. +class ConcatR2BinaryTest : public ClientLibraryTestBase, + public ::testing::WithParamInterface<R2BinarySpec> { +}; + +TEST_P(ConcatR2BinaryTest, DoIt) { + const R2BinarySpec& spec = GetParam(); + Array2D<int32> lhs(spec.lhs_dim0, spec.lhs_dim1); + lhs.FillUnique(); + Array2D<int32> rhs(spec.rhs_dim0, spec.rhs_dim1); + rhs.FillUnique(1000); + + ComputationBuilder builder(client_, TestName()); + auto a0 = builder.ConstantR2FromArray2D<int32>(lhs); + auto a1 = builder.ConstantR2FromArray2D<int32>(rhs); + builder.ConcatInDim({a0, a1}, spec.concat_dimension); + + std::unique_ptr<Array2D<int32>> expected = + ReferenceUtil::Concat2D(lhs, rhs, spec.concat_dimension); + ComputeAndCompareR2<int32>(&builder, *expected, {}); +} + +// Regression test for b/31944287. x*y is used (at the same index) by all +// operands of the concat. We should emit x*y in three incoming basic blocks of +// the concat because these basic blocks are not control-equivalent. 
+// +// x*y +// / | \ +// add1 add2 add3 +// \ | / +// concat +XLA_TEST_F(ConcatTest, ConcatOperandsOfSameOperand) { + auto f32_scalar = ShapeUtil::MakeShape(xla::F32, {}); + auto x_literal = LiteralUtil::CreateR0<float>(2.f); + auto y_literal = LiteralUtil::CreateR0<float>(3.f); + auto x_data = client_->TransferToServer(*x_literal).ConsumeValueOrDie(); + auto y_data = client_->TransferToServer(*y_literal).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto x = builder.Parameter(0, f32_scalar, "x"); + auto y = builder.Parameter(1, f32_scalar, "y"); + auto mul = builder.Mul(x, y); + auto add1 = builder.Add(mul, builder.ConstantR1<float>({1.f, 2.f})); + auto add2 = builder.Add(mul, builder.ConstantR1<float>({3.f, 4.f})); + auto add3 = builder.Add(mul, builder.ConstantR1<float>({5.f, 6.f})); + builder.ConcatInDim({add1, add2, add3}, /*dimension=*/0); + + ComputeAndCompareR1<float>(&builder, {7., 8., 9., 10., 11., 12.}, + {x_data.get(), y_data.get()}, ErrorSpec(1e-4)); +} + +INSTANTIATE_TEST_CASE_P(ConcatR2BinaryTestInstantiation, ConcatR2BinaryTest, + ::testing::Values(R2BinarySpec{1, 1, 1, 1, 0}, + R2BinarySpec{1, 1, 1, 1, 1}, + R2BinarySpec{4, 3, 4, 3, 0}, + R2BinarySpec{4, 3, 4, 3, 1}, + R2BinarySpec{7, 128, 1, 128, 0}, + R2BinarySpec{8, 127, 8, 1, 1})); + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/constants_test.cc b/tensorflow/compiler/xla/tests/constants_test.cc new file mode 100644 
index 0000000000..58d52ac116 --- /dev/null +++ b/tensorflow/compiler/xla/tests/constants_test.cc @@ -0,0 +1,193 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Tests that constants in program memory round trip as expected. + +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array3d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class ConstantsTest : public ClientLibraryTestBase { + protected: + const ErrorSpec error_spec_{1e-3, 1e-5}; +}; + +TEST_F(ConstantsTest, ZeroCellF32) { + ComputationBuilder builder(client_, TestName()); + builder.ConstantR1<float>({}); + + ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_); +} + +TEST_F(ConstantsTest, OneCellF32) { + std::vector<float> constant = {2.0}; + + ComputationBuilder 
builder(client_, TestName()); + builder.ConstantR1<float>(constant); + + ComputeAndCompareR1<float>(&builder, constant, {}, error_spec_); +} + +TEST_F(ConstantsTest, OneCellS32) { + std::vector<int32> constant = {2}; + + ComputationBuilder builder(client_, TestName()); + builder.ConstantR1<int32>(constant); + + ComputeAndCompareR1<int32>(&builder, constant, {}); +} + +TEST_F(ConstantsTest, OneCellU32) { + std::vector<uint32> constant = {2}; + + ComputationBuilder builder(client_, TestName()); + builder.ConstantR1<uint32>(constant); + + ComputeAndCompareR1<uint32>(&builder, constant, {}); +} + +TEST_F(ConstantsTest, EightCells) { + std::vector<float> constant = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; + + ComputationBuilder builder(client_, TestName()); + builder.ConstantR1<float>(constant); + + ComputeAndCompareR1<float>(&builder, constant, {}, error_spec_); +} + +TEST_F(ConstantsTest, SixteenCells) { + std::vector<float> constant = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, + 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}; + + ComputationBuilder builder(client_, TestName()); + builder.ConstantR1<float>(constant); + + ComputeAndCompareR1<float>(&builder, constant, {}, error_spec_); +} + +TEST_F(ConstantsTest, Empty_0x2) { + ComputationBuilder builder(client_, TestName()); + builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 2)); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(0, 2), {}, error_spec_); +} + +TEST_F(ConstantsTest, Small_2x2) { + std::unique_ptr<Array2D<float>> constant = + MakeLinspaceArray2D(100.0, 200.0, 2, 2); + + ComputationBuilder builder(client_, TestName()); + builder.ConstantR2FromArray2D<float>(*constant); + + ComputeAndCompareR2<float>(&builder, *constant, {}, error_spec_); +} + +TEST_F(ConstantsTest, Empty_3x0x2) { + ComputationBuilder builder(client_, TestName()); + auto constant = builder.ConstantLiteral( + *LiteralUtil::CreateR3FromArray3D<float>(Array3D<float>(3, 0, 2))); + + ComputeAndCompareR3<float>(&builder, 
Array3D<float>(3, 0, 2), {}); +} + +TEST_F(ConstantsTest, Small_2x2x2) { + ComputationBuilder builder(client_, TestName()); + Array3D<float> array3d({ + // x0 x1 + {{1.f, 2.f}, // y0 + {3.f, 4.f}}, // y1 + + {{5.f, 6.f}, // y0 + {7.f, 8.f}}, // y1 + }); + auto constant = builder.ConstantLiteral( + *LiteralUtil::CreateR3FromArray3D<float>(array3d)); + + ComputeAndCompareR3<float>(&builder, array3d, {}); +} + +TEST_F(ConstantsTest, Small_3x2x1x1) { + Array4D<float> input_array(3, 2, 1, 1); + Array2D<float> pz({ + // z0 z1 + {-1.0f, 4.1f}, // p0 + {2.0f, 4.1f}, // p1 + {5.0f, 4.4f}, // p2 + }); + input_array.FillWithPZ(pz); + Literal input_literal = *LiteralUtil::CreateR4FromArray4D(input_array); + + { + ComputationBuilder builder(client_, TestName()); + builder.ConstantLiteral(input_literal); + ComputeAndCompareR4<float>(&builder, input_array, {}, error_spec_); + } + + { + ComputationBuilder builder(client_, TestName()); + builder.ConstantR4FromArray4D<float>(input_array); + ComputeAndCompareR4<float>(&builder, input_array, {}, error_spec_); + } +} + +// TODO(b/29263943): Support tuple constants. 
+TEST_F(ConstantsTest, DISABLED_TupleConstant) { + ComputationBuilder builder(client_, TestName()); + builder.ConstantLiteral(*LiteralUtil::MakeTuple( + {LiteralUtil::CreateR2<float>({{1.0}, {2.0}}).get(), + LiteralUtil::CreateR1<float>({2.0, 42}).get()})); + + std::unique_ptr<Literal> result = ExecuteAndTransferOrDie(&builder, {}); + + LiteralTestUtil::ExpectR2Near<float>({{1.0}, {2.0}}, + result->tuple_literals(0), error_spec_); + LiteralTestUtil::ExpectR1Near<float>({2.0, 42.0}, result->tuple_literals(1), + error_spec_); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/convert_test.cc b/tensorflow/compiler/xla/tests/convert_test.cc new file mode 100644 index 0000000000..9f8c3a9aeb --- /dev/null +++ b/tensorflow/compiler/xla/tests/convert_test.cc @@ -0,0 +1,210 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <cstdint> +#include <limits> +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class ConvertTest : public ClientLibraryTestBase { + public: + explicit ConvertTest(perftools::gputools::Platform* platform = nullptr) + : ClientLibraryTestBase(platform, + /*disabled_pass_names=*/{"algsimp", "inline"}) {} +}; + +TEST_F(ConvertTest, ConvertR1S32ToR1S32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<int32>({42, 64}); + builder.ConvertElementType(a, S32); + + std::vector<int32> expected = {42, 64}; + ComputeAndCompareR1<int32>(&builder, expected, {}); +} + +TEST_F(ConvertTest, ConvertR1F32ToR1F32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>({42.0f, 64.0f}); + builder.ConvertElementType(a, F32); + + std::vector<float> expected = {42.0f, 64.0f}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +TEST_F(ConvertTest, ConvertR1S32ToR1F32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<int32>({42, 64}); + builder.ConvertElementType(a, F32); + + std::vector<float> expected = {42.0f, 64.0f}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConvertTest, 
ConvertR1S0S32ToR1S0F32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<int32>({}); + builder.ConvertElementType(a, F32); + + std::vector<float> expected = {}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +TEST_F(ConvertTest, ConvertR1F32ToR1S32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>({42.6, 64.4}); + builder.ConvertElementType(a, S32); + + std::vector<int32> expected = {42, 64}; + ComputeAndCompareR1<int32>(&builder, expected, {}); +} + +XLA_TEST_F(ConvertTest, ConvertR1S64ToR1F32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<int64>({32, 64}); + builder.ConvertElementType(a, F32); + + std::vector<float> expected = {32.0, 64.0}; + ComputeAndCompareR1<float>(&builder, expected, {}); +} + +XLA_TEST_F(ConvertTest, ConvertR1U8ToR1F32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<uint8_t>({32, 64}); + builder.ConvertElementType(a, F32); + + std::vector<float> expected = {32.0, 64.0}; + ComputeAndCompareR1<float>(&builder, expected, {}); +} + +XLA_TEST_F(ConvertTest, ConvertR1U8ToR1S32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<uint8_t>({32, 64}); + builder.ConvertElementType(a, S32); + + std::vector<int32_t> expected = {32, 64}; + ComputeAndCompareR1<int32_t>(&builder, expected, {}); +} + +XLA_TEST_F(ConvertTest, ConvertR1U8ToR1U32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<uint8_t>({32, 64}); + builder.ConvertElementType(a, U32); + + std::vector<uint32_t> expected = {32, 64}; + ComputeAndCompareR1<uint32_t>(&builder, expected, {}); +} + +XLA_TEST_F(ConvertTest, ConvertR1F32ToR1F64) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>({32.0f, 64.0f}); + builder.ConvertElementType(a, F64); + + std::vector<double> expected = {32.0, 64.0}; + 
ComputeAndCompareR1<double>(&builder, expected, {}); +} + +XLA_TEST_F(ConvertTest, ConvertR1F64ToR1F32) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<double>({32.0, 64.0}); + builder.ConvertElementType(a, F32); + + std::vector<float> expected = {32.0f, 64.0f}; + ComputeAndCompareR1<float>(&builder, expected, {}); +} + +TEST_F(ConvertTest, ConvertS32Extremes) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<int32>( + {std::numeric_limits<int32>::min(), std::numeric_limits<int32>::max()}); + builder.ConvertElementType(a, F32); + + std::vector<float> expected = { + static_cast<float>(std::numeric_limits<int32>::min()), + static_cast<float>(std::numeric_limits<int32>::max())}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +TEST_F(ConvertTest, ConvertMapToS32) { + ComputationBuilder builder(client_, TestName()); + auto b = builder.CreateSubBuilder("convert"); + auto param = b->Parameter(0, ShapeUtil::MakeShape(F32, {}), "in"); + b->ConvertElementType(param, S32); + auto a = builder.ConstantR1<float>({42.0f, 64.0f}); + builder.Map({a}, b->BuildAndNoteError()); + + std::vector<int32> expected = {42, 64}; + ComputeAndCompareR1<int32>(&builder, expected, {}); +} + +TEST_F(ConvertTest, ConvertMapToF32) { + ComputationBuilder builder(client_, TestName()); + auto b = builder.CreateSubBuilder("convert"); + auto param = b->Parameter(0, ShapeUtil::MakeShape(S32, {}), "in"); + b->ConvertElementType(param, F32); + auto a = builder.ConstantR1<int32>({42, 64}); + builder.Map({a}, b->BuildAndNoteError()); + + std::vector<float> expected = {42.0f, 64.0f}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +// Regression test for b/31758660. When ReshapeMover transforms +// input -> reshape -> convert +// to +// input -> convert -> reshape +// the new convert should have the same element type as the old convert. 
+TEST_F(ConvertTest, ConvertReshape) { + ComputationBuilder builder(client_, TestName()); + auto input = builder.ConstantR1<int32>({42}); + auto reshape = builder.Reshape(input, /*dimensions=*/{0}, /*new_sizes=*/{}); + builder.ConvertElementType(reshape, F32); + + ComputeAndCompareR0<float>(&builder, 42.0f, {}, ErrorSpec(0.0001)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc b/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc new file mode 100644 index 0000000000..9f38dc4b36 --- /dev/null +++ b/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc @@ -0,0 +1,117 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <algorithm> +#include <array> +#include <memory> + +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/padding.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class ConvolutionDimensionNumbersTest : public ClientLibraryTestBase {}; + +// Tests the convolution operation with invalid input dimension numbers. +TEST_F(ConvolutionDimensionNumbersTest, InvalidInputDimensionNumbers) { + auto dimension_numbers_status = + ComputationBuilder::CreateConvDimensionNumbers(0, 2, 2, 3, 0, 1, 2, 3); + ASSERT_FALSE(dimension_numbers_status.ok()); + ASSERT_MATCH(dimension_numbers_status.status().error_message(), + testing::ContainsRegex("input are not unique")); +} + +// Tests the convolution operation with invalid weight dimension numbers. 
+TEST_F(ConvolutionDimensionNumbersTest, InvalidWeightDimensionNumbers) { + auto dimension_numbers_status = + ComputationBuilder::CreateConvDimensionNumbers(0, 1, 2, 3, 2, 3, 2, 3); + ASSERT_FALSE(dimension_numbers_status.ok()); + ASSERT_MATCH(dimension_numbers_status.status().error_message(), + testing::ContainsRegex("weight are not unique")); +} + +XLA_TEST_F(ConvolutionDimensionNumbersTest, + TwoConvsWithDifferentDimensionNumbers) { + auto input_array = MakeUnique<Array4D<float>>(2, 3, 5, 5); + input_array->FillWithMultiples(0.1); + auto weight_array = MakeUnique<Array4D<float>>(4, 3, 1, 1); + weight_array->FillWithMultiples(0.2); + auto weight_data = + client_ + ->TransferToServer(*LiteralUtil::CreateR4FromArray4D(*weight_array)) + .ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto input = builder.ConstantR4FromArray4D<float>(*input_array); + auto weight = + builder.Parameter(0, ShapeUtil::MakeShape(F32, {4, 3, 1, 1}), "weight"); + auto conv1 = builder.Conv(input, weight, {1, 1}, Padding::kValid); + + ConvolutionDimensionNumbers dim_nums = + ComputationBuilder::CreateDefaultConvDimensionNumbers(); + // Swap batch_dimension and feature_dimension. + int64 tmp = dim_nums.batch_dimension(); + dim_nums.set_batch_dimension(dim_nums.feature_dimension()); + dim_nums.set_feature_dimension(tmp); + // Swap kernel_input_feature_dimension and kernel_output_feature_dimension. 
+ tmp = dim_nums.kernel_input_feature_dimension(); + dim_nums.set_kernel_input_feature_dimension( + dim_nums.kernel_output_feature_dimension()); + dim_nums.set_kernel_output_feature_dimension(tmp); + builder.ConvWithGeneralDimensions(input, conv1, {1, 1}, Padding::kValid, + dim_nums); + + auto expected_conv1 = ReferenceUtil::ConvArray4D(*input_array, *weight_array, + {1, 1}, Padding::kValid); + auto expected_conv2 = ReferenceUtil::ConvArray4DGeneralDimensions( + *input_array, *expected_conv1, {1, 1}, Padding::kValid, dim_nums); + + ComputeAndCompareR4<float>(&builder, *expected_conv2, {weight_data.get()}, + ErrorSpec(0.001, 0.01)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc new file mode 100644 index 0000000000..ffbda89b94 --- /dev/null +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -0,0 +1,361 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Tests of convolution with trivial kernels and no special variations (like +// strides and padding). + +#include <memory> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/padding.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class ConvolutionTest : public ClientLibraryTestBase { + protected: +#if XLA_TEST_BACKEND_GPU + // XLA:GPU sometimes uses FFT convolution which isn't as precise as spatial + // convolution. So relax the absolute error threshold. 
+  ErrorSpec error_spec_ = ErrorSpec(1e-3);
+#else
+  // Non-GPU backends use direct spatial convolution; keep the tighter bound.
+  ErrorSpec error_spec_ = ErrorSpec(1e-4);
+#endif
+};
+
+// Deep-feature forward convolution (256 input/output channels, 2x2 kernel
+// whose only nonzero tap is the top-left 1.0); checked against the host
+// reference implementation rather than hand-computed values.
+XLA_TEST_F(ConvolutionTest, ForwardPassConvolution_3x3x256_256_OutputZ_Iota) {
+  const int kInputActivationSizeY = 3;
+  const int kInputActivationSizeX = 3;
+  const int kInputActivationSizeZ = 256;
+  const int kKernelSizeX = 2;
+  const int kKernelSizeY = 2;
+  const int kOutputActivationSizeZ = 256;
+  const int kMiniBatchSize = 4;
+  auto alhs =
+      MakeUnique<Array4D<float>>(kMiniBatchSize, kInputActivationSizeZ,
+                                 kInputActivationSizeY, kInputActivationSizeX);
+  alhs->FillWithMultiples(1.0f);
+  // Sanity-check the spatial extent of the constructed input.
+  ASSERT_EQ(3, alhs->width());
+  ASSERT_EQ(3, alhs->height());
+
+  auto arhs =
+      MakeUnique<Array4D<float>>(kOutputActivationSizeZ, kInputActivationSizeZ,
+                                 kKernelSizeY, kKernelSizeX);
+  // Every 2x2 kernel plane has a single nonzero tap at (0, 0).
+  Array2D<float> rhs_raster({
+      {1.0f, 0.0f},  // row 0
+      {0.0f, 0.0f},  // row 1
+  });
+  arhs->FillWithYX(rhs_raster);
+  ASSERT_EQ(2, arhs->width());
+  ASSERT_EQ(2, arhs->height());
+
+  ComputationBuilder builder(client_, TestName());
+  auto lhs = builder.ConstantR4FromArray4D<float>(*alhs);
+  auto rhs = builder.ConstantR4FromArray4D<float>(*arhs);
+  builder.Conv(lhs, rhs, {1, 1}, Padding::kValid);
+
+  // Expected output is produced by the host-side reference convolution.
+  std::unique_ptr<Array4D<float>> aexpected =
+      ReferenceUtil::ConvArray4D(*alhs, *arhs, {1, 1}, Padding::kValid);
+
+  ComputeAndCompareR4<float>(&builder, *aexpected, {}, error_spec_);
+}
+
+// Smallest parameterized case: 1x2 input against a 1x2 filter, VALID padding,
+// operands fed as server-side parameters rather than constants.
+TEST_F(ConvolutionTest, Convolve_1x1x1x2_1x1x1x2_Valid) {
+  ComputationBuilder builder(client_, TestName());
+  {
+    Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2});
+    Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2});
+    auto input = builder.Parameter(0, input_shape, "input");
+    auto filter = builder.Parameter(1, filter_shape, "filter");
+    builder.Conv(input, filter, {1, 1}, Padding::kValid);
+  }
+
+  Array4D<float> input(1, 1, 1, 2);
+  input.FillWithYX(Array2D<float>({
+      {1, 2},
+  }));
+  Array4D<float> filter(1, 1, 1, 2);
+  filter.FillWithYX(Array2D<float>({
+      {5, 6},
+  }));
+
+  // Expected output is produced by the host-side reference convolution.
+  std::unique_ptr<Array4D<float>> aexpected =
+      ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kValid);
+
+  auto input_literal =
+      client_->TransferToServer(*LiteralUtil::CreateR4FromArray4D(input))
+          .ConsumeValueOrDie();
+  auto filter_literal =
+      client_->TransferToServer(*LiteralUtil::CreateR4FromArray4D(filter))
+          .ConsumeValueOrDie();
+
+  ComputeAndCompareR4<float>(&builder, *aexpected,
+                             {input_literal.get(), filter_literal.get()},
+                             error_spec_);
+}
+
+// Tests valid padding for 2D convolution in raster space.
+TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x2x2_Valid) {
+  ComputationBuilder builder(client_, TestName());
+  {
+    Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4});
+    Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2});
+    auto input = builder.Parameter(0, input_shape, "input");
+    auto filter = builder.Parameter(1, filter_shape, "filter");
+    builder.Conv(input, filter, {1, 1}, Padding::kValid);
+  }
+
+  Array4D<float> input(1, 1, 4, 4);
+  // clang-format off
+  input.FillWithYX(Array2D<float>({
+    {1,  2,  3,  4 },
+    {5,  6,  7,  8 },
+    {9,  10, 11, 12},
+    {13, 14, 15, 16},
+  }));
+  // clang-format on
+  Array4D<float> filter(1, 1, 2, 2);
+  // clang-format off
+  filter.FillWithYX(Array2D<float>({
+    {5, 6},
+    {7, 8},
+  }));
+  // clang-format on
+
+  // Expected output is produced by the host-side reference convolution.
+  std::unique_ptr<Array4D<float>> aexpected =
+      ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kValid);
+
+  auto input_literal =
+      client_->TransferToServer(*LiteralUtil::CreateR4FromArray4D(input))
+          .ConsumeValueOrDie();
+  auto filter_literal =
+      client_->TransferToServer(*LiteralUtil::CreateR4FromArray4D(filter))
+          .ConsumeValueOrDie();
+
+  ComputeAndCompareR4<float>(&builder, *aexpected,
+                             {input_literal.get(), filter_literal.get()},
+                             error_spec_);
+}
+
+// Tests same padding for 2D convolution in raster space.
+TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x2x2_Same) {
+  ComputationBuilder builder(client_, TestName());
+  {
+    // SAME padding with stride 1 keeps the 4x4 spatial output shape.
+    Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4});
+    Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2});
+    auto input = builder.Parameter(0, input_shape, "input");
+    auto filter = builder.Parameter(1, filter_shape, "filter");
+    builder.Conv(input, filter, {1, 1}, Padding::kSame);
+  }
+
+  Array4D<float> input(1, 1, 4, 4);
+  // clang-format off
+  input.FillWithYX(Array2D<float>({
+    {1,  2,  3,  4 },
+    {5,  6,  7,  8 },
+    {9,  10, 11, 12},
+    {13, 14, 15, 16},
+  }));
+  // clang-format on
+  Array4D<float> filter(1, 1, 2, 2);
+  // clang-format off
+  filter.FillWithYX(Array2D<float>({
+    {5, 6},
+    {7, 8},
+  }));
+  // clang-format on
+
+  // Expected output is produced by the host-side reference convolution.
+  std::unique_ptr<Array4D<float>> aexpected =
+      ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kSame);
+
+  auto input_literal =
+      client_->TransferToServer(*LiteralUtil::CreateR4FromArray4D(input))
+          .ConsumeValueOrDie();
+  auto filter_literal =
+      client_->TransferToServer(*LiteralUtil::CreateR4FromArray4D(filter))
+          .ConsumeValueOrDie();
+
+  ComputeAndCompareR4<float>(&builder, *aexpected,
+                             {input_literal.get(), filter_literal.get()},
+                             error_spec_);
+}
+
+// Tests same padding for 2D convolution in raster space with an odd sized
+// kernel.
+TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x3x3_Same) {
+  ComputationBuilder builder(client_, TestName());
+  {
+    // Odd (3x3) kernel with SAME padding: pads symmetrically, output stays 4x4.
+    Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4});
+    Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 3, 3});
+    auto input = builder.Parameter(0, input_shape, "input");
+    auto filter = builder.Parameter(1, filter_shape, "filter");
+    builder.Conv(input, filter, {1, 1}, Padding::kSame);
+  }
+
+  Array4D<float> input(1, 1, 4, 4);
+  // clang-format off
+  input.FillWithYX(Array2D<float>({
+    {1,  2,  3,  4 },
+    {5,  6,  7,  8 },
+    {9,  10, 11, 12},
+    {13, 14, 15, 16},
+  }));
+  // clang-format on
+  Array4D<float> filter(1, 1, 3, 3);
+  // clang-format off
+  filter.FillWithYX(Array2D<float>({
+    { 5,  6,  7},
+    { 8,  9, 10},
+    {11, 12, 13},
+  }));
+  // clang-format on
+
+  // Expected output is produced by the host-side reference convolution.
+  std::unique_ptr<Array4D<float>> aexpected =
+      ReferenceUtil::ConvArray4D(input, filter, {1, 1}, Padding::kSame);
+
+  auto input_literal =
+      client_->TransferToServer(*LiteralUtil::CreateR4FromArray4D(input))
+          .ConsumeValueOrDie();
+  auto filter_literal =
+      client_->TransferToServer(*LiteralUtil::CreateR4FromArray4D(filter))
+          .ConsumeValueOrDie();
+
+  ComputeAndCompareR4<float>(&builder, *aexpected,
+                             {input_literal.get(), filter_literal.get()},
+                             error_spec_);
+}
+
+// TODO(b/32873825): implement 1D convolution on GPU.
+// 1D convolution over rank-3 operands; disabled on GPU per the TODO above.
+XLA_TEST_F(ConvolutionTest, DISABLED_ON_GPU(Convolve1D_1x2x5_1x2x2_Valid)) {
+  ComputationBuilder builder(client_, TestName());
+  {
+    Shape input_shape = ShapeUtil::MakeShape(F32, {1, 2, 5});
+    Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 2, 2});
+    auto input = builder.Parameter(0, input_shape, "input");
+    auto filter = builder.Parameter(1, filter_shape, "filter");
+    // Single spatial dimension, so a single window stride.
+    builder.Conv(input, filter, {1}, Padding::kValid);
+  }
+
+  Array3D<float> input({{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}});
+  Array3D<float> filter({{{10, 20}, {30, 40}}});
+
+  // Expected values hard-coded for this small case.
+  Array3D<float> expected({{{510, 610, 710, 810}}});
+
+  auto input_literal =
+      client_->TransferToServer(*LiteralUtil::CreateR3FromArray3D(input))
+          .ConsumeValueOrDie();
+  auto filter_literal =
+      client_->TransferToServer(*LiteralUtil::CreateR3FromArray3D(filter))
+          .ConsumeValueOrDie();
+
+  ComputeAndCompareR3<float>(&builder, expected,
+                             {input_literal.get(), filter_literal.get()},
+                             error_spec_);
+}
+
+// TODO(b/32873825): implement 3D convolution on GPU.
+XLA_TEST_F(ConvolutionTest,
+           DISABLED_ON_GPU(Convolve3D_1x4x2x3x3_2x2x2x3x3_Valid)) {
+  ComputationBuilder builder(client_, TestName());
+  std::vector<int64> input_dims = {1, 4, 2, 3, 3};
+  std::vector<int64> filter_dims = {2, 2, 2, 3, 3};
+  Shape input_shape = ShapeUtil::MakeShape(F32, input_dims);
+  Shape filter_shape = ShapeUtil::MakeShape(F32, filter_dims);
+  {
+    auto input = builder.Parameter(0, input_shape, "input");
+    auto filter = builder.Parameter(1, filter_shape, "filter");
+
+    // Tensorflow dimension numbers for 3D convolution.
+    ConvolutionDimensionNumbers dnums;
+    // Input layout: batch in dim 0, three spatial dims in 1..3, feature in 4.
+    dnums.set_batch_dimension(0);
+    dnums.add_spatial_dimensions(1);
+    dnums.add_spatial_dimensions(2);
+    dnums.add_spatial_dimensions(3);
+    dnums.set_feature_dimension(4);
+    // Kernel layout: spatial dims 0..2, input feature 3, output feature 4.
+    dnums.add_kernel_spatial_dimensions(0);
+    dnums.add_kernel_spatial_dimensions(1);
+    dnums.add_kernel_spatial_dimensions(2);
+    dnums.set_kernel_input_feature_dimension(3);
+    dnums.set_kernel_output_feature_dimension(4);
+
+    builder.ConvWithGeneralDimensions(input, filter, {1, 1, 1}, Padding::kValid,
+                                      dnums);
+  }
+
+  // Fill both operands with iota values starting at 1, then reshape to rank 5.
+  std::vector<float> input_elems(ShapeUtil::ElementsIn(input_shape));
+  std::iota(input_elems.begin(), input_elems.end(), 1.0f);
+  auto input_r1 = LiteralUtil::CreateR1<float>(input_elems);
+  auto input_r5 =
+      LiteralUtil::Reshape(*input_r1, input_dims).ConsumeValueOrDie();
+
+  std::vector<float> filter_elems(ShapeUtil::ElementsIn(filter_shape));
+  std::iota(filter_elems.begin(), filter_elems.end(), 1.0f);
+  auto filter_r1 = LiteralUtil::CreateR1<float>(filter_elems);
+  auto filter_r5 =
+      LiteralUtil::Reshape(*filter_r1, filter_dims).ConsumeValueOrDie();
+
+  // Hard-coded expected values for this configuration, reshaped to the
+  // output shape {1, 3, 1, 2, 3}.
+  auto expected_r1 = LiteralUtil::CreateR1<float>(
+      {19554, 19962, 20370, 22110, 22590, 23070, 34890, 35730, 36570, 37446,
+       38358, 39270, 50226, 51498, 52770, 52782, 54126, 55470});
+  auto expected_r5 =
+      LiteralUtil::Reshape(*expected_r1, {1, 3, 1, 2, 3}).ConsumeValueOrDie();
+
+  auto input_literal = client_->TransferToServer(*input_r5).ConsumeValueOrDie();
+  auto filter_literal =
+      client_->TransferToServer(*filter_r5).ConsumeValueOrDie();
+
+  ComputeAndCompareLiteral(&builder, *expected_r5,
+                           {input_literal.get(), filter_literal.get()},
+                           error_spec_);
+}
+
+}  // namespace
+}  // namespace xla
+
+int main(int argc, char** argv) {
+  // Parse XLA CPU-compiler flags first, so gtest only sees what remains.
+  std::vector<tensorflow::Flag> flag_list;
+  xla::legacy_flags::AppendCpuCompilerFlags(&flag_list);
+  xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
+  const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
+  if (!parse_result) {
+    LOG(ERROR) << "\n" << usage;
+    return 2;
+  }
+  testing::InitGoogleTest(&argc, argv);
+  // Any argv left over after both parsers is an error.
+  if (argc > 1) {
+    LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage;
+    return 2;
+  }
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/compiler/xla/tests/convolution_variants_test.cc b/tensorflow/compiler/xla/tests/convolution_variants_test.cc
new file mode 100644
index 0000000000..b599f9b95b
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/convolution_variants_test.cc
@@ -0,0 +1,1294 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Tests of convolution variants -- kernel sizes, padding, and strides --
+// in small sized data.
+ +#include <algorithm> +#include <initializer_list> +#include <memory> +#include <numeric> +#include <random> +#include <vector> + +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/padding.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class ConvolutionVariantsTest : public ClientLibraryTestBase { + protected: +#if XLA_TEST_BACKEND_GPU + // XLA:GPU sometimes uses FFT convolution which isn't as precise as spatial + // convolution. So relax the absolute error threshold. 
+  ErrorSpec error_spec_ = ErrorSpec(1e-1, 1e-5);
+#else
+  // Non-GPU backends use direct spatial convolution; keep tighter bounds.
+  ErrorSpec error_spec_ = ErrorSpec(1e-4, 1e-2);
+#endif
+};
+
+// Smallest possible case: 1x1 input times 1x1 kernel -> 2 * 3 = 6.
+TEST_F(ConvolutionVariantsTest, Minimal) {
+  ComputationBuilder builder(client_, TestName());
+
+  const Array4D<float> input_array(1, 1, 1, 1, {2});
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(1, 1, 1, 1, {3});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 1}, Padding::kValid);
+
+  const Array4D<float> expected(1, 1, 1, 1, {6});
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+}
+
+// Batch of five scalar inputs, each scaled by the single kernel tap 2.
+TEST_F(ConvolutionVariantsTest, MinimalWithBatch) {
+  ComputationBuilder builder(client_, TestName());
+
+  const Array4D<float> input_array(5, 1, 1, 1, {1, 2, 3, 4, 5});
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(1, 1, 1, 1, {2});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 1}, Padding::kValid);
+
+  const Array4D<float> expected(5, 1, 1, 1, {2, 4, 6, 8, 10});
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+}
+
+// A 1x1 kernel acts as a pointwise scale of the whole 3x4 plane.
+TEST_F(ConvolutionVariantsTest, Flat1x1) {
+  ComputationBuilder builder(client_, TestName());
+
+  Array4D<float> input_array(2, 1, 3, 4);
+  input_array.FillWithMultiples(1);
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(1, 1, 1, 1, {2.3});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 1}, Padding::kValid);
+
+  Array4D<float> expected(2, 1, 3, 4);
+  expected.FillWithMultiples(2.3);
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+}
+
+// Input values {10, 1} combined with three 2-tap filters -> {12, 34, 56}.
+TEST_F(ConvolutionVariantsTest, Deep1x1) {
+  ComputationBuilder builder(client_, TestName());
+
+  Array4D<float> input_array(1, 2, 1, 1, {10, 1});
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(3, 2, 1, 1, {1, 2, 3, 4, 5, 6});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 1}, Padding::kValid);
+
+  Array4D<float> expected(1, 3, 1, 1, {12, 34, 56});
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+}
+
+// Power-of-ten filter taps {10, 1} make outputs read off input digit pairs.
+TEST_F(ConvolutionVariantsTest, Filter1x2in1x2) {
+  ComputationBuilder builder(client_, TestName());
+
+  Array4D<float> input_array(1, 1, 1, 2, {1, 2});
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(1, 1, 1, 2, {10, 1});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 1}, Padding::kValid);
+
+  Array4D<float> expected(1, 1, 1, 1, {12});
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+}
+
+TEST_F(ConvolutionVariantsTest, Filter1x2in1x3) {
+  ComputationBuilder builder(client_, TestName());
+
+  Array4D<float> input_array(1, 1, 1, 3, {1, 2, 3});
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(1, 1, 1, 2, {10, 1});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 1}, Padding::kValid);
+
+  Array4D<float> expected(1, 1, 1, 2, {12, 23});
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+}
+
+TEST_F(ConvolutionVariantsTest, Filter1x2in2x2) {
+  ComputationBuilder builder(client_, TestName());
+
+  Array4D<float> input_array(1, 1, 2, 2, {1, 2, 3, 4});
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(1, 1, 1, 2, {10, 1});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 1}, Padding::kValid);
+
+  Array4D<float> expected(1, 1, 2, 1, {12, 34});
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+}
+
+TEST_F(ConvolutionVariantsTest, Filter2x1in2x2) {
+  ComputationBuilder builder(client_, TestName());
+
+  Array4D<float> input_array(1, 1, 2, 2, {1, 2, 3, 4});
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(1, 1, 2, 1, {10, 1});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 1}, Padding::kValid);
+
+  Array4D<float> expected(1, 1, 1, 2, {13, 24});
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+}
+
+// Filter taps {1000, 100, 10, 1} concatenate the four input digits: 1234.
+TEST_F(ConvolutionVariantsTest, Filter2x2in2x2) {
+  ComputationBuilder builder(client_, TestName());
+
+  Array4D<float> input_array(1, 1, 2, 2, {1, 2, 3, 4});
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(1, 1, 2, 2, {1000, 100, 10, 1});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 1}, Padding::kValid);
+
+  Array4D<float> expected(1, 1, 1, 1, {1234});
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+}
+
+TEST_F(ConvolutionVariantsTest, Filter1x2in2x3WithDepthAndBatch) {
+  ComputationBuilder builder(client_, TestName());
+
+  Array4D<float> input_array(
+      2, 2, 2, 3, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0,       // plane 0
+                   0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 0, 0});  // plane 1
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(
+      2, 2, 1, 2, {1000, 100, 10, 1, 0.1, 0.01, 0.001, 0.0001});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 1}, Padding::kValid);
+
+  Array4D<float> expected(
+      2, 2, 2, 2,
+      {167, 1278, 3490, 4500, 0.0167, 0.1278, 0.3490, 0.4500,    // plane 0
+       334, 2556, 6980, 9000, 0.0334, 0.2556, 0.6980, 0.9000});  // plane 1
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+}
+
+// Stride 2 along x with a 1x1 kernel selects every other column: {10, 30}.
+TEST_F(ConvolutionVariantsTest, Filter1x1stride1x2in1x4) {
+  ComputationBuilder builder(client_, TestName());
+
+  Array4D<float> input_array(1, 1, 1, 4, {1, 2, 3, 4});
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(1, 1, 1, 1, {10});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 2}, Padding::kValid);
+
+  Array4D<float> expected(1, 1, 1, 2, {10, 30});
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+}
+
+TEST_F(ConvolutionVariantsTest, Filter1x1stride1x2in1x5) {
+  ComputationBuilder builder(client_, TestName());
+
+  Array4D<float> input_array(1, 1, 1, 5, {1, 2, 3, 4, 5});
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(1, 1, 1, 1, {10});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 2}, Padding::kValid);
+
+  Array4D<float> expected(1, 1, 1, 3, {10, 30, 50});
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+}
+
+TEST_F(ConvolutionVariantsTest, Filter1x3stride1x2in1x4) {
+  ComputationBuilder builder(client_, TestName());
+
+  Array4D<float> input_array(1, 1, 1, 4, {1, 2, 3, 4});
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(1, 1, 1, 3, {100, 10, 1});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 2}, Padding::kValid);
+
+  Array4D<float> expected(1, 1, 1, 1, {123});
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+}
+
+TEST_F(ConvolutionVariantsTest, Filter1x3stride1x2in1x5) {
+  ComputationBuilder builder(client_, TestName());
+
+  Array4D<float> input_array(1, 1, 1, 5, {1, 2, 3, 4, 5});
+  auto input = builder.ConstantR4FromArray4D<float>(input_array);
+
+  const Array4D<float> filter_array(1, 1, 1, 3, {100, 10, 1});
+  auto filter = builder.ConstantR4FromArray4D<float>(filter_array);
+
+  builder.Conv(input, filter, {1, 2}, Padding::kValid);
+
+  Array4D<float> expected(1, 1, 1, 2, {123, 345});
+  ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
+} + +TEST_F(ConvolutionVariantsTest, Filter1x1stride2x2in3x3) { + ComputationBuilder builder(client_, TestName()); + + Array4D<float> input_array(1, 1, 3, 3, {1, 2, 3, 4, 5, 6, 7, 8, 9}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + const Array4D<float> filter_array(1, 1, 1, 1, {10}); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {2, 2}, Padding::kValid); + + Array4D<float> expected(1, 1, 2, 2, {10, 30, 70, 90}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter3x1in1x1Padded) { + ComputationBuilder builder(client_, TestName()); + + Array4D<float> input_array(1, 1, 1, 1, {1}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + const Array4D<float> filter_array(1, 1, 1, 3, {10, 20, 30}); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kSame); + + Array4D<float> expected(1, 1, 1, 1, {20}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter5x1in3x1Padded) { + ComputationBuilder builder(client_, TestName()); + + Array4D<float> input_array(1, 1, 1, 3, {1, 2, 3}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + const Array4D<float> filter_array(1, 1, 1, 5, {10000, 1000, 100, 10, 1}); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kSame); + + Array4D<float> expected(1, 1, 1, 3, {123, 1230, 12300}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter3x3in2x2Padded) { + ComputationBuilder builder(client_, TestName()); + + Array4D<float> input_array(1, 1, 2, 2, {1, 2, 3, 4}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + const Array4D<float> filter_array(1, 1, 3, 3, {10000, 0, 1000, // row 0 + 0, 100, 0, // row 1 + 
10, 0, 1}); // row 2 + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kSame); + + Array4D<float> expected(1, 1, 2, 2, {104, 230, 2300, 10400}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter1x1in2x1WithPaddingAndDepth) { + ComputationBuilder builder(client_, TestName()); + + Array4D<float> input_array(1, 2, 1, 2, {1, 2, 3, 4}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + const Array4D<float> filter_array(1, 2, 1, 1, {10, 1}); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kSame); + + Array4D<float> expected(1, 1, 1, 2, {13, 24}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter2x2Stride1x1Input3x3) { + ComputationBuilder builder(client_, TestName()); + + Array4D<float> input_array(1, 1, 3, 3, {1, 2, 3, 4, 5, 6, 7, 8, 9}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + const Array4D<float> filter_array(1, 1, 2, 2, {7, 13, 17, 23}); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + Array4D<float> expected(1, 1, 2, 2, {216, 276, 396, 456}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter1x2Stride1x1Input1x3) { + ComputationBuilder builder(client_, TestName()); + + Array4D<float> input_array(1, 1, 1, 3, {1, 2, 3}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + const Array4D<float> filter_array(1, 1, 1, 2, {7, 13}); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + Array4D<float> expected(1, 1, 1, 2, {33, 53}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, 
Filter2x1x8x8Input1x1x8x8) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(64); + std::iota(input_data.begin(), input_data.end(), 0.0); + Array4D<float> input_array(1, 1, 8, 8, input_data); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + std::vector<float> filter_data(128); + std::fill(filter_data.begin(), filter_data.begin() + 64, 1.0); + std::fill(filter_data.begin() + 64, filter_data.begin() + 128, 2.0); + const Array4D<float> filter_array(2, 1, 8, 8, filter_data); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + Array4D<float> expected(1, 2, 1, 1, {2016, 4032}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter1x1x1x1Input16x1x1x1) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(16 * 1 * 1 * 1); + std::iota(input_data.begin(), input_data.end(), 1.0); + Array4D<float> input_array(16, 1, 1, 1, input_data); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + std::vector<float> filter_data(1 * 1 * 1 * 1); + std::iota(filter_data.begin(), filter_data.end(), 1.0); + const Array4D<float> filter_array(1, 1, 1, 1, filter_data); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + std::vector<float> expected_data = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + Array4D<float> expected(16, 1, 1, 1, expected_data); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter1x1x2x2Input16x1x2x2) { + ComputationBuilder builder(client_, TestName()); + + constexpr int bs = 16; + constexpr int kx = 2; + constexpr int ky = 2; + Array4D<float> input_array(bs, 1, ky, kx); + for (int i0 = 0; i0 < bs; ++i0) { + for (int i2 = 0; i2 < ky; ++i2) { + for (int i3 = 0; i3 < kx; ++i3) { + 
input_array(i0, 0, i2, i3) = i0 + 1; + } + } + } + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + std::vector<float> filter_data(1 * 1 * ky * kx); + std::iota(filter_data.begin(), filter_data.end(), 1.0); + const Array4D<float> filter_array(1, 1, ky, kx, filter_data); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + std::vector<float> expected_data(bs); + for (int i = 0; i < bs; ++i) { + expected_data[i] = 10 * (i + 1); + } + Array4D<float> expected(bs, 1, 1, 1, expected_data); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter1x1x2x2Input3x1x2x2) { + ComputationBuilder builder(client_, TestName()); + + constexpr int kx = 2; + constexpr int ky = 2; + constexpr int bs = 3; + Array4D<float> input_array(bs, 1, ky, kx); + for (int i0 = 0; i0 < bs; ++i0) { + for (int i2 = 0; i2 < ky; ++i2) { + for (int i3 = 0; i3 < kx; ++i3) { + input_array(i0, 0, i2, i3) = i0 + i2 + i3 + 1; + } + } + } + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + std::vector<float> filter_data(1 * 1 * ky * kx); + std::iota(filter_data.begin(), filter_data.end(), 1.0); + const Array4D<float> filter_array(1, 1, ky, kx, filter_data); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + std::vector<float> expected_data = { + 23, 33, 43, + }; + Array4D<float> expected(bs, 1, 1, 1, expected_data); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter1x1x8x8Input16x1x8x8) { + ComputationBuilder builder(client_, TestName()); + + Array4D<float> input_array(16, 1, 8, 8); + for (int i0 = 0; i0 < 16; ++i0) { + for (int i2 = 0; i2 < 8; ++i2) { + for (int i3 = 0; i3 < 8; ++i3) { + input_array(i0, 0, i2, i3) = i0 + i2 + i3 + 1; + } + } + } + auto input = 
builder.ConstantR4FromArray4D<float>(input_array); + + std::vector<float> filter_data(1 * 1 * 8 * 8); + std::iota(filter_data.begin(), filter_data.end(), 1.0); + const Array4D<float> filter_array(1, 1, 8, 8, filter_data); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + std::vector<float> expected_data = { + 19664, 21744, 23824, 25904, 27984, 30064, 32144, 34224, + 36304, 38384, 40464, 42544, 44624, 46704, 48784, 50864, + }; + Array4D<float> expected(16, 1, 1, 1, expected_data); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter2x2x8x8Input1x2x8x8) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(2 * 8 * 8); + std::iota(input_data.begin(), input_data.end(), 0.0); + Array4D<float> input_array(1, 2, 8, 8, input_data); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + std::vector<float> filter_data(2 * 2 * 8 * 8); + std::fill(filter_data.begin(), filter_data.begin() + filter_data.size() / 4, + 1.0); + std::fill(filter_data.begin() + filter_data.size() / 4, + filter_data.begin() + filter_data.size() / 2, 2.0); + std::fill(filter_data.begin() + filter_data.size() / 2, + filter_data.begin() + 3 * filter_data.size() / 4, 3.0); + std::fill(filter_data.begin() + 3 * filter_data.size() / 4, filter_data.end(), + 4.0); + const Array4D<float> filter_array(2, 2, 8, 8, filter_data); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + Array4D<float> expected(1, 2, 1, 1, {14240, 30496}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter2x2x8x8Input2x2x8x8) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(2 * 2 * 8 * 8); + std::iota(input_data.begin(), input_data.end(), 0.0); + Array4D<float> input_array(2, 2, 
8, 8, input_data); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + std::vector<float> filter_data(2 * 2 * 8 * 8); + std::fill(filter_data.begin(), filter_data.begin() + filter_data.size() / 4, + 1.0); + std::fill(filter_data.begin() + filter_data.size() / 4, + filter_data.begin() + filter_data.size() / 2, 2.0); + std::fill(filter_data.begin() + filter_data.size() / 2, + filter_data.begin() + 3 * filter_data.size() / 4, 3.0); + std::fill(filter_data.begin() + 3 * filter_data.size() / 4, filter_data.end(), + 4.0); + const Array4D<float> filter_array(2, 2, 8, 8, filter_data); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + Array4D<float> expected(2, 2, 1, 1, {14240, 30496, 38816, 87840}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter2x2x8x8Input32x2x8x8) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(32 * 2 * 8 * 8); + std::iota(input_data.begin(), input_data.end(), 0.0); + Array4D<float> input_array(32, 2, 8, 8, input_data); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + std::vector<float> filter_data(2 * 2 * 8 * 8); + std::fill(filter_data.begin(), filter_data.begin() + filter_data.size() / 4, + 1.0); + std::fill(filter_data.begin() + filter_data.size() / 4, + filter_data.begin() + filter_data.size() / 2, 2.0); + std::fill(filter_data.begin() + filter_data.size() / 2, + filter_data.begin() + 3 * filter_data.size() / 4, 3.0); + std::fill(filter_data.begin() + 3 * filter_data.size() / 4, filter_data.end(), + 4.0); + const Array4D<float> filter_array(2, 2, 8, 8, filter_data); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + std::vector<float> expected_data = { + 14240, 30496, 38816, 87840, 63392, 145184, 87968, + 202528, 112544, 259872, 137120, 317216, 161696, 
374560, + 186272, 431904, 210848, 489248, 235424, 546592, 260000, + 603936, 284576, 661280, 309152, 718624, 333728, 775968, + 358304, 833312, 382880, 890656, 407456, 948000, 432032, + 1005344, 456608, 1062688, 481184, 1120032, 505760, 1177376, + 530336, 1.23472e+06, 554912, 1292064, 579488, 1349408, 604064, + 1406752, 628640, 1464096, 653216, 1.52144e+06, 677792, 1578784, + 702368, 1636128, 726944, 1693472, 751520, 1750816, 776096, + 1.80816e+06, + }; + Array4D<float> expected(32, 2, 1, 1, expected_data); + // The output elements can be larger than 1e+5, making the absolute error + // large sometimes. So, we focus on relative errors for this test case. + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter16x16x1x1Input16x16x1x1) { + ComputationBuilder builder(client_, TestName()); + + Array4D<float> input_array(16, 16, 1, 1); + Array4D<float> filter_array(16, 16, 1, 1); + for (int i0 = 0; i0 < 16; ++i0) { + for (int i1 = 0; i1 < 16; ++i1) { + input_array(i0, i1, 0, 0) = 1000 * i0 + i1; + filter_array(i0, i1, 0, 0) = 1; + } + } + + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + Array4D<float> expected(16, 16, 1, 1); + for (int i0 = 0; i0 < 16; ++i0) { + for (int i1 = 0; i1 < 16; ++i1) { + expected(i0, i1, 0, 0) = 16000 * i0 + 120; + } + } + + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +XLA_TEST_F(ConvolutionVariantsTest, FlatRhsDilation) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(1 * 1 * 4 * 6); + std::iota(input_data.begin(), input_data.end(), 0.0); + Array4D<float> input_array(1, 1, 4, 6, input_data); + + Array4D<float> filter_array(1, 1, 2, 3, {1, 10, 100, 2, 20, 200}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = 
builder.ConstantR4FromArray4D<float>(filter_array); + builder.ConvGeneralDilated( + /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, /*padding=*/{}, + /*lhs_dilation=*/{}, /*rhs_dilation=*/{2, 2}, + ComputationBuilder::CreateDefaultConvDimensionNumbers()); + + Array4D<float> expected(1, 1, 2, 2, {3924, 4257, 5922, 6255}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +XLA_TEST_F(ConvolutionVariantsTest, FlatLhsDilation1D) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(1 * 1 * 1 * 5); + std::iota(input_data.begin(), input_data.end(), 1.0); + Array4D<float> input_array(1, 1, 1, 5, input_data); + + Array4D<float> filter_array(1, 1, 1, 2, {10, 1}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + builder.ConvGeneralDilated( + /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, /*padding=*/{}, + /*lhs_dilation=*/{1, 2}, /*rhs_dilation=*/{}, + ComputationBuilder::CreateDefaultConvDimensionNumbers()); + + Array4D<float> expected(1, 1, 1, 8, {10, 2, 20, 3, 30, 4, 40, 5}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +XLA_TEST_F(ConvolutionVariantsTest, FlatLhsDilation) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(1 * 1 * 3 * 4); + std::iota(input_data.begin(), input_data.end(), 1.0); + Array4D<float> input_array(1, 1, 3, 4, input_data); + + Array4D<float> filter_array(1, 1, 4, 3, {100, 10, 1, // + 200, 20, 2, // + 300, 30, 3, // + 400, 40, 4}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + builder.ConvGeneralDilated( + /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{2, 1}, + /*padding=*/{{1, 0}, {0, 0}}, /*lhs_dilation=*/{3, 2}, + /*rhs_dilation=*/{}, + ComputationBuilder::CreateDefaultConvDimensionNumbers()); + + Array4D<float> expected(1, 1, 3, 5, {204, 
40, 406, 60, 608, // + 1518, 180, 1821, 210, 2124, // + 4146, 460, 4651, 510, 5156}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +XLA_TEST_F(ConvolutionVariantsTest, NegativePaddingOnBothEnds) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(1 * 1 * 1 * 5); + std::iota(input_data.begin(), input_data.end(), 1.0); + Array4D<float> input_array(1, 1, 1, 5, input_data); + + Array4D<float> filter_array(1, 1, 1, 2, {10, 1}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + builder.ConvGeneral( + /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, + /*padding=*/{{0, 0}, {-1, -1}}, + ComputationBuilder::CreateDefaultConvDimensionNumbers()); + + Array4D<float> expected(1, 1, 1, 2, {23, 34}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +XLA_TEST_F(ConvolutionVariantsTest, NegativePaddingLowAndPositivePaddingHigh) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(1 * 1 * 1 * 5); + std::iota(input_data.begin(), input_data.end(), 1.0); + Array4D<float> input_array(1, 1, 1, 5, input_data); + + Array4D<float> filter_array(1, 1, 1, 2, {10, 1}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + builder.ConvGeneral( + /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, + /*padding=*/{{0, 0}, {-1, 2}}, + ComputationBuilder::CreateDefaultConvDimensionNumbers()); + + Array4D<float> expected(1, 1, 1, 5, {23, 34, 45, 50, 0}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +XLA_TEST_F(ConvolutionVariantsTest, PositivePaddingLowAndNegativePaddingHigh) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(1 * 1 * 1 * 5); + std::iota(input_data.begin(), input_data.end(), 1.0); + Array4D<float> input_array(1, 1, 1, 5, input_data); + 
+ Array4D<float> filter_array(1, 1, 1, 2, {10, 1}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + builder.ConvGeneral( + /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, + /*padding=*/{{0, 0}, {2, -1}}, + ComputationBuilder::CreateDefaultConvDimensionNumbers()); + + Array4D<float> expected(1, 1, 1, 5, {0, 1, 12, 23, 34}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +XLA_TEST_F(ConvolutionVariantsTest, PositivePaddingAndDilation) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(1 * 1 * 1 * 5); + std::iota(input_data.begin(), input_data.end(), 1.0); + Array4D<float> input_array(1, 1, 1, 5, input_data); + + Array4D<float> filter_array(1, 1, 1, 2, {10, 1}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + builder.ConvGeneralDilated( + /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, + /*padding=*/{{0, 0}, {3, 2}}, + /*lhs_dilation=*/{1, 2}, /*rhs_dilation=*/{1, 2}, + ComputationBuilder::CreateDefaultConvDimensionNumbers()); + + // input: + // [1, 2, 3, 4, 5] --dilate-> [1, 0, 2, 0, 3, 0, 4, 0, 5] + // ---pad---> [0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 0] + // filter: + // [10, 1] --dilate-> [10, 0, 1] + Array4D<float> expected(1, 1, 1, 12, + {0, 1, 0, 12, 0, 23, 0, 34, 0, 45, 0, 50}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} +XLA_TEST_F(ConvolutionVariantsTest, NegativePaddingAndDilation) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(1 * 1 * 1 * 5); + std::iota(input_data.begin(), input_data.end(), 1.0); + Array4D<float> input_array(1, 1, 1, 5, input_data); + + Array4D<float> filter_array(1, 1, 1, 2, {10, 1}); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + 
builder.ConvGeneralDilated( + /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, + /*padding=*/{{0, 0}, {-3, -2}}, + /*lhs_dilation=*/{1, 2}, /*rhs_dilation=*/{1, 2}, + ComputationBuilder::CreateDefaultConvDimensionNumbers()); + + // input: + // [1, 2, 3, 4, 5] --dilate-> [1, 0, 2, 0, 3, 0, 4, 0, 5] + // ---pad---> [0, 3, 0, 4] + // filter: + // [10, 1] --dilate-> [10, 0, 1] + Array4D<float> expected(1, 1, 1, 2, {0, 34}); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, RandomData_Input1x1x2x3_Filter2x1x1x2) { + constexpr int bs = 1; + constexpr int iz = 1; + constexpr int oz = 2; + constexpr int iy = 2; + constexpr int ix = 3; + constexpr int ky = 1; + constexpr int kx = 2; + std::mt19937 rng; + std::uniform_real_distribution<float> distribution; + std::vector<float> input_data(bs * iz * iy * ix); + for (float& f : input_data) { + f = distribution(rng); + } + std::vector<float> kernel_data(oz * iz * ky * kx); + for (float& f : kernel_data) { + f = distribution(rng); + } + + Array4D<float> input_array(bs, iz, iy, ix, input_data); + Array4D<float> filter_array(oz, iz, ky, kx, kernel_data); + + ComputationBuilder builder(client_, TestName()); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + std::unique_ptr<Array4D<float>> expected = ReferenceUtil::ConvArray4D( + input_array, filter_array, {1, 1}, Padding::kValid); + + ComputeAndCompareR4<float>(&builder, *expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, RandomData_Input1x16x1x1_Filter1x16x1x1) { + constexpr int bs = 1; + constexpr int iz = 16; + constexpr int oz = 1; + constexpr int iy = 1; + constexpr int ix = 1; + constexpr int ky = 1; + constexpr int kx = 1; + std::mt19937 rng; + std::uniform_real_distribution<float> distribution; + std::vector<float> input_data(bs * iz * iy * ix); + for (float& f 
: input_data) { + f = distribution(rng); + } + std::vector<float> kernel_data(oz * iz * ky * kx); + for (float& f : kernel_data) { + f = distribution(rng); + } + + Array4D<float> input_array(bs, iz, iy, ix, input_data); + Array4D<float> filter_array(oz, iz, ky, kx, kernel_data); + + ComputationBuilder builder(client_, TestName()); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + std::unique_ptr<Array4D<float>> expected = ReferenceUtil::ConvArray4D( + input_array, filter_array, {1, 1}, Padding::kValid); + + ComputeAndCompareR4<float>(&builder, *expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, RandomData_Input16x16x1x1_Filter1x16x1x1) { + constexpr int bs = 16; + constexpr int iz = 16; + constexpr int oz = 1; + constexpr int iy = 1; + constexpr int ix = 1; + constexpr int ky = 1; + constexpr int kx = 1; + std::mt19937 rng; + std::uniform_real_distribution<float> distribution; + std::vector<float> input_data(bs * iz * iy * ix); + for (float& f : input_data) { + f = distribution(rng); + } + std::vector<float> kernel_data(oz * iz * ky * kx); + for (float& f : kernel_data) { + f = distribution(rng); + } + + Array4D<float> input_array(bs, iz, iy, ix, input_data); + Array4D<float> filter_array(oz, iz, ky, kx, kernel_data); + + ComputationBuilder builder(client_, TestName()); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + std::unique_ptr<Array4D<float>> expected = ReferenceUtil::ConvArray4D( + input_array, filter_array, {1, 1}, Padding::kValid); + + ComputeAndCompareR4<float>(&builder, *expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, RandomData_Input16x16x1x1_Filter16x16x1x1) { + constexpr int bs = 16; + constexpr int iz = 16; + constexpr int oz = 16; + 
constexpr int iy = 1; + constexpr int ix = 1; + constexpr int ky = 1; + constexpr int kx = 1; + std::mt19937 rng; + std::uniform_real_distribution<float> distribution; + std::vector<float> input_data(bs * iz * iy * ix); + for (float& f : input_data) { + f = distribution(rng); + } + std::vector<float> kernel_data(oz * iz * ky * kx); + for (float& f : kernel_data) { + f = distribution(rng); + } + + Array4D<float> input_array(bs, iz, iy, ix, input_data); + Array4D<float> filter_array(oz, iz, ky, kx, kernel_data); + + ComputationBuilder builder(client_, TestName()); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + std::unique_ptr<Array4D<float>> expected = ReferenceUtil::ConvArray4D( + input_array, filter_array, {1, 1}, Padding::kValid); + + ComputeAndCompareR4<float>(&builder, *expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, RandomData_Input16x16x16x16_Filter16x16x16x16) { + constexpr int bs = 16; + constexpr int iz = 16; + constexpr int oz = 16; + constexpr int iy = 16; + constexpr int ix = 16; + constexpr int ky = 16; + constexpr int kx = 16; + std::mt19937 rng; + std::uniform_real_distribution<float> distribution; + std::vector<float> input_data(bs * iz * iy * ix); + for (float& f : input_data) { + f = distribution(rng); + } + std::vector<float> kernel_data(oz * iz * ky * kx); + for (float& f : kernel_data) { + f = distribution(rng); + } + + Array4D<float> input_array(bs, iz, iy, ix, input_data); + Array4D<float> filter_array(oz, iz, ky, kx, kernel_data); + + ComputationBuilder builder(client_, TestName()); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + builder.Conv(input, filter, {1, 1}, Padding::kValid); + + std::unique_ptr<Array4D<float>> expected = ReferenceUtil::ConvArray4D( + input_array, filter_array, {1, 1}, 
Padding::kValid); + + ComputeAndCompareR4<float>(&builder, *expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter1x2x1x1Input1x2x3x1GeneralPadding) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(1 * 2 * 3 * 1); + std::iota(input_data.begin(), input_data.end(), 1.0); + Array4D<float> input_array(1, 2, 3, 1, input_data); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + std::vector<float> filter_data(1 * 2 * 1 * 1); + std::iota(filter_data.begin(), filter_data.end(), 1.0); + Array4D<float> filter_array(1, 2, 1, 1, filter_data); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + ConvolutionDimensionNumbers dnums; + // NHWC input format. + dnums.set_batch_dimension(0); + dnums.add_spatial_dimensions(1); + dnums.add_spatial_dimensions(2); + dnums.set_feature_dimension(3); + + // Tensorflow filter shape: [ H, W, inC, outC ] + dnums.add_kernel_spatial_dimensions(0); + dnums.add_kernel_spatial_dimensions(1); + dnums.set_kernel_input_feature_dimension(2); + dnums.set_kernel_output_feature_dimension(3); + + // Tests padding sizes that don't correspond either to SAME or VALID padding. 
+ builder.ConvGeneral(input, filter, {1, 1}, {{2, 1}, {2, 3}}, dnums); + + std::vector<float> expected_data = { + 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, // + 0, 2, 5, 8, 3, 0, 0, // + 0, 8, 14, 17, 6, 0, 0, // + 0, 0, 0, 0, 0, 0, 0 // + }; + Array4D<float> expected(1, 5, 7, 1, expected_data); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter1x1x1x1Input1x2x3x1GeneralPadding) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(1 * 2 * 3 * 1); + std::iota(input_data.begin(), input_data.end(), 1.0); + Array4D<float> input_array(1, 2, 3, 1, input_data); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + std::vector<float> filter_data(1 * 1 * 1 * 1); + std::iota(filter_data.begin(), filter_data.end(), 2.0); + Array4D<float> filter_array(1, 1, 1, 1, filter_data); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + ConvolutionDimensionNumbers dnums; + // NHWC input format. + dnums.set_batch_dimension(0); + dnums.add_spatial_dimensions(1); + dnums.add_spatial_dimensions(2); + dnums.set_feature_dimension(3); + + // Tensorflow filter shape: [ H, W, inC, outC ] + dnums.add_kernel_spatial_dimensions(0); + dnums.add_kernel_spatial_dimensions(1); + dnums.set_kernel_input_feature_dimension(2); + dnums.set_kernel_output_feature_dimension(3); + + // Tests padding sizes that don't correspond either to SAME or VALID padding. 
+ builder.ConvGeneral(input, filter, {1, 1}, {{2, 1}, {2, 3}}, dnums); + + std::vector<float> expected_data = { + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 2, 4, 6, 0, 0, 0, // + 0, 0, 8, 10, 12, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0 // + }; + Array4D<float> expected(1, 5, 8, 1, expected_data); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter1x1x1x1Input1x2x3x1NoPadding) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(1 * 2 * 3 * 1); + std::iota(input_data.begin(), input_data.end(), 1.0); + Array4D<float> input_array(1, 2, 3, 1, input_data); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + std::vector<float> filter_data(1 * 1 * 1 * 1); + std::iota(filter_data.begin(), filter_data.end(), 2.0); + Array4D<float> filter_array(1, 1, 1, 1, filter_data); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + ConvolutionDimensionNumbers dnums; + // NHWC input format. + dnums.set_batch_dimension(0); + dnums.add_spatial_dimensions(1); + dnums.add_spatial_dimensions(2); + dnums.set_feature_dimension(3); + + // Tensorflow filter shape: [ H, W, inC, outC ] + dnums.add_kernel_spatial_dimensions(0); + dnums.add_kernel_spatial_dimensions(1); + dnums.set_kernel_input_feature_dimension(2); + dnums.set_kernel_output_feature_dimension(3); + + // Tests zero padding sizes. This can use matmul for computation. 
+ builder.ConvGeneral(input, filter, {1, 1}, {{0, 0}, {0, 0}}, dnums); + + std::vector<float> expected_data = { + 2, 4, 6, // + 8, 10, 12, + }; + Array4D<float> expected(1, 2, 3, 1, expected_data); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, Filter1x1x2x3Input1x2x3x2NoPadding) { + ComputationBuilder builder(client_, TestName()); + + std::vector<float> input_data(1 * 2 * 3 * 2); + std::iota(input_data.begin(), input_data.end(), 1.0); + Array4D<float> input_array(1, 2, 3, 2, input_data); + auto input = builder.ConstantR4FromArray4D<float>(input_array); + + std::vector<float> filter_data(1 * 1 * 2 * 3); + std::iota(filter_data.begin(), filter_data.end(), 2.0); + Array4D<float> filter_array(1, 1, 2, 3, filter_data); + auto filter = builder.ConstantR4FromArray4D<float>(filter_array); + + ConvolutionDimensionNumbers dnums; + // NHWC input format. + dnums.set_batch_dimension(0); + dnums.add_spatial_dimensions(1); + dnums.add_spatial_dimensions(2); + dnums.set_feature_dimension(3); + + // Tensorflow filter shape: [ H, W, inC, outC ] + dnums.add_kernel_spatial_dimensions(0); + dnums.add_kernel_spatial_dimensions(1); + dnums.set_kernel_input_feature_dimension(2); + dnums.set_kernel_output_feature_dimension(3); + + // Tests zero padding sizes. This can use matmul for computation. + builder.ConvGeneral(input, filter, {1, 1}, {{0, 0}, {0, 0}}, dnums); + + std::vector<float> expected_data = { + 12, 15, 18, // + 26, 33, 40, // + 40, 51, 62, // + 54, 69, 84, // + 68, 87, 106, // + 82, 105, 128, // + }; + Array4D<float> expected(1, 2, 3, 3, expected_data); + ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_); +} + +// Regression test for b/32034796. 
+// +// XLA:GPU fuses +// Conv([1,2,3], Reverse([5,6]), padding_low=1) +// into +// BackwardInputConv([1,2,3], [5,6], padding_low=0, padding_high=1) +TEST_F(ConvolutionVariantsTest, BackwardInputLowPaddingLessThanHighPadding) { + ComputationBuilder builder(client_, TestName()); + + auto gradients = builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 3, /*values=*/{1, 2, 3})); + auto weights = builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 2, /*values=*/{5, 6})); + auto mirrored_weights = builder.Rev(weights, {2, 3}); + builder.ConvWithGeneralPadding(gradients, mirrored_weights, + /*window_strides=*/{1, 1}, + /*padding=*/{{0, 0}, {1, 0}}); + ComputeAndCompareR4<float>(&builder, {{{{5, 16, 27}}}}, {}, error_spec_); +} + +// XLA:GPU fuses +// Conv([1], Reverse([1,10,100]), padding_high=3, base_dilation=3) +// into +// BackwardInputConv([1], [1,10,100], stride=3, padding=(2,1)) +TEST_F(ConvolutionVariantsTest, BackwardInputLowPaddingGreaterThanHighPadding) { + ComputationBuilder builder(client_, TestName()); + + auto gradients = builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 1, /*values=*/{1})); + auto weights = builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 3, /*values=*/{1, 10, 100})); + auto mirrored_weights = builder.Rev(weights, {2, 3}); + builder.ConvGeneralDilated( + gradients, mirrored_weights, + /*window_strides=*/{1, 1}, + /*padding=*/{{0, 0}, {0, 3}}, + /*lhs_dilation=*/{1, 3}, /*rhs_dilation=*/{}, + ComputationBuilder::CreateDefaultConvDimensionNumbers()); + ComputeAndCompareR4<float>(&builder, {{{{100, 0}}}}, {}, error_spec_); +} + +// XLA:GPU fuses +// Conv([1], Reverse([1,10,100]), padding=(1,1)) +// into +// BackwardInputConv([1], [1,10,100], padding=(1,1)) +TEST_F(ConvolutionVariantsTest, BackwardInputEvenPadding) { + ComputationBuilder builder(client_, TestName()); + + auto gradients = builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 1, /*values=*/{1})); + auto weights = 
builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 3, /*values=*/{1, 10, 100})); + auto mirrored_weights = builder.Rev(weights, {2, 3}); + builder.ConvWithGeneralPadding(gradients, mirrored_weights, + /*window_strides=*/{1, 1}, + /*padding=*/{{0, 0}, {1, 1}}); + ComputeAndCompareR4<float>(&builder, {{{{10}}}}, {}, error_spec_); +} + +// HLO pattern +// Conv([1,2,3], Reverse([1,10], padding_high=2) +// could be fused to +// BackwardInputConv([1,2,3], [1,10], padding_low=1, padding_high=-1) +// +// However, XLA:GPU doesn't actually fuse it because PadInsertion doesn't +// support negative padding on backward convolution yet (b/32744257). +TEST_F(ConvolutionVariantsTest, BackwardInputWithNegativePaddingHigh) { + ComputationBuilder builder(client_, TestName()); + + auto gradients = builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 3, /*values=*/{1, 2, 3})); + auto weights = builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 2, /*values=*/{1, 10})); + auto mirrored_weights = builder.Rev(weights, {2, 3}); + builder.ConvWithGeneralPadding(gradients, mirrored_weights, + /*window_strides=*/{1, 1}, + /*padding=*/{{0, 0}, {0, 2}}); + + ComputeAndCompareR4<float>(&builder, {{{{12, 23, 30, 0}}}}, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, BackwardFilterLowPaddingLessThanHighPadding) { + ComputationBuilder builder(client_, TestName()); + + // activations: 1,2,3,4 ---pad--> 0,1,2,3,4,0,0 + // gradients: 100,10,1 -dilate-> 100,0,10,0,1 + // weight gradients: 24,130,240 + // + // This pattern will be fused to backward convolution with padding=(1,2). 
+ auto activations = builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 4, /*values=*/{1, 2, 3, 4})); + auto gradients = builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 3, /*values=*/{100, 10, 1})); + auto forward_conv = builder.ConvGeneralDilated( + activations, gradients, + /*window_strides=*/{1, 1}, + /*padding=*/{{0, 0}, {1, 2}}, + /*lhs_dilation=*/{}, /*rhs_dilation=*/{1, 2}, + ComputationBuilder::CreateDefaultConvDimensionNumbers()); + builder.Transpose(forward_conv, {0, 1, 2, 3}); + + ComputeAndCompareR4<float>(&builder, {{{{24, 130, 240}}}}, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, + BackwardFilterLowPaddingGreaterThanHighPadding) { + ComputationBuilder builder(client_, TestName()); + + // activations: 1,2,3,4 ---pad--> 0,0,1,2,3,4 + // gradients: 100,10,1 -dilate-> 100,0,10,0,1 + // weight gradients: 13,24 + // + // This pattern will be fused to backward convolution with padding=(2,1). + // Note: both (2,1) and (2,0) are valid padding for the backward convolution + // because the stride is 2. + auto activations = builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 4, /*values=*/{1, 2, 3, 4})); + auto gradients = builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 3, /*values=*/{100, 10, 1})); + auto forward_conv = builder.ConvGeneralDilated( + activations, gradients, + /*window_strides=*/{1, 1}, + /*padding=*/{{0, 0}, {2, 0}}, + /*lhs_dilation=*/{}, /*rhs_dilation=*/{1, 2}, + ComputationBuilder::CreateDefaultConvDimensionNumbers()); + builder.Transpose(forward_conv, {0, 1, 2, 3}); + + ComputeAndCompareR4<float>(&builder, {{{{13, 24}}}}, {}, error_spec_); +} + +TEST_F(ConvolutionVariantsTest, BackwardFilterEvenPadding) { + ComputationBuilder builder(client_, TestName()); + + // activations: 1,2,3,4 ---pad--> 0,0,1,2,3,4,0 + // gradients: 100,10,1 -dilate-> 100,0,10,0,1 + // weight gradients: 13,24,130 + // + // This pattern will be fused to backward convolution with padding=(2,2). 
+ // Note: both (2,1) and (2,2) are valid padding for the backward convolution + // because the stride is 2. ConvolutionFolding prefers (2,2) because cuDNN + // supports even padding only -- using (2,1) would need extra effort of + // canonicalization. + auto activations = builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 4, /*values=*/{1, 2, 3, 4})); + auto gradients = builder.ConstantR4FromArray4D<float>( + Array4D<float>(1, 1, 1, 3, /*values=*/{100, 10, 1})); + auto forward_conv = builder.ConvGeneralDilated( + activations, gradients, + /*window_strides=*/{1, 1}, + /*padding=*/{{0, 0}, {2, 1}}, + /*lhs_dilation=*/{}, /*rhs_dilation=*/{1, 2}, + ComputationBuilder::CreateDefaultConvDimensionNumbers()); + builder.Transpose(forward_conv, {0, 1, 2, 3}); + + ComputeAndCompareR4<float>(&builder, {{{{13, 24, 130}}}}, {}, error_spec_); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/copy_test.cc b/tensorflow/compiler/xla/tests/copy_test.cc new file mode 100644 index 0000000000..29e2950533 --- /dev/null +++ b/tensorflow/compiler/xla/tests/copy_test.cc @@ -0,0 +1,277 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <memory> +#include <utility> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class CopyOpTest : public HloTestBase { + protected: + void TestCopyOp(const Literal& literal) { + auto builder = HloComputation::Builder(TestName()); + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(MakeUnique<Literal>(literal))); + builder.AddInstruction(HloInstruction::CreateUnary( + constant->shape(), HloOpcode::kCopy, constant)); + auto computation = builder.Build(); + auto hlo_module = MakeUnique<HloModule>("test_module"); + hlo_module->AddEntryComputation(std::move(computation)); + + std::unique_ptr<Literal> result = + ExecuteAndTransfer(std::move(hlo_module), {}); + 
LiteralTestUtil::ExpectEqual(literal, *result); + } + + void TestCopyConstantLayout021(size_t n1, size_t n2, size_t n3); + void TestCopyConstantLayoutR4(size_t n1, size_t n2, size_t n3, size_t n4, + tensorflow::gtl::ArraySlice<int64> permutation); +}; + +TEST_F(CopyOpTest, CopyR0Bool) { + TestCopyOp(*LiteralUtil::CreateR0<bool>(true)); +} + +TEST_F(CopyOpTest, CopyR1S0U32) { + TestCopyOp(*LiteralUtil::CreateR1<uint32>({})); +} + +TEST_F(CopyOpTest, CopyR1S3U32) { + TestCopyOp(*LiteralUtil::CreateR1<uint32>({1, 2, 3})); +} + +TEST_F(CopyOpTest, CopyR3F32_2x2x3) { + TestCopyOp( + *LiteralUtil::CreateR3({{{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}}, + {{1.1f, 2.1f, 3.1f}, {6.1f, 3.5f, 2.8f}}})); +} + +TEST_F(CopyOpTest, CopyR4S32_2x2x3x2) { + TestCopyOp(*LiteralUtil::CreateR4( + {{{{1, -2}, {-4, 5}, {6, 7}}, {{8, 9}, {10, 11}, {12, 13}}}, + {{{10, 3}, {7, -2}, {3, 6}}, {{2, 5}, {-11, 5}, {-2, -5}}}})); +} + +TEST_F(CopyOpTest, CopyR4S32_0x2x3x2) { + TestCopyOp(*LiteralUtil::CreateR4FromArray4D(Array4D<int32>(0, 2, 3, 2))); +} + +TEST_F(CopyOpTest, CopyParameterScalar) { + auto builder = HloComputation::Builder(TestName()); + + // Copy literal to device to use as parameter. 
+ auto literal = LiteralUtil::CreateR0<float>(42.0); + Shape shape = literal->shape(); + auto constant_device_base = TransferToDevice(*literal); + + auto param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "param0")); + builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kCopy, param0)); + + auto computation = builder.Build(); + + auto hlo_module = MakeUnique<HloModule>("test_module"); + hlo_module->AddEntryComputation(std::move(computation)); + + std::unique_ptr<Literal> result = + ExecuteAndTransfer(std::move(hlo_module), {constant_device_base}); + LiteralTestUtil::ExpectR0Near<float>(42.0f, *result, error_spec_); +} + +TEST_F(CopyOpTest, CopyConstantR2Twice) { + auto builder = HloComputation::Builder(TestName()); + + auto literal = LiteralUtil::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}}); + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(std::move(literal))); + + auto copy = builder.AddInstruction(HloInstruction::CreateUnary( + constant->shape(), HloOpcode::kCopy, constant)); + builder.AddInstruction( + HloInstruction::CreateUnary(copy->shape(), HloOpcode::kCopy, copy)); + + auto computation = builder.Build(); + + auto hlo_module = MakeUnique<HloModule>("test_module"); + hlo_module->AddEntryComputation(std::move(computation)); + std::unique_ptr<Literal> result = + ExecuteAndTransfer(std::move(hlo_module), {}); + LiteralTestUtil::ExpectR2Near<float>({{1.0, 2.0}, {3.0, 4.0}}, *result, + error_spec_); +} + +TEST_F(CopyOpTest, CopyConstantR2DifferentLayouts) { + HloComputation::Builder builder(TestName()); + + std::unique_ptr<Literal> literal = + LiteralUtil::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}}); + // Reverse the minor-to-major order of the literal. 
+ Layout* literal_layout = literal->mutable_shape()->mutable_layout(); + ASSERT_EQ(2, literal_layout->minor_to_major_size()); + literal_layout->mutable_minor_to_major()->SwapElements(0, 1); + + HloInstruction* constant = builder.AddInstruction( + HloInstruction::CreateConstant(std::move(literal))); + + builder.AddInstruction(HloInstruction::CreateUnary( + constant->shape(), HloOpcode::kCopy, constant)); + + std::unique_ptr<HloComputation> computation = builder.Build(); + + auto hlo_module = MakeUnique<HloModule>("test_module"); + hlo_module->AddEntryComputation(std::move(computation)); + std::unique_ptr<Literal> result = + ExecuteAndTransfer(std::move(hlo_module), {}); + + // The result of the computation has the default layout, which is the inverse + // of the layout of the source literal. + LiteralTestUtil::ExpectR2Near<float>({{1.0, 3.0}, {2.0, 4.0}}, *result, + error_spec_); +} + +void CopyOpTest::TestCopyConstantLayout021(size_t n1, size_t n2, size_t n3) { + Array3D<int32> a(n1, n2, n3); + for (size_t i = 0; i < n1; ++i) { + for (size_t j = 0; j < n2; ++j) { + for (size_t k = 0; k < n3; ++k) { + a(i, j, k) = i * n3 * n2 + j * n3 + k; + } + } + } + + HloComputation::Builder builder(TestName()); + + std::unique_ptr<Literal> literal = LiteralUtil::CreateR3FromArray3D(a); + + HloInstruction* constant = builder.AddInstruction( + HloInstruction::CreateConstant(std::move(literal))); + + builder.AddInstruction(HloInstruction::CreateUnary( + constant->shape(), HloOpcode::kCopy, constant)); + + std::unique_ptr<HloComputation> computation = builder.Build(); + + auto hlo_module = MakeUnique<HloModule>("test_module"); + auto config = MakeUnique<HloModuleConfig>(computation->ComputeProgramShape()); + *config->mutable_entry_computation_layout()->mutable_result_layout() = + ShapeLayout(ShapeUtil::MakeShapeWithLayout( + constant->shape().element_type(), + AsInt64Slice(constant->shape().dimensions()), {1, 2, 0})); + hlo_module->AddEntryComputation(std::move(computation)); + 
std::unique_ptr<Literal> result = + ExecuteAndTransfer(std::move(hlo_module), std::move(config), {}); + + LiteralTestUtil::ExpectR3EqualArray3D(a, *result); +} + +void CopyOpTest::TestCopyConstantLayoutR4( + size_t n1, size_t n2, size_t n3, size_t n4, + tensorflow::gtl::ArraySlice<int64> permutation) { + Array4D<int32> a(n1, n2, n3, n4); + for (size_t i = 0; i < n1; ++i) { + for (size_t j = 0; j < n2; ++j) { + for (size_t k = 0; k < n3; ++k) { + for (size_t l = 0; l < n4; ++l) { + a(i, j, k, l) = i * n4 * n3 * n2 + j * n4 * n3 + k * n4 + l; + } + } + } + } + + HloComputation::Builder builder(TestName()); + + std::unique_ptr<Literal> literal = LiteralUtil::CreateR4FromArray4D(a); + + HloInstruction* constant = builder.AddInstruction( + HloInstruction::CreateConstant(std::move(literal))); + + builder.AddInstruction(HloInstruction::CreateUnary( + constant->shape(), HloOpcode::kCopy, constant)); + + std::unique_ptr<HloComputation> computation = builder.Build(); + + auto hlo_module = MakeUnique<HloModule>("test_module"); + auto config = MakeUnique<HloModuleConfig>(computation->ComputeProgramShape()); + *config->mutable_entry_computation_layout()->mutable_result_layout() = + ShapeLayout(ShapeUtil::MakeShapeWithLayout( + constant->shape().element_type(), + AsInt64Slice(constant->shape().dimensions()), ({ + std::vector<int64> p(permutation.rbegin(), permutation.rend()); + p; + }))); + hlo_module->AddEntryComputation(std::move(computation)); + std::unique_ptr<Literal> result = + ExecuteAndTransfer(std::move(hlo_module), std::move(config), {}); + + LiteralTestUtil::ExpectR4EqualArray4D(a, *result); +} + +XLA_TEST_F(CopyOpTest, CopyConstantR3Layout021_SingleIncompleteTilePerLayer) { + TestCopyConstantLayout021(2, 2, 3); +} + +XLA_TEST_F(CopyOpTest, CopyConstantR3Layout021_SingleCompleteTilePerLayer) { + TestCopyConstantLayout021(2, 32, 32); +} + +XLA_TEST_F(CopyOpTest, CopyConstantR3Layout021_MultipleTilesPerLayer) { + TestCopyConstantLayout021(2, 70, 35); +} + 
+XLA_TEST_F(CopyOpTest, CopyConstantR4Layout0231_MultipleTilesPerLayer) { + TestCopyConstantLayoutR4(2, 70, 7, 5, {0, 2, 3, 1}); +} + +XLA_TEST_F(CopyOpTest, CopyConstantR4Layout0312_MultipleTilesPerLayer) { + TestCopyConstantLayoutR4(2, 14, 5, 35, {0, 3, 1, 2}); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/custom_call_test.cc b/tensorflow/compiler/xla/tests/custom_call_test.cc new file mode 100644 index 0000000000..dc54c9defe --- /dev/null +++ b/tensorflow/compiler/xla/tests/custom_call_test.cc @@ -0,0 +1,148 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> +#include <utility> + +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/dynamic_annotations.h" +#include "tensorflow/core/platform/test.h" + +extern "C" void __attribute__((visibility("default"))) +R0F32Add2(float* out, float** in) { + TF_ANNOTATE_MEMORY_IS_INITIALIZED(in, sizeof(float*)); + *out = **in + 2.0f; +} + +extern "C" void __attribute__((visibility("default"))) +R2F32ReduceSum(float* out, float** in) { + TF_ANNOTATE_MEMORY_IS_INITIALIZED(in, sizeof(float) * 4); + float* array = in[0]; + *out = array[0] + array[1] + array[2] + array[3]; +} + +extern "C" void __attribute__((visibility("default"))) +Add1ToValues(float* out, float** in) { + TF_ANNOTATE_MEMORY_IS_INITIALIZED(in, sizeof(float) * 4); + float* array = in[0]; + out[0] = array[0] + 1; + out[1] = array[1] + 1; + out[2] = array[2] + 1; + out[3] = array[3] + 1; +} + +namespace xla { +namespace { + +class CustomCallTest : public HloTestBase { + protected: + Shape r0f32_ = ShapeUtil::MakeShape(F32, {}); + Shape r2f32_ = ShapeUtil::MakeShape(F32, {2, 2}); +}; + +XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(CustomCallR0F32Add2)) { + auto hlo_module = MakeUnique<HloModule>("test_module"); + auto builder = HloComputation::Builder(TestName()); + + auto 
constant = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(42.0f))); + builder.AddInstruction( + HloInstruction::CreateCustomCall(r0f32_, {constant}, "R0F32Add2")); + + hlo_module->AddEntryComputation(builder.Build()); + + std::unique_ptr<Literal> result = + ExecuteAndTransfer(std::move(hlo_module), {}); + LiteralTestUtil::ExpectR0Near<float>(44.0f, *result, error_spec_); +} + +XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(CustomCallR2F32Reduce)) { + auto hlo_module = MakeUnique<HloModule>("test_module"); + auto builder = HloComputation::Builder(TestName()); + + Array2D<float> array(2, 2); + array(0, 0) = 1.0f; + array(0, 1) = 2.0f; + array(1, 0) = 3.0f; + array(1, 1) = 4.0f; + + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR2FromArray2D(array))); + builder.AddInstruction( + HloInstruction::CreateCustomCall(r0f32_, {constant}, "R2F32ReduceSum")); + + hlo_module->AddEntryComputation(builder.Build()); + + std::unique_ptr<Literal> result = + ExecuteAndTransfer(std::move(hlo_module), {}); + LiteralTestUtil::ExpectR0Near<float>(10.0f, *result, error_spec_); +} + +XLA_TEST_F(CustomCallTest, + DISABLED_ON_GPU(CustomCall_UsedInOtherComputations)) { + auto hlo_module = MakeUnique<HloModule>("test_module"); + auto b = HloComputation::Builder(TestName()); + + auto input = b.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR2FromArray2D( + Array2D<float>{{1.0f, 2.0f}, {3.0f, 4.0f}}))); + auto incremented = b.AddInstruction(HloInstruction::CreateCustomCall( + ShapeUtil::MakeShape(F32, {1, 2, 2}), {input}, "Add1ToValues")); + auto incremented_again = b.AddInstruction(HloInstruction::CreateCustomCall( + ShapeUtil::MakeShape(F32, {1, 2, 2}), {incremented}, "Add1ToValues")); + + // Concatenate the values along first dim. 
+ b.AddInstruction( + HloInstruction::CreateConcatenate(ShapeUtil::MakeShape(F32, {2, 2, 2}), + {incremented, incremented_again}, 0)); + + hlo_module->AddEntryComputation(b.Build()); + + std::unique_ptr<Literal> result = + ExecuteAndTransfer(std::move(hlo_module), {}); + LiteralTestUtil::ExpectR3EqualArray3D<float>( + Array3D<float>{{{2, 3}, {4, 5}}, {{3, 4}, {5, 6}}}, *result); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/deallocation_test.cc b/tensorflow/compiler/xla/tests/deallocation_test.cc new file mode 100644 index 0000000000..528efd2942 --- /dev/null +++ b/tensorflow/compiler/xla/tests/deallocation_test.cc @@ -0,0 +1,155 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> + +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class DeallocationTest : public ClientLibraryTestBase { + protected: + // Build and execute the given computation then verify the results can be + // transferred from the device successfully. + std::unique_ptr<GlobalData> ExecuteAndCheckTransfer( + ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + Computation computation = builder->Build().ConsumeValueOrDie(); + auto global_data = + client_->Execute(computation, arguments).ConsumeValueOrDie(); + TF_CHECK_OK(client_->Transfer(*global_data).status()); + return global_data; + } +}; + +TEST_F(DeallocationTest, DeallocateScalar) { + ComputationBuilder builder(client_, TestName()); + builder.ConstantR0<float>(42.0); + auto global_data = ExecuteAndCheckTransfer(&builder, {}); + + // A result can be transfered an arbitrary number of times. Add an extra + // transfer here so we're not just testing that a second call to Transfer + // fails. 
+ ASSERT_IS_OK(client_->Transfer(*global_data).status()); + + ASSERT_IS_OK(client_->Unregister(*global_data)); + + auto transfer_status = client_->Transfer(*global_data); + ASSERT_FALSE(transfer_status.ok()); + ASSERT_MATCH(transfer_status.status().error_message(), + testing::HasSubstr("was previously deallocated")); +} + +TEST_F(DeallocationTest, DeallocateVector) { + ComputationBuilder builder(client_, TestName()); + builder.ConstantR1<float>({1.0, 2.0, 3.0, 4.0}); + auto global_data = ExecuteAndCheckTransfer(&builder, {}); + + ASSERT_IS_OK(client_->Unregister(*global_data)); + + auto transfer_status = client_->Transfer(*global_data); + ASSERT_FALSE(transfer_status.ok()); + ASSERT_MATCH(transfer_status.status().error_message(), + testing::HasSubstr("was previously deallocated")); +} + +TEST_F(DeallocationTest, DeallocateEmptyVector) { + ComputationBuilder builder(client_, TestName()); + builder.ConstantR1<float>({}); + auto global_data = ExecuteAndCheckTransfer(&builder, {}); + + ASSERT_IS_OK(client_->Unregister(*global_data)); + + auto transfer_status = client_->Transfer(*global_data); + ASSERT_FALSE(transfer_status.ok()); + ASSERT_MATCH(transfer_status.status().error_message(), + testing::HasSubstr("was previously deallocated")); +} + +XLA_TEST_F(DeallocationTest, DeallocateTuple) { + ComputationBuilder builder(client_, TestName()); + builder.Tuple({builder.ConstantR0<float>(42.0), + builder.ConstantR1<float>({1.0, 2.0, 3.0})}); + auto global_data = ExecuteAndCheckTransfer(&builder, {}); + + ASSERT_IS_OK(client_->Unregister(*global_data)); + + auto transfer_status = client_->Transfer(*global_data); + ASSERT_FALSE(transfer_status.ok()); + ASSERT_MATCH(transfer_status.status().error_message(), + testing::HasSubstr("was previously deallocated")); +} + +XLA_TEST_F(DeallocationTest, DeallocateTupleWithRepeatedElements) { + ComputationBuilder builder(client_, TestName()); + auto element = builder.ConstantR0<float>(42.0); + auto inner_tuple = 
builder.Tuple({builder.ConstantR0<float>(42.0), element}); + builder.Tuple({element, inner_tuple, element}); + auto global_data = ExecuteAndCheckTransfer(&builder, {}); + + ASSERT_IS_OK(client_->Unregister(*global_data)); + + auto transfer_status = client_->Transfer(*global_data); + ASSERT_FALSE(transfer_status.ok()); + ASSERT_MATCH(transfer_status.status().error_message(), + testing::HasSubstr("was previously deallocated")); +} + +XLA_TEST_F(DeallocationTest, DeallocateNestedTuple) { + ComputationBuilder builder(client_, TestName()); + auto inner_tuple = + builder.Tuple({builder.ConstantR0<float>(42.0), + builder.ConstantR1<float>({1.0, 2.0, 3.0})}); + builder.Tuple({inner_tuple, builder.ConstantR1<float>({0.123, 0.456})}); + auto global_data = ExecuteAndCheckTransfer(&builder, {}); + + ASSERT_IS_OK(client_->Unregister(*global_data)); + + auto transfer_status = client_->Transfer(*global_data); + ASSERT_FALSE(transfer_status.ok()); + ASSERT_MATCH(transfer_status.status().error_message(), + testing::HasSubstr("was previously deallocated")); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/deconstruct_tuple_test.cc b/tensorflow/compiler/xla/tests/deconstruct_tuple_test.cc new file mode 100644 index 0000000000..57a7c61b14 --- /dev/null +++ b/tensorflow/compiler/xla/tests/deconstruct_tuple_test.cc @@ -0,0 +1,215 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class DeconstructTupleTest : public ClientLibraryTestBase { + protected: + // Build and execute the given computation then verify the results can be + // transferred from the device successfully. 
+ std::unique_ptr<GlobalData> ExecuteAndCheckTransfer( + ComputationBuilder* builder, + tensorflow::gtl::ArraySlice<GlobalData*> arguments) { + Computation computation = builder->Build().ConsumeValueOrDie(); + auto global_data = + client_->Execute(computation, arguments).ConsumeValueOrDie(); + TF_CHECK_OK(client_->Transfer(*global_data).status()); + return global_data; + } +}; + +TEST_F(DeconstructTupleTest, DeconstructTuple) { + ComputationBuilder builder(client_, TestName()); + auto const1 = builder.ConstantR1<float>({1.0, 2.0, 3.0, 4.0}); + auto const2 = builder.ConstantR1<float>({2.0, 4.0, 6.0, 8.0}); + builder.Tuple({const1, const2}); + auto global_data = ExecuteAndCheckTransfer(&builder, {}); + + auto result_status = client_->DeconstructTuple(*global_data); + EXPECT_TRUE(result_status.ok()); + + // Try copying the elements back and comparing it + auto handles = result_status.ConsumeValueOrDie(); + std::vector<float> copy(4); + ASSERT_IS_OK(client_->TransferInProcess(*handles[0], &copy[0])); + EXPECT_MATCH(copy, testing::VectorMatcher<float>({1.0, 2.0, 3.0, 4.0})); + ASSERT_IS_OK(client_->TransferInProcess(*handles[1], &copy[0])); + EXPECT_MATCH(copy, testing::VectorMatcher<float>({2.0, 4.0, 6.0, 8.0})); +} + +TEST_F(DeconstructTupleTest, DeconstructTupleTwice) { + ComputationBuilder builder(client_, TestName()); + auto const1 = builder.ConstantR1<float>({1.0, 2.0, 3.0, 4.0}); + auto const2 = builder.ConstantR1<float>({2.0, 4.0, 6.0, 8.0}); + builder.Tuple({const1, const2}); + auto global_data = ExecuteAndCheckTransfer(&builder, {}); + + auto result_status1 = client_->DeconstructTuple(*global_data); + EXPECT_TRUE(result_status1.ok()); + auto result_status2 = client_->DeconstructTuple(*global_data); + EXPECT_TRUE(result_status2.ok()); + + auto handles1 = result_status1.ConsumeValueOrDie(); + auto handles2 = result_status2.ConsumeValueOrDie(); + std::vector<float> copy(4); + + ASSERT_IS_OK(client_->TransferInProcess(*handles1[0], &copy[0])); + EXPECT_MATCH(copy, 
testing::VectorMatcher<float>({1.0, 2.0, 3.0, 4.0})); + ASSERT_IS_OK(client_->TransferInProcess(*handles1[1], &copy[0])); + EXPECT_MATCH(copy, testing::VectorMatcher<float>({2.0, 4.0, 6.0, 8.0})); + handles1[0].reset(); + handles1[1].reset(); + + ASSERT_IS_OK(client_->TransferInProcess(*handles2[0], &copy[0])); + EXPECT_MATCH(copy, testing::VectorMatcher<float>({1.0, 2.0, 3.0, 4.0})); + ASSERT_IS_OK(client_->TransferInProcess(*handles2[1], &copy[0])); + EXPECT_MATCH(copy, testing::VectorMatcher<float>({2.0, 4.0, 6.0, 8.0})); +} + +XLA_TEST_F(DeconstructTupleTest, DeconstructTupleRepeatedElement) { + ComputationBuilder builder(client_, TestName()); + auto const1 = builder.ConstantR1<float>({1.0, 2.0, 3.0, 4.0}); + auto const2 = builder.ConstantR1<float>({2.0, 4.0, 6.0, 8.0}); + builder.Tuple({const1, const2, const2, const1}); + auto global_data = ExecuteAndCheckTransfer(&builder, {}); + + auto result_status = client_->DeconstructTuple(*global_data); + EXPECT_TRUE(result_status.ok()); + + // Verify the returned GlobalDataHandle arrays have repeated elements like the + // tuple does. That is, in the returned vector of handles, handle[0] should be + // the same as handle[3] and handle[1] should be the same as handle[2]. 
+ auto handles = result_status.ConsumeValueOrDie(); + + std::vector<float> copy(4); + ASSERT_IS_OK(client_->TransferInProcess(*handles[0], &copy[0])); + EXPECT_MATCH(copy, testing::VectorMatcher<float>({1.0, 2.0, 3.0, 4.0})); + ASSERT_IS_OK(client_->TransferInProcess(*handles[1], &copy[0])); + EXPECT_MATCH(copy, testing::VectorMatcher<float>({2.0, 4.0, 6.0, 8.0})); + ASSERT_IS_OK(client_->TransferInProcess(*handles[2], &copy[0])); + EXPECT_MATCH(copy, testing::VectorMatcher<float>({2.0, 4.0, 6.0, 8.0})); + ASSERT_IS_OK(client_->TransferInProcess(*handles[3], &copy[0])); + EXPECT_MATCH(copy, testing::VectorMatcher<float>({1.0, 2.0, 3.0, 4.0})); +} + +TEST_F(DeconstructTupleTest, DeconstructTupleThenDeallocate) { + ComputationBuilder builder(client_, TestName()); + auto const1 = builder.ConstantR1<float>({1.0, 2.0, 3.0, 4.0}); + auto const2 = builder.ConstantR1<float>({2.0, 4.0, 6.0, 8.0}); + builder.Tuple({const1, const2, const1}); + auto global_data = ExecuteAndCheckTransfer(&builder, {}); + + auto result_status = client_->DeconstructTuple(*global_data); + EXPECT_TRUE(result_status.ok()); + auto handles = result_status.ConsumeValueOrDie(); + + // Deallocate the tuple, then try copying the elements back. The elements + // should not have been deallocated because of reference counting. 
+ global_data.reset(); + + std::vector<float> copy(4); + ASSERT_IS_OK(client_->TransferInProcess(*handles[0], &copy[0])); + EXPECT_MATCH(copy, testing::VectorMatcher<float>({1.0, 2.0, 3.0, 4.0})); + ASSERT_IS_OK(client_->TransferInProcess(*handles[1], &copy[0])); + EXPECT_MATCH(copy, testing::VectorMatcher<float>({2.0, 4.0, 6.0, 8.0})); + ASSERT_IS_OK(client_->TransferInProcess(*handles[2], &copy[0])); + EXPECT_MATCH(copy, testing::VectorMatcher<float>({1.0, 2.0, 3.0, 4.0})); + + /// Try deallocating one of the repeated elements, then copy + handles[0].reset(); + + ASSERT_IS_OK(client_->TransferInProcess(*handles[2], &copy[0])); + EXPECT_MATCH(copy, testing::VectorMatcher<float>({1.0, 2.0, 3.0, 4.0})); +} + +TEST_F(DeconstructTupleTest, DeconstructNonTuple) { + ComputationBuilder builder(client_, TestName()); + builder.ConstantR1<float>({1.0, 2.0, 3.0, 4.0}); + auto global_data = ExecuteAndCheckTransfer(&builder, {}); + + auto result_status = client_->DeconstructTuple(*global_data); + EXPECT_FALSE(result_status.ok()); + EXPECT_MATCH(result_status.status().error_message(), + testing::ContainsRegex("global data handle .* is not a tuple")); +} + +XLA_TEST_F(DeconstructTupleTest, DeconstructTupleFromParam) { + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR1<float>({3.14f, -100.25f}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + auto p = builder.Parameter(0, ShapeUtil::MakeShape(F32, {2}), "param0"); + builder.Tuple({p}); + auto global_data = ExecuteAndCheckTransfer(&builder, {param0_data.get()}); + + auto result_status = client_->DeconstructTuple(*global_data); + EXPECT_TRUE(result_status.ok()); + auto handles = result_status.ConsumeValueOrDie(); + EXPECT_NE(handles[0]->handle().handle(), param0_data->handle().handle()); +} + +XLA_TEST_F(DeconstructTupleTest, DeconstructNestedTuple) { + ComputationBuilder builder(client_, TestName()); + auto const1 = 
builder.ConstantR1<float>({1.0, 2.0, 3.0, 4.0}); + auto const2 = builder.ConstantR1<float>({2.0, 4.0, 6.0, 8.0}); + builder.Tuple({builder.Tuple({const1, const2}), const1}); + auto global_data = ExecuteAndCheckTransfer(&builder, {}); + + auto result_status = client_->DeconstructTuple(*global_data); + EXPECT_FALSE(result_status.ok()); + EXPECT_MATCH( + result_status.status().error_message(), + testing::ContainsRegex("deconstructing nested tuples not yet supported")); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc new file mode 100644 index 0000000000..da2d43ca4f --- /dev/null +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -0,0 +1,387 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array3d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_runtime_flags.h" +#include "tensorflow/compiler/xla/legacy_flags/layout_util_flags.h" +#include "tensorflow/compiler/xla/primitive_util.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/command_line_flags.h" + +namespace xla { +namespace { + +// TODO(mfdyck): use GUnit typed tests when we can do all tests on all backends. 
+class DotOperationTest : public ClientLibraryTestBase { + public: + ErrorSpec error_spec_{0.0001, 1e-5}; + + protected: + template <typename Element> + void TestOneElementVectorDot(); + template <typename Element> + void TestVectorDot(); + template <typename Element> + void TestSquareMatrixDot(bool lhs_row_major = false, + bool rhs_row_major = false); + template <typename Element> + void TestNonsquareMatrixDot(bool lhs_row_major = false, + bool rhs_row_major = false); +}; + +XLA_TEST_F(DotOperationTest, ZeroElementVectorDotF32) { + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR1<float>({}); + auto rhs = builder.ConstantR1<float>({}); + auto result = builder.Dot(lhs, rhs); + + ComputeAndCompareR0<float>(&builder, 0.0, {}, error_spec_); +} + +template <typename Element> +void DotOperationTest::TestOneElementVectorDot() { + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR1<Element>({2.0}); + auto rhs = builder.ConstantR1<Element>({3.0}); + auto result = builder.Dot(lhs, rhs); + + ComputeAndCompareR0<Element>(&builder, 6.0, {}, error_spec_); +} + +XLA_TEST_F(DotOperationTest, OneElementVectorDotF32) { + TestOneElementVectorDot<float>(); +} + +XLA_TEST_F(DotOperationTest, OneElementVectorDotF64) { + TestOneElementVectorDot<double>(); +} + +template <typename Element> +void DotOperationTest::TestVectorDot() { + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR1<Element>({1.0, 2.5, 42.0}); + auto rhs = builder.ConstantR1<Element>({11.0, -1.0, 0.5}); + auto result = builder.Dot(lhs, rhs); + + ComputeAndCompareR0<Element>(&builder, 29.5, {}, error_spec_); +} + +XLA_TEST_F(DotOperationTest, VectorDotF32) { TestVectorDot<float>(); } + +XLA_TEST_F(DotOperationTest, VectorDotF64) { TestVectorDot<double>(); } + +namespace { + +std::vector<int64> MinorToMajorForIsRowMajor(bool row_major) { + return {row_major ? 1 : 0, row_major ? 
0 : 1}; +} + +} // namespace + +XLA_TEST_F(DotOperationTest, Dot_0x2_2x0) { + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 2)); + auto rhs = builder.ConstantR2FromArray2D<float>(Array2D<float>(2, 0)); + auto result = builder.Dot(lhs, rhs); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(0, 0), {}, error_spec_); +} + +XLA_TEST_F(DotOperationTest, Dot_0x2_2x3) { + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 2)); + auto rhs = builder.ConstantR2<float>({{7.0, 8.0, 9.0}, {42.0, 77.0, 101.0}}); + auto result = builder.Dot(lhs, rhs); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(0, 3), {}, error_spec_); +} + +XLA_TEST_F(DotOperationTest, Dot_3x2_2x0) { + ComputationBuilder builder(client_, TestName()); + auto lhs = + builder.ConstantR2<float>({{7.0, 8.0}, {9.0, 42.0}, {77.0, 101.0}}); + auto rhs = builder.ConstantR2FromArray2D<float>(Array2D<float>(2, 0)); + auto result = builder.Dot(lhs, rhs); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(3, 0), {}, error_spec_); +} + +XLA_TEST_F(DotOperationTest, Dot_2x0_0x2) { + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR2FromArray2D<float>(Array2D<float>(2, 0)); + auto rhs = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 2)); + auto result = builder.Dot(lhs, rhs); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(2, 2, 0.0f), {}, + error_spec_); +} + +template <typename Element> +void DotOperationTest::TestSquareMatrixDot(bool lhs_row_major, + bool rhs_row_major) { + auto lhs_handle = + client_ + ->TransferToServer(*test_utils::CreateR2LiteralWithLayout<Element>( + {{1.0, 2.0}, {3.0, -4.0}}, + MinorToMajorForIsRowMajor(lhs_row_major))) + .ConsumeValueOrDie(); + auto rhs_handle = + client_ + ->TransferToServer(*test_utils::CreateR2LiteralWithLayout<Element>( + {{1.0, 6.0}, {7.0, -4.0}}, + 
MinorToMajorForIsRowMajor(rhs_row_major))) + .ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto prim_type = primitive_util::NativeToPrimitiveType<Element>(); + auto result = builder.Dot( + builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 2}), "lhs"), + builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {2, 2}), "rhs")); + + Array2D<Element> expected({{15.0, -2.0}, {-25.0, 34.0}}); + ComputeAndCompareR2<Element>( + &builder, expected, {lhs_handle.get(), rhs_handle.get()}, error_spec_); +} + +XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFF) { + constexpr bool kLhsRowMajor = false; + constexpr bool kRhsRowMajor = false; + TestSquareMatrixDot<float>(kLhsRowMajor, kRhsRowMajor); +} + +XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFT) { + TestSquareMatrixDot<float>(false, true); +} + +XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorTF) { + TestSquareMatrixDot<float>(true, false); +} + +TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorTT) { + constexpr bool kLhsRowMajor = true; + constexpr bool kRhsRowMajor = true; + TestSquareMatrixDot<float>(kLhsRowMajor, kRhsRowMajor); +} + +XLA_TEST_F(DotOperationTest, SquareMatrixDotF64) { + TestSquareMatrixDot<double>(); +} + +template <typename Element> +void DotOperationTest::TestNonsquareMatrixDot(bool lhs_row_major, + bool rhs_row_major) { + auto lhs_handle = + client_ + ->TransferToServer(*test_utils::CreateR2LiteralWithLayout<Element>( + {{1.0, 2.0, 3.0}, {3.0, -4.0, -1.0}}, + MinorToMajorForIsRowMajor(lhs_row_major))) + .ConsumeValueOrDie(); + auto rhs_handle = + client_ + ->TransferToServer(*test_utils::CreateR2LiteralWithLayout<Element>( + {{1.0, 6.0}, {2.0, 3.0}, {7.0, -4.0}}, + MinorToMajorForIsRowMajor(rhs_row_major))) + .ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto prim_type = primitive_util::NativeToPrimitiveType<Element>(); + auto result = builder.Dot( + builder.Parameter(0, 
ShapeUtil::MakeShape(prim_type, {2, 3}), "lhs"), + builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {3, 2}), "rhs")); + + Array2D<Element> expected({{26.0, 0.0}, {-12.0, 10.0}}); + + ComputeAndCompareR2<Element>( + &builder, expected, {lhs_handle.get(), rhs_handle.get()}, error_spec_); +} + +XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorFF) { + constexpr bool kLhsRowMajor = false; + constexpr bool kRhsRowMajor = false; + TestNonsquareMatrixDot<float>(kLhsRowMajor, kRhsRowMajor); +} + +XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorFT) { + constexpr bool kLhsRowMajor = false; + constexpr bool kRhsRowMajor = true; + TestNonsquareMatrixDot<float>(kLhsRowMajor, kRhsRowMajor); +} + +XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorTF) { + constexpr bool kLhsRowMajor = true; + constexpr bool kRhsRowMajor = false; + TestNonsquareMatrixDot<float>(kLhsRowMajor, kRhsRowMajor); +} + +TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorTT) { + constexpr bool kLhsRowMajor = true; + constexpr bool kRhsRowMajor = true; + TestNonsquareMatrixDot<float>(kLhsRowMajor, kRhsRowMajor); +} + +XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF64) { + TestNonsquareMatrixDot<double>(); +} + +TEST_F(DotOperationTest, ConcurrentMatMul) { + ComputationBuilder builder(client_, TestName()); + auto matrix1 = builder.ConstantR2<float>({{1.0, 2.0}, {3.0, 4.0}}); + auto matrix2 = builder.ConstantR2<float>({{5.0, 6.0}, {7.0, 8.0}}); + auto matrix12 = builder.Dot(matrix1, matrix2); + auto matrix21 = builder.Dot(matrix2, matrix1); + builder.Add(matrix12, matrix21); + + Array2D<float> expected({{42.0, 56.0}, {74.0, 96.0}}); + ComputeAndCompareR2<float>(&builder, expected, {}, error_spec_); +} + +// Regression test for b/32055648. The root of the graph is a kFusion of 4 +// bitcasts. Although bitcasts don't map to thunks, the root should still be +// sync-dependent on bitcasts' operands. 
+XLA_TEST_F(DotOperationTest, BatchMatMul) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {2, 2, 2, 2}), "x"); + auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {2, 2, 2, 2}), "y"); + + auto x_flat = builder.Reshape(x, {0, 1, 2, 3}, {4, 2, 2}); + auto y_flat = builder.Reshape(y, {0, 1, 2, 3}, {4, 2, 2}); + + // Slice batches into individual matrices and multiply them. + std::vector<xla::ComputationDataHandle> out_slices; + for (int i = 0; i < 4; ++i) { + // Slice off individual matrices and reshape to 2D tensors. + auto x_slice = builder.Slice(x_flat, {i, 0, 0}, {i + 1, 2, 2}); + x_slice = builder.Reshape(x_slice, {0, 1, 2}, {2, 2}); + auto y_slice = builder.Slice(y_flat, {i, 0, 0}, {i + 1, 2, 2}); + y_slice = builder.Reshape(y_slice, {0, 1, 2}, {2, 2}); + + auto out = builder.Dot(x_slice, y_slice); + out = builder.Reshape(out, {0, 1}, {1, 2, 2}); + out_slices.push_back(out); + } + auto out_flat = builder.ConcatInDim(out_slices, 0); + builder.Reshape(out_flat, {0, 1, 2}, {2, 2, 2, 2}); + + auto x_data = client_ + ->TransferToServer(*LiteralUtil::CreateR4<float>( + {{{{1000, 100}, {10, 1}}, {{2000, 200}, {20, 2}}}, + {{{3000, 300}, {30, 3}}, {{4000, 400}, {40, 4}}}})) + .ConsumeValueOrDie(); + auto y_data = client_ + ->TransferToServer(*LiteralUtil::CreateR4<float>( + {{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}, + {{{11, 22}, {33, 44}}, {{55, 66}, {77, 88}}}})) + .ConsumeValueOrDie(); + + ComputeAndCompareR4<float>( + &builder, + /*expected=*/{{{{1300, 2400}, {13, 24}}, {{11400, 13600}, {114, 136}}}, + {{{42900, 79200}, {429, 792}}, + {{250800, 299200}, {2508, 2992}}}}, + {x_data.get(), y_data.get()}, error_spec_); +} + +TEST_F(DotOperationTest, TransposeFolding) { + for (bool transpose_lhs : {false, true}) { + for (bool transpose_rhs : {false, true}) { + for (bool row_major : {false, true}) { + std::unique_ptr<Array2D<float>> lhs( + new Array2D<float>({{1.0, 2.0, 3.0}, {3.0, -4.0, -1.0}})); + 
std::unique_ptr<Array2D<float>> rhs( + new Array2D<float>({{1.0, 6.0}, {2.0, 3.0}, {7.0, -4.0}})); + + if (transpose_lhs) { + lhs = ReferenceUtil::TransposeArray2D(*lhs); + } + if (transpose_rhs) { + rhs = ReferenceUtil::TransposeArray2D(*rhs); + } + auto lhs_handle = + client_ + ->TransferToServer( + *LiteralUtil::CreateR2FromArray2DWithLayout<float>( + *lhs, LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(row_major)))) + .ConsumeValueOrDie(); + auto rhs_handle = + client_ + ->TransferToServer( + *LiteralUtil::CreateR2FromArray2DWithLayout<float>( + *rhs, LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(row_major)))) + .ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto prim_type = primitive_util::NativeToPrimitiveType<float>(); + auto lhs_arg = builder.Parameter( + 0, ShapeUtil::MakeShape(prim_type, {lhs->height(), lhs->width()}), + "lhs"); + auto rhs_arg = builder.Parameter( + 1, ShapeUtil::MakeShape(prim_type, {rhs->height(), rhs->width()}), + "rhs"); + if (transpose_lhs) { + lhs_arg = builder.Transpose(lhs_arg, {1, 0}); + } + if (transpose_rhs) { + rhs_arg = builder.Transpose(rhs_arg, {1, 0}); + } + auto result = builder.Dot(lhs_arg, rhs_arg); + + Array2D<float> expected({{26.0, 0.0}, {-12.0, 10.0}}); + VLOG(1) << "TestTransposeFolding " << transpose_lhs << " " + << transpose_rhs << " " << row_major; + ComputeAndCompareR2<float>(&builder, expected, + {lhs_handle.get(), rhs_handle.get()}, + error_spec_); + } + } + } +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendLayoutUtilFlags(&flag_list); + xla::legacy_flags::AppendCpuRuntimeFlags(&flag_list); + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + 
testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc new file mode 100644 index 0000000000..cecc4872df --- /dev/null +++ b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc @@ -0,0 +1,506 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <numeric> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/client/client_library.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/service/device_memory_allocator.h" +#include "tensorflow/compiler/xla/service/local_service.h" +#include "tensorflow/compiler/xla/service/platform_util.h" +#include "tensorflow/compiler/xla/service/shaped_buffer.h" +#include "tensorflow/compiler/xla/service/transfer_manager.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/platform/types.h" + +namespace se = ::perftools::gputools; + +namespace xla { +namespace { + +class DynamicSliceTest : public ClientLibraryTestBase { + protected: + template <typename IndexT> + void TestR1() { + // Slice at dimension start. + RunR1<IndexT>({0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}, {0}, {5}, + {0.0, 1.0, 2.0, 3.0, 4.0}); + // Slice in the middle. + RunR1<IndexT>({0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}, {2}, {3}, + {2.0, 3.0, 4.0}); + // Slice at dimension boundaries. + RunR1<IndexT>({0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}, {5}, {3}, + {5.0, 6.0, 7.0}); + // Slice at dimension boundaries, but with sizes that cause indices to wrap. 
+    RunR1<IndexT>({0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}, {6}, {4},
+                  {6.0, 7.0, 0.0, 1.0});
+  }
+
+  template <typename IndexT>
+  void TestR2() {
+    // Slice at dimension start.
+    RunR2<IndexT>({{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}, {7.0f, 8.0f, 9.0f}},
+                  {0, 0}, {2, 2}, {{1.0f, 2.0f}, {4.0f, 5.0f}});
+    // Slice in the middle.
+    RunR2<IndexT>({{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}, {7.0f, 8.0f, 9.0f}},
+                  {1, 1}, {2, 1}, {{5.0f}, {8.0f}});
+    // Slice at dimension boundaries. NOTE(review): these arguments are
+    // identical to the "middle" case above -- a boundary start such as {1, 2}
+    // was presumably intended; confirm and update expected values accordingly.
+    RunR2<IndexT>({{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}, {7.0f, 8.0f, 9.0f}},
+                  {1, 1}, {2, 1}, {{5.0f}, {8.0f}});
+    // Slice at dimension boundaries, but with sizes that cause indices to wrap.
+    RunR2<IndexT>({{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}, {7.0f, 8.0f, 9.0f}},
+                  {1, 1}, {3, 3},
+                  {{5.0f, 6.0f, 4.0f}, {8.0f, 9.0f, 7.0f}, {2.0f, 3.0f, 1.0f}});
+  }
+
+  template <typename IndexT>
+  void TestR3() {
+    // R3 Shape: [2, 3, 2]
+    // clang-format off
+
+    // Slice at dimension start.
+    RunR3<IndexT>(
+      {{{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}},
+       {{7.0f, 8.0f}, {9.0f, 10.0f}, {11.0f, 12.0f}}},
+      {0, 0, 0}, {2, 1, 2},
+      {{{1.0f, 2.0f}}, {{7.0f, 8.0f}}});
+
+    // Slice in the middle.
+    RunR3<IndexT>(
+      {{{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}},
+       {{7.0f, 8.0f}, {9.0f, 10.0f}, {11.0f, 12.0f}}},
+      {0, 1, 1}, {2, 2, 1},
+      {{{4.0f}, {6.0f}}, {{10.0f}, {12.0f}}});
+
+    // Slice at dimension boundaries, but with sizes that cause indices to wrap.
+    RunR3<IndexT>(
+      {{{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}},
+       {{7.0f, 8.0f}, {9.0f, 10.0f}, {11.0f, 12.0f}}},
+      {0, 2, 1}, {2, 2, 1},
+      {{{6.0f}, {2.0f}}, {{12.0f}, {8.0f}}});
+
+    // clang-format on
+  }
+
+  template <typename IndexT>
+  void RunR1(const std::vector<float>& input_values,
+             const std::vector<IndexT> slice_starts,
+             const std::vector<int64> slice_sizes,
+             const std::vector<float>& expected_values) {
+    ComputationBuilder builder(client_, TestName());
+    // Initialize and transfer dynamic slice start indices parameter. 
+ ComputationDataHandle starts; + std::unique_ptr<GlobalData> start_data = CreateR1Parameter<IndexT>( + slice_starts, 0, "slice_starts", &builder, &starts); + // Build dynamic slice computation. + auto input = builder.ConstantR1<float>(input_values); + builder.DynamicSlice(input, starts, slice_sizes); + // Run computation and compare against expected values. + ComputeAndCompareR1<float>(&builder, expected_values, {start_data.get()}, + ErrorSpec(0.000001)); + } + + template <typename IndexT> + void RunR2(const Array2D<float>& input_values, + const std::vector<IndexT> slice_starts, + const std::vector<int64> slice_sizes, + const Array2D<float>& expected_values) { + ComputationBuilder builder(client_, TestName()); + // Initialize and transfer dynamic slice start indices parameter. + ComputationDataHandle starts; + std::unique_ptr<GlobalData> start_data = CreateR1Parameter<IndexT>( + slice_starts, 0, "slice_starts", &builder, &starts); + // Build dynamic slice computation. + auto input = builder.ConstantR2FromArray2D<float>(input_values); + builder.DynamicSlice(input, starts, slice_sizes); + // Run computation and compare against expected values. + ComputeAndCompareR2<float>(&builder, expected_values, {start_data.get()}, + ErrorSpec(0.000001)); + } + + template <typename IndexT> + void RunR3(const Array3D<float>& input_values, + const std::vector<IndexT> slice_starts, + const std::vector<int64> slice_sizes, + const Array3D<float>& expected_values) { + ComputationBuilder builder(client_, TestName()); + // Initialize and transfer dynamic slice start indices parameter. + ComputationDataHandle starts; + std::unique_ptr<GlobalData> start_data = CreateR1Parameter<IndexT>( + slice_starts, 0, "slice_starts", &builder, &starts); + // Build dynamic slice computation. + auto input = builder.ConstantR3FromArray3D<float>(input_values); + builder.DynamicSlice(input, starts, slice_sizes); + // Run computation and compare against expected values. 
+ ComputeAndCompareR3<float>(&builder, expected_values, {start_data.get()}, + ErrorSpec(0.000001)); + } +}; + +XLA_TEST_F(DynamicSliceTest, Int32R1) { TestR1<int32>(); } + +XLA_TEST_F(DynamicSliceTest, Int64R1) { TestR1<int64>(); } + +XLA_TEST_F(DynamicSliceTest, UInt64R1) { TestR1<uint64>(); } + +XLA_TEST_F(DynamicSliceTest, Int32R2) { TestR2<int32>(); } + +XLA_TEST_F(DynamicSliceTest, Int64R2) { TestR2<int64>(); } + +XLA_TEST_F(DynamicSliceTest, UInt64R2) { TestR2<uint64>(); } + +XLA_TEST_F(DynamicSliceTest, Int32R3) { TestR3<int32>(); } + +XLA_TEST_F(DynamicSliceTest, Int64R3) { TestR3<int64>(); } + +XLA_TEST_F(DynamicSliceTest, UInt64R3) { TestR3<uint64>(); } + +class DynamicUpdateSliceTest : public ClientLibraryTestBase { + protected: + template <typename IndexT> + void TestR1() { + // clang-format off + // Slice at dimension start. + RunR1<IndexT>({0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}, + {8.0, 9.0, 10.0}, {0}, + {8.0, 9.0, 10.0, 3.0, 4.0, 5.0, 6.0, 7.0}); + // Slice in the middle. + RunR1<IndexT>({0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}, + {8.0, 9.0, 10.0}, {2}, + {0.0, 1.0, 8.0, 9.0, 10.0, 5.0, 6.0, 7.0}); + // Slice at dimension boundaries. + RunR1<IndexT>({0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}, + {8.0, 9.0, 10.0}, {5}, + {0.0, 1.0, 2.0, 3.0, 4.0, 8.0, 9.0, 10.0}); + // Slice at dimension boundaries, but with sizes that cause indices to wrap. + RunR1<IndexT>({0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}, + {8.0, 9.0, 10.0}, {6}, + {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 8.0, 9.0}); + // clang-format on + } + + template <typename IndexT> + void TestR2() { + // clang-format off + // Slice at dimension start. + RunR2<IndexT>( + {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}, {7.0f, 8.0f, 9.0f}}, + {{10.0f, 11.0f}}, {0, 0}, + {{10.0f, 11.0f, 3.0f}, {4.0f, 5.0f, 6.0f}, {7.0f, 8.0f, 9.0f}}); + // Slice in the middle. 
+ RunR2<IndexT>( + {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}, {7.0f, 8.0f, 9.0f}}, + {{10.0f, 11.0f}}, {1, 1}, + {{1.0f, 2.0f, 3.0f}, {4.0f, 10.0f, 11.0f}, {7.0f, 8.0f, 9.0f}}); + // Slice at dimension boundaries. + RunR2<IndexT>( + {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}, {7.0f, 8.0f, 9.0f}}, + {{10.0f, 11.0f}}, {2, 1}, + {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}, {7.0f, 10.0f, 11.0f}}); + // Slice at dimension boundaries, but with sizes that cause indices to wrap. + RunR2<IndexT>( + {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}, {7.0f, 8.0f, 9.0f}}, + {{10.0f, 11.0f}}, {2, 2}, + {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}, {7.0f, 8.0f, 10.0f}}); + // clang-format on + } + + template <typename IndexT> + void TestR3() { + // R3 Shape: [2, 3, 2] + // clang-format off + // Slice at dimension start. + RunR3<IndexT>( + {{{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}}, + {{7.0f, 8.0f}, {9.0f, 10.0f}, {11.0f, 12.0f}}}, + {{{13.0f, 14.0f}, {15.0f, 16.0f}}, + {{17.0f, 18.0f}, {19.0f, 20.0f}}}, + {0, 0, 0}, + {{{13.0f, 14.0f}, {15.0f, 16.0f}, {5.0f, 6.0f}}, + {{17.0f, 18.0f}, {19.0f, 20.0f}, {11.0f, 12.0f}}}); + // Slice in the middle. + RunR3<IndexT>( + {{{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}}, + {{7.0f, 8.0f}, {9.0f, 10.0f}, {11.0f, 12.0f}}}, + {{{13.0f}, {15.0f}}}, + {1, 1, 1}, + {{{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}}, + {{7.0f, 8.0f}, {9.0f, 13.0f}, {11.0f, 15.0f}}}); + // Slice at dimension boundaries, but with sizes that cause indices to wrap. 
+ RunR3<IndexT>( + {{{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}}, + {{7.0f, 8.0f}, {9.0f, 10.0f}, {11.0f, 12.0f}}}, + {{{13.0f}, {15.0f}}}, + {1, 2, 1}, + {{{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}}, + {{7.0f, 8.0f}, {9.0f, 10.0f}, {11.0f, 13.0f}}}); + // clang-format on + } + + template <typename IndexT> + void RunR1(const std::vector<float>& input_values, + const std::vector<float>& update_values, + const std::vector<IndexT> slice_starts, + const std::vector<float>& expected_values) { + ComputationBuilder builder(client_, TestName()); + // Initialize and transfer dynamic slice start indices parameter. + ComputationDataHandle starts; + std::unique_ptr<GlobalData> start_data = CreateR1Parameter<IndexT>( + slice_starts, 0, "slice_starts", &builder, &starts); + // Build dynamic slice computation. + auto input = builder.ConstantR1<float>(input_values); + auto update = builder.ConstantR1<float>(update_values); + builder.DynamicUpdateSlice(input, update, starts); + // Run computation and compare against expected values. + ComputeAndCompareR1<float>(&builder, expected_values, {start_data.get()}, + ErrorSpec(0.000001)); + } + + template <typename IndexT> + void RunR2(const Array2D<float>& input_values, + const Array2D<float>& update_values, + const std::vector<IndexT> slice_starts, + const Array2D<float>& expected_values) { + ComputationBuilder builder(client_, TestName()); + // Initialize and transfer dynamic slice start indices parameter. + ComputationDataHandle starts; + std::unique_ptr<GlobalData> start_data = CreateR1Parameter<IndexT>( + slice_starts, 0, "slice_starts", &builder, &starts); + // Build dynamic slice computation. + auto input = builder.ConstantR2FromArray2D<float>(input_values); + auto update = builder.ConstantR2FromArray2D<float>(update_values); + builder.DynamicUpdateSlice(input, update, starts); + // Run computation and compare against expected values. 
+ ComputeAndCompareR2<float>(&builder, expected_values, {start_data.get()}, + ErrorSpec(0.000001)); + } + + template <typename IndexT> + void RunR3(const Array3D<float>& input_values, + const Array3D<float>& update_values, + const std::vector<IndexT> slice_starts, + const Array3D<float>& expected_values) { + ComputationBuilder builder(client_, TestName()); + // Initialize and transfer dynamic slice start indices parameter. + ComputationDataHandle starts; + std::unique_ptr<GlobalData> start_data = CreateR1Parameter<IndexT>( + slice_starts, 0, "slice_starts", &builder, &starts); + // Build dynamic slice computation. + auto input = builder.ConstantR3FromArray3D<float>(input_values); + auto update = builder.ConstantR3FromArray3D<float>(update_values); + builder.DynamicUpdateSlice(input, update, starts); + // Run computation and compare against expected values. + ComputeAndCompareR3<float>(&builder, expected_values, {start_data.get()}, + ErrorSpec(0.000001)); + } + + void RunR3Contiguous(std::vector<int32> operand_shape, int32 index, + int32 size) { + const int32 kSeq = operand_shape[0]; + const int32 kBatch = operand_shape[1]; + const int32 kDim = operand_shape[2]; + Array3D<float> input_values(kSeq, kBatch, kDim); + Array3D<float> update_values(size, kBatch, kDim); + Array3D<float> expected_values(kSeq, kBatch, kDim); + + input_values.FillIota(0); + float val = 1000; + update_values.FillIota(val); + + // TODO(b/34128753) Expected values may vary depending on backend when + // the update wraps. According to documentation, the results are technically + // implementation specific where the update is out of bounds, and hence + // we don't really know what to pass into ComputeAndCompareR3. 
+ expected_values.FillIota(0); + for (int i = 0; i < size; i++) { + for (int j = 0; j < kBatch; j++) { + for (int k = 0; k < kDim; k++) { + expected_values((index + i) % kSeq, j, k) = val++; + } + } + } + if (VLOG_IS_ON(1)) { + DumpArray<float>("input", input_values); + DumpArray<float>("update", update_values); + DumpArray<float>("expected", expected_values); + } + + // Build dynamic slice computation. + ComputationBuilder builder(client_, TestName()); + auto starts = builder.ConstantR1<int32>({index, 0, 0}); + auto input = builder.ConstantR3FromArray3D<float>(input_values); + auto update = builder.ConstantR3FromArray3D<float>(update_values); + builder.DynamicUpdateSlice(input, update, starts); + + // Run computation and compare against expected values. + ComputeAndCompareR3<float>(&builder, expected_values, {}, + ErrorSpec(0.000001)); + } + + template <typename NativeT> + void DumpArray(const string& name, const Array3D<NativeT> values) { + std::unique_ptr<Literal> literal = + LiteralUtil::CreateR3FromArray3D<NativeT>(values); + LOG(INFO) << name << ":" << LiteralUtil::ToString(*literal); + } +}; + +XLA_TEST_F(DynamicUpdateSliceTest, Int32R1) { TestR1<int32>(); } + +XLA_TEST_F(DynamicUpdateSliceTest, Int64R1) { TestR1<int64>(); } + +XLA_TEST_F(DynamicUpdateSliceTest, UInt64R1) { TestR1<uint64>(); } + +XLA_TEST_F(DynamicUpdateSliceTest, Int32R2) { TestR2<int32>(); } + +XLA_TEST_F(DynamicUpdateSliceTest, Int64R2) { TestR2<int64>(); } + +XLA_TEST_F(DynamicUpdateSliceTest, UInt64R2) { TestR2<uint64>(); } + +XLA_TEST_F(DynamicUpdateSliceTest, Int32R3) { TestR3<int32>(); } + +XLA_TEST_F(DynamicUpdateSliceTest, Int64R3) { TestR3<int64>(); } + +XLA_TEST_F(DynamicUpdateSliceTest, UInt64R3) { TestR3<uint64>(); } + +// Tests for simple R3 case where the update is contiguous (i.e. the minor +// two dimensions are not sliced). +XLA_TEST_F(DynamicUpdateSliceTest, R3ContiguousSingleElement) { + // Single element, no wrap. 
+ std::vector<int32> operand_shape({4, 5, 2}); + RunR3Contiguous(operand_shape, /*index=*/1, /*size=*/1); +} + +XLA_TEST_F(DynamicUpdateSliceTest, R3ContiguousMultipleElements) { + // Multiple element, no wrap. + std::vector<int32> operand_shape({4, 5, 2}); + RunR3Contiguous(operand_shape, /*index=*/1, /*size=*/2); +} + +// TODO(b/34128753) CPU and GPU failed on 2016-01-06. Appears not to handle +// wrapping as expected. +XLA_TEST_F(DynamicUpdateSliceTest, + DISABLED_ON_CPU(DISABLED_ON_GPU(R3ContiguousMultipleWrapping))) { + // Multiple element, wrapping. + std::vector<int32> operand_shape({4, 5, 2}); + RunR3Contiguous(operand_shape, /*index=*/3, /*size=*/2); +} + +// TODO(b/34128753) CPU and GPU failed on 2016-01-06. Appears not to handle +// wrapping as expected. +XLA_TEST_F(DynamicUpdateSliceTest, + DISABLED_ON_CPU(DISABLED_ON_GPU(R3ContiguousTooLarge))) { + // Multiple element, update size larger than operand. + std::vector<int32> operand_shape({4, 5, 2}); + RunR3Contiguous(operand_shape, /*index=*/5, /*size=*/2); +} + +XLA_TEST_F(DynamicUpdateSliceTest, R3ContiguousUnaligned) { + std::vector<int32> operand_shape({3, 123, 247}); + RunR3Contiguous(operand_shape, /*index=*/1, /*size=*/1); +} + +// TODO(b/34134076) Disabled on GPU 2016-01-06 due to out-of-memory error. 
+XLA_TEST_F(DynamicUpdateSliceTest, DISABLED_ON_GPU(R3ContiguousLarger)) { + std::vector<int32> operand_shape({32, 128, 1024}); + RunR3Contiguous(operand_shape, /*index=*/7, /*size=*/1); +} + +void BM_DynamicSlice(int num_iters) { + tensorflow::testing::StopTiming(); + + se::Platform* platform = PlatformUtil::GetDefaultPlatform().ValueOrDie(); + auto executors = PlatformUtil::GetStreamExecutors(platform).ValueOrDie(); + StreamExecutorMemoryAllocator allocator(platform, executors); + LocalClient* client = + ClientLibrary::GetOrCreateLocalClient(platform).ValueOrDie(); + auto* transfer_manager = + TransferManager::GetForPlatform(platform).ValueOrDie(); + int device_ordinal = client->default_device_ordinal(); + + ComputationBuilder builder(client, "DynamicSlice"); + + // Create input as a constant: shape [1, 2, 3, 4] + auto input_literal = LiteralUtil::CreateR4( + {{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}, + {{13, 14, 15, 16}, {17, 18, 19, 20}, {21, 22, 23, 24}}}}); + auto input = builder.ConstantLiteral(*input_literal); + + // Create dynamic slice start indices as a parameter: shape [4] + auto start_indices_shape = ShapeUtil::MakeShape(S32, {4}); + auto start_indices = + builder.Parameter(0, start_indices_shape, "start_indices"); + // Add DynamicSlice op to the computatation. + builder.DynamicSlice(input, start_indices, {1, 1, 1, 1}); + auto computation = builder.Build().ConsumeValueOrDie(); + + // Initialize and transfer parameter buffer. + auto buffer = ScopedShapedBuffer::MakeScopedShapedBuffer(start_indices_shape, + &allocator, 0) + .ConsumeValueOrDie(); + + auto start_indices_literal = LiteralUtil::CreateR1<int32>({0, 1, 2, 3}); + ASSERT_IS_OK(transfer_manager->TransferLiteralToDevice( + executors[device_ordinal], *start_indices_literal, + buffer->mutable_buffer({}))); + + // Run some warm-up executions. 
+ LocalExecuteOptions options; + options.set_allocator(&allocator); + const int kWarmups = 2; + for (int i = 0; i < kWarmups; ++i) { + auto result = client->ExecuteLocally(computation, {buffer.get()}, options); + ASSERT_TRUE(result.ok()); + } + + // Run benchmark. + tensorflow::testing::StartTiming(); + for (int i = 0; i < num_iters; ++i) { + auto result = client->ExecuteLocally(computation, {buffer.get()}, options); + ASSERT_TRUE(result.ok()); + } +} +BENCHMARK(BM_DynamicSlice); + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/floor_ceil_test.cc b/tensorflow/compiler/xla/tests/floor_ceil_test.cc new file mode 100644 index 0000000000..8e30063085 --- /dev/null +++ b/tensorflow/compiler/xla/tests/floor_ceil_test.cc @@ -0,0 +1,128 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <limits> +#include <string> + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class FloorCeilTest : public ClientLibraryTestBase { + public: + enum Function { + kFloor, + kCeil, + }; + + // Runs a computation and comparison on expected vs f(input) + void TestR1F32(tensorflow::gtl::ArraySlice<float> input, + tensorflow::gtl::ArraySlice<float> expected, Function f) { + LOG(INFO) << "input: {" << tensorflow::str_util::Join(expected, ", ") + << "}"; + ComputationBuilder builder(client_, TestName()); + auto c = builder.ConstantR1<float>(input); + if (f == kCeil) { + builder.Ceil(c); + } else { + ASSERT_EQ(kFloor, f); + builder.Floor(c); + } + ComputeAndCompareR1<float>(&builder, expected, /*arguments=*/{}); + } + + void TestR0F32(float input, float expected, Function f) { + LOG(INFO) << "input: " << expected; + ComputationBuilder builder(client_, TestName()); + auto c = builder.ConstantR0<float>(input); + if (f == kCeil) { + builder.Ceil(c); + } else { + ASSERT_EQ(kFloor, f); + builder.Floor(c); + } + ComputeAndCompareR0<float>(&builder, expected, /*arguments=*/{}); + } + + const ErrorSpec error_spec_{0.0001}; + + float infinity_ = std::numeric_limits<float>::infinity(); + float minus_infinity_ = -std::numeric_limits<float>::infinity(); +}; + +// Interesting notes: +// * if you pass snan the CPU doesn't canonicalize it to qnan. 
+// * passing x86-based CPU's qnan to the GPU makes a different nan +// "7fc00000=nan=nan vs 7fffffff=nan=nan" + +XLA_TEST_F(FloorCeilTest, R1S0Floor) { TestR1F32({}, {}, kFloor); } + +TEST_F(FloorCeilTest, R1Floor) { + TestR1F32({0.0, -0.0, infinity_, minus_infinity_, 1.1, -0.1}, + {0.0, -0.0, infinity_, minus_infinity_, 1.0, -1.0}, kFloor); +} + +TEST_F(FloorCeilTest, R1Ceil) { + TestR1F32({0.0, -0.0, infinity_, minus_infinity_, 1.1, -0.1}, + {0.0, -0.0, infinity_, minus_infinity_, 2.0, -0.0}, kCeil); +} + +TEST_F(FloorCeilTest, R0Floor) { + TestR0F32(0.0, 0.0, kFloor); + TestR0F32(-0.0, -0.0, kFloor); + TestR0F32(infinity_, infinity_, kFloor); + TestR0F32(minus_infinity_, minus_infinity_, kFloor); + TestR0F32(1.1, 1.0, kFloor); + TestR0F32(-0.1, -1.0, kFloor); +} + +TEST_F(FloorCeilTest, R0Ceil) { + TestR0F32(0.0, 0.0, kCeil); + TestR0F32(-0.0, -0.0, kCeil); + TestR0F32(infinity_, infinity_, kCeil); + TestR0F32(minus_infinity_, minus_infinity_, kCeil); + TestR0F32(1.1, 2.0, kCeil); + TestR0F32(-0.1, -0.0, kCeil); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/fmax_test.cc b/tensorflow/compiler/xla/tests/fmax_test.cc new file mode 100644 index 0000000000..2835038c90 --- /dev/null +++ b/tensorflow/compiler/xla/tests/fmax_test.cc @@ -0,0 +1,61 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <vector> + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class FmaxSimpleTest : public ClientLibraryTestBase {}; + +TEST_F(FmaxSimpleTest, FmaxTenValues) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR1<float>( + {-0.0, 1.0, 2.0, -3.0, -4.0, 5.0, 6.0, -7.0, -8.0, 9.0}); + auto y = builder.ConstantR1<float>( + {-0.0, -1.0, -2.0, 3.0, 4.0, -5.0, -6.0, 7.0, 8.0, -9.0}); + builder.Max(x, y); + + std::vector<float> expected = {-0.0, 1.0, 2.0, 3.0, 4.0, + 5.0, 6.0, 7.0, 8.0, 9.0}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, 
argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc new file mode 100644 index 0000000000..7bddbfa894 --- /dev/null +++ b/tensorflow/compiler/xla/tests/fusion_test.cc @@ -0,0 +1,589 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <math.h> +#include <algorithm> +#include <memory> +#include <new> +#include <utility> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/primitive_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include 
"tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +using tensorflow::gtl::ArraySlice; + +namespace xla { +namespace { + +const int test_width = 2, test_height = 3; + +const float test_float_vals[3][test_width][test_height] = { + {{-1.0, -1.0, 1.0}, {-3.0, 0.0, -1.0}}, + {{-3.0, 2.0, 1.0}, {0.0, -3.0, 1.0}}, + {{-3.0, 0.0, -3.0}, {-1.0, -2.0, 1.0}}}; + +// Test whether fusion operations are emitted with no errors and compute +// accurate outputs. +class FusionTest : public HloTestBase { + protected: + template <typename T, int Arity> + void TestElementwise2D(HloOpcode opcode) { + Array2D<float> operand_data[Arity]; + for (int i = 0; i < Arity; ++i) { + new (&operand_data[i]) Array2D<float>(test_width, test_height); + } + Array2D<T> answer_data(test_width, test_height); + for (int i = 0; i < test_width; ++i) { + for (int j = 0; j < test_height; ++j) { + float xs[Arity]; + for (int k = 0; k < Arity; ++k) { + xs[k] = test_float_vals[k][i][j]; + operand_data[k](i, j) = xs[k]; + } + answer_data(i, j) = ComputeElementwiseAnswer<T>(opcode, xs); + } + } + + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + + auto prim_type = primitive_util::NativeToPrimitiveType<T>(); + + HloInstruction* hlos[4]; + for (int i = 0; i < Arity; ++i) { + hlos[i + 1] = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2FromArray2D(operand_data[i]))); + } + auto answer_shape = + ShapeUtil::MakeShape(prim_type, {test_width, test_height}); + std::unique_ptr<HloInstruction> root_hlo; + switch (Arity) { + case 1: + root_hlo = HloInstruction::CreateUnary(answer_shape, opcode, hlos[1]); + break; + case 2: + root_hlo = HloInstruction::CreateBinary(answer_shape, opcode, hlos[1], + hlos[2]); + break; + case 3: + root_hlo = HloInstruction::CreateTernary(answer_shape, opcode, hlos[1], + hlos[2], 
hlos[3]); + break; + default: + LOG(FATAL) << "Bad arity: " << Arity; + } + hlos[0] = builder.AddInstruction(std::move(root_hlo)); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction( + ArraySlice<HloInstruction*>(hlos, 0, Arity + 1), + HloInstruction::FusionKind::kLoop); + + auto expected = LiteralUtil::CreateR2FromArray2D(answer_data); + auto actual = ExecuteAndTransfer(std::move(hlo_module), {}); + if (primitive_util::IsFloatingPointType(prim_type)) { + LiteralTestUtil::ExpectNear(*expected, *actual, ErrorSpec(1e-4)); + } else { + LiteralTestUtil::ExpectEqual(*expected, *actual); + } + } + + private: + template <typename T> + T ComputeElementwiseAnswer(HloOpcode opcode, ArraySlice<float> xs); +}; + +template <> +float FusionTest::ComputeElementwiseAnswer<float>(HloOpcode opcode, + ArraySlice<float> xs) { + switch (opcode) { + case HloOpcode::kAdd: + return xs[0] + xs[1]; + case HloOpcode::kSubtract: + return xs[0] - xs[1]; + case HloOpcode::kMultiply: + return xs[0] * xs[1]; + case HloOpcode::kDivide: + return xs[0] / xs[1]; + case HloOpcode::kPower: + return powf(xs[0], xs[1]); + case HloOpcode::kMinimum: + return std::min(xs[0], xs[1]); + case HloOpcode::kMaximum: + return std::max(xs[0], xs[1]); + case HloOpcode::kClamp: + return std::min(xs[2], std::max(xs[1], xs[0])); + default: + LOG(FATAL) << "No elementwise opcode: " << opcode; + } +} + +template <> +uint8 FusionTest::ComputeElementwiseAnswer<uint8>(HloOpcode opcode, + ArraySlice<float> xs) { + switch (opcode) { + case HloOpcode::kEq: + return xs[0] == xs[1]; + case HloOpcode::kNe: + return xs[0] != xs[1]; + case HloOpcode::kGt: + return xs[0] > xs[1]; + case HloOpcode::kLt: + return xs[0] < xs[1]; + case HloOpcode::kGe: + return xs[0] >= xs[1]; + case HloOpcode::kLe: + return xs[0] <= xs[1]; + default: + LOG(FATAL) << "No comparatory opcode: " << opcode; + } +} + +XLA_TEST_F(FusionTest, Test) { + // test expression: + // slice(select({{T, F, T}, {F, T, F}}, + // 
concat(transpose({{1.0}, {2.0}, {3.0}} + + // {{-1.0}, {-1.0}, {-1.0}}), + // {{1.62, 2.72, 3.14}}) + + // (-{{1.0, 1.0, 1.0}, {0.0, 0.0, 0.0}}), + // {{0.5, 0.5, 0.5}, {0.5, 0.5, 0.5}})) = {{0.5}, {2.72}} + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<float>({{1.0}, {2.0}, {3.0}}))); + auto const1 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<float>({{-1.0}, {-1.0}, {-1.0}}))); + auto add2 = builder.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(F32, {3, 1}), HloOpcode::kAdd, const0, const1)); + auto reshape3 = builder.AddInstruction(HloInstruction::CreateTranspose( + ShapeUtil::MakeShape(F32, {1, 3}), add2, {1, 0})); + auto const4 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<float>({{1.62, 2.72, 3.14}}))); + auto concat5 = builder.AddInstruction(HloInstruction::CreateConcatenate( + ShapeUtil::MakeShape(F32, {2, 3}), {reshape3, const4}, 0)); + auto const6 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<float>({{1.0, 1.0, 1.0}, {0.0, 0.0, 0.0}}))); + auto negate7 = builder.AddInstruction(HloInstruction::CreateUnary( + ShapeUtil::MakeShape(F32, {2, 3}), HloOpcode::kNegate, const6)); + auto add8 = builder.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(F32, {2, 3}), HloOpcode::kAdd, concat5, negate7)); + auto const9 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<float>({{0.5, 0.5, 0.5}, {0.5, 0.5, 0.5}}))); + auto const10 = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR2<bool>( + {{true, false, true}, {false, true, false}}))); + auto select11 = builder.AddInstruction( + HloInstruction::CreateTernary(ShapeUtil::MakeShape(F32, {2, 3}), + HloOpcode::kSelect, const10, add8, const9)); + auto slice12 = 
builder.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {2, 1}), select11, {0, 1}, {2, 2})); + // CreateFusionInstruction needs the `instructions_to_fuse` argument in + // reverse topological order, so the first element in `instructions_to_fuse` + // must be the root. + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction( + {slice12, select11, const10, const9, add8, negate7, const6, concat5, + const4, reshape3, add2, const1, const0}, + HloInstruction::FusionKind::kLoop); + + LiteralTestUtil::ExpectNear(*LiteralUtil::CreateR2<float>({{0.5}, {2.72}}), + *ExecuteAndTransfer(std::move(hlo_module), {}), + ErrorSpec(1e-4)); +} + +// Test whether we emit appropriate code for parameters of fusion instructions. +XLA_TEST_F(FusionTest, Parameter) { + // Build a computation and fuse part of it so the fusion instruction has an + // operand parameter. + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<float>({{1.0, 2.0, 3.0}}))); + auto copy1 = builder.AddInstruction(HloInstruction::CreateUnary( + ShapeUtil::MakeShape(F32, {1, 3}), HloOpcode::kCopy, const0)); + auto const2 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<float>({{-2.0, -2.0, -2.0}}))); + // add3 = copy1 + const2 = const0 + const2 = {1,2,3} + {-2,-2,-2} = {-1,0,+1} + auto add3 = builder.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(F32, {1, 3}), HloOpcode::kAdd, copy1, const2)); + // CreateFusionInstruction needs `instructions_to_fuse` in reverse topological + // order. 
+ hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{add3, const2}, + HloInstruction::FusionKind::kLoop); + + LiteralTestUtil::ExpectNear(*LiteralUtil::CreateR2<float>({{-1.0, 0.0, 1.0}}), + *ExecuteAndTransfer(std::move(hlo_module), {}), + ErrorSpec(1e-4)); +} + +XLA_TEST_F(FusionTest, BroadcastIntoBinaryOp) { + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto const_vector = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1<float>({1.0, 2.0, 3.0}))); + auto const_array = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<float>({{-1.0, -2.0, -4.0}, {10.0, 20.0, 30.0}}))); + auto broadcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(const_array->shape(), const_vector, {1})); + // add2 = broadcast(const_vector) + const_array + // = broadcast({1,2,3}) + {{-1.0, -2.0, -4.0}, {10.0, 20.0, 30.0}} + // = {{1, 2, 3}, {1, 2, 3}} + {{-1.0, -2.0, -4.0}, {10.0, 20.0, 30.0}} + auto add2 = builder.AddInstruction( + HloInstruction::CreateBinary(ShapeUtil::MakeShape(F32, {2, 3}), + HloOpcode::kAdd, broadcast, const_array)); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{add2, broadcast}, + HloInstruction::FusionKind::kLoop); + + LiteralTestUtil::ExpectNear( + *LiteralUtil::CreateR2<float>({{0.0, 0.0, -1.0}, {11.0, 22.0, 33.0}}), + *ExecuteAndTransfer(std::move(hlo_module), {}), ErrorSpec(1e-4)); +} + +XLA_TEST_F(FusionTest, ReshapeToScalar) { + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto single_element_array = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR2<int32>({{5}}))); + auto reshape = builder.AddInstruction(HloInstruction::CreateReshape( + ShapeUtil::MakeShape(S32, {}), single_element_array)); + 
hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{reshape}, + HloInstruction::FusionKind::kLoop); + LiteralTestUtil::ExpectEqual(*LiteralUtil::CreateR0<int32>(5), + *ExecuteAndTransfer(std::move(hlo_module), {})); +} + +XLA_TEST_F(FusionTest, Reshape_3by2_1by2by3) { + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<int32>({{1, 2}, {3, 4}, {5, 6}}))); + auto reshape1 = builder.AddInstruction(HloInstruction::CreateReshape( + ShapeUtil::MakeShape(S32, {1, 2, 3}), const0)); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{reshape1}, + HloInstruction::FusionKind::kLoop); + LiteralTestUtil::ExpectEqual( + *LiteralUtil::CreateR3<int32>({{{1, 2, 3}, {4, 5, 6}}}), + *ExecuteAndTransfer(std::move(hlo_module), {})); +} + +XLA_TEST_F(FusionTest, Reshape_1by2by3_3by2) { + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR3<int32>({{{1, 2, 3}, {4, 5, 6}}}))); + auto reshape1 = builder.AddInstruction( + HloInstruction::CreateReshape(ShapeUtil::MakeShape(S32, {3, 2}), const0)); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{reshape1}, + HloInstruction::FusionKind::kLoop); + LiteralTestUtil::ExpectEqual( + *LiteralUtil::CreateR2<int32>({{1, 2}, {3, 4}, {5, 6}}), + *ExecuteAndTransfer(std::move(hlo_module), {})); +} + +XLA_TEST_F(FusionTest, Reshape_1by1by1_) { + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto const0 = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR3<int32>({{{7}}}))); + auto reshape1 = builder.AddInstruction( 
+ HloInstruction::CreateReshape(ShapeUtil::MakeShape(S32, {}), const0)); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{reshape1}, + HloInstruction::FusionKind::kLoop); + LiteralTestUtil::ExpectEqual(*LiteralUtil::CreateR0<int32>(7), + *ExecuteAndTransfer(std::move(hlo_module), {})); +} + +XLA_TEST_F(FusionTest, Reshape__1by1by1) { + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto const0 = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0<int32>(7))); + auto reshape1 = builder.AddInstruction(HloInstruction::CreateReshape( + ShapeUtil::MakeShape(S32, {1, 1, 1}), const0)); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{reshape1}, + HloInstruction::FusionKind::kLoop); + LiteralTestUtil::ExpectEqual(*LiteralUtil::CreateR3<int32>({{{7}}}), + *ExecuteAndTransfer(std::move(hlo_module), {})); +} + +XLA_TEST_F(FusionTest, Reshape__) { + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto const0 = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0<int32>(7))); + auto reshape1 = builder.AddInstruction( + HloInstruction::CreateReshape(ShapeUtil::MakeShape(S32, {}), const0)); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{reshape1}, + HloInstruction::FusionKind::kLoop); + LiteralTestUtil::ExpectEqual(*LiteralUtil::CreateR0<int32>(7), + *ExecuteAndTransfer(std::move(hlo_module), {})); +} + +XLA_TEST_F(FusionTest, Reshape_3by3_3by3) { + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<int32>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}))); + auto reshape1 = builder.AddInstruction( + 
HloInstruction::CreateReshape(ShapeUtil::MakeShape(S32, {3, 3}), const0)); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{reshape1}, + HloInstruction::FusionKind::kLoop); + LiteralTestUtil::ExpectEqual( + *LiteralUtil::CreateR2<int32>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}), + *ExecuteAndTransfer(std::move(hlo_module), {})); +} + +XLA_TEST_F(FusionTest, Transpose_2by3) { + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<int32>({{1, 2, 3}, {4, 5, 6}}))); + auto reshape1 = builder.AddInstruction(HloInstruction::CreateTranspose( + ShapeUtil::MakeShape(S32, {3, 2}), const0, {1, 0})); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{reshape1}, + HloInstruction::FusionKind::kLoop); + LiteralTestUtil::ExpectEqual( + *LiteralUtil::CreateR2<int32>({{1, 4}, {2, 5}, {3, 6}}), + *ExecuteAndTransfer(std::move(hlo_module), {})); +} + +XLA_TEST_F(FusionTest, Transpose_3by3) { + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<int32>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}))); + auto reshape1 = builder.AddInstruction(HloInstruction::CreateTranspose( + ShapeUtil::MakeShape(S32, {3, 3}), const0, {1, 0})); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{reshape1}, + HloInstruction::FusionKind::kLoop); + LiteralTestUtil::ExpectEqual( + *LiteralUtil::CreateR2<int32>({{1, 4, 7}, {2, 5, 8}, {3, 6, 9}}), + *ExecuteAndTransfer(std::move(hlo_module), {})); +} + +XLA_TEST_F(FusionTest, Reverse) { + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto const0 = 
builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR1<int32>({1, 2, 3}))); + auto reverse1 = builder.AddInstruction(HloInstruction::CreateReverse( + ShapeUtil::MakeShape(S32, {3}), const0, {0})); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{reverse1}, + HloInstruction::FusionKind::kLoop); + + LiteralTestUtil::ExpectEqual(*LiteralUtil::CreateR1<int32>({3, 2, 1}), + *ExecuteAndTransfer(std::move(hlo_module), {})); +} + +std::unique_ptr<HloComputation> MakeReduceTestComputation() { + auto builder = HloComputation::Builder("add"); + auto lhs = builder.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/0, ShapeUtil::MakeShape(S32, {}), "lhs")); + auto rhs = builder.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/1, ShapeUtil::MakeShape(S32, {}), "rhs")); + builder.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(S32, {}), HloOpcode::kAdd, lhs, rhs)); + return builder.Build(); +} + +XLA_TEST_F(FusionTest, DISABLED_ON_CPU(Reduce)) { + auto hlo_module = MakeUnique<HloModule>(TestName()); + + auto builder = HloComputation::Builder(TestName()); + auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1<int32>({1, 2, 4, 8}))); + auto const1 = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0<int32>(0))); + auto reduce2 = builder.AddInstruction(HloInstruction::CreateReduce( + ShapeUtil::MakeShape(S32, {}), const0, const1, {0}, + hlo_module->AddEmbeddedComputation(MakeReduceTestComputation()))); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{reduce2}, + HloInstruction::FusionKind::kLoop); + + LiteralTestUtil::ExpectEqual(*LiteralUtil::CreateR0<int32>(15), + *ExecuteAndTransfer(std::move(hlo_module), {})); +} + +XLA_TEST_F(FusionTest, DISABLED_ON_CPU(ReduceImplicitBroadcast)) { + auto hlo_module = 
MakeUnique<HloModule>(TestName()); + + auto builder = HloComputation::Builder(TestName()); + auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1<int32>({1, 2, 4, 8}))); + auto const1 = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0<int32>(0))); + auto reduce2 = builder.AddInstruction(HloInstruction::CreateReduce( + ShapeUtil::MakeShape(S32, {}), const0, const1, {0}, + hlo_module->AddEmbeddedComputation(MakeReduceTestComputation()))); + auto negate3 = builder.AddInstruction(HloInstruction::CreateUnary( + ShapeUtil::MakeShape(S32, {1}), HloOpcode::kNegate, reduce2)); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate3, reduce2}, + HloInstruction::FusionKind::kLoop); + + LiteralTestUtil::ExpectEqual(*LiteralUtil::CreateR1<int32>({-15}), + *ExecuteAndTransfer(std::move(hlo_module), {})); +} + +XLA_TEST_F(FusionTest, DISABLED_ON_CPU(ReduceWindow)) { + auto builder = HloComputation::Builder(TestName()); + auto hlo_module = MakeUnique<HloModule>(TestName()); + auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2<int32>({{2, 3, 5}, {7, 11, 13}, {17, 19, 23}}))); + auto const1 = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0<int32>(1))); + Window window; + ASSERT_TRUE( + tensorflow::protobuf::TextFormat::ParseFromString("dimensions:{\n" + "size:2\n" + "stride:1\n" + "padding_low:0\n" + "padding_high:0\n" + "window_dilation:1\n" + "base_dilation:1\n" + "}\n" + "dimensions:{\n" + "size:2\n" + "stride:1\n" + "padding_low:0\n" + "padding_high:0\n" + "window_dilation:1\n" + "base_dilation:1\n" + "}\n", + &window)); + auto nested_builder = HloComputation::Builder("mul"); + { + auto x = nested_builder.AddInstruction( + HloInstruction::CreateParameter(0, ShapeUtil::MakeShape(S32, {}), "x")); + auto y = nested_builder.AddInstruction( + HloInstruction::CreateParameter(1, 
ShapeUtil::MakeShape(S32, {}), "y")); + nested_builder.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(S32, {}), HloOpcode::kMultiply, x, y)); + } + auto nested_computation = + hlo_module->AddEmbeddedComputation(nested_builder.Build()); + auto reduce_window2 = + builder.AddInstruction(HloInstruction::CreateReduceWindow( + ShapeUtil::MakeShape(S32, {2, 2}), const0, const1, window, + nested_computation)); + hlo_module->AddEntryComputation(builder.Build()) + ->CreateFusionInstruction(/*instructions_to_fuse=*/{reduce_window2}, + HloInstruction::FusionKind::kLoop); + + LiteralTestUtil::ExpectEqual( + *LiteralUtil::CreateR2<int32>({{462, 2145}, {24871, 62491}}), + *ExecuteAndTransfer(std::move(hlo_module), {})); +} + +XLA_TEST_F(FusionTest, Add2D) { TestElementwise2D<float, 2>(HloOpcode::kAdd); } + +XLA_TEST_F(FusionTest, Subtract2D) { + TestElementwise2D<float, 2>(HloOpcode::kSubtract); +} + +XLA_TEST_F(FusionTest, Multiply2D) { + TestElementwise2D<float, 2>(HloOpcode::kMultiply); +} + +XLA_TEST_F(FusionTest, Divide2D) { + TestElementwise2D<float, 2>(HloOpcode::kDivide); +} + +XLA_TEST_F(FusionTest, Power2D) { + TestElementwise2D<float, 2>(HloOpcode::kPower); +} + +XLA_TEST_F(FusionTest, Minimum2D) { + TestElementwise2D<float, 2>(HloOpcode::kMinimum); +} + +XLA_TEST_F(FusionTest, Maximum2D) { + TestElementwise2D<float, 2>(HloOpcode::kMaximum); +} + +XLA_TEST_F(FusionTest, Equal2D) { TestElementwise2D<uint8, 2>(HloOpcode::kEq); } + +XLA_TEST_F(FusionTest, Inequal2D) { + TestElementwise2D<uint8, 2>(HloOpcode::kNe); +} + +XLA_TEST_F(FusionTest, Greater2D) { + TestElementwise2D<uint8, 2>(HloOpcode::kGt); +} + +XLA_TEST_F(FusionTest, Lesser2D) { + TestElementwise2D<uint8, 2>(HloOpcode::kLt); +} + +XLA_TEST_F(FusionTest, GreaterOrEqual2D) { + TestElementwise2D<uint8, 2>(HloOpcode::kGe); +} + +XLA_TEST_F(FusionTest, LesserOrEqual2D) { + TestElementwise2D<uint8, 2>(HloOpcode::kLe); +} + +XLA_TEST_F(FusionTest, Clamp2D) { + TestElementwise2D<float, 
3>(HloOpcode::kClamp); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc new file mode 100644 index 0000000000..872188de81 --- /dev/null +++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc @@ -0,0 +1,204 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" + +#include <set> +#include <string> +#include <utility> + +#define EIGEN_USE_THREADS + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/legacy_flags/hlo_test_base_flags.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/backend.h" +#include "tensorflow/compiler/xla/service/computation_layout.h" +#include "tensorflow/compiler/xla/service/executable.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_execution_profile.h" +#include "tensorflow/compiler/xla/service/hlo_graph_dumper.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/service/transfer_manager.h" +#include "tensorflow/compiler/xla/shape_layout.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/common_runtime/eigen_thread_pool.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" + +namespace se = ::perftools::gputools; + +namespace xla { + +// Define this in .cc file to avoid having to include eigen or forward declare +// these types in the header. 
+struct HloTestBase::EigenThreadPoolWrapper { + std::unique_ptr<EigenThreadPoolWrapper> pool; + std::unique_ptr<Eigen::ThreadPoolDevice> device; +}; + +HloTestBase::HloTestBase() + : backend_(Backend::CreateDefaultBackend().ConsumeValueOrDie()) { + test_hlo_dumper_ = [](const HloModule& module, const string& label) { + legacy_flags::HloTestBaseFlags* flags = legacy_flags::GetHloTestBaseFlags(); + if (flags->xla_hlo_test_generate_hlo_graph) { + const bool show_addresses = true; + const bool show_layouts = true; + hlo_graph_dumper::DumpGraph(*module.entry_computation(), label, + show_addresses, show_layouts); + } + }; + VLOG(1) << "executing on platform " << backend_->platform()->Name(); +} + +HloTestBase::~HloTestBase() { + // Deallocate all the memory allocated during the tests. + for (auto& allocation : allocations_) { + backend_->default_stream_executor()->Deallocate(&allocation); + } +} + +StatusOr<perftools::gputools::DeviceMemoryBase> HloTestBase::Execute( + std::unique_ptr<HloModule> module, + tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase> + arguments, + Shape* result_shape) { + auto module_config = MakeUnique<HloModuleConfig>( + MakeProgramShape(module->entry_computation())); + return Execute(std::move(module), std::move(module_config), arguments, + result_shape); +} + +StatusOr<se::DeviceMemoryBase> HloTestBase::Execute( + std::unique_ptr<HloModule> hlo_module, + std::unique_ptr<HloModuleConfig> module_config, + tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments, + Shape* result_shape) { + VLOG(3) << "module_config layout " + << LayoutUtil::HumanString(module_config->entry_computation_layout() + .result_layout() + .layout()); + TF_ASSIGN_OR_RETURN( + std::unique_ptr<Executable> executable, + backend_->compiler()->Compile(std::move(hlo_module), + std::move(module_config), test_hlo_dumper_, + backend_->default_stream_executor())); + + se::Stream stream(backend_->default_stream_executor()); + stream.Init(); + + ExecutableRunOptions 
run_options; + run_options.set_stream(&stream); + run_options.set_allocator(backend_->memory_allocator()); + run_options.set_inter_op_thread_pool(backend_->inter_op_thread_pool()); + run_options.set_intra_op_thread_pool( + backend_->eigen_intra_op_thread_pool_device()); + + HloExecutionProfile hlo_execution_profile; + TF_ASSIGN_OR_RETURN(se::DeviceMemoryBase result, + executable->ExecuteOnStream(&run_options, arguments, + &hlo_execution_profile)); + TF_RET_CHECK(stream.BlockHostUntilDone()); + + allocations_.push_back(result); + + *result_shape = executable->result_shape(); + + if (ShapeUtil::IsTuple(*result_shape)) { + // We must record element buffers of tuples as well to avoid leaks. + DCHECK(!ShapeUtil::IsNestedTuple(*result_shape)); + TF_ASSIGN_OR_RETURN( + std::vector<se::DeviceMemoryBase> element_buffers, + backend_->transfer_manager()->ShallowCopyTupleFromDevice( + backend_->default_stream_executor(), result, *result_shape)); + + // A tuple may contain the same buffer in more than one element. Keep track + // of the buffers already added to avoid duplicates in allocations_. + std::set<void*> added_opaques; + for (auto element_buffer : element_buffers) { + if (added_opaques.count(element_buffer.opaque()) == 0) { + added_opaques.insert(element_buffer.opaque()); + allocations_.push_back(element_buffer); + } + } + } + + return result; +} + +se::DeviceMemoryBase HloTestBase::TransferToDevice(const Literal& literal) { + // Allocate memory on the device using the stream executor. 
+ int64 allocation_size = + backend_->transfer_manager()->GetByteSizeRequirement(literal.shape()); + se::DeviceMemoryBase allocation = + backend_->default_stream_executor()->AllocateArray<uint8>( + allocation_size); + allocations_.push_back(allocation); + + TF_CHECK_OK(backend_->transfer_manager()->TransferLiteralToDevice( + backend_->default_stream_executor(), literal, &allocation)); + + return allocation; +} + +std::unique_ptr<Literal> HloTestBase::TransferFromDevice( + const Shape& shape, se::DeviceMemoryBase device_base) { + auto literal = MakeUnique<Literal>(); + TF_CHECK_OK(backend_->transfer_manager()->TransferLiteralFromDevice( + backend_->default_stream_executor(), device_base, shape, shape, + literal.get())); + return literal; +} + +std::unique_ptr<Literal> HloTestBase::ExecuteAndTransfer( + std::unique_ptr<HloModule> module, + tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) { + Shape result_shape; + se::DeviceMemoryBase device_base = + Execute(std::move(module), arguments, &result_shape).ValueOrDie(); + return TransferFromDevice(result_shape, device_base); +} + +std::unique_ptr<Literal> HloTestBase::ExecuteAndTransfer( + std::unique_ptr<HloModule> module, + std::unique_ptr<HloModuleConfig> module_config, + tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) { + Shape result_shape; + se::DeviceMemoryBase device_base = + Execute(std::move(module), std::move(module_config), arguments, + &result_shape) + .ValueOrDie(); + return TransferFromDevice(result_shape, device_base); +} + +ProgramShape HloTestBase::MakeProgramShape(HloComputation* computation) { + ProgramShape program_shape; + for (int64 i = 0; i < computation->num_parameters(); ++i) { + *program_shape.add_parameters() = + computation->parameter_instruction(i)->shape(); + } + *program_shape.mutable_result() = computation->root_instruction()->shape(); + return program_shape; +} + +string HloTestBase::TestName() const { + return 
::testing::UnitTest::GetInstance()->current_test_info()->name(); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h new file mode 100644 index 0000000000..fa88c76899 --- /dev/null +++ b/tensorflow/compiler/xla/tests/hlo_test_base.h @@ -0,0 +1,107 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_TESTS_HLO_TEST_BASE_H_ +#define TENSORFLOW_COMPILER_XLA_TESTS_HLO_TEST_BASE_H_ + +#include <memory> +#include <string> +#include <vector> + +#include "tensorflow/compiler/xla/service/backend.h" +#include "tensorflow/compiler/xla/service/compiler.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { + +// A base class for tests which build and run HLO code. 
This is a lower level of +// abstraction than using the client interface and enables, for one, explicitly +// building a graph of HLO instructions to run. +class HloTestBase : public ::testing::Test { + protected: + struct EigenThreadPoolWrapper; + HloTestBase(); + + ~HloTestBase() override; + + // Executes the given module and returns a global data handle. + StatusOr<perftools::gputools::DeviceMemoryBase> Execute( + std::unique_ptr<HloModule> module, + tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase> + arguments, + Shape* result_shape); + + // Variation of Execute which takes a custom module_config instead of creating + // a default one. + StatusOr<perftools::gputools::DeviceMemoryBase> Execute( + std::unique_ptr<HloModule> module, + std::unique_ptr<HloModuleConfig> module_config, + tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase> + arguments, + Shape* result_shape); + + // Transfers the given literal to the device and returns the data handle. + perftools::gputools::DeviceMemoryBase TransferToDevice( + const Literal& literal); + + // Transfers the array referred to by the given handle from the device and + // returns as a Literal. + std::unique_ptr<Literal> TransferFromDevice( + const Shape& shape, perftools::gputools::DeviceMemoryBase device_base); + + // Executes the given module and returns the result as a Literal. + std::unique_ptr<Literal> ExecuteAndTransfer( + std::unique_ptr<HloModule> module, + tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase> + arguments); + + // Variation of ExecuteAndTransfer which takes a custom module_config instead + // of creating a default one. + std::unique_ptr<Literal> ExecuteAndTransfer( + std::unique_ptr<HloModule> module, + std::unique_ptr<HloModuleConfig> module_config, + tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase> + arguments); + + // Utility function which creates a ProgramShape for a given computation. 
+ ProgramShape MakeProgramShape(HloComputation* computation); + + string TestName() const; + + std::unique_ptr<Backend> backend_; + + Compiler::HloDumper test_hlo_dumper_; + + // This vector contains handles of all the device memory allocations performed + // by the test. These are deallocated on destruction of the test object. + std::vector<perftools::gputools::DeviceMemoryBase> allocations_; + + ErrorSpec error_spec_{0.0001}; + + std::unique_ptr<EigenThreadPoolWrapper> thread_pool_wrapper_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_TESTS_HLO_TEST_BASE_H_ diff --git a/tensorflow/compiler/xla/tests/inprocess_service_test.cc b/tensorflow/compiler/xla/tests/inprocess_service_test.cc new file mode 100644 index 0000000000..9909f041de --- /dev/null +++ b/tensorflow/compiler/xla/tests/inprocess_service_test.cc @@ -0,0 +1,204 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <initializer_list> +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +// Tests which exercise the "InProcess" methods of xla::Client. The +// "InProcess" methods require that the client and server share the same +// process. 
+class InProcessServiceTest : public ClientLibraryTestBase { + protected: + std::unique_ptr<GlobalData> ExecuteR2F32Constant( + std::initializer_list<std::initializer_list<float>> values, + tensorflow::gtl::ArraySlice<int64> minor_to_major) { + ComputationBuilder builder(client_, TestName()); + builder.ConstantR2<float>(values); + auto computation = builder.Build().ConsumeValueOrDie(); + CHECK_EQ(2, minor_to_major.size()); + Shape shape_with_layout = ShapeUtil::MakeShapeWithLayout( + F32, + /*dimensions=*/{static_cast<int64>(values.size()), + static_cast<int64>(values.begin()->size())}, + minor_to_major); + return client_ + ->Execute(computation, {}, &shape_with_layout, + /*execution_profile=*/nullptr) + .ConsumeValueOrDie(); + } + + ErrorSpec error_spec_{0.0001}; +}; + +XLA_TEST_F(InProcessServiceTest, TransferFromServer) { + ComputationBuilder builder(client_, TestName()); + builder.ConstantR1<int32>({1, 42, 5}); + auto computation = builder.Build().ConsumeValueOrDie(); + + auto handle = client_->Execute(computation, {}).ConsumeValueOrDie(); + + std::vector<int32> result(3, 0); + ASSERT_IS_OK(client_->TransferInProcess(*handle, result.data())); + EXPECT_MATCH(result, testing::VectorMatcher<int32>({1, 42, 5})); +} + +XLA_TEST_F(InProcessServiceTest, TransferToServer) { + std::vector<float> input{1.0f, 2.0f, -42.0f}; + Shape shape = ShapeUtil::MakeShape(F32, {3}); + auto data_handle = client_->TransferToServerInProcess(shape, input.data()) + .ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto param = builder.Parameter(0, ShapeUtil::MakeShape(F32, {3}), "param"); + builder.Add(param, param); + + ComputeAndCompareR1<float>(&builder, {2.0f, 4.0f, -84.0f}, + {data_handle.get()}, error_spec_); +} + +// TODO(b/28506710): This test case seems not to test inprocess +// methods. 
+TEST_F(InProcessServiceTest, GetShape) { + ComputationBuilder builder(client_, TestName()); + builder.ConstantR1<int32>({1, 42, 5}); + auto computation = builder.Build().ConsumeValueOrDie(); + + auto handle = client_->Execute(computation, {}).ConsumeValueOrDie(); + + Shape shape = client_->GetShape(*handle).ConsumeValueOrDie(); + ASSERT_EQ(S32, shape.element_type()); + ASSERT_EQ(1, ShapeUtil::Rank(shape)); + ASSERT_EQ(3, shape.dimensions(0)); +} + +XLA_TEST_F(InProcessServiceTest, GetShapeOfClientSuppliedArrayRowMajor) { + std::vector<float> input{1.0f, 2.0f, 3.0f, 4.0f}; + Shape shape = ShapeUtil::MakeShape(F32, {2, 2}); + shape.clear_layout(); + *shape.mutable_layout() = LayoutUtil::MakeLayout({1, 0}); + auto handle = client_->TransferToServerInProcess(shape, input.data()) + .ConsumeValueOrDie(); + + Shape shape_returned = client_->GetShape(*handle).ConsumeValueOrDie(); + ASSERT_TRUE(ShapeUtil::Equal(shape, shape_returned)); +} + +XLA_TEST_F(InProcessServiceTest, GetShapeOfClientSuppliedArrayColMajor) { + std::vector<float> input{1.0f, 2.0f, 3.0f, 4.0f}; + Shape shape = ShapeUtil::MakeShape(F32, {2, 2}); + shape.clear_layout(); + *shape.mutable_layout() = LayoutUtil::MakeLayout({0, 1}); + auto handle = client_->TransferToServerInProcess(shape, input.data()) + .ConsumeValueOrDie(); + + Shape shape_returned = client_->GetShape(*handle).ConsumeValueOrDie(); + ASSERT_TRUE(ShapeUtil::Equal(shape, shape_returned)); +} + +TEST_F(InProcessServiceTest, TransferToServerNoLayout) { + std::vector<float> input{1.0f, 2.0f, -42.0f}; + Shape shape = ShapeUtil::MakeShape(F32, {3}); + shape.clear_layout(); + auto transfer_status = + client_->TransferToServerInProcess(shape, input.data()); + ASSERT_EQ(transfer_status.status().code(), + tensorflow::error::INVALID_ARGUMENT); +} + +XLA_TEST_F(InProcessServiceTest, ExecuteRowMajor) { + auto handle = + ExecuteR2F32Constant({{1.0, 2.0}, {3.0, 4.0}}, /*minor_to_major=*/{1, 0}); + + std::vector<float> result(4, 0.0); + Shape shape; + 
ASSERT_IS_OK(client_->TransferInProcess(*handle, result.data())); + + EXPECT_MATCH(result, testing::VectorMatcher<float>({1.0, 2.0, 3.0, 4.0})); +} + +XLA_TEST_F(InProcessServiceTest, ExecuteColumnMajor) { + auto handle = + ExecuteR2F32Constant({{1.0, 2.0}, {3.0, 4.0}}, /*minor_to_major=*/{0, 1}); + + std::vector<float> result(4, 0); + Shape shape; + ASSERT_IS_OK(client_->TransferInProcess(*handle, result.data())); + + EXPECT_MATCH(result, testing::VectorMatcher<float>({1.0, 3.0, 2.0, 4.0})); +} + +XLA_TEST_F(InProcessServiceTest, ExecuteAndReuseDifferentLayouts) { + // Create arrays on the server which have different layouts. Verify the + // computation still produces the correct results. + auto handle_rowmaj = + ExecuteR2F32Constant({{1.0, 2.0}, {3.0, 4.0}}, /*minor_to_major=*/{1, 0}); + + auto handle_colmaj = ExecuteR2F32Constant({{10.0, 20.0}, {30.0, 40.0}}, + /*minor_to_major=*/{0, 1}); + + ComputationBuilder builder(client_, TestName()); + auto param0 = + builder.Parameter(0, ShapeUtil::MakeShape(F32, {2, 2}), "param0"); + auto param1 = + builder.Parameter(1, ShapeUtil::MakeShape(F32, {2, 2}), "param1"); + builder.Add(param0, param1); + + Array2D<float> expected({{11.0, 22.0}, {33.0, 44.0}}); + ComputeAndCompareR2<float>(&builder, expected, + {handle_rowmaj.get(), handle_colmaj.get()}, + error_spec_); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc 
b/tensorflow/compiler/xla/tests/literal_test_util.cc new file mode 100644 index 0000000000..f7bbc0f38b --- /dev/null +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -0,0 +1,566 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/tests/literal_test_util.h" + +#include <unistd.h> +#include <cmath> +#include <vector> + +#include "tensorflow/compiler/xla/index_util.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/lib/core/casts.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/lib/strings/stringprintf.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +/* static */ void LiteralTestUtil::AssertEqualShapes(const Shape& expected, + const Shape& actual) { + ASSERT_EQ(ShapeUtil::Rank(expected), ShapeUtil::Rank(actual)); + 
ASSERT_EQ(expected.element_type(), actual.element_type()) + << PrimitiveType_Name(expected.element_type()) << " vs " + << PrimitiveType_Name(actual.element_type()); + ASSERT_EQ(expected.dimensions_size(), actual.dimensions_size()); + for (int i = 0; i < expected.dimensions_size(); ++i) { + ASSERT_EQ(expected.dimensions(i), actual.dimensions(i)) + << "mismatch in dimension #" << i + << " expected: " << ShapeUtil::HumanString(expected) + << " actual: " << ShapeUtil::HumanString(actual); + } + ASSERT_EQ(expected.tuple_shapes_size(), actual.tuple_shapes_size()); + for (int i = 0; i < expected.tuple_shapes_size(); ++i) { + AssertEqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i)); + } +} + +/* static */ void LiteralTestUtil::AssertEqualShapesAndLayouts( + const Shape& expected, const Shape& actual) { + ASSERT_EQ(expected.ShortDebugString(), actual.ShortDebugString()); +} + +namespace { + +string Hostname() { + char hostname[1024]; + gethostname(hostname, sizeof hostname); + hostname[sizeof hostname - 1] = 0; + return string(hostname); +} + +// Helper function for comparing a floating point type, FloatT, bitwise equal +// between the left-hand-side and right-hand-side, by bit-casting to UnsignedT +// -- on miscompare, a nice error message is given in the AssertionFailure. 
+template <typename FloatT, typename UnsignedT> +testing::AssertionResult CompareFloatsBitwiseEqual(FloatT lhs, FloatT rhs) { + auto ulhs = tensorflow::bit_cast<UnsignedT>(lhs); + auto urhs = tensorflow::bit_cast<UnsignedT>(rhs); + if (ulhs != urhs) { + return testing::AssertionFailure() << tensorflow::strings::Printf( + "floating values are not bitwise-equal; and equality testing " + "was requested: %s=%g=%a vs %s=%g=%a", + tensorflow::strings::StrCat(tensorflow::strings::Hex(ulhs)) + .c_str(), + lhs, lhs, + tensorflow::strings::StrCat(tensorflow::strings::Hex(urhs)) + .c_str(), + rhs, rhs); + } + return testing::AssertionSuccess(); +} + +// Templated comparator that specializes for float equality comparison with the +// bitwise helper above (this is the un-specialized fallback, to just use the +// default gunit implementation). +template <typename NativeT> +testing::AssertionResult CompareEqual(NativeT lhs, NativeT rhs) { + if (lhs == rhs) { + return testing::AssertionSuccess(); + } + ::testing::Message msg; + msg << "Expected equality of these values:"; + msg << "\n " << lhs; + msg << "\n " << rhs; + + return testing::AssertionFailure() << msg; +} + +// Specializations for floating types that do bitwise comparisons when equality +// comparison is requested. +template <> +testing::AssertionResult CompareEqual<float>(float lhs, float rhs) { + return CompareFloatsBitwiseEqual<float, uint32>(lhs, rhs); +} +template <> +testing::AssertionResult CompareEqual<double>(double lhs, double rhs) { + return CompareFloatsBitwiseEqual<double, uint64>(lhs, rhs); +} + +// A recursive function which iterates through every index of expected and +// actual literal and compares their values elementwise. Returns true if all +// elements are equal. 
+template <typename NativeT> +bool ExpectLiteralsEqual(const Literal& expected, const Literal& actual, + tensorflow::gtl::MutableArraySlice<int64> multi_index, + int64 dimension) { + if (dimension == expected.shape().dimensions_size()) { + NativeT expected_value = LiteralUtil::Get<NativeT>(expected, multi_index); + NativeT actual_value = LiteralUtil::Get<NativeT>(actual, multi_index); + testing::AssertionResult result = + CompareEqual<NativeT>(expected_value, actual_value); + return result; // Defines implicit coercion to bool. + } + + bool all_match = true; + for (int64 i = 0; i < expected.shape().dimensions(dimension); ++i) { + multi_index[dimension] = i; + all_match = all_match && ExpectLiteralsEqual<NativeT>( + expected, actual, multi_index, dimension + 1); + } + return all_match; +} + +} // namespace + +/* static */ void LiteralTestUtil::ExpectEqual(const Literal& expected, + const Literal& actual) { + EXPECT_TRUE(Equal(expected, actual)) << "expected:\n" + << LiteralUtil::ToString(expected) + << "\n\tvs actual:\n" + << LiteralUtil::ToString(actual); +} + +/* static */ void LiteralTestUtil::ExpectNotEqual(const Literal& expected, + const Literal& actual) { + EXPECT_FALSE(Equal(expected, actual)); +} + +/* static */ testing::AssertionResult LiteralTestUtil::Equal( + const Literal& expected, const Literal& actual) { + VLOG(1) << "expected: " << LiteralUtil::ToString(expected); + VLOG(1) << "actual: " << LiteralUtil::ToString(actual); + + AssertEqualShapes(expected.shape(), actual.shape()); + std::vector<int64> multi_index(expected.shape().dimensions_size(), 0); + bool match = false; + switch (expected.shape().element_type()) { + case PRED: + match = ExpectLiteralsEqual<bool>(expected, actual, &multi_index, 0); + break; + case U8: + match = ExpectLiteralsEqual<uint8>(expected, actual, &multi_index, 0); + break; + case S32: + match = ExpectLiteralsEqual<int32>(expected, actual, &multi_index, 0); + break; + case S64: + match = ExpectLiteralsEqual<int64>(expected, 
actual, &multi_index, 0); + break; + case U32: + match = ExpectLiteralsEqual<uint32>(expected, actual, &multi_index, 0); + break; + case U64: + match = ExpectLiteralsEqual<uint64>(expected, actual, &multi_index, 0); + break; + case F32: + match = ExpectLiteralsEqual<float>(expected, actual, &multi_index, 0); + break; + case F64: + match = ExpectLiteralsEqual<double>(expected, actual, &multi_index, 0); + break; + case TUPLE: { + bool tuple_match = true; + for (int i = 0; i < actual.tuple_literals_size(); ++i) { + auto result = + Equal(expected.tuple_literals(i), actual.tuple_literals(i)); + tuple_match = tuple_match ? !!result : false; + } + match = tuple_match; + break; + } + default: + LOG(FATAL) + << "Unsupported primitive type in LiteralTestUtil::ExpectEqual: " + << PrimitiveType_Name(expected.shape().element_type()); + } + testing::AssertionResult result = testing::AssertionSuccess(); + if (!match) { + result = testing::AssertionFailure() + << "expected: " << LiteralUtil::ToString(expected) + << "\nactual: " << LiteralUtil::ToString(actual); + VLOG(1) << result.message(); + } + return result; +} + +/* static */ void LiteralTestUtil::ExpectEqualTuple(const Literal& expected, + const Literal& actual) { + VLOG(1) << "expected: " << LiteralUtil::ToString(expected); + VLOG(1) << "actual: " << LiteralUtil::ToString(actual); + + ASSERT_TRUE(ShapeUtil::IsTuple(expected.shape())); + ASSERT_TRUE(ShapeUtil::IsTuple(actual.shape())); + AssertEqualShapes(expected.shape(), actual.shape()); + for (uint64 i = 0; i < expected.tuple_literals_size(); ++i) { + const auto& expected_element = expected.tuple_literals(i); + const auto& actual_element = actual.tuple_literals(i); + if (ShapeUtil::IsTuple(expected_element.shape())) { + ExpectEqualTuple(expected_element, actual_element); + } else { + ExpectEqual(expected_element, actual_element); + } + } +} + +namespace { + +// Helper class for comparing floating-point literals within an error bound. 
+class NearComparator { + public: + explicit NearComparator(ErrorSpec error) : error_(error) {} + + // Compares the two literals elementwise. EXPECTs each pair of elements to be + // within the error bound. Emits useful log messages and dumps literals to + // temporary files on failure. Returns true if literals match. + bool ExpectNear(const Literal& expected, const Literal& actual) { + VLOG(1) << "expected: " << LiteralUtil::ToString(expected); + VLOG(1) << "actual: " << LiteralUtil::ToString(actual); + + LiteralTestUtil::AssertEqualShapes(expected.shape(), actual.shape()); + + // Set up members used during the comparison. + num_miscompares_ = 0; + abs_diff_sum_ = 0.0; + abs_expected_sum_ = 0.0; + abs_diff_miscompare_sum_ = 0.0; + abs_expected_miscompare_sum_ = 0.0; + max_rel_err_ = 0.0; + max_abs_err_ = 0.0; + *miscompares_.mutable_shape() = + ShapeUtil::ChangeElementType(actual.shape(), PRED); + miscompares_.mutable_preds()->Resize( + ShapeUtil::ElementsIn(miscompares_.shape()), false); + multi_index_.resize(expected.shape().dimensions_size(), 0); + + switch (expected.shape().element_type()) { + case F32: + ExpectLiteralsNear<float>(expected, actual, 0); + break; + case F64: + ExpectLiteralsNear<double>(expected, actual, 0); + break; + default: + LOG(FATAL) << "Unsupported primitive type in near comparator: " + << PrimitiveType_Name(expected.shape().element_type()) + << ". 
Must be floating-point type."; + } + + if (num_miscompares_ > 0) { + if (!VLOG_IS_ON(1)) { + LOG(INFO) << "expected: " << ShapeUtil::HumanString(expected.shape()) + << " " << LiteralUtil::ToString(expected); + LOG(INFO) << "actual: " << ShapeUtil::HumanString(actual.shape()) + << " " << LiteralUtil::ToString(actual); + } + EXPECT_TRUE(num_miscompares_ == 0) + << "\nmax relative mismatch at index " + << LiteralTestUtil::MultiIndexAsString(max_rel_multi_index_) + << "\nmaximum relative error " << max_rel_err_ + << "\nmax absolute mismatch at index " + << LiteralTestUtil::MultiIndexAsString(max_abs_multi_index_) + << "\nmaximum absolute error " << max_abs_err_ + << "\nfirst mismatch at index " + << LiteralTestUtil::MultiIndexAsString(first_multi_index_) + << "\nlast mismatch at index " + << LiteralTestUtil::MultiIndexAsString(last_multi_index_) + << "\ntotal absolute error " << abs_diff_sum_ + << "\ntotal absolute error of miscompares " + << abs_diff_miscompare_sum_ << "\ntotal relative error " + << (abs_diff_sum_ / abs_expected_sum_) + << "\ntotal relative error of miscompares " + << (abs_diff_miscompare_sum_ / abs_expected_miscompare_sum_) + << "\nfailure count " << num_miscompares_; + + WriteLiteralToTempFile(expected, "expected"); + WriteLiteralToTempFile(actual, "actual"); + WriteLiteralToTempFile(miscompares_, "miscompares"); + } + return num_miscompares_ == 0; + } + + private: + // EXPECTs that the two given scalar values are within the error bound. Keeps + // track of how many mismatches have occurred to keep the size of the output + // manageable. 
+ template <typename NativeT> + bool ExpectValuesNear(NativeT expected, NativeT actual) { + if (expected == actual) { + return true; + } + + float abs_diff = std::abs(actual - expected); + float rel_err = abs_diff / std::abs(expected); + abs_diff_sum_ += abs_diff; + abs_expected_sum_ += std::abs(expected); + if (rel_err > max_rel_err_) { + max_rel_err_ = rel_err; + max_rel_multi_index_ = multi_index_; + } + if (abs_diff > max_abs_err_) { + max_abs_err_ = abs_diff; + max_abs_multi_index_ = multi_index_; + } + VLOG(10) << tensorflow::strings::Printf( + "index %s abs_diff %f rel_err %f", + LiteralTestUtil::MultiIndexAsString(multi_index_).c_str(), abs_diff, + rel_err); + bool nan_mismatch = std::isnan(actual) != std::isnan(expected); + bool mismatch = + (nan_mismatch || (abs_diff >= error_.abs && rel_err >= error_.rel)); + if (mismatch) { + abs_diff_miscompare_sum_ += abs_diff; + abs_expected_miscompare_sum_ += std::abs(expected); + const int64 kMaxFailures = 2; + if (num_miscompares_ < kMaxFailures) { + EXPECT_NEAR(expected, actual, error_.abs) + << "mismatch at index " + << LiteralTestUtil::MultiIndexAsString(multi_index_) << " abs diff " + << abs_diff << " rel err " << rel_err << " failure #" + << num_miscompares_; + } else if (num_miscompares_ == kMaxFailures) { + LOG(ERROR) + << "reached max 'loud' failure count; silently proceeding..."; + } + if (num_miscompares_ == 0) { + first_multi_index_ = multi_index_; + } + num_miscompares_++; + last_multi_index_ = multi_index_; + } + return !mismatch; + } + + // Recursive function which compares the two given literals elementwise. 
+ template <typename NativeT> + void ExpectLiteralsNear(const Literal& expected, const Literal& actual, + int64 dimension) { + if (dimension == expected.shape().dimensions_size()) { + bool near = + ExpectValuesNear(LiteralUtil::Get<NativeT>(expected, multi_index_), + LiteralUtil::Get<NativeT>(actual, multi_index_)); + LiteralUtil::Set<bool>(&miscompares_, multi_index_, !near); + } else { + for (int64 i = 0; i < expected.shape().dimensions(dimension); ++i) { + multi_index_[dimension] = i; + ExpectLiteralsNear<NativeT>(expected, actual, dimension + 1); + } + } + } + + // Writes the given literal to a file in the test temporary directory. + void WriteLiteralToTempFile(const Literal& literal, const string& name) { + int64 now_usec = tensorflow::Env::Default()->NowMicros(); + string filename = tensorflow::io::JoinPath( + tensorflow::testing::TmpDir(), + tensorflow::strings::Printf("tempfile-%s-%llx-%s", Hostname().c_str(), + now_usec, name.c_str())); + TF_CHECK_OK(tensorflow::WriteBinaryProto(tensorflow::Env::Default(), + filename, literal)); + LOG(ERROR) << "wrote to " << name << " file: " << filename; + } + + ErrorSpec error_; + + // Number of element miscomparisons encountered so far. + int64 num_miscompares_; + + // A Literal containing which elements did not match in the expected and + // actual literals. miscompares_ contains PREDs and is of the same sizes as + // the comparison literals. + Literal miscompares_; + + // A multidimensional index used when performing the recursive comparison. + std::vector<int64> multi_index_; + + // Aggregated Statistics on input. 
+ double abs_diff_sum_; + double abs_expected_sum_; + double abs_diff_miscompare_sum_; + double abs_expected_miscompare_sum_; + float max_rel_err_; + float max_abs_err_; + std::vector<int64> first_multi_index_; + std::vector<int64> last_multi_index_; + std::vector<int64> max_rel_multi_index_; + std::vector<int64> max_abs_multi_index_; +}; + +} // namespace + +/* static */ testing::AssertionResult LiteralTestUtil::Near( + const Literal& expected, const Literal& actual, const ErrorSpec& error) { + NearComparator comparator(error); + return comparator.ExpectNear(expected, actual) + ? testing::AssertionSuccess() + : testing::AssertionFailure() << "values were not near"; +} + +/* static */ void LiteralTestUtil::ExpectNear(const Literal& expected, + const Literal& actual, + const ErrorSpec& error) { + EXPECT_TRUE(Near(expected, actual, error)); +} + +/* static */ testing::AssertionResult LiteralTestUtil::NearTuple( + const Literal& expected, const Literal& actual, const ErrorSpec& error) { + VLOG(1) << "expected: " << LiteralUtil::ToString(expected); + VLOG(1) << "actual: " << LiteralUtil::ToString(actual); + + if (!ShapeUtil::IsTuple(expected.shape()) || + !ShapeUtil::IsTuple(actual.shape())) { + return testing::AssertionFailure() + << "tuples expected expected shape = " + << expected.shape().ShortDebugString() + << " actual shape = " << actual.shape().ShortDebugString(); + } + AssertEqualShapes(expected.shape(), actual.shape()); + for (uint64 i = 0; i < expected.tuple_literals_size(); ++i) { + const auto& expected_element = expected.tuple_literals(i); + const auto& actual_element = actual.tuple_literals(i); + if (ShapeUtil::IsTuple(expected_element.shape())) { + auto ret = NearTuple(expected_element, actual_element, error); + if (!ret) { + return ret; + } + } else if (ShapeUtil::ElementIsFloating(expected_element.shape())) { + auto ret = Near(expected_element, actual_element, error); + if (!ret) { + return ret; + } + } else { + auto ret = Equal(expected_element, 
actual_element); + if (!ret) { + return ret; + } + } + } + + return testing::AssertionSuccess(); +} + +/* static */ void LiteralTestUtil::ExpectNearTuple(const Literal& expected, + const Literal& actual, + const ErrorSpec& error) { + EXPECT_TRUE(NearTuple(expected, actual, error)); +} + +/* static */ string LiteralTestUtil::MultiIndexAsString( + tensorflow::gtl::ArraySlice<int64> multi_index) { + return tensorflow::strings::StrCat( + "{", tensorflow::str_util::Join(multi_index, ","), "}"); +} + +/* static */ std::unique_ptr<Literal> LiteralTestUtil::Reshape( + tensorflow::gtl::ArraySlice<int64> new_dimensions, + tensorflow::gtl::ArraySlice<int64> minor_to_major, const Literal& literal) { + int64 new_num_elements = 1; + for (int64 i = 0; i < new_dimensions.size(); ++i) { + new_num_elements *= new_dimensions[i]; + } + CHECK_EQ(ShapeUtil::ElementsIn(literal.shape()), new_num_elements); + + auto new_literal = MakeUnique<Literal>(); + *new_literal->mutable_shape() = + ShapeUtil::MakeShape(literal.shape().element_type(), new_dimensions); + + // Create a new shape with the given minor-to-major layout. This shape is used + // solely for converting linear address to multi-dimensional addresses when + // writing elements to the new literal. + Shape shape_with_layout = new_literal->shape(); + *shape_with_layout.mutable_layout() = LayoutUtil::MakeLayout(minor_to_major); + + // Allocate space in the new literal. + LiteralUtil::Reserve(ShapeUtil::ElementsIn(literal.shape()), + new_literal.get()); + + // Copy data into new literal, element-by-element. 
+ for (int64 i = 0; i < ShapeUtil::ElementsIn(literal.shape()); ++i) { + std::vector<int64> from_multi_index = + IndexUtil::LinearIndexToMultidimensionalIndex(literal.shape(), i); + std::vector<int64> to_multi_index = + IndexUtil::LinearIndexToMultidimensionalIndex(shape_with_layout, i); + switch (literal.shape().element_type()) { + case PRED: + LiteralUtil::Set<bool>( + new_literal.get(), to_multi_index, + LiteralUtil::Get<bool>(literal, from_multi_index)); + break; + case U8: + LiteralUtil::Set<uint8>( + new_literal.get(), to_multi_index, + LiteralUtil::Get<uint8>(literal, from_multi_index)); + break; + case U32: + LiteralUtil::Set<uint32>( + new_literal.get(), to_multi_index, + LiteralUtil::Get<uint32>(literal, from_multi_index)); + break; + case S32: + LiteralUtil::Set<int32>( + new_literal.get(), to_multi_index, + LiteralUtil::Get<int32>(literal, from_multi_index)); + break; + case U64: + LiteralUtil::Set<uint64>( + new_literal.get(), to_multi_index, + LiteralUtil::Get<uint64>(literal, from_multi_index)); + break; + case S64: + LiteralUtil::Set<int64>( + new_literal.get(), to_multi_index, + LiteralUtil::Get<int64>(literal, from_multi_index)); + break; + case F32: + LiteralUtil::Set<float>( + new_literal.get(), to_multi_index, + LiteralUtil::Get<float>(literal, from_multi_index)); + break; + case F64: + LiteralUtil::Set<double>( + new_literal.get(), to_multi_index, + LiteralUtil::Get<double>(literal, from_multi_index)); + break; + default: + LOG(FATAL) << "Unhandled primitive element type: " + << PrimitiveType_Name(literal.shape().element_type()); + } + } + + return new_literal; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/tests/literal_test_util.h b/tensorflow/compiler/xla/tests/literal_test_util.h new file mode 100644 index 0000000000..85656a53e4 --- /dev/null +++ b/tensorflow/compiler/xla/tests/literal_test_util.h @@ -0,0 +1,274 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_TESTS_LITERAL_TEST_UTIL_H_ +#define TENSORFLOW_COMPILER_XLA_TESTS_LITERAL_TEST_UTIL_H_ + +#include <initializer_list> +#include <memory> +#include <string> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array3d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +// Structure describing permissible absolute and relative error bounds. +struct ErrorSpec { + explicit ErrorSpec(float aabs, float arel = 0) : abs(aabs), rel(arel) {} + + float abs; // Absolute error bound. + float rel; // Relative error bound. +}; + +// Utility class for making expectations/assertions related to XLA literals. +class LiteralTestUtil { + public: + // Asserts that the given shapes have the same rank, dimension sizes, and + // primitive types. 
+ static void AssertEqualShapes(const Shape& expected, const Shape& actual); + + // Asserts that the provided shapes are equal as defined in AssertEqualShapes + // and that they have the same layout. + static void AssertEqualShapesAndLayouts(const Shape& expected, + const Shape& actual); + + // Asserts that the expected and actual literals are (bitwise) equal for all + // elements in the literal. Also, asserts that the rank, dimension sizes, and + // primitive type are equal. + static testing::AssertionResult Equal( + const Literal& expected, const Literal& actual) TF_MUST_USE_RESULT; + + // Expects that expected and actual are Equal. + static void ExpectEqual(const Literal& expected, const Literal& actual); + + // Expects that expected and actual are Not Equal. + static void ExpectNotEqual(const Literal& expected, const Literal& actual); + + // Asserts the given literal is (bitwise) equal to given expected values. + template <typename NativeT> + static void ExpectR0Equal(NativeT expected, const Literal& actual); + template <typename NativeT> + static void ExpectR1Equal(tensorflow::gtl::ArraySlice<NativeT> expected, + const Literal& actual); + template <typename NativeT> + static void ExpectR2Equal( + std::initializer_list<std::initializer_list<NativeT>> expected, + const Literal& actual); + template <typename NativeT> + static void ExpectR3Equal( + std::initializer_list< + std::initializer_list<std::initializer_list<NativeT>>> + expected, + const Literal& actual); + + // Asserts the given literal is (bitwise) equal to the given array. 
+ template <typename NativeT> + static void ExpectR2EqualArray2D(const Array2D<NativeT>& expected, + const Literal& actual); + template <typename NativeT> + static void ExpectR3EqualArray3D(const Array3D<NativeT>& expected, + const Literal& actual); + template <typename NativeT> + static void ExpectR4EqualArray4D(const Array4D<NativeT>& expected, + const Literal& actual); + + // Expects that the values of the elements in the expected and actual tuples + // are equal. Tuples are matched recursively. + static void ExpectEqualTuple(const Literal& expected, const Literal& actual); + + // Asserts that the expected and actual literals are within the given error + // bound for all elements. Also, asserts that the rank, dimension sizes, and + // bounds are equivalent. Only supported for floating point values. + static testing::AssertionResult Near( + const Literal& expected, const Literal& actual, + const ErrorSpec& error) TF_MUST_USE_RESULT; + + // Expects expected and actual to be Near with the given error. + static void ExpectNear(const Literal& expected, const Literal& actual, + const ErrorSpec& error); + + // Asserts the given literal is within the given error bound of the given + // expected values. Only supported for floating point values. 
+ template <typename NativeT> + static void ExpectR0Near(NativeT expected, const Literal& actual, + const ErrorSpec& error); + template <typename NativeT> + static void ExpectR1Near(tensorflow::gtl::ArraySlice<NativeT> expected, + const Literal& actual, const ErrorSpec& error); + template <typename NativeT> + static void ExpectR2Near( + std::initializer_list<std::initializer_list<NativeT>> expected, + const Literal& actual, const ErrorSpec& error); + template <typename NativeT> + static void ExpectR3Near( + std::initializer_list< + std::initializer_list<std::initializer_list<NativeT>>> + expected, + const Literal& actual, const ErrorSpec& error); + + // Asserts the given literal is within the given error bound of the given + // array. Only supported for floating point values. + template <typename NativeT> + static void ExpectR2NearArray2D(const Array2D<NativeT>& expected, + const Literal& actual, + const ErrorSpec& error); + template <typename NativeT> + static void ExpectR3NearArray3D(const Array3D<NativeT>& expected, + const Literal& actual, + const ErrorSpec& error); + template <typename NativeT> + static void ExpectR4NearArray4D(const Array4D<NativeT>& expected, + const Literal& actual, + const ErrorSpec& error); + + // Returns whether the values of the elements in the expected and actual + // tuples are within the given error bound. Tuples are matched recursively. + // If the elements of the tuple are not floating-point types, the error spec + // is ignored and exact equality is checked. + static testing::AssertionResult NearTuple( + const Literal& expected, const Literal& actual, + const ErrorSpec& error) TF_MUST_USE_RESULT; + + // Expects that the expected and actual values are near. + static void ExpectNearTuple(const Literal& expected, const Literal& actual, + const ErrorSpec& error); + + // Returns a multi-dimensional index as a string. 
For example: '{7, 8}' will + // be returned for a 2-dimensional index with dimension 0 index equal to 7, + // dimension 1 equal to 8. + static string MultiIndexAsString( + tensorflow::gtl::ArraySlice<int64> multi_index); + + // Creates a literal with a new shape with the given new dimensions using the + // data in the given input literal. For reshaping purposes the (flat) data + // buffer of the input literal is assumed to have the given minor_to_major + // layout order. + static std::unique_ptr<Literal> Reshape( + tensorflow::gtl::ArraySlice<int64> new_dimensions, + tensorflow::gtl::ArraySlice<int64> minor_to_major, + const Literal& literal); + + private: + TF_DISALLOW_COPY_AND_ASSIGN(LiteralTestUtil); +}; + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR0Equal(NativeT expected, + const Literal& actual) { + ExpectEqual(*LiteralUtil::CreateR0<NativeT>(expected), actual); +} + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR1Equal( + tensorflow::gtl::ArraySlice<NativeT> expected, const Literal& actual) { + ExpectEqual(*LiteralUtil::CreateR1<NativeT>(expected), actual); +} + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR2Equal( + std::initializer_list<std::initializer_list<NativeT>> expected, + const Literal& actual) { + ExpectEqual(*LiteralUtil::CreateR2<NativeT>(expected), actual); +} + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR3Equal( + std::initializer_list<std::initializer_list<std::initializer_list<NativeT>>> + expected, + const Literal& actual) { + ExpectEqual(*LiteralUtil::CreateR3<NativeT>(expected), actual); +} + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR2EqualArray2D( + const Array2D<NativeT>& expected, const Literal& actual) { + ExpectEqual(*LiteralUtil::CreateR2FromArray2D(expected), actual); +} + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR3EqualArray3D( + const Array3D<NativeT>& expected, 
const Literal& actual) { + ExpectEqual(*LiteralUtil::CreateR3FromArray3D(expected), actual); +} + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR4EqualArray4D( + const Array4D<NativeT>& expected, const Literal& actual) { + ExpectEqual(*LiteralUtil::CreateR4FromArray4D(expected), actual); +} + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR0Near(NativeT expected, + const Literal& actual, + const ErrorSpec& error) { + ExpectNear(*LiteralUtil::CreateR0<NativeT>(expected), actual, error); +} + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR1Near( + tensorflow::gtl::ArraySlice<NativeT> expected, const Literal& actual, + const ErrorSpec& error) { + ExpectNear(*LiteralUtil::CreateR1<NativeT>(expected), actual, error); +} + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR2Near( + std::initializer_list<std::initializer_list<NativeT>> expected, + const Literal& actual, const ErrorSpec& error) { + ExpectNear(*LiteralUtil::CreateR2<NativeT>(expected), actual, error); +} + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR3Near( + std::initializer_list<std::initializer_list<std::initializer_list<NativeT>>> + expected, + const Literal& actual, const ErrorSpec& error) { + ExpectNear(*LiteralUtil::CreateR3<NativeT>(expected), actual, error); +} + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR2NearArray2D( + const Array2D<NativeT>& expected, const Literal& actual, + const ErrorSpec& error) { + ExpectNear(*LiteralUtil::CreateR2FromArray2D(expected), actual, error); +} + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR3NearArray3D( + const Array3D<NativeT>& expected, const Literal& actual, + const ErrorSpec& error) { + ExpectNear(*LiteralUtil::CreateR3FromArray3D(expected), actual, error); +} + +template <typename NativeT> +/* static */ void LiteralTestUtil::ExpectR4NearArray4D( + const Array4D<NativeT>& expected, 
const Literal& actual, + const ErrorSpec& error) { + ExpectNear(*LiteralUtil::CreateR4FromArray4D(expected), actual, error); +} + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_TESTS_LITERAL_TEST_UTIL_H_ diff --git a/tensorflow/compiler/xla/tests/literal_test_util_test.cc b/tensorflow/compiler/xla/tests/literal_test_util_test.cc new file mode 100644 index 0000000000..fdec11c0e9 --- /dev/null +++ b/tensorflow/compiler/xla/tests/literal_test_util_test.cc @@ -0,0 +1,102 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Tests that our utility functions for dealing with literals are correctly +// implemented. 
+ +#include "tensorflow/compiler/xla/tests/literal_test_util.h" + +#include <vector> + +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +TEST(LiteralTestUtilTest, ComparesEqualTuplesEqual) { + std::unique_ptr<Literal> literal = LiteralUtil::MakeTuple({ + LiteralUtil::CreateR0<int32>(42).get(), + LiteralUtil::CreateR0<int32>(64).get(), + }); + LiteralTestUtil::ExpectEqual(*literal, *literal); +} + +TEST(LiteralTestUtilTest, ComparesUnequalTuplesUnequal) { + // Implementation note: we have to use a death test here, because you can't + // un-fail an assertion failure. The CHECK-failure is death, so we can make a + // death assertion. + auto unequal_things_are_equal = [] { + std::unique_ptr<Literal> lhs = LiteralUtil::MakeTuple({ + LiteralUtil::CreateR0<int32>(42).get(), + LiteralUtil::CreateR0<int32>(64).get(), + }); + std::unique_ptr<Literal> rhs = LiteralUtil::MakeTuple({ + LiteralUtil::CreateR0<int32>(64).get(), + LiteralUtil::CreateR0<int32>(42).get(), + }); + CHECK(LiteralTestUtil::Equal(*lhs, *rhs)) << "LHS and RHS are unequal"; + }; + ASSERT_DEATH(unequal_things_are_equal(), "LHS and RHS are unequal"); +} + +TEST(LiteralTestUtilTest, ExpectNearFailurePlacesResultsInTemporaryDirectory) { + auto dummy_lambda = [] { + auto two = LiteralUtil::CreateR0<float>(2); + auto four = LiteralUtil::CreateR0<float>(4); + ErrorSpec error(0.001); + CHECK(LiteralTestUtil::Near(*two, *four, error)) << "two is not near four"; + }; + + tensorflow::Env* env = tensorflow::Env::Default(); + string pattern = + tensorflow::io::JoinPath(tensorflow::testing::TmpDir(), "/tempfile-*"); + std::vector<string> files; + TF_CHECK_OK(env->GetMatchingPaths(pattern, &files)); + for (const auto& f : files) { + TF_CHECK_OK(env->DeleteFile(f)) 
<< f; + } + + ASSERT_DEATH(dummy_lambda(), "two is not near four"); + + // Now check we wrote temporary files to the temporary directory that we can + // read. + std::vector<string> results; + TF_CHECK_OK(env->GetMatchingPaths(pattern, &results)); + + LOG(INFO) << "results: [" << tensorflow::str_util::Join(results, ", ") << "]"; + EXPECT_EQ(3, results.size()); + for (const string& result : results) { + Literal literal; + TF_CHECK_OK(tensorflow::ReadBinaryProto(tensorflow::Env::Default(), result, + &literal)); + if (result.find("expected") != string::npos) { + EXPECT_EQ("2", LiteralUtil::ToString(literal)); + } else if (result.find("actual") != string::npos) { + EXPECT_EQ("4", LiteralUtil::ToString(literal)); + } else if (result.find("miscompares") != string::npos) { + EXPECT_EQ("true", LiteralUtil::ToString(literal)); + } else { + FAIL() << "unknown file in temporary directory: " << result; + } + } +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/tests/local_client_aot_test.cc b/tensorflow/compiler/xla/tests/local_client_aot_test.cc new file mode 100644 index 0000000000..591fff338c --- /dev/null +++ b/tensorflow/compiler/xla/tests/local_client_aot_test.cc @@ -0,0 +1,55 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/executable_run_options.h" +#include "tensorflow/core/platform/dynamic_annotations.h" +#include "tensorflow/core/platform/test.h" + +class LocalClientAotTest : public ::testing::Test {}; + +// This is a compiled XLA computation which calls SumStructElements, and then +// doubles the result. +extern "C" void SumAndDouble(float* out, xla::ExecutableRunOptions* options, + void** parameters, void** temporary_buffers); + +// Just some structs with some arbitrary fields used to test the OPAQUE type. +struct OpaqueData { + int field1 : 15; + int field2 : 14; + int field3 : 3; +}; + +// This is the implementation of a custom op which will be called by +// SumAndDouble. +extern "C" void SumStructElements(float* out, void** parameters) { + TF_ANNOTATE_MEMORY_IS_INITIALIZED(parameters, sizeof(OpaqueData*)); + const auto* opaque_data = static_cast<OpaqueData*>(parameters[0]); + *out = opaque_data->field1 + opaque_data->field2 + opaque_data->field3; +} + +TEST_F(LocalClientAotTest, Constant) { + xla::ExecutableRunOptions run_options; + OpaqueData opaque_data{100, 20, 3}; + void* parameters[] = {&opaque_data}; + float out = 0; + float tmp = 0; + void* temporary_buffers[] = {&out, &tmp, nullptr}; + SumAndDouble(&out, &run_options, parameters, temporary_buffers); + EXPECT_EQ(out, 246.0f); + + opaque_data = {1, 2, 3}; + SumAndDouble(&out, &run_options, parameters, temporary_buffers); + EXPECT_EQ(out, 12.0f); +} diff --git a/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc b/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc new file mode 100644 index 0000000000..50e5dec0f6 --- /dev/null +++ b/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc @@ -0,0 +1,111 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This program compiles an XLA program which computes 123 and writes the +// resulting object file to stdout. + +#include <iostream> +#include <vector> + +#include "external/llvm/include/llvm/ADT/Triple.h" +#include "tensorflow/compiler/xla/client/client_library.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h" +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/platform/logging.h" + +using xla::string; + +xla::Computation Doubler(xla::Client* client) { + xla::ComputationBuilder builder(client, "doubler"); + auto r0f32 = xla::ShapeUtil::MakeShape(xla::F32, {}); + auto x = builder.Parameter(0, r0f32, "x"); + builder.Mul(x, builder.ConstantR0<float>(2.0)); + return std::move(builder.Build().ValueOrDie()); +} + +int main(int argc, char** argv) { + tensorflow::port::InitMain(argv[0], &argc, &argv); + + auto client = xla::ClientLibrary::LocalClientOrDie(); + + xla::ComputationBuilder builder(client, "aot_test_helper"); + auto opaque_shape = xla::ShapeUtil::MakeOpaqueShape(); + auto opaque_param = builder.Parameter(0, opaque_shape, "x"); + auto r0f32 = xla::ShapeUtil::MakeShape(xla::F32, {}); + auto sum = builder.CustomCall("SumStructElements", 
{opaque_param}, r0f32); + builder.Call(Doubler(client), {sum}); + + if (argc != 2) { + LOG(FATAL) << "local_client_aot_test_helper TARGET_CPU"; + } + + string triple_string; + string target_cpu = argv[1]; + if (target_cpu == "k8") { + triple_string = "x86_64-none-linux-gnu"; + } else if (target_cpu == "darwin") { + triple_string = "x86_64-apple-macosx"; + } else if (target_cpu == "arm") { + triple_string = "aarch64-none-linux-gnu"; + } else if (target_cpu == "ppc") { + triple_string = "powerpc64le-unknown-linux-gnu"; + } else if (target_cpu == "local") { + triple_string = xla::llvm_ir::AsString(llvm::sys::getDefaultTargetTriple()); + } else { + LOG(FATAL) << "unsupported TARGET_CPU: " << target_cpu; + } + + llvm::Triple triple(xla::llvm_ir::AsStringRef(triple_string)); + + xla::cpu::CpuAotCompilationOptions options( + triple_string, + /*cpu_name=*/"", /*features=*/"", "SumAndDouble", + xla::cpu::CpuAotCompilationOptions::RelocationModel::Static); + auto result = xla::unique_ptr_static_cast<xla::cpu::CpuAotCompilationResult>( + client + ->CompileAheadOfTime(builder.Build().ValueOrDie(), + /*argument_layouts=*/{&opaque_shape}, r0f32, + options) + .ConsumeValueOrDie()); + // We should have three buffer entries: a float-sized result buffer, a + // float-sized temporary buffer, and a -1 sized entry (presumably the opaque + // parameter, allocated elsewhere -- verify). It's lame to hard-code this, + // but we need local_client_aot_test.cc to be able to easily invoke the + // function. + CHECK_EQ(result->result_buffer_index(), 0); + CHECK_EQ(result->buffer_sizes().size(), 3); + CHECK_EQ(result->buffer_sizes()[0], sizeof(float)); // result buffer + CHECK_EQ(result->buffer_sizes()[1], sizeof(float)); // temp buffer + CHECK_EQ(result->buffer_sizes()[2], -1); + if (triple.isOSBinFormatELF()) { + // Check the ELF magic. + CHECK_EQ(result->object_file_data()[0], 0x7F); + CHECK_EQ(result->object_file_data()[1], 'E'); + CHECK_EQ(result->object_file_data()[2], 'L'); + CHECK_EQ(result->object_file_data()[3], 'F'); + // Check the ELF class. 
+ CHECK_EQ(result->object_file_data()[4], triple.isArch32Bit() ? 1 : 2); + // Check the ELF endianness: it should be little. + CHECK_EQ(result->object_file_data()[5], triple.isLittleEndian() ? 1 : 2); + // Check the ELF version: it should be 1. + CHECK_EQ(result->object_file_data()[6], 1); + } + + const std::vector<char>& object_file_data = result->object_file_data(); + std::cout.write(object_file_data.data(), object_file_data.size()); + + return 0; +} diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc new file mode 100644 index 0000000000..5c32ed8895 --- /dev/null +++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc @@ -0,0 +1,220 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/tests/local_client_test_base.h" + +#include <vector> + +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/map_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/core/platform/logging.h" + +namespace xla { + +/* static */ TestAllocator* LocalClientTestBase::allocator_; + +StatusOr<perftools::gputools::DeviceMemoryBase> TestAllocator::Allocate( + int device_ordinal, uint64 size, bool retry_on_failure) { + VLOG(2) << "Allocate(" << device_ordinal << ", " << size << ")"; + { + tensorflow::mutex_lock lock(count_mutex_); + allocation_count_++; + device_allocation_count_[device_ordinal]++; + } + return StreamExecutorMemoryAllocator::Allocate(device_ordinal, size); +} + +tensorflow::Status TestAllocator::Deallocate( + int device_ordinal, perftools::gputools::DeviceMemoryBase* mem) { + VLOG(2) << "Deallocate(" << device_ordinal << ")"; + { + tensorflow::mutex_lock lock(count_mutex_); + deallocation_count_++; + device_deallocation_count_[device_ordinal]++; + } + return StreamExecutorMemoryAllocator::Deallocate(device_ordinal, mem); +} + +int64 TestAllocator::allocation_count() const { + tensorflow::mutex_lock lock(count_mutex_); + return allocation_count_; +} + +int64 TestAllocator::allocation_count(int device_ordinal) const { + tensorflow::mutex_lock lock(count_mutex_); + auto it = device_allocation_count_.find(device_ordinal); + if (it == device_allocation_count_.end()) { + return 0; + } else { + return it->second; + } +} + +int64 TestAllocator::deallocation_count() const { + tensorflow::mutex_lock lock(count_mutex_); + return deallocation_count_; +} + +int64 TestAllocator::deallocation_count(int device_ordinal) const { + 
tensorflow::mutex_lock lock(count_mutex_); + auto it = device_deallocation_count_.find(device_ordinal); + if (it == device_deallocation_count_.end()) { + return 0; + } else { + return it->second; + } +} + +/* static */ TestAllocator* LocalClientTestBase::GetOrCreateAllocator( + perftools::gputools::Platform* platform) { + if (allocator_ == nullptr) { + allocator_ = new TestAllocator( + platform == nullptr ? PlatformUtil::GetDefaultPlatform().ValueOrDie() + : platform); + } + return allocator_; +} + +LocalClientTestBase::LocalClientTestBase( + perftools::gputools::Platform* platform) + : local_client_( + ClientLibrary::GetOrCreateLocalClient(platform).ValueOrDie()) { + stream_executor_ = PlatformUtil::GetStreamExecutors(local_client_->platform()) + .ValueOrDie()[local_client_->default_device_ordinal()]; + transfer_manager_ = + TransferManager::GetForPlatform(local_client_->platform()).ValueOrDie(); +} + +std::unique_ptr<ScopedShapedBuffer> +LocalClientTestBase::LiteralToScopedShapedBuffer(const Literal& literal) { + return LiteralToScopedShapedBuffer(literal, + local_client_->default_device_ordinal()); +} + +std::unique_ptr<ScopedShapedBuffer> +LocalClientTestBase::LiteralToScopedShapedBuffer(const Literal& literal, + int device_ordinal) { + CHECK(!ShapeUtil::IsTuple(literal.shape())); + auto scoped_buffer = + ScopedShapedBuffer::MakeScopedShapedBuffer( + literal.shape(), GetOrCreateAllocator(local_client_->platform()), + device_ordinal) + .ConsumeValueOrDie(); + // The creation of the scoped shaped buffer should allocate the buffer. 
+ CHECK(!scoped_buffer->buffer(/*index=*/{}).is_null() || + ShapeUtil::HasZeroElements(literal.shape())); + TF_CHECK_OK(transfer_manager_->TransferLiteralToDevice( + stream_executor_, literal, scoped_buffer->mutable_buffer(/*index=*/{}))); + return scoped_buffer; +} + +void LocalClientTestBase::CopyShapedBufferToLiteral( + const ShapedBuffer& shaped_buffer, ShapeIndex* index, Literal* literal) { + const Shape& shape = ShapeUtil::GetSubshape(shaped_buffer.shape(), *index); + if (ShapeUtil::IsTuple(shape)) { + *literal->mutable_shape() = shape; + for (int i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) { + Literal* element_literal = literal->add_tuple_literals(); + index->push_back(i); + CopyShapedBufferToLiteral(shaped_buffer, index, element_literal); + index->pop_back(); + } + } else { + ASSERT_IS_OK(transfer_manager_->TransferLiteralFromDevice( + stream_executor_, shaped_buffer.buffer(*index), shape, shape, literal)); + } +} + +std::unique_ptr<Literal> LocalClientTestBase::ShapedBufferToLiteral( + const ShapedBuffer& shaped_buffer) { + auto literal = MakeUnique<Literal>(); + ShapeIndex index; + CopyShapedBufferToLiteral(shaped_buffer, &index, literal.get()); + return literal; +} + +std::unique_ptr<ScopedShapedBuffer> +LocalClientTestBase::ShapedBufferToScopedShapedBuffer( + std::unique_ptr<ShapedBuffer> shaped_buffer, + DeviceMemoryAllocator* allocator) { + std::unique_ptr<ScopedShapedBuffer> scoped_buffer = + ScopedShapedBuffer::MakeScopedShapedBuffer( + shaped_buffer->shape(), allocator, shaped_buffer->device_ordinal()) + .ConsumeValueOrDie(); + // Deallocate the existing DeviceMemoryBase values in the newly created scoped + // buffer and replace them with the values from the shaped buffer. 
+ for (perftools::gputools::DeviceMemoryBase& memory_base : + *scoped_buffer->mutable_buffers()) { + TF_CHECK_OK( + allocator->Deallocate(shaped_buffer->device_ordinal(), &memory_base)); + } + *scoped_buffer->mutable_buffers() = shaped_buffer->buffers(); + + TF_CHECK_OK( + scoped_buffer->mutable_shape_index_to_buffer_entry() + ->ForEachMutableElement( + [&shaped_buffer](const ShapeIndex& index, bool is_leaf, + size_t* buffer_entry) -> ::tensorflow::Status { + if (is_leaf) { + *buffer_entry = + shaped_buffer->shape_index_to_buffer_entry().element( + index); + } + return tensorflow::Status::OK(); + })); + return scoped_buffer; +} + +LocalExecuteOptions LocalClientTestBase::DefaultLocalExecuteOptions() const { + return LocalExecuteOptions().set_allocator( + GetOrCreateAllocator(local_client_->platform())); +} + +std::unique_ptr<ScopedShapedBuffer> LocalClientTestBase::ExecuteLocally( + const Computation& computation, + tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) { + return ExecuteLocally(computation, arguments, DefaultLocalExecuteOptions()); +} + +std::unique_ptr<ScopedShapedBuffer> LocalClientTestBase::ExecuteLocally( + const Computation& computation, + tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + const LocalExecuteOptions& options) { + return ShapedBufferToScopedShapedBuffer( + local_client_->ExecuteLocally(computation, arguments, options) + .ConsumeValueOrDie(), + options.allocator()); +} + +void LocalClientTestBase::ExecuteLocally( + const Computation& computation, + tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + ShapedBuffer* result) { + ExecuteLocally(computation, arguments, DefaultLocalExecuteOptions(), result); +} + +void LocalClientTestBase::ExecuteLocally( + const Computation& computation, + tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + const LocalExecuteOptions& options, ShapedBuffer* result) { + ASSERT_IS_OK( + local_client_->ExecuteLocally(computation, arguments, options, result)); +} + +} // 
namespace xla diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.h b/tensorflow/compiler/xla/tests/local_client_test_base.h new file mode 100644 index 0000000000..62916d50e3 --- /dev/null +++ b/tensorflow/compiler/xla/tests/local_client_test_base.h @@ -0,0 +1,146 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_TESTS_LOCAL_CLIENT_TEST_BASE_H_ +#define TENSORFLOW_COMPILER_XLA_TESTS_LOCAL_CLIENT_TEST_BASE_H_ + +#include <map> +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/client/client_library.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/service/device_memory_allocator.h" +#include "tensorflow/compiler/xla/service/local_service.h" +#include "tensorflow/compiler/xla/service/platform_util.h" +#include "tensorflow/compiler/xla/service/shaped_buffer.h" +#include "tensorflow/compiler/xla/service/transfer_manager.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" +#include 
"tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +class TestAllocator : public StreamExecutorMemoryAllocator { + public: + explicit TestAllocator(perftools::gputools::Platform* platform) + : StreamExecutorMemoryAllocator( + platform, PlatformUtil::GetStreamExecutors(platform).ValueOrDie()) { + } + + StatusOr<perftools::gputools::DeviceMemoryBase> Allocate( + int device_ordinal, uint64 size, bool retry_on_failure) override; + tensorflow::Status Deallocate( + int device_ordinal, perftools::gputools::DeviceMemoryBase* mem) override; + + // Return the number of allocations that have been performed. + int64 allocation_count() const; + int64 allocation_count(int device_ordinal) const; + + // Return the number of deallocations that have been performed. + int64 deallocation_count() const; + int64 deallocation_count(int device_ordinal) const; + + private: + mutable tensorflow::mutex count_mutex_; + + // Global counts of allocations and deallocations. + int64 allocation_count_ GUARDED_BY(count_mutex_) = 0; + int64 deallocation_count_ GUARDED_BY(count_mutex_) = 0; + + // Per-device counts of allocations and deallocations. + std::map<int, int64> device_allocation_count_ GUARDED_BY(count_mutex_); + std::map<int, int64> device_deallocation_count_ GUARDED_BY(count_mutex_); +}; + +// A base class for tests which exercise the LocalClient interface. +class LocalClientTestBase : public ::testing::Test { + protected: + explicit LocalClientTestBase( + perftools::gputools::Platform* platform = nullptr); + + static TestAllocator* GetOrCreateAllocator( + perftools::gputools::Platform* platform); + + // Copy the given literal onto the default device and return a + // ScopedShapedBuffer. + std::unique_ptr<ScopedShapedBuffer> LiteralToScopedShapedBuffer( + const Literal& literal); + // As above, but copy to a specific device. 
+ std::unique_ptr<ScopedShapedBuffer> LiteralToScopedShapedBuffer( + const Literal& literal, int device_ordinal); + + // Construct and return a literal containing the array represented by + // shaped_buffer. + std::unique_ptr<Literal> ShapedBufferToLiteral( + const ShapedBuffer& shaped_buffer); + + // Helper for converting a ShapedBuffer into a literal. + void CopyShapedBufferToLiteral(const ShapedBuffer& shaped_buffer, + ShapeIndex* index, Literal* literal); + + // Execute the given computation on the local client. With and without + // options. + std::unique_ptr<ScopedShapedBuffer> ExecuteLocally( + const Computation& computation, + tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments); + std::unique_ptr<ScopedShapedBuffer> ExecuteLocally( + const Computation& computation, + tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + const LocalExecuteOptions& options); + + // Returns a default set of execute options, configured to use allocator_ + // as the allocator. + LocalExecuteOptions DefaultLocalExecuteOptions() const; + + // Overloads which write result into the given buffer. + void ExecuteLocally( + const Computation& computation, + tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + ShapedBuffer* result); + void ExecuteLocally( + const Computation& computation, + tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + const LocalExecuteOptions& options, ShapedBuffer* result); + + // Convert a ShapedBuffer into a ScopedShaped buffer so that all buffers are + // deallocated when the object is destructed. + std::unique_ptr<ScopedShapedBuffer> ShapedBufferToScopedShapedBuffer( + std::unique_ptr<ShapedBuffer> shaped_buffer, + DeviceMemoryAllocator* allocator); + + string TestName() const { + return ::testing::UnitTest::GetInstance()->current_test_info()->name(); + } + + // The allocator must live as long as the service which lives until the end of + // the process, so make the allocator static. 
+ static TestAllocator* allocator_; + + perftools::gputools::StreamExecutor* stream_executor_; + TransferManager* transfer_manager_; + + LocalClient* local_client_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_TESTS_LOCAL_CLIENT_TEST_BASE_H_ diff --git a/tensorflow/compiler/xla/tests/log_test.cc b/tensorflow/compiler/xla/tests/log_test.cc new file mode 100644 index 0000000000..b520d89de3 --- /dev/null +++ b/tensorflow/compiler/xla/tests/log_test.cc @@ -0,0 +1,75 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <cmath> +#include <vector> + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class LogTest : public ClientLibraryTestBase {}; + +XLA_TEST_F(LogTest, LogZeroValues) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR3FromArray3D<float>(Array3D<float>(3, 0, 0)); + builder.Log(x); + + ComputeAndCompareR3<float>(&builder, Array3D<float>(3, 0, 0), {}, + ErrorSpec(0.0001)); +} + +TEST_F(LogTest, LogTenValues) { + std::vector<float> input = {-0.0, 1.0, 2.0, -3.0, -4.0, + 5.0, 6.0, -7.0, -8.0, 9.0}; + + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR1<float>(input); + builder.Log(x); + + std::vector<float> expected; + for (float f : input) { + expected.push_back(std::log(f)); + } + + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/map_test.cc b/tensorflow/compiler/xla/tests/map_test.cc new file mode 
100644 index 0000000000..014417a205 --- /dev/null +++ b/tensorflow/compiler/xla/tests/map_test.cc @@ -0,0 +1,589 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <memory> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" +#include "tensorflow/compiler/xla/xla.pb.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class MapTest : public ClientLibraryTestBase 
{ + public: + explicit MapTest(perftools::gputools::Platform* platform = nullptr) + : ClientLibraryTestBase(platform, + /*disabled_pass_names=*/{"algsimp", "inline"}) {} + + // Creates a function that adds its scalar argument with the constant 1.0. + // + // x {R0F32} ----> (add) + // / + // 1.0f ---------/ + Computation CreateAdderToOne() { + ComputationBuilder mapped_builder(client_, TestName()); + auto x = mapped_builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); + auto one = mapped_builder.ConstantR0<float>(1.0); + auto adder_to_one = mapped_builder.Add(x, one); + auto computation_status = mapped_builder.Build(); + TF_CHECK_OK(computation_status.status()); + return computation_status.ConsumeValueOrDie(); + } + + Computation CreateMax() { + ComputationBuilder b(client_, TestName()); + auto lhs = b.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); + auto rhs = b.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); + b.Max(lhs, rhs); + auto computation_status = b.Build(); + TF_CHECK_OK(computation_status.status()); + return computation_status.ConsumeValueOrDie(); + } + + // Creates a computation that accepts an F32 and returns T(1) (ignoring the + // argument). 
+ template <class T> + Computation CreateScalarOne() { + ComputationBuilder mapped_builder(client_, "scalar_one"); + (void)mapped_builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); + mapped_builder.ConstantR0<T>(1); + auto computation_status = mapped_builder.Build(); + TF_CHECK_OK(computation_status.status()); + return computation_status.ConsumeValueOrDie(); + } + + // Creates a function that multiplies its scalar argument by the constant 2.0 + // + // x {R0F32} ----> (mul) + // / + // 2.0f ---------/ + Computation CreateMulByTwo() { + ComputationBuilder mapped_builder(client_, TestName()); + auto x = mapped_builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); + auto two = mapped_builder.ConstantR0<float>(2.0); + auto mul_by_two = mapped_builder.Mul(x, two); + auto computation_status = mapped_builder.Build(); + TF_CHECK_OK(computation_status.status()); + return computation_status.ConsumeValueOrDie(); + } + + // Creates a function that adds its scalar argument with the constant 1.0 and + // then multiplies by the original element. + // + // /---------------\ + // / \ + // x {R0F32} ----> (add) ----> (mul) + // / + // 1.0f ---------/ + Computation CreateAdderToOneTimesItself() { + ComputationBuilder mapped_builder(client_, TestName()); + auto x = mapped_builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); + auto one = mapped_builder.ConstantR0<float>(1.0); + auto adder_to_one = mapped_builder.Add(x, one); + auto result = mapped_builder.Mul(x, adder_to_one); + auto computation_status = mapped_builder.Build(); + TF_CHECK_OK(computation_status.status()); + return computation_status.ConsumeValueOrDie(); + } + + // Creates a function that takes a single parameter and calls map with + // "embedded_computation" on it, and then adds "n" to the result. 
+ // + // x {R0F32} -----------> (map) ----> (add) + // / / + // embedded_computation --/ n --/ + Computation CreateMapPlusN(const Computation& embedded_computation, float n) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); + auto map = builder.Map({x}, embedded_computation); + auto constant_n = builder.ConstantR0<float>(n); + auto add = builder.Add(map, constant_n); + auto computation_status = builder.Build(); + TF_CHECK_OK(computation_status.status()); + return computation_status.ConsumeValueOrDie(); + } + + // Creates a binary function with signature (F32, F32) -> Pred + // defined by (x, y) -> x > y. + Computation CreateGt() { + ComputationBuilder b(client_, "Gt"); + auto x = b.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); + auto y = b.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); + auto gt = b.Gt(x, y); + auto computation_status = b.Build(); + TF_CHECK_OK(computation_status.status()); + return computation_status.ConsumeValueOrDie(); + } + + // Creates a function that adds three scalar arguments + // + // x {R0F32} ----\ + // \ + // y {R0F32} ----> (add) ---> (add) + // / + // z {R0F32} ---------------/ + Computation CreateTernaryAdder() { + ComputationBuilder mapped_builder(client_, "TernaryAdder"); + auto x = mapped_builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); + auto y = mapped_builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); + auto z = mapped_builder.Parameter(2, ShapeUtil::MakeShape(F32, {}), "z"); + auto xy = mapped_builder.Add(x, y); + auto xyz = mapped_builder.Add(xy, z); + auto computation_status = mapped_builder.Build(); + TF_CHECK_OK(computation_status.status()); + return computation_status.ConsumeValueOrDie(); + } +}; + +TEST_F(MapTest, MapEachElemPlusOneR0) { + // Applies lambda (x) (+ x 1)) to an input scalar. 
+ ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = LiteralUtil::CreateR0<float>(42.0); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto param = builder.Parameter(0, param0_literal->shape(), "param0"); + auto map = builder.Map({param}, CreateAdderToOne()); + + ComputeAndCompareR0<float>(&builder, 43.0, {param0_data.get()}, + ErrorSpec(0.01f)); +} + +XLA_TEST_F(MapTest, MapEachElemPlusOneR1S0) { + // Maps (lambda (x) (+ x 1)) onto an input R1F32 vector of length 0. + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = LiteralUtil::CreateR1<float>({}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto param = builder.Parameter(0, param0_literal->shape(), "param0"); + auto map = builder.Map({param}, CreateAdderToOne()); + + ComputeAndCompareR1<float>(&builder, {}, {param0_data.get()}, + ErrorSpec(0.01f)); +} + +TEST_F(MapTest, MapEachElemPlusOneR1S4) { + // Maps (lambda (x) (+ x 1)) onto an input R1F32 vector of length 4. 
+ ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR1<float>({2.2f, 3.3f, 4.4f, 5.5f}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto param = builder.Parameter(0, param0_literal->shape(), "param0"); + auto map = builder.Map({param}, CreateAdderToOne()); + + ComputeAndCompareR1<float>(&builder, {3.2f, 4.3f, 5.4f, 6.5f}, + {param0_data.get()}, ErrorSpec(0.01f)); +} + +TEST_F(MapTest, MapEachF32ElementToS32Constant) { + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR1<float>({2.2f, 3.3f, 4.4f, 5.5f}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto param = builder.Parameter(0, param0_literal->shape(), "param0"); + auto map = builder.Map({param}, CreateScalarOne<int32>()); + + ComputeAndCompareR1<int32>(&builder, {1, 1, 1, 1}, {param0_data.get()}); +} + +TEST_F(MapTest, MapEachF32ElementToU32Constant) { + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR1<float>({2.2f, 3.3f, 4.4f, 5.5f}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto param = builder.Parameter(0, param0_literal->shape(), "param0"); + auto map = builder.Map({param}, CreateScalarOne<uint32>()); + + ComputeAndCompareR1<uint32>(&builder, {1, 1, 1, 1}, {param0_data.get()}); +} + +TEST_F(MapTest, MapEachElemLongerChainR1) { + // Maps (lambda (x) (* (+ x 1) x)) onto an input R1F32 vector. 
+ ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR1<float>({2.6f, -5.1f, 0.1f, 0.2f, 999.0f, 255.5f}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto param = builder.Parameter(0, param0_literal->shape(), "param0"); + auto map = builder.Map({param}, CreateAdderToOneTimesItself()); + + ComputeAndCompareR1<float>( + &builder, {9.36f, 20.91f, 0.11f, 0.24f, 999000.0f, 65535.75f}, + {param0_data.get()}, ErrorSpec(0.01f)); +} + +XLA_TEST_F(MapTest, MapMultipleMapsR1S0) { + // Maps (lambda (x) (+ x 1)) onto an input R1F32 vector of length 0, and then + // maps (lambda (x) (* x 2)) on the result. + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = LiteralUtil::CreateR1<float>({}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto param = builder.Parameter(0, param0_literal->shape(), "param0"); + auto map1 = builder.Map({param}, CreateAdderToOne()); + auto map2 = builder.Map({map1}, CreateMulByTwo()); + + ComputeAndCompareR1<float>(&builder, {}, {param0_data.get()}, + ErrorSpec(0.01f)); +} + +TEST_F(MapTest, MapMultipleMapsR1S4) { + // Maps (lambda (x) (+ x 1)) onto an input R1F32 vector of length 4, and then + // maps (lambda (x) (* x 2)) on the result. 
+ ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR1<float>({2.2f, 3.3f, 4.4f, 5.5f}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto param = builder.Parameter(0, param0_literal->shape(), "param0"); + auto map1 = builder.Map({param}, CreateAdderToOne()); + auto map2 = builder.Map({map1}, CreateMulByTwo()); + + ComputeAndCompareR1<float>(&builder, {6.4f, 8.6f, 10.8f, 13.0f}, + {param0_data.get()}, ErrorSpec(0.01f)); +} + +TEST_F(MapTest, MapEachElemPlusOneR2) { + // Maps (lambda (x) (+ x 1)) onto an input R2F32 vector. + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = LiteralUtil::CreateR2<float>( + {{13.25f, 14.0f}, {-7.1f, -7.2f}, {-8.8f, 8.8f}}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto param = builder.Parameter(0, param0_literal->shape(), "param0"); + auto map = builder.Map({param}, CreateAdderToOne()); + + Array2D<float> expected_array( + {{14.25f, 15.0f}, {-6.1f, -6.2f}, {-7.8f, 9.8f}}); + ComputeAndCompareR2<float>(&builder, expected_array, {param0_data.get()}, + ErrorSpec(0.01f)); +} + +XLA_TEST_F(MapTest, ComplexNestedMaps) { + // Constructs a complex graph of embedded computations to test the computation + // lowering order. 
Python equivalent: + // + // embed1 = lambda x: x + 1 # x + 1 + // embed2 = lambda x: embed1(x) + 2 # x + 3 + // embed3 = lambda x: embed1(x) + 4 # x + 5 + // embed4 = lambda x: embed2(x) + embed3(x) # 2x + 8 + // embed5 = lambda x: embed2(x) + 6 # x + 9 + // result = embed5(42) + embed4(7) # (42 + 9) + (2 * 7 + 8) = 73 + + Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); + + auto embed1 = CreateAdderToOne(); + auto embed2 = CreateMapPlusN(embed1, 2.0); + auto embed3 = CreateMapPlusN(embed1, 4.0); + + ComputationBuilder embed4_builder(client_, "embed4"); + auto embed4_param = embed4_builder.Parameter(0, scalar_shape, "x"); + auto embed4_map_lhs = embed4_builder.Map({embed4_param}, embed2); + auto embed4_map_rhs = embed4_builder.Map({embed4_param}, embed3); + auto embed4_add = embed4_builder.Add(embed4_map_lhs, embed4_map_rhs); + auto embed4_status = embed4_builder.Build(); + ASSERT_IS_OK(embed4_status.status()); + auto embed4 = embed4_status.ConsumeValueOrDie(); + + auto embed5 = CreateMapPlusN(embed2, 6.0); + + ComputationBuilder builder(client_, TestName()); + auto constant_42 = builder.ConstantR0<float>(42.0); + auto constant_7 = builder.ConstantR0<float>(7.0); + auto map_42 = builder.Map({constant_42}, embed5); + auto map_7 = builder.Map({constant_7}, embed4); + builder.Add(map_42, map_7); + + ComputeAndCompareR0<float>(&builder, 73.0, {}, ErrorSpec(0.01f)); +} + +TEST_F(MapTest, VersionedEmbeddedComputation) { + // Build a computation X, use it in a map, then add an additional operation to + // computation X and use it again in a different map. Verify that the proper + // versions of computation X are used in each of the maps. + + // Create a (embedded) computation which adds one to its parameter argument. 
+ ComputationBuilder embedded_builder(client_, "EmbeddedComputation"); + auto param_0 = + embedded_builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "param0"); + auto constant_one = embedded_builder.ConstantR0<float>(1.0); + auto adder_to_one = embedded_builder.Add(param_0, constant_one); + auto computation_status = embedded_builder.Build(); + ASSERT_IS_OK(computation_status.status()); + auto embedded_computation = computation_status.ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto constant_vector = builder.ConstantR1<float>({1.0, 2.0, 3.0, 4.0}); + auto map_plus_1 = builder.Map({constant_vector}, embedded_computation); + + // Add another Add(1) operation to the existing embedded computation. This + // requires using the stub interface because the ComputationBuilder does not + // allow modification to the Computation objects after they have been built. + BinaryOpRequest request; + request.set_binop(BINOP_ADD); + *request.mutable_lhs() = adder_to_one; + *request.mutable_rhs() = constant_one; + OpRequest op_request; + *op_request.mutable_computation() = embedded_computation.handle(); + *op_request.mutable_binary_op_request() = request; + OpResponse response; + tensorflow::Status s = client_->stub()->Op(&op_request, &response); + ASSERT_TRUE(s.ok()); + + auto map_plus_2 = builder.Map({map_plus_1}, embedded_computation); + + // The original vector has Add(1) applied to it with a map, followed by + // Add(1+1) resulting in a net Add(3). + ComputeAndCompareR1<float>(&builder, {4.0, 5.0, 6.0, 7.0}, {}, + ErrorSpec(0.01f)); +} + +TEST_F(MapTest, MapBinaryAdder) { + // Maps (lambda (x y) (+ x y)) onto two R1F32 vectors. 
+ ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR1<float>({2.2f, 3.3f, 4.4f, 5.5f}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + std::unique_ptr<Literal> param1_literal = + LiteralUtil::CreateR1<float>({5.1f, 4.4f, -0.1f, -5.5f}); + std::unique_ptr<GlobalData> param1_data = + client_->TransferToServer(*param1_literal).ConsumeValueOrDie(); + + auto param0 = builder.Parameter(0, param0_literal->shape(), "param0"); + auto param1 = builder.Parameter(1, param1_literal->shape(), "param1"); + auto map = + builder.Map({param0, param1}, CreateScalarAddComputation(F32, &builder)); + + ComputeAndCompareR1<float>(&builder, {7.3f, 7.7, 4.3f, 0}, + {param0_data.get(), param1_data.get()}, + ErrorSpec(0.01f)); +} + +// Adds two rank-2 arrays with different layouts. This test exercises a path +// for Map that used to fail in shape inference (b/28989438). +XLA_TEST_F(MapTest, AddWithMixedLayouts) { + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + test_utils::CreateR2LiteralWithLayout({{1, 2}, {3, 4}}, {1, 0}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + std::unique_ptr<Literal> param1_literal = + test_utils::CreateR2LiteralWithLayout({{10, 20}, {30, 40}}, {0, 1}); + std::unique_ptr<GlobalData> param1_data = + client_->TransferToServer(*param1_literal).ConsumeValueOrDie(); + + auto param0 = builder.Parameter(0, param0_literal->shape(), "param0"); + auto param1 = builder.Parameter(1, param1_literal->shape(), "param1"); + auto map = + builder.Map({param0, param1}, CreateScalarAddComputation(S32, &builder)); + + Array2D<int32> expected(2, 2); + expected(0, 0) = 11; + expected(0, 1) = 22; + expected(1, 0) = 33; + expected(1, 1) = 44; + ComputeAndCompareR2<int32>(&builder, expected, + {param0_data.get(), param1_data.get()}); +} + 
+XLA_TEST_F(MapTest, AddR3_3x0x2) { + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR3FromArray3D<int32>(Array3D<int32>(3, 0, 2)); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + std::unique_ptr<Literal> param1_literal = + LiteralUtil::CreateR3FromArray3D<int32>(Array3D<int32>(3, 0, 2)); + std::unique_ptr<GlobalData> param1_data = + client_->TransferToServer(*param1_literal).ConsumeValueOrDie(); + + auto param0 = builder.Parameter(0, param0_literal->shape(), "param0"); + auto param1 = builder.Parameter(1, param1_literal->shape(), "param1"); + auto map = + builder.Map({param0, param1}, CreateScalarAddComputation(S32, &builder)); + + ComputeAndCompareR3<int32>(&builder, Array3D<int32>(3, 0, 2), + {param0_data.get(), param1_data.get()}); +} + +TEST_F(MapTest, MapTernaryAdder) { + // Maps (lambda (x y z) (+ x y z)) onto three R1F32 vectors. + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR1<float>({2.2f, 3.3f, 4.4f, 5.5f}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + std::unique_ptr<Literal> param1_literal = + LiteralUtil::CreateR1<float>({5.1f, 4.4f, -0.1f, -5.5f}); + std::unique_ptr<GlobalData> param1_data = + client_->TransferToServer(*param1_literal).ConsumeValueOrDie(); + std::unique_ptr<Literal> param2_literal = + LiteralUtil::CreateR1<float>({-10.0f, -100.0f, -900.0f, -400.0f}); + std::unique_ptr<GlobalData> param2_data = + client_->TransferToServer(*param2_literal).ConsumeValueOrDie(); + + auto param0 = builder.Parameter(0, param0_literal->shape(), "param0"); + auto param1 = builder.Parameter(1, param1_literal->shape(), "param1"); + auto param2 = builder.Parameter(2, param2_literal->shape(), "param2"); + auto map = builder.Map({param0, param1, param2}, CreateTernaryAdder()); + + 
ComputeAndCompareR1<float>( + &builder, {-2.7f, -92.3f, -895.7f, -400.0f}, + {param0_data.get(), param1_data.get(), param2_data.get()}, + ErrorSpec(0.01f)); +} + +TEST_F(MapTest, MapGt) { + // Maps (x,y) -> x > y onto two R1F32 vectors. + ComputationBuilder b(client_, TestName()); + auto gt = CreateGt(); + b.Map({b.ConstantR1<float>({1, 20}), b.ConstantR1<float>({10, 2})}, gt); + ComputeAndCompareR1<bool>(&b, {false, true}, {}); +} + +TEST_F(MapTest, NestedBinaryMap) { + Computation max_with_square; + { + // max_with_square(x) = do max(x, x^2) via a map. + ComputationBuilder b(client_, "max_with_square"); + auto x = b.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); + b.Map({x, b.Mul(x, x)}, CreateMax()); + auto computation_status = b.Build(); + ASSERT_IS_OK(computation_status.status()); + max_with_square = computation_status.ConsumeValueOrDie(); + } + ComputationBuilder b(client_, TestName()); + auto input = b.ConstantR1<float>({0.1f, 0.5f, -0.5f, 1.0f, 2.0f}); + b.Map({input}, max_with_square); + ComputeAndCompareR1<float>(&b, {0.1f, 0.5f, 0.25f, 1.0f, 4.0f}, {}); +} + +TEST_F(MapTest, MapOperantionWithBuildError) { + // Maps (lambda (x y) (+ x y)) onto two R1F32 vectors but uses an unsupported + // type combination (F32 + U16) to test that the error is reported to the + // outermost ComputationBuilder. 
+ ComputationBuilder builder(client_, TestName()); + + auto sub_builder = builder.CreateSubBuilder("ErrorAdd"); + auto x = sub_builder->Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); + auto y = sub_builder->Parameter(1, ShapeUtil::MakeShape(U16, {}), "y"); + auto adder = sub_builder->Add(x, y); + auto error_add = sub_builder->BuildAndNoteError(); + + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR1<float>({2.2f, 3.3f, 4.4f, 5.5f}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + std::unique_ptr<Literal> param1_literal = + LiteralUtil::CreateR1<float>({5.1f, 4.4f, -0.1f, -5.5f}); + std::unique_ptr<GlobalData> param1_data = + client_->TransferToServer(*param1_literal).ConsumeValueOrDie(); + + auto param0 = builder.Parameter(0, param0_literal->shape(), "param0"); + auto param1 = builder.Parameter(1, param1_literal->shape(), "param1"); + auto map = builder.Map({param0, param1}, error_add); + + StatusOr<Computation> computation_status = builder.Build(); + ASSERT_TRUE(!computation_status.ok()); + EXPECT_MATCH(computation_status.status().ToString(), + testing::HasSubstr("error from: ErrorAdd: binary op with " + "different element types: f32[] and u16[]")); +} + +// MapTest disables inline and algsimp. MapTestWithFullOpt runs all +// optimizations. +using MapTestWithFullOpt = ClientLibraryTestBase; + +// Regression test for b/31466798. The inliner simplifies map(param0, param1, +// power) to power(param0, param1) without deleting the old subcomputation which +// is the same as the new entry computation. HloSubcomputationUnification used +// to have issues with such patterns and maybe invalidate the pointer to entry +// computation. 
+TEST_F(MapTestWithFullOpt, MapScalarPower) { + ComputationBuilder builder(client_, TestName()); + + auto sub_builder = builder.CreateSubBuilder("power"); + auto x = sub_builder->Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); + auto y = sub_builder->Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); + sub_builder->Pow(x, y); + auto power = sub_builder->BuildAndNoteError(); + + std::unique_ptr<Literal> param0_literal = LiteralUtil::CreateR0<float>(2.0f); + std::unique_ptr<Literal> param1_literal = LiteralUtil::CreateR0<float>(5.0f); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + std::unique_ptr<GlobalData> param1_data = + client_->TransferToServer(*param1_literal).ConsumeValueOrDie(); + + auto param0 = builder.Parameter(0, param0_literal->shape(), "param0"); + auto param1 = builder.Parameter(1, param1_literal->shape(), "param1"); + builder.Map({param0, param1}, power); + + ComputeAndCompareR0<float>(&builder, 32.0f, + {param0_data.get(), param1_data.get()}, + ErrorSpec(0.01f)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc b/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc new file mode 100644 index 0000000000..8aa4029440 --- /dev/null +++ b/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc @@ -0,0 +1,179 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <algorithm> +#include <memory> +#include <string> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/strings/stringprintf.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class MatOpsSimpleTest : public ClientLibraryTestBase { + protected: + Computation BuildSum() { + // sum(x, y) = x + y + ComputationBuilder builder(client_, "sum"); + auto x_value = + builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x_value"); + auto y_value = + builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), 
"y_value"); + builder.Add(x_value, y_value); + auto computation_status = builder.Build(); + TF_CHECK_OK(computation_status.status()); + return computation_status.ConsumeValueOrDie(); + } + + void TestLinspaceMax(int64 rows, int64 cols) { + float from = -128.0, to = 256.0; + std::unique_ptr<Array2D<float>> alhs = + MakeLinspaceArray2D(from, to, rows, cols); + auto arhs = MakeUnique<Array2D<float>>(rows, cols, 1.0); + + ComputationBuilder builder( + client_, + tensorflow::strings::Printf("max_%lldx%lld_linspace", rows, cols)); + auto lhs = builder.ConstantR2FromArray2D<float>(*alhs); + auto rhs = builder.ConstantR2FromArray2D<float>(*arhs); + auto max = builder.Max(lhs, rhs); + + Array2D<float> aexpected(rows, cols); + for (int row = 0; row < rows; ++row) { + for (int col = 0; col < cols; ++col) { + aexpected(row, col) = std::max((*alhs)(row, col), (*arhs)(row, col)); + } + } + + ComputeAndCompareR2<float>(&builder, aexpected, {}, ErrorSpec(1e-6)); + } +}; + +TEST_F(MatOpsSimpleTest, ExpTwoByTwoValues) { + ComputationBuilder builder(client_, "exp_2x2"); + auto data = builder.ConstantR2<float>({ + {1.0, 0.0}, // row 0 + {-1.0, 0.5}, // row 1 + }); + builder.Exp(data); + + std::unique_ptr<Literal> expected = + LiteralUtil::CreateR2<float>({{2.71828, 1.00000}, // row 0 + {0.36788, 1.64872}}); // row 1 + + ComputeAndCompareLiteral(&builder, *expected, {}, ErrorSpec(1e-5)); +} + +TEST_F(MatOpsSimpleTest, MapTwoByTwo) { + Computation add_half; + { + // add_half(x) = x + 0.5 + ComputationBuilder builder(client_, "add_half"); + auto x_value = + builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x_value"); + auto half = builder.ConstantR0<float>(0.5); + builder.Add(x_value, half); + auto computation_status = builder.Build(); + ASSERT_IS_OK(computation_status.status()); + add_half = computation_status.ConsumeValueOrDie(); + } + + ComputationBuilder builder(client_, "map_2x2"); + auto data = builder.ConstantR2<float>({ + {1.0, 0.0}, // row 0 + {-1.0, 0.5}, // row 1 + }); + 
auto map = builder.Map({data}, add_half); + + std::unique_ptr<Literal> expected = + LiteralUtil::CreateR2<float>({{1.5, 0.5}, // row 0 + {-0.5, 1.0}}); // row 1 + ComputeAndCompareLiteral(&builder, *expected, {}, ErrorSpec(1e-5)); +} + +TEST_F(MatOpsSimpleTest, MaxTwoByTwoValues) { + ComputationBuilder builder(client_, "max_2x2"); + auto lhs = builder.ConstantR2<float>({ + {7.0, 2.0}, // row 0 + {3.0, -4.0}, // row 1 + }); + auto rhs = builder.ConstantR2<float>({ + {5.0, 6.0}, // row 0 + {1.0, -8.0}, // row 1 + }); + auto max = builder.Max(lhs, rhs); + + std::unique_ptr<Literal> expected = + LiteralUtil::CreateR2<float>({{7.0, 6.0}, // row 0 + {3.0, -4.0}}); // row 1 + ComputeAndCompareLiteral(&builder, *expected, {}, ErrorSpec(1e-6)); +} + +TEST_F(MatOpsSimpleTest, Max1x1Linspace) { TestLinspaceMax(1, 1); } + +TEST_F(MatOpsSimpleTest, Max2x2Linspace) { TestLinspaceMax(2, 2); } + +TEST_F(MatOpsSimpleTest, Max3x3Linspace) { TestLinspaceMax(3, 3); } + +TEST_F(MatOpsSimpleTest, Max4x4Linspace) { TestLinspaceMax(4, 4); } + +TEST_F(MatOpsSimpleTest, Max6x6Linspace) { TestLinspaceMax(6, 6); } + +TEST_F(MatOpsSimpleTest, Max8x8Linspace) { TestLinspaceMax(8, 8); } + +TEST_F(MatOpsSimpleTest, Max12x12Linspace) { TestLinspaceMax(12, 12); } + +TEST_F(MatOpsSimpleTest, Max16x16Linspace) { TestLinspaceMax(16, 16); } + +TEST_F(MatOpsSimpleTest, Max32x8Linspace) { TestLinspaceMax(32, 8); } + +TEST_F(MatOpsSimpleTest, Max64x8Linspace) { TestLinspaceMax(64, 8); } + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + 
} + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/multidimensional_slice_test.cc b/tensorflow/compiler/xla/tests/multidimensional_slice_test.cc new file mode 100644 index 0000000000..2cd680399b --- /dev/null +++ b/tensorflow/compiler/xla/tests/multidimensional_slice_test.cc @@ -0,0 +1,74 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Tests that slice operations can be performed. 
+ +#include <memory> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array3d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class SliceTest : public ClientLibraryTestBase {}; + +XLA_TEST_F(SliceTest, Slice2D) { + ComputationBuilder builder(client_, "slice_2d"); + auto original = builder.ConstantR2<float>( + {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}, {10.0, 11.0, 12.0}}); + builder.Slice(original, {2, 1}, {4, 3}); + + Array2D<float> expected({{8.0f, 9.0f}, {11.0f, 12.0f}}); + ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.000001)); +} + +XLA_TEST_F(SliceTest, Slice3D) { + ComputationBuilder builder(client_, "slice_3d"); + Array3D<float> array_3d( + {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}); + auto original = builder.ConstantR3FromArray3D<float>(array_3d); + builder.Slice(original, {0, 0, 1}, {2, 1, 2}); + + Array3D<float> expected_3d({{{2.0f}}, {{6.0f}}}); + ComputeAndCompareR3<float>(&builder, expected_3d, {}, ErrorSpec(0.000001)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); 
+} diff --git a/tensorflow/compiler/xla/tests/pad_test.cc b/tensorflow/compiler/xla/tests/pad_test.cc new file mode 100644 index 0000000000..d3400b432f --- /dev/null +++ b/tensorflow/compiler/xla/tests/pad_test.cc @@ -0,0 +1,420 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class PadTest : public ClientLibraryTestBase { + protected: + PadTest() { + // Initializes the padding configuration used for R4 tests. 
+ // Pad only on the dimension 0 {low: 1, high: 0, interior: 2} and + // dimension 1 {low: 0, high: 2, interior: 1}. + auto dimension0 = r4_padding_on_dim0_dim1_.add_dimensions(); + dimension0->set_edge_padding_low(1); + dimension0->set_edge_padding_high(0); + dimension0->set_interior_padding(2); + auto dimension1 = r4_padding_on_dim0_dim1_.add_dimensions(); + dimension1->set_edge_padding_low(0); + dimension1->set_edge_padding_high(2); + dimension1->set_interior_padding(1); + auto dimension2 = r4_padding_on_dim0_dim1_.add_dimensions(); + dimension2->set_edge_padding_low(0); + dimension2->set_edge_padding_high(0); + dimension2->set_interior_padding(0); + auto dimension3 = r4_padding_on_dim0_dim1_.add_dimensions(); + dimension3->set_edge_padding_low(0); + dimension3->set_edge_padding_high(0); + dimension3->set_interior_padding(0); + } + + // Padding configuration for R4 that only pads dimension 0 and 1. + PaddingConfig r4_padding_on_dim0_dim1_; +}; + +// Tests a Pad() with a zero-element input and output. +XLA_TEST_F(PadTest, Pad1DS0ToS0Array) { + ComputationBuilder b(client_, TestName()); + // Set up the padding configuration {low: 0, high: 0, interior: 0}. + PaddingConfig padding_config; + auto dimension = padding_config.add_dimensions(); + dimension->set_edge_padding_low(0); + dimension->set_edge_padding_high(0); + dimension->set_interior_padding(0); + + b.Pad(b.ConstantR1<float>({}), b.ConstantR0<float>(0.1), padding_config); + ComputeAndCompareR1<float>(&b, {}, {}, ErrorSpec(0.0001)); +} + +// Tests a Pad() with a zero-element input but a non-zero-element output. +XLA_TEST_F(PadTest, Pad1DS0ToS5Array) { + ComputationBuilder b(client_, TestName()); + // Set up the padding configuration {low: 3, high: 0, interior: 1}. 
+ PaddingConfig padding_config; + auto dimension = padding_config.add_dimensions(); + dimension->set_edge_padding_low(1); + dimension->set_edge_padding_high(4); + dimension->set_interior_padding(7); + + b.Pad(b.ConstantR1<float>({}), b.ConstantR0<float>(0.1), padding_config); + ComputeAndCompareR1<float>(&b, std::vector<float>(5, 0.1), {}, + ErrorSpec(0.0001)); +} + +XLA_TEST_F(PadTest, Pad1DS3Array) { + ComputationBuilder b(client_, TestName()); + // Set up the padding configuration {low: 3, high: 0, interior: 1}. + PaddingConfig padding_config; + auto dimension = padding_config.add_dimensions(); + dimension->set_edge_padding_low(3); + dimension->set_edge_padding_high(0); + dimension->set_interior_padding(1); + + b.Pad(b.ConstantR1<float>({1, 2, 3}), b.ConstantR0<float>(0.1), + padding_config); + std::vector<float> expected({0.1, 0.1, 0.1, 1, 0.1, 2, 0.1, 3}); + ComputeAndCompareR1<float>(&b, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(PadTest, Pad4D_2x0x3x2_FloatArray) { + ComputationBuilder b(client_, TestName()); + b.Pad(b.ConstantR4FromArray4D<float>(Array4D<float>(2, 0, 3, 2)), + b.ConstantR0<float>(1.5), r4_padding_on_dim0_dim1_); + ComputeAndCompareR4<float>(&b, Array4D<float>(5, 2, 3, 2, 1.5f), {}, + ErrorSpec(0.0001)); +} + +TEST_F(PadTest, Pad4DFloat_1x1x3x2_Array) { + ComputationBuilder b(client_, TestName()); + auto input = MakeUnique<Array4D<float>>(1, 1, 3, 2); + Array2D<float> input_xy({ + {1.0f, 2.0f}, // row 0 + {3.0f, 4.0f}, // row 1 + {5.0f, 6.0f}, // row 2 + }); + input->FillWithYX(input_xy); + + b.Pad(b.ConstantR4FromArray4D<float>(*input), b.ConstantR0<float>(1.5), + r4_padding_on_dim0_dim1_); + + auto expected = MakeUnique<Array4D<float>>(2, 3, 3, 2); + expected->Fill(1.5); + (*expected)(1, 0, 0, 0) = 1.0f; + (*expected)(1, 0, 0, 1) = 2.0f; + (*expected)(1, 0, 1, 0) = 3.0f; + (*expected)(1, 0, 1, 1) = 4.0f; + (*expected)(1, 0, 2, 0) = 5.0f; + (*expected)(1, 0, 2, 1) = 6.0f; + ComputeAndCompareR4<float>(&b, *expected, {}, 
ErrorSpec(0.0001)); +} + +TEST_F(PadTest, Pad4DFloatArrayWithInteriorPadding) { + ComputationBuilder b(client_, TestName()); + + const float pad_value = 1.5f; + Array4D<float> input(3, 2, 1, 1, {1, 2, 3, 4, 5, 6}); + b.Pad(b.ConstantR4FromArray4D<float>(input), b.ConstantR0<float>(pad_value), + r4_padding_on_dim0_dim1_); + + auto expected = MakeUnique<Array4D<float>>(8, 5, 1, 1); + expected->Fill(pad_value); + (*expected)(1, 0, 0, 0) = 1.0f; + (*expected)(1, 2, 0, 0) = 2.0f; + (*expected)(4, 0, 0, 0) = 3.0f; + (*expected)(4, 2, 0, 0) = 4.0f; + (*expected)(7, 0, 0, 0) = 5.0f; + (*expected)(7, 2, 0, 0) = 6.0f; + ComputeAndCompareR4<float>(&b, *expected, {}, ErrorSpec(0.0001)); +} + +TEST_F(PadTest, Pad4DFloatArrayMinorFirstSmall) { + ComputationBuilder b(client_, TestName()); + + PaddingConfig padding_config; + auto dimension0 = padding_config.add_dimensions(); + dimension0->set_edge_padding_low(0); + dimension0->set_edge_padding_high(0); + dimension0->set_interior_padding(0); + auto dimension1 = padding_config.add_dimensions(); + dimension1->set_edge_padding_low(0); + dimension1->set_edge_padding_high(0); + dimension1->set_interior_padding(0); + auto dimension2 = padding_config.add_dimensions(); + dimension2->set_edge_padding_low(2); + dimension2->set_edge_padding_high(1); + dimension2->set_interior_padding(0); + auto dimension3 = padding_config.add_dimensions(); + dimension3->set_edge_padding_low(2); + dimension3->set_edge_padding_high(3); + dimension3->set_interior_padding(0); + + const Layout layout = LayoutUtil::MakeLayout({0, 1, 2, 3}); + + const float pad_value = -5.123f; + Array4D<float> input_array(1, 1, 2, 3, {1, 2, 3, 4, 5, 6}); + auto input = LiteralUtil::CreateR4FromArray4D<float>(input_array); + input = LiteralUtil::Relayout(*input, layout); + + b.Pad(b.ConstantLiteral(*input), b.ConstantR0(pad_value), padding_config); + + Array4D<float> expected_array(1, 1, 5, 8); + expected_array.Fill(pad_value); + expected_array(0, 0, 2, 2) = 1.0f; + 
expected_array(0, 0, 2, 3) = 2.0f; + expected_array(0, 0, 2, 4) = 3.0f; + expected_array(0, 0, 3, 2) = 4.0f; + expected_array(0, 0, 3, 3) = 5.0f; + expected_array(0, 0, 3, 4) = 6.0f; + ComputeAndCompareR4<float>(&b, expected_array, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(PadTest, Pad4DFloatArrayMinorFirstNonTrivialMinorDimensions) { + ComputationBuilder b(client_, TestName()); + + PaddingConfig padding_config; + auto dimension0 = padding_config.add_dimensions(); + dimension0->set_edge_padding_low(0); + dimension0->set_edge_padding_high(0); + dimension0->set_interior_padding(0); + auto dimension1 = padding_config.add_dimensions(); + dimension1->set_edge_padding_low(0); + dimension1->set_edge_padding_high(0); + dimension1->set_interior_padding(0); + auto dimension2 = padding_config.add_dimensions(); + dimension2->set_edge_padding_low(2); + dimension2->set_edge_padding_high(2); + dimension2->set_interior_padding(1); + auto dimension3 = padding_config.add_dimensions(); + dimension3->set_edge_padding_low(2); + dimension3->set_edge_padding_high(2); + dimension3->set_interior_padding(0); + + const Layout layout = LayoutUtil::MakeLayout({0, 1, 2, 3}); + + const float pad_value = -5.123f; + Array4D<float> input_array(1, 25, 7, 7); + input_array.Fill(pad_value); + input_array(0, 0, 0, 0) = 1.0f; + input_array(0, 24, 6, 6) = 2.0f; + input_array(0, 17, 2, 5) = 3.0f; + auto input = LiteralUtil::CreateR4FromArray4D<float>(input_array); + input = LiteralUtil::Relayout(*input, layout); + + b.Pad(b.ConstantLiteral(*input), b.ConstantR0(pad_value), padding_config); + + Array4D<float> expected_array(1, 25, 17, 11); + expected_array.Fill(pad_value); + expected_array(0, 0, 2, 2) = 1.0f; + expected_array(0, 24, 14, 8) = 2.0f; + expected_array(0, 17, 6, 7) = 3.0f; + ComputeAndCompareR4<float>(&b, expected_array, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(PadTest, Pad4DU8Array) { + ComputationBuilder b(client_, TestName()); + auto input = MakeUnique<Array4D<uint8>>(1, 1, 3, 2); + 
Array2D<uint8> input_xy({ + {1, 2}, // row 0 + {3, 4}, // row 1 + {5, 6}, // row 2 + }); + input->FillWithYX(input_xy); + + b.Pad(b.ConstantR4FromArray4D<uint8>(*input), b.ConstantR0<uint8>(35), + r4_padding_on_dim0_dim1_); + + auto expected = MakeUnique<Array4D<uint8>>(2, 3, 3, 2); + expected->Fill(35); + (*expected)(1, 0, 0, 0) = 1; + (*expected)(1, 0, 0, 1) = 2; + (*expected)(1, 0, 1, 0) = 3; + (*expected)(1, 0, 1, 1) = 4; + (*expected)(1, 0, 2, 0) = 5; + (*expected)(1, 0, 2, 1) = 6; + ComputeAndCompareR4<uint8>(&b, *expected, {}); +} + +XLA_TEST_F(PadTest, Pad4DPredArray) { + ComputationBuilder b(client_, TestName()); + + // Since bool is currently not well supported, use Broadcast operation to + // create the operand for Pad. + auto input = b.Broadcast(b.ConstantR0<bool>(true), {1, 1, 3, 2}); + auto padded = + b.Pad(input, b.ConstantR0<bool>(false), r4_padding_on_dim0_dim1_); + + // For the same reason, use Select to convert boolean values to int32. + auto zeros = MakeUnique<Array4D<int32>>(2, 3, 3, 2); + auto ones = MakeUnique<Array4D<int32>>(2, 3, 3, 2); + zeros->Fill(0); + ones->Fill(1); + b.Select(padded, b.ConstantR4FromArray4D<int32>(*ones), + b.ConstantR4FromArray4D<int32>(*zeros)); + + auto expected = MakeUnique<Array4D<int32>>(2, 3, 3, 2); + expected->Fill(0); + (*expected)(1, 0, 0, 0) = 1; + (*expected)(1, 0, 0, 1) = 1; + (*expected)(1, 0, 1, 0) = 1; + (*expected)(1, 0, 1, 1) = 1; + (*expected)(1, 0, 2, 0) = 1; + (*expected)(1, 0, 2, 1) = 1; + ComputeAndCompareR4<int32>(&b, *expected, {}); +} + +XLA_TEST_F(PadTest, Large2DPad) { + ComputationBuilder b(client_, TestName()); + + auto input = b.Parameter(0, ShapeUtil::MakeShape(F32, {4, 4}), "input"); + PaddingConfig padding_config = MakeNoPaddingConfig(2); + for (int dim : {0, 1}) { + padding_config.mutable_dimensions(dim)->set_edge_padding_low( + 98 + 100 * (1 - dim)); + padding_config.mutable_dimensions(dim)->set_edge_padding_high(58 + + 100 * dim); + } + auto padded = b.Pad(input, 
b.ConstantR0<float>(0.0f), padding_config); + + auto ones = MakeUnique<Array2D<float>>(4, 4); + ones->Fill(1.0f); + auto input_literal = LiteralUtil::CreateR2FromArray2D<float>(*ones); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + auto expected = ReferenceUtil::PadArray2D(*ones, padding_config, 0.0f); + ComputeAndCompareR2<float>(&b, *expected, {input_data.get()}); +} + +XLA_TEST_F(PadTest, AllTypes2DPad) { + ComputationBuilder b(client_, TestName()); + + constexpr int64 in_rows = 35; + constexpr int64 in_cols = 35; + auto input = + b.Parameter(0, ShapeUtil::MakeShape(F32, {in_rows, in_cols}), "input"); + PaddingConfig padding_config = MakeNoPaddingConfig(2); + padding_config.mutable_dimensions(0)->set_edge_padding_low(7); + padding_config.mutable_dimensions(0)->set_edge_padding_high(5); + padding_config.mutable_dimensions(0)->set_interior_padding(3); + padding_config.mutable_dimensions(1)->set_edge_padding_low(6); + padding_config.mutable_dimensions(1)->set_edge_padding_high(4); + padding_config.mutable_dimensions(1)->set_interior_padding(2); + auto padded = b.Pad(input, b.ConstantR0<float>(3.14f), padding_config); + + auto operand = MakeUnique<Array2D<float>>(in_rows, in_cols); + operand->FillUnique(0.0f); + auto input_literal = LiteralUtil::CreateR2FromArray2D<float>(*operand); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + auto expected = ReferenceUtil::PadArray2D(*operand, padding_config, 3.14f); + ComputeAndCompareR2<float>(&b, *expected, {input_data.get()}, + ErrorSpec{0.0001}); +} + +XLA_TEST_F(PadTest, High2DPad) { + ComputationBuilder b(client_, TestName()); + + constexpr int64 in_rows = 129; + constexpr int64 in_cols = 129; + constexpr int64 low_padding = 0; + int64 high_padding[2] = {5, 7}; + constexpr int64 interior_padding = 0; + auto input = + b.Parameter(0, ShapeUtil::MakeShape(F32, {in_rows, in_cols}), "input"); + 
PaddingConfig padding_config = MakeNoPaddingConfig(2); + for (int dim : {0, 1}) { + padding_config.mutable_dimensions(dim)->set_edge_padding_low(low_padding); + padding_config.mutable_dimensions(dim)->set_edge_padding_high( + high_padding[dim]); + padding_config.mutable_dimensions(dim)->set_interior_padding( + interior_padding); + } + auto padded = b.Pad(input, b.ConstantR0<float>(2.718f), padding_config); + + auto operand = MakeUnique<Array2D<float>>(in_rows, in_cols); + operand->FillUnique(1.0f); + auto input_literal = LiteralUtil::CreateR2FromArray2D<float>(*operand); + auto expected = ReferenceUtil::PadArray2D(*operand, padding_config, 2.718f); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + ComputeAndCompareR2<float>(&b, *expected, {input_data.get()}, + ErrorSpec(0.0001)); +} + +// Regression test for b/31827337. +XLA_TEST_F(PadTest, ReducePad) { + ComputationBuilder b(client_, TestName()); + auto input = b.Parameter(0, ShapeUtil::MakeShape(F32, {2, 2, 2, 2}), "input"); + + Computation add_f32 = CreateScalarAddComputation(F32, &b); + auto reduce = b.Reduce(input, b.ConstantR0<float>(0.0), add_f32, {0}); + + PaddingConfig padding_config = MakeNoPaddingConfig(3); + padding_config.mutable_dimensions(0)->set_edge_padding_low(1); + padding_config.mutable_dimensions(0)->set_edge_padding_high(1); + auto pad = b.Pad(reduce, b.ConstantR0<float>(0.0), padding_config); + + auto ones = MakeUnique<Array4D<float>>(2, 2, 2, 2); + ones->Fill(1.0); + auto input_literal = LiteralUtil::CreateR4FromArray4D<float>(*ones); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + Array3D<float> expected({{{0.0, 0.0}, {0.0, 0.0}}, + {{2.0, 2.0}, {2.0, 2.0}}, + {{2.0, 2.0}, {2.0, 2.0}}, + {{0.0, 0.0}, {0.0, 0.0}}}); + ComputeAndCompareR3<float>(&b, expected, {input_data.get()}); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + 
std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/params_test.cc b/tensorflow/compiler/xla/tests/params_test.cc new file mode 100644 index 0000000000..2f05576cee --- /dev/null +++ b/tensorflow/compiler/xla/tests/params_test.cc @@ -0,0 +1,357 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <algorithm> +#include <memory> +#include <string> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class ParamsTest : public ClientLibraryTestBase {}; + +XLA_TEST_F(ParamsTest, ConstantR0F32Param) { + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR0<float>(3.14159f); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto p = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "param0"); + + ComputeAndCompareR0<float>(&builder, 3.14159f, {param0_data.get()}, + ErrorSpec(0.0001f)); +} + +XLA_TEST_F(ParamsTest, ConstantR1S0F32Param) { + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = LiteralUtil::CreateR1<float>({}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto p = builder.Parameter(0, ShapeUtil::MakeShape(F32, {0}), 
"param0"); + + ComputeAndCompareR1<float>(&builder, {}, {param0_data.get()}, + ErrorSpec(0.01f)); +} + +XLA_TEST_F(ParamsTest, ConstantR1S2F32Param) { + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR1<float>({3.14f, -100.25f}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto p = builder.Parameter(0, ShapeUtil::MakeShape(F32, {2}), "param0"); + + ComputeAndCompareR1<float>(&builder, {3.14f, -100.25f}, {param0_data.get()}, + ErrorSpec(0.01f)); +} + +XLA_TEST_F(ParamsTest, ConstantR1U8Param) { + ComputationBuilder builder(client_, TestName()); + string str("hello world"); + std::unique_ptr<Literal> param0_literal = LiteralUtil::CreateR1U8(str); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto p = builder.Parameter( + 0, ShapeUtil::MakeShape(U8, {static_cast<int64>(str.size())}), "param0"); + + ComputeAndCompareR1U8(&builder, str, {param0_data.get()}); +} + +XLA_TEST_F(ParamsTest, ConstantR2_3x0_F32Param) { + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR2FromArray2D<float>(Array2D<float>(3, 0)); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto p = builder.Parameter(0, ShapeUtil::MakeShape(F32, {3, 0}), "param0"); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(3, 0), + {param0_data.get()}, ErrorSpec(0.01f)); +} + +XLA_TEST_F(ParamsTest, ConstantR2F32Param) { + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = LiteralUtil::CreateR2<float>( + {{3.14f, -100.25f}, {7e8f, 7e-9f}, {30.3f, -100.0f}}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + + auto p = builder.Parameter(0, ShapeUtil::MakeShape(F32, {3, 2}), 
"param0"); + + Array2D<float> expected_array( + {{3.14f, -100.25f}, {7e8f, 7e-9f}, {30.3f, -100.0f}}); + ComputeAndCompareR2<float>(&builder, expected_array, {param0_data.get()}, + ErrorSpec(0.01f)); +} + +XLA_TEST_F(ParamsTest, TwoParameters) { + ComputationBuilder builder(client_, TestName()); + + std::unique_ptr<Literal> literal0 = LiteralUtil::CreateR1<float>({1, 2}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*literal0).ConsumeValueOrDie(); + auto param0 = builder.Parameter(0, literal0->shape(), "param0"); + + std::unique_ptr<Literal> literal1 = LiteralUtil::CreateR1<float>({10, 20}); + std::unique_ptr<GlobalData> param1_data = + client_->TransferToServer(*literal1).ConsumeValueOrDie(); + auto param1 = builder.Parameter(1, literal1->shape(), "param1"); + + // Use both parameters + // + // {1, 2} + {10, 20} = {11, 22} + auto sum = builder.Add(param0, param1); + sum = builder.Add(param0, param1); + + // Use only the second parameter again, to show that it can be used + // twice and to make the computation asymmetric in the two + // parameters to test that the parameters are not swapped. + // + // {11, 22} * {10, 20} = {110, 440} + auto prod = builder.Mul(sum, param1); + + ComputeAndCompareR1<float>(&builder, {110, 440}, + {param0_data.get(), param1_data.get()}, + ErrorSpec(0.0001f)); +} + +XLA_TEST_F(ParamsTest, MissingParameter) { + // Test that an error is returned when a computation with an incomplete set of + // parameters (parameter numbers not contiguous from 0) is executed. 
+ std::unique_ptr<Literal> literal = LiteralUtil::CreateR0<float>(3.14159f); + std::unique_ptr<GlobalData> data = + client_->TransferToServer(*literal).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto p = builder.Parameter(2, ShapeUtil::MakeShape(F32, {}), "param2"); + auto computation = builder.Build().ConsumeValueOrDie(); + + auto execute_status = client_->Execute(computation, {data.get(), data.get()}, + /*output_layout=*/nullptr, + /*execution_profile=*/nullptr); + ASSERT_EQ(execute_status.status().code(), + tensorflow::error::FAILED_PRECONDITION); +} + +XLA_TEST_F(ParamsTest, UnusedParameter) { + ComputationBuilder builder(client_, TestName()); + + std::unique_ptr<Literal> literal0 = LiteralUtil::CreateR1<float>({1, 2}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*literal0).ConsumeValueOrDie(); + auto param0 = builder.Parameter(0, literal0->shape(), "param0"); + + std::unique_ptr<Literal> literal1 = LiteralUtil::CreateR1<float>({10, 20}); + std::unique_ptr<GlobalData> param1_data = + client_->TransferToServer(*literal1).ConsumeValueOrDie(); + auto param1 = builder.Parameter(1, literal1->shape(), "param1"); + + ComputeAndCompareR1<float>(&builder, {10, 20}, + {param0_data.get(), param1_data.get()}, + ErrorSpec(0.0001f)); +} + +XLA_TEST_F(ParamsTest, UnusedParametersInUnusedExpression) { + // Build a computation with a couple unused parameters which are used in an + // unused expression. 
+ ComputationBuilder builder(client_, TestName()); + + std::unique_ptr<Literal> literal0 = LiteralUtil::CreateR1<float>({1, 2}); + std::unique_ptr<GlobalData> param0_data = + client_->TransferToServer(*literal0).ConsumeValueOrDie(); + + std::unique_ptr<Literal> literal1 = + LiteralUtil::CreateR1<float>({10, 20, 30}); + std::unique_ptr<GlobalData> param1_data = + client_->TransferToServer(*literal1).ConsumeValueOrDie(); + + auto param0 = builder.Parameter(0, literal0->shape(), "param0"); + auto param1 = builder.Parameter(1, literal1->shape(), "param1"); + auto param2 = builder.Parameter(2, literal1->shape(), "param2"); + + // This add is unused. + builder.Add(param1, param2); + + builder.Neg(param0); + + ComputeAndCompareR1<float>( + &builder, {-1, -2}, + {param0_data.get(), param1_data.get(), param1_data.get()}, + ErrorSpec(0.0001f)); +} + +XLA_TEST_F(ParamsTest, HundredLargeR1Parameters) { + ComputationBuilder builder(client_, TestName()); + constexpr int size = 8 * 128 * 2; + + std::vector<float> init_value = {{0, 1}}; + init_value.resize(size); + ComputationDataHandle sum_handle = builder.ConstantR1<float>(init_value); + std::vector<float> sum = {{0, 1}}; + sum.resize(size); + + std::vector<std::unique_ptr<GlobalData>> param_data_owner; + + constexpr int parameter_count = 100; + for (int i = 0; i < parameter_count; ++i) { + const float entry0 = i; + const float entry1 = 2 * i; + sum[0] += entry0; + sum[1] += entry1; + + std::vector<float> sum_value = {{entry0, entry1}}; + sum_value.resize(size); + std::unique_ptr<Literal> literal = LiteralUtil::CreateR1<float>(sum_value); + param_data_owner.push_back( + client_->TransferToServer(*literal).ConsumeValueOrDie()); + ComputationDataHandle param = + builder.Parameter(i, literal->shape(), "param"); + sum_handle = builder.Add(sum_handle, param); + } + + std::vector<GlobalData*> param_data; + for (const std::unique_ptr<GlobalData>& data : param_data_owner) { + param_data.push_back(data.get()); + } + + 
ComputeAndCompareR1<float>(&builder, sum, param_data, ErrorSpec(0.0001f)); +} + +XLA_TEST_F(ParamsTest, + DISABLED_ON_CPU_PARALLEL(TupleOfR1ParametersAddedTogether)) { + ComputationBuilder builder(client_, TestName()); + + Shape r1f32_3 = ShapeUtil::MakeShape(F32, {3}); + Shape tuple_shape = ShapeUtil::MakeTupleShape({r1f32_3, r1f32_3}); + auto input = builder.Parameter(0, tuple_shape, "input"); + auto lhs = builder.GetTupleElement(input, 0); + auto rhs = builder.GetTupleElement(input, 1); + builder.Add(lhs, rhs); + + std::unique_ptr<GlobalData> data = + client_ + ->TransferToServer(*LiteralUtil::MakeTuple({ + LiteralUtil::CreateR1<float>({1, 2, 3}).get(), + LiteralUtil::CreateR1<float>({4, 5, 6}).get(), + })) + .ConsumeValueOrDie(); + + std::vector<GlobalData*> arguments = {data.get()}; + const std::vector<float> expected = {1 + 4, 2 + 5, 3 + 6}; + ComputeAndCompareR1<float>(&builder, expected, arguments, ErrorSpec(1e-5)); +} + +// Verifies that passing a 2x2 with {0, 1} layout returns the same value back +// when (transferred to the server and) passed through a parameter. +XLA_TEST_F(ParamsTest, R2_2x2_Layout_01) { + std::unique_ptr<Literal> literal = LiteralUtil::CreateR2<float>({ + {1, 2}, {3, 4}, + }); + *literal->mutable_shape()->mutable_layout() = LayoutUtil::MakeLayout({0, 1}); + ComputationBuilder builder(client_, TestName()); + builder.Parameter(0, literal->shape(), "input"); + + std::unique_ptr<GlobalData> data = + client_->TransferToServer(*literal).ConsumeValueOrDie(); + ComputeAndCompareLiteral(&builder, *literal, {data.get()}, ErrorSpec(1e-3)); +} + +// As above, but for {1, 0} layout. 
+XLA_TEST_F(ParamsTest, R2_2x2_Layout_10) { + std::unique_ptr<Literal> literal = LiteralUtil::CreateR2<float>({ + {1, 3}, {2, 4}, + }); + *literal->mutable_shape()->mutable_layout() = LayoutUtil::MakeLayout({1, 0}); + ComputationBuilder builder(client_, TestName()); + builder.Parameter(0, literal->shape(), "input"); + + std::unique_ptr<GlobalData> data = + client_->TransferToServer(*literal).ConsumeValueOrDie(); + ComputeAndCompareLiteral(&builder, *literal, {data.get()}, ErrorSpec(1e-3)); +} + +XLA_TEST_F(ParamsTest, R2_2x2_TryToPassReverseLayoutToParameter) { + std::unique_ptr<Literal> literal = LiteralUtil::CreateR2<float>({ + {1, 3}, {2, 4}, + }); + const Shape original = literal->shape(); + { + // Reverse the layout present in original, and make that the layout of the + // literal. + std::vector<int64> original_layout( + original.layout().minor_to_major().begin(), + original.layout().minor_to_major().end()); + std::reverse(original_layout.begin(), original_layout.end()); + *literal->mutable_shape()->mutable_layout() = + LayoutUtil::MakeLayout(original_layout); + ASSERT_EQ(2, LiteralUtil::Get<float>(*literal, {0, 1})); + } + // Use the original shape in building the computation. + ComputationBuilder builder(client_, TestName()); + auto input = builder.Parameter(0, original, "input"); + // Use the slice operator to get an off-diagonal element. + builder.Slice(input, {0, 1}, {1, 2}); + + std::unique_ptr<GlobalData> data = + client_->TransferToServer(*literal).ConsumeValueOrDie(); + // Check that we got the off-diagonal value that we expected. 
+ Array2D<float> expected(1, 1); + expected(0, 0) = 2; + ComputeAndCompareR2(&builder, expected, {data.get()}, ErrorSpec(1e-3)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/pred_test.cc b/tensorflow/compiler/xla/tests/pred_test.cc new file mode 100644 index 0000000000..96393c41e8 --- /dev/null +++ b/tensorflow/compiler/xla/tests/pred_test.cc @@ -0,0 +1,115 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Miscellaneous tests with the PRED type that don't fit anywhere else. 
+#include <memory> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class PredTest : public ClientLibraryTestBase { + protected: + void TestCompare(bool lhs, bool rhs, bool expected, + ComputationDataHandle (ComputationBuilder::*op)( + const ComputationDataHandle&, + const ComputationDataHandle&, + tensorflow::gtl::ArraySlice<int64>)) { + ComputationBuilder builder(client_, TestName()); + ComputationDataHandle lhs_op = builder.ConstantR0<bool>(lhs); + ComputationDataHandle rhs_op = builder.ConstantR0<bool>(rhs); + ComputationDataHandle result = (builder.*op)(lhs_op, rhs_op, {}); + ComputeAndCompareR0<bool>(&builder, expected, {}); + } +}; + +TEST_F(PredTest, ConstantR0PredTrue) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR0<bool>(true); + ComputeAndCompareR0<bool>(&builder, true, {}); +} + +TEST_F(PredTest, ConstantR0PredFalse) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR0<bool>(false); + ComputeAndCompareR0<bool>(&builder, false, {}); +} + +TEST_F(PredTest, ConstantR0PredCompareEq) { + TestCompare(true, false, false, &ComputationBuilder::Eq); +} + +TEST_F(PredTest, ConstantR0PredCompareNe) { + TestCompare(true, false, true, &ComputationBuilder::Ne); +} + +TEST_F(PredTest, ConstantR0PredCompareLe) { + TestCompare(true, false, false, &ComputationBuilder::Le); +} + +TEST_F(PredTest, ConstantR0PredCompareLt) { + TestCompare(true, false, false, &ComputationBuilder::Lt); +} + +TEST_F(PredTest, ConstantR0PredCompareGe) { + TestCompare(true, false, true, &ComputationBuilder::Ge); +} + +TEST_F(PredTest, ConstantR0PredCompareGt) { + TestCompare(true, false, true, 
&ComputationBuilder::Gt); +} + +TEST_F(PredTest, ConstantR1Pred) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<bool>({true, false, false, true}); + ComputeAndCompareR1<bool>(&builder, {true, false, false, true}, {}); +} + +TEST_F(PredTest, ConstantR2Pred) { + ComputationBuilder builder(client_, TestName()); + auto a = + builder.ConstantR2<bool>({{false, true, true}, {true, false, false}}); + const string expected = R"(pred[2,3] { + { 011 }, + { 100 }, +})"; + EXPECT_EQ(expected, ExecuteToString(&builder, {})); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/prng_test.cc b/tensorflow/compiler/xla/tests/prng_test.cc new file mode 100644 index 0000000000..8d77b3dd61 --- /dev/null +++ b/tensorflow/compiler/xla/tests/prng_test.cc @@ -0,0 +1,238 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/primitive_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class PrngTest : public ClientLibraryTestBase { + protected: + template <typename T> + void UniformTest(T a, T b, tensorflow::gtl::ArraySlice<int64> dims); + void BernoulliTest(float p, tensorflow::gtl::ArraySlice<int64> dims); +}; + +template <typename T> +void PrngTest::UniformTest(T a, T b, tensorflow::gtl::ArraySlice<int64> dims) { + ComputationBuilder builder(client_, TestName()); + builder.RngUniform( + builder.ConstantR0<T>(a), builder.ConstantR0<T>(b), + ShapeUtil::MakeShape(primitive_util::NativeToPrimitiveType<T>(), dims)); + + auto actual = ExecuteAndTransferOrDie(&builder, /*arguments=*/{}); + EXPECT_TRUE(ContainersEqual(dims, actual->shape().dimensions())); + LiteralUtil::EachCell<T>(*actual, + [=](tensorflow::gtl::ArraySlice<int64>, T value) { + EXPECT_LE(a, value); + EXPECT_GE(b, value); + }); +} + +void PrngTest::BernoulliTest(float p, tensorflow::gtl::ArraySlice<int64> dims) { + ComputationBuilder builder(client_, TestName()); + auto shape = ShapeUtil::MakeShape(U32, dims); + builder.RngBernoulli(builder.ConstantR0<float>(p), shape); + + TF_ASSIGN_OR_ASSERT_OK(auto 
computation, builder.Build()); + constexpr uint64 kTestSeed = 42; + TF_ASSIGN_OR_ASSERT_OK( + auto actual, + client_->ExecuteAndTransfer(computation, /*arguments=*/{}, + /*shape_with_output_layout=*/nullptr, + /*execution_profile=*/nullptr, + /*seed=*/kTestSeed)); + EXPECT_TRUE(ContainersEqual(dims, actual->shape().dimensions())); + int32 sum = 0; + LiteralUtil::EachCell<uint32>( + *actual, [&sum](tensorflow::gtl::ArraySlice<int64>, uint32 value) { + EXPECT_TRUE(value == 0 || value == 1); + sum += value; + }); + int32 total = ShapeUtil::ElementsIn(shape); + float p_tilde = sum / static_cast<float>(total); + + // Test within expected range using normal approximation. The test uses a + // fixed seed and has a fixed output per p and backend. Using the normal + // approximation as this test is invoked for different `p` and the different + // backends could use different random number generators and produce different + // values. Choose 95% confidence level, so that z_{1-\alpha/2} = 1.96. + float normal_approximation_term = 1.96 * sqrt(p * (1 - p) / total); + EXPECT_GE(p_tilde, p - normal_approximation_term); + EXPECT_LE(p_tilde, p + normal_approximation_term); +} + +// Uniform random number generation tests +XLA_TEST_F(PrngTest, ScalarU01) { UniformTest<float>(0, 1, {}); } +XLA_TEST_F(PrngTest, ZeroValuesU01) { UniformTest<float>(0, 1, {0}); } +XLA_TEST_F(PrngTest, TenValuesU01) { UniformTest<float>(0, 1, {10}); } +XLA_TEST_F(PrngTest, TenValuesU37) { UniformTest<float>(3, 7, {10}); } +XLA_TEST_F(PrngTest, ZeroValuesR2) { UniformTest<float>(0, 1, {0, 20}); } +XLA_TEST_F(PrngTest, LargeU01) { UniformTest<float>(0, 1, {0x100, 0x100}); } +XLA_TEST_F(PrngTest, TwelveValuesU524) { UniformTest<int32>(5, 24, {12}); } + +XLA_TEST_F(PrngTest, MapUsingRng) { + // Build a x -> (x + U[0,1)) computation. 
+ auto build_sum_rng = [this](ComputationBuilder& builder) { + auto b = builder.CreateSubBuilder("sum_with_rng"); + auto x = b->Parameter(0, ShapeUtil::MakeShape(F32, {}), "input"); + b->Add(x, + b->RngUniform(b->ConstantR0<float>(0), b->ConstantR0<float>(1), + ShapeUtil::MakeShape(F32, {}))); + return b->BuildAndNoteError(); + }; + + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> param0_literal = + LiteralUtil::CreateR1<float>({2.2f, 5.3f, 4.4f, 5.5f}); + TF_ASSIGN_OR_ASSERT_OK(std::unique_ptr<GlobalData> param0_data, + client_->TransferToServer(*param0_literal)); + + auto param0 = builder.Parameter(0, param0_literal->shape(), "param0"); + auto fn = build_sum_rng(builder); + builder.Map({param0}, fn); + + TF_ASSIGN_OR_ASSERT_OK(auto computation, builder.Build()); + TF_ASSIGN_OR_ASSERT_OK( + auto actual, + client_->ExecuteAndTransfer(computation, + /*arguments=*/{param0_data.get()}, nullptr, + nullptr, /*seed=*/125)); + EXPECT_EQ(actual->f32s_size(), param0_literal->f32s_size()); + for (int i = 0; i < param0_literal->f32s_size(); ++i) { + EXPECT_GE(actual->f32s(i), param0_literal->f32s(i)); + EXPECT_LT(actual->f32s(i), param0_literal->f32s(i) + 1.0f); + } +} + +// This tests demonstrates the global seeding behaviour. +// * If a seed is passed in via Execute (ExecuteAndTransfer) then the output is +// fixed (i.e., there is a single output for a given seed); +// * If no seed is passed in then the output of every call can be different; +XLA_TEST_F(PrngTest, PassInGlobalRngSeed) { + // Build a U[0,1) computation. 
+ auto build_computation = [this]() { + ComputationBuilder builder(client_, TestName()); + builder.RngUniform(builder.ConstantR0<float>(0), + builder.ConstantR0<float>(1), + ShapeUtil::MakeShape(F32, {10})); + return builder.Build(); + }; + + std::unique_ptr<Literal> result1; + { + TF_ASSIGN_OR_ASSERT_OK(auto computation, build_computation()); + TF_ASSIGN_OR_ASSERT_OK( + result1, + client_->ExecuteAndTransfer(computation, /*arguments=*/{}, + /*shape_with_output_layout=*/nullptr, + /*execution_profile=*/nullptr, + /*seed=*/42)); + } + std::unique_ptr<Literal> result2; + std::unique_ptr<Literal> result3; + { + TF_ASSIGN_OR_ASSERT_OK(auto computation, build_computation()); + TF_ASSIGN_OR_ASSERT_OK( + result2, + client_->ExecuteAndTransfer(computation, /*arguments=*/{}, + /*shape_with_output_layout=*/nullptr, + /*execution_profile=*/nullptr, + /*seed=*/42)); + TF_ASSIGN_OR_ASSERT_OK( + result3, + client_->ExecuteAndTransfer(computation, /*arguments=*/{}, + /*shape_with_output_layout=*/nullptr, + /*execution_profile=*/nullptr, + /*seed=*/42)); + } + + std::unique_ptr<Literal> result4; + std::unique_ptr<Literal> result5; + std::unique_ptr<Literal> result6; + { + TF_ASSIGN_OR_ASSERT_OK(auto computation, build_computation()); + TF_ASSIGN_OR_ASSERT_OK( + result4, + client_->ExecuteAndTransfer(computation, /*arguments=*/{}, + /*shape_with_output_layout=*/nullptr, + /*execution_profile=*/nullptr, + /*seed=*/65)); + TF_ASSIGN_OR_ASSERT_OK( + result5, + client_->ExecuteAndTransfer(computation, /*arguments=*/{}, + /*shape_with_output_layout=*/nullptr, + /*execution_profile=*/nullptr)); + TF_ASSIGN_OR_ASSERT_OK( + result6, + client_->ExecuteAndTransfer(computation, /*arguments=*/{}, + /*shape_with_output_layout=*/nullptr, + /*execution_profile=*/nullptr)); + } + + LiteralTestUtil::ExpectEqual(*result1, *result2); + LiteralTestUtil::ExpectEqual(*result1, *result3); + LiteralTestUtil::ExpectNotEqual(*result1, *result4); + LiteralTestUtil::ExpectNotEqual(*result4, *result5); + 
LiteralTestUtil::ExpectNotEqual(*result5, *result6); +} + +// Bernoulli random number generation tests +XLA_TEST_F(PrngTest, HundredValuesB10p5) { BernoulliTest(0.5, {100}); } +XLA_TEST_F(PrngTest, HundredValuesB10p1) { BernoulliTest(0.1, {100}); } + +XLA_TEST_F(PrngTest, TenValuesN01) { + ComputationBuilder builder(client_, TestName()); + builder.RngNormal(builder.ConstantR0<float>(0), builder.ConstantR0<float>(1), + ShapeUtil::MakeShape(F32, {10})); + + ExecuteAndTransferOrDie(&builder, /*arguments=*/{}); + // TODO(b/25995601): Test that resultant values are reasonable +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/query_inferred_shape_test.cc b/tensorflow/compiler/xla/tests/query_inferred_shape_test.cc new file mode 100644 index 0000000000..eb7e63705b --- /dev/null +++ b/tensorflow/compiler/xla/tests/query_inferred_shape_test.cc @@ -0,0 +1,61 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <memory> + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class QueryInferredShapeTest : public ClientLibraryTestBase {}; + +TEST_F(QueryInferredShapeTest, OnePlusOneShape) { + ComputationBuilder builder(client_, "one_plus_one"); + auto one = builder.ConstantR0<float>(1.0); + auto result = builder.Add(one, one); + StatusOr<std::unique_ptr<Shape>> shape_status = builder.GetShape(result); + ASSERT_IS_OK(shape_status.status()); + auto shape = shape_status.ConsumeValueOrDie(); + ASSERT_TRUE(ShapeUtil::Equal(*shape, ShapeUtil::MakeShape(F32, {}))); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/reduce_test.cc b/tensorflow/compiler/xla/tests/reduce_test.cc new file mode 100644 index 0000000000..f3d8da5c8c --- /dev/null +++ 
b/tensorflow/compiler/xla/tests/reduce_test.cc @@ -0,0 +1,506 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Tests that multi-dimensional arrays can be reduced among various +// user-provided dimensions. +// +// Note that comments for these tests are white-box in that they talk about the +// default data layout. +// +// The test space for reductions is the cartesian product of: +// +// <possible ranks> x +// <possible layouts for chosen rank> x +// <possible subsets of dimensions in chosen rank> + +#include <stdlib.h> +#include <algorithm> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include 
"tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class ReduceTest : public ClientLibraryTestBase { + protected: + ReduceTest() { + // Implementation note: layed out z >> y >> x by default. + // clang-format off + literal_2d_ = LiteralUtil::CreateR2<float>({ + // x0 x1 x2 + { 1.f, 2.f, 3.f}, // y0 + { 4.f, 5.f, 6.f}, // y1 + }); + literal_3d_ = LiteralUtil::CreateR3Projected<float>({ + // x0 x1 x2 + { 1.f, 2.f, 3.f}, // y0 + { 4.f, 5.f, 6.f}, // y1 + }, 4); + // clang-format on + CHECK(ShapeUtil::Equal( + literal_3d_->shape(), + ShapeUtil::MakeShape(F32, {/*z=*/4, /*y=*/2, /*x=*/3}))) + << literal_3d_->shape().ShortDebugString(); + } + + // Runs an R1 => R0 reduction test with the given number of elements. 
+ void RunR1ToR0Test(int64 element_count) { + ComputationBuilder builder(client_, TestName()); + Computation add_f32 = CreateScalarAddComputation(F32, &builder); + const Shape input_shape = ShapeUtil::MakeShape(F32, {element_count}); + auto input = builder.Parameter(0, input_shape, "input"); + auto zero = builder.ConstantR0<float>(0.0); + builder.Reduce(input, zero, add_f32, /*dimensions_to_reduce=*/{0}); + + std::vector<float> input_data(element_count); + for (int64 i = 0; i < element_count; ++i) { + input_data[i] = rand_r(&seed_) % 3; + if (rand_r(&seed_) % 2 == 0) { + input_data[i] *= -1; + } + } + std::unique_ptr<Literal> input_literal = + LiteralUtil::CreateR1(AsSlice(input_data)); + std::unique_ptr<GlobalData> input_global_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + float expected = 0.0; + for (float item : input_data) { + expected += item; + } + ComputeAndCompareR0<float>(&builder, expected, {input_global_data.get()}, + ErrorSpec(0.001)); + } + + // Runs an R2 => R0 reduction test with the given number of (rows, cols). 
+ void RunR2ToR0Test(int64 rows, int64 cols, int64 minor = 1, int64 major = 0) { + ComputationBuilder builder(client_, TestName()); + Computation add_f32 = CreateScalarAddComputation(F32, &builder); + const Shape input_shape = ShapeUtil::MakeShape(F32, {rows, cols}); + auto input = builder.Parameter(0, input_shape, "input"); + auto zero = builder.ConstantR0<float>(0.0); + builder.Reduce(input, zero, add_f32, /*dimensions_to_reduce=*/{0, 1}); + + Array2D<float> input_data(rows, cols); + input_data.FillRandom(3.14f, 0.04); + std::unique_ptr<Literal> input_literal = + LiteralUtil::CreateR2FromArray2D(input_data); + input_literal = LiteralUtil::Relayout( + *input_literal, LayoutUtil::MakeLayout({minor, major})); + std::unique_ptr<GlobalData> input_global_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + float expected = 0.0; + for (int64 rowno = 0; rowno < rows; ++rowno) { + for (int64 colno = 0; colno < cols; ++colno) { + expected += input_data(rowno, colno); + } + } + ComputeAndCompareR0<float>(&builder, expected, {input_global_data.get()}, + ErrorSpec(0.01, 1e-4)); + } + + // Runs an R2 => R1 reduction test with the given number of (rows, cols). 
+ void RunR2ToR1Test(int64 rows, int64 cols, int64 minor = 1, int64 major = 0) { + ComputationBuilder builder(client_, TestName()); + Computation add_f32 = CreateScalarAddComputation(F32, &builder); + const Shape input_shape = ShapeUtil::MakeShape(F32, {rows, cols}); + auto input = builder.Parameter(0, input_shape, "input"); + auto zero = builder.ConstantR0<float>(0.0); + builder.Reduce(input, zero, add_f32, /*dimensions_to_reduce=*/{0}); + + Array2D<float> input_data(rows, cols); + input_data.FillRandom(3.14f, 0.04); + std::unique_ptr<Literal> input_literal = + LiteralUtil::CreateR2FromArray2D(input_data); + input_literal = LiteralUtil::Relayout( + *input_literal, LayoutUtil::MakeLayout({minor, major})); + std::unique_ptr<GlobalData> input_global_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + std::vector<float> expected; + for (int64 colno = 0; colno < cols; ++colno) { + float column_sum = 0; + for (int64 rowno = 0; rowno < rows; ++rowno) { + column_sum += input_data(rowno, colno); + } + expected.push_back(column_sum); + } + ComputeAndCompareR1<float>(&builder, expected, {input_global_data.get()}, + ErrorSpec(0.01, 1e-4)); + } + + std::unique_ptr<Literal> literal_2d_; + std::unique_ptr<Literal> literal_3d_; + uint32 seed_ = 0xdeadbeef; +}; + +XLA_TEST_F(ReduceTest, ReduceR1_0_F32_To_R0) { RunR1ToR0Test(0); } +XLA_TEST_F(ReduceTest, ReduceR1_1_F32_To_R0) { RunR1ToR0Test(1); } +XLA_TEST_F(ReduceTest, ReduceR1_2_F32_To_R0) { RunR1ToR0Test(2); } +XLA_TEST_F(ReduceTest, ReduceR1_16_F32_To_R0) { RunR1ToR0Test(16); } +XLA_TEST_F(ReduceTest, ReduceR1_240_F32_To_R0) { RunR1ToR0Test(240); } +XLA_TEST_F(ReduceTest, ReduceR1_128_F32_To_R0) { RunR1ToR0Test(128); } +XLA_TEST_F(ReduceTest, ReduceR1_129_F32_To_R0) { RunR1ToR0Test(129); } +XLA_TEST_F(ReduceTest, ReduceR1_256_F32_To_R0) { RunR1ToR0Test(256); } +XLA_TEST_F(ReduceTest, ReduceR1_1024_F32_To_R0) { RunR1ToR0Test(1024); } +XLA_TEST_F(ReduceTest, ReduceR1_2048_F32_To_R0) { RunR1ToR0Test(2048); 
} +XLA_TEST_F(ReduceTest, ReduceR1_16K_F32_To_R0) { RunR1ToR0Test(16 * 1024); } +XLA_TEST_F(ReduceTest, ReduceR1_16KP1_F32_To_R0) { + RunR1ToR0Test(16 * 1024 + 1); +} + +XLA_TEST_F(ReduceTest, ReduceR2_0x0_To_R0) { RunR2ToR0Test(0, 0); } +XLA_TEST_F(ReduceTest, ReduceR2_0x2_To_R0) { RunR2ToR0Test(0, 2); } +XLA_TEST_F(ReduceTest, ReduceR2_1x1_To_R0) { RunR2ToR0Test(1, 1); } +XLA_TEST_F(ReduceTest, ReduceR2_2x0_To_R0) { RunR2ToR0Test(2, 0); } +XLA_TEST_F(ReduceTest, ReduceR2_2x2_To_R0) { RunR2ToR0Test(2, 2); } +XLA_TEST_F(ReduceTest, ReduceR2_8x8_To_R0) { RunR2ToR0Test(8, 8); } +XLA_TEST_F(ReduceTest, ReduceR2_9x9_To_R0) { RunR2ToR0Test(9, 9); } +XLA_TEST_F(ReduceTest, ReduceR2_50x111_To_R0) { RunR2ToR0Test(50, 111); } +XLA_TEST_F(ReduceTest, ReduceR2_111x50_To_R0) { RunR2ToR0Test(111, 50); } +XLA_TEST_F(ReduceTest, ReduceR2_111x50_01_To_R0) { + RunR2ToR0Test(111, 50, 0, 1); +} +XLA_TEST_F(ReduceTest, ReduceR2_1024x1024_To_R0) { RunR2ToR0Test(1024, 1024); } +XLA_TEST_F(ReduceTest, ReduceR2_1000x1500_To_R0) { RunR2ToR0Test(1000, 1500); } + +// Disabled due to b/33245142. Failed on 2016-11-30. +// XLA_TEST_F(ReduceTest, ReduceR2_0x0_To_R1) { RunR2ToR1Test(0, 0); } +XLA_TEST_F(ReduceTest, ReduceR2_0x2_To_R1) { RunR2ToR1Test(0, 2); } +XLA_TEST_F(ReduceTest, ReduceR2_1x1_To_R1) { RunR2ToR1Test(1, 1); } +// Disabled due to b/33245142. Failed on 2016-11-30. 
+// XLA_TEST_F(ReduceTest, ReduceR2_2x0_To_R1) { RunR2ToR1Test(2, 0); } +XLA_TEST_F(ReduceTest, ReduceR2_2x2_To_R1) { RunR2ToR1Test(2, 2); } +XLA_TEST_F(ReduceTest, ReduceR2_8x8_To_R1) { RunR2ToR1Test(8, 8); } +XLA_TEST_F(ReduceTest, ReduceR2_9x9_To_R1) { RunR2ToR1Test(9, 9); } +XLA_TEST_F(ReduceTest, ReduceR2_50x111_To_R1) { RunR2ToR1Test(50, 111); } +XLA_TEST_F(ReduceTest, ReduceR2_111x50_To_R1) { RunR2ToR1Test(111, 50); } +XLA_TEST_F(ReduceTest, ReduceR2_111x50_01_To_R1) { + RunR2ToR1Test(111, 50, 0, 1); +} +XLA_TEST_F(ReduceTest, ReduceR2_1024x1024_To_R1) { RunR2ToR1Test(1024, 1024); } +XLA_TEST_F(ReduceTest, ReduceR2_1000x1500_To_R1) { RunR2ToR1Test(1000, 1500); } + +XLA_TEST_F(ReduceTest, ReduceElementwiseR2_111x50_To_R1) { + const int64 rows = 111, cols = 50; + + ComputationBuilder builder(client_, TestName()); + Computation add_f32 = CreateScalarAddComputation(F32, &builder); + const Shape input_shape = ShapeUtil::MakeShape(F32, {rows, cols}); + auto input = builder.Parameter(0, input_shape, "input"); + auto zero = builder.ConstantR0<float>(0.0); + auto log_ = builder.Log(input); + builder.Reduce(log_, zero, add_f32, /*dimensions_to_reduce=*/{0}); + + Array2D<float> input_data(rows, cols); + input_data.FillRandom(3.14f, 0.04); + std::unique_ptr<Literal> input_literal = + LiteralUtil::CreateR2FromArray2D(input_data); + input_literal = + LiteralUtil::Relayout(*input_literal, LayoutUtil::MakeLayout({0, 1})); + std::unique_ptr<GlobalData> input_global_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + std::vector<float> expected; + for (int64 colno = 0; colno < cols; ++colno) { + float column_sum = 0; + for (int64 rowno = 0; rowno < rows; ++rowno) { + column_sum += log(input_data(rowno, colno)); + } + expected.push_back(column_sum); + } + ComputeAndCompareR1<float>(&builder, expected, {input_global_data.get()}, + ErrorSpec(0.01, 1e-4)); +} + +struct BoundsLayout { + std::vector<int64> bounds; + std::vector<int64> layout; + 
std::vector<int64> reduce_dims; +}; + +void PrintTo(const BoundsLayout& spec, std::ostream* os) { + *os << tensorflow::strings::Printf( + "R%luToR%lu%s_%s_Reduce%s", spec.bounds.size(), + spec.bounds.size() - spec.reduce_dims.size(), + tensorflow::str_util::Join(spec.bounds, "x").c_str(), + tensorflow::str_util::Join(spec.layout, "").c_str(), + tensorflow::str_util::Join(spec.reduce_dims, "").c_str()); +} + +// Add-reduces a broadcasted scalar matrix among dimension 1 and 0. +XLA_TEST_F(ReduceTest, AddReduce2DScalarToR0) { + ComputationBuilder builder(client_, TestName()); + auto add = CreateScalarAddComputation(F32, &builder); + auto scalar = builder.ConstantR0<float>(42.0); + auto broacasted = builder.Broadcast(scalar, {500, 500}); + builder.Reduce(broacasted, builder.ConstantR0<float>(0.0f), add, {0, 1}); + + float expected = 42.0f * static_cast<float>(500 * 500); + ComputeAndCompareR0<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +// Max-reduces a broadcasted scalar matrix among dimension 1 and 0. +XLA_TEST_F(ReduceTest, MaxReduce2DScalarToR0) { + ComputationBuilder builder(client_, TestName()); + auto max = CreateScalarMaxComputation(F32, &builder); + auto scalar = builder.ConstantR0<float>(42.0); + auto broacasted = builder.Broadcast(scalar, {500, 500}); + builder.Reduce(broacasted, builder.ConstantR0<float>(0.0f), max, {0, 1}); + + float expected = 42.0f; + ComputeAndCompareR0<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +// Max-reduces a matrix among dimension 1 and 0. 
+XLA_TEST_F(ReduceTest, MaxReduce2DToR0) { + ComputationBuilder builder(client_, TestName()); + auto max = CreateScalarMaxComputation(F32, &builder); + Array2D<float> input(300, 250); + input.FillRandom(214.0f); + auto input_literal = LiteralUtil::CreateR2FromArray2D(input); + builder.Reduce(builder.ConstantLiteral(*input_literal), + builder.ConstantR0<float>(FLT_MIN), max, {0, 1}); + auto input_max = FLT_MIN; + input.Each( + [&](int64, int64, float* v) { input_max = std::max(input_max, *v); }); + ComputeAndCompareR0<float>(&builder, input_max, {}, ErrorSpec(0.0001)); +} + +// Min-reduces matrix among dimension 1 and 0. +XLA_TEST_F(ReduceTest, MinReduce2DToR0) { + ComputationBuilder builder(client_, TestName()); + auto min = CreateScalarMinComputation(F32, &builder); + Array2D<float> input(150, 130); + input.FillRandom(214.0f); + auto input_literal = LiteralUtil::CreateR2FromArray2D(input); + builder.Reduce(builder.ConstantLiteral(*input_literal), + builder.ConstantR0<float>(FLT_MAX), min, {0, 1}); + + auto input_min = FLT_MAX; + input.Each( + [&](int64, int64, float* v) { input_min = std::min(input_min, *v); }); + ComputeAndCompareR0<float>(&builder, input_min, {}, ErrorSpec(0.0001)); +} + +// Reduces a matrix among dimension 1. +XLA_TEST_F(ReduceTest, Reduce2DAmong1) { + ComputationBuilder builder(client_, TestName()); + auto m = builder.ConstantLiteral(*literal_2d_); + auto add = CreateScalarAddComputation(F32, &builder); + builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {1}); + + std::vector<float> expected = {6.f, 15.f}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ReduceTest, Reduce2DAmong0and1) { + // Reduce a matrix among dimensions 0 and 1 (sum it up to a scalar). 
  ComputationBuilder builder(client_, TestName());
  auto m = builder.ConstantLiteral(*literal_2d_);
  auto add = CreateScalarAddComputation(F32, &builder);
  builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {0, 1});

  // 21 is the sum of all elements of literal_2d_ (defined earlier in this
  // file; from the expected values it is presumably the 2x3 matrix
  // {{1,2,3},{4,5,6}} — confirm against its definition).
  ComputeAndCompareR0<float>(&builder, 21.0f, {}, ErrorSpec(0.0001, 1e-4));
}

// Tests 2D matrix ReduceToRow operation.
XLA_TEST_F(ReduceTest, Reduce2DAmongY) {
  ComputationBuilder builder(client_, "reduce_among_y");
  auto m = builder.ConstantLiteral(*literal_2d_);
  auto add = CreateScalarAddComputation(F32, &builder);
  builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {0});

  // Column sums of literal_2d_.
  std::vector<float> expected = {5.f, 7.f, 9.f};
  ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
}

// R3 -> R1: reduce the two minor dimensions, keep dimension 0.
XLA_TEST_F(ReduceTest, ReduceR3AmongDims_1_2) {
  ComputationBuilder builder(client_, TestName());
  auto m = builder.ConstantLiteral(*literal_3d_);
  auto add = CreateScalarAddComputation(F32, &builder);
  builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {1, 2});

  std::vector<float> expected = {21.f, 21.f, 21.f, 21.f};
  ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
}

// R3 -> R1: reduce the two major dimensions, keep dimension 2.
XLA_TEST_F(ReduceTest, ReduceR3AmongDims_0_1) {
  ComputationBuilder builder(client_, TestName());
  auto m = builder.ConstantLiteral(*literal_3d_);
  auto add = CreateScalarAddComputation(F32, &builder);
  builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {0, 1});

  std::vector<float> expected = {20.f, 28.f, 36.f};
  ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
}

// R3 -> R0: reduce all three dimensions to a scalar.
XLA_TEST_F(ReduceTest, ReduceR3ToR0) {
  ComputationBuilder builder(client_, TestName());
  auto m = builder.ConstantLiteral(*literal_3d_);
  auto add = CreateScalarAddComputation(F32, &builder);
  builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {0, 1, 2});

  // literal_3d_ appears to replicate literal_2d_ (sum 21) four times along
  // dimension 0 — confirm against its definition earlier in the file.
  float expected = 21.0f * 4.0;
  ComputeAndCompareR0<float>(&builder, expected, {}, ErrorSpec(0.0001));
}

// R3 -> R2: reduce a single dimension, one test per choice of dimension.
XLA_TEST_F(ReduceTest, ReduceR3AmongDim0) {
  ComputationBuilder builder(client_, TestName());
  auto m = builder.ConstantLiteral(*literal_3d_);
  auto add = CreateScalarAddComputation(F32, &builder);
  builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {0});

  // clang-format off
  Array2D<float> expected({
      {4.f, 8.f, 12.f},
      {16.f, 20.f, 24.f},
  });
  // clang-format on
  ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001));
}

XLA_TEST_F(ReduceTest, ReduceR3AmongDim1) {
  ComputationBuilder builder(client_, TestName());
  auto m = builder.ConstantLiteral(*literal_3d_);
  auto add = CreateScalarAddComputation(F32, &builder);
  builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {1});

  // clang-format off
  Array2D<float> expected({
      {5.f, 7.f, 9.f},
      {5.f, 7.f, 9.f},
      {5.f, 7.f, 9.f},
      {5.f, 7.f, 9.f},
  });
  // clang-format on
  ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001));
}

XLA_TEST_F(ReduceTest, ReduceR3AmongDim2) {
  ComputationBuilder builder(client_, TestName());
  auto m = builder.ConstantLiteral(*literal_3d_);
  auto add = CreateScalarAddComputation(F32, &builder);
  builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {2});

  // clang-format off
  Array2D<float> expected({
      {6.f, 15.f},
      {6.f, 15.f},
      {6.f, 15.f},
      {6.f, 15.f},
  });
  // clang-format on
  ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001));
}

// Value-parameterized R3 -> R2 reduce test: each BoundsLayout instance
// selects an input shape, an input layout, and the dimension to reduce.
class ReduceR3ToR2Test : public ReduceTest,
                         public ::testing::WithParamInterface<BoundsLayout> {};

XLA_TEST_P(ReduceR3ToR2Test, ReduceR3ToR2) {
  ComputationBuilder builder(client_, TestName());
  const auto& bounds = GetParam().bounds;
  Array3D<float> input_array(bounds[0], bounds[1], bounds[2]);
  input_array.FillRandom(3.14f, 0.05);

  auto input_literal = LiteralUtil::CreateR3FromArray3D(input_array);
  // Force the layout specified by the test parameter.
  input_literal = LiteralUtil::Relayout(
      *input_literal, LayoutUtil::MakeLayout(GetParam().layout));
  std::unique_ptr<GlobalData> input_data =
      client_->TransferToServer(*input_literal).ConsumeValueOrDie();

  auto input_activations =
      builder.Parameter(0, input_literal->shape(), "input");
  Computation add = CreateScalarAddComputation(F32, &builder);
  auto sum = builder.Reduce(input_activations, builder.ConstantR0<float>(0.0f),
                            add, GetParam().reduce_dims);

  // Host-side reference reduction over the same dimensions.
  auto expected =
      ReferenceUtil::Reduce3DTo2D(input_array, 0.0f, GetParam().reduce_dims,
                                  [](float a, float b) { return a + b; });

  ComputeAndCompareR2<float>(&builder, *expected, {input_data.get()},
                             ErrorSpec(1e-3, 1e-3));
}

INSTANTIATE_TEST_CASE_P(
    ReduceR3ToR2Test_Instantiation, ReduceR3ToR2Test,
    // Specifies (shape, layout, reduction dimensions).
    ::testing::Values(BoundsLayout{{4, 8, 128}, {2, 1, 0}, {0}},
                      BoundsLayout{{4, 8, 128}, {2, 1, 0}, {1}},
                      BoundsLayout{{4, 8, 128}, {2, 1, 0}, {2}},
                      // These should be simplified into a reshape.
                      BoundsLayout{{1, 21, 43}, {2, 1, 0}, {0}},
                      BoundsLayout{{1, 1, 1}, {2, 1, 0}, {0}},
                      BoundsLayout{{1, 1, 1}, {2, 1, 0}, {1}},
                      BoundsLayout{{1, 1, 1}, {2, 1, 0}, {2}},
                      BoundsLayout{{8, 16, 24}, {0, 1, 2}, {0}},
                      BoundsLayout{{8, 16, 24}, {0, 1, 2}, {1}},
                      BoundsLayout{{8, 16, 24}, {0, 1, 2}, {2}},
                      BoundsLayout{{5, 10, 250}, {2, 1, 0}, {0}},
                      BoundsLayout{{5, 10, 250}, {2, 1, 0}, {1}},
                      BoundsLayout{{5, 10, 250}, {2, 1, 0}, {2}},
                      BoundsLayout{{8, 16, 256}, {2, 1, 0}, {0}},
                      BoundsLayout{{8, 16, 256}, {2, 1, 0}, {1}},
                      BoundsLayout{{8, 16, 256}, {2, 1, 0}, {2}},
                      BoundsLayout{{2, 300, 784}, {2, 1, 0}, {2}},
                      BoundsLayout{{2, 300, 784}, {2, 1, 0}, {1}},
                      BoundsLayout{{2, 300, 784}, {2, 1, 0}, {0}}));

}  // namespace
}  // namespace xla

int main(int argc, char** argv) {
  std::vector<tensorflow::Flag> flag_list;
  xla::legacy_flags::AppendCpuCompilerFlags(&flag_list);
  xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
  const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
  if (!parse_result) {
    LOG(ERROR) << "\n" << usage;
+ return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc new file mode 100644 index 0000000000..f48c14dfc6 --- /dev/null +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -0,0 +1,445 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Tests the reduce-window XLA operation. 

#include <limits>
#include <memory>

#include "tensorflow/compiler/xla/array2d.h"
#include "tensorflow/compiler/xla/array3d.h"
#include "tensorflow/compiler/xla/array4d.h"
#include "tensorflow/compiler/xla/client/computation_builder.h"
#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
#include "tensorflow/compiler/xla/client/local_client.h"
#include "tensorflow/compiler/xla/client/padding.h"
#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h"
#include "tensorflow/compiler/xla/reference_util.h"
#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
#include "tensorflow/compiler/xla/tests/literal_test_util.h"
#include "tensorflow/compiler/xla/tests/test_macros.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/types.h"

namespace xla {
namespace {

// Test fixture holding a single ComputationBuilder plus helpers that append a
// ReduceWindow op with the appropriate reduction computation and identity
// element (0 for add, -inf for max, +inf for min).
class ReduceWindowTest : public ClientLibraryTestBase {
 public:
  ReduceWindowTest() : builder_(client_, TestName()) {}

  // Appends a sum reduce-window over `input` to builder_.
  void ReduceWindowAdd(ComputationDataHandle input,
                       tensorflow::gtl::ArraySlice<int64> window_dimensions,
                       tensorflow::gtl::ArraySlice<int64> window_strides,
                       Padding padding) {
    builder_.ReduceWindow(input, builder_.ConstantR0<float>(0.0f),
                          CreateScalarAddComputation(F32, &builder_),
                          window_dimensions, window_strides, padding);
  }

  // Appends a max reduce-window; MinValue(F32) is the identity for max.
  void ReduceWindowMax(ComputationDataHandle input,
                       tensorflow::gtl::ArraySlice<int64> window_dimensions,
                       tensorflow::gtl::ArraySlice<int64> window_strides,
                       Padding padding) {
    builder_.ReduceWindow(
        input, builder_.ConstantLiteral(LiteralUtil::MinValue(F32)),
        CreateScalarMax(), window_dimensions, window_strides, padding);
  }

  // Appends a min reduce-window; MaxValue(F32) is the identity for min.
  void ReduceWindowMin(ComputationDataHandle input,
                       tensorflow::gtl::ArraySlice<int64> window_dimensions,
                       tensorflow::gtl::ArraySlice<int64> window_strides,
                       Padding padding) {
    builder_.ReduceWindow(input,
                          builder_.ConstantLiteral(LiteralUtil::MaxValue(F32)),
                          CreateScalarMinComputation(F32, &builder_),
                          window_dimensions, window_strides, padding);
  }

  ComputationBuilder builder_;
};

// A window over a zero-element input must still produce a well-formed
// (zero-element) output.
XLA_TEST_F(ReduceWindowTest, ZeroElementSmall) {
  Array4D<float> input_array(1, 0, 2, 1);

  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
  Padding padding = Padding::kSame;
  ReduceWindowAdd(input, {1, 1, 2, 1}, {1, 1, 1, 1}, padding);

  auto res = ReferenceUtil::ReduceWindow4DAdd(input_array, 0.0f, {1, 1, 2, 1},
                                              {1, 1, 1, 1}, padding);

  ComputeAndCompareR4<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
}

TEST_F(ReduceWindowTest, NonSquareSmall) {
  Array4D<float> input_array(1, 2, 2, 1);
  input_array.FillRandom(2.f);

  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
  Padding padding = Padding::kSame;
  ReduceWindowAdd(input, {1, 1, 2, 1}, {1, 1, 1, 1}, padding);

  auto res = ReferenceUtil::ReduceWindow4DAdd(input_array, 0.0f, {1, 1, 2, 1},
                                              {1, 1, 1, 1}, padding);

  ComputeAndCompareR4<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
}

// Window of size 1 but stride 2 in the middle dimensions — pure subsampling.
TEST_F(ReduceWindowTest, MiddleDimsSmall) {
  Array4D<float> input_array(1, 3, 3, 1);
  input_array.FillRandom(2.f);

  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
  Padding padding = Padding::kSame;
  ReduceWindowAdd(input, {1, 1, 1, 1}, {1, 2, 2, 1}, padding);

  auto res = ReferenceUtil::ReduceWindow4DAdd(input_array, 0.0f, {1, 1, 1, 1},
                                              {1, 2, 2, 1}, padding);

  ComputeAndCompareR4<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
}

TEST_F(ReduceWindowTest, Along2ndMinorDim) {
  Array4D<float> input_array(3, 6, 7, 32);
  input_array.FillRandom(2.f);

  // The parameters of this reduction mimic feature norm (e.g. LRN).
  int lrn_diameter = 7;  // diameter = 2*radius + 1 --> must be odd
  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
  Padding padding = Padding::kSame;
  ReduceWindowAdd(input, {1, 1, lrn_diameter, 1}, {1, 1, 1, 1}, padding);

  auto res = ReferenceUtil::ReduceWindow4DAdd(
      input_array, 0.0f, {1, 1, lrn_diameter, 1}, {1, 1, 1, 1}, padding);

  ComputeAndCompareR4<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
}

// Strided window over the two major dimensions of a medium-sized input,
// checked against the host-side reference implementation.
TEST_F(ReduceWindowTest, AmongMajor2DimsMediumSize) {
  Array4D<float> input_array(9, 12, 4, 89);
  input_array.FillRandom(2.0f);

  int win_len = 3;
  int win_stride = 2;

  const auto input_data_handle =
      builder_.ConstantR4FromArray4D<float>(input_array);

  Padding padding = Padding::kSame;
  // Reduce only along the x and y dimensions, according to the win_len.
  ReduceWindowAdd(input_data_handle, {win_len, win_len, 1, 1},
                  {win_stride, win_stride, 1, 1}, padding);

  auto result = ReferenceUtil::ReduceWindow4DAdd(
      input_array, 0.0f, {win_len, win_len, 1, 1},
      {win_stride, win_stride, 1, 1}, padding);

  ComputeAndCompareR4<float>(&builder_, *result, {}, ErrorSpec(1e-3, 1e-3));
}

// TODO(b/32173947): Test support for arbitrary-sized padding.
TEST_F(ReduceWindowTest, DISABLED_AmongMajor2DimsMediumSizeLargePadding) {
  Array4D<float> input_array(9, 12, 4, 89);  // simulate Dim0IsMinor layout
  input_array.FillRandom(2.0f);

  int64 rank = 4;
  int win_len = 3;
  int win_stride = 2;

  const auto input_data_handle =
      builder_.ConstantR4FromArray4D<float>(input_array);

  Padding padding = Padding::kSame;
  // Reduce only along the x and y dimensions, according to the win_len.
  // Create padding vector with large padding values in the reduction dims.
  std::vector<std::pair<int64, int64>> low_high_padding;
  low_high_padding.resize(rank, {4, 4});

  builder_.ReduceWindowWithGeneralPadding(
      input_data_handle, builder_.ConstantR0<float>(0.0f),
      CreateScalarAddComputation(F32, &builder_), {win_len, win_len, 1, 1},
      {win_stride, win_stride, 1, 1}, low_high_padding);

  // NOTE(review): the reference below is computed with Padding::kSame while
  // the computation above uses explicit {4, 4} general padding, so the two
  // would not agree — presumably why the test is DISABLED pending b/32173947;
  // confirm before enabling.
  auto result = ReferenceUtil::ReduceWindow4DAdd(
      input_array, 0.0f, {win_len, win_len, 1, 1},
      {win_stride, win_stride, 1, 1}, padding);

  ComputeAndCompareR4<float>(&builder_, *result, {}, ErrorSpec(1e-3, 1e-3));
}
// TODO(b/31809540): Implement minor dim reduction to reduce num of reshapes.
TEST_F(ReduceWindowTest, ReduceR4AmongXYMinorSmall) {
  Array4D<float> input_array(2, 2, 4, 16);

  Array2D<float> yx({{0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f,
                      11.f, 12.f, 13.f, 14.f, 15.f},
                     {16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f,
                      25.f, 26.f, 27.f, 28.f, 29.f, 30.f, 31.f},
                     {32.f, 33.f, 34.f, 35.f, 36.f, 37.f, 38.f, 39.f, 40.f,
                      41.f, 42.f, 43.f, 44.f, 45.f, 46.f, 47.f},
                     {48.f, 49.f, 50.f, 51.f, 52.f, 53.f, 54.f, 55.f, 56.f,
                      57.f, 58.f, 59.f, 60.f, 61.f, 62.f, 63.f}});
  input_array.FillWithYX(yx);

  int win_len = 2;
  int win_stride = 2;
  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
  Padding padding = Padding::kValid;
  ReduceWindowAdd(input, {1, 1, win_len, win_len},
                  {1, 1, win_stride, win_stride}, padding);

  auto res = ReferenceUtil::ReduceWindow4DAdd(
      input_array, 0.0f, {1, 1, win_len, win_len},
      {1, 1, win_stride, win_stride}, padding);
  ComputeAndCompareR4<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
}

// TODO(b/31809540): Implement minor dim reduction to reduce num of reshapes.
TEST_F(ReduceWindowTest, ReduceR4AmongXYMinorSmallOverlapped) {
  constexpr int64 p = 2;
  constexpr int64 z = 2;
  constexpr int64 y = 4;
  constexpr int64 x = 16;
  Array4D<float> input_array(p, z, y, x);

  Array2D<float> yx({{0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f,
                      11.f, 12.f, 13.f, 14.f, 15.f},
                     {16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f,
                      25.f, 26.f, 27.f, 28.f, 29.f, 30.f, 31.f},
                     {32.f, 33.f, 34.f, 35.f, 36.f, 37.f, 38.f, 39.f, 40.f,
                      41.f, 42.f, 43.f, 44.f, 45.f, 46.f, 47.f},
                     {48.f, 49.f, 50.f, 51.f, 52.f, 53.f, 54.f, 55.f, 56.f,
                      57.f, 58.f, 59.f, 60.f, 61.f, 62.f, 63.f}});
  input_array.FillWithYX(yx);

  // Window (4) larger than stride (2) -> overlapping windows.
  int win_len = 4;
  int win_stride = 2;
  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
  ReduceWindowAdd(input, {1, 1, win_len, win_len},
                  {1, 1, win_stride, win_stride}, Padding::kValid);

  // Expected result
  Array2D<float> yx_result({{408.f, 440.f, 472.f, 504.f, 536.f, 568.f, 600.f}});
  Array4D<float> expected(p, z, 1, 7);
  expected.FillWithYX(yx_result);
  ComputeAndCompareR4<float>(&builder_, expected, {}, ErrorSpec(1e-3, 1e-3));
}

// 1-D reduce-window cases with hand-computed expected values.  The inputs use
// distinct powers of ten so each expected sum uniquely identifies which
// elements were reduced.
TEST_F(ReduceWindowTest, MaxTrivial) {
  const auto input = builder_.ConstantR1<float>({42});
  ReduceWindowMax(input, {1}, {1}, Padding::kValid);
  ComputeAndCompareR1<float>(&builder_, {42}, {}, ErrorSpec(0.0001));
}

TEST_F(ReduceWindowTest, Add3In3) {
  const auto input = builder_.ConstantR1<float>({20, 100, 3});
  ReduceWindowAdd(input, {3}, {1}, Padding::kValid);
  ComputeAndCompareR1<float>(&builder_, {123}, {}, ErrorSpec(0.0001));
}

TEST_F(ReduceWindowTest, Add4In16Stride4) {
  const auto input = builder_.ConstantR1<float>(
      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
  ReduceWindowAdd(input, {4}, {4}, Padding::kValid);
  ComputeAndCompareR1<float>(&builder_, {10, 26, 42, 58}, {},
                             ErrorSpec(0.0001));
}

// Disabled on both CPU and GPU backends via the test macros.
TEST_F(ReduceWindowTest, DISABLED_ON_CPU(DISABLED_ON_GPU(Min3In5Stride2))) {
  const auto input = builder_.ConstantR1<float>({10000, 1000, 100, 10, 1});
  ReduceWindowMin(input, {3}, {2}, Padding::kValid);
  ComputeAndCompareR1<float>(&builder_, {100, 1}, {}, ErrorSpec(0.0001));
}

TEST_F(ReduceWindowTest, Max3In3) {
  const auto input = builder_.ConstantR1<float>({20, 100, 3});
  ReduceWindowMax(input, {3}, {1}, Padding::kValid);
  ComputeAndCompareR1<float>(&builder_, {100}, {}, ErrorSpec(0.0001));
}

TEST_F(ReduceWindowTest, Add2In3) {
  const auto input = builder_.ConstantR1<float>({100, 10, 1});
  ReduceWindowAdd(input, {2}, {1}, Padding::kValid);
  ComputeAndCompareR1<float>(&builder_, {110, 11}, {}, ErrorSpec(0.0001));
}

TEST_F(ReduceWindowTest, Add3In5Stride2) {
  const auto input = builder_.ConstantR1<float>({10000, 1000, 100, 10, 1});
  ReduceWindowAdd(input, {3}, {2}, Padding::kValid);
  ComputeAndCompareR1<float>(&builder_, {11100, 111}, {}, ErrorSpec(0.0001));
}

TEST_F(ReduceWindowTest, Max4In16Stride4) {
  const auto input = builder_.ConstantR1<float>(
      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
  ReduceWindowMax(input, {4}, {4}, Padding::kValid);
  ComputeAndCompareR1<float>(&builder_, {4, 8, 12, 16}, {}, ErrorSpec(0.0001));
}

TEST_F(ReduceWindowTest, Max4In16Stride3) {
  const auto input = builder_.ConstantR1<float>(
      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
  ReduceWindowMax(input, {4}, {3}, Padding::kValid);
  ComputeAndCompareR1<float>(&builder_, {4, 7, 10, 13, 16}, {},
                             ErrorSpec(0.0001));
}

TEST_F(ReduceWindowTest, Max4In16Stride8) {
  const auto input = builder_.ConstantR1<float>(
      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
  ReduceWindowMax(input, {4}, {8}, Padding::kValid);
  ComputeAndCompareR1<float>(&builder_, {4, 12}, {}, ErrorSpec(0.0001));
}

TEST_F(ReduceWindowTest, Max3In5Stride2) {
  const auto input = builder_.ConstantR1<float>({10000, 1000, 100, 10, 1});
  ReduceWindowMax(input, {3}, {2}, Padding::kValid);
  ComputeAndCompareR1<float>(&builder_, {10000, 100}, {}, ErrorSpec(0.0001));
}

TEST_F(ReduceWindowTest, Max3In5Stride1) {
  const auto input = builder_.ConstantR1<float>({10000, 1000, 100, 10, 101});
  ReduceWindowMax(input, {3}, {1}, Padding::kValid);
  ComputeAndCompareR1<float>(&builder_, {10000, 1000, 101}, {},
                             ErrorSpec(0.0001));
}

TEST_F(ReduceWindowTest, Add3In4Stride2) {
  const auto input = builder_.ConstantR1<float>({1000, 100, 10, 1});
  ReduceWindowAdd(input, {3}, {2}, Padding::kValid);
  ComputeAndCompareR1<float>(&builder_, {1110}, {}, ErrorSpec(0.0001));
}

XLA_TEST_F(ReduceWindowTest, Add2In3SamePad) {
  const auto input = builder_.ConstantR1<float>({100, 10, 1});
  ReduceWindowAdd(input, {2}, {1}, Padding::kSame);
  ComputeAndCompareR1<float>(&builder_, {110, 11, 1}, {}, ErrorSpec(0.0001));
}

XLA_TEST_F(ReduceWindowTest, Add3In3SamePad) {
  const auto input = builder_.ConstantR1<float>({100, 10, 1});
  ReduceWindowAdd(input, {3}, {1}, Padding::kSame);
  ComputeAndCompareR1<float>(&builder_, {110, 111, 11}, {}, ErrorSpec(0.0001));
}

// NOTE(review): the test name says Stride3 but the stride argument is {2};
// the expected {110, 11} matches stride 2, so the name looks like a misnomer.
// Confirm the intent and rename (renaming changes --gtest_filter matches).
XLA_TEST_F(ReduceWindowTest, Add3In3Stride3SamePad) {
  const auto input = builder_.ConstantR1<float>({100, 10, 1});
  ReduceWindowAdd(input, {3}, {2}, Padding::kSame);
  ComputeAndCompareR1<float>(&builder_, {110, 11}, {}, ErrorSpec(0.0001));
}

// 2-D windows: overlapping (stride 1) vs disjoint (stride == window).
TEST_F(ReduceWindowTest, Add2x2In2x2Overlapped) {
  Array2D<float> input_array({{1.2f, -2.5f, 0.9f, 1.0f},
                              {3.7f, 0.2f, -1.0f, -0.2f},
                              {-0.4f, 2.7f, 1.1f, 2.2f},
                              {0.6f, 1.7f, 1.4f, -0.2f}});
  auto input = builder_.ConstantR2FromArray2D<float>(input_array);
  ReduceWindowAdd(input, {2, 2}, {1, 1}, Padding::kValid);
  Array2D<float> expected(
      {{2.6f, -2.4f, 0.7f}, {6.2f, 3.0f, 2.1f}, {4.6f, 6.9f, 4.5f}});
  ComputeAndCompareR2<float>(&builder_, expected, {}, ErrorSpec(0.0001));
}

TEST_F(ReduceWindowTest, Add2x2In2x2Disjoint) {
  Array2D<float> input_array({{1.2f, -2.5f, 0.9f, 1.0f},
                              {3.7f, 0.2f, -1.0f, -0.2f},
                              {-0.4f, 2.7f, 1.1f, 2.2f},
                              {0.6f, 1.7f, 1.4f, -0.2f}});
  auto input = builder_.ConstantR2FromArray2D<float>(input_array);
  ReduceWindowAdd(input, {2, 2}, {2, 2}, Padding::kValid);
  Array2D<float> expected({
      {2.6f, 0.7f}, {4.6f, 4.5f},
  });
  ComputeAndCompareR2<float>(&builder_, expected, {}, ErrorSpec(0.0001));
}

// 3-D windows along the minor dimension only.
XLA_TEST_F(ReduceWindowTest, Add1x1x2In2x1x2) {
  Array3D<float> input_array(2, 1, 2);
  input_array(0, 0, 0) = 1000;
  input_array(0, 0, 1) = 100;
  input_array(1, 0, 0) = 10;
  input_array(1, 0, 1) = 1;
  auto input = builder_.ConstantR3FromArray3D<float>(input_array);

  ReduceWindowAdd(input, {1, 1, 2}, {1, 1, 1}, Padding::kValid);

  Array3D<float> expected(2, 1, 1);
  expected(0, 0, 0) = 1100;
  expected(1, 0, 0) = 11;
  ComputeAndCompareR3<float>(&builder_, expected, {}, ErrorSpec(0.0001));
}

XLA_TEST_F(ReduceWindowTest, Add1x1x2In2x1x3Stride1x1x2) {
  Array3D<float> input_array(2, 1, 3);
  input_array(0, 0, 0) = 100;
  input_array(0, 0, 1) = 10;
  input_array(0, 0, 2) = 1;
  input_array(1, 0, 0) = 500;
  input_array(1, 0, 1) = 50;
  input_array(1, 0, 2) = 5;
  auto input = builder_.ConstantR3FromArray3D<float>(input_array);

  ReduceWindowAdd(input, {1, 1, 2}, {1, 1, 2}, Padding::kValid);

  Array3D<float> expected(2, 1, 1);
  expected(0, 0, 0) = 110;
  expected(1, 0, 0) = 550;
  ComputeAndCompareR3<float>(&builder_, expected, {}, ErrorSpec(0.0001));
}

XLA_TEST_F(ReduceWindowTest, Add1x1x2In2x1x3SamePad) {
  Array3D<float> input_array(2, 1, 3);
  input_array(0, 0, 0) = 100;
  input_array(0, 0, 1) = 10;
  input_array(0, 0, 2) = 1;
  input_array(1, 0, 0) = 500;
  input_array(1, 0, 1) = 50;
  input_array(1, 0, 2) = 5;
  auto input = builder_.ConstantR3FromArray3D<float>(input_array);

  ReduceWindowAdd(input, {1, 1, 2}, {1, 1, 1}, Padding::kSame);

  Array3D<float> expected(2, 1, 3);
  expected(0, 0, 0) = 110;
  expected(0, 0, 1) = 11;
  expected(0, 0, 2) = 1;
  expected(1, 0, 0) = 550;
  expected(1, 0, 1) = 55;
  expected(1, 0, 2) = 5;
  ComputeAndCompareR3<float>(&builder_, expected,
{}, ErrorSpec(0.0001)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/replay_test.cc b/tensorflow/compiler/xla/tests/replay_test.cc new file mode 100644 index 0000000000..802087b508 --- /dev/null +++ b/tensorflow/compiler/xla/tests/replay_test.cc @@ -0,0 +1,168 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
==============================================================================*/

#include <memory>

#include "tensorflow/compiler/xla/client/computation.h"
#include "tensorflow/compiler/xla/client/computation_builder.h"
#include "tensorflow/compiler/xla/client/global_data.h"
#include "tensorflow/compiler/xla/client/local_client.h"
#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h"
#include "tensorflow/compiler/xla/literal_util.h"
#include "tensorflow/compiler/xla/protobuf_util.h"
#include "tensorflow/compiler/xla/service/session.pb.h"
#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
#include "tensorflow/compiler/xla/tests/literal_test_util.h"
#include "tensorflow/compiler/xla/tests/test_macros.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/types.h"

namespace xla {
namespace {

// Tests that a computation can be serialized to a SessionModule proto,
// reloaded via LoadSnapshot, and executed with identical results.
class ReplayTest : public ClientLibraryTestBase {};

TEST_F(ReplayTest, TwoPlusTwoReplay) {
  // Make 2+2 computation.
  ComputationBuilder builder(client_, TestName());
  auto two = builder.ConstantR0<int32>(2);
  builder.Add(two, two);
  Computation computation = builder.Build().ConsumeValueOrDie();

  // Serialize it out.
  std::unique_ptr<SessionModule> module =
      computation.Snapshot().ConsumeValueOrDie();

  // Replay it.
  Computation replayed = client_->LoadSnapshot(*module).ConsumeValueOrDie();

  // Check signature is the same.
  std::unique_ptr<ProgramShape> original_shape =
      client_->GetComputationShape(computation).ConsumeValueOrDie();
  std::unique_ptr<ProgramShape> replayed_shape =
      client_->GetComputationShape(replayed).ConsumeValueOrDie();
  ASSERT_TRUE(protobuf_util::ProtobufEquals(*original_shape, *replayed_shape));

  // Run it.
  std::unique_ptr<Literal> literal =
      client_->ExecuteAndTransfer(replayed, /*arguments=*/{})
          .ConsumeValueOrDie();

  // Expect 4.
  LiteralTestUtil::ExpectR0Equal<int32>(4, *literal);
}

// Same round-trip as above, but the computation takes runtime parameters,
// which must survive serialization as part of the program shape.
XLA_TEST_F(ReplayTest, XPlusYReplayWithParameters) {
  // Make computation.
  ComputationBuilder builder(client_, TestName());
  auto x = builder.Parameter(0, ShapeUtil::MakeShape(S32, {}), "x");
  auto y = builder.Parameter(1, ShapeUtil::MakeShape(S32, {}), "y");
  builder.Add(x, y);
  Computation computation = builder.Build().ConsumeValueOrDie();

  // Serialize it out.
  std::unique_ptr<SessionModule> module =
      computation.Snapshot().ConsumeValueOrDie();

  // Replay it.
  Computation replayed = client_->LoadSnapshot(*module).ConsumeValueOrDie();

  // Check signature is the same.
  std::unique_ptr<ProgramShape> original_shape =
      client_->GetComputationShape(computation).ConsumeValueOrDie();
  std::unique_ptr<ProgramShape> replayed_shape =
      client_->GetComputationShape(replayed).ConsumeValueOrDie();
  ASSERT_TRUE(protobuf_util::ProtobufEquals(*original_shape, *replayed_shape));

  // Run it.
  std::unique_ptr<GlobalData> x_data =
      client_->TransferToServer(*LiteralUtil::CreateR0<int32>(2))
          .ConsumeValueOrDie();
  std::unique_ptr<GlobalData> y_data =
      client_->TransferToServer(*LiteralUtil::CreateR0<int32>(3))
          .ConsumeValueOrDie();
  std::unique_ptr<Literal> literal =
      client_
          ->ExecuteAndTransfer(replayed,
                               /*arguments=*/{x_data.get(), y_data.get()})
          .ConsumeValueOrDie();

  // Expect 5.
  LiteralTestUtil::ExpectR0Equal<int32>(5, *literal);
}

TEST_F(ReplayTest, MapPlusTwoOverR1) {
  // As above, but with map(+2) over some constant array.
  ComputationBuilder plus_two_builder(client_, "plus two");
  auto input =
      plus_two_builder.Parameter(0, ShapeUtil::MakeShape(S32, {}), "input");
  plus_two_builder.Add(input, plus_two_builder.ConstantR0<int32>(2));
  Computation plus_two = plus_two_builder.Build().ConsumeValueOrDie();

  ComputationBuilder mapper_builder(client_, TestName());
  auto original = mapper_builder.ConstantR1<int32>({1, 2, 3});
  mapper_builder.Map({original}, plus_two);

  Computation computation = mapper_builder.Build().ConsumeValueOrDie();

  // Serialize it out.
  std::unique_ptr<SessionModule> module =
      computation.Snapshot().ConsumeValueOrDie();

  // Replay it.
  Computation replayed = client_->LoadSnapshot(*module).ConsumeValueOrDie();

  // Check signature is the same.
  std::unique_ptr<ProgramShape> original_shape =
      client_->GetComputationShape(computation).ConsumeValueOrDie();
  std::unique_ptr<ProgramShape> replayed_shape =
      client_->GetComputationShape(replayed).ConsumeValueOrDie();
  ASSERT_TRUE(protobuf_util::ProtobufEquals(*original_shape, *replayed_shape));

  // Destroy the originals — the replayed computation (including its embedded
  // mapped subcomputation) must be executable on its own.
  computation.Reset();
  plus_two.Reset();

  // Run it.
  std::unique_ptr<Literal> literal =
      client_->ExecuteAndTransfer(replayed, /*arguments=*/{})
          .ConsumeValueOrDie();

  // Expect result.
+ LiteralTestUtil::ExpectR1Equal<int32>({3, 4, 5}, *literal); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/reshape_motion_test.cc b/tensorflow/compiler/xla/tests/reshape_motion_test.cc new file mode 100644 index 0000000000..ce309eb743 --- /dev/null +++ b/tensorflow/compiler/xla/tests/reshape_motion_test.cc @@ -0,0 +1,77 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> +#include <numeric> +#include <random> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +using ReshapeMotionTest = ClientLibraryTestBase; + +TEST_F(ReshapeMotionTest, ElementwiseOfReshapesWithNonSameInputShapes) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2<int32>({{2, 3, 5}, {7, 11, 13}}); + auto b = builder.ConstantR2<int32>({{17, 19}, {23, 29}, {31, 37}}); + auto c = builder.Reshape(a, {6}); + auto d = builder.Reshape(b, {6}); + auto e = builder.Mul(c, d); + + ComputeAndCompareR1<int32>(&builder, {34, 57, 115, 203, 341, 481}, {}); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + 
xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc new file mode 100644 index 0000000000..a9159d39ca --- /dev/null +++ b/tensorflow/compiler/xla/tests/reshape_test.cc @@ -0,0 +1,811 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> +#include <numeric> +#include <random> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class ReshapeTest : public ClientLibraryTestBase { + public: + ErrorSpec zero_error_spec_{0.0}; +}; + +// Collapses 2-dimensional pseudo-scalar (single-element array) to 1 dimension. +XLA_TEST_F(ReshapeTest, Trivial1x1) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2<float>({{1.0}}); + builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1}); + + ComputeAndCompareR1<float>(&builder, {1.0f}, {}, zero_error_spec_); +} + +// Collapses 2-dimensional pseudo-scalar (single-element array) to scalar. 
+XLA_TEST_F(ReshapeTest, SingleElementArrayToScalar) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2<float>({{1.0}}); + auto reshape = + builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1}, /*new_sizes=*/{}); + auto new_shape = builder.GetShape(reshape).ConsumeValueOrDie(); + + ComputeAndCompareR0<float>(&builder, 1.0f, {}, zero_error_spec_); +} + +XLA_TEST_F(ReshapeTest, Trivial0x3) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 3)); + auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1}); + + ComputeAndCompareR1<float>(&builder, {}, {}, zero_error_spec_); +} + +XLA_TEST_F(ReshapeTest, Trivial3x0) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(3, 0)); + auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1}); + + ComputeAndCompareR1<float>(&builder, {}, {}, zero_error_spec_); +} + +// Collapses a 2-dimensional row vector to 1 dimension. +XLA_TEST_F(ReshapeTest, Trivial1x3) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2<float>({{1.0f, 2.0f, 3.0f}}); + auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1}); + + ComputeAndCompareR1<float>(&builder, {1.0f, 2.0f, 3.0f}, {}, + zero_error_spec_); +} + +// Collapses a 2-dimensional column vector to 1 dimension. +XLA_TEST_F(ReshapeTest, Trivial3x1) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2<float>({{1.0f}, {2.0f}, {3.0f}}); + auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1}); + + ComputeAndCompareR1<float>(&builder, {1.0f, 2.0f, 3.0f}, {}, + zero_error_spec_); +} + +// Splits an empty vector into an empty matrix. 
+XLA_TEST_F(ReshapeTest, R1ToR2_0_To_2x0) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>({}); + auto result = + builder.Reshape(/*operand=*/a, /*dimensions=*/{0}, /*new_sizes=*/{2, 0}); + ComputeAndCompareR2<float>(&builder, Array2D<float>(2, 0), {}, + zero_error_spec_); +} + +// Splits a vector into a matrix. +XLA_TEST_F(ReshapeTest, R1ToR2_6_To_2x3) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1<float>({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}); + auto result = + builder.Reshape(/*operand=*/a, /*dimensions=*/{0}, /*new_sizes=*/{2, 3}); + Array2D<float> expected_2x3({{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}}); + ComputeAndCompareR2<float>(&builder, expected_2x3, {}, zero_error_spec_); +} + +// Transposes a 2x0 array to a 0x2 array. +XLA_TEST_F(ReshapeTest, Reshape0x2To2x0) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 2)); + auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1}, + /*new_sizes=*/{2, 0}); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(2, 0), {}, + zero_error_spec_); +} + +// Transposes a 2-dimensional row vector to a column vector. +XLA_TEST_F(ReshapeTest, ReshapeRowToCol) { + ComputationBuilder builder(client_, TestName()); + auto simple = MakeLinspaceArray2D(1.0f, 3.0f, 1, 3); + auto a = builder.ConstantR2FromArray2D<float>(*simple); + auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1}, + /*new_sizes=*/{3, 1}); + + auto expected = ReferenceUtil::TransposeArray2D(*simple); + ComputeAndCompareR2<float>(&builder, *expected, {}, zero_error_spec_); +} + +// Transposes a 2-dimensional array. 
+XLA_TEST_F(ReshapeTest, TransposeAsReshape) {
+  ComputationBuilder builder(client_, TestName());
+  auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3);
+  auto a = builder.ConstantR2FromArray2D<float>(*a4x3);
+  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{1, 0},
+                                /*new_sizes=*/{3, 4});
+
+  auto expected3x4 = ReferenceUtil::TransposeArray2D(*a4x3);
+  ComputeAndCompareR2<float>(&builder, *expected3x4, {}, zero_error_spec_);
+}
+
+// Transposes a 0x4 array with ComputationBuilder::Transpose.
+XLA_TEST_F(ReshapeTest, Transpose0x4) {
+  ComputationBuilder builder(client_, TestName());
+  auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 4));
+  auto result = builder.Transpose(a, {1, 0});
+
+  ComputeAndCompareR2<float>(&builder, Array2D<float>(4, 0), {},
+                             zero_error_spec_);
+}
+
+// Transposes a 2-dimensional array with ComputationBuilder::Transpose.
+XLA_TEST_F(ReshapeTest, Transpose4x3) {
+  ComputationBuilder builder(client_, TestName());
+  auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3);
+  auto a = builder.ConstantR2FromArray2D<float>(*a4x3);
+  auto result = builder.Transpose(a, {1, 0});
+
+  auto expected3x4 = ReferenceUtil::TransposeArray2D(*a4x3);
+  ComputeAndCompareR2<float>(&builder, *expected3x4, {}, zero_error_spec_);
+}
+
+// Reshapes an empty 2-dimensional array with dimensions that are not just a
+// rearrangement of the originals (split), but no reordering (no shuffle).
+XLA_TEST_F(ReshapeTest, ReshapeSplitNoShuffleZeroElements) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(6, 0)); + auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1}, + /*new_sizes=*/{2, 3, 0, 0}); + + ComputeAndCompareR4<float>(&builder, Array4D<float>(2, 3, 0, 0), {}, + zero_error_spec_); +} + +XLA_TEST_F(ReshapeTest, ReshapeR4ToR2ZeroElements) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR4FromArray4D<float>(Array4D<float>(2, 3, 4, 0)); + auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1, 2, 3}, + /*new_sizes=*/{24, 0}); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(24, 0), {}, + zero_error_spec_); +} + +// Reshapes a 2-dimensional array with dimensions that are not just a +// rearrangement of the originals (split), but no reordering (no shuffle). +XLA_TEST_F(ReshapeTest, ReshapeSplitNoShuffle) { + ComputationBuilder builder(client_, TestName()); + auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3); + auto a = builder.ConstantR2FromArray2D<float>(*a4x3); + auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1}, + /*new_sizes=*/{2, 6}); + + auto expected2x6 = MakeLinspaceArray2D(1.0f, 12.0f, 2, 6); + ComputeAndCompareR2<float>(&builder, *expected2x6, {}, zero_error_spec_); +} + +// Reshapes a 2-dimensional array with dimensions that are not just a +// rearrangement of the originals (split), and reorder the input (shuffle). 
+XLA_TEST_F(ReshapeTest, ReshapeSplitAndShuffleZeroElements) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 6)); + auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{1, 0}, + /*new_sizes=*/{3, 0}); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(3, 0), {}, + zero_error_spec_); +} + +// Reshapes a 2-dimensional array with dimensions that are not just a +// rearrangement of the originals (split), and reorder the input (shuffle). +XLA_TEST_F(ReshapeTest, ReshapeSplitAndShuffle) { + ComputationBuilder builder(client_, TestName()); + auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3); + auto a = builder.ConstantR2FromArray2D<float>(*a4x3); + auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{1, 0}, + /*new_sizes=*/{2, 6}); + + Array2D<float> expected2x6({{1.0f, 4.0f, 7.0f, 10.0f, 2.0f, 5.0f}, + {8.0f, 11.0f, 3.0f, 6.0f, 9.0f, 12.0f}}); + ComputeAndCompareR2<float>(&builder, expected2x6, {}, zero_error_spec_); +} + +// The following tests use the same input 3D array; they test the examples we +// show for the Reshape operation in the operation_semantics document. +// TODO(eliben): find a way to show this code in the documentation without +// duplication. 
+Array3D<int> v_array_for_doc_R3_tests({{{10, 11, 12}, {15, 16, 17}}, + {{20, 21, 22}, {25, 26, 27}}, + {{30, 31, 32}, {35, 36, 37}}, + {{40, 41, 42}, {45, 46, 47}}}); + +XLA_TEST_F(ReshapeTest, DocR3_R1_Collapse_012) { + ComputationBuilder builder(client_, TestName()); + auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests); + auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{0, 1, 2}, + /*new_sizes=*/{24}); + ComputeAndCompareR1<int>(&builder, + {10, 11, 12, 15, 16, 17, 20, 21, 22, 25, 26, 27, + 30, 31, 32, 35, 36, 37, 40, 41, 42, 45, 46, 47}, + {}); +} + +XLA_TEST_F(ReshapeTest, DocR3_R2_Collapse_012_Refine_83) { + ComputationBuilder builder(client_, TestName()); + auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests); + auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{0, 1, 2}, + /*new_sizes=*/{8, 3}); + Array2D<int> expected({{10, 11, 12}, + {15, 16, 17}, + {20, 21, 22}, + {25, 26, 27}, + {30, 31, 32}, + {35, 36, 37}, + {40, 41, 42}, + {45, 46, 47}}); + ComputeAndCompareR2<int>(&builder, expected, {}); +} + +XLA_TEST_F(ReshapeTest, DocR3_R1_Collapse_120) { + ComputationBuilder builder(client_, TestName()); + auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests); + auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{1, 2, 0}, + /*new_sizes=*/{24}); + ComputeAndCompareR1<int>(&builder, + {10, 20, 30, 40, 11, 21, 31, 41, 12, 22, 32, 42, + 15, 25, 35, 45, 16, 26, 36, 46, 17, 27, 37, 47}, + {}); +} + +XLA_TEST_F(ReshapeTest, DocR3_R2_Collapse_120_Refine_83) { + ComputationBuilder builder(client_, TestName()); + auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests); + auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{1, 2, 0}, + /*new_sizes=*/{8, 3}); + Array2D<int> expected({{10, 20, 30}, + {40, 11, 21}, + {31, 41, 12}, + {22, 32, 42}, + {15, 25, 35}, + {45, 16, 26}, + {36, 46, 17}, + {27, 37, 47}}); + ComputeAndCompareR2<int>(&builder, expected, {}); +} + 
+XLA_TEST_F(ReshapeTest, DocR3_R3_Collapse_120_Refine_262) {
+  ComputationBuilder builder(client_, TestName());
+  auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests);
+  auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{1, 2, 0},
+                                /*new_sizes=*/{2, 6, 2});
+  Array3D<int> expected(
+      {{{10, 20}, {30, 40}, {11, 21}, {31, 41}, {12, 22}, {32, 42}},
+       {{15, 25}, {35, 45}, {16, 26}, {36, 46}, {17, 27}, {37, 47}}});
+  ComputeAndCompareR3<int>(&builder, expected, {});
+}
+
+// Collapses the low dimensions of a 4D tensor to get a 2D matrix, without
+// reordering dimensions (for NeuralNet::FullyConnected).
+//
+// First we create a tesseract raster-face like:
+//
+// 1 2 3
+// 4 5 6
+//
+// First we collapse Y and X within the raster space yielding:
+//
+// 1 2 3 4 5 6
+//
+// Then we collapse Z so we just end up with planes:
+//
+// 1 2 3 4 5 6 1 2 3 4 5 6
+XLA_TEST_F(ReshapeTest, FullyConnectedCollapse) {
+  ComputationBuilder builder(client_, TestName());
+  Array4D<float> t2x2x2x3(2, 2, 2, 3);
+  auto filler2x3 = MakeLinspaceArray2D(1.0f, 6.0f, 2, 3);
+  t2x2x2x3.FillWithYX(*filler2x3);
+  auto a = builder.ConstantR4FromArray4D<float>(t2x2x2x3);
+  auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{1, 2, 3});
+
+  Array2D<float> expected2x12(
+      {{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f},
+       {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
+        6.0f}});
+  ComputeAndCompareR2<float>(&builder, expected2x12, {}, zero_error_spec_);
+}
+
+// As above, but uses reshape directly.
+XLA_TEST_F(ReshapeTest, FullyConnectedCollapseDesugared) { + ComputationBuilder builder(client_, TestName()); + Array4D<float> t(2, 1, 2, 2); + t(0, 0, 0, 0) = 0; + t(0, 0, 0, 1) = 1; + t(0, 0, 1, 0) = 2; + t(0, 0, 1, 1) = 3; + t(1, 0, 0, 0) = 4; + t(1, 0, 0, 1) = 5; + t(1, 0, 1, 0) = 6; + t(1, 0, 1, 1) = 7; + auto a = builder.ConstantR4FromArray4D<float>(t); + auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1, 2, 3}, + /*new_sizes=*/{2, 4}); + + Array2D<float> expected({{0, 1, 2, 3}, {4, 5, 6, 7}}); + ComputeAndCompareR2<float>(&builder, expected, {}, zero_error_spec_); +} + +// Reshape various ranks to a scalar. +XLA_TEST_F(ReshapeTest, ToScalar) { + for (int rank = 0; rank < 8; ++rank) { + ComputationBuilder b(client_, TestName()); + auto input = LiteralUtil::CreateR1<float>({83.0f}); + std::vector<int64> ones(rank, 1); // this is {1, ..., 1}. + std::vector<int64> dimensions(rank); + std::iota(dimensions.begin(), dimensions.end(), 0); + *input->mutable_shape() = ShapeUtil::MakeShape(F32, ones); + b.Reshape(b.ConstantLiteral(*input), dimensions, {}); + + ComputeAndCompareR0<float>(&b, 83.0f, {}, zero_error_spec_); + } +} + +XLA_TEST_F(ReshapeTest, BadDimensions) { + ComputationBuilder b(client_, TestName()); + b.Reshape(b.ConstantR1<int32>({1}), {}, {}); + EXPECT_MATCH(ExecuteToString(&b, {}), + testing::HasSubstr("dimensions not a permutation")); +} + +XLA_TEST_F(ReshapeTest, BadNewSizes) { + ComputationBuilder b(client_, TestName()); + b.Reshape(b.ConstantR1<int32>({1, 2}), {1}, {}); + EXPECT_MATCH(ExecuteToString(&b, {}), + testing::HasSubstr("mismatched element counts")); +} + +XLA_TEST_F(ReshapeTest, R4Dim0MinorLayoutToR2Dim0MajorLayout) { + const Shape parameter_shape = ShapeUtil::MakeShape(F32, {2, 2, 2, 2}); + ComputationBuilder builder(client_, TestName()); + auto a = builder.Parameter(0, parameter_shape, "a"); + builder.Reshape(a, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{2, 8}); + + // clang-format off + auto literal = 
LiteralUtil::CreateR4FromArray4DWithLayout(Array4D<float>{ + { + { + {0, 1}, + {2, 3}, + }, + { + {100, 101}, + {102, 103}, + }, + }, + { + { + {222, 333}, + {444, 555}, + }, + { + {666, 777}, + {888, 999}, + }, + }, + }, + LayoutUtil::MakeLayout({0, 1, 2, 3})); + // clang-format on + std::unique_ptr<GlobalData> input = + client_->TransferToServer(*literal).ConsumeValueOrDie(); + Array2D<float> expected_array({ + {0, 1, 2, 3, 100, 101, 102, 103}, + {222, 333, 444, 555, 666, 777, 888, 999}, + }); + + Computation computation = builder.Build().ConsumeValueOrDie(); + const Shape shape_with_output_layout = + ShapeUtil::MakeShapeWithLayout(F32, {2, 8}, {1, 0}); + std::unique_ptr<Literal> actual = + client_ + ->ExecuteAndTransfer(computation, {input.get()}, + &shape_with_output_layout) + .ConsumeValueOrDie(); + std::unique_ptr<Literal> expected = + LiteralUtil::CreateR2FromArray2D<float>(expected_array); + LiteralTestUtil::ExpectEqual(*expected, *actual); +} + +XLA_TEST_F(ReshapeTest, R2ToR4_3x8_To_3x2x1x4) { + std::unique_ptr<Literal> input = LiteralUtil::CreateR2<float>({ + {0, 1, 2, 3, 4, 5, 6, 7}, + {100, 101, 102, 103, 104, 105, 106, 107}, + {200, 201, 202, 203, 204, 205, 206, 207}, + }); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto a = builder.Parameter(0, input->shape(), "a"); + builder.Reshape(a, /*dimensions=*/{0, 1}, /*new_sizes=*/{3, 2, 1, 4}); + + // clang-format off + Array4D<float> expected = { + {{{0, 1, 2, 3}}, + {{4, 5, 6, 7}}}, + {{{100, 101, 102, 103}}, + {{104, 105, 106, 107}}}, + {{{200, 201, 202, 203}}, + {{204, 205, 206, 207}}} + }; + // clang-format on + ComputeAndCompareR4<float>(&builder, expected, {input_data.get()}, + zero_error_spec_); +} + +// Tests R2->R4 reshape with the reshape dimensions {1, 0}. 
+XLA_TEST_F(ReshapeTest, R2ToR4_3x8_To_3x2x1x4_Dimensions_10) { + std::unique_ptr<Literal> input = LiteralUtil::CreateR2<float>({ + {0, 1, 2, 3, 4, 5, 6, 7}, + {100, 101, 102, 103, 104, 105, 106, 107}, + {200, 201, 202, 203, 204, 205, 206, 207}, + }); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto a = builder.Parameter(0, input->shape(), "a"); + builder.Reshape(a, /*dimensions=*/{1, 0}, /*new_sizes=*/{3, 2, 1, 4}); + + // clang-format off + Array4D<float> expected = { + {{{0, 100, 200, 1}}, + {{101, 201, 2, 102}}}, + {{{202, 3, 103, 203}}, + {{4, 104, 204, 5}}}, + {{{105, 205, 6, 106}}, + {{206, 7, 107, 207}}} + }; + // clang-format on + ComputeAndCompareR4<float>(&builder, expected, {input_data.get()}, + zero_error_spec_); +} + +XLA_TEST_F(ReshapeTest, R4ToR2_2x1x1x1_To_2x1) { + std::mt19937 rng; + std::uniform_real_distribution<float> distribution; + Array4D<float> input(2, 1, 1, 1); + input.Each( + [&rng, &distribution](tensorflow::gtl::ArraySlice<int64> /* indices */, + float* cell) { *cell = distribution(rng); }); + std::unique_ptr<Literal> input_literal = + LiteralUtil::CreateR4FromArray4DWithLayout( + input, LayoutUtil::MakeLayout({3, 2, 1, 0})); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto a = builder.Parameter(0, input_literal->shape(), "a"); + builder.Reshape(a, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{2, 1}); + + std::unique_ptr<Literal> expected = + LiteralTestUtil::Reshape({2, 1}, {1, 0}, *input_literal); + ComputeAndCompareLiteral(&builder, *expected, {input_data.get()}, + zero_error_spec_); +} + +XLA_TEST_F(ReshapeTest, R4ToR2_2x1x4x1_To_4x2) { + std::mt19937 rng; + std::uniform_real_distribution<float> distribution; + Array4D<float> input(2, 1, 4, 1); + input.Each( + [&rng, 
&distribution](tensorflow::gtl::ArraySlice<int64> /* indices */, + float* cell) { *cell = distribution(rng); }); + std::unique_ptr<Literal> input_literal = + LiteralUtil::CreateR4FromArray4DWithLayout( + input, LayoutUtil::MakeLayout({3, 2, 1, 0})); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto a = builder.Parameter(0, input_literal->shape(), "a"); + builder.Reshape(a, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{4, 2}); + + std::unique_ptr<Literal> expected = + LiteralTestUtil::Reshape({4, 2}, {1, 0}, *input_literal); + ComputeAndCompareLiteral(&builder, *expected, {input_data.get()}, + zero_error_spec_); +} + +// Tests R4->R2 reshape with the reshape dimensions {0, 2, 1, 3}. +XLA_TEST_F(ReshapeTest, R4ToR2_5x10x2x3_To_5x60_Dimensions_0213) { + std::mt19937 rng; + std::uniform_real_distribution<float> distribution; + Array4D<float> input(5, 10, 2, 3); + input.Each( + [&rng, &distribution](tensorflow::gtl::ArraySlice<int64> /* indices */, + float* cell) { *cell = distribution(rng); }); + std::unique_ptr<Literal> input_literal = + LiteralUtil::CreateR4FromArray4DWithLayout( + input, LayoutUtil::MakeLayout({3, 2, 1, 0})); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto a = builder.Parameter(0, input_literal->shape(), "a"); + builder.Reshape(a, /*dimensions=*/{0, 2, 1, 3}, /*new_sizes=*/{5, 60}); + + Array2D<float> expected_array(5, 60); + input.Each([&](tensorflow::gtl::ArraySlice<int64> indices, float* cell) { + expected_array(indices[0], indices[2] * 30 + indices[1] * 3 + indices[3]) = + *cell; + }); + auto expected = LiteralUtil::CreateR2FromArray2D(expected_array); + ComputeAndCompareLiteral(&builder, *expected, {input_data.get()}); +} + +XLA_TEST_F(ReshapeTest, NoopReshape) { + std::mt19937 rng; + 
std::uniform_real_distribution<float> distribution; + Array4D<float> input_array(2, 3, 5, 7); + input_array.Each( + [&rng, &distribution](tensorflow::gtl::ArraySlice<int64> /* indices */, + float* cell) { *cell = distribution(rng); }); + std::unique_ptr<Literal> input_literal = + LiteralUtil::CreateR4FromArray4DWithLayout( + input_array, LayoutUtil::MakeLayout({1, 2, 3, 0})); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto input = builder.Parameter(0, input_literal->shape(), "input"); + builder.Reshape(input, /*dimensions=*/{3, 0, 1, 2}, + /*new_sizes=*/{7, 2, 3, 5}); + Computation computation = builder.Build().ConsumeValueOrDie(); + + const Shape output_shape_with_layout = + ShapeUtil::MakeShapeWithLayout(F32, {7, 2, 3, 5}, {2, 3, 0, 1}); + std::unique_ptr<Literal> output_literal = + client_ + ->ExecuteAndTransfer(computation, {input_data.get()}, + &output_shape_with_layout) + .ConsumeValueOrDie(); + + // Since the reshape is a no-op, verify that it does not change the underlying + // data. 
+ EXPECT_EQ(tensorflow::gtl::ArraySlice<float>(input_literal->f32s()), + tensorflow::gtl::ArraySlice<float>(output_literal->f32s())); +} + +XLA_TEST_F(ReshapeTest, R4ToR4Reshape_Trivial) { + auto literal_1x2x3x4 = LiteralUtil::CreateR4( + {{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}, + {{13, 14, 15, 16}, {17, 18, 19, 20}, {21, 22, 23, 24}}}}); + + ComputationBuilder builder(client_, TestName()); + auto input = builder.ConstantLiteral(*literal_1x2x3x4); + builder.Reshape(input, /*dimensions=*/{0, 1, 2, 3}, + /*new_sizes=*/{1, 2, 3, 4}); + + ComputeAndCompareLiteral(&builder, *literal_1x2x3x4, {}); +} + +XLA_TEST_F(ReshapeTest, R4ToR4Reshape) { + auto literal_1x2x3x4 = LiteralUtil::CreateR4( + {{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}, + {{13, 14, 15, 16}, {17, 18, 19, 20}, {21, 22, 23, 24}}}}); + + ComputationBuilder builder(client_, TestName()); + auto input = builder.ConstantLiteral(*literal_1x2x3x4); + builder.Reshape(input, /*dimensions=*/{1, 3, 2, 0}, + /*new_sizes=*/{2, 4, 3, 1}); + + // clang-format off + auto expected_2x4x3x1 = LiteralUtil::CreateR4( + {{{{1}, {5}, {9}}, + {{2}, {6}, {10}}, + {{3}, {7}, {11}}, + {{4}, {8}, {12}}}, + {{{13}, {17}, {21}}, + {{14}, {18}, {22}}, + {{15}, {19}, {23}}, + {{16}, {20}, {24}}}}); + // clang-format on + + ComputeAndCompareLiteral(&builder, *expected_2x4x3x1, {}); +} + +XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeSimple) { + std::mt19937 rng; + std::uniform_real_distribution<float> distribution; + std::vector<int64> bounds = {2, 2, 2, 2}; + std::vector<int64> new_bounds = {bounds[0], bounds[1], bounds[3], bounds[2]}; + Array4D<float> input(bounds[0], bounds[1], bounds[2], bounds[3]); + input.Each( + [&rng, &distribution](tensorflow::gtl::ArraySlice<int64> /* indices */, + float* cell) { *cell = distribution(rng); }); + std::unique_ptr<Literal> input_literal = + LiteralUtil::CreateR4FromArray4DWithLayout( + input, LayoutUtil::MakeLayout({3, 2, 1, 0})); + std::unique_ptr<GlobalData> input_data = + 
client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto a = builder.Parameter(0, input_literal->shape(), "a"); + builder.Reshape(a, /*dimensions=*/{0, 1, 3, 2}, /*new_sizes=*/new_bounds); + + std::unique_ptr<Literal> expected = LiteralUtil::Relayout( + *LiteralTestUtil::Reshape(new_bounds, {2, 3, 1, 0}, *input_literal), + LayoutUtil::MakeLayout({3, 2, 1, 0})); + + // Specify the requested output shape explicitly to ensure that this reshape + // actually corresponds to a two minor transpose. + ComputeAndCompareLiteral(&builder, *expected, {input_data.get()}, + zero_error_spec_, &expected->shape()); +} + +XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstEffectiveR2) { + std::mt19937 rng; + std::uniform_real_distribution<float> distribution; + std::vector<int64> bounds = {1, 1, 250, 300}; + std::vector<int64> new_bounds = {bounds[0], bounds[1], bounds[3], bounds[2]}; + Array4D<float> input(bounds[0], bounds[1], bounds[2], bounds[3]); + input.Each( + [&rng, &distribution](tensorflow::gtl::ArraySlice<int64> /* indices */, + float* cell) { *cell = distribution(rng); }); + std::unique_ptr<Literal> input_literal = + LiteralUtil::CreateR4FromArray4DWithLayout( + input, LayoutUtil::MakeLayout({3, 2, 1, 0})); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto a = builder.Parameter(0, input_literal->shape(), "a"); + builder.Reshape(a, /*dimensions=*/{0, 1, 3, 2}, /*new_sizes=*/new_bounds); + + std::unique_ptr<Literal> expected = LiteralUtil::Relayout( + *LiteralTestUtil::Reshape(new_bounds, {2, 3, 1, 0}, *input_literal), + LayoutUtil::MakeLayout({3, 2, 1, 0})); + + // Specify the requested output shape explicitly to ensure that this reshape + // actually corresponds to a two minor transpose. 
+ ComputeAndCompareLiteral(&builder, *expected, {input_data.get()}, + zero_error_spec_, &expected->shape()); +} + +XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1) { + std::mt19937 rng; + std::uniform_real_distribution<float> distribution; + std::vector<int64> bounds = {5, 5, 1, 10}; + std::vector<int64> new_bounds = {bounds[0], bounds[1], bounds[3], bounds[2]}; + Array4D<float> input(bounds[0], bounds[1], bounds[2], bounds[3]); + input.Each( + [&rng, &distribution](tensorflow::gtl::ArraySlice<int64> /* indices */, + float* cell) { *cell = distribution(rng); }); + std::unique_ptr<Literal> input_literal = + LiteralUtil::CreateR4FromArray4DWithLayout( + input, LayoutUtil::MakeLayout({3, 2, 1, 0})); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto a = builder.Parameter(0, input_literal->shape(), "a"); + builder.Reshape(a, /*dimensions=*/{0, 1, 3, 2}, /*new_sizes=*/new_bounds); + + std::unique_ptr<Literal> expected = LiteralUtil::Relayout( + *LiteralTestUtil::Reshape(new_bounds, {2, 3, 1, 0}, *input_literal), + LayoutUtil::MakeLayout({3, 2, 1, 0})); + + // Specify the requested output shape explicitly to ensure that this reshape + // actually corresponds to a two minor transpose. + ComputeAndCompareLiteral(&builder, *expected, {input_data.get()}, + zero_error_spec_, &expected->shape()); +} + +XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1InR2) { + std::mt19937 rng; + std::uniform_real_distribution<float> distribution; + // This happens in NN-Builder MNIST. 
+ std::vector<int64> bounds = {5, 5, 10, 1}; + std::vector<int64> new_bounds = {bounds[0], bounds[1], bounds[3], bounds[2]}; + Array4D<float> input(bounds[0], bounds[1], bounds[2], bounds[3]); + input.Each( + [&rng, &distribution](tensorflow::gtl::ArraySlice<int64> /* indices */, + float* cell) { *cell = distribution(rng); }); + std::unique_ptr<Literal> input_literal = + LiteralUtil::CreateR4FromArray4DWithLayout( + input, LayoutUtil::MakeLayout({3, 2, 1, 0})); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto a = builder.Parameter(0, input_literal->shape(), "a"); + builder.Reshape(a, /*dimensions=*/{0, 1, 3, 2}, /*new_sizes=*/new_bounds); + + std::unique_ptr<Literal> expected = LiteralUtil::Relayout( + *LiteralTestUtil::Reshape(new_bounds, {2, 3, 1, 0}, *input_literal), + LayoutUtil::MakeLayout({3, 2, 1, 0})); + + // Specify the requested output shape explicitly to ensure that this reshape + // actually corresponds to a two minor transpose. 
+ ComputeAndCompareLiteral(&builder, *expected, {input_data.get()}, + zero_error_spec_, &expected->shape()); +} + +XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeTrivialR2) { + std::mt19937 rng; + std::uniform_real_distribution<float> distribution; + std::vector<int64> bounds = {3, 3, 1, 3}; + std::vector<int64> new_bounds = {bounds[1], bounds[0], bounds[2], bounds[3]}; + Array4D<float> input(bounds[0], bounds[1], bounds[2], bounds[3]); + input.Each( + [&rng, &distribution](tensorflow::gtl::ArraySlice<int64> /* indices */, + float* cell) { *cell = distribution(rng); }); + std::unique_ptr<Literal> input_literal = + LiteralUtil::CreateR4FromArray4DWithLayout( + input, LayoutUtil::MakeLayout({0, 1, 2, 3})); + std::unique_ptr<GlobalData> input_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto a = builder.Parameter(0, input_literal->shape(), "a"); + builder.Reshape(a, /*dimensions=*/{1, 0, 2, 3}, /*new_sizes=*/new_bounds); + + std::unique_ptr<Literal> expected = LiteralUtil::Relayout( + *LiteralTestUtil::Reshape(new_bounds, {1, 0, 2, 3}, *input_literal), + input_literal->shape().layout()); + + // Specify the requested output shape explicitly to ensure that this reshape + // actually corresponds to a two minor transpose. 
+ ComputeAndCompareLiteral(&builder, *expected, {input_data.get()}, + zero_error_spec_, &expected->shape()); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/reverse_test.cc b/tensorflow/compiler/xla/tests/reverse_test.cc new file mode 100644 index 0000000000..63dd4421fa --- /dev/null +++ b/tensorflow/compiler/xla/tests/reverse_test.cc @@ -0,0 +1,173 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class ReverseTest : public ClientLibraryTestBase {}; + +// Tests the reverse operation on a scalar. +XLA_TEST_F(ReverseTest, ReverseScalar) { + ComputationBuilder b(client_, TestName()); + float input = 3.5f; + b.Rev(b.ConstantR0<float>(input), {}); + ComputeAndCompareR0<float>(&b, input, {}); +} + +// Tests the reverse operation on a 0x0 float array on both dimensions. +XLA_TEST_F(ReverseTest, Reverse0x0FloatArray) { + ComputationBuilder b(client_, TestName()); + b.Rev(b.ConstantR2FromArray2D<float>(Array2D<float>(0, 0)), {0, 1}); + ComputeAndCompareR2<float>(&b, Array2D<float>(0, 0), {}); +} + +// Tests the reverse operation on a 0x1 float array on both dimensions. +XLA_TEST_F(ReverseTest, Reverse0x1FloatArray) { + ComputationBuilder b(client_, TestName()); + b.Rev(b.ConstantR2FromArray2D<float>(Array2D<float>(0, 1)), {0, 1}); + ComputeAndCompareR2<float>(&b, Array2D<float>(0, 1), {}); +} + +// Tests the reverse operation on a 1x0 float array on both dimensions. +XLA_TEST_F(ReverseTest, Reverse1x0FloatArray) { + ComputationBuilder b(client_, TestName()); + b.Rev(b.ConstantR2FromArray2D<float>(Array2D<float>(1, 0)), {0, 1}); + ComputeAndCompareR2<float>(&b, Array2D<float>(1, 0), {}); +} + +// Tests the reverse operation on a 1x1 float array on both dimensions. 
+XLA_TEST_F(ReverseTest, Reverse1x1FloatArray) { + ComputationBuilder b(client_, TestName()); + Array2D<float> input({{3.5f}}); + b.Rev(b.ConstantR2FromArray2D<float>(input), {0, 1}); + ComputeAndCompareR2<float>(&b, input, {}); +} + +XLA_TEST_F(ReverseTest, Reverse2x0x4x3FloatArrayDim02) { + ComputationBuilder b(client_, TestName()); + b.Rev(b.ConstantR4FromArray4D<float>(Array4D<float>(2, 0, 4, 3)), {0, 2}); + ComputeAndCompareR4<float>(&b, Array4D<float>(2, 0, 4, 3), {}); +} + +XLA_TEST_F(ReverseTest, Reverse2x0x4x3FloatArrayDim13) { + ComputationBuilder b(client_, TestName()); + b.Rev(b.ConstantR4FromArray4D<float>(Array4D<float>(2, 0, 4, 3)), {1, 3}); + ComputeAndCompareR4<float>(&b, Array4D<float>(2, 0, 4, 3), {}); +} + +// Tests the reverse operation on a 4D U8 array on dimension 0 and 3. +XLA_TEST_F(ReverseTest, Reverse4DU8ArrayOnDim23) { + ComputationBuilder b(client_, TestName()); + // Input shape is U8[1x2x3x4]. + // clang-format off + Array4D<uint8> input({{ + {{1, 2, 3, 4}, + {5, 6, 7, 8}, + {9, 10, 11, 12}}, + {{13, 14, 15, 16}, + {17, 18, 19, 20}, + {21, 22, 23, 24}}, + }}); + // clang-format on + + b.Rev(b.ConstantR4FromArray4D<uint8>(input), {0, 3}); + + // clang-format off + Array4D<uint8> expected({{ + {{4, 3, 2, 1}, + {8, 7, 6, 5}, + {12, 11, 10, 9}}, + {{16, 15, 14, 13}, + {20, 19, 18, 17}, + {24, 23, 22, 21}}, + }}); + // clang-format on + ComputeAndCompareR4<uint8>(&b, expected, {}); +} + +// Tests the reverse operation on a 4D float array on dimension 0 and 1. +TEST_F(ReverseTest, Reverse4DFloatArrayOnDim01) { + ComputationBuilder b(client_, TestName()); + // Input shape is float[4x3x2x1]. 
+ // clang-format off + Array4D<float> input({ + {{{1.0f}, {2.0f}}, + {{3.0f}, {4.0f}}, + {{5.0f}, {6.0f}}}, + {{{7.0f}, {8.0f}}, + {{9.0f}, {10.0f}}, + {{11.0f}, {12.0f}}}, + {{{13.0f}, {14.0f}}, + {{15.0f}, {16.0f}}, + {{17.0f}, {18.0f}}}, + {{{19.0f}, {20.0f}}, + {{21.0f}, {22.0f}}, + {{23.0f}, {24.0f}}}, + }); + // clang-format on + + b.Rev(b.ConstantR4FromArray4D<float>(input), {0, 1}); + + // clang-format off + Array4D<float> expected({ + {{{23.0f}, {24.0f}}, + {{21.0f}, {22.0f}}, + {{19.0f}, {20.0f}}}, + {{{17.0f}, {18.0f}}, + {{15.0f}, {16.0f}}, + {{13.0f}, {14.0f}}}, + {{{11.0f}, {12.0f}}, + {{9.0f}, {10.0f}}, + {{7.0f}, {8.0f}}}, + {{{5.0f}, {6.0f}}, + {{3.0f}, {4.0f}}, + {{1.0f}, {2.0f}}}, + }); + // clang-format on + ComputeAndCompareR4<float>(&b, expected, {}, ErrorSpec(0.0001)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/round_trip_packed_literal_test.cc b/tensorflow/compiler/xla/tests/round_trip_packed_literal_test.cc new file mode 100644 index 0000000000..5b734c0f40 --- /dev/null +++ b/tensorflow/compiler/xla/tests/round_trip_packed_literal_test.cc @@ -0,0 +1,160 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <memory> + +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/packed_literal_reader.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/casts.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class RoundTripPackedLiteralTest : public ClientLibraryTestBase { + protected: + // Sends the literal to the server and retrieves it back. 
+ std::unique_ptr<Literal> RoundTripToServer(const Literal& original) { + std::unique_ptr<GlobalData> data = + client_->TransferToServer(original).ConsumeValueOrDie(); + return client_->Transfer(*data).ConsumeValueOrDie(); + } +}; + +TEST_F(RoundTripPackedLiteralTest, RoundTripsR1F32Length2) { + string data(sizeof(float) * 2, 0); + tensorflow::gtl::MutableArraySlice<float> floats( + tensorflow::bit_cast<float*>(data.data()), 2); + floats[0] = 42.0; + floats[1] = 24.0; + + string fname = tensorflow::testing::TmpDir() + "/RoundTripsR1F32Length2.data"; + EXPECT_TRUE( + tensorflow::WriteStringToFile(tensorflow::Env::Default(), fname, data) + .ok()); + + std::unique_ptr<tensorflow::RandomAccessFile> f; + TF_CHECK_OK(tensorflow::Env::Default()->NewRandomAccessFile(fname, &f)); + PackedLiteralReader reader(f.release()); + std::unique_ptr<Literal> actual = + reader.Read(ShapeUtil::MakeShape(F32, {2})).ConsumeValueOrDie(); + EXPECT_TRUE(reader.IsExhausted()); + + EXPECT_EQ(42.0, LiteralUtil::Get<float>(*actual, {0})); + EXPECT_EQ(24.0, LiteralUtil::Get<float>(*actual, {1})); +} + +TEST_F(RoundTripPackedLiteralTest, RoundTripsR2F32Size2x2Dim0Minor) { + string data(sizeof(float) * 4, 0); + tensorflow::gtl::MutableArraySlice<float> floats( + tensorflow::bit_cast<float*>(data.data()), 4); + // With x as the minor dimension, these will become: + floats[0] = 42.0; // y=0,x=0 + floats[1] = 24.0; // y=0,x=1 + floats[2] = 64.0; // y=1,x=0 + floats[3] = 46.0; // y=1,x=1 + + string fname = + tensorflow::testing::TmpDir() + "/RoundTripsR2F32Size2x2Dim0Minor.data"; + EXPECT_TRUE( + tensorflow::WriteStringToFile(tensorflow::Env::Default(), fname, data) + .ok()); + + const Layout layout = LayoutUtil::MakeLayout({1, 0}); + + std::unique_ptr<tensorflow::RandomAccessFile> f; + TF_CHECK_OK(tensorflow::Env::Default()->NewRandomAccessFile(fname, &f)); + PackedLiteralReader reader(f.release()); + std::unique_ptr<Literal> actual = + reader.Read(ShapeUtil::MakeShape(F32, {2, 2}), &layout) + 
.ConsumeValueOrDie(); + EXPECT_TRUE(reader.IsExhausted()); + + EXPECT_EQ(42.0f, LiteralUtil::Get<float>(*actual, {0, 0})); + EXPECT_EQ(24.0f, LiteralUtil::Get<float>(*actual, {0, 1})); + EXPECT_EQ(64.0f, LiteralUtil::Get<float>(*actual, {1, 0})); + EXPECT_EQ(46.0f, LiteralUtil::Get<float>(*actual, {1, 1})); + + std::unique_ptr<Literal> round_tripped = RoundTripToServer(*actual); + LiteralTestUtil::ExpectEqual(*round_tripped, *actual); +} + +TEST_F(RoundTripPackedLiteralTest, RoundTripsR2F32Size2x2Dim1Minor) { + string data(sizeof(float) * 4, 0); + tensorflow::gtl::MutableArraySlice<float> floats( + tensorflow::bit_cast<float*>(data.data()), 4); + // With y as the minor dimension, these will become: + floats[0] = 42.0; // y=0,x=0 + floats[1] = 24.0; // y=1,x=0 + floats[2] = 64.0; // y=0,x=1 + floats[3] = 46.0; // y=1,x=1 + + string fname = + tensorflow::testing::TmpDir() + "/RoundTripsR2F32Size2x2Dim1Minor.data"; + EXPECT_TRUE( + tensorflow::WriteStringToFile(tensorflow::Env::Default(), fname, data) + .ok()); + + const Layout layout = LayoutUtil::MakeLayout({0, 1}); + + std::unique_ptr<tensorflow::RandomAccessFile> f; + TF_CHECK_OK(tensorflow::Env::Default()->NewRandomAccessFile(fname, &f)); + PackedLiteralReader reader(f.release()); + std::unique_ptr<Literal> actual = + reader.Read(ShapeUtil::MakeShape(F32, {2, 2}), &layout) + .ConsumeValueOrDie(); + EXPECT_TRUE(reader.IsExhausted()); + + EXPECT_EQ(42.0f, LiteralUtil::Get<float>(*actual, {0, 0})); + EXPECT_EQ(24.0f, LiteralUtil::Get<float>(*actual, {1, 0})); + EXPECT_EQ(64.0f, LiteralUtil::Get<float>(*actual, {0, 1})); + EXPECT_EQ(46.0f, LiteralUtil::Get<float>(*actual, {1, 1})); + + std::unique_ptr<Literal> round_tripped = RoundTripToServer(*actual); + LiteralTestUtil::ExpectEqual(*round_tripped, *actual); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = 
tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/round_trip_transfer_test.cc b/tensorflow/compiler/xla/tests/round_trip_transfer_test.cc new file mode 100644 index 0000000000..04a8bab0eb --- /dev/null +++ b/tensorflow/compiler/xla/tests/round_trip_transfer_test.cc @@ -0,0 +1,164 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Tests transferring literals of various shapes and values in and out of the +// XLA service. 
+ +#include <memory> +#include <numeric> +#include <vector> + +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class RoundTripTransferTest : public ClientLibraryTestBase { + protected: + void RoundTripTest(const Literal& original) { + std::unique_ptr<GlobalData> data = + client_->TransferToServer(original).ConsumeValueOrDie(); + std::unique_ptr<Literal> result = + client_->Transfer(*data).ConsumeValueOrDie(); + LiteralTestUtil::ExpectEqual(original, *result); + } +}; + +TEST_F(RoundTripTransferTest, R0S32) { + RoundTripTest(*LiteralUtil::CreateR0<int32>(42)); +} + +TEST_F(RoundTripTransferTest, R0F32) { + RoundTripTest(*LiteralUtil::CreateR0<float>(42.0)); +} + +TEST_F(RoundTripTransferTest, R1F32_Len0) { + RoundTripTest(*LiteralUtil::CreateR1<float>({})); +} + +TEST_F(RoundTripTransferTest, R1F32_Len2) { + RoundTripTest(*LiteralUtil::CreateR1<float>({42.0, 64.0})); +} + +TEST_F(RoundTripTransferTest, R1F32_Len256) { + std::vector<float> values(256); + std::iota(values.begin(), values.end(), 1.0); + RoundTripTest(*LiteralUtil::CreateR1<float>(values)); +} + +TEST_F(RoundTripTransferTest, R1F32_Len1024) { + std::vector<float> values(1024); + std::iota(values.begin(), values.end(), 1.0); + RoundTripTest(*LiteralUtil::CreateR1<float>(values)); +} + +TEST_F(RoundTripTransferTest, R1F32_Len1025) { + std::vector<float> values(1025); + 
std::iota(values.begin(), values.end(), 1.0); + RoundTripTest(*LiteralUtil::CreateR1<float>(values)); +} + +TEST_F(RoundTripTransferTest, R1F32_Len4096) { + std::vector<float> values(4096); + std::iota(values.begin(), values.end(), 1.0); + RoundTripTest(*LiteralUtil::CreateR1<float>(values)); +} + +TEST_F(RoundTripTransferTest, R2F32_Len10x0) { + RoundTripTest( + *LiteralUtil::CreateR2FromArray2D<float>(Array2D<float>(10, 0))); +} + +TEST_F(RoundTripTransferTest, R2F32_Len2x2) { + RoundTripTest(*LiteralUtil::CreateR2<float>({{42.0, 64.0}, {77.0, 88.0}})); +} + +TEST_F(RoundTripTransferTest, R3F32) { + RoundTripTest( + *LiteralUtil::CreateR3<float>({{{1.0, 2.0}, {1.0, 2.0}, {1.0, 2.0}}, + {{3.0, 4.0}, {3.0, 4.0}, {3.0, 4.0}}})); +} + +TEST_F(RoundTripTransferTest, R4F32) { + RoundTripTest(*LiteralUtil::CreateR4<float>({{ + {{10, 11, 12, 13}, {14, 15, 16, 17}}, + {{18, 19, 20, 21}, {22, 23, 24, 25}}, + {{26, 27, 28, 29}, {30, 31, 32, 33}}, + }})); +} + +TEST_F(RoundTripTransferTest, EmptyTuple) { + RoundTripTest(*LiteralUtil::MakeTuple({})); +} + +TEST_F(RoundTripTransferTest, TupleOfR1F32) { + RoundTripTest( + *LiteralUtil::MakeTuple({LiteralUtil::CreateR1<float>({1, 2}).get(), + LiteralUtil::CreateR1<float>({3, 4}).get()})); +} + +TEST_F(RoundTripTransferTest, TupleOfR1F32_Len0_Len2) { + RoundTripTest( + *LiteralUtil::MakeTuple({LiteralUtil::CreateR1<float>({}).get(), + LiteralUtil::CreateR1<float>({3, 4}).get()})); +} + +TEST_F(RoundTripTransferTest, TupleOfR0F32AndR1S32) { + RoundTripTest( + *LiteralUtil::MakeTuple({LiteralUtil::CreateR0<float>(1.0).get(), + LiteralUtil::CreateR1<int>({2, 3}).get()})); +} + +// Below two tests are added to identify the cost of large data transfers. 
+TEST_F(RoundTripTransferTest, R2F32_Large) { + RoundTripTest(*LiteralUtil::CreateR2F32Linspace(-1.0f, 1.0f, 512, 512)); +} + +TEST_F(RoundTripTransferTest, R4F32_Large) { + Array4D<float> array4d(2, 2, 256, 256); + array4d.FillWithMultiples(1.0f); + RoundTripTest(*LiteralUtil::CreateR4FromArray4D<float>(array4d)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/scalar_computations_test.cc b/tensorflow/compiler/xla/tests/scalar_computations_test.cc new file mode 100644 index 0000000000..bd9cae4d1d --- /dev/null +++ b/tensorflow/compiler/xla/tests/scalar_computations_test.cc @@ -0,0 +1,630 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <cmath> +#include <limits> +#include <memory> + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/legacy_flags/llvm_backend_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class ScalarComputationsTest : public ClientLibraryTestBase { + public: + ErrorSpec error_spec_{0.0001}; + + protected: + // A template for building and running a binary comparison test. 
+ template <typename NativeT> + void TestCompare(NativeT lhs, NativeT rhs, bool expected, + ComputationDataHandle (ComputationBuilder::*op)( + const ComputationDataHandle&, + const ComputationDataHandle&, + tensorflow::gtl::ArraySlice<int64>)) { + ComputationBuilder builder(client_, TestName()); + ComputationDataHandle lhs_op = builder.ConstantR0<NativeT>(lhs); + ComputationDataHandle rhs_op = builder.ConstantR0<NativeT>(rhs); + ComputationDataHandle result = (builder.*op)(lhs_op, rhs_op, {}); + ComputeAndCompareR0<bool>(&builder, expected, {}); + } + + template <typename NativeT> + void TestMinMax(NativeT lhs, NativeT rhs, NativeT expected, + ComputationDataHandle (ComputationBuilder::*op)( + const ComputationDataHandle&, + const ComputationDataHandle&, + tensorflow::gtl::ArraySlice<int64>)) { + ComputationBuilder builder(client_, TestName()); + ComputationDataHandle lhs_op = builder.ConstantR0<NativeT>(lhs); + ComputationDataHandle rhs_op = builder.ConstantR0<NativeT>(rhs); + ComputationDataHandle result = (builder.*op)(lhs_op, rhs_op, {}); + ComputeAndCompareR0<NativeT>(&builder, expected, {}); + } +}; + +TEST_F(ScalarComputationsTest, NegateScalarF32) { + ComputationBuilder builder(client_, TestName()); + builder.Neg(builder.ConstantR0<float>(2.1f)); + + ComputeAndCompareR0<float>(&builder, -2.1f, {}, error_spec_); +} + +TEST_F(ScalarComputationsTest, NegateScalarS32) { + ComputationBuilder builder(client_, TestName()); + builder.Neg(builder.ConstantR0<int32>(2)); + + ComputeAndCompareR0<int32>(&builder, -2, {}); +} + +TEST_F(ScalarComputationsTest, AddTwoScalarsF32) { + ComputationBuilder builder(client_, TestName()); + builder.Add(builder.ConstantR0<float>(2.1f), builder.ConstantR0<float>(5.5f)); + + ComputeAndCompareR0<float>(&builder, 7.6f, {}, error_spec_); +} + +TEST_F(ScalarComputationsTest, AddTwoScalarsS32) { + ComputationBuilder builder(client_, TestName()); + builder.Add(builder.ConstantR0<int32>(2), builder.ConstantR0<int32>(5)); + + 
ComputeAndCompareR0<int32>(&builder, 7, {}); +} + +TEST_F(ScalarComputationsTest, AddTwoScalarsU32) { + ComputationBuilder builder(client_, TestName()); + builder.Add(builder.ConstantR0<uint32>(35), builder.ConstantR0<uint32>(57)); + + ComputeAndCompareR0<uint32>(&builder, 92, {}); +} + +XLA_TEST_F(ScalarComputationsTest, AddTwoScalarsU8) { + ComputationBuilder builder(client_, TestName()); + builder.Add(builder.ConstantR0<uint8>(35), builder.ConstantR0<uint8>(57)); + + ComputeAndCompareR0<uint8>(&builder, 92, {}); +} + +XLA_TEST_F(ScalarComputationsTest, AddTwoScalarsU64) { + ComputationBuilder builder(client_, TestName()); + const uint64 a = static_cast<uint64>(1) << 63; + const uint64 b = a + 1; + builder.Add(builder.ConstantR0<uint64>(a), builder.ConstantR0<uint64>(b)); + + ComputeAndCompareR0<uint64>(&builder, a + b, {}); +} + +XLA_TEST_F(ScalarComputationsTest, AddTwoScalarsS64) { + ComputationBuilder builder(client_, TestName()); + const int64 a = static_cast<int64>(1) << 62; + const int64 b = a + 1; + builder.Add(builder.ConstantR0<int64>(a), builder.ConstantR0<int64>(b)); + + ComputeAndCompareR0<int64>(&builder, a + b, {}); +} + +XLA_TEST_F(ScalarComputationsTest, AddTwoScalarsF64) { + ComputationBuilder builder(client_, TestName()); + builder.Add(builder.ConstantR0<double>(0.25), + builder.ConstantR0<double>(3.5)); + + ComputeAndCompareR0<double>(&builder, 3.75, {}); +} + +TEST_F(ScalarComputationsTest, SubtractTwoScalarsF32) { + ComputationBuilder builder(client_, TestName()); + builder.Sub(builder.ConstantR0<float>(2.1f), builder.ConstantR0<float>(5.5f)); + + ComputeAndCompareR0<float>(&builder, -3.4f, {}, error_spec_); +} + +TEST_F(ScalarComputationsTest, SubtractTwoScalarsS32) { + ComputationBuilder builder(client_, TestName()); + builder.Sub(builder.ConstantR0<int32>(2), builder.ConstantR0<int32>(5)); + + ComputeAndCompareR0<int32>(&builder, -3, {}); +} + +TEST_F(ScalarComputationsTest, MulThreeScalarsF32) { + ComputationBuilder builder(client_, 
TestName()); + builder.Mul(builder.Mul(builder.ConstantR0<float>(2.1f), + builder.ConstantR0<float>(5.5f)), + builder.ConstantR0<float>(0.5f)); + + ComputeAndCompareR0<float>(&builder, 5.775f, {}, error_spec_); +} + +TEST_F(ScalarComputationsTest, MulTwoScalarsS32) { + std::vector<int32> data = {0, + 1, + -1, + 1234, + 0x1a243514, + std::numeric_limits<int32>::max(), + std::numeric_limits<int32>::min()}; + + for (int32 x : data) { + for (int32 y : data) { + ComputationBuilder builder(client_, TestName()); + builder.Mul(builder.ConstantR0<int32>(x), builder.ConstantR0<int32>(y)); + + // Signed integer overflow is undefined behavior in C++. Convert the input + // integers to unsigned, perform the multiplication unsigned, and convert + // back. + int32 expected = static_cast<uint32>(x) * static_cast<uint32>(y); + + ComputeAndCompareR0<int32>(&builder, expected, {}); + } + } +} + +TEST_F(ScalarComputationsTest, MulTwoScalarsU32) { + std::vector<uint32> data = {0, 1, 0xDEADBEEF, 1234, + 0x1a243514, 0xFFFFFFFF, 0x80808080}; + + for (uint32 x : data) { + for (uint32 y : data) { + ComputationBuilder builder(client_, TestName()); + builder.Mul(builder.ConstantR0<uint32>(x), builder.ConstantR0<uint32>(y)); + + uint32 expected = x * y; + ComputeAndCompareR0<uint32>(&builder, expected, {}); + } + } +} + +TEST_F(ScalarComputationsTest, MulThreeScalarsS32) { + ComputationBuilder builder(client_, TestName()); + builder.Mul( + builder.Mul(builder.ConstantR0<int32>(2), builder.ConstantR0<int32>(5)), + builder.ConstantR0<int32>(1)); + + ComputeAndCompareR0<int32>(&builder, 10, {}); +} + +TEST_F(ScalarComputationsTest, MulThreeScalarsF32Params) { + ComputationBuilder builder(client_, TestName()); + std::unique_ptr<Literal> a_literal = LiteralUtil::CreateR0<float>(2.1f); + std::unique_ptr<Literal> b_literal = LiteralUtil::CreateR0<float>(5.5f); + std::unique_ptr<Literal> c_literal = LiteralUtil::CreateR0<float>(0.5f); + + std::unique_ptr<GlobalData> a_data = + 
client_->TransferToServer(*a_literal).ConsumeValueOrDie(); + std::unique_ptr<GlobalData> b_data = + client_->TransferToServer(*b_literal).ConsumeValueOrDie(); + std::unique_ptr<GlobalData> c_data = + client_->TransferToServer(*c_literal).ConsumeValueOrDie(); + + ComputationDataHandle a = builder.Parameter(0, a_literal->shape(), "a"); + ComputationDataHandle b = builder.Parameter(1, b_literal->shape(), "b"); + ComputationDataHandle c = builder.Parameter(2, c_literal->shape(), "c"); + builder.Mul(builder.Mul(a, b), c); + + ComputeAndCompareR0<float>(&builder, 5.775f, + {a_data.get(), b_data.get(), c_data.get()}, + error_spec_); +} + +TEST_F(ScalarComputationsTest, DivideTwoScalarsF32) { + ComputationBuilder builder(client_, TestName()); + builder.Div(builder.ConstantR0<float>(5.0f), builder.ConstantR0<float>(2.5f)); + + ComputeAndCompareR0<float>(&builder, 2.0f, {}, error_spec_); +} + +XLA_TEST_F(ScalarComputationsTest, RemTwoScalarsF32) { + ComputationBuilder builder(client_, TestName()); + builder.Rem(builder.ConstantR0<float>(2.5f), builder.ConstantR0<float>(5.0f)); + + ComputeAndCompareR0<float>(&builder, 2.5f, {}, error_spec_); +} + +XLA_TEST_F(ScalarComputationsTest, DivideTwoScalarsS32) { + ComputationBuilder builder(client_, TestName()); + builder.Div(builder.ConstantR0<int32>(-5), builder.ConstantR0<int32>(2)); + + ComputeAndCompareR0<int32>(&builder, -2, {}); +} + +TEST_F(ScalarComputationsTest, RemainderTwoScalarsNegativeResultS32) { + ComputationBuilder builder(client_, TestName()); + builder.Rem(builder.ConstantR0<int32>(-5), builder.ConstantR0<int32>(2)); + + ComputeAndCompareR0<int32>(&builder, -1, {}); +} + +TEST_F(ScalarComputationsTest, RemainderTwoScalarsIntMinS32) { + ComputationBuilder builder(client_, TestName()); + builder.Rem(builder.ConstantR0<int32>(INT_MIN), + builder.ConstantR0<int32>(7919)); + + ComputeAndCompareR0<int32>(&builder, -1309, {}); +} + +TEST_F(ScalarComputationsTest, RemainderTwoScalarsIntMinVsIntMaxS32) { + 
ComputationBuilder builder(client_, TestName()); + builder.Rem(builder.ConstantR0<int32>(INT_MIN), + builder.ConstantR0<int32>(INT_MAX)); + + ComputeAndCompareR0<int32>(&builder, -1, {}); +} + +TEST_F(ScalarComputationsTest, RemainderTwoScalarsPositiveResultS32) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.Parameter(0, ShapeUtil::MakeShape(S32, {}), "x"); + builder.Rem(x, builder.ConstantR0<int32>(80000)); + + std::unique_ptr<Literal> literal = LiteralUtil::CreateR0<int32>(87919); + TF_ASSIGN_OR_ASSERT_OK(auto input_data, client_->TransferToServer(*literal)); + ComputeAndCompareR0<int32>(&builder, 7919, {input_data.get()}); +} + +XLA_TEST_F(ScalarComputationsTest, DivideTwoScalarsU32) { + ComputationBuilder builder(client_, TestName()); + // This verifies 0xFFFFFFFE / 2 = 0x7FFFFFFF. If XLA incorrectly treated U32 + // as S32, it would output -2 / 2 = -1 (0xFFFFFFFF). + builder.Div(builder.ConstantR0<uint32>(0xFFFFFFFE), + builder.ConstantR0<uint32>(2)); + + ComputeAndCompareR0<uint32>(&builder, 0x7FFFFFFF, {}); +} + +TEST_F(ScalarComputationsTest, LogicalAnd) { + for (bool x : {false, true}) { + for (bool y : {false, true}) { + ComputationBuilder builder(client_, TestName()); + builder.LogicalAnd(builder.ConstantR0<bool>(x), + builder.ConstantR0<bool>(y)); + + ComputeAndCompareR0<bool>(&builder, x && y, {}); + } + } +} + +TEST_F(ScalarComputationsTest, LogicalOr) { + for (bool x : {false, true}) { + for (bool y : {false, true}) { + ComputationBuilder builder(client_, TestName()); + builder.LogicalOr(builder.ConstantR0<bool>(x), + builder.ConstantR0<bool>(y)); + + ComputeAndCompareR0<bool>(&builder, x || y, {}); + } + } +} + +TEST_F(ScalarComputationsTest, LogicalNot) { + for (bool x : {false, true}) { + ComputationBuilder builder(client_, TestName()); + builder.LogicalNot(builder.ConstantR0<bool>(x)); + + ComputeAndCompareR0<bool>(&builder, !x, {}); + } +} + +TEST_F(ScalarComputationsTest, SelectScalarTrue) { + ComputationBuilder 
builder(client_, TestName()); + builder.Select(builder.ConstantR0<bool>(true), // The predicate. + builder.ConstantR0<float>(123.0f), // The value on true. + builder.ConstantR0<float>(42.0f)); // The value on false. + + ComputeAndCompareR0<float>(&builder, 123.0f, {}, error_spec_); +} + +TEST_F(ScalarComputationsTest, SelectScalarFalse) { + ComputationBuilder builder(client_, TestName()); + builder.Select(builder.ConstantR0<bool>(false), // The predicate. + builder.ConstantR0<float>(123.0f), // The value on true. + builder.ConstantR0<float>(42.0f)); // The value on false. + + ComputeAndCompareR0<float>(&builder, 42.0f, {}, error_spec_); +} + +// This test is an explicit version of what is happening in the following +// templatized comparison tests. +TEST_F(ScalarComputationsTest, CompareGtScalar) { + ComputationBuilder builder(client_, TestName()); + builder.Gt(builder.ConstantR0<float>(2.0f), builder.ConstantR0<float>(1.0f)); + + ComputeAndCompareR0<bool>(&builder, true, {}); +} + +// S32 comparisons. +TEST_F(ScalarComputationsTest, CompareEqS32Greater) { + TestCompare<int32>(2, 1, false, &ComputationBuilder::Eq); +} +TEST_F(ScalarComputationsTest, CompareEqS32Equal) { + TestCompare<int32>(3, 3, true, &ComputationBuilder::Eq); +} + +TEST_F(ScalarComputationsTest, CompareNeS32) { + TestCompare<int32>(2, 1, true, &ComputationBuilder::Ne); +} + +TEST_F(ScalarComputationsTest, CompareGeS32) { + TestCompare<int32>(2, 1, true, &ComputationBuilder::Ge); +} + +TEST_F(ScalarComputationsTest, CompareGtS32) { + TestCompare<int32>(1, 5, false, &ComputationBuilder::Gt); +} + +TEST_F(ScalarComputationsTest, CompareLeS32) { + TestCompare<int32>(2, 1, false, &ComputationBuilder::Le); +} + +TEST_F(ScalarComputationsTest, CompareLtS32) { + TestCompare<int32>(9, 7, false, &ComputationBuilder::Lt); + TestCompare<int32>(std::numeric_limits<int32>::min(), + std::numeric_limits<int32>::max(), true, + &ComputationBuilder::Lt); +} + +// U32 comparisons. 
+TEST_F(ScalarComputationsTest, CompareEqU32False) { + TestCompare<uint32>(2, 1, false, &ComputationBuilder::Eq); +} + +TEST_F(ScalarComputationsTest, CompareNeU32) { + TestCompare<uint32>(2, 1, true, &ComputationBuilder::Ne); +} + +TEST_F(ScalarComputationsTest, CompareGeU32Greater) { + TestCompare<uint32>(2, 1, true, &ComputationBuilder::Ge); +} + +TEST_F(ScalarComputationsTest, CompareGeU32Equal) { + TestCompare<uint32>(3, 3, true, &ComputationBuilder::Ge); +} + +TEST_F(ScalarComputationsTest, CompareGtU32) { + TestCompare<uint32>(1, 5, false, &ComputationBuilder::Gt); + TestCompare<uint32>(5, 5, false, &ComputationBuilder::Gt); + TestCompare<uint32>(5, 1, true, &ComputationBuilder::Gt); +} + +TEST_F(ScalarComputationsTest, CompareLeU32) { + TestCompare<uint32>(2, 1, false, &ComputationBuilder::Le); +} + +TEST_F(ScalarComputationsTest, CompareLtU32) { + TestCompare<uint32>(9, 7, false, &ComputationBuilder::Lt); + TestCompare<uint32>(0, std::numeric_limits<uint32>::max(), true, + &ComputationBuilder::Lt); +} + +// F32 comparisons. +TEST_F(ScalarComputationsTest, CompareEqF32False) { + TestCompare<float>(2.0, 1.3, false, &ComputationBuilder::Eq); +} + +TEST_F(ScalarComputationsTest, CompareNeF32) { + TestCompare<float>(2.0, 1.3, true, &ComputationBuilder::Ne); +} + +TEST_F(ScalarComputationsTest, CompareGeF32Greater) { + TestCompare<float>(2.0, 1.9, true, &ComputationBuilder::Ge); +} +TEST_F(ScalarComputationsTest, CompareGeF32Equal) { + TestCompare<float>(3.5, 3.5, true, &ComputationBuilder::Ge); +} + +TEST_F(ScalarComputationsTest, CompareGtF32) { + TestCompare<float>(1.0, 5.2, false, &ComputationBuilder::Gt); +} + +TEST_F(ScalarComputationsTest, CompareLeF32) { + TestCompare<float>(2.0, 1.2, false, &ComputationBuilder::Le); +} + +TEST_F(ScalarComputationsTest, CompareLtF32) { + TestCompare<float>(9.0, 7.2, false, &ComputationBuilder::Lt); +} + +// F32 comparisons with exceptional values. 
The test names encode the +// left/right operands at the end, and use Minf and Mzero for -inf and -0.0. +TEST_F(ScalarComputationsTest, CompareLtF32MinfMzero) { + TestCompare<float>(-INFINITY, -0.0, true, &ComputationBuilder::Lt); +} +TEST_F(ScalarComputationsTest, CompareLtF32MzeroZero) { + // Comparisons of 0.0 to -0.0 consider them equal in IEEE 754. + TestCompare<float>(-0.0, 0.0, false, &ComputationBuilder::Lt); +} +TEST_F(ScalarComputationsTest, CompareLtF32ZeroInf) { + TestCompare<float>(0.0, INFINITY, true, &ComputationBuilder::Lt); +} + +TEST_F(ScalarComputationsTest, CompareGeF32MinfMzero) { + TestCompare<float>(-INFINITY, -0.0, false, &ComputationBuilder::Ge); +} +TEST_F(ScalarComputationsTest, CompareGeF32MzeroZero) { + // Comparisons of 0.0 to -0.0 consider them equal in IEEE 754. + TestCompare<float>(-0.0, 0.0, true, &ComputationBuilder::Ge); +} +TEST_F(ScalarComputationsTest, CompareGeF32ZeroInf) { + TestCompare<float>(0.0, INFINITY, false, &ComputationBuilder::Ge); +} + +TEST_F(ScalarComputationsTest, ExpScalar) { + ComputationBuilder builder(client_, TestName()); + builder.Exp(builder.ConstantR0<float>(2.0f)); + + ComputeAndCompareR0<float>(&builder, 7.3890562, {}, error_spec_); +} + +TEST_F(ScalarComputationsTest, LogScalar) { + ComputationBuilder builder(client_, "log"); + builder.Log(builder.ConstantR0<float>(2.0f)); + + ComputeAndCompareR0<float>(&builder, 0.6931471, {}, error_spec_); +} + +TEST_F(ScalarComputationsTest, TanhScalar) { + ComputationBuilder builder(client_, TestName()); + builder.Tanh(builder.ConstantR0<float>(2.0f)); + + ComputeAndCompareR0<float>(&builder, 0.96402758, {}, error_spec_); +} + +XLA_TEST_F(ScalarComputationsTest, TanhDoubleScalar) { + ComputationBuilder builder(client_, TestName()); + builder.Tanh(builder.ConstantR0<double>(2.0)); + + ComputeAndCompareR0<double>(&builder, 0.96402758, {}, error_spec_); +} + +TEST_F(ScalarComputationsTest, PowScalar) { + ComputationBuilder builder(client_, TestName()); + 
builder.Pow(builder.ConstantR0<float>(2.0f), builder.ConstantR0<float>(3.0f)); + + ComputeAndCompareR0<float>(&builder, 8.0, {}, error_spec_); +} + +TEST_F(ScalarComputationsTest, ClampScalarHigh) { + ComputationBuilder builder(client_, TestName()); + builder.Clamp(builder.ConstantR0<float>(2.0f), // The lower bound. + builder.ConstantR0<float>(5.0f), // The operand to be clamped. + builder.ConstantR0<float>(3.0f)); // The upper bound. + + ComputeAndCompareR0<float>(&builder, 3.0, {}, error_spec_); +} + +TEST_F(ScalarComputationsTest, ClampScalarMiddle) { + ComputationBuilder builder(client_, TestName()); + builder.Clamp(builder.ConstantR0<float>(2.0f), // The lower bound. + builder.ConstantR0<float>(2.5f), // The operand to be clamped. + builder.ConstantR0<float>(3.0f)); // The upper bound. + + ComputeAndCompareR0<float>(&builder, 2.5, {}, error_spec_); +} + +TEST_F(ScalarComputationsTest, ClampScalarLow) { + ComputationBuilder builder(client_, TestName()); + builder.Clamp(builder.ConstantR0<float>(2.0f), // The lower bound. + builder.ConstantR0<float>(-5.0f), // The operand to be clamped. + builder.ConstantR0<float>(3.0f)); // The upper bound. 
+ + ComputeAndCompareR0<float>(&builder, 2.0, {}, error_spec_); +} + +TEST_F(ScalarComputationsTest, MinS32Above) { + TestMinMax<int32>(10, 3, 3, &ComputationBuilder::Min); +} + +TEST_F(ScalarComputationsTest, MinS32Below) { + TestMinMax<int32>(-100, 3, -100, &ComputationBuilder::Min); +} + +TEST_F(ScalarComputationsTest, MaxS32Above) { + TestMinMax<int32>(10, 3, 10, &ComputationBuilder::Max); +} + +TEST_F(ScalarComputationsTest, MaxS32Below) { + TestMinMax<int32>(-100, 3, 3, &ComputationBuilder::Max); +} + +TEST_F(ScalarComputationsTest, MinU32Above) { + const uint32 large = std::numeric_limits<int32>::max(); + TestMinMax<uint32>(large, 3, 3, &ComputationBuilder::Min); +} + +TEST_F(ScalarComputationsTest, MinU32Below) { + TestMinMax<uint32>(0, 5, 0, &ComputationBuilder::Min); +} + +TEST_F(ScalarComputationsTest, MaxU32Above) { + const uint32 large = std::numeric_limits<int32>::max(); + TestMinMax<uint32>(large, 3, large, &ComputationBuilder::Max); +} + +TEST_F(ScalarComputationsTest, MaxU32Below) { + TestMinMax<uint32>(0, 5, 5, &ComputationBuilder::Max); +} + +TEST_F(ScalarComputationsTest, MinF32Above) { + TestMinMax<float>(10.1f, 3.1f, 3.1f, &ComputationBuilder::Min); +} + +TEST_F(ScalarComputationsTest, MinF32Below) { + TestMinMax<float>(-100.1f, 3.1f, -100.1f, &ComputationBuilder::Min); +} + +TEST_F(ScalarComputationsTest, MaxF32Above) { + TestMinMax<float>(10.1f, 3.1f, 10.1f, &ComputationBuilder::Max); +} + +TEST_F(ScalarComputationsTest, MaxF32Below) { + TestMinMax<float>(-100.1f, 3.1f, 3.1f, &ComputationBuilder::Max); +} + +TEST_F(ScalarComputationsTest, ComplicatedArithmeticExpressionF32) { + // Compute the expression (1 * (3 - 1) * (7 + 0) - 4) / 20. 
+ ComputationBuilder b(client_, TestName()); + b.Div( + b.Sub(b.Mul(b.ConstantR0<float>(1), + b.Mul(b.Sub(b.ConstantR0<float>(3), b.ConstantR0<float>(1)), + b.Add(b.ConstantR0<float>(7), b.ConstantR0<float>(0)))), + b.ConstantR0<float>(4)), + b.ConstantR0<float>(20)); + + ComputeAndCompareR0<float>(&b, 0.5, {}, error_spec_); +} + +TEST_F(ScalarComputationsTest, ComplicatedArithmeticExpressionS32) { + // Compute the expression 1 * (3 - 1) * (7 + 0) - 4. + ComputationBuilder b(client_, TestName()); + b.Sub(b.Mul(b.ConstantR0<int32>(1), + b.Mul(b.Sub(b.ConstantR0<int32>(3), b.ConstantR0<int32>(1)), + b.Add(b.ConstantR0<int32>(7), b.ConstantR0<int32>(0)))), + b.ConstantR0<int32>(4)); + + ComputeAndCompareR0<int32>(&b, 10, {}); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendLlvmBackendFlags(&flag_list); + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/select_and_scatter_test.cc b/tensorflow/compiler/xla/tests/select_and_scatter_test.cc new file mode 100644 index 0000000000..fb1effc8c4 --- /dev/null +++ b/tensorflow/compiler/xla/tests/select_and_scatter_test.cc @@ -0,0 +1,395 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Tests the select-and-scatter XLA operation. + +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/padding.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class SelectAndScatterTest : public ClientLibraryTestBase { + public: + SelectAndScatterTest() : builder_(client_, TestName()) { + // Create S32 GE and ADD computations for select and scatter respectively. 
+ ge_s32_ = CreateScalarGeComputation(S32, &builder_); + add_s32_ = CreateScalarAddComputation(S32, &builder_); + ge_f32_ = CreateScalarGeComputation(F32, &builder_); + add_f32_ = CreateScalarAddComputation(F32, &builder_); + max_f32_ = CreateScalarMaxComputation(F32, &builder_); + min_f32_ = CreateScalarMinComputation(F32, &builder_); + } + + ComputationBuilder builder_; + Computation ge_s32_; + Computation add_s32_; + Computation ge_f32_; + Computation add_f32_; + Computation max_f32_; + Computation min_f32_; +}; + +// Test for F32 1D array, with a zero-element input. +XLA_TEST_F(SelectAndScatterTest, R1S0F32) { + const auto operand = builder_.ConstantR1<float>({}); + const auto source = builder_.ConstantR1<float>({}); + builder_.SelectAndScatter(operand, ge_f32_, /*window_dimensions=*/{3}, + /*window_strides=*/{3}, Padding::kValid, source, + builder_.ConstantR0<float>(0.0f), add_f32_); + ComputeAndCompareR1<float>(&builder_, {}, {}, ErrorSpec(1e-7)); +} + +// Test for F32 1D array, when windows do not overlap. +XLA_TEST_F(SelectAndScatterTest, R1F32) { + const auto operand = + builder_.ConstantR1<float>({1.f, 9.f, 3.f, 7.f, 5.f, 6.f}); + const auto source = builder_.ConstantR1<float>({34.f, 42.f}); + const std::vector<float> expected = {0.f, 34.f, 0.f, 42.f, 0.f, 0.f}; + builder_.SelectAndScatter(operand, ge_f32_, /*window_dimensions=*/{3}, + /*window_strides=*/{3}, Padding::kValid, source, + builder_.ConstantR0<float>(0.0f), add_f32_); + ComputeAndCompareR1<float>(&builder_, expected, {}, ErrorSpec(1e-7)); +} + +// Test for S32 1D array, when windows do not overlap and the init value is 1. 
+XLA_TEST_F(SelectAndScatterTest, R1S32) { + const auto operand = builder_.ConstantR1<int32>({-1, 0, 6, 4, -4, 10}); + const auto source = builder_.ConstantR1<int32>({-10, 20}); + const std::vector<int32> expected = {1, 1, -9, 1, 1, 21}; + builder_.SelectAndScatter(operand, ge_s32_, /*window_dimensions=*/{3}, + /*window_strides=*/{3}, Padding::kValid, source, + builder_.ConstantR0<int32>(1), add_s32_); + ComputeAndCompareR1<int32>(&builder_, expected, {}); +} + +// Test for S32 1D array, when windows overlap with each other. +XLA_TEST_F(SelectAndScatterTest, R1S32OverlappingWindow) { + const auto operand = builder_.ConstantR1<int32>({1, 9, 3, 7, 5, 6}); + const auto source = builder_.ConstantR1<int32>({34, 42, 53, 19}); + const std::vector<int32> expected = {0, 76, 0, 72, 0, 0}; + builder_.SelectAndScatter(operand, ge_s32_, /*window_dimensions=*/{3}, + /*window_strides=*/{1}, Padding::kValid, source, + builder_.ConstantR0<int32>(0), add_s32_); + ComputeAndCompareR1<int32>(&builder_, expected, {}); +} + +// Test for S32 2D array, when windows do not overlap. +XLA_TEST_F(SelectAndScatterTest, R2S32) { + const auto operand = + builder_.ConstantR2<int32>({{7, 2, 5, 3, 10, 2}, {3, 8, 9, 3, 4, 2}}); + const auto source = builder_.ConstantR2<int32>({{2, 6}}); + Array2D<int32> expected({{0, 0, 0, 0, 6, 0}, {0, 0, 2, 0, 0, 0}}); + builder_.SelectAndScatter(operand, ge_s32_, /*window_dimensions=*/{2, 3}, + /*window_strides=*/{2, 3}, Padding::kValid, source, + builder_.ConstantR0<int32>(0), add_s32_); + ComputeAndCompareR2<int32>(&builder_, expected, {}); +} + +// Similar to SelectAndScatterTest.R2S32 but the input is transposed. 
+XLA_TEST_F(SelectAndScatterTest, ReshapeR2S32) { + const auto operand = builder_.ConstantR2<int32>( + {{7, 3}, {2, 8}, {5, 9}, {3, 3}, {10, 4}, {2, 2}}); + const auto reshape = + builder_.Reshape(operand, /*dimensions=*/{1, 0}, /*new_sizes=*/{2, 6}); + const auto source = builder_.ConstantR2<int32>({{2, 6}}); + Array2D<int32> expected({{0, 0, 0, 0, 6, 0}, {0, 0, 2, 0, 0, 0}}); + builder_.SelectAndScatter(reshape, ge_s32_, /*window_dimensions=*/{2, 3}, + /*window_strides=*/{2, 3}, Padding::kValid, source, + builder_.ConstantR0<int32>(0), add_s32_); + ComputeAndCompareR2<int32>(&builder_, expected, {}); +} + +// Test for S32 2D array, when windows overlap with each other. +XLA_TEST_F(SelectAndScatterTest, R2S32OverlappingWindow) { + const auto operand = + builder_.ConstantR2<int32>({{7, 2, 5, 3, 8}, {3, 8, 9, 3, 4}}); + const auto source = builder_.ConstantR2<int32>({{2, 6, 4}}); + Array2D<int32> expected({{0, 0, 0, 0, 0}, {0, 0, 12, 0, 0}}); + builder_.SelectAndScatter(operand, ge_s32_, /*window_dimensions=*/{2, 3}, + /*window_strides=*/{1, 1}, Padding::kValid, source, + builder_.ConstantR0<int32>(0), add_s32_); + ComputeAndCompareR2<int32>(&builder_, expected, {}); +} + +// Test for S32 2D array, when the padding is Padding::kSAME. +XLA_TEST_F(SelectAndScatterTest, R2S32SamePadding) { + const auto operand = + builder_.ConstantR2<int32>({{7, 2, 5, 3, 8}, {3, 8, 9, 3, 4}}); + const auto source = builder_.ConstantR2<int32>({{2, 6, 4}}); + Array2D<int32> expected({{0, 0, 0, 0, 4}, {0, 2, 6, 0, 0}}); + builder_.SelectAndScatter(operand, ge_s32_, /*window_dimensions=*/{2, 2}, + /*window_strides=*/{2, 2}, Padding::kSame, source, + builder_.ConstantR0<int32>(0), add_s32_); + ComputeAndCompareR2<int32>(&builder_, expected, {}); +} + +// Test for S32 2D array, when the padding is Padding::kSAME and windows overlap +// with each other. 
+XLA_TEST_F(SelectAndScatterTest, R2S32SamePaddingOverlappingWindow) { + const auto operand = + builder_.ConstantR2<int32>({{7, 2, 5, 3, 8}, {3, 8, 9, 3, 4}}); + const auto source = + builder_.ConstantR2<int32>({{2, 6, 4, 7, 1}, {3, 5, 8, 9, 10}}); + Array2D<int32> expected({{0, 0, 0, 0, 8}, {0, 5, 23, 0, 19}}); + builder_.SelectAndScatter(operand, ge_s32_, /*window_dimensions=*/{2, 2}, + /*window_strides=*/{1, 1}, Padding::kSame, source, + builder_.ConstantR0<int32>(0), add_s32_); + ComputeAndCompareR2<int32>(&builder_, expected, {}); +} + +XLA_TEST_F(SelectAndScatterTest, R2F32OverlappingR2Source) { + const auto operand = builder_.ConstantR2<float>( + {{1.5f, 2.5f, 1.5f}, {3.5f, 1.5f, 3.5f}, {4.5f, 2.5f, 4.5f}}); + const auto source = builder_.ConstantR2<float>({{1.0f, 2.0f}, {3.0f, 4.0f}}); + Array2D<float> expected( + {{0.0f, 0.0f, 0.0f}, {1.0f, 0.0f, 2.0f}, {3.0f, 0.0f, 4.0f}}); + builder_.SelectAndScatter(operand, ge_f32_, /*window_dimensions=*/{2, 2}, + /*window_strides=*/{1, 1}, Padding::kValid, source, + builder_.ConstantR0<float>(0.0f), add_f32_); + ComputeAndCompareR2<float>(&builder_, expected, {}, ErrorSpec(1e-7)); +} + +TEST_F(SelectAndScatterTest, R4F32Valid) { + Array2D<float> pzo = {{7.0f, 2.0f, 5.0f, 3.0f, 10.0f, 2.0f}, + {3.0f, 8.0f, 9.0f, 3.0f, 4.00f, 2.0f}, + {1.0f, 5.0f, 7.0f, 5.0f, 6.00f, 1.0f}, + {0.0f, 6.0f, 2.0f, 7.0f, 2.00f, 8.0f}}; + Array2D<float> pzs = {{2.0f, 6.0f}, {3.0f, 1.0f}}; + Array2D<float> pze = {{0.0f, 0.0f, 0.0f, 0.0f, 6.0f, 0.0f}, + {0.0f, 0.0f, 2.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 3.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f}}; + Array4D<float> o(4, 6, 15, 220); + o.FillWithPZ(pzo); + auto operand = builder_.ConstantR4FromArray4D(o); + Array4D<float> e(4, 6, 15, 220); + e.FillWithPZ(pze); + Array4D<float> s(2, 2, 15, 220); + s.FillWithPZ(pzs); + auto source = builder_.ConstantR4FromArray4D(s); + s.FillWithPZ(pzs); + builder_.SelectAndScatter(operand, ge_f32_, {2, 3, 1, 1}, {2, 3, 1, 1}, + 
Padding::kValid, source, + builder_.ConstantR0<float>(0.0f), add_f32_); + ComputeAndCompareR4<float>(&builder_, e, {}, ErrorSpec(1e-7)); +} + +TEST_F(SelectAndScatterTest, R4F32Overlap) { + Array2D<float> pzo = {{7.0f, 2.0f, 5.0f, 3.0f, 8.0f}, + {3.0f, 8.0f, 9.0f, 3.0f, 4.0f}, + {1.0f, 5.0f, 7.0f, 5.0f, 6.0f}, + {0.0f, 6.0f, 2.0f, 10.0f, 2.0f}}; + Array2D<float> pzs = {{2.0f, 6.0f}, {3.0f, 1.0f}}; + Array2D<float> pze = {{0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 8.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 3.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 0.0f, 1.0f, 0.0f}}; + Array4D<float> o(4, 5, 17, 128); + o.FillWithPZ(pzo); + auto operand = builder_.ConstantR4FromArray4D(o); + Array4D<float> e(4, 5, 17, 128); + e.FillWithPZ(pze); + Array4D<float> s(2, 2, 17, 128); + s.FillWithPZ(pzs); + auto source = builder_.ConstantR4FromArray4D(s); + s.FillWithPZ(pzs); + builder_.SelectAndScatter(operand, ge_f32_, {2, 3, 1, 1}, {2, 2, 1, 1}, + Padding::kValid, source, + builder_.ConstantR0<float>(0.0f), add_f32_); + ComputeAndCompareR4<float>(&builder_, e, {}, ErrorSpec(1e-7)); +} + +TEST_F(SelectAndScatterTest, R4F32OverlapSmall) { + Array2D<float> pzo = {{7.0f, 2.0f, 5.0f, 3.0f, 8.0f}, + {3.0f, 8.0f, 9.0f, 3.0f, 4.0f}, + {1.0f, 5.0f, 7.0f, 5.0f, 6.0f}, + {0.0f, 6.0f, 2.0f, 10.0f, 2.0f}}; + Array2D<float> pzs = {{2.0f, 6.0f}, {3.0f, 1.0f}}; + Array2D<float> pze = {{0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 8.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 3.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 0.0f, 1.0f, 0.0f}}; + Array4D<float> o(4, 5, 1, 1); + o.FillWithPZ(pzo); + auto operand = builder_.ConstantR4FromArray4D(o); + Array4D<float> e(4, 5, 1, 1); + e.FillWithPZ(pze); + Array4D<float> s(2, 2, 1, 1); + s.FillWithPZ(pzs); + auto source = builder_.ConstantR4FromArray4D(s); + s.FillWithPZ(pzs); + builder_.SelectAndScatter(operand, ge_f32_, {2, 3, 1, 1}, {2, 2, 1, 1}, + Padding::kValid, source, + builder_.ConstantR0<float>(0.0f), add_f32_); + ComputeAndCompareR4<float>(&builder_, e, {}, ErrorSpec(1e-7)); +} + 
+TEST_F(SelectAndScatterTest, R4F32RefValidFixedSmall) { + // This test is testing the Reference Util + Array2D<float> pzo = {{7.0f, 2.0f, 5.0f, 3.0f, 10.0f, 2.0f}, + {3.0f, 8.0f, 9.0f, 3.0f, 4.00f, 2.0f}, + {1.0f, 5.0f, 7.0f, 5.0f, 6.00f, 1.0f}, + {0.0f, 6.0f, 2.0f, 7.0f, 2.00f, 8.0f}}; + Array2D<float> pzs = {{2.0f, 6.0f}, {3.0f, 1.0f}}; + Array4D<float> o(4, 6, 4, 4); + o.FillWithPZ(pzo); + auto operand = builder_.ConstantR4FromArray4D(o); + Array4D<float> s(2, 2, 4, 4); + s.FillWithPZ(pzs); + + auto source = builder_.ConstantR4FromArray4D(s); + s.FillWithPZ(pzs); + builder_.SelectAndScatter(operand, ge_f32_, {2, 3, 1, 1}, {2, 3, 1, 1}, + Padding::kValid, source, + builder_.ConstantR0<float>(0.0f), add_f32_); + auto e = ReferenceUtil::SelectAndScatter4DGePlus(o, s, 0.0f, {2, 3, 1, 1}, + {2, 3, 1, 1}, false); + ComputeAndCompareR4<float>(&builder_, *e, {}, ErrorSpec(1e-7)); +} + +TEST_F(SelectAndScatterTest, R4F32RefSameRandom) { + Array4D<float> o(7, 7, 8, 256); + o.FillRandom(1.5f); + auto operand = builder_.ConstantR4FromArray4D(o); + + Array4D<float> s(4, 4, 8, 256); + s.FillRandom(12.0f); + auto source = builder_.ConstantR4FromArray4D(s); + + builder_.SelectAndScatter(operand, ge_f32_, {2, 2, 1, 1}, {2, 2, 1, 1}, + Padding::kSame, source, + builder_.ConstantR0<float>(0.0f), add_f32_); + auto e = ReferenceUtil::SelectAndScatter4DGePlus(o, s, 0.0f, {2, 2, 1, 1}, + {2, 2, 1, 1}, true); + + ComputeAndCompareR4<float>(&builder_, *e, {}, ErrorSpec(1e-7)); +} + +TEST_F(SelectAndScatterTest, R4F32RefSameRandomFullyPadded) { + Array4D<float> o(1, 1, 5, 5); + o.FillRandom(1.5f); + auto operand = builder_.ConstantR4FromArray4D(o); + + Array4D<float> s(1, 1, 5, 5); + s.FillRandom(12.0f); + auto source = builder_.ConstantR4FromArray4D(s); + + builder_.SelectAndScatter(operand, ge_f32_, {3, 3, 1, 1}, {3, 3, 1, 1}, + Padding::kSame, source, + builder_.ConstantR0<float>(0.0f), add_f32_); + auto e = ReferenceUtil::SelectAndScatter4DGePlus(o, s, 0.0f, {3, 3, 1, 1}, + {3, 3, 
1, 1}, true); + + ComputeAndCompareR4<float>(&builder_, *e, {}, ErrorSpec(1e-7)); +} + +TEST_F(SelectAndScatterTest, R4F32RefValidRandom) { + Array4D<float> o(9, 9, 16, 128); + o.FillRandom(1.5f); + auto operand = builder_.ConstantR4FromArray4D(o); + + Array4D<float> s(3, 3, 16, 128); + s.FillRandom(12.0f); + auto source = builder_.ConstantR4FromArray4D(s); + + builder_.SelectAndScatter(operand, ge_f32_, {3, 3, 1, 1}, {3, 3, 1, 1}, + Padding::kValid, source, + builder_.ConstantR0<float>(0.0f), add_f32_); + + auto e = ReferenceUtil::SelectAndScatter4DGePlus(o, s, 0.0f, {3, 3, 1, 1}, + {3, 3, 1, 1}, false); + + ComputeAndCompareR4<float>(&builder_, *e, {}, ErrorSpec(1e-7)); +} + +TEST_F(SelectAndScatterTest, R4F32RefValidRandomSmall) { + Array4D<float> o(3, 3, 4, 4); + o.FillRandom(1.5f); + auto operand = builder_.ConstantR4FromArray4D(o); + + Array4D<float> s(1, 1, 4, 4); + s.FillRandom(12.0f); + auto source = builder_.ConstantR4FromArray4D(s); + + builder_.SelectAndScatter(operand, ge_f32_, {3, 3, 1, 1}, {3, 3, 1, 1}, + Padding::kValid, source, + builder_.ConstantR0<float>(0.0f), add_f32_); + + auto e = ReferenceUtil::SelectAndScatter4DGePlus(o, s, 0.0f, {3, 3, 1, 1}, + {3, 3, 1, 1}, false); + + ComputeAndCompareR4<float>(&builder_, *e, {}, ErrorSpec(1e-7)); +} + +XLA_TEST_F(SelectAndScatterTest, R1F32OverlappingWindowMaxScatter) { + const auto operand = builder_.ConstantR1<float>({1, 2, 3, 100, 3, 2, 1}); + const auto source = builder_.ConstantR1<float>({34, 42, 53, 19}); + const std::vector<float> expected = {0, 0, 0, 53, 0, 0, 0}; + builder_.SelectAndScatter(operand, ge_f32_, /*window_dimensions=*/{4}, + /*window_strides=*/{1}, Padding::kValid, source, + builder_.ConstantR0<float>(0), max_f32_); + ComputeAndCompareR1<float>(&builder_, expected, {}, ErrorSpec(1e-7)); +} + +XLA_TEST_F(SelectAndScatterTest, R1F32OverlappingWindowMinScatter) { + const auto operand = builder_.ConstantR1<float>({1, 2, 3, 100, 3, 2, 1}); + const auto source = 
builder_.ConstantR1<float>({34, 42, 53, 19}); + const float max_float = std::numeric_limits<float>::max(); + const std::vector<float> expected = {max_float, max_float, max_float, 19, + max_float, max_float, max_float}; + builder_.SelectAndScatter(operand, ge_f32_, /*window_dimensions=*/{4}, + /*window_strides=*/{1}, Padding::kValid, source, + builder_.ConstantR0<float>(max_float), min_f32_); + ComputeAndCompareR1<float>(&builder_, expected, {}, ErrorSpec(1e-7)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/select_test.cc b/tensorflow/compiler/xla/tests/select_test.cc new file mode 100644 index 0000000000..5ec9ac95fa --- /dev/null +++ b/tensorflow/compiler/xla/tests/select_test.cc @@ -0,0 +1,276 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class SelectTest : public ClientLibraryTestBase { + public: + ErrorSpec error_spec_{0.0001}; +}; + +TEST_F(SelectTest, SelectScalarF32True) { + ComputationBuilder builder(client_, TestName()); + auto pred = builder.ConstantR0<bool>(true); + auto on_true = builder.ConstantR0<float>(123.0f); + auto on_false = builder.ConstantR0<float>(42.0f); + auto result = builder.Select(pred, on_true, on_false); + + ComputeAndCompareR0<float>(&builder, 123.0f, {}, error_spec_); +} + +TEST_F(SelectTest, SelectScalarS32True) { + ComputationBuilder builder(client_, TestName()); + auto pred = builder.ConstantR0<bool>(true); + auto on_true = builder.ConstantR0<int32>(-42); + auto on_false = builder.ConstantR0<int32>(42); + auto result = builder.Select(pred, on_true, on_false); + + ComputeAndCompareR0<int32>(&builder, -42, {}); +} + +TEST_F(SelectTest, SelectScalarF32False) { + ComputationBuilder builder(client_, TestName()); + auto pred = builder.ConstantR0<bool>(false); + auto on_true = builder.ConstantR0<float>(123.0f); + auto on_false = builder.ConstantR0<float>(42.0f); + auto result = builder.Select(pred, on_true, on_false); + + ComputeAndCompareR0<float>(&builder, 42.0f, {}, error_spec_); +} + +XLA_TEST_F(SelectTest, SelectR1S0F32WithConstantR1S0PRED) { + 
ComputationBuilder builder(client_, TestName()); + auto pred = builder.ConstantR1<bool>({}); + auto on_true = builder.ConstantR1<float>({}); + auto on_false = builder.ConstantR1<float>({}); + auto select = builder.Select(pred, on_true, on_false); + + ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_); +} + +TEST_F(SelectTest, SelectR1F32WithConstantR1PRED) { + ComputationBuilder builder(client_, TestName()); + auto pred = builder.ConstantR1<bool>({false, true, false, true, false}); + auto on_true = builder.ConstantR1<float>({-2.5f, 25.5f, 2.25f, -10.0f, 6.0f}); + auto on_false = builder.ConstantR1<float>({10.0f, 5.0f, 1.0f, 10.0f, -6.0f}); + auto select = builder.Select(pred, on_true, on_false); + + ComputeAndCompareR1<float>(&builder, {10.0f, 25.5f, 1.0f, -10.0f, -6.0f}, {}, + error_spec_); +} + +XLA_TEST_F(SelectTest, SelectR1S0F32WithCmpR1S0S32s) { + // Similar to SelectR1S0F32WithConstantR1S0PRED, except that the pred vector + // is not a constant, but rather the result of comparing two other vectors. + ComputationBuilder builder(client_, TestName()); + auto v1 = builder.ConstantR1<int32>({}); + auto v2 = builder.ConstantR1<int32>({}); + auto cmp = builder.Eq(v1, v2); + auto on_true = builder.ConstantR1<float>({}); + auto on_false = builder.ConstantR1<float>({}); + auto select = builder.Select(cmp, on_true, on_false); + + ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_); +} + +TEST_F(SelectTest, SelectR1F32WithCmpR1S32s) { + // Similar to SelectR1F32WithConstantR1PRED, except that the pred vector is + // not a constant, but rather the result of comparing two other vectors. 
+ ComputationBuilder builder(client_, TestName()); + auto v1 = builder.ConstantR1<int32>({1, 2, 3, 4, 5}); + auto v2 = builder.ConstantR1<int32>({9, 2, 9, 4, 9}); + auto cmp = builder.Eq(v1, v2); + auto on_true = builder.ConstantR1<float>({-2.5f, 25.5f, 2.25f, -10.0f, 6.0f}); + auto on_false = builder.ConstantR1<float>({10.0f, 5.0f, 1.0f, 10.0f, -6.0f}); + auto select = builder.Select(cmp, on_true, on_false); + + ComputeAndCompareR1<float>(&builder, {10.0f, 25.5f, 1.0f, -10.0f, -6.0f}, {}, + error_spec_); +} + +TEST_F(SelectTest, SelectR1F32WithCmpR1F32s) { + // Similar to SelectR1F32WithCmpR1S32s, except "gt"-comparing two R1F32s. + ComputationBuilder builder(client_, TestName()); + auto v1 = builder.ConstantR1<float>({1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); + auto v2 = builder.ConstantR1<float>({-1.0f, -2.0f, 13.0f, 14.0f, 4.4f}); + auto cmp = builder.Gt(v1, v2); + auto on_true = builder.ConstantR1<float>({-2.5f, 25.5f, 2.25f, -10.0f, 6.0f}); + auto on_false = builder.ConstantR1<float>({10.0f, 5.0f, 1.0f, 10.0f, -6.0f}); + auto select = builder.Select(cmp, on_true, on_false); + + ComputeAndCompareR1<float>(&builder, {-2.5f, 25.5f, 1.0f, 10.0f, 6.0f}, {}, + error_spec_); +} + +TEST_F(SelectTest, SelectR1F32WithCmpR1F32sFromParamsSmall) { + // Selects among two R1F32s, which come from parameters. v1 and v2 are + // compared, and selection between them happens based on a gt-comparison mask. 
+ ComputationBuilder builder(client_, TestName()); + + ComputationDataHandle v1, v2; + std::unique_ptr<GlobalData> param0_data = CreateR1Parameter<float>( + {41.0f, 2.0f, 3.0f, 84.0f}, /*parameter_number=*/0, /*name=*/"v1", + /*builder=*/&builder, /*data_handle=*/&v1); + std::unique_ptr<GlobalData> param1_data = CreateR1Parameter<float>( + {21.0f, 22.0f, 23.0f, 24.0f}, /*parameter_number=*/1, /*name=*/"v2", + /*builder=*/&builder, /*data_handle=*/&v2); + + auto cmp = builder.Gt(v1, v2); + auto select = builder.Select(cmp, v1, v2); + ComputeAndCompareR1<float>(&builder, {41.0f, 22.0f, 23.0f, 84.0f}, + {param0_data.get(), param1_data.get()}, + error_spec_); +} + +TEST_F(SelectTest, SelectR1F32WithCmpR1F32sFromParamsLarge) { + // Similar to SelectR1F32WithCmpR1F32sFromParamsSmall, except that the + // data size passed in and out is large. + ComputationBuilder builder(client_, TestName()); + + // Number of floats in the data passed into and out of the computation. + constexpr int datalen = 15 * 1000; + + // The inputs are initialized with a special pattern where in the first third + // of the data v1[i] > v2[i] and elsewhere it's vice versa. 
+ std::vector<float> v1vec; + std::vector<float> v2vec; + std::vector<float> expected_vec; + for (int i = 0; i < datalen; ++i) { + float smaller = i; + float larger = i * 2; + if (i < datalen / 3) { + v1vec.push_back(larger); + v2vec.push_back(smaller); + } else { + v1vec.push_back(smaller); + v2vec.push_back(larger); + } + expected_vec.push_back(larger); + } + + ComputationDataHandle v1, v2; + std::unique_ptr<GlobalData> param0_data = + CreateR1Parameter<float>(v1vec, /*parameter_number=*/0, /*name=*/"v1", + /*builder=*/&builder, /*data_handle=*/&v1); + std::unique_ptr<GlobalData> param1_data = + CreateR1Parameter<float>(v2vec, /*parameter_number=*/1, /*name=*/"v2", + /*builder=*/&builder, /*data_handle=*/&v2); + + auto cmp = builder.Gt(v1, v2); + auto select = builder.Select(cmp, v1, v2); + ComputeAndCompareR1<float>(&builder, expected_vec, + {param0_data.get(), param1_data.get()}, + error_spec_); +} + +TEST_F(SelectTest, SelectR1F32WithCmpR1S32ToScalar) { + // "gt"-compares a R1S32 with a S32 scalar, and uses the resulting R1PRED to + // select between two R1F32s. + ComputationBuilder builder(client_, TestName()); + auto v = builder.ConstantR1<int32>({1, -1, 2, -2}); + auto s = builder.ConstantR0<int32>(0); + auto cmp = builder.Gt(v, s); + + auto on_true = builder.ConstantR1<float>({11.0f, 22.0f, 33.0f, 44.0f}); + auto on_false = + builder.ConstantR1<float>({-111.0f, -222.0f, -333.0f, -444.0f}); + auto select = builder.Select(cmp, on_true, on_false); + + ComputeAndCompareR1<float>(&builder, {11.0f, -222.0f, 33.0f, -444.0f}, {}, + error_spec_); +} + +TEST_F(SelectTest, SelectR1F32WithCmpR1F32ToScalar) { + // "gt"-compares a R1F32 with a F32 scalar, and uses the resulting R1PRED to + // select between two R1F32s. 
+ ComputationBuilder builder(client_, TestName()); + auto v = builder.ConstantR1<float>({1.0f, 2.0f, 3.0f, 4.0f}); + auto s = builder.ConstantR0<float>(2.5f); + auto cmp = builder.Gt(v, s); + + auto on_true = builder.ConstantR1<float>({11.0f, 22.0f, 33.0f, 44.0f}); + auto on_false = + builder.ConstantR1<float>({-111.0f, -222.0f, -333.0f, -444.0f}); + auto select = builder.Select(cmp, on_true, on_false); + + ComputeAndCompareR1<float>(&builder, {-111.0f, -222.0f, 33.0f, 44.0f}, {}, + error_spec_); +} + +XLA_TEST_F(SelectTest, SelectR1S0F32WithScalarPredicate) { + for (bool which : {false, true}) { + ComputationBuilder builder(client_, TestName()); + auto pred = builder.ConstantR0<bool>(which); + auto on_true = builder.ConstantR1<float>({}); + auto on_false = builder.ConstantR1<float>({}); + auto select = builder.Select(pred, on_true, on_false); + + ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_); + } +} + +TEST_F(SelectTest, SelectR1F32WithScalarPredicateTrue) { + ComputationBuilder builder(client_, TestName()); + auto pred = builder.ConstantR0<bool>(true); + auto on_true = builder.ConstantR1<float>({-2.5f, 25.5f}); + auto on_false = builder.ConstantR1<float>({10.0f, 5.0f}); + auto select = builder.Select(pred, on_true, on_false); + + ComputeAndCompareR1<float>(&builder, {-2.5f, 25.5f}, {}, error_spec_); +} + +TEST_F(SelectTest, SelectR1F32WithScalarPredicateFalse) { + ComputationBuilder builder(client_, TestName()); + auto pred = builder.ConstantR0<bool>(false); + auto on_true = builder.ConstantR1<float>({-2.5f, 25.5f}); + auto on_false = builder.ConstantR1<float>({10.0f, 5.0f}); + auto select = builder.Select(pred, on_true, on_false); + + ComputeAndCompareR1<float>(&builder, {10.0f, 5.0f}, {}, error_spec_); +} +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const 
bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/set_return_value_test.cc b/tensorflow/compiler/xla/tests/set_return_value_test.cc new file mode 100644 index 0000000000..e15d744d95 --- /dev/null +++ b/tensorflow/compiler/xla/tests/set_return_value_test.cc @@ -0,0 +1,116 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <vector> + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class SetReturnValueTest : public ClientLibraryTestBase {}; + +TEST_F(SetReturnValueTest, NoSetValue) { + ComputationBuilder builder(client_, "no_set_value"); + auto alpha = builder.ConstantR0<float>(1.0); + auto x = builder.ConstantR1<float>( + {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0}); + auto ax = builder.Add(alpha, x); + auto aax = builder.Add(alpha, ax); + + std::vector<float> expected = {1.0, 3.0, 4.0, 0.0, -1.0, + 5.0, 6.0, -2.0, -3.0, 7.0}; + + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +TEST_F(SetReturnValueTest, SetValue) { + ComputationBuilder builder(client_, "set_value"); + auto alpha = builder.ConstantR0<float>(1.0); + auto x = builder.ConstantR1<float>( + {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0}); + auto ax = builder.Add(alpha, x); + auto aax = builder.Add(alpha, ax); + auto builder_status = builder.SetReturnValue(ax); + EXPECT_TRUE(builder_status.ok()); + + std::vector<float> expected = {0.0, 2.0, 3.0, -1.0, -2.0, + 4.0, 5.0, -3.0, -4.0, 6.0}; + + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +TEST_F(SetReturnValueTest, SetValueAndModify) { + ComputationBuilder builder(client_, "set_value_and_modify"); + auto alpha = builder.ConstantR0<float>(1.0); + auto x = builder.ConstantR1<float>( + {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0}); + auto ax = builder.Add(alpha, x); + auto aax = builder.Add(alpha, ax); 
+ auto builder_status = builder.SetReturnValue(ax); + EXPECT_TRUE(builder_status.ok()); + auto aaax = builder.Add(alpha, aax); + + std::vector<float> expected = {0.0, 2.0, 3.0, -1.0, -2.0, + 4.0, 5.0, -3.0, -4.0, 6.0}; + + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +TEST_F(SetReturnValueTest, SetValueMultipleTimesAndModify) { + ComputationBuilder builder(client_, "set_value_multiple_times_and_modify"); + auto alpha = builder.ConstantR0<float>(1.0); + auto x = builder.ConstantR1<float>( + {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0}); + auto ax = builder.Add(alpha, x); + auto aax = builder.Add(alpha, ax); + auto builder_status = builder.SetReturnValue(aax); + EXPECT_TRUE(builder_status.ok()); + auto aaax = builder.Add(alpha, aax); + builder_status = builder.SetReturnValue(ax); + EXPECT_TRUE(builder_status.ok()); + auto aaaax = builder.Add(alpha, aaax); + + std::vector<float> expected = {0.0, 2.0, 3.0, -1.0, -2.0, + 4.0, 5.0, -3.0, -4.0, 6.0}; + + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc new file mode 100644 index 0000000000..d63582fb98 --- /dev/null +++ b/tensorflow/compiler/xla/tests/slice_test.cc @@ -0,0 +1,277 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Tests that slice operations can be performed. + +#include <numeric> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class SliceTest : public ClientLibraryTestBase { + protected: + template <typename NativeT> + void RunSliceTenToTwo() { + std::vector<NativeT> constant; + for (int i = 0; i < 10; ++i) { + constant.push_back(static_cast<NativeT>(i)); + } + + ComputationBuilder builder(client_, TestName()); + auto original = builder.ConstantR1<NativeT>(constant); + builder.Slice(original, {2}, {4}); + + const std::vector<NativeT> expected = {static_cast<NativeT>(2), + static_cast<NativeT>(3)}; + ComputeAndCompareR1<NativeT>(&builder, expected, {}); + } +}; + +XLA_TEST_F(SliceTest, SliceZeroToZeroF32) { + ComputationBuilder builder(client_, TestName()); + auto original = 
builder.ConstantR1<float>({}); + builder.Slice(original, {0}, {0}); + + ComputeAndCompareR1<float>(&builder, {}, {}); +} + +XLA_TEST_F(SliceTest, SliceTenToZeroF32) { + ComputationBuilder builder(client_, TestName()); + std::vector<float> constant(10, 0.3); + auto original = builder.ConstantR1<float>(constant); + builder.Slice(original, {7}, {7}); + + ComputeAndCompareR1<float>(&builder, {}, {}); +} + +TEST_F(SliceTest, SliceTenToTwoF32) { RunSliceTenToTwo<float>(); } + +XLA_TEST_F(SliceTest, SliceTenToTwoF64) { RunSliceTenToTwo<double>(); } + +TEST_F(SliceTest, SliceTenToTwoU32) { RunSliceTenToTwo<uint32>(); } + +TEST_F(SliceTest, SliceTenToTwoS32) { RunSliceTenToTwo<int32>(); } + +XLA_TEST_F(SliceTest, SliceTenToTwoU64) { RunSliceTenToTwo<uint64>(); } + +XLA_TEST_F(SliceTest, SliceTenToTwoS64) { RunSliceTenToTwo<int64>(); } + +TEST_F(SliceTest, SliceTenToTen) { + const std::vector<float> values = {0.0, 1.0, 2.0, 3.0, 4.0, + 5.0, 6.0, 7.0, 8.0, 9.0}; + + ComputationBuilder builder(client_, TestName()); + auto original = builder.ConstantR1<float>(values); + builder.Slice(original, {0}, {10}); + + ComputeAndCompareR1<float>(&builder, values, {}, ErrorSpec(0.000001)); +} + +TEST_F(SliceTest, SliceLastFourOf1024) { + std::vector<float> values(1024); + std::iota(values.begin(), values.end(), 0.0); + + ComputationBuilder builder(client_, TestName()); + auto original = builder.ConstantR1<float>(values); + builder.Slice(original, {1024 - 4}, {1024}); + + const std::vector<float> expected = {1020, 1021, 1022, 1023}; + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.000001)); +} + +// TODO(b/28491443): Fix wrong result on CPU and GPU. Failed on +// 2016-05-01. 
Also b/28508652 +TEST_F(SliceTest, DISABLED_SliceUnaligned1024In4096Values) { + std::vector<float> values(4096); + std::iota(values.begin(), values.end(), 0.0); + + ComputationBuilder builder(client_, TestName()); + auto original = builder.ConstantR1<float>(values); + builder.Slice(original, {7}, {7 + 1024}); + + std::vector<float> expected(1024); + /* Fill the oracle with 7.0..1030.0 to mirror the slice [7, 1031); the original code mistakenly re-filled `values`, leaving `expected` all zeros so the comparison could never pass. */ + std::iota(expected.begin(), expected.end(), 7.0); + ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.000001)); +} + +XLA_TEST_F(SliceTest, Slice0x0to0x0F32) { + ComputationBuilder builder(client_, TestName()); + auto original = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 0)); + builder.Slice(original, {0, 0}, {0, 0}); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(0, 0), {}); +} + +XLA_TEST_F(SliceTest, Slice0x20to0x5F32) { + ComputationBuilder builder(client_, TestName()); + auto original = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 20)); + builder.Slice(original, {0, 15}, {0, 20}); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(0, 5), {}); +} + +XLA_TEST_F(SliceTest, Slice3x0to2x0F32) { + ComputationBuilder builder(client_, TestName()); + auto original = builder.ConstantR2FromArray2D<float>(Array2D<float>(3, 0)); + builder.Slice(original, {1, 0}, {3, 0}); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(2, 0), {}); +} + +XLA_TEST_F(SliceTest, SliceQuadrantOf256x256) { + Array2D<float> values(256, 256); + for (int row = 0; row < 256; ++row) { + for (int col = 0; col < 256; ++col) { + values(row, col) = (row << 10) | col; + } + } + + ComputationBuilder builder(client_, TestName()); + auto original = builder.ConstantR2FromArray2D<float>(values); + builder.Slice(original, {128, 128}, {256, 256}); + + Array2D<float> expected(128, 128); + for (int row = 0; row < 128; ++row) { + for (int col = 0; col < 128; ++col) { + expected(row, col) = ((row + 128) << 10) | (col + 128); + } + } + ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.000001)); +} + +// 
Tests: (f32[1,4096], starts={0, 3072}, limits={1, 4096}) -> f32[1,1024]) +TEST_F(SliceTest, Slice_1x4096_To_1x1024) { + Array2D<float> values(1, 4096); + std::iota(values.data(), values.data() + 4096, 0.0); + + ComputationBuilder builder(client_, TestName()); + auto original = builder.ConstantR2FromArray2D<float>(values); + builder.Slice(original, {0, 3072}, {1, 4096}); + + Array2D<float> expected(1, 1024); + std::iota(expected.data(), expected.data() + 1024, 3072.0); + ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.000001)); +} + +// Tests slice: (f32[16,4], starts={0, 0}, limits={16, 2}) -> f32[16,2] +TEST_F(SliceTest, Slice_16x4_To_16x2) { + Array2D<float> values(16, 4); + Array2D<float> expected(16, 2); + for (int row = 0; row < 16; ++row) { + for (int col = 0; col < 4; ++col) { + values(row, col) = (row << 10) | col; + if (col < 2) { + expected(row, col) = (row << 10) | col; + } + } + } + ComputationBuilder builder(client_, TestName()); + auto original = builder.ConstantR2FromArray2D<float>(values); + builder.Slice(original, {0, 0}, {16, 2}); + ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.000001)); +} + +// Tests: (f32[2, 2, 24, 256], starts = {1, 0, 8, 0}, ends = {2, 2, 16, 128} +TEST_F(SliceTest, SliceR4ThreeDimsMiddleMinor) { + Array4D<float> values(2, 2, 24, 256); + values.FillRandom(3.14f); + auto expected = + ReferenceUtil::Slice4D(values, {{1, 0, 8, 0}}, {{2, 2, 16, 128}}); + ComputationBuilder builder(client_, TestName()); + auto original = builder.ConstantR4FromArray4D(values); + builder.Slice(original, {1, 0, 8, 0}, {2, 2, 16, 128}); + ComputeAndCompareR4(&builder, *expected, {}, ErrorSpec(0.000001)); +} + +struct R2Spec { + int64 input_dim0; + int64 input_dim1; + std::array<int64, 2> slice_starts; + std::array<int64, 2> slice_limits; + Layout layout; +}; + +// Parameterized test that generates patterned R2 values, slices them according +// to the R2Spec, and compares the results with the ReferenceUtil version. 
+class SliceR2Test : public ClientLibraryTestBase, + public ::testing::WithParamInterface<R2Spec> {}; + +TEST_P(SliceR2Test, DoIt) { + const R2Spec& spec = GetParam(); + Array2D<int32> input(spec.input_dim0, spec.input_dim1); + input.FillUnique(); + + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR2FromArray2D<int32>(input); + builder.Slice(a, spec.slice_starts, spec.slice_limits); + + std::unique_ptr<Array2D<int32>> expected = + ReferenceUtil::Slice2D(input, spec.slice_starts, spec.slice_limits); + ComputeAndCompareR2<int32>(&builder, *expected, {}); +} + +// clang-format off +INSTANTIATE_TEST_CASE_P( + SliceR2TestInstantiation, SliceR2Test, + ::testing::Values( + R2Spec {4, 12, {{0, 3}}, {{4, 6}}, LayoutUtil::MakeLayout({0, 1})}, + R2Spec {4, 12, {{0, 3}}, {{4, 6}}, LayoutUtil::MakeLayout({1, 0})}, + R2Spec {16, 4, {{0, 2}}, {{16, 4}}, LayoutUtil::MakeLayout({0, 1})}, + R2Spec {16, 4, {{0, 2}}, {{16, 4}}, LayoutUtil::MakeLayout({1, 0})}, + R2Spec {256, 400, {{0, 300}}, {{256, 400}}, + LayoutUtil::MakeLayout({1, 0})}, + R2Spec {500, 400, {{111, 123}}, {{300, 257}}, + LayoutUtil::MakeLayout({1, 0})}, + R2Spec {500, 400, {{111, 123}}, {{300, 400}}, + LayoutUtil::MakeLayout({1, 0})}, + R2Spec {384, 512, {{128, 256}}, {{256, 384}}, + LayoutUtil::MakeLayout({1, 0})}, + R2Spec {357, 512, {{111, 256}}, {{301, 384}}, + LayoutUtil::MakeLayout({1, 0})} + ) +); +// clang-format on + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git 
a/tensorflow/compiler/xla/tests/test_macros.h b/tensorflow/compiler/xla/tests/test_macros.h new file mode 100644 index 0000000000..7f987a21ca --- /dev/null +++ b/tensorflow/compiler/xla/tests/test_macros.h @@ -0,0 +1,76 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Macros for use in enabling/disabling tests on particular +// platforms. Marking a gunit test as disabled still ensures that it +// compiles. +// +// Implementation note: the macros are structured as follows: +// * Define the disabled macro to just pass the test name through (which, in +// effect, does not disable it at all) +// * If a XLA_TEST_BACKEND_$TARGET macro indicates we're compiling for +// $TARGET platform, make the disabled macro truly disable the test; i.e. by +// redefining the DISABLED_ON_$TARGET macro to prepend "DISABLED_" to the test +// name. + +#ifndef TENSORFLOW_COMPILER_XLA_TESTS_TEST_MACROS_H_ +#define TENSORFLOW_COMPILER_XLA_TESTS_TEST_MACROS_H_ + +#include <string> + +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/platform/test.h" + +// Use this macro instead of directly using TEST_P for parameterized tests, +// otherwise DISABLED_ON_* macros nested in TEST_P will not get expanded since +// TEST_P stringifies its argument. 
That makes the test disabled for all targets +// when any one of the DISABLED_ON_* macro is used, and the test will just pass. +// TODO(b/29122096): Remove this once TEST_P fixes this problem. +#define XLA_TEST_P(test_case_name, test_name) TEST_P(test_case_name, test_name) + +#define DISABLED_ON_CPU(X) X +#define DISABLED_ON_CPU_PARALLEL(X) X +#define DISABLED_ON_GPU(X) X + +// We need this macro instead of pasting directly to support nesting +// the DISABLED_ON_FOO macros, as in the definition of DISABLED_ON_CPU. +// Otherwise the pasting is applied before macro expansion completes. +#define XLA_TEST_PASTE(A, B) A##B + +// We turn off clang-format so we can indent the macros for readability. +// clang-format off + +#ifdef XLA_TEST_BACKEND_CPU +# undef DISABLED_ON_CPU +# define DISABLED_ON_CPU(X) XLA_TEST_PASTE(DISABLED_, X) +#endif // XLA_TEST_BACKEND_CPU + +#ifdef XLA_TEST_BACKEND_CPU_PARALLEL +# undef DISABLED_ON_CPU +# define DISABLED_ON_CPU(X) XLA_TEST_PASTE(DISABLED_, X) +# undef DISABLED_ON_CPU_PARALLEL +# define DISABLED_ON_CPU_PARALLEL(X) XLA_TEST_PASTE(DISABLED_, X) +#endif // XLA_TEST_BACKEND_CPU_PARALLEL + +#ifdef XLA_TEST_BACKEND_GPU +# undef DISABLED_ON_GPU +# define DISABLED_ON_GPU(X) XLA_TEST_PASTE(DISABLED_, X) +#endif // XLA_TEST_BACKEND_GPU + +// clang-format on + +#define XLA_TEST_F(test_fixture, test_name) TEST_F(test_fixture, test_name) + +#endif // TENSORFLOW_COMPILER_XLA_TESTS_TEST_MACROS_H_ diff --git a/tensorflow/compiler/xla/tests/test_utils.h b/tensorflow/compiler/xla/tests/test_utils.h new file mode 100644 index 0000000000..6a23df4d3c --- /dev/null +++ b/tensorflow/compiler/xla/tests/test_utils.h @@ -0,0 +1,115 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_TESTS_TEST_UTILS_H_ +#define TENSORFLOW_COMPILER_XLA_TESTS_TEST_UTILS_H_ + +#include <initializer_list> +#include <memory> +#include <random> + +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace test_utils { + +// A class which generates pseudorandom numbers of a given type within a given +// range. Not cryptographically secure and likely not perfectly evenly +// distributed across the range but sufficient for most tests. +template <typename NativeT> +class PseudorandomGenerator { + public: + explicit PseudorandomGenerator(NativeT min_value, NativeT max_value, + uint32 seed) + : min_(min_value), max_(max_value), generator_(seed) {} + + // Get a pseudorandom value. + NativeT get() { + std::uniform_real_distribution<> distribution; + return static_cast<NativeT>(min_ + + (max_ - min_) * distribution(generator_)); + } + + private: + NativeT min_; + NativeT max_; + std::mt19937 generator_; +}; + +// Convenience function for creating a rank-2 array with arbitrary layout. 
+template <typename NativeT> +std::unique_ptr<Literal> CreateR2LiteralWithLayout( + std::initializer_list<std::initializer_list<NativeT>> values, + tensorflow::gtl::ArraySlice<int64> minor_to_major) { + auto literal = MakeUnique<Literal>(); + const int64 d0 = values.size(); + const int64 d1 = values.begin()->size(); + LiteralUtil::PopulateWithValue<NativeT>(0, {d0, d1}, literal.get()); + *literal->mutable_shape()->mutable_layout() = + LayoutUtil::MakeLayout(minor_to_major); + TF_CHECK_OK(ShapeUtil::ValidateShape(literal->shape())); + + int64 dim0 = 0; + for (auto inner_list : values) { + int64 dim1 = 0; + for (auto value : inner_list) { + LiteralUtil::Set(literal.get(), {dim0, dim1}, value); + ++dim1; + } + ++dim0; + } + return literal; +} + +// Convenience function for creating a rank-3 array with arbitrary layout. +template <typename NativeT> +std::unique_ptr<Literal> CreateR3LiteralWithLayout( + std::initializer_list<std::initializer_list<std::initializer_list<NativeT>>> + values, + tensorflow::gtl::ArraySlice<int64> minor_to_major) { + auto literal = MakeUnique<Literal>(); + const int64 d0 = values.size(); + const int64 d1 = values.begin()->size(); + const int64 d2 = values.begin()->begin()->size(); + LiteralUtil::PopulateWithValue<NativeT>(0, {d0, d1, d2}, literal.get()); + *literal->mutable_shape()->mutable_layout() = + LayoutUtil::MakeLayout(minor_to_major); + TF_CHECK_OK(ShapeUtil::ValidateShape(literal->shape())); + + int64 dim0 = 0; + for (auto inner_list : values) { + int64 dim1 = 0; + for (auto inner_inner_list : inner_list) { + int64 dim2 = 0; + for (auto value : inner_inner_list) { + LiteralUtil::Set(literal.get(), {dim0, dim1, dim2}, value); + ++dim2; + } + ++dim1; + } + ++dim0; + } + return literal; +} + +} // namespace test_utils +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_TESTS_TEST_UTILS_H_ diff --git a/tensorflow/compiler/xla/tests/transpose_test.cc b/tensorflow/compiler/xla/tests/transpose_test.cc new file mode 100644 index 
0000000000..79f251bbc4 --- /dev/null +++ b/tensorflow/compiler/xla/tests/transpose_test.cc @@ -0,0 +1,203 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <memory> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class TransposeTest : public ClientLibraryTestBase { + public: + ErrorSpec error_spec_{0.0001}; + + protected: + void TestTransposeConstant021(size_t n1, size_t n2, size_t n3); +}; + +XLA_TEST_F(TransposeTest, Transpose0x0) { + ComputationBuilder builder(client_, "Transpose"); + auto lhs = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 0)); + auto result = builder.Transpose(lhs, {1, 0}); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(0, 0), {}, error_spec_); +} + +XLA_TEST_F(TransposeTest, 
Transpose0x42) { + ComputationBuilder builder(client_, "Transpose"); + auto lhs = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 42)); + auto result = builder.Transpose(lhs, {1, 0}); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(42, 0), {}, error_spec_); +} + +XLA_TEST_F(TransposeTest, Transpose7x0) { + ComputationBuilder builder(client_, "Transpose"); + auto lhs = builder.ConstantR2FromArray2D<float>(Array2D<float>(7, 0)); + auto result = builder.Transpose(lhs, {1, 0}); + + ComputeAndCompareR2<float>(&builder, Array2D<float>(0, 7), {}, error_spec_); +} + +TEST_F(TransposeTest, Transpose2x2) { + ComputationBuilder builder(client_, "Transpose"); + auto lhs = builder.ConstantR2<float>({ + {1.0, 2.0}, {3.0, 4.0}, + }); + auto result = builder.Transpose(lhs, {1, 0}); + + Array2D<float> expected({{1.0f, 3.0f}, {2.0f, 4.0f}}); + + ComputeAndCompareR2<float>(&builder, expected, {}, error_spec_); +} + +XLA_TEST_F(TransposeTest, Transpose0x2x3_2x3x0) { + ComputationBuilder builder(client_, "Transpose"); + auto operand = builder.ConstantR3FromArray3D<int32>(Array3D<int32>(0, 2, 3)); + auto result = builder.Transpose(operand, {1, 2, 0}); + + ComputeAndCompareR3<int32>(&builder, Array3D<int32>(2, 3, 0), {}); +} + +TEST_F(TransposeTest, Transpose1x2x3_2x3x1) { + ComputationBuilder builder(client_, "Transpose"); + auto operand = builder.ConstantR3FromArray3D<int32>({{{1, 2, 3}, {4, 5, 6}}}); + auto result = builder.Transpose(operand, {1, 2, 0}); + + Array3D<int32> expected({{{1}, {2}, {3}}, {{4}, {5}, {6}}}); + + ComputeAndCompareR3<int32>(&builder, expected, {}); +} + +TEST_F(TransposeTest, Transpose1x2x3_3x2x1) { + ComputationBuilder builder(client_, "Transpose"); + auto operand = builder.ConstantR3FromArray3D<int32>({{{1, 2, 3}, {4, 5, 6}}}); + auto result = builder.Transpose(operand, {2, 1, 0}); + + Array3D<int32> expected({{{1}, {4}}, {{2}, {5}}, {{3}, {6}}}); + + ComputeAndCompareR3<int32>(&builder, expected, {}); +} + +TEST_F(TransposeTest, 
Transpose1x2x3_1x2x3) { + ComputationBuilder builder(client_, "Transpose"); + auto operand = builder.ConstantR3FromArray3D<int32>({{{1, 2, 3}, {4, 5, 6}}}); + auto result = builder.Transpose(operand, {0, 1, 2}); + + Array3D<int32> expected({{{1, 2, 3}, {4, 5, 6}}}); + + ComputeAndCompareR3<int32>(&builder, expected, {}); +} + +TEST_F(TransposeTest, MultiTranspose3x2) { + Array2D<float> input({{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}}); + Array2D<float> transposed({{1.0f, 3.0f, 5.0f}, {2.0f, 4.0f, 6.0f}}); + + for (int transposes = 0; transposes <= 10; ++transposes) { + ComputationBuilder builder(client_, "Transpose"); + auto computed = builder.ConstantR2FromArray2D<float>(input); + for (int i = 0; i < transposes; ++i) { + computed = builder.Transpose(computed, {1, 0}); + } + const Array2D<float>& expected = transposes % 2 == 0 ? input : transposed; + ComputeAndCompareR2<float>(&builder, expected, {}, error_spec_); + } +} + +// Test for transposing [1x1] matrix. +TEST_F(TransposeTest, Small_1x1) { + auto aoperand = MakeLinspaceArray2D(0.0, 1.0, 1, 1); + + ComputationBuilder builder(client_, "transpose_1x1"); + auto operand = builder.ConstantR2FromArray2D<float>(*aoperand); + builder.Transpose(operand, {1, 0}); + + auto expected = ReferenceUtil::TransposeArray2D(*aoperand); + ComputeAndCompareR2<float>(&builder, *expected, {}, ErrorSpec(1e-4)); +} + +// Test for transposing [2x2] matrix. 
+TEST_F(TransposeTest, Small_2x2) { + auto aoperand = MakeLinspaceArray2D(0.0, 4.0, 2, 2); + + ComputationBuilder builder(client_, "transpose_2x2"); + auto operand = builder.ConstantR2FromArray2D<float>(*aoperand); + builder.Transpose(operand, {1, 0}); + + auto expected = ReferenceUtil::TransposeArray2D(*aoperand); + ComputeAndCompareR2<float>(&builder, *expected, {}, ErrorSpec(1e-4)); +} + +void TransposeTest::TestTransposeConstant021(size_t n1, size_t n2, size_t n3) { + Array3D<int32> aoperand(n1, n2, n3); + Array3D<int32> expected(n1, n3, n2); + for (size_t i = 0; i < n1; ++i) { + for (size_t j = 0; j < n2; ++j) { + for (size_t k = 0; k < n3; ++k) { + aoperand(i, j, k) = i * n3 * n2 + j * n3 + k; + expected(i, k, j) = aoperand(i, j, k); + } + } + } + + ComputationBuilder builder(client_, TestName()); + auto operand = builder.ConstantR3FromArray3D(aoperand); + builder.Transpose(operand, {0, 2, 1}); + + ComputeAndCompareR3<int32>(&builder, expected, {}); +} + +TEST_F(TransposeTest, TransposeConstant021_SingleIncompleteTilePerLayer) { + TestTransposeConstant021(2, 2, 3); +} + +TEST_F(TransposeTest, TransposeConstant021_SingleCompleteTilePerLayer) { + TestTransposeConstant021(2, 32, 32); +} + +TEST_F(TransposeTest, TransposeConstant021_MultipleTilesPerLayer) { + TestTransposeConstant021(2, 70, 35); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc new file mode 
100644 index 0000000000..cea9316a6d --- /dev/null +++ b/tensorflow/compiler/xla/tests/tuple_test.cc @@ -0,0 +1,415 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <initializer_list> +#include <memory> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class TupleTest : public ClientLibraryTestBase { + public: + ErrorSpec error_spec_{0.0001}; +}; + +// Tests the creation of tuple data. 
+XLA_TEST_F(TupleTest, TupleCreate) { + ComputationBuilder builder(client_, TestName()); + + const float constant_scalar = 7.3f; + std::initializer_list<float> constant_vector = {1.1f, 2.0f, 3.3f}; + std::initializer_list<std::initializer_list<float>> constant_matrix = { + {1.1f, 2.2f, 3.5f}, // row 0 + {4.8f, 5.0f, 6.7f}, // row 1 + }; + auto result = builder.Tuple({builder.ConstantR0<float>(constant_scalar), + builder.ConstantR1<float>(constant_vector), + builder.ConstantR2<float>(constant_matrix)}); + + auto expected = LiteralUtil::MakeTuple( + {LiteralUtil::CreateR0<float>(constant_scalar).get(), + LiteralUtil::CreateR1<float>(constant_vector).get(), + LiteralUtil::CreateR2<float>(constant_matrix).get()}); + ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); +} + +// Tests the creation of tuple data. +XLA_TEST_F(TupleTest, TupleCreateWithZeroElementEntry) { + ComputationBuilder builder(client_, TestName()); + + auto result = builder.Tuple( + {builder.ConstantR0<float>(7.0), builder.ConstantR1<float>({})}); + + auto expected = + LiteralUtil::MakeTuple({LiteralUtil::CreateR0<float>(7.0).get(), + LiteralUtil::CreateR1<float>({}).get()}); + ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); +} + +// Tests the creation of an empty tuple. +XLA_TEST_F(TupleTest, EmptyTupleCreate) { + ComputationBuilder builder(client_, TestName()); + auto result = builder.Tuple({}); + auto expected = LiteralUtil::MakeTuple({}); + ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); +} + +// Trivial test for extracting a tuple element with GetTupleElement. 
+XLA_TEST_F(TupleTest, GetTupleElement) { + ComputationBuilder builder(client_, TestName()); + std::initializer_list<float> constant_vector = {1.f, 2.f, 3.f}; + std::initializer_list<std::initializer_list<float>> constant_matrix = { + {1.f, 2.f, 3.f}, // row 0 + {4.f, 5.f, 6.f}, // row 1 + }; + auto tuple_data = builder.Tuple({builder.ConstantR1<float>(constant_vector), + builder.ConstantR2<float>(constant_matrix)}); + auto matrix_element = builder.GetTupleElement(tuple_data, 1); + ComputeAndCompareR2<float>(&builder, Array2D<float>(constant_matrix), {}, + error_spec_); +} + +// Trivial test for extracting a tuple element with GetTupleElement. +XLA_TEST_F(TupleTest, GetTupleElementWithZeroElements) { + ComputationBuilder builder(client_, TestName()); + auto tuple_data = builder.Tuple( + {builder.ConstantR1<float>({}), + builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 101))}); + auto matrix_element = builder.GetTupleElement(tuple_data, 1); + ComputeAndCompareR2<float>(&builder, Array2D<float>(0, 101), {}, error_spec_); +} + +// Extracts both elements from a tuple with GetTupleElement and then adds them +// together. 
+XLA_TEST_F(TupleTest, AddTupleElements) { + ComputationBuilder builder(client_, TestName()); + std::initializer_list<float> constant_vector = {1.f, 2.f, 3.f}; + std::initializer_list<std::initializer_list<float>> constant_matrix = { + {1.f, 2.f, 3.f}, // row 0 + {4.f, 5.f, 6.f}, // row 1 + }; + auto tuple_data = builder.Tuple({builder.ConstantR1<float>(constant_vector), + builder.ConstantR2<float>(constant_matrix)}); + auto vector_element = builder.GetTupleElement(tuple_data, 0); + auto matrix_element = builder.GetTupleElement(tuple_data, 1); + auto vector_shape = builder.GetShape(vector_element).ConsumeValueOrDie(); + auto matrix_shape = builder.GetShape(matrix_element).ConsumeValueOrDie(); + auto result = builder.Add(matrix_element, vector_element, + /*broadcast_dimensions=*/{1}); + + Array2D<float> expected({ + {2.f, 4.f, 6.f}, // row 0 + {5.f, 7.f, 9.f}, // row 1 + }); + ASSERT_TRUE(ShapeUtil::ShapeIs(*vector_shape, F32, {3})); + ASSERT_TRUE(ShapeUtil::ShapeIs(*matrix_shape, F32, {/*y=*/2, /*x=*/3})); + ComputeAndCompareR2<float>(&builder, expected, {}, error_spec_); +} + +// Extracts both elements from a tuple and then puts them into a new tuple in +// the opposite order. 
+XLA_TEST_F(TupleTest, TupleGTEToTuple) { + ComputationBuilder builder(client_, TestName()); + std::initializer_list<float> constant_vector = {1.f, 2.f, 3.f}; + std::initializer_list<std::initializer_list<float>> constant_matrix = { + {1.f, 2.f, 3.f}, // row 0 + {4.f, 5.f, 6.f}, // row 1 + }; + auto tuple_data = builder.Tuple({builder.ConstantR1<float>(constant_vector), + builder.ConstantR2<float>(constant_matrix)}); + auto new_tuple = builder.Tuple({builder.GetTupleElement(tuple_data, 1), + builder.GetTupleElement(tuple_data, 0)}); + auto expected = LiteralUtil::MakeTuple( + {LiteralUtil::CreateR2<float>(constant_matrix).get(), + LiteralUtil::CreateR1<float>(constant_vector).get()}); + ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); +} + +// Builds two new tuples from an existing tuple (by means of GetTupleElement), +// then adds up the components of the new tuples. +XLA_TEST_F(TupleTest, TupleGTEToTupleToGTEAdd) { + // + // v------ --(GTE 0)-- --(GTE 0)---------- + // \ / \ / \ + // (tuple)-- (tuple01)-- \ + // / | \ / \ \ + // m------ | --(GTE 1)-- --(GTE 1)------------ \ + // | \ \ + // | (add) + // | / / + // |--------(GTE 1)-- --(GTE 0)------------ / + // \ \ / / + // \ (tuple10)-- / + // \ / \ / + // -----(GTE 0)-- --(GTE 1)---------- + ComputationBuilder builder(client_, TestName()); + std::initializer_list<float> constant_vector = {1.f, 2.f, 3.f}; + std::initializer_list<std::initializer_list<float>> constant_matrix = { + {1.f, 2.f, 3.f}, // row 0 + {4.f, 5.f, 6.f}, // row 1 + }; + auto tuple_data = builder.Tuple({builder.ConstantR1<float>(constant_vector), + builder.ConstantR2<float>(constant_matrix)}); + auto new_tuple01 = builder.Tuple({builder.GetTupleElement(tuple_data, 0), + builder.GetTupleElement(tuple_data, 1)}); + auto new_tuple10 = builder.Tuple({builder.GetTupleElement(tuple_data, 1), + builder.GetTupleElement(tuple_data, 0)}); + auto vector_from_01 = builder.GetTupleElement(new_tuple01, 0); + auto vector_from_10 = 
builder.GetTupleElement(new_tuple10, 1); + auto matrix_from_01 = builder.GetTupleElement(new_tuple01, 1); + auto matrix_from_10 = builder.GetTupleElement(new_tuple10, 0); + + auto addvectors = builder.Add(vector_from_01, vector_from_10); + auto addmatrices = builder.Add(matrix_from_01, matrix_from_10); + + auto result = builder.Add(addmatrices, addvectors, + /*broadcast_dimensions=*/{1}); + + Array2D<float> expected({ + {4.f, 8.f, 12.f}, // row 0 + {10.f, 14.f, 18.f}, // row 1 + }); + ComputeAndCompareR2<float>(&builder, expected, {}, error_spec_); +} + +XLA_TEST_F(TupleTest, DISABLED_ON_CPU_PARALLEL(SelectBetweenTuplesOnFalse)) { + // Tests a selection between tuples with "false" path taken. + ComputationBuilder builder(client_, TestName()); + + std::initializer_list<float> vec1 = {1.f, 2.f, 3.f}; + std::initializer_list<float> vec2 = {2.f, 4.f, 6.f}; + auto tuple12 = builder.Tuple( + {builder.ConstantR1<float>(vec1), builder.ConstantR1<float>(vec2)}); + auto tuple21 = builder.Tuple( + {builder.ConstantR1<float>(vec2), builder.ConstantR1<float>(vec1)}); + + auto select = + builder.Select(builder.ConstantR0<bool>(false), tuple12, tuple21); + auto expected = + LiteralUtil::MakeTuple({LiteralUtil::CreateR1<float>(vec2).get(), + LiteralUtil::CreateR1<float>(vec1).get()}); + ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); +} + +XLA_TEST_F(TupleTest, TuplesInAMap) { + Computation tuple_computation; + { + // tuple_computation(x) = 100 * min(x, x^2) + max(x, x^2) using tuples. + // + // Need to put a select in there to prevent HLO-level optimizations from + // optimizing out the tuples. 
+ ComputationBuilder b(client_, "sort_square"); + auto x = b.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); + auto x2 = b.Mul(x, x); + auto x_smaller_tuple = b.Tuple({x, x2}); + auto x2_smaller_tuple = b.Tuple({x2, x}); + auto sorted = b.Select(b.Lt(x, x2), x_smaller_tuple, x2_smaller_tuple); + auto smaller = b.GetTupleElement(sorted, 0); + auto greater = b.GetTupleElement(sorted, 1); + b.Add(greater, b.Mul(b.ConstantR0<float>(100.0f), smaller)); + auto computation_status = b.Build(); + ASSERT_IS_OK(computation_status.status()); + tuple_computation = computation_status.ConsumeValueOrDie(); + } + + ComputationBuilder b(client_, TestName()); + auto input = b.ConstantR1<float>({-1.0f, 1.0f, 2.1f}); + b.Map({input}, tuple_computation); + ComputeAndCompareR1<float>(&b, {-99.0f, 101.0f, 214.41f}, {}, error_spec_); +} + +XLA_TEST_F(TupleTest, DISABLED_ON_CPU_PARALLEL(SelectBetweenTuplesOnTrue)) { + // Tests a selection between tuples with "true" path taken. + ComputationBuilder builder(client_, TestName()); + + std::initializer_list<float> vec1 = {1.f, 2.f, 3.f}; + std::initializer_list<float> vec2 = {2.f, 4.f, 6.f}; + auto tuple12 = builder.Tuple( + {builder.ConstantR1<float>(vec1), builder.ConstantR1<float>(vec2)}); + auto tuple21 = builder.Tuple( + {builder.ConstantR1<float>(vec2), builder.ConstantR1<float>(vec1)}); + + auto select = + builder.Select(builder.ConstantR0<bool>(true), tuple12, tuple21); + auto expected = + LiteralUtil::MakeTuple({LiteralUtil::CreateR1<float>(vec1).get(), + LiteralUtil::CreateR1<float>(vec2).get()}); + ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); +} + +XLA_TEST_F(TupleTest, SelectBetweenTuplesElementResult) { + // Tests a selection between tuples but the final result is an element of the + // tuple, not the whole tuple. 
+ ComputationBuilder builder(client_, TestName()); + + std::initializer_list<float> vec1 = {1.f, 2.f, 3.f}; + std::initializer_list<float> vec2 = {2.f, 4.f, 6.f}; + auto tuple12 = builder.Tuple( + {builder.ConstantR1<float>(vec1), builder.ConstantR1<float>(vec2)}); + auto tuple21 = builder.Tuple( + {builder.ConstantR1<float>(vec2), builder.ConstantR1<float>(vec1)}); + + auto select = + builder.Select(builder.ConstantR0<bool>(false), tuple12, tuple21); + auto element = builder.GetTupleElement(select, 0); + + ComputeAndCompareR1<float>(&builder, vec2, {}, error_spec_); +} + +// Cascaded selects between tuple types. +XLA_TEST_F(TupleTest, DISABLED_ON_CPU_PARALLEL(SelectBetweenTuplesCascaded)) { + // + // vec1 vec2 vec2 vec1 + // | | | | + // | | | | + // (tuple 12) (tuple 21) + // \ / + // \ / + // \ / + // true -- --(GTE 0)--(select 1) + // \ / | + // (pred tuple)-- | --(GTE 0)-- + // / \ V / \ + // false -- --(GTE 1)--(select 2)-- --(add) + // / \ / + // / --(GTE 1)-- + // / + // (tuple 21) + ComputationBuilder builder(client_, TestName()); + + std::initializer_list<float> vec1 = {1.f, 2.f, 3.f}; + std::initializer_list<float> vec2 = {2.f, 4.f, 6.f}; + + auto pred_tuple = builder.Tuple( + {builder.ConstantR0<bool>(true), builder.ConstantR0<bool>(false)}); + auto tuple12 = builder.Tuple( + {builder.ConstantR1<float>(vec1), builder.ConstantR1<float>(vec2)}); + auto tuple21 = builder.Tuple( + {builder.ConstantR1<float>(vec2), builder.ConstantR1<float>(vec1)}); + + auto select1 = + builder.Select(builder.GetTupleElement(pred_tuple, 0), tuple12, tuple21); + auto select2 = + builder.Select(builder.GetTupleElement(pred_tuple, 1), tuple21, select1); + auto result = builder.Add(builder.GetTupleElement(select2, 0), + builder.GetTupleElement(select2, 1)); + + ComputeAndCompareR1<float>(&builder, {3.f, 6.f, 9.f}, {}, error_spec_); +} + +XLA_TEST_F(TupleTest, + DISABLED_ON_CPU_PARALLEL(SelectBetweenTuplesReuseConstants)) { + // Similar to SelectBetweenTuples, but the constants 
are shared between the + // input tuples. + ComputationBuilder builder(client_, TestName()); + + std::initializer_list<float> vec1 = {1.f, 2.f, 3.f}; + std::initializer_list<float> vec2 = {2.f, 4.f, 6.f}; + auto c1 = builder.ConstantR1<float>(vec1); + auto c2 = builder.ConstantR1<float>(vec2); + auto tuple12 = builder.Tuple({c1, c2}); + auto tuple21 = builder.Tuple({c2, c1}); + + auto select = + builder.Select(builder.ConstantR0<bool>(false), tuple12, tuple21); + auto expected = + LiteralUtil::MakeTuple({LiteralUtil::CreateR1<float>(vec2).get(), + LiteralUtil::CreateR1<float>(vec1).get()}); + ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); +} + +XLA_TEST_F(TupleTest, NestedTuples) { + ComputationBuilder builder(client_, TestName()); + auto inner_tuple = builder.Tuple( + {builder.ConstantR1<float>({1.0, 2.0}), builder.ConstantR0<float>(42.0)}); + auto outer_tuple = + builder.Tuple({inner_tuple, builder.ConstantR1<float>({22.0, 44.0})}); + + auto expected_v1 = LiteralUtil::CreateR1<float>({1.0, 2.0}); + auto expected_s = LiteralUtil::CreateR0<float>(42.0); + auto expected_inner_tuple = + LiteralUtil::MakeTuple({expected_v1.get(), expected_s.get()}); + auto expected_v2 = LiteralUtil::CreateR1<float>({22.0, 44.0}); + auto expected = + LiteralUtil::MakeTuple({expected_inner_tuple.get(), expected_v2.get()}); + + ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); +} + +XLA_TEST_F(TupleTest, GetTupleElementOfNestedTuple) { + ComputationBuilder builder(client_, TestName()); + + Shape data_shape = ShapeUtil::MakeShape(F32, {3}); + Shape inner_tuple_shape = ShapeUtil::MakeTupleShape({data_shape, data_shape}); + Shape outer_tuple_shape = + ShapeUtil::MakeTupleShape({inner_tuple_shape, data_shape}); + + auto input = builder.Parameter(0, outer_tuple_shape, "input"); + auto gte0 = builder.GetTupleElement(input, 0); + auto gte1 = builder.GetTupleElement(gte0, 1); + builder.Add(gte1, builder.ConstantR1<float>({10.0, 11.0, 12.0})); + + 
std::unique_ptr<GlobalData> data = + client_ + ->TransferToServer(*LiteralUtil::MakeTuple({ + LiteralUtil::MakeTuple( + { + LiteralUtil::CreateR1<float>({1.0, 2.0, 3.0}).get(), + LiteralUtil::CreateR1<float>({4.0, 5.0, 6.0}).get(), + }) + .get(), + LiteralUtil::CreateR1<float>({7.0, 8.0, 9.0}).get(), + })) + .ConsumeValueOrDie(); + + std::vector<GlobalData*> arguments = {data.get()}; + const std::vector<float> expected = {4.0 + 10.0, 5.0 + 11.0, 6.0 + 12.0}; + ComputeAndCompareR1<float>(&builder, expected, arguments, ErrorSpec(1e-5)); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/unary_op_test.cc b/tensorflow/compiler/xla/tests/unary_op_test.cc new file mode 100644 index 0000000000..fdbaa0d178 --- /dev/null +++ b/tensorflow/compiler/xla/tests/unary_op_test.cc @@ -0,0 +1,179 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class UnaryOpTest : public ClientLibraryTestBase { + protected: + template <typename T> + T inf() { + return std::numeric_limits<T>::infinity(); + } + template <typename T> + void AbsSize0TestHelper() { + ComputationBuilder builder(client_, TestName()); + auto arg = builder.ConstantR1<T>({}); + auto abs = builder.Abs(arg); + + ComputeAndCompareR1<T>(&builder, {}, {}); + } + + template <typename T> + void AbsTestHelper() { + ComputationBuilder builder(client_, TestName()); + auto arg = builder.ConstantR1<T>({-2, 25, 0, -123, inf<T>(), -inf<T>()}); + auto abs = builder.Abs(arg); + + ComputeAndCompareR1<T>(&builder, {2, 25, 0, 123, inf<T>(), inf<T>()}, {}); + } + + template <typename T> + void SignTestHelper() { + ComputationBuilder builder(client_, TestName()); + auto arg = builder.ConstantR1<T>( + {-2, 25, 0, static_cast<T>(-0.0), -123, inf<T>(), -inf<T>()}); + auto sign = builder.Sign(arg); + + ComputeAndCompareR1<T>(&builder, {-1, 1, 0, 0, -1, 1, -1}, {}); + } + + template <typename T> + void SignAbsTestHelper() { + ComputationBuilder builder(client_, TestName()); + auto arg = builder.ConstantR1<T>({-2, 25, 0, -123}); + auto sign = builder.Sign(arg); + auto abs = builder.Abs(arg); + builder.Sub(builder.Mul(sign, abs), arg); + + 
ComputeAndCompareR1<T>(&builder, {0, 0, 0, 0}, {}); + } +}; + +template <> +int UnaryOpTest::inf<int>() { + return 2147483647; +} + +XLA_TEST_F(UnaryOpTest, AbsTestR1Size0) { + AbsSize0TestHelper<int>(); + AbsSize0TestHelper<float>(); +} + +TEST_F(UnaryOpTest, AbsTestR1) { + AbsTestHelper<int>(); + AbsTestHelper<float>(); +} + +TEST_F(UnaryOpTest, AbsTestR0) { + ComputationBuilder builder(client_, TestName()); + auto argi = builder.ConstantR0<int>(-5); + auto absi = builder.Abs(argi); + auto argf = builder.ConstantR0<float>(-3.0f); + auto absf = builder.Abs(argf); + auto argf0 = builder.ConstantR0<float>(-0.0f); + auto absf0 = builder.Abs(argf0); + builder.Add(absf0, builder.Add(absf, builder.ConvertElementType( + absi, PrimitiveType::F32))); + + ComputeAndCompareR0<float>(&builder, 8.0f, {}); +} + +TEST_F(UnaryOpTest, SignTestR0) { + ComputationBuilder builder(client_, TestName()); + auto argi = builder.ConstantR0<int>(-5); + auto absi = builder.Sign(argi); + auto argf = builder.ConstantR0<float>(-4.0f); + auto absf = builder.Sign(argf); + auto argf0 = builder.ConstantR0<float>(-0.0f); + auto absf0 = builder.Sign(argf0); + builder.Add(absf0, builder.Add(absf, builder.ConvertElementType( + absi, PrimitiveType::F32))); + + ComputeAndCompareR0<float>(&builder, -2.0f, {}); +} + +TEST_F(UnaryOpTest, SignTestR1) { + SignTestHelper<int>(); + SignTestHelper<float>(); +} + +TEST_F(UnaryOpTest, SignAbsTestR1) { + SignAbsTestHelper<int>(); + SignAbsTestHelper<float>(); +} + +TEST_F(UnaryOpTest, UnsignedAbsTestR1) { + ComputationBuilder builder(client_, TestName()); + auto arg = builder.ConstantR1<unsigned int>( + {2, 25, 0, 123, std::numeric_limits<unsigned int>::max()}); + auto abs = builder.Abs(arg); + + ComputeAndCompareR1<unsigned int>( + &builder, {2, 25, 0, 123, std::numeric_limits<unsigned int>::max()}, {}); +} + +TEST_F(UnaryOpTest, UnsignedSignTestR1) { + ComputationBuilder builder(client_, TestName()); + auto arg = builder.ConstantR1<unsigned int>( + {2, 25, 0, 
123, std::numeric_limits<unsigned int>::max()}); + auto sign = builder.Sign(arg); + + ComputeAndCompareR1<unsigned int>(&builder, {1, 1, 0, 1, 1}, {}); +} + +TEST_F(UnaryOpTest, SignAbsTestR2) { + ComputationBuilder builder(client_, TestName()); + auto arg = builder.ConstantR2<float>({{1.0, -2.0}, {-3.0, 4.0}}); + auto sign = builder.Sign(arg); + auto abs = builder.Abs(arg); + builder.Sub(builder.Mul(sign, abs), arg); + + ComputeAndCompareR2<float>(&builder, {{0, 0}, {0, 0}}, {}); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/vector_ops_reduce_test.cc b/tensorflow/compiler/xla/tests/vector_ops_reduce_test.cc new file mode 100644 index 0000000000..7f3d7d9cb4 --- /dev/null +++ b/tensorflow/compiler/xla/tests/vector_ops_reduce_test.cc @@ -0,0 +1,235 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> +#include <numeric> +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array3d.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class VecOpsReduceTest : public ClientLibraryTestBase { + public: + VecOpsReduceTest() : builder_(client_, TestName()) {} + + ComputationDataHandle BuildSampleConstantCube() { + // clang-format off + Array3D<float> x3d({ + {{1.0, 2.0, 3.0}, // | dim 1 // } plane 0 in dim 0 + {4.0, 5.0, 6.0}}, // V // } + // ---- dim 2 ----> + {{1.0, 2.0, 3.0}, // } plane 1 in dim 0 + {4.0, 5.0, 6.0}}, + {{1.0, 2.0, 3.0}, // } plane 2 in dim 0 + {4.0, 5.0, 6.0}}}); + // clang-format on + return builder_.ConstantR3FromArray3D<float>(x3d); + } + + ComputationBuilder builder_; + ErrorSpec errspec_{1e-3, 0}; +}; + +TEST_F(VecOpsReduceTest, AddReduceR1F32) { + auto sum_reducer = CreateScalarAddComputation(F32, &builder_); + + auto x = builder_.ConstantR1<float>( + {2.1, -2.6, 2.6, -4.0, 2.1, 2.3, -5.0, -0.9, -2.4, 1.6}); + auto add_reduce = + builder_.Reduce(x, builder_.ConstantR0<float>(0.0f), sum_reducer, + /*dimensions_to_reduce=*/{0}); + + ComputeAndCompareR0<float>(&builder_, -4.2f, {}, errspec_); +} + +TEST_F(VecOpsReduceTest, AddReduceBigR1F32) { + auto sum_reducer = CreateScalarAddComputation(F32, &builder_); + + std::vector<float> input(3000); + std::iota(input.begin(), input.end(), 
100.0f); + + auto x = builder_.ConstantR1<float>(input); + auto add_reduce = + builder_.Reduce(x, builder_.ConstantR0<float>(0.0f), sum_reducer, + /*dimensions_to_reduce=*/{0}); + + float expected = std::accumulate(input.begin(), input.end(), 0.0f); + ComputeAndCompareR0<float>(&builder_, expected, {}, errspec_); +} + +TEST_F(VecOpsReduceTest, MaxReduceR1F32) { + auto max_reducer = CreateScalarMax(); + + auto x = builder_.ConstantR1<float>( + {2.1, -2.6, 2.6, -4.0, 2.1, 2.3, -5.0, -0.9, -2.4, 1.6}); + auto max_reduce = + builder_.Reduce(x, builder_.ConstantR0<float>(0.0f), max_reducer, + /*dimensions_to_reduce=*/{0}); + + ComputeAndCompareR0<float>(&builder_, 2.6f, {}, errspec_); +} + +TEST_F(VecOpsReduceTest, MaxReduceR1F32WithNontrivialInit) { + auto max_reducer = CreateScalarMax(); + + auto x = builder_.ConstantR1<float>( + {2.1, -2.6, 2.6, -4.0, 2.1, 2.3, -5.0, -0.9, -2.4, 1.6}); + auto max_reduce = + builder_.Reduce(x, builder_.ConstantR0<float>(4.0f), max_reducer, + /*dimensions_to_reduce=*/{0}); + + ComputeAndCompareR0<float>(&builder_, 4.0f, {}, errspec_); +} + +TEST_F(VecOpsReduceTest, AddReduceR2F32Dim1) { + auto sum_reducer = CreateScalarAddComputation(F32, &builder_); + + // clang-format off + auto x = builder_.ConstantR2<float>({ + {1.0, 2.0, 3.0}, // | dim 0 + {4.0, 5.0, 6.0}}); // | + // ------ dim 1 ---------- + // clang-format on + + auto add_reduce = + builder_.Reduce(x, builder_.ConstantR0<float>(0.0f), sum_reducer, + /*dimensions_to_reduce=*/{1}); + + ComputeAndCompareR1<float>(&builder_, {6.0, 15.0}, {}, errspec_); +} + +TEST_F(VecOpsReduceTest, AddReduceR2F32Dim0) { + auto sum_reducer = CreateScalarAddComputation(F32, &builder_); + + // clang-format off + auto x = builder_.ConstantR2<float>({ + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}}); + // clang-format on + auto add_reduce = + builder_.Reduce(x, builder_.ConstantR0<float>(0.0f), sum_reducer, + /*dimensions_to_reduce=*/{0}); + + ComputeAndCompareR1<float>(&builder_, {5.0, 7.0, 9.0}, {}, errspec_); 
+} + +TEST_F(VecOpsReduceTest, AddReduceR3F32Dim2) { + auto sum_reducer = CreateScalarAddComputation(F32, &builder_); + auto x = BuildSampleConstantCube(); + auto add_reduce = + builder_.Reduce(x, builder_.ConstantR0<float>(0.0f), sum_reducer, + /*dimensions_to_reduce=*/{2}); + + Array2D<float> expected_array({{6.0f, 15.0f}, {6.0f, 15.0f}, {6.0f, 15.0f}}); + + ComputeAndCompareR2<float>(&builder_, expected_array, {}, errspec_); +} + +TEST_F(VecOpsReduceTest, AddReduceR3F32Dim1) { + auto sum_reducer = CreateScalarAddComputation(F32, &builder_); + auto x = BuildSampleConstantCube(); + auto add_reduce = + builder_.Reduce(x, builder_.ConstantR0<float>(0.0f), sum_reducer, + /*dimensions_to_reduce=*/{1}); + + Array2D<float> expected_array( + {{5.0f, 7.0f, 9.0f}, {5.0f, 7.0f, 9.0f}, {5.0f, 7.0f, 9.0f}}); + + ComputeAndCompareR2<float>(&builder_, expected_array, {}, errspec_); +} + +TEST_F(VecOpsReduceTest, AddReduceR3F32Dim0) { + auto sum_reducer = CreateScalarAddComputation(F32, &builder_); + auto x = BuildSampleConstantCube(); + auto add_reduce = + builder_.Reduce(x, builder_.ConstantR0<float>(0.0f), sum_reducer, + /*dimensions_to_reduce=*/{0}); + + Array2D<float> expected_array({{3.0f, 6.0f, 9.0f}, {12.0f, 15.0f, 18.0f}}); + + ComputeAndCompareR2<float>(&builder_, expected_array, {}, errspec_); +} + +TEST_F(VecOpsReduceTest, AddReduceR3F32Dims1and2) { + auto sum_reducer = CreateScalarAddComputation(F32, &builder_); + auto x = BuildSampleConstantCube(); + auto add_reduce = + builder_.Reduce(x, builder_.ConstantR0<float>(0.0f), sum_reducer, + /*dimensions_to_reduce=*/{1, 2}); + + ComputeAndCompareR1<float>(&builder_, {21.0, 21.0, 21.0}, {}, errspec_); +} + +XLA_TEST_F(VecOpsReduceTest, AddReduceR3F32Dims0and2) { + auto sum_reducer = CreateScalarAddComputation(F32, &builder_); + auto x = BuildSampleConstantCube(); + auto add_reduce = + builder_.Reduce(x, builder_.ConstantR0<float>(0.0f), sum_reducer, + /*dimensions_to_reduce=*/{0, 2}); + + 
ComputeAndCompareR1<float>(&builder_, {18.0, 45.0}, {}, errspec_); +} + +TEST_F(VecOpsReduceTest, AddReduceR3F32Dims0and1) { + auto sum_reducer = CreateScalarAddComputation(F32, &builder_); + auto x = BuildSampleConstantCube(); + auto add_reduce = + builder_.Reduce(x, builder_.ConstantR0<float>(0.0f), sum_reducer, + /*dimensions_to_reduce=*/{0, 1}); + + ComputeAndCompareR1<float>(&builder_, {15.0, 21.0, 27.0}, {}, errspec_); +} + +TEST_F(VecOpsReduceTest, AddReduceR3F32AllDims) { + auto sum_reducer = CreateScalarAddComputation(F32, &builder_); + auto x = BuildSampleConstantCube(); + auto add_reduce = + builder_.Reduce(x, builder_.ConstantR0<float>(0.0f), sum_reducer, + /*dimensions_to_reduce=*/{0, 1, 2}); + + ComputeAndCompareR0<float>(&builder_, 63.0, {}, errspec_); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc b/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc new file mode 100644 index 0000000000..d9fc1e1e8f --- /dev/null +++ b/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc @@ -0,0 +1,423 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <cmath> +#include <memory> +#include <vector> + +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class VecOpsSimpleTest : public ClientLibraryTestBase { + public: + explicit VecOpsSimpleTest(perftools::gputools::Platform* platform = nullptr) + : ClientLibraryTestBase(platform, + /*disabled_pass_names=*/{"algsimp", "inline"}) {} + + ErrorSpec error_spec_{0.0001}; +}; + +TEST_F(VecOpsSimpleTest, ExpTenValues) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR1<float>( + {2.1, -2.6, 2.6, -4.0, 2.1, 2.3, -5.0, -0.9, -2.4, 1.6}); + auto 
exp = builder.Exp(x); + + std::vector<float> expected = {8.1662, 7.4274e-02, 13.4637, 1.8316e-02, + 8.1662, 9.9742, 6.7379e-03, 4.0657e-01, + 9.0718e-02, 4.9530}; + + ComputeAndCompareR1<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(VecOpsSimpleTest, ExpManyValues) { + for (int count : {63, 64, 65, 127, 128, 129, 17 * 4096}) { + ComputationBuilder builder(client_, TestName()); + std::vector<float> exponents; + for (int i = 0; i < count; ++i) { + exponents.push_back(i / static_cast<float>(count)); + } + auto x = builder.ConstantR1<float>(exponents); + auto exp = builder.Exp(x); + + std::vector<float> expected; + for (float exponent : exponents) { + expected.push_back(std::exp(exponent)); + } + + ComputeAndCompareR1<float>(&builder, expected, {}, + ErrorSpec(/*aabs=*/1e-2, /*arel=*/1e-3)); + } +} + +TEST_F(VecOpsSimpleTest, ExpIn4D) { + ComputationBuilder builder(client_, TestName()); + Array4D<float> exponents(2, 2, 2, 2); + + std::vector<float> exponents_vector; + std::vector<float> expected_vector; + for (int i = 0; i < exponents.num_elements(); ++i) { + exponents_vector.push_back(static_cast<float>(i) / + exponents.num_elements()); + expected_vector.push_back(std::exp(exponents_vector.back())); + } + exponents.SetValues(exponents_vector); + + Array4D<float> expected(2, 2, 2, 2, expected_vector); + + auto x = builder.ConstantR4FromArray4D<float>(exponents); + auto exp = builder.Exp(x); + + ComputeAndCompareR4<float>(&builder, expected, {}, + ErrorSpec(/*aabs=*/1e-2, /*arel=*/1e-3)); +} + +TEST_F(VecOpsSimpleTest, NegateTenFloatValues) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR1<float>( + {2.1, -2.6, 2.6, -4.0, 2.1, 2.3, -5.0, -0.9, -2.4, 1.6}); + builder.Neg(x); + + std::vector<float> expected = {-2.1, 2.6, -2.6, 4.0, -2.1, + -2.3, 5.0, 0.9, 2.4, -1.6}; + ComputeAndCompareR1<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(VecOpsSimpleTest, NegateTenInt32Values) { + ComputationBuilder builder(client_, 
TestName()); + auto x = builder.ConstantR1<int32>({2, -2, 12, -4, 5, 20, -15, 0, -2, 1}); + builder.Neg(x); + + std::vector<int> expected = {-2, 2, -12, 4, -5, -20, 15, 0, 2, -1}; + ComputeAndCompareR1<int32>(&builder, expected, {}); +} + +TEST_F(VecOpsSimpleTest, NegateUint32Values) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR1<uint32>( + {0, 1, 42, static_cast<uint32>(-1), static_cast<uint32>(-12)}); + builder.Neg(x); + std::vector<uint32> expected = {0, static_cast<uint32>(-1), + static_cast<uint32>(-42), 1, 12}; + ComputeAndCompareR1<uint32>(&builder, expected, {}); +} + +TEST_F(VecOpsSimpleTest, SquareTenValues) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR1<float>( + {2.1, -2.6, 2.6, -4.0, 2.1, 2.3, -5.0, -0.9, -2.4, 1.6}); + builder.SquareF32(x); + + std::vector<float> expected = {4.41, 6.76, 6.76, 16., 4.41, + 5.29, 25., 0.81, 5.76, 2.56}; + ComputeAndCompareR1<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(VecOpsSimpleTest, ReciprocalTenValues) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR1<float>( + {2.1, -2.6, 2.6, -4.0, 2.1, 2.3, -5.0, -0.9, -2.4, 1.6}); + builder.ReciprocalF32(x); + + std::vector<float> expected = { + 0.47619048, -0.38461538, 0.38461538, -0.25, 0.47619048, + 0.43478261, -0.2, -1.11111111, -0.41666667, 0.625}; + ComputeAndCompareR1<float>(&builder, expected, {}, error_spec_); +} + +TEST_F(VecOpsSimpleTest, AddTenValuesViaMap) { + ComputationBuilder builder(client_, TestName()); + auto add = CreateScalarAddComputation(F32, &builder); + + auto x = builder.ConstantR1<float>( + {2.1, -2.6, 2.6, -4.0, 2.1, 2.3, -5.0, -0.9, -2.4, 1.6}); + auto y = builder.ConstantR1<float>( + {-0.4, -0.6, -3.0, 0.2, 3.8, -2.2, -1.8, 4.9, 1.4, 0.6}); + auto max = builder.Map({x, y}, add); + + std::vector<float> expected = {1.7, -3.2, -0.4, -3.8, 5.9, + 0.1, -6.8, 4., -1., 2.2}; + ComputeAndCompareR1<float>(&builder, expected, {}, 
error_spec_); +} + +TEST_F(VecOpsSimpleTest, MaxTenValues) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR1<float>( + {2.1, -2.6, 2.6, -4.0, 2.1, 2.3, -5.0, -0.9, -2.4, 1.6}); + auto y = builder.ConstantR1<float>( + {-0.4, -0.6, -3.0, 0.2, 3.8, -2.2, -1.8, 4.9, 1.4, 0.6}); + auto max = builder.Max(x, y); + + std::vector<float> expected = {2.1, -0.6, 2.6, 0.2, 3.8, + 2.3, -1.8, 4.9, 1.4, 1.6}; + ComputeAndCompareR1<float>(&builder, expected, {}); +} + +TEST_F(VecOpsSimpleTest, MaxTenValuesFromParams) { + // Similar to MaxTenValues, except that the inputs come from params rather + // than constants. + ComputationBuilder builder(client_, TestName()); + ComputationDataHandle v1, v2; + std::unique_ptr<GlobalData> param0_data = CreateR1Parameter<float>( + {41.0f, 2.0f, 3.0f, 84.0f}, /*parameter_number=*/0, /*name=*/"v1", + /*builder=*/&builder, /*data_handle=*/&v1); + std::unique_ptr<GlobalData> param1_data = CreateR1Parameter<float>( + {21.0f, 22.0f, 23.0f, 24.0f}, /*parameter_number=*/1, /*name=*/"v2", + /*builder=*/&builder, /*data_handle=*/&v2); + + auto max = builder.Max(v1, v2); + ComputeAndCompareR1<float>(&builder, {41.0f, 22.0f, 23.0f, 84.0f}, + {param0_data.get(), param1_data.get()}, + error_spec_); +} + +TEST_F(VecOpsSimpleTest, Max15000ValuesFromParams) { + // Similar to MaxTenValuesFromParams, except that the data size passed in and + // out is large. + ComputationBuilder builder(client_, TestName()); + + // Number of floats in the data passed into and out of the computation. + constexpr int datalen = 15 * 1000; + + // The inputs are initialized with a special pattern where in the first third + // of the data v1[i] > v2[i] and elsewhere it's vice versa. 
+ std::vector<float> v1vec; + std::vector<float> v2vec; + std::vector<float> expected_vec; + for (int i = 0; i < datalen; ++i) { + float smaller = i; + float larger = i * 2; + if (i < datalen / 3) { + v1vec.push_back(larger); + v2vec.push_back(smaller); + } else { + v1vec.push_back(smaller); + v2vec.push_back(larger); + } + expected_vec.push_back(larger); + } + + ComputationDataHandle v1, v2; + std::unique_ptr<GlobalData> param0_data = + CreateR1Parameter<float>(v1vec, /*parameter_number=*/0, /*name=*/"v1", + /*builder=*/&builder, /*data_handle=*/&v1); + std::unique_ptr<GlobalData> param1_data = + CreateR1Parameter<float>(v2vec, /*parameter_number=*/1, /*name=*/"v2", + /*builder=*/&builder, /*data_handle=*/&v2); + + auto max = builder.Max(v1, v2); + ComputeAndCompareR1<float>(&builder, expected_vec, + {param0_data.get(), param1_data.get()}, + error_spec_); +} + +TEST_F(VecOpsSimpleTest, MaxTenValuesWithScalar) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR1<float>( + {2.1, -2.6, 2.6, -4.0, 2.1, 2.3, -5.0, -0.9, -2.4, 1.6}); + auto y = builder.ConstantR0<float>(0); + auto max = builder.Max(x, y); + + std::vector<float> expected = {2.1, 0.0, 2.6, 0.0, 2.1, + 2.3, 0.0, 0.0, 0.0, 1.6}; + ComputeAndCompareR1<float>(&builder, expected, {}); +} + +TEST_F(VecOpsSimpleTest, MinTenValues) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR1<float>( + {2.1, -2.6, 2.6, -4.0, 2.1, 2.3, -5.0, -0.9, -2.4, 1.6}); + auto y = builder.ConstantR1<float>( + {-0.4, -0.6, -3.0, 0.2, 3.8, -2.2, -1.8, 4.9, 1.4, 0.6}); + auto min = builder.Min(x, y); + + std::vector<float> expected = {-0.4, -2.6, -3.0, -4.0, 2.1, + -2.2, -5.0, -0.9, -2.4, 0.6}; + ComputeAndCompareR1<float>(&builder, expected, {}); +} + +TEST_F(VecOpsSimpleTest, MinMaxTenValues) { + ComputationBuilder builder(client_, TestName()); + auto zero = builder.ConstantR0<float>(0); + auto one = builder.ConstantR0<float>(1); + auto x = builder.ConstantR1<float>( + {2.1, 
-2.6, 2.6, 0.3, 3.1, 0.9, -5.0, 0.1, -2.4, 0.6}); + auto clamp = builder.Min(builder.Max(x, zero), one); + + std::vector<float> expected = {1.0, 0.0, 1.0, 0.3, 1.0, + 0.9, 0.0, 0.1, 0.0, 0.6}; + ComputeAndCompareR1<float>(&builder, expected, {}); +} + +TEST_F(VecOpsSimpleTest, ClampTenValuesConstant) { + ComputationBuilder builder(client_, TestName()); + auto zero = builder.ConstantR0<float>(0); + auto one = builder.ConstantR0<float>(1); + auto x = builder.ConstantR1<float>( + {2.1, -2.6, 2.6, 0.3, 3.1, 0.9, -5.0, 0.1, -2.4, 0.6}); + auto clamp = builder.Clamp(zero, x, one); + + std::vector<float> expected = {1.0, 0.0, 1.0, 0.3, 1.0, + 0.9, 0.0, 0.1, 0.0, 0.6}; + ComputeAndCompareR1<float>(&builder, expected, {}); +} + +TEST_F(VecOpsSimpleTest, ClampTwoValuesConstant) { + ComputationBuilder builder(client_, TestName()); + auto zero = builder.ConstantR1<float>({0.0f, 0.0f}); + auto one = builder.ConstantR1<float>({1.0f, 1.0f}); + auto x = builder.ConstantR1<float>({2.1, -2.6}); + auto clamp = builder.Clamp(zero, x, one); + + std::vector<float> expected = {1.0, 0.0}; + ComputeAndCompareR1<float>(&builder, expected, {}); +} + +TEST_F(VecOpsSimpleTest, ClampTenValuesConstantNonzeroLower) { + ComputationBuilder builder(client_, TestName()); + auto one = builder.ConstantR0<float>(1); + auto two = builder.ConstantR0<float>(2); + auto x = builder.ConstantR1<float>( + {2.1, -2.6, 2.6, 0.3, 3.1, 0.9, -5.0, 0.1, -2.4, 0.6}); + auto clamp = builder.Clamp(one, x, two); + + std::vector<float> expected = {2.0, 1.0, 2.0, 1.0, 2.0, + 1.0, 1.0, 1.0, 1.0, 1.0}; + ComputeAndCompareR1<float>(&builder, expected, {}); +} + +TEST_F(VecOpsSimpleTest, MapTenValues) { + Computation add_half; + { + // add_half(x) = x + 0.5 + ComputationBuilder builder(client_, "add_half"); + auto x_value = + builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x_value"); + auto half = builder.ConstantR0<float>(0.5); + builder.Add(x_value, half); + auto computation_status = builder.Build(); + 
ASSERT_IS_OK(computation_status.status()); + add_half = computation_status.ConsumeValueOrDie(); + } + + Computation clamp; + { + // clamp(y) = clamp<0,5>(y) + ComputationBuilder builder(client_, "clamp"); + auto y_value = + builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "y_value"); + auto zero = builder.ConstantR0<float>(0.0); + auto clamped = builder.Clamp(zero, y_value, builder.ConstantR0<float>(5)); + auto computation_status = builder.Build(); + ASSERT_IS_OK(computation_status.status()); + clamp = computation_status.ConsumeValueOrDie(); + } + + Computation mult_relu_add; + { + // mult_relu_add(z) = clamp(add_half(2 * max(z, 0))) + ComputationBuilder builder(client_, "mult_relu_add"); + auto z_value = + builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "z_value"); + auto zero = builder.ConstantR0<float>(0.0); + auto two = builder.ConstantR0<float>(2.0); + auto max = builder.Max(z_value, zero); + auto mult = builder.Mul(two, max); + auto inner = builder.Map({mult}, add_half); + builder.Map({inner}, clamp); + auto computation_status = builder.Build(); + ASSERT_IS_OK(computation_status.status()); + mult_relu_add = computation_status.ConsumeValueOrDie(); + } + + ComputationBuilder builder(client_, "map10"); + { + auto x = builder.ConstantR1<float>( + {2.1, -21.6, 2.6, -4.0, 2.1, 2.3, -5.0, -0.9, -2.4, 1.6}); + auto activations = builder.Map({x}, mult_relu_add); + } + + std::vector<float> expected = {4.7, 0.5, 5.0, 0.5, 4.7, + 5.0, 0.5, 0.5, 0.5, 3.7}; + ComputeAndCompareR1<float>(&builder, expected, {}); +} + +XLA_TEST_F(VecOpsSimpleTest, RemainderTenValuesS32) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR1<int32>({-5, -4, -3, -2, -1, 0, 1, 2, 3, 4}); + auto y = builder.ConstantR0<int32>(3); + builder.Rem(x, y); + + std::vector<int32> expected = {-2, -1, 0, -2, -1, 0, 1, 2, 0, 1}; + ComputeAndCompareR1<int32>(&builder, expected, {}); +} + +XLA_TEST_F(VecOpsSimpleTest, VectorPredicateEqual) { + ComputationBuilder 
builder(client_, TestName()); + auto x = builder.ConstantR1<bool>({false, true}); + auto y = builder.ConstantR1<bool>({true, false}); + builder.Eq(x, y); + + std::array<bool, 2> expected = {{false, false}}; + ComputeAndCompareR1<bool>(&builder, expected, {}); +} + +XLA_TEST_F(VecOpsSimpleTest, VectorPredicateNotEqual) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR1<bool>({false, true}); + auto y = builder.ConstantR1<bool>({true, false}); + builder.Ne(x, y); + + std::array<bool, 2> expected = {{true, true}}; + ComputeAndCompareR1<bool>(&builder, expected, {}); +} + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc new file mode 100644 index 0000000000..7820bc363d --- /dev/null +++ b/tensorflow/compiler/xla/tests/while_test.cc @@ -0,0 +1,395 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <memory> +#include <string> +#include <vector> + +#include "tensorflow/compiler/xla/client/client_library.h" +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/legacy_flags/cpu_compiler_flags.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/platform_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/platform/types.h" + +namespace se = ::perftools::gputools; + +namespace xla { +namespace { + +class WhileTest : public ClientLibraryTestBase {}; + +// Tests a while node when the result type T is S32. +// +// int32 result = 0; +// while (result < 5) { +// result = result + 1; +// } +TEST_F(WhileTest, WhileWithScalarResult) { + auto result_shape = ShapeUtil::MakeShape(S32, {}); + + // Create a computation for the condition: repeat for 5 iterations. + Computation condition; + { + ComputationBuilder builder(client_, "condition"); + auto prev = builder.Parameter(0, result_shape, "prev"); + builder.Gt(builder.ConstantR0<int32>(5), prev); + condition = builder.Build().ConsumeValueOrDie(); + } + + // Create a computation for the body: add 1 to the result variable. 
+ Computation body;
+ {
+ ComputationBuilder builder(client_, "body");
+ auto prev = builder.Parameter(0, result_shape, "prev");
+ auto input = builder.ConstantR0<int32>(1);
+ auto result = builder.Add(input, prev);
+ body = builder.Build().ConsumeValueOrDie();
+ }
+
+ // Create a While node with computations for the condition and the body.
+ ComputationBuilder builder(client_, TestName());
+ auto init = builder.ConstantR0<int32>(0);
+ auto result = builder.While(condition, body, init);
+ auto shape = builder.GetShape(result).ConsumeValueOrDie();
+
+ ComputeAndCompareR0<int32>(&builder, 5, {});
+}
+
+// Tests a while node when the result type T is a vector.
+//
+// All constants are chosen to produce exact results.
+// vector<float> result(0);
+// while (result.sum() < 15.5f) {
+// result = result + vector<float>(0);
+// }
+// TODO(b/29185393): does not terminate on CPU.
+TEST_F(WhileTest, DISABLED_WhileWithEmptyVectorResult) {
+ Shape result_shape = ShapeUtil::MakeShape(F32, {0});
+
+ // Create a computation for the reduction.
+ Computation add;
+ {
+ ComputationBuilder builder(client_, "add");
+ auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x");
+ auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y");
+ builder.Add(x, y);
+ add = builder.Build().ConsumeValueOrDie();
+ }
+
+ // Create a computation for the condition.
+ // Repeat until the sum of the result vector is less than 15.5f.
+ Computation condition;
+ {
+ ComputationBuilder builder(client_, "condition");
+ auto prev = builder.Parameter(0, result_shape, "prev");
+ auto sum = builder.Reduce(prev, builder.ConstantR0<float>(0.0f), add,
+ /*dimensions_to_reduce=*/{0});
+ auto test = builder.Gt(builder.ConstantR0<float>(15.5f), sum);
+ condition = builder.Build().ConsumeValueOrDie();
+ }
+
+ // Create a computation for the body.
+ // Add an empty constant vector to the result vector.
+ Computation body;
+ {
+ ComputationBuilder builder(client_, "body");
+ auto prev = builder.Parameter(0, result_shape, "prev");
+ auto input = builder.ConstantR1<float>({});
+ auto result = builder.Add(input, prev);
+ body = builder.Build().ConsumeValueOrDie();
+ }
+
+ // Create a While node with computations for the condition and the body.
+ ComputationBuilder builder(client_, "while");
+ auto init = builder.ConstantR1<float>({});
+ auto result = builder.While(condition, body, init);
+ VLOG(2) << "while = " << ShapeUtil::HumanString(
+ *builder.GetShape(result).ConsumeValueOrDie());
+
+ ComputeAndCompareR1<float>(&builder, {}, {}, ErrorSpec(0.0001));
+}
+
+// Tests a while node when the result type T is a vector.
+//
+// All constants are chosen to produce exact results.
+// vector<float> result(8, 0.0f);
+// while (result.sum() < 15.5f) {
+// result = result + vector<float>(8, 0.125f);
+// }
+TEST_F(WhileTest, WhileWithVectorResult) {
+ Shape result_shape = ShapeUtil::MakeShape(F32, {8});
+
+ // Create a computation for the reduction.
+ Computation add;
+ {
+ ComputationBuilder builder(client_, "add");
+ auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x");
+ auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y");
+ builder.Add(x, y);
+ add = builder.Build().ConsumeValueOrDie();
+ }
+
+ // Create a computation for the condition.
+ // Repeat until the sum of the result vector is less than 15.5f.
+ Computation condition;
+ {
+ ComputationBuilder builder(client_, "condition");
+ auto prev = builder.Parameter(0, result_shape, "prev");
+ auto sum = builder.Reduce(prev, builder.ConstantR0<float>(0.0f), add,
+ /*dimensions_to_reduce=*/{0});
+ auto test = builder.Gt(builder.ConstantR0<float>(15.5f), sum);
+ condition = builder.Build().ConsumeValueOrDie();
+ }
+
+ // Create a computation for the body.
+ // Add a constant vector of 0.125f to the result vector.
+ Computation body;
+ {
+ ComputationBuilder builder(client_, "body");
+ auto prev = builder.Parameter(0, result_shape, "prev");
+ auto input = builder.ConstantR1<float>(8, 0.125f);
+ auto result = builder.Add(input, prev);
+ body = builder.Build().ConsumeValueOrDie();
+ }
+
+ // Create a While node with computations for the condition and the body.
+ ComputationBuilder builder(client_, "while");
+ auto init = builder.ConstantR1<float>(8, 0.f);
+ auto result = builder.While(condition, body, init);
+ VLOG(2) << "while = " << ShapeUtil::HumanString(
+ *builder.GetShape(result).ConsumeValueOrDie());
+
+ // Individual elements will increase by 1/8 each time through the loop, so
+ // the sum will increase by 1.0. It will first be >15.5 when the elements
+ // have all reached 2.0.
+ std::vector<float> expected = {2.f, 2.f, 2.f, 2.f, 2.f, 2.f, 2.f, 2.f};
+ ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
+}
+
+// Tests a while node when the result type T is a Tuple.
+//
+// tuple<int32, vector<float>> result(0, vector<float>(10, 0.0f));
+// while (get<0>(result) < 5) {
+// get<0>(result) = get<0>(result) + 1;
+// get<1>(result) = get<1>(result) + vector<float>(10, 1.0f);
+// }
+TEST_F(WhileTest, WhileWithTupleResult) {
+ std::vector<Shape> shape_elements = {ShapeUtil::MakeShape(S32, {}),
+ ShapeUtil::MakeShape(F32, {10})};
+ Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements);
+
+ // Create a computation for the condition.
+ // Repeat for 5 iterations.
+ Computation condition;
+ {
+ ComputationBuilder builder(client_, "condition");
+ auto prev = builder.Parameter(0, result_shape, "prev");
+ auto iteration = builder.GetTupleElement(prev, 0);
+ builder.Gt(builder.ConstantR0<int32>(5), iteration);
+ condition = builder.Build().ConsumeValueOrDie();
+ }
+
+ // Create a computation for the body.
+ // Add 1 to the iteration variable and add a constant vector of 1.0f to
+ // the weight variable, both of which are tuple elements.
+ Computation body; + { + ComputationBuilder builder(client_, "body"); + auto prev = builder.Parameter(0, result_shape, "prev"); + auto iteration = builder.GetTupleElement(prev, 0); + auto weights = builder.GetTupleElement(prev, 1); + auto input = builder.ConstantR1<float>(10, 1.f); + auto new_weights = builder.Add(weights, input); + auto result = builder.Tuple( + {builder.Add(iteration, builder.ConstantR0<int32>(1)), new_weights}); + body = builder.Build().ConsumeValueOrDie(); + } + + // Create a While node with computations for the condition and the body. + ComputationBuilder builder(client_, "while"); + auto init = builder.Tuple( + {builder.ConstantR0<int32>(0), builder.ConstantR1<float>(10, 0.f)}); + auto result = builder.While(condition, body, init); + VLOG(2) << "while = " << ShapeUtil::HumanString( + *builder.GetShape(result).ConsumeValueOrDie()); + + auto expected_counter = LiteralUtil::CreateR0<int32>(5); + auto expected_data = LiteralUtil::CreateR1<float>( + {5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f}); + auto expected = + LiteralUtil::MakeTuple({expected_counter.get(), expected_data.get()}); + VLOG(2) << "expected = " << ShapeUtil::HumanString(expected->shape()); + ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001)); +} + +// Tests a while node when the result type T is a vector of S32. +// +// int32 result = (0, 0, 0, 0, 0, 0); +// while (result[0] < count) { +// result += (1, U[0, 100], U[0, 100], U[0, 100], U[0, 100], U[0, 100]); +// } +// +// This test misuses a vector to represent a pair: +// ((iteration, (random vector))). +// +// Note: this test currently only tests generating random values within a loop. +// Per backend the values generated can be different as the different backends +// use different random number generators. +// TODO(b/32240857): Extend test to verify outputs. 
+TEST_F(WhileTest, WhileWithPrngScalarResult) { + auto v6s32 = ShapeUtil::MakeShape(S32, {6}); + + // Create a computation for the condition: repeat for count iterations. + auto build_condition = [this, v6s32](int count) { + ComputationBuilder builder(client_, TestName()); + auto prev = builder.Reshape( + builder.Slice(builder.Parameter(0, v6s32, "prev"), {0}, {1}), {0}, {}); + builder.Gt(builder.ConstantR0<int32>(count), prev); + return builder.Build().ConsumeValueOrDie(); + }; + + // Create a computation for the body: add 1 to the result variable. + Computation body; + { + ComputationBuilder builder(client_, "body"); + auto prev = builder.Parameter(0, v6s32, "prev"); + auto inc = builder.ConcatInDim( + {builder.ConstantR1<int32>({1}), + builder.RngUniform(builder.ConstantR0<int32>(0), + builder.ConstantR0<int32>(100), + ShapeUtil::MakeShape(S32, {5}))}, + 0); + auto result = builder.Add(inc, prev); + body = builder.Build().ConsumeValueOrDie(); + } + + // Create a While node with computations for the condition and the body. + auto while_loop = [this, &body, build_condition](int count) { + ComputationBuilder builder(client_, TestName()); + auto init = builder.ConstantR1<int32>({0, 0, 0, 0, 0, 0}); + auto result = builder.While(build_condition(count), body, init); + auto shape = builder.GetShape(result).ConsumeValueOrDie(); + return builder.Build(); + }; + + for (int i = 1; i < 4; ++i) { + TF_ASSIGN_OR_ASSERT_OK(auto computation, while_loop(i)); + TF_ASSIGN_OR_ASSERT_OK(auto result, + client_->ExecuteAndTransfer(computation, {}, nullptr, + nullptr, /*seed=*/65)); + } +} + +void BM_WhileLoop(int num_iters) { + // Benchmark a simple kernel to measure while loop overheads. 
+ tensorflow::testing::StopTiming(); + + se::Platform* platform = PlatformUtil::GetDefaultPlatform().ValueOrDie(); + auto executors = PlatformUtil::GetStreamExecutors(platform).ValueOrDie(); + StreamExecutorMemoryAllocator allocator(platform, executors); + LocalClient* client = + ClientLibrary::GetOrCreateLocalClient(platform).ValueOrDie(); + + Shape loop_state_shape = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {10})}); + + // Create while condition computation with 'loop_limit'. + const int32 loop_limit = 100; + Computation condition; + { + ComputationBuilder builder(client, "condition"); + auto prev = builder.Parameter(0, loop_state_shape, "prev"); + auto iteration = builder.GetTupleElement(prev, 0); + builder.Lt(iteration, builder.ConstantR0<int32>(loop_limit)); + condition = builder.Build().ConsumeValueOrDie(); + } + + // Create while body computation with unit loop increment. + Computation body; + { + ComputationBuilder builder(client, "body"); + auto prev = builder.Parameter(0, loop_state_shape, "prev"); + auto iteration = builder.GetTupleElement(prev, 0); + auto weights = builder.GetTupleElement(prev, 1); + auto one = builder.ConstantR0<int32>(1); + auto next_iteration = builder.Add(iteration, one); + auto one_vec = builder.ConstantR1<float>(10, 1.f); + auto new_weights = builder.Add(weights, one_vec); + auto result = builder.Tuple({next_iteration, new_weights}); + body = builder.Build().ConsumeValueOrDie(); + } + + // Create a While instruction. + ComputationBuilder builder(client, "while"); + auto init = builder.Tuple( + {builder.ConstantR0<int32>(0), builder.ConstantR1<float>(10, 0.f)}); + builder.While(condition, body, init); + auto computation = builder.Build().ConsumeValueOrDie(); + + // Run some warm-up executions. 
+ LocalExecuteOptions options; + options.set_allocator(&allocator); + const int kWarmups = 2; + for (int i = 0; i < kWarmups; ++i) { + auto result = client->ExecuteLocally(computation, {}, options); + ASSERT_TRUE(result.ok()); + } + + // Run benchmark. + tensorflow::testing::StartTiming(); + for (int i = 0; i < num_iters; ++i) { + auto result = client->ExecuteLocally(computation, {}, options); + ASSERT_TRUE(result.ok()); + } +} + +// TODO(b/32470510): Benchmark fails on parallel CPU backend. +#ifndef XLA_TEST_BACKEND_CPU_PARALLEL +BENCHMARK(BM_WhileLoop); +#endif + +} // namespace +} // namespace xla + +int main(int argc, char** argv) { + std::vector<tensorflow::Flag> flag_list; + xla::legacy_flags::AppendCpuCompilerFlags(&flag_list); + xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << "\n" << usage; + return 2; + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return 2; + } + tensorflow::testing::RunBenchmarks(); + return RUN_ALL_TESTS(); +} |