aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor/blas.cc
diff options
context:
space:
mode:
authorGravatar Bixia Zheng <bixia@google.com>2018-02-28 12:55:34 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-02-28 12:59:55 -0800
commit8a31fec675f3f1ade28a9a8f38cc8f72d9573256 (patch)
tree3d89167cc1f5977c3e8e583a0fdf8211718f3d90 /tensorflow/stream_executor/blas.cc
parent3dbbf740441cdd41b2dc998e09980d72d2e9d440 (diff)
[XLA] FP16 Dot support for the CPU and GPU backends.
Extend the stream interface ThenBlasGemmWithAlgorithm to support F16 matrix multiplication with computation type FP32.
Extend the stream executor interface DoBlasGemmWithAlgorithm to support F16 GEMM with computation type FP32.
Extend the CPU IR emitter to handle F16 Dot instruction, and add F16 matrix multiplication implementation to the CPU runtime.
Extend the GPU backend to handle FP16 GEMM Thunk.
Replicate the existing matrix multiplication test cases in matrix_ops_simple_test and dot_operation_test for FP16.
RELNOTES:
PiperOrigin-RevId: 187369731
Diffstat (limited to 'tensorflow/stream_executor/blas.cc')
-rw-r--r--tensorflow/stream_executor/blas.cc6
1 files changed, 6 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/blas.cc b/tensorflow/stream_executor/blas.cc
index da09d84921..31724cf6c9 100644
--- a/tensorflow/stream_executor/blas.cc
+++ b/tensorflow/stream_executor/blas.cc
@@ -79,6 +79,8 @@ string ComputationTypeString(ComputationType ty) {
return "f32";
case ComputationType::kF64:
return "f64";
+ case ComputationType::kI32:
+ return "i32";
case ComputationType::kComplexF32:
return "complex f32";
case ComputationType::kComplexF64:
@@ -88,6 +90,10 @@ string ComputationTypeString(ComputationType ty) {
}
}
+std::ostream& operator<<(std::ostream& os, ComputationType ty) {  // Stream inserter for ComputationType.
+ return os << ComputationTypeString(ty);  // Writes the ComputationTypeString(ty) text to os and returns the stream for chaining.
+}
+
} // namespace blas
} // namespace gputools
} // namespace perftools