aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor/blas.cc
diff options
context:
space:
mode:
author: Justin Lebar <jlebar@google.com> 2017-03-02 17:49:45 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-03-02 18:08:01 -0800
commit: 01194694948eb883e99af597d9dbbf3fc9f5c9e2 (patch)
tree: ab3517cf656259681283a90c6682c5b320ac36e3 /tensorflow/stream_executor/blas.cc
parent: e065b3093f4fec5a5f79ad9de81f6baab361962e (diff)
[XLA] [StreamExecutor] Tune GEMMs when possible.
cuBLAS 8 adds the cublasGemmEx function, which lets you specify an explicit "algorithm" for the computation. The algorithm serves as an opaque tuning hint to cuBLAS.

This patch adds support for cublasGemmEx to StreamExecutor and wires up XLA's GemmThunk to use the new function. It does not add GEMM autotuning support to TensorFlow proper, only to XLA.

Change: 149068961
Diffstat (limited to 'tensorflow/stream_executor/blas.cc')
-rw-r--r-- tensorflow/stream_executor/blas.cc 17
1 files changed, 17 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/blas.cc b/tensorflow/stream_executor/blas.cc
index 239e3fce01..a59a1dda71 100644
--- a/tensorflow/stream_executor/blas.cc
+++ b/tensorflow/stream_executor/blas.cc
@@ -67,6 +67,23 @@ string SideString(Side s) {
}
}
+string ComputationTypeString(ComputationType ty) {  // Returns the display string for `ty`.
+ switch (ty) {
+ case ComputationType::kF16:
+ return "f16";
+ case ComputationType::kF32:
+ return "f32";
+ case ComputationType::kF64:
+ return "f64";
+ case ComputationType::kComplexF32:
+ return "complex f32";
+ case ComputationType::kComplexF64:
+ return "complex f64";
+ default:  // Any value that has no case above.
+ LOG(FATAL) << "Unknown ComputationType " << static_cast<int32>(ty);  // Logs the raw value as an int32; no return statement follows.
+ }
+}
+
} // namespace blas
} // namespace gputools
} // namespace perftools