Add support for int8 x int8 -> int32 matrix multiplication via cublasGemmEx to stream_executor.

PiperOrigin-RevId: 161137741
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-07-06 15:15:27 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-07-06 15:21:23 -0700
commit: a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cd (patch)
tree: 35d554fcf7dc24246d33be4a405ef7a8d6cf8abc /tensorflow/stream_executor/stream.h
parent: 755fa7b501b5a1dadf2b8a1814d74d4451a05975 (diff)
1 files changed, 9 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index 2ab3f44af5..ab6b866744 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -1259,6 +1259,15 @@ class Stream {
       blas::ProfileResult *output_profile_result);
   Stream &ThenBlasGemmWithAlgorithm(blas::Transpose transa,
                                     blas::Transpose transb, uint64 m, uint64 n,
+                                    uint64 k, int alpha,
+                                    const DeviceMemory<int8> &a, int lda,
+                                    const DeviceMemory<int8> &b, int ldb,
+                                    int beta, DeviceMemory<int> *c, int ldc,
+                                    blas::ComputationType computation_type,
+                                    blas::AlgorithmType algorithm,
+                                    blas::ProfileResult *output_profile_result);
+  Stream &ThenBlasGemmWithAlgorithm(blas::Transpose transa,
+                                    blas::Transpose transb, uint64 m, uint64 n,
                                     uint64 k, float alpha,
                                     const DeviceMemory<float> &a, int lda,
                                     const DeviceMemory<float> &b, int ldb,
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-07-06 15:15:27 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-07-06 15:21:23 -0700
commit	a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cd (patch)
tree	35d554fcf7dc24246d33be4a405ef7a8d6cf8abc /tensorflow/stream_executor/stream.h
parent	755fa7b501b5a1dadf2b8a1814d74d4451a05975 (diff)