diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-07-06 15:15:27 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-07-06 15:21:23 -0700 |
commit | a2ee8bca3f3fa08bf022f5c8a68c3e1cf829f2cd (patch) | |
tree | 35d554fcf7dc24246d33be4a405ef7a8d6cf8abc /tensorflow/stream_executor/stream.h | |
parent | 755fa7b501b5a1dadf2b8a1814d74d4451a05975 (diff) |
Add support for int8 x int8 -> int32 matrix multiplication via cublasGemmEx to stream_executor.
PiperOrigin-RevId: 161137741
Diffstat (limited to 'tensorflow/stream_executor/stream.h')
-rw-r--r-- | tensorflow/stream_executor/stream.h | 9 |
1 files changed, 9 insertions, 0 deletions
```diff
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index 2ab3f44af5..ab6b866744 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -1259,6 +1259,15 @@ class Stream {
                                     blas::ProfileResult *output_profile_result);
   Stream &ThenBlasGemmWithAlgorithm(blas::Transpose transa,
                                     blas::Transpose transb, uint64 m, uint64 n,
+                                    uint64 k, int alpha,
+                                    const DeviceMemory<int8> &a, int lda,
+                                    const DeviceMemory<int8> &b, int ldb,
+                                    int beta, DeviceMemory<int> *c, int ldc,
+                                    blas::ComputationType computation_type,
+                                    blas::AlgorithmType algorithm,
+                                    blas::ProfileResult *output_profile_result);
+  Stream &ThenBlasGemmWithAlgorithm(blas::Transpose transa,
+                                    blas::Transpose transb, uint64 m, uint64 n,
                                     uint64 k, float alpha,
                                     const DeviceMemory<float> &a, int lda,
                                     const DeviceMemory<float> &b, int ldb,
```