diff options
author | Yifei Feng <yifeif@google.com> | 2018-05-24 19:12:26 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-05-24 19:15:01 -0700 |
commit | b59833c3fd91511b33255369016868e4ae6cda2e (patch) | |
tree | ecbd70cfd3abb5d934f6eb4b7280a35e8589f5cf /tensorflow/stream_executor/stream.h | |
parent | 2b99d9cbc7166efedaff9eee11744348da30fc8a (diff) |
Merge changes from github.
Revert #18413. Too many internal test failures due to the name scope change caused by this change.
Revert #18192. Cannot use re2::StringPiece internally. Need alternative for set call. Will pull and clean this up in a separate change.
PiperOrigin-RevId: 197991247
Diffstat (limited to 'tensorflow/stream_executor/stream.h')
-rw-r--r-- | tensorflow/stream_executor/stream.h | 14 |
1 files changed, 14 insertions, 0 deletions
```diff
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index c6e37da6d1..3da1b856d6 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -1471,6 +1471,13 @@ class Stream {
                                     blas::ProfileResult *output_profile_result);
 
   // See BlasSupport::DoBlasGemmBatched.
+  Stream &ThenBlasGemmBatched(
+      blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n,
+      uint64 k, float alpha,
+      const port::ArraySlice<DeviceMemory<Eigen::half> *> &a, int lda,
+      const port::ArraySlice<DeviceMemory<Eigen::half> *> &b, int ldb,
+      float beta, const port::ArraySlice<DeviceMemory<Eigen::half> *> &c,
+      int ldc, int batch_count);
   Stream &ThenBlasGemmBatched(blas::Transpose transa, blas::Transpose transb,
                               uint64 m, uint64 n, uint64 k, float alpha,
                               const port::ArraySlice<DeviceMemory<float> *> &a,
@@ -1505,6 +1512,13 @@ class Stream {
       int batch_count);
   Stream &ThenBlasGemmBatchedWithScratch(
       blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n,
+      uint64 k, float alpha,
+      const port::ArraySlice<DeviceMemory<Eigen::half> *> &a, int lda,
+      const port::ArraySlice<DeviceMemory<Eigen::half> *> &b, int ldb,
+      float beta, const port::ArraySlice<DeviceMemory<Eigen::half> *> &c,
+      int ldc, int batch_count, ScratchAllocator *scratch_allocator);
+  Stream &ThenBlasGemmBatchedWithScratch(
+      blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n,
       uint64 k, float alpha, const port::ArraySlice<DeviceMemory<float> *> &a,
       int lda, const port::ArraySlice<DeviceMemory<float> *> &b, int ldb,
       float beta, const port::ArraySlice<DeviceMemory<float> *> &c, int ldc,
```