diff options
author | Wen-Heng (Jack) Chung <whchung@gmail.com> | 2018-08-01 15:35:07 +0000 |
---|---|---|
committer | Wen-Heng (Jack) Chung <whchung@gmail.com> | 2018-08-02 03:15:46 +0000 |
commit | 6cc83f55cd6fbc5af0fd6f1e8220bf9dd392306c (patch) | |
tree | d85c851b00360b40f4cb50bbe2281adaad8606f0 /tensorflow/stream_executor/dnn.h | |
parent | 3fda31fe7d17d808c18e53186beb54b457088587 (diff) |
Add scratch memory size in AlgorithmDesc
Add one field, scratch_size_, into AlgorithmDesc. The field would be set by
DNN libraries during algorithm finding / profiling stage. For algorithms not
using scratch memory the field would be zero.
Change CUDA StreamExecutor implementation to set this field properly.
Diffstat (limited to 'tensorflow/stream_executor/dnn.h')
-rw-r--r-- | tensorflow/stream_executor/dnn.h | 15 |
1 files changed, 12 insertions, 3 deletions
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index a7449c2df4..9abfa1db6a 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -713,15 +713,23 @@ class PoolingDescriptor { class AlgorithmDesc { public: typedef int64 Index; - AlgorithmDesc() : algo_(kDefaultAlgorithm), tensor_ops_enabled_(true) {} + AlgorithmDesc() + : algo_(kDefaultAlgorithm), tensor_ops_enabled_(true), scratch_size_(0) {} AlgorithmDesc(Index a, bool use_tensor_ops) - : algo_(a), tensor_ops_enabled_(use_tensor_ops) {} + : algo_(a), tensor_ops_enabled_(use_tensor_ops), scratch_size_(0) {} + AlgorithmDesc(Index a, bool use_tensor_ops, size_t scratch_size) + : algo_(a), + tensor_ops_enabled_(use_tensor_ops), + scratch_size_(scratch_size) {} bool is_default() const { return algo_ == kDefaultAlgorithm; } bool tensor_ops_enabled() const { return tensor_ops_enabled_; } Index algo_id() const { return algo_; } + size_t scratch_size() const { return scratch_size_; } + void set_scratch_size(size_t val) { scratch_size_ = val; } bool operator==(const AlgorithmDesc& other) const { return this->algo_ == other.algo_ && - this->tensor_ops_enabled_ == other.tensor_ops_enabled_; + this->tensor_ops_enabled_ == other.tensor_ops_enabled_ && + this->scratch_size_ == other.scratch_size_; } uint64 hash() const; @@ -729,6 +737,7 @@ class AlgorithmDesc { enum { kDefaultAlgorithm = -1 }; Index algo_; bool tensor_ops_enabled_; + size_t scratch_size_; }; // Describes the result from a perf experiment. |