diff options
author | Shanqing Cai <cais@google.com> | 2017-09-25 19:35:53 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-09-25 19:39:42 -0700 |
commit | e2e3a943c0a28b7656325acb3fcd035743d55ea0 (patch) | |
tree | f4b909d5410bdf3b94012392909e7805cd27a2a7 /tensorflow/stream_executor/dnn.h | |
parent | df22044be98c8b707601e03fe22ded53bcc28c7e (diff) |
Merge changes from github.
END_PUBLIC
---
Commit 1e1b3d902 authored by Pete Warden<pete@petewarden.com>
Committed by gunan<gunan@google.com>:
Changed output directory for Pi CI build to fix permissions problem with nightlies (#13257)
* Fix for RTLD_GLOBAL breakage of Pi builds, and removed Eigen version change for Pi that's no longer needed
* Fixed Pi Zero OpenBLAS build problems and tidied up directories used
* More robust checks in Pi build script
* Changed output directory for Pi CI build to fix permissions problem
---
Commit fe3a2e65c authored by Yan Facai (颜发才)<facai.yan@gmail.com>
Committed by drpngx<drpngx@users.noreply.github.com>:
check invalid string type for dest_nodes in extract_sub_graph (#13057)
* BUG: check str type
* TST: add unit test
* CLN: remove list check
* CLN: use warning
* CLN: 2 indent
* CLN: raise TypeError if not list
* CLN: check string only
---
Commit 225ab7629 authored by Jean Wanka<jm.wanka@gmail.com>
Committed by Jean Wanka<jm.wanka@gmail.com>:
Fix polynomial decay with cycle for global step=0
For polynomial decay with cycle=True the learning rate at
step 0 becomes NaN, because in the process of calculating it we
divide by 0. This change should fix it, by setting the multiplier
for the decay steps to one for global_step=0.
---
Commit 286f57061 authored by Bjarke Hammersholt Roune<broune@google.com>
Committed by TensorFlower Gardener<gardener@tensorflow.org>:
Make Service::TransferToClient not attempt to manipulate the literal when the transfer failed, preventing a crash and allowing the caller to see the reason for the failed transfer.
PiperOrigin-RevId: 169770126
---
Commit e0501bc4d authored by Yong Tang<yong.tang.github@outlook.com>
Committed by Shanqing Cai<cais@google.com>:
Fix GRUBlockCell parameter naming inconsistency (#13153)
* Fix GRUBlockCell parameter naming inconsistency
This fix tries to fix the issue in 13137 where
parameter `cell_size` is used instead of `num_units`.
This is inconsistent with other RNN cells.
This fix adds support of `num_units` while at the same
time maintains backward compatibility for `cell_size`.
This fix fixes 13137.
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
* Add `@deprecated_args` for 'cell_size' in `GRUBlockCell`
This commit adds `@deprecated_args` for 'cell_size' in `GRUBlockCell`
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
* Address review comment
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
Commit 02a2eba05 authored by Pete Warden<pete@petewarden.com>
Committed by gunan<gunan@google.com>:
Fix for RTLD_GLOBAL breakage of Pi builds, and removed Eigen version change that's no longer needed (#13251)
* Fix for RTLD_GLOBAL breakage of Pi builds, and removed Eigen version change for Pi that's no longer needed
* Fixed Pi Zero OpenBLAS build problems and tidied up directories used
* More robust checks in Pi build script
---
Commit 8ef722253 authored by Sanjoy Das<sanjoy@google.com>
Committed by TensorFlower Gardener<gardener@tensorflow.org>:
Remove a redundant setName.
The EmitComputation should have emitted a function with the right name, so use a
CHECK instead.
PiperOrigin-RevId: 169764856
---
Commit 1b94147dc authored by Neal Wu<wun@google.com>
Committed by TensorFlower Gardener<gardener@tensorflow.org>:
Fix broken GitHub links in tensorflow and tensorflow_models resulting from The Great Models Move (a.k.a. the research subfolder)
PiperOrigin-RevId: 169763373
---
Commit b1ada5f0c authored by Justine Tunney<jart@google.com>
Committed by TensorFlower Gardener<gardener@tensorflow.org>:
Fix TensorBoard python -m invoke in docs
PiperOrigin-RevId: 169758752
---
Commit 2957cd894 authored by Mustafa Ispir<ispir@google.com>
Committed by TensorFlower Gardener<gardener@tensorflow.org>:
Local run option of estimator training.
PiperOrigin-RevId: 169756384
---
Commit 1dc2fe7ac authored by Gunhan Gulsoy<gunan@google.com>
Committed by TensorFlower Gardener<gardener@tensorflow.org>:
BEGIN_PUBLIC
Automated g4 rollback of changelist 166264198
PiperOrigin-RevId: 169998124
Diffstat (limited to 'tensorflow/stream_executor/dnn.h')
-rw-r--r-- | tensorflow/stream_executor/dnn.h | 65 |
1 files changed, 41 insertions, 24 deletions
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 0a4525c1b7..b11c6417be 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -667,9 +667,26 @@ class PoolingDescriptor { std::vector<int64> strides_; }; -typedef int64 AlgorithmType; -constexpr AlgorithmType kDefaultAlgorithm = -1; -constexpr AlgorithmType kNoSuitableAlgorithmFound = -2; +// Collects parameters for DNN algorithms +class AlgorithmDesc { + public: + typedef int64 Index; + AlgorithmDesc() : algo_(kDefaultAlgorithm), tensor_ops_enabled_(false) {} + AlgorithmDesc(Index a, bool use_tensor_ops) + : algo_(a), tensor_ops_enabled_(use_tensor_ops) {} + bool is_default() const { return algo_ == kDefaultAlgorithm; } + bool tensor_ops_enabled() const { return tensor_ops_enabled_; } + Index algo_id() const { return algo_; } + bool operator==(const AlgorithmDesc& other) const { + return this->algo_ == other.algo_ && + this->tensor_ops_enabled_ == other.tensor_ops_enabled_; + } + + private: + enum { kDefaultAlgorithm = -1 }; + Index algo_; + bool tensor_ops_enabled_; +}; // Describes the result from a perf experiment. 
// @@ -679,16 +696,16 @@ constexpr AlgorithmType kNoSuitableAlgorithmFound = -2; class ProfileResult { public: bool is_valid() const { - return (algorithm_ != kDefaultAlgorithm && + return (!algorithm_.is_default() && elapsed_time_in_ms_ != std::numeric_limits<float>::max()); } - AlgorithmType algorithm() const { return algorithm_; } - void set_algorithm(AlgorithmType val) { algorithm_ = val; } + AlgorithmDesc algorithm() const { return algorithm_; } + void set_algorithm(AlgorithmDesc val) { algorithm_ = val; } float elapsed_time_in_ms() const { return elapsed_time_in_ms_; } void set_elapsed_time_in_ms(float val) { elapsed_time_in_ms_ = val; } private: - AlgorithmType algorithm_ = kDefaultAlgorithm; + AlgorithmDesc algorithm_; float elapsed_time_in_ms_ = std::numeric_limits<float>::max(); }; @@ -700,17 +717,14 @@ class ProfileResult { // the allocation for the scratch memory fails. class AlgorithmConfig { public: - AlgorithmConfig() - : algorithm_(kDefaultAlgorithm), - algorithm_no_scratch_(kDefaultAlgorithm) {} - explicit AlgorithmConfig(AlgorithmType algorithm) - : algorithm_(algorithm), algorithm_no_scratch_(kDefaultAlgorithm) {} - AlgorithmConfig(AlgorithmType algorithm, AlgorithmType algorithm_no_scratch) + AlgorithmConfig() {} + explicit AlgorithmConfig(AlgorithmDesc algorithm) : algorithm_(algorithm) {} + AlgorithmConfig(AlgorithmDesc algorithm, AlgorithmDesc algorithm_no_scratch) : algorithm_(algorithm), algorithm_no_scratch_(algorithm_no_scratch) {} - AlgorithmType algorithm() const { return algorithm_; } - void set_algorithm(AlgorithmType val) { algorithm_ = val; } - AlgorithmType algorithm_no_scratch() const { return algorithm_no_scratch_; } - void set_algorithm_no_scratch(AlgorithmType val) { + AlgorithmDesc algorithm() const { return algorithm_; } + void set_algorithm(AlgorithmDesc val) { algorithm_ = val; } + AlgorithmDesc algorithm_no_scratch() const { return algorithm_no_scratch_; } + void set_algorithm_no_scratch(AlgorithmDesc val) { 
algorithm_no_scratch_ = val; } bool operator==(const AlgorithmConfig& other) const { @@ -723,8 +737,8 @@ class AlgorithmConfig { string ToString() const; private: - AlgorithmType algorithm_; - AlgorithmType algorithm_no_scratch_; + AlgorithmDesc algorithm_; + AlgorithmDesc algorithm_no_scratch_; }; // Describes a local response normalization (LRN). LRN is used e.g. in @@ -944,8 +958,8 @@ class DnnSupport { // convolution result. // scratch_allocator: un-owned, may-be-null object that may allocate scratch // space in order to speed up the convolution operation. - // algorithm: an integer to specify which algorithm should be used for the - // operation. kDefaultAlgorithm means the system will pick an algorithm + // algorithm: specifies which algorithm should be used for the + // operation. If algorithm.is_default(), the system will pick an algorithm // by default. The coding of the algorithm is be interpretted by the // underlying implementation. // output_profile_result: the output profile result for this call. The @@ -1112,7 +1126,8 @@ class DnnSupport { // Return a list of algorithms supported by the forward convolution pass. virtual bool GetConvolveAlgorithms( - bool with_winograd_nonfused, std::vector<AlgorithmType>* out_algorithms); + bool with_winograd_nonfused, + std::vector<AlgorithmDesc::Index>* out_algorithms); // Version of DoConvolve that uses pre-quantized 8 bit coefficients. // coefficient_scales specifies the scaling of each column of coefficients: @@ -1191,7 +1206,8 @@ class DnnSupport { // Return a list of algorithms supported by the backward convolution pass for // data. 
virtual bool GetConvolveBackwardDataAlgorithms( - bool with_winograd_nonfused, std::vector<AlgorithmType>* out_algorithms); + bool with_winograd_nonfused, + std::vector<AlgorithmDesc::Index>* out_algorithms); virtual bool DoConvolveBackwardData( Stream* stream, const FilterDescriptor& filter_descriptor, @@ -1239,7 +1255,8 @@ class DnnSupport { // Return a list of algorithms supported by the backward convolution pass for // filters. virtual bool GetConvolveBackwardFilterAlgorithms( - bool with_winograd_nonfused, std::vector<AlgorithmType>* out_algorithms); + bool with_winograd_nonfused, + std::vector<AlgorithmDesc::Index>* out_algorithms); virtual bool DoConvolveBackwardFilter( Stream* stream, const BatchDescriptor& input_descriptor, |