diff options
Diffstat (limited to 'tensorflow/c')
-rw-r--r-- | tensorflow/c/eager/tape.h | 12 |
1 file changed, 12 insertions, 0 deletions
diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index ce038a4b57..49990b6249 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -440,6 +440,18 @@ Status InitialGradients(const VSpace<Gradient, BackwardFunction>& vspace,
   return Status::OK();
 }
 
+// TODO(agarwal): use an automatic mechanism for handling None arguments to
+// gradient functions.
+//
+// Some gradient functions can accept None arguments for gradients. The
+// following maps the operation name to the indices at which the corresponding
+// gradient function can accept None values. e.g. FusedBatchNorm outputs 5
+// values and hence receives 5 gradient values during backprop. However the
+// gradient function uses only the first of those values and ignores the rest.
+// The entry, "FusedBatchNorm": [1, 2, 3, 4], indicates that only the gradient
+// corresponding to index 0 is used, and the gradient values at indices 1-4 are
+// ignored (and hence can be None). The backprop algorithm can then leverage
+// this by not constructing zeros to pass for those indices.
 gtl::FlatMap<string, gtl::FlatSet<int>>* FunctionsAcceptingNoneForIndicesMap() {
   static auto* const m = new gtl::FlatMap<string, gtl::FlatSet<int>>({
       {"SoftmaxCrossEntropyWithLogits", {1}},