-rw-r--r--  tensorflow/c/eager/tape.h                 | 12 ++++++++++++
-rw-r--r--  tensorflow/python/eager/backprop.py       | 21 ---------------------
-rw-r--r--  tensorflow/python/eager/pywrap_tfe_src.cc |  3 +++
3 files changed, 15 insertions(+), 21 deletions(-)
diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index ce038a4b57..49990b6249 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -440,6 +440,18 @@ Status InitialGradients(const VSpace<Gradient, BackwardFunction>& vspace,
   return Status::OK();
 }
 
+// TODO(agarwal): use an automatic mechanism for handling None arguments to
+// gradient functions.
+//
+// Some gradient functions can accept None arguments for gradients. The
+// following maps the operation name to the indices at which the corresponding
+// gradient function can accept None values. e.g. FusedBatchNorm outputs 5
+// values and hence receives 5 gradient values during backprop. However the
+// gradient function uses only the first of those values and ignores the rest.
+// The entry, "FusedBatchNorm": [1, 2, 3, 4], indicates that only the gradient
+// corresponding to index 0 is used, and the gradient values at indices 1-4 are
+// ignored (and hence can be None). The backprop algorithm can then leverage
+// this by not constructing zeros to pass for those indices.
 gtl::FlatMap<string, gtl::FlatSet<int>>* FunctionsAcceptingNoneForIndicesMap() {
   static auto* const m = new gtl::FlatMap<string, gtl::FlatSet<int>>({
       {"SoftmaxCrossEntropyWithLogits", {1}},
diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index e6cf9653a8..907234b0f8 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -120,27 +120,6 @@ def _gradient_function(op_name, attr_tuple, num_inputs, inputs, outputs,
 
 pywrap_tensorflow.TFE_Py_RegisterGradientFunction(_gradient_function)
 
-_tracing = False
-
-
-# TODO(agarwal): use an automatic mechanism for handling None arguments to
-# gradient functions.
-# Some gradient functions can accept None arguments for gradients. The following
-# maps the operation name to the indices at which the corresponding gradient
-# function can accept None values.
-# e.g. FusedBatchNorm outputs 5 values and hence receives 5 gradient values
-# during backprop. However the gradient function uses only the first of those
-# values and ignores the rest. The entry, "FusedBatchNorm": [1, 2, 3, 4],
-# indicates that only the gradient corresponding to index 0 is used, and the
-# gradient values at indices 1-4 are ignored (and hence can be None). The
-# backprop algorithm can then leverage this by not constructing zeros to
-# pass for those indices.
-_grad_fn_accepts_none_for_indices = {
-    "SoftmaxCrossEntropyWithLogits": [1],
-    "FusedBatchNorm": [1, 2, 3, 4]
-}
-
-
 def _record_gradient(op_name, inputs, attrs, results, name):
   return pywrap_tensorflow.TFE_Py_RecordGradient(op_name, inputs, attrs,
                                                  results, name)
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 9f2f4e06ad..99b46159a9 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -1744,6 +1744,9 @@ PyObject* MaybeGetDTypeForAttr(const string& attr,
   Py_RETURN_NONE;
 }
 
+// TODO(agarwal): use an automatic mechanism for handling None arguments to
+// gradient functions.
+
 // Returns a pair where the first value of the pair indicates whether or not all
 // outputs are unused. If the first value is false, the second value is a
 // set that identifies which of the output indices are unused.
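
For context, here is a minimal Python sketch of the optimization the moved comment describes: when an op's gradient function tolerates None at a given output index, the backprop loop can pass None instead of materializing a zero tensor. This is not the actual TensorFlow implementation; the helper names (fill_output_gradients, GRAD_FN_ACCEPTS_NONE_FOR_INDICES) and the use of NumPy stand-ins are illustrative assumptions.

# A minimal sketch (not the actual TensorFlow internals) of the
# None-for-unused-gradients optimization described in the comment above.
import numpy as np

# Mirrors the map moved into tape.h: op name -> output indices whose
# incoming gradients the gradient function ignores (so None is acceptable).
GRAD_FN_ACCEPTS_NONE_FOR_INDICES = {
    "SoftmaxCrossEntropyWithLogits": [1],
    "FusedBatchNorm": [1, 2, 3, 4],
}

def fill_output_gradients(op_name, out_grads, outputs):
    """Replace missing output gradients with zeros, except where None is OK.

    out_grads[i] is None when output i was never used downstream. For the
    indices listed in the map, the gradient function ignores that slot, so
    we leave it as None rather than allocating a zero tensor.
    """
    none_ok = set(GRAD_FN_ACCEPTS_NONE_FOR_INDICES.get(op_name, ()))
    return [
        g if g is not None or i in none_ok else np.zeros_like(outputs[i])
        for i, g in enumerate(out_grads)
    ]

# FusedBatchNorm has 5 outputs; only the gradient at index 0 matters, so
# indices 1-4 stay None and no zero tensors are built for them.
outputs = [np.ones(4) for _ in range(5)]
grads = fill_output_gradients(
    "FusedBatchNorm", [np.ones(4), None, None, None, None], outputs)
assert all(g is None for g in grads[1:])

The diff itself deletes the Python copy of this map (_grad_fn_accepts_none_for_indices in backprop.py), leaving the C++ FunctionsAcceptingNoneForIndicesMap() in tape.h as the single source of truth that the eager backprop path consults.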