diff options
author | Akshay Modi <nareshmodi@google.com> | 2018-10-01 13:46:31 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-10-01 13:57:32 -0700 |
commit | ec900f15e352e4b203b1f0678f7d2ff042df57d5 (patch) | |
tree | 2d7a7ffc0f17cb28801c7a9937b6f4e3777592c7 /tensorflow/python/eager | |
parent | 3039a4694e22674b502257ae34b0a5b614a631f3 (diff) |
Minor speed improvements to defun.
- EncodeArg in C instead of Python.
- Also caches parsed device specs and device spec hashes
- Adds a common way to register python types in C.
- Fastpath canonicalize function inputs when no kwargs are passed
- Set the func name attr directly instead of creating an op to wrap it.
- Rewrite IsAttrsHelper without caching
Before:
entry {
name: "MicroBenchmarks.benchmark_defun_matmul_2_by_2_CPU"
iters: 30000
wall_time: 101.803263028
extras {
key: "examples_per_sec"
value {
double_value: 9822.86785562
}
}
}
After:
entry {
name: "MicroBenchmarks.benchmark_defun_matmul_2_by_2_CPU"
iters: 30000
wall_time: 47.2899993261
extras {
key: "examples_per_sec"
value {
double_value: 21146.1199884
}
}
}
PiperOrigin-RevId: 215272962
Diffstat (limited to 'tensorflow/python/eager')
-rw-r--r-- | tensorflow/python/eager/BUILD | 1 | ||||
-rw-r--r-- | tensorflow/python/eager/function.py | 100 | ||||
-rw-r--r-- | tensorflow/python/eager/function_test.py | 26 | ||||
-rwxr-xr-x | tensorflow/python/eager/pywrap_tfe.h | 4 | ||||
-rw-r--r-- | tensorflow/python/eager/pywrap_tfe_src.cc | 223 |
5 files changed, 277 insertions, 77 deletions
diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index d3d997e6df..d0c1a93118 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -37,6 +37,7 @@ cc_library( "//tensorflow/python:safe_ptr", "//third_party/py/numpy:headers", "//third_party/python_runtime:headers", + "@com_google_absl//absl/strings", "@com_google_absl//absl/types:variant", ], ) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 60a4f018cd..3b6f288fb9 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -1005,52 +1005,8 @@ def func_graph_from_py_func(name, return func_graph -_TensorType = collections.namedtuple("_TensorType", ["dtype", "shape"]) - - -def _encode_arg(arg): - """A canonical representation for this argument, for use in a cache key.""" - - # `defun` uses dtypes and shapes instead of `Tensors` as cache keys. Dtypes - # are used because TensorFlow graphs are not parametric w.r.t. dtypes. Shapes - # are used for both performance reasons, as much TensorFlow code specializes - # on known shapes to produce slimmer graphs, and correctness, as some - # high-level APIs require shapes to be fully-known. - # - # TODO(akshayka): Add support for sparse tensors. 
- # - # pylint: disable=protected-access - if isinstance(arg, ops.Tensor): - return _TensorType(arg.dtype, arg._shape_tuple()) - elif isinstance(arg, ops.IndexedSlices): - if arg.dense_shape is not None: - return tuple([ - _TensorType(arg.values.dtype, arg.values._shape_tuple()), - _TensorType(arg.indices.dtype, arg.indices._shape_tuple()), - _TensorType(arg.dense_shape.dtype, arg.dense_shape._shape_tuple()), - ]) - else: - return tuple([ - _TensorType(arg.values.dtype, arg.values._shape_tuple()), - _TensorType(arg.indices.dtype, arg.indices._shape_tuple()), - ]) - # pylint: enable=protected-access - elif isinstance(arg, (list, tuple)): - return tuple([_encode_arg(elem) for elem in arg]) - elif isinstance(arg, dict): - return tuple( - (_encode_arg(key), _encode_arg(arg[key])) for key in sorted(arg)) - else: - try: - # If possible, keep only a weak reference to Python objects. Weak - # references hash to the same value as the original object. - # TODO(allenl): Clean up dead functions and their cache keys if the cache - # gets large. Right now creating objects with a defunned method, calling - # the method, and losing a reference to the object in a loop will leak - # memory here. 
- return weakref.ref(arg) - except TypeError: - return arg +pywrap_tensorflow.RegisterType("Tensor", ops.Tensor) +pywrap_tensorflow.RegisterType("IndexedSlices", ops.IndexedSlices) def _deterministic_dict_values(dictionary): @@ -1120,6 +1076,8 @@ class PolymorphicFunction(object): offset + index: default for index, default in enumerate(fullargspec.defaults or []) } + self._default_values = fullargspec.defaults + self._default_values_start_index = offset if input_signature is None: self._input_signature = None else: @@ -1180,7 +1138,7 @@ class PolymorphicFunction(object): """Computes the cache key given inputs and execution context.""" if self._input_signature is None: inputs = (args, kwargs) if kwargs else args - cache_key = tuple(_encode_arg(arg) for arg in inputs) + cache_key = pywrap_tensorflow.TFE_Py_EncodeArg(inputs) else: del args, kwargs cache_key = self._flat_input_signature @@ -1203,7 +1161,7 @@ class PolymorphicFunction(object): colocation_stack = (() if executing_eagerly else tuple(default_graph._colocation_stack.peek_objs())) # pylint: disable=protected-access - return cache_key + (execution_context, device_functions, colocation_stack) + return (cache_key, execution_context, device_functions, colocation_stack) def _canonicalize_function_inputs(self, *args, **kwargs): """Canonicalizes `args` and `kwargs`. @@ -1231,26 +1189,32 @@ class PolymorphicFunction(object): # Maps from index of arg to its corresponding value, according to `args` # and `kwargs`; seeded with the default values for the named args that # aren't in `args`. 
- arg_indices_to_values = { - index: default - for index, default in six.iteritems(self._arg_indices_to_default_values) - if index >= len(args) - } - consumed_args = [] - for arg, value in six.iteritems(kwargs): - index = self._args_to_indices.get(arg, None) - if index is not None: - arg_indices_to_values[index] = value - consumed_args.append(arg) - elif self._input_signature is not None: - raise ValueError("Cannot define a TensorFlow function from a Python " - "function with keyword arguments when " - "input_signature is provided.") - for arg in consumed_args: - # After this loop, `kwargs` will only contain true keyword arguments, as - # opposed to named arguments called in a keyword-like fashion. - kwargs.pop(arg) - inputs = args + _deterministic_dict_values(arg_indices_to_values) + if not kwargs: + if self._default_values: + inputs = args + self._default_values[len(args) - + self._default_values_start_index:] + else: + inputs = args + else: + arg_indices_to_values = { + index: default for index, default in six.iteritems( + self._arg_indices_to_default_values) if index >= len(args) + } + consumed_args = [] + for arg, value in six.iteritems(kwargs): + index = self._args_to_indices.get(arg, None) + if index is not None: + arg_indices_to_values[index] = value + consumed_args.append(arg) + elif self._input_signature is not None: + raise ValueError("Cannot define a TensorFlow function from a Python " + "function with keyword arguments when " + "input_signature is provided.") + for arg in consumed_args: + # After this loop, `kwargs` will only contain true keyword arguments, as + # opposed to named arguments called in a keyword-like fashion. + kwargs.pop(arg) + inputs = args + _deterministic_dict_values(arg_indices_to_values) flat_inputs = nest.flatten(inputs) # Check for NumPy arrays in arguments and convert them to Tensors. 
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index afe3ba9893..9ce367a837 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -1237,6 +1237,24 @@ class FunctionTest(test.TestCase): x = constant_op.constant([1.0, 2.0]) self.assertAllEqual([2., 4.], self.evaluate(defined(x))) + def testCacheObjectHashCollisions(self): + + class Foo(object): + + def __hash__(self): + return 42 + + def func(foo): + del foo + return + + defined = function.defun(func) + defined(Foo()) + self.assertEqual(len(defined._function_cache), 1) + + defined(Foo()) + self.assertEqual(len(defined._function_cache), 2) + def testPythonFunctionWithDefaultArgs(self): def func(foo, bar=1, baz=2): @@ -1250,20 +1268,20 @@ class FunctionTest(test.TestCase): def cache_keys(): """Sanitizes cache keys of non-input metadata.""" - return tuple(key[:3] for key in defined._function_cache) + return tuple(key[0] for key in defined._function_cache) # `True` corresponds to the fact that we're executing eagerly - self.assertIn((0, 1, 20), cache_keys()) + self.assertIn(('tRRR', (0, 1, 20)), cache_keys()) defined(1) # bar=1, baz=2 - self.assertIn((1, 1, 2), cache_keys()) + self.assertIn(('tRRR', (1, 1, 2)), cache_keys()) # This matches the previous call. defined(foo=1) self.assertEqual(len(defined._function_cache), 2) defined(1, 2, 3) - self.assertIn((1, 2, 3), cache_keys()) + self.assertIn(('tRRR', (1, 2, 3)), cache_keys()) # This matches the previous call. defined(1, bar=2, baz=3) diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index f1b4042ec9..decd635b58 100755 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -224,4 +224,8 @@ PyObject* TFE_Py_TensorShapeSlice(PyObject* tensors, int slice_dim); // The shape is represented as a Python tuple of integers. 
PyObject* TFE_Py_TensorShapeOnDevice(PyObject* tensor); +// Encodes the object as a tuple that is meant to be used as part of the key +// for the defun function cache. +PyObject* TFE_Py_EncodeArg(PyObject*); + #endif // TENSORFLOW_PYTHON_EAGER_PYWRAP_TFE_H_ diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 196e20e4d7..4b9f7f4100 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/python/eager/pywrap_tfe.h" +#include "absl/strings/str_cat.h" #include "absl/types/variant.h" #include "tensorflow/c/c_api.h" #include "tensorflow/c/c_api_internal.h" @@ -567,11 +568,8 @@ bool SetOpAttrScalar( return false; } } - TFE_Op* func = TFE_NewOp( - ctx, string(func_name.data(), func_name.size()).c_str(), status); - if (TF_GetCode(status) != TF_OK) return false; - TFE_OpSetAttrFunction(op, key, func); - TFE_DeleteOp(func); + TF_SetStatus(status, TF_OK, ""); + TFE_OpSetAttrFunctionName(op, key, func_name.data(), func_name.size()); } else { TF_SetStatus( status, TF_UNIMPLEMENTED, @@ -2748,3 +2746,218 @@ PyObject* TFE_Py_RecordGradient(PyObject* op_name, PyObject* inputs, return RecordGradient(op_name, inputs, attrs, results, name); } + +namespace { + +tensorflow::int64 GetPyNoneHash() { + tensorflow::int64 py_none_hash = PyObject_Hash(Py_None); + return py_none_hash; +} + +struct EncodeResult { + string str; + std::vector<PyObject*> objects; + + PyObject* ToPyTuple() { + PyObject* result = PyTuple_New(2); + + PyTuple_SET_ITEM(result, 0, GetPythonObjectFromString(str.c_str())); + + if (objects.empty()) { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(result, 1, Py_None); + } else { + PyObject* objects_tuple = PyTuple_New(objects.size()); + + for (int i = 0; i < objects.size(); i++) { + PyTuple_SET_ITEM(objects_tuple, i, objects[i]); + } + + PyTuple_SET_ITEM(result, 1, objects_tuple); + } + + return result; + } +}; + 
+tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) { + if (EagerTensor_CheckExact(arg)) { + TFE_TensorHandle* t = EagerTensor_Handle(arg); + tensorflow::TensorShape tensor_shape; + TF_RETURN_IF_ERROR(t->handle->Shape(&tensor_shape)); + absl::StrAppend(&result->str, t->handle->dtype); + + for (tensorflow::int64 dim_size : tensor_shape.dim_sizes()) { + absl::StrAppend(&result->str, dim_size); + } + + return tensorflow::Status::OK(); + } + + tensorflow::Safe_PyObjectPtr dtype_object( + PyObject_GetAttrString(arg, "dtype")); + + if (dtype_object == nullptr) { + return tensorflow::errors::InvalidArgument( + "ops.Tensor object doesn't have dtype() attr."); + } + + tensorflow::Safe_PyObjectPtr dtype_enum( + PyObject_GetAttrString(dtype_object.get(), "_type_enum")); + + if (dtype_enum == nullptr) { + return tensorflow::errors::InvalidArgument( + "ops.Tensor's dtype object doesn't have _type_enum() attr."); + } + + tensorflow::DataType dtype = + static_cast<tensorflow::DataType>(MakeInt(dtype_enum.get())); + + absl::StrAppend(&result->str, dtype); + static char _shape_tuple[] = "_shape_tuple"; + tensorflow::Safe_PyObjectPtr shape_tuple( + PyObject_CallMethod(arg, _shape_tuple, nullptr)); + + if (shape_tuple == nullptr) { + return tensorflow::errors::InvalidArgument( + "ops.Tensor object doesn't have _shape_tuple() method."); + } + + if (shape_tuple.get() == Py_None) { + // Unknown shape, encode that directly. 
+ absl::StrAppend(&result->str, GetPyNoneHash()); + return tensorflow::Status::OK(); + } + + tensorflow::Safe_PyObjectPtr shape_seq(PySequence_Fast( + shape_tuple.get(), "shape_tuple didn't return a sequence")); + + int len = PySequence_Fast_GET_SIZE(shape_seq.get()); + for (int i = 0; i < len; ++i) { + PyObject* item = PySequence_Fast_GET_ITEM(shape_seq.get(), i); + if (item == Py_None) { + absl::StrAppend(&result->str, GetPyNoneHash()); + } else { + absl::StrAppend(&result->str, MakeInt(item)); + } + } + + return tensorflow::Status::OK(); +} + +const char kTensor[] = "T"; +const char kIndexedSlices[] = "I"; +const char kList[] = "L"; +const char kTuple[] = "t"; +const char kDict[] = "D"; +const char kRaw[] = "R"; + +tensorflow::Status TFE_Py_EncodeArgHelper(PyObject* arg, EncodeResult* result); + +// This function doesn't set the type of sequence before +tensorflow::Status TFE_Py_EncodeSequence(PyObject* arg, const char* type, + EncodeResult* result) { + tensorflow::Safe_PyObjectPtr arg_seq( + PySequence_Fast(arg, "unable to create seq from list/tuple")); + + absl::StrAppend(&result->str, type); + int len = PySequence_Fast_GET_SIZE(arg_seq.get()); + for (int i = 0; i < len; ++i) { + PyObject* item = PySequence_Fast_GET_ITEM(arg_seq.get(), i); + if (item == Py_None) { + absl::StrAppend(&result->str, GetPyNoneHash()); + } else { + TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelper(item, result)); + } + } + + return tensorflow::Status::OK(); +} + +tensorflow::Status TFE_Py_EncodeArgHelper(PyObject* arg, EncodeResult* result) { + if (tensorflow::swig::IsTensor(arg)) { + absl::StrAppend(&result->str, kTensor); + TF_RETURN_IF_ERROR(TFE_Py_EncodeTensor(arg, result)); + } else if (tensorflow::swig::IsIndexedSlices(arg)) { + absl::StrAppend(&result->str, kIndexedSlices); + tensorflow::Safe_PyObjectPtr values(PyObject_GetAttrString(arg, "values")); + if (values == nullptr) { + PyErr_Clear(); + return tensorflow::errors::InvalidArgument( + "IndexedSlices does not have a values 
attr"); + } + TF_RETURN_IF_ERROR(TFE_Py_EncodeTensor(values.get(), result)); + + tensorflow::Safe_PyObjectPtr indices( + PyObject_GetAttrString(arg, "indices")); + if (indices == nullptr) { + PyErr_Clear(); + return tensorflow::errors::InvalidArgument( + "IndexedSlices does not have a indices attr"); + } + TF_RETURN_IF_ERROR(TFE_Py_EncodeTensor(indices.get(), result)); + + tensorflow::Safe_PyObjectPtr dense_shape( + PyObject_GetAttrString(arg, "dense_shape")); + if (dense_shape == nullptr) { + PyErr_Clear(); + return tensorflow::errors::InvalidArgument( + "IndexedSlices does not have a dense_shape attr"); + } + if (dense_shape.get() != Py_None) { + TF_RETURN_IF_ERROR(TFE_Py_EncodeTensor(dense_shape.get(), result)); + } + } else if (PyList_Check(arg)) { + TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kList, result)); + } else if (PyTuple_Check(arg)) { + TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kTuple, result)); + } else if (PyDict_Check(arg)) { + tensorflow::Safe_PyObjectPtr keys(PyDict_Keys(arg)); + if (PyList_Sort(keys.get()) == -1) { + return tensorflow::errors::Internal("Unable to sort keys"); + } + + absl::StrAppend(&result->str, kDict); + int len = PyList_Size(keys.get()); + + for (int i = 0; i < len; i++) { + PyObject* key = PyList_GetItem(keys.get(), i); + TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelper(key, result)); + PyObject* value = PyDict_GetItem(arg, key); + TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelper(value, result)); + } + } else { + PyObject* object = PyWeakref_NewRef(arg, nullptr); + + if (object == nullptr) { + PyErr_Clear(); + + object = arg; + Py_INCREF(object); + } + + absl::StrAppend(&result->str, kRaw); + result->objects.push_back(object); + } + + return tensorflow::Status::OK(); +} + +} // namespace + +// `defun` uses dtypes and shapes instead of `Tensors` as cache keys. Dtypes +// are used because TensorFlow graphs are not parametric w.r.t. dtypes. 
Shapes +// are used for both performance reasons, as much TensorFlow code specializes +// on known shapes to produce slimmer graphs, and correctness, as some +// high-level APIs require shapes to be fully-known. +// +// TODO(nareshmodi): Add support for sparse tensors. +PyObject* TFE_Py_EncodeArg(PyObject* arg) { + EncodeResult result; + const auto status = TFE_Py_EncodeArgHelper(arg, &result); + if (MaybeRaiseExceptionFromStatus(status, nullptr)) { + return nullptr; + } + + return result.ToPyTuple(); +} |