aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/python/lib
diff options
context:
space:
mode:
author: Eugene Zhulenev <ezhulenev@google.com> 2018-06-06 11:26:43 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-06-06 11:29:18 -0700
commit: 879fc3440495d9388754cb7d1878caf034d03d61 (patch)
tree: eea14054c8f73365f1c185b3d86f5eca698ab722 /tensorflow/python/lib
parent: 980c390941853649bb56c4940a46f474eb97ed80 (diff)
Use memmove instead of memcpy for the large tensors on Linux.
Issue: #17246

~1.7x speedup for fetching a variable

Before:
fetch_cpu_variable : 5.5 GB/sec, min: 14.56, median: 15.05, mean: 15.14
fetch_cpu_variable_add: 11.0 GB/sec, min: 7.29, median: 12.03, mean: 12.56
fetch_cpu_variable_concat: 11.6 GB/sec, min: 6.92, median: 13.78, mean: 14.76

After:
fetch_cpu_variable : 9.2 GB/sec, min: 8.71, median: 8.79, mean: 8.80
fetch_cpu_variable_add: 12.5 GB/sec, min: 6.41, median: 7.20, mean: 7.51
fetch_cpu_variable_concat: 12.7 GB/sec, min: 6.32, median: 6.54

PiperOrigin-RevId: 199497691
Diffstat (limited to 'tensorflow/python/lib')
-rw-r--r-- tensorflow/python/lib/core/ndarray_tensor.cc 38
1 files changed, 36 insertions, 2 deletions
diff --git a/tensorflow/python/lib/core/ndarray_tensor.cc b/tensorflow/python/lib/core/ndarray_tensor.cc
index 9df38d464c..2acab92764 100644
--- a/tensorflow/python/lib/core/ndarray_tensor.cc
+++ b/tensorflow/python/lib/core/ndarray_tensor.cc
@@ -312,6 +312,40 @@ Status GetPyArrayDescrForTensor(const TF_Tensor* tensor,
return Status::OK();
}
+
+inline void FastMemcpy(void* dst, const void* src, size_t size) {  // Copies `size` bytes from `src` to `dst`.
+ // clang-format off
+ switch (size) {
+ // Sizes 1..16 each call memcpy with a constant length: most compilers will generate
+ // inline code for fixed sizes, which is significantly faster for small copies.
+ case 1: memcpy(dst, src, 1); break;
+ case 2: memcpy(dst, src, 2); break;
+ case 3: memcpy(dst, src, 3); break;
+ case 4: memcpy(dst, src, 4); break;
+ case 5: memcpy(dst, src, 5); break;
+ case 6: memcpy(dst, src, 6); break;
+ case 7: memcpy(dst, src, 7); break;
+ case 8: memcpy(dst, src, 8); break;
+ case 9: memcpy(dst, src, 9); break;
+ case 10: memcpy(dst, src, 10); break;
+ case 11: memcpy(dst, src, 11); break;
+ case 12: memcpy(dst, src, 12); break;
+ case 13: memcpy(dst, src, 13); break;
+ case 14: memcpy(dst, src, 14); break;
+ case 15: memcpy(dst, src, 15); break;
+ case 16: memcpy(dst, src, 16); break;
+#if defined(PLATFORM_GOOGLE) || defined(PLATFORM_POSIX) && \
+ !defined(IS_MOBILE_PLATFORM)
+ // The guard parses as PLATFORM_GOOGLE || (PLATFORM_POSIX && !IS_MOBILE_PLATFORM).
+ // On Linux, memmove appears to be faster than memcpy for large sizes, strangely enough.
+ default: memmove(dst, src, size); break;  // reached for size == 0 and size > 16
+#else
+ default: memcpy(dst, src, size); break;  // reached for size == 0 and size > 16
+#endif
+ }
+ // clang-format on
+}
+
} // namespace
// Converts the given TF_Tensor to a numpy ndarray.
@@ -362,8 +396,8 @@ Status TF_TensorToPyArray(Safe_TF_TensorPtr tensor, PyObject** out_ndarray) {
" bytes but TF_Tensor was ",
TF_TensorByteSize(tensor.get()), " bytes");
} else {
- memcpy(PyArray_DATA(py_array), TF_TensorData(tensor.get()),
- PyArray_NBYTES(py_array));
+ FastMemcpy(PyArray_DATA(py_array), TF_TensorData(tensor.get()),
+ PyArray_NBYTES(py_array));
}
// PyArray_Return turns rank 0 arrays into numpy scalars