From 961ecf2a219494658f7f1c9acd32d6168efde43d Mon Sep 17 00:00:00 2001
From: "David G. Andersen"
Date: Wed, 2 Mar 2016 14:21:11 -0800
Subject: Mandating that the compiler cannot elide the copy to a local variable, in case there is unlocked concurrent access to the source tensor.

No noticeable speed changes for scatter (it should not affect the asm output in any case I can think of; it's designed to prevent pathological corner cases.)
Change: 116181195
---
 tensorflow/core/kernels/bounds_check.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'tensorflow/core/kernels/bounds_check.h')

diff --git a/tensorflow/core/kernels/bounds_check.h b/tensorflow/core/kernels/bounds_check.h
index 286ef8959c..805f698f11 100644
--- a/tensorflow/core/kernels/bounds_check.h
+++ b/tensorflow/core/kernels/bounds_check.h
@@ -33,6 +33,19 @@ EIGEN_ALWAYS_INLINE bool FastBoundsCheck(Index index, Index limit) {
                         static_cast(limit));
 }
 
+namespace internal {
+// Ensure that the compiler cannot elide a copy into a local, for
+// bounds checking on source tensors that might be updated asynchronously.
+// This function may only be used on primitive integral types (int32, int64,
+// etc). It does not guarantee any atomicity or barriers.
+template <typename T>
+const T SubtleMustCopy(const T &x) {
+  static_assert(std::is_integral<T>::value,
+                "must_copy can only be used on integer types.");
+  auto *to_x = reinterpret_cast<const volatile T *>(&x);
+  return *to_x;
+}
+}  // namespace tensorflow::internal
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_UTIL_BOUNDS_CHECK_H_
-- cgit v1.2.3