diff options
Diffstat (limited to 'third_party/nccl/fix_clang_compilation.patch')
-rw-r--r-- | third_party/nccl/fix_clang_compilation.patch | 85 |
1 files changed, 0 insertions, 85 deletions
diff --git a/third_party/nccl/fix_clang_compilation.patch b/third_party/nccl/fix_clang_compilation.patch deleted file mode 100644 index e8d2a7dc9f..0000000000 --- a/third_party/nccl/fix_clang_compilation.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 8241cd7b6ed1425eeb88fd380090575978e358f4 Mon Sep 17 00:00:00 2001 -From: Ilya Biryukov <ibiryukov@google.com> -Date: Thu, 16 Mar 2017 12:01:11 +0100 -Subject: [PATCH 1/1] Fix compilation error when compiling with 'clang -x - cuda'. - -Functions vFetch and vStore are not found by ADL with clang, -so they need to be declared before usage in ReduceCopy. ---- - src/common_kernel.h | 52 ++++++++++++++++++++++++++-------------------------- - 1 file changed, 26 insertions(+), 26 deletions(-) - -diff --git a/src/common_kernel.h b/src/common_kernel.h -index 28fbc85..cc71f8a 100644 ---- a/src/common_kernel.h -+++ b/src/common_kernel.h -@@ -30,6 +30,32 @@ - #define BAR(type, barid, nthreads) \ - BAR_EXPAND(type, barid, ROUNDUP(nthreads, WARP_SIZE)) - -+template<typename T> inline __device__ -+T vFetch(const volatile T* ptr) { -+ return *ptr; -+} -+ -+#ifdef CUDA_HAS_HALF -+template<> inline __device__ -+half vFetch<half>(const volatile half* ptr) { -+ half r; -+ r.x = ptr->x; -+ return r; -+} -+#endif -+ -+template<typename T> inline __device__ -+void vStore(volatile T* ptr, const T val) { -+ *ptr = val; -+} -+ -+#ifdef CUDA_HAS_HALF -+template<> inline __device__ -+void vStore<half>(volatile half* ptr, const half val) { -+ ptr->x = val.x; -+} -+#endif -+ - __device__ unsigned int spinct; - - // Spin wait until func evaluates to true -@@ -225,32 +251,6 @@ __device__ inline volatile T* AlignUp(volatile T * ptr, size_t align) { - return reinterpret_cast<volatile T*>(ALIGNUP(ptrval, align)); - } - --template<typename T> inline __device__ --T vFetch(const volatile T* ptr) { -- return *ptr; --} -- --#ifdef CUDA_HAS_HALF --template<> inline __device__ --half vFetch<half>(const volatile half* ptr) { -- half r; -- r.x = ptr->x; -- return r; --} --#endif -- --template<typename T> inline __device__ --void vStore(volatile T* ptr, const T val) { -- *ptr = val; --} -- --#ifdef CUDA_HAS_HALF --template<> inline __device__ --void vStore<half>(volatile half* ptr, const half val) { -- ptr->x = val.x; --} --#endif -- - // Assumptions: - // - there is exactly 1 block - // - THREADS is the number of producer threads --- -2.12.0.367.g23dc2f6d3c-goog - |