diff options
Diffstat (limited to 'third_party/nccl/fix_clang_compilation.patch')
-rw-r--r-- | third_party/nccl/fix_clang_compilation.patch | 85 |
1 files changed, 85 insertions, 0 deletions
diff --git a/third_party/nccl/fix_clang_compilation.patch b/third_party/nccl/fix_clang_compilation.patch new file mode 100644 index 0000000000..e8d2a7dc9f --- /dev/null +++ b/third_party/nccl/fix_clang_compilation.patch @@ -0,0 +1,85 @@ +From 8241cd7b6ed1425eeb88fd380090575978e358f4 Mon Sep 17 00:00:00 2001 +From: Ilya Biryukov <ibiryukov@google.com> +Date: Thu, 16 Mar 2017 12:01:11 +0100 +Subject: [PATCH 1/1] Fix compilation error when compiling with 'clang -x + cuda'. + +Functions vFetch and vStore are not found by ADL with clang, +so they need to be declared before usage in ReduceCopy. +--- + src/common_kernel.h | 52 ++++++++++++++++++++++++++-------------------------- + 1 file changed, 26 insertions(+), 26 deletions(-) + +diff --git a/src/common_kernel.h b/src/common_kernel.h +index 28fbc85..cc71f8a 100644 +--- a/src/common_kernel.h ++++ b/src/common_kernel.h +@@ -30,6 +30,32 @@ + #define BAR(type, barid, nthreads) \ + BAR_EXPAND(type, barid, ROUNDUP(nthreads, WARP_SIZE)) + ++template<typename T> inline __device__ ++T vFetch(const volatile T* ptr) { ++ return *ptr; ++} ++ ++#ifdef CUDA_HAS_HALF ++template<> inline __device__ ++half vFetch<half>(const volatile half* ptr) { ++ half r; ++ r.x = ptr->x; ++ return r; ++} ++#endif ++ ++template<typename T> inline __device__ ++void vStore(volatile T* ptr, const T val) { ++ *ptr = val; ++} ++ ++#ifdef CUDA_HAS_HALF ++template<> inline __device__ ++void vStore<half>(volatile half* ptr, const half val) { ++ ptr->x = val.x; ++} ++#endif ++ + __device__ unsigned int spinct; + + // Spin wait until func evaluates to true +@@ -225,32 +251,6 @@ __device__ inline volatile T* AlignUp(volatile T * ptr, size_t align) { + return reinterpret_cast<volatile T*>(ALIGNUP(ptrval, align)); + } + +-template<typename T> inline __device__ +-T vFetch(const volatile T* ptr) { +- return *ptr; +-} +- +-#ifdef CUDA_HAS_HALF +-template<> inline __device__ +-half vFetch<half>(const volatile half* ptr) { +- half r; +- r.x = ptr->x; +- return r; +-} +-#endif +- +-template<typename T> inline __device__ +-void vStore(volatile T* ptr, const T val) { +- *ptr = val; +-} +- +-#ifdef CUDA_HAS_HALF +-template<> inline __device__ +-void vStore<half>(volatile half* ptr, const half val) { +- ptr->x = val.x; +-} +-#endif +- + // Assumptions: + // - there is exactly 1 block + // - THREADS is the number of producer threads +-- +2.12.0.367.g23dc2f6d3c-goog + |