aboutsummaryrefslogtreecommitdiffhomepage
path: root/third_party/nccl/fix_clang_compilation.patch
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/nccl/fix_clang_compilation.patch')
-rw-r--r--third_party/nccl/fix_clang_compilation.patch85
1 files changed, 0 insertions, 85 deletions
diff --git a/third_party/nccl/fix_clang_compilation.patch b/third_party/nccl/fix_clang_compilation.patch
deleted file mode 100644
index e8d2a7dc9f..0000000000
--- a/third_party/nccl/fix_clang_compilation.patch
+++ /dev/null
@@ -1,85 +0,0 @@
-From 8241cd7b6ed1425eeb88fd380090575978e358f4 Mon Sep 17 00:00:00 2001
-From: Ilya Biryukov <ibiryukov@google.com>
-Date: Thu, 16 Mar 2017 12:01:11 +0100
-Subject: [PATCH 1/1] Fix compilation error when compiling with 'clang -x
- cuda'.
-
-Functions vFetch and vStore are not found by ADL with clang,
-so they need to be declared before usage in ReduceCopy.
----
- src/common_kernel.h | 52 ++++++++++++++++++++++++++--------------------------
- 1 file changed, 26 insertions(+), 26 deletions(-)
-
-diff --git a/src/common_kernel.h b/src/common_kernel.h
-index 28fbc85..cc71f8a 100644
---- a/src/common_kernel.h
-+++ b/src/common_kernel.h
-@@ -30,6 +30,32 @@
- #define BAR(type, barid, nthreads) \
- BAR_EXPAND(type, barid, ROUNDUP(nthreads, WARP_SIZE))
-
-+template<typename T> inline __device__
-+T vFetch(const volatile T* ptr) {
-+ return *ptr;
-+}
-+
-+#ifdef CUDA_HAS_HALF
-+template<> inline __device__
-+half vFetch<half>(const volatile half* ptr) {
-+ half r;
-+ r.x = ptr->x;
-+ return r;
-+}
-+#endif
-+
-+template<typename T> inline __device__
-+void vStore(volatile T* ptr, const T val) {
-+ *ptr = val;
-+}
-+
-+#ifdef CUDA_HAS_HALF
-+template<> inline __device__
-+void vStore<half>(volatile half* ptr, const half val) {
-+ ptr->x = val.x;
-+}
-+#endif
-+
- __device__ unsigned int spinct;
-
- // Spin wait until func evaluates to true
-@@ -225,32 +251,6 @@ __device__ inline volatile T* AlignUp(volatile T * ptr, size_t align) {
- return reinterpret_cast<volatile T*>(ALIGNUP(ptrval, align));
- }
-
--template<typename T> inline __device__
--T vFetch(const volatile T* ptr) {
-- return *ptr;
--}
--
--#ifdef CUDA_HAS_HALF
--template<> inline __device__
--half vFetch<half>(const volatile half* ptr) {
-- half r;
-- r.x = ptr->x;
-- return r;
--}
--#endif
--
--template<typename T> inline __device__
--void vStore(volatile T* ptr, const T val) {
-- *ptr = val;
--}
--
--#ifdef CUDA_HAS_HALF
--template<> inline __device__
--void vStore<half>(volatile half* ptr, const half val) {
-- ptr->x = val.x;
--}
--#endif
--
- // Assumptions:
- // - there is exactly 1 block
- // - THREADS is the number of producer threads
---
-2.12.0.367.g23dc2f6d3c-goog
-