7 files changed, 86 insertions, 52 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
index 33c6c1b0f..ede3939c2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
@@ -29,25 +29,47 @@ namespace Eigen {
 namespace internal {
 
 namespace {
+
   // Note: result is undefined if val == 0
   template <typename T>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int count_leading_zeros(const T val)
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+  typename internal::enable_if<sizeof(T)==4,int>::type count_leading_zeros(const T val)
   {
 #ifdef __CUDA_ARCH__
-    return (sizeof(T) == 8) ? __clzll(val) : __clz(val);
+    return __clz(val);
 #elif EIGEN_COMP_MSVC
-	unsigned long index;
-	if (sizeof(T) == 8) {
-      _BitScanReverse64(&index, val);
-    } else {
-      _BitScanReverse(&index, val);
-    }
-    return (sizeof(T) == 8) ? 63 - index : 31 - index;
+    unsigned long index;
+    _BitScanReverse(&index, val);
+    return 31 - index;
+#else
+    EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE);
+    return __builtin_clz(static_cast<uint32_t>(val));
+#endif
+  }
+
+  template <typename T>
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+  typename internal::enable_if<sizeof(T)==8,int>::type count_leading_zeros(const T val)
+  {
+#ifdef __CUDA_ARCH__
+    return __clzll(val);
+#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64
+    unsigned long index;
+    _BitScanReverse64(&index, val);
+    return 63 - index;
+#elif EIGEN_COMP_MSVC
+    // MSVC's _BitScanReverse64 intrinsic is not available in 32-bit builds.
+    unsigned int lo = (unsigned int)(val&0xffffffff);
+    unsigned int hi = (unsigned int)((val>>32)&0xffffffff);
+    int n;
+    if(hi==0)
+      n = 32 + count_leading_zeros<unsigned int>(lo);
+    else
+      n = count_leading_zeros<unsigned int>(hi);
+    return n;
 #else
     EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE);
-    return (sizeof(T) == 8) ?
-      __builtin_clzll(static_cast<uint64_t>(val)) :
-      __builtin_clz(static_cast<uint32_t>(val));
+    return __builtin_clzll(static_cast<uint64_t>(val));
 #endif
   }
 
@@ -98,7 +120,9 @@ namespace {
       return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
 #else
       const uint64_t shift = 1ULL << log_div;
-      TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1));
+      TensorUInt128<uint64_t, uint64_t> result = TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider)
+                                               - TensorUInt128<static_val<1>, static_val<0> >(1, 0)
+                                               + TensorUInt128<static_val<0>, static_val<1> >(1);
       return static_cast<uint64_t>(result);
 #endif
     }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index fd0842cad..f950f0093 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -264,7 +264,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
     const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
     eigen_assert(num_coeffs >= numblocks * blocksize);
 
-    Barrier barrier(numblocks);
+    Barrier barrier(internal::convert_index<unsigned int>(numblocks));
     MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
     for (Index i = 0; i < numblocks; ++i) {
       device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, Vectorizable>::run,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
index bdcd70fd9..3523e7c94 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
@@ -20,6 +20,7 @@ struct static_val {
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; }
 
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { }
+
   template <typename T>
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) {
     eigen_assert(v == n);
@@ -53,7 +54,7 @@ struct TensorUInt128
   template<typename T>
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
   explicit TensorUInt128(const T& x) : high(0), low(x) {
-    eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= static_cast<typename conditional<sizeof(LOW) == 8, uint64_t, uint32_t>::type>(NumTraits<LOW>::highest())));
+    eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= NumTraits<uint64_t>::highest()));
     eigen_assert(x >= 0);
   }
 
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 5137b51cf..d35ca5022 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -192,10 +192,12 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
   # Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
   # and -fno-check-new flags since they trigger thousands of compilation warnings
   # in the CUDA runtime
+  # Also remove -ansi, which is incompatible with -std=c++11.
   string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
   string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
   string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
   string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+  string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 
   message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS})
 
@@ -211,7 +213,14 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
     set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr")
   endif()
 
-  set(CUDA_NVCC_FLAGS "-std=c++11 ${EIGEN_CUDA_RELAXED_CONSTEXPR} -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\"")
+  if(NOT EIGEN_TEST_CXX11)
+    set(EIGEN_CUDA_CXX11_FLAG "-std=c++11")
+  else()
+    # Otherwise the flag has already been added by the set(CMAKE_CXX_STANDARD 11) above.
+    set(EIGEN_CUDA_CXX11_FLAG "")
+  endif()
+
+  set(CUDA_NVCC_FLAGS  "${EIGEN_CUDA_CXX11_FLAG} ${EIGEN_CUDA_RELAXED_CONSTEXPR} -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\" ${CUDA_NVCC_FLAGS}")
   cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
   set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
 
diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp
index 1dd6dc97d..8b7528fb7 100644
--- a/unsupported/test/FFTW.cpp
+++ b/unsupported/test/FFTW.cpp
@@ -18,11 +18,11 @@ using namespace Eigen;
 
 
 template < typename T>
-complex<long double>  promote(complex<T> x) { return complex<long double>(x.real(),x.imag()); }
+complex<long double>  promote(complex<T> x) { return complex<long double>((long double)x.real(),(long double)x.imag()); }
 
-complex<long double>  promote(float x) { return complex<long double>( x); }
-complex<long double>  promote(double x) { return complex<long double>( x); }
-complex<long double>  promote(long double x) { return complex<long double>( x); }
+complex<long double>  promote(float x) { return complex<long double>((long double)x); }
+complex<long double>  promote(double x) { return complex<long double>((long double)x); }
+complex<long double>  promote(long double x) { return complex<long double>((long double)x); }
     
 
     template <typename VT1,typename VT2>
@@ -33,7 +33,7 @@ complex<long double>  promote(long double x) { return complex<long double>( x);
         long double pi = acos((long double)-1 );
         for (size_t k0=0;k0<(size_t)fftbuf.size();++k0) {
             complex<long double> acc = 0;
-            long double phinc = -2.*k0* pi / timebuf.size();
+            long double phinc = (long double)(-2.)*k0* pi / timebuf.size();
             for (size_t k1=0;k1<(size_t)timebuf.size();++k1) {
                 acc +=  promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
             }
@@ -54,8 +54,8 @@ complex<long double>  promote(long double x) { return complex<long double>( x);
         long double difpower=0;
         size_t n = (min)( buf1.size(),buf2.size() );
         for (size_t k=0;k<n;++k) {
-            totalpower += (numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2;
-            difpower += numext::abs2(buf1[k] - buf2[k]);
+            totalpower += (long double)((numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2);
+            difpower += (long double)(numext::abs2(buf1[k] - buf2[k]));
         }
         return sqrt(difpower/totalpower);
     }
@@ -93,19 +93,19 @@ void test_scalar_generic(int nfft)
     fft.SetFlag(fft.HalfSpectrum );
     fft.fwd( freqBuf,tbuf);
     VERIFY((size_t)freqBuf.size() == (size_t)( (nfft>>1)+1) );
-    VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>()  );// gross check
+    VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>()  );// gross check
 
     fft.ClearFlag(fft.HalfSpectrum );
     fft.fwd( freqBuf,tbuf);
     VERIFY( (size_t)freqBuf.size() == (size_t)nfft);
-    VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>()  );// gross check
+    VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>()  );// gross check
 
     if (nfft&1)
         return; // odd FFTs get the wrong size inverse FFT
 
     ScalarVector tbuf2;
     fft.inv( tbuf2 , freqBuf);
-    VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>()  );// gross check
+    VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>()  );// gross check
 
 
     // verify that the Unscaled flag takes effect
@@ -121,12 +121,12 @@ void test_scalar_generic(int nfft)
     //for (size_t i=0;i<(size_t) tbuf.size();++i)
     //    cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " -  in=" << tbuf[i] << " => " << (tbuf3[i] - tbuf[i] ) <<  endl;
 
-    VERIFY( dif_rmse(tbuf,tbuf3) < test_precision<T>()  );// gross check
+    VERIFY( T(dif_rmse(tbuf,tbuf3)) < test_precision<T>()  );// gross check
 
     // verify that ClearFlag works
     fft.ClearFlag(fft.Unscaled);
     fft.inv( tbuf2 , freqBuf);
-    VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>()  );// gross check
+    VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>()  );// gross check
 }
 
 template <typename T>
@@ -152,10 +152,10 @@ void test_complex_generic(int nfft)
         inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
     fft.fwd( outbuf , inbuf);
 
-    VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
+    VERIFY( T(fft_rmse(outbuf,inbuf)) < test_precision<T>()  );// gross check
     fft.inv( buf3 , outbuf);
 
-    VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
+    VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>()  );// gross check
 
     // verify that the Unscaled flag takes effect
     ComplexVector buf4;
@@ -163,12 +163,12 @@ void test_complex_generic(int nfft)
     fft.inv( buf4 , outbuf);
     for (int k=0;k<nfft;++k)
         buf4[k] *= T(1./nfft);
-    VERIFY( dif_rmse(inbuf,buf4) < test_precision<T>()  );// gross check
+    VERIFY( T(dif_rmse(inbuf,buf4)) < test_precision<T>()  );// gross check
 
     // verify that ClearFlag works
     fft.ClearFlag(fft.Unscaled);
     fft.inv( buf3 , outbuf);
-    VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
+    VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>()  );// gross check
 }
 
 template <typename T>
diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp
index e39a7f83c..027f1c3e6 100644
--- a/unsupported/test/cxx11_float16.cpp
+++ b/unsupported/test/cxx11_float16.cpp
@@ -165,7 +165,7 @@ void test_basic_functions()
   VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f);
 
   VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f);
-  VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI));
+  VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), 20.f + float(EIGEN_PI));
 
   VERIFY_IS_EQUAL(float(numext::log(half(1.0f))), 0.0f);
   VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f);
diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp
index 233d69493..f7de43110 100644
--- a/unsupported/test/cxx11_tensor_morphing.cpp
+++ b/unsupported/test/cxx11_tensor_morphing.cpp
@@ -13,7 +13,7 @@
 
 using Eigen::Tensor;
 
-template<typename=void>
+template<typename>
 static void test_simple_reshape()
 {
   Tensor<float, 5> tensor1(2,3,1,7,1);
@@ -41,7 +41,7 @@ static void test_simple_reshape()
   }
 }
 
-template<typename=void>
+template<typename>
 static void test_reshape_in_expr() {
   MatrixXf m1(2,3*5*7*11);
   MatrixXf m2(3*5*7*11,13);
@@ -66,7 +66,7 @@ static void test_reshape_in_expr() {
   }
 }
 
-template<typename=void>
+template<typename>
 static void test_reshape_as_lvalue()
 {
   Tensor<float, 3> tensor(2,3,7);
@@ -461,25 +461,25 @@ static void test_composition()
 
 void test_cxx11_tensor_morphing()
 {
-  CALL_SUBTEST_1(test_simple_reshape());
-  CALL_SUBTEST_1(test_reshape_in_expr());
-  CALL_SUBTEST_1(test_reshape_as_lvalue());
+  CALL_SUBTEST_1(test_simple_reshape<void>());
+  CALL_SUBTEST_1(test_reshape_in_expr<void>());
+  CALL_SUBTEST_1(test_reshape_as_lvalue<void>());
 
   CALL_SUBTEST_1(test_simple_slice<ColMajor>());
   CALL_SUBTEST_1(test_simple_slice<RowMajor>());
   CALL_SUBTEST_1(test_const_slice());
   CALL_SUBTEST_2(test_slice_in_expr<ColMajor>());
-  CALL_SUBTEST_2(test_slice_in_expr<RowMajor>());
-  CALL_SUBTEST_3(test_slice_as_lvalue<ColMajor>());
-  CALL_SUBTEST_3(test_slice_as_lvalue<RowMajor>());
-  CALL_SUBTEST_4(test_slice_raw_data<ColMajor>());
-  CALL_SUBTEST_4(test_slice_raw_data<RowMajor>());
-
-  CALL_SUBTEST_5(test_strided_slice_write<ColMajor>());
-  CALL_SUBTEST_5(test_strided_slice<ColMajor>());
-  CALL_SUBTEST_5(test_strided_slice_write<RowMajor>());
-  CALL_SUBTEST_5(test_strided_slice<RowMajor>());
-
-  CALL_SUBTEST_6(test_composition<ColMajor>());
-  CALL_SUBTEST_6(test_composition<RowMajor>());
+  CALL_SUBTEST_3(test_slice_in_expr<RowMajor>());
+  CALL_SUBTEST_4(test_slice_as_lvalue<ColMajor>());
+  CALL_SUBTEST_4(test_slice_as_lvalue<RowMajor>());
+  CALL_SUBTEST_5(test_slice_raw_data<ColMajor>());
+  CALL_SUBTEST_5(test_slice_raw_data<RowMajor>());
+
+  CALL_SUBTEST_6(test_strided_slice_write<ColMajor>());
+  CALL_SUBTEST_6(test_strided_slice<ColMajor>());
+  CALL_SUBTEST_6(test_strided_slice_write<RowMajor>());
+  CALL_SUBTEST_6(test_strided_slice<RowMajor>());
+
+  CALL_SUBTEST_7(test_composition<ColMajor>());
+  CALL_SUBTEST_7(test_composition<RowMajor>());
 }