aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h50
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h2
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h3
-rw-r--r--unsupported/test/CMakeLists.txt11
-rw-r--r--unsupported/test/FFTW.cpp32
-rw-r--r--unsupported/test/cxx11_float16.cpp2
-rw-r--r--unsupported/test/cxx11_tensor_morphing.cpp38
7 files changed, 86 insertions, 52 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
index 33c6c1b0f..ede3939c2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
@@ -29,25 +29,47 @@ namespace Eigen {
namespace internal {
namespace {
+
// Note: result is undefined if val == 0
template <typename T>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int count_leading_zeros(const T val)
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+ typename internal::enable_if<sizeof(T)==4,int>::type count_leading_zeros(const T val)
{
#ifdef __CUDA_ARCH__
- return (sizeof(T) == 8) ? __clzll(val) : __clz(val);
+ return __clz(val);
#elif EIGEN_COMP_MSVC
- unsigned long index;
- if (sizeof(T) == 8) {
- _BitScanReverse64(&index, val);
- } else {
- _BitScanReverse(&index, val);
- }
- return (sizeof(T) == 8) ? 63 - index : 31 - index;
+ unsigned long index;
+ _BitScanReverse(&index, val);
+ return 31 - index;
+#else
+ EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ return __builtin_clz(static_cast<uint32_t>(val));
+#endif
+ }
+
+ template <typename T>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+ typename internal::enable_if<sizeof(T)==8,int>::type count_leading_zeros(const T val)
+ {
+#ifdef __CUDA_ARCH__
+ return __clzll(val);
+#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64
+ unsigned long index;
+ _BitScanReverse64(&index, val);
+ return 63 - index;
+#elif EIGEN_COMP_MSVC
+ // MSVC's _BitScanReverse64 is not available for 32bits builds.
+ unsigned int lo = (unsigned int)(val&0xffffffff);
+ unsigned int hi = (unsigned int)((val>>32)&0xffffffff);
+ int n;
+ if(hi==0)
+ n = 32 + count_leading_zeros<unsigned int>(lo);
+ else
+ n = count_leading_zeros<unsigned int>(hi);
+ return n;
#else
EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return (sizeof(T) == 8) ?
- __builtin_clzll(static_cast<uint64_t>(val)) :
- __builtin_clz(static_cast<uint32_t>(val));
+ return __builtin_clzll(static_cast<uint64_t>(val));
#endif
}
@@ -98,7 +120,9 @@ namespace {
return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
#else
const uint64_t shift = 1ULL << log_div;
- TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1));
+ TensorUInt128<uint64_t, uint64_t> result = TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider)
+ - TensorUInt128<static_val<1>, static_val<0> >(1, 0)
+ + TensorUInt128<static_val<0>, static_val<1> >(1);
return static_cast<uint64_t>(result);
#endif
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index fd0842cad..f950f0093 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -264,7 +264,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
eigen_assert(num_coeffs >= numblocks * blocksize);
- Barrier barrier(numblocks);
+ Barrier barrier(internal::convert_index<unsigned int>(numblocks));
MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
for (Index i = 0; i < numblocks; ++i) {
device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, Vectorizable>::run,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
index bdcd70fd9..3523e7c94 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
@@ -20,6 +20,7 @@ struct static_val {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { }
+
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) {
eigen_assert(v == n);
@@ -53,7 +54,7 @@ struct TensorUInt128
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
explicit TensorUInt128(const T& x) : high(0), low(x) {
- eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= static_cast<typename conditional<sizeof(LOW) == 8, uint64_t, uint32_t>::type>(NumTraits<LOW>::highest())));
+ eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= NumTraits<uint64_t>::highest()));
eigen_assert(x >= 0);
}
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 5137b51cf..d35ca5022 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -192,10 +192,12 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
# Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
# and -fno-check-new flags since they trigger thousands of compilation warnings
# in the CUDA runtime
+ # Also remove -ansi that is incompatible with std=c++11.
string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS})
@@ -211,7 +213,14 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr")
endif()
- set(CUDA_NVCC_FLAGS "-std=c++11 ${EIGEN_CUDA_RELAXED_CONSTEXPR} -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\"")
+ if(NOT EIGEN_TEST_CXX11)
+ set(EIGEN_CUDA_CXX11_FLAG "-std=c++11")
+ else()
+ # otherwise the flag has already been added because of the above set(CMAKE_CXX_STANDARD 11)
+ set(EIGEN_CUDA_CXX11_FLAG "")
+ endif()
+
+ set(CUDA_NVCC_FLAGS "${EIGEN_CUDA_CXX11_FLAG} ${EIGEN_CUDA_RELAXED_CONSTEXPR} -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\" ${CUDA_NVCC_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp
index 1dd6dc97d..8b7528fb7 100644
--- a/unsupported/test/FFTW.cpp
+++ b/unsupported/test/FFTW.cpp
@@ -18,11 +18,11 @@ using namespace Eigen;
template < typename T>
-complex<long double> promote(complex<T> x) { return complex<long double>(x.real(),x.imag()); }
+complex<long double> promote(complex<T> x) { return complex<long double>((long double)x.real(),(long double)x.imag()); }
-complex<long double> promote(float x) { return complex<long double>( x); }
-complex<long double> promote(double x) { return complex<long double>( x); }
-complex<long double> promote(long double x) { return complex<long double>( x); }
+complex<long double> promote(float x) { return complex<long double>((long double)x); }
+complex<long double> promote(double x) { return complex<long double>((long double)x); }
+complex<long double> promote(long double x) { return complex<long double>((long double)x); }
template <typename VT1,typename VT2>
@@ -33,7 +33,7 @@ complex<long double> promote(long double x) { return complex<long double>( x);
long double pi = acos((long double)-1 );
for (size_t k0=0;k0<(size_t)fftbuf.size();++k0) {
complex<long double> acc = 0;
- long double phinc = -2.*k0* pi / timebuf.size();
+ long double phinc = (long double)(-2.)*k0* pi / timebuf.size();
for (size_t k1=0;k1<(size_t)timebuf.size();++k1) {
acc += promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
}
@@ -54,8 +54,8 @@ complex<long double> promote(long double x) { return complex<long double>( x);
long double difpower=0;
size_t n = (min)( buf1.size(),buf2.size() );
for (size_t k=0;k<n;++k) {
- totalpower += (numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2;
- difpower += numext::abs2(buf1[k] - buf2[k]);
+ totalpower += (long double)((numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2);
+ difpower += (long double)(numext::abs2(buf1[k] - buf2[k]));
}
return sqrt(difpower/totalpower);
}
@@ -93,19 +93,19 @@ void test_scalar_generic(int nfft)
fft.SetFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)( (nfft>>1)+1) );
- VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>() );// gross check
+ VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
fft.ClearFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY( (size_t)freqBuf.size() == (size_t)nfft);
- VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>() );// gross check
+ VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
if (nfft&1)
return; // odd FFTs get the wrong size inverse FFT
ScalarVector tbuf2;
fft.inv( tbuf2 , freqBuf);
- VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
@@ -121,12 +121,12 @@ void test_scalar_generic(int nfft)
//for (size_t i=0;i<(size_t) tbuf.size();++i)
// cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " - in=" << tbuf[i] << " => " << (tbuf3[i] - tbuf[i] ) << endl;
- VERIFY( dif_rmse(tbuf,tbuf3) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(tbuf,tbuf3)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( tbuf2 , freqBuf);
- VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
}
template <typename T>
@@ -152,10 +152,10 @@ void test_complex_generic(int nfft)
inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
fft.fwd( outbuf , inbuf);
- VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>() );// gross check
+ VERIFY( T(fft_rmse(outbuf,inbuf)) < test_precision<T>() );// gross check
fft.inv( buf3 , outbuf);
- VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
ComplexVector buf4;
@@ -163,12 +163,12 @@ void test_complex_generic(int nfft)
fft.inv( buf4 , outbuf);
for (int k=0;k<nfft;++k)
buf4[k] *= T(1./nfft);
- VERIFY( dif_rmse(inbuf,buf4) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(inbuf,buf4)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( buf3 , outbuf);
- VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
}
template <typename T>
diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp
index e39a7f83c..027f1c3e6 100644
--- a/unsupported/test/cxx11_float16.cpp
+++ b/unsupported/test/cxx11_float16.cpp
@@ -165,7 +165,7 @@ void test_basic_functions()
VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f);
VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f);
- VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI));
+ VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), 20.f + float(EIGEN_PI));
VERIFY_IS_EQUAL(float(numext::log(half(1.0f))), 0.0f);
VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f);
diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp
index 233d69493..f7de43110 100644
--- a/unsupported/test/cxx11_tensor_morphing.cpp
+++ b/unsupported/test/cxx11_tensor_morphing.cpp
@@ -13,7 +13,7 @@
using Eigen::Tensor;
-template<typename=void>
+template<typename>
static void test_simple_reshape()
{
Tensor<float, 5> tensor1(2,3,1,7,1);
@@ -41,7 +41,7 @@ static void test_simple_reshape()
}
}
-template<typename=void>
+template<typename>
static void test_reshape_in_expr() {
MatrixXf m1(2,3*5*7*11);
MatrixXf m2(3*5*7*11,13);
@@ -66,7 +66,7 @@ static void test_reshape_in_expr() {
}
}
-template<typename=void>
+template<typename>
static void test_reshape_as_lvalue()
{
Tensor<float, 3> tensor(2,3,7);
@@ -461,25 +461,25 @@ static void test_composition()
void test_cxx11_tensor_morphing()
{
- CALL_SUBTEST_1(test_simple_reshape());
- CALL_SUBTEST_1(test_reshape_in_expr());
- CALL_SUBTEST_1(test_reshape_as_lvalue());
+ CALL_SUBTEST_1(test_simple_reshape<void>());
+ CALL_SUBTEST_1(test_reshape_in_expr<void>());
+ CALL_SUBTEST_1(test_reshape_as_lvalue<void>());
CALL_SUBTEST_1(test_simple_slice<ColMajor>());
CALL_SUBTEST_1(test_simple_slice<RowMajor>());
CALL_SUBTEST_1(test_const_slice());
CALL_SUBTEST_2(test_slice_in_expr<ColMajor>());
- CALL_SUBTEST_2(test_slice_in_expr<RowMajor>());
- CALL_SUBTEST_3(test_slice_as_lvalue<ColMajor>());
- CALL_SUBTEST_3(test_slice_as_lvalue<RowMajor>());
- CALL_SUBTEST_4(test_slice_raw_data<ColMajor>());
- CALL_SUBTEST_4(test_slice_raw_data<RowMajor>());
-
- CALL_SUBTEST_5(test_strided_slice_write<ColMajor>());
- CALL_SUBTEST_5(test_strided_slice<ColMajor>());
- CALL_SUBTEST_5(test_strided_slice_write<RowMajor>());
- CALL_SUBTEST_5(test_strided_slice<RowMajor>());
-
- CALL_SUBTEST_6(test_composition<ColMajor>());
- CALL_SUBTEST_6(test_composition<RowMajor>());
+ CALL_SUBTEST_3(test_slice_in_expr<RowMajor>());
+ CALL_SUBTEST_4(test_slice_as_lvalue<ColMajor>());
+ CALL_SUBTEST_4(test_slice_as_lvalue<RowMajor>());
+ CALL_SUBTEST_5(test_slice_raw_data<ColMajor>());
+ CALL_SUBTEST_5(test_slice_raw_data<RowMajor>());
+
+ CALL_SUBTEST_6(test_strided_slice_write<ColMajor>());
+ CALL_SUBTEST_6(test_strided_slice<ColMajor>());
+ CALL_SUBTEST_6(test_strided_slice_write<RowMajor>());
+ CALL_SUBTEST_6(test_strided_slice<RowMajor>());
+
+ CALL_SUBTEST_7(test_composition<ColMajor>());
+ CALL_SUBTEST_7(test_composition<RowMajor>());
}