Diffstat (limited to 'unsupported/test')
-rw-r--r--  unsupported/test/CMakeLists.txt                          |  47
-rw-r--r--  unsupported/test/EulerAngles.cpp                         | 208
-rw-r--r--  unsupported/test/FFTW.cpp                                |  32
-rw-r--r--  unsupported/test/autodiff.cpp                            | 153
-rw-r--r--  unsupported/test/autodiff_scalar.cpp                     |  35
-rw-r--r--  unsupported/test/cxx11_eventcount.cpp                    |   6
-rw-r--r--  unsupported/test/cxx11_float16.cpp                       | 192
-rw-r--r--  unsupported/test/cxx11_non_blocking_thread_pool.cpp     | 107
-rw-r--r--  unsupported/test/cxx11_runqueue.cpp                      |  74
-rw-r--r--  unsupported/test/cxx11_tensor_argmax_cuda.cu             |   3
-rw-r--r--  unsupported/test/cxx11_tensor_assign.cpp                 |   2
-rw-r--r--  unsupported/test/cxx11_tensor_broadcasting.cpp           |   2
-rw-r--r--  unsupported/test/cxx11_tensor_cast_float16_cuda.cu       |   4
-rw-r--r--  unsupported/test/cxx11_tensor_complex_cuda.cu            | 115
-rw-r--r--  unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu  |  97
-rw-r--r--  unsupported/test/cxx11_tensor_contract_cuda.cu           |  66
-rw-r--r--  unsupported/test/cxx11_tensor_contraction.cpp            |  32
-rw-r--r--  unsupported/test/cxx11_tensor_cuda.cu                    | 255
-rw-r--r--  unsupported/test/cxx11_tensor_device.cu                  |   8
-rw-r--r--  unsupported/test/cxx11_tensor_dimension.cpp              |  16
-rw-r--r--  unsupported/test/cxx11_tensor_expr.cpp                   |  28
-rw-r--r--  unsupported/test/cxx11_tensor_fft.cpp                    |   8
-rw-r--r--  unsupported/test/cxx11_tensor_fixed_size.cpp             |  16
-rw-r--r--  unsupported/test/cxx11_tensor_image_patch.cpp            |  49
-rw-r--r--  unsupported/test/cxx11_tensor_index_list.cpp             | 106
-rw-r--r--  unsupported/test/cxx11_tensor_intdiv.cpp                 |   2
-rw-r--r--  unsupported/test/cxx11_tensor_io.cpp                     |  22
-rw-r--r--  unsupported/test/cxx11_tensor_morphing.cpp               | 165
-rw-r--r--  unsupported/test/cxx11_tensor_of_float16_cuda.cu         | 326
-rw-r--r--  unsupported/test/cxx11_tensor_random_cuda.cu             |  57
-rw-r--r--  unsupported/test/cxx11_tensor_reduction.cpp              |  37
-rw-r--r--  unsupported/test/cxx11_tensor_reduction_cuda.cu          | 122
-rw-r--r--  unsupported/test/cxx11_tensor_scan.cpp                   | 110
-rw-r--r--  unsupported/test/cxx11_tensor_scan_cuda.cu               |  79
-rw-r--r--  unsupported/test/cxx11_tensor_sugar.cpp                  |  34
-rw-r--r--  unsupported/test/cxx11_tensor_thread_pool.cpp            |  51
-rw-r--r--  unsupported/test/kronecker_product.cpp                   |  22
-rw-r--r--  unsupported/test/matrix_function.cpp                     |   4
-rw-r--r--  unsupported/test/matrix_functions.h                      |   2
-rw-r--r--  unsupported/test/matrix_power.cpp                        |  42
-rw-r--r--  unsupported/test/mpreal/mpreal.h                         |   2
-rw-r--r--  unsupported/test/mpreal_support.cpp                      |   1
-rw-r--r--  unsupported/test/special_functions.cpp                   | 345

43 files changed, 2546 insertions(+), 538 deletions(-)
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 22442b394..a1823beaa 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -59,6 +59,8 @@ ei_add_test(alignedvector3)
ei_add_test(FFT)
+ei_add_test(EulerAngles)
+
find_package(MPFR 2.3.0)
find_package(GMP)
if(MPFR_FOUND AND EIGEN_COMPILER_SUPPORT_CXX11)
@@ -109,10 +111,14 @@ ei_add_test(gmres)
ei_add_test(minres)
ei_add_test(levenberg_marquardt)
ei_add_test(kronecker_product)
+ei_add_test(special_functions)
# TODO: The following test names are prefixed with the cxx11 string, since historically
# the tests depended on c++11. This isn't the case anymore so we ought to rename them.
-ei_add_test(cxx11_float16)
+# FIXME: Old versions of MSVC fail to compile this code, so we just disable these tests
+# when using Visual Studio. We should make the check stricter to enable the tests for
+# newer versions of MSVC.
+if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
ei_add_test(cxx11_tensor_dimension)
ei_add_test(cxx11_tensor_map)
ei_add_test(cxx11_tensor_assign)
@@ -130,7 +136,8 @@ ei_add_test(cxx11_tensor_io)
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
# This test requires __uint128_t which is only available on 64-bit systems
ei_add_test(cxx11_tensor_uint128)
-endif()
+endif()
+endif()
if(EIGEN_TEST_CXX11)
# It should be safe to always run these tests as there is some fallback code for
@@ -139,6 +146,8 @@ if(EIGEN_TEST_CXX11)
ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
+ ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
+
ei_add_test(cxx11_meta)
ei_add_test(cxx11_tensor_simple)
# ei_add_test(cxx11_tensor_symmetry)
@@ -174,6 +183,7 @@ if(EIGEN_TEST_CXX11)
ei_add_test(cxx11_tensor_custom_index)
ei_add_test(cxx11_tensor_fft)
ei_add_test(cxx11_tensor_ifft)
+ ei_add_test(cxx11_tensor_scan)
endif()
@@ -183,37 +193,58 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
# Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
# and -fno-check-new flags since they trigger thousands of compilation warnings
# in the CUDA runtime
+ # Also remove -ansi, which is incompatible with -std=c++11.
string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS})
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
- set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
+ set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE)
endif()
if(EIGEN_TEST_CUDA_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_${EIGEN_CUDA_COMPUTE_ARCH}")
endif()
- set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\"")
+ set(EIGEN_CUDA_RELAXED_CONSTEXPR "--expt-relaxed-constexpr")
+ if (${CUDA_VERSION} STREQUAL "7.0")
+ set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr")
+ endif()
+
+ if( (NOT EIGEN_TEST_CXX11) OR (CMAKE_VERSION VERSION_LESS 3.3))
+ set(EIGEN_CUDA_CXX11_FLAG "-std=c++11")
+ else()
+ # otherwise the flag has already been added because of the above set(CMAKE_CXX_STANDARD 11)
+ set(EIGEN_CUDA_CXX11_FLAG "")
+ endif()
+
+ set(CUDA_NVCC_FLAGS "${EIGEN_CUDA_CXX11_FLAG} ${EIGEN_CUDA_RELAXED_CONSTEXPR} -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\" ${CUDA_NVCC_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
- ei_add_test(cxx11_tensor_device)
- ei_add_test(cxx11_tensor_cuda)
- ei_add_test(cxx11_tensor_contract_cuda)
+ ei_add_test(cxx11_tensor_complex_cuda)
+ ei_add_test(cxx11_tensor_complex_cwise_ops_cuda)
ei_add_test(cxx11_tensor_reduction_cuda)
ei_add_test(cxx11_tensor_argmax_cuda)
ei_add_test(cxx11_tensor_cast_float16_cuda)
+ ei_add_test(cxx11_tensor_scan_cuda)
+
+ # Contractions require arch 3.0 or higher
+ if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 29)
+ ei_add_test(cxx11_tensor_device)
+ ei_add_test(cxx11_tensor_cuda)
+ ei_add_test(cxx11_tensor_contract_cuda)
+ ei_add_test(cxx11_tensor_of_float16_cuda)
+ endif()
# The random number generation code requires arch 3.5 or greater.
if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 34)
ei_add_test(cxx11_tensor_random_cuda)
endif()
- ei_add_test(cxx11_tensor_of_float16_cuda)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
endif()
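The arch gates above are resolved at configure time from EIGEN_CUDA_COMPUTE_ARCH. For context, a minimal runtime analogue of the same checks; this is a hypothetical standalone helper (not part of this patch), assuming a CUDA toolkit is available:

    #include <cuda_runtime.h>
    #include <cstdio>

    int main() {
      cudaDeviceProp prop;
      if (cudaGetDeviceProperties(&prop, 0) != cudaSuccess) return 1;
      int arch = prop.major * 10 + prop.minor;  // e.g. 35 for sm_35
      std::printf("compute arch: %d\n", arch);
      if (arch < 30) std::printf("would skip contraction tests (need sm_30+)\n");
      if (arch < 35) std::printf("would skip random-number tests (need sm_35+)\n");
      return 0;
    }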
diff --git a/unsupported/test/EulerAngles.cpp b/unsupported/test/EulerAngles.cpp
new file mode 100644
index 000000000..a8cb52864
--- /dev/null
+++ b/unsupported/test/EulerAngles.cpp
@@ -0,0 +1,208 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <unsupported/Eigen/EulerAngles>
+
+using namespace Eigen;
+
+template<typename EulerSystem, typename Scalar>
+void verify_euler_ranged(const Matrix<Scalar,3,1>& ea,
+ bool positiveRangeAlpha, bool positiveRangeBeta, bool positiveRangeGamma)
+{
+ typedef EulerAngles<Scalar, EulerSystem> EulerAnglesType;
+ typedef Matrix<Scalar,3,3> Matrix3;
+ typedef Matrix<Scalar,3,1> Vector3;
+ typedef Quaternion<Scalar> QuaternionType;
+ typedef AngleAxis<Scalar> AngleAxisType;
+ using std::abs;
+
+ Scalar alphaRangeStart, alphaRangeEnd;
+ Scalar betaRangeStart, betaRangeEnd;
+ Scalar gammaRangeStart, gammaRangeEnd;
+
+ if (positiveRangeAlpha)
+ {
+ alphaRangeStart = Scalar(0);
+ alphaRangeEnd = Scalar(2 * EIGEN_PI);
+ }
+ else
+ {
+ alphaRangeStart = -Scalar(EIGEN_PI);
+ alphaRangeEnd = Scalar(EIGEN_PI);
+ }
+
+ if (positiveRangeBeta)
+ {
+ betaRangeStart = Scalar(0);
+ betaRangeEnd = Scalar(2 * EIGEN_PI);
+ }
+ else
+ {
+ betaRangeStart = -Scalar(EIGEN_PI);
+ betaRangeEnd = Scalar(EIGEN_PI);
+ }
+
+ if (positiveRangeGamma)
+ {
+ gammaRangeStart = Scalar(0);
+ gammaRangeEnd = Scalar(2 * EIGEN_PI);
+ }
+ else
+ {
+ gammaRangeStart = -Scalar(EIGEN_PI);
+ gammaRangeEnd = Scalar(EIGEN_PI);
+ }
+
+ const int i = EulerSystem::AlphaAxisAbs - 1;
+ const int j = EulerSystem::BetaAxisAbs - 1;
+ const int k = EulerSystem::GammaAxisAbs - 1;
+
+ const int iFactor = EulerSystem::IsAlphaOpposite ? -1 : 1;
+ const int jFactor = EulerSystem::IsBetaOpposite ? -1 : 1;
+ const int kFactor = EulerSystem::IsGammaOpposite ? -1 : 1;
+
+ const Vector3 I = EulerAnglesType::AlphaAxisVector();
+ const Vector3 J = EulerAnglesType::BetaAxisVector();
+ const Vector3 K = EulerAnglesType::GammaAxisVector();
+
+ EulerAnglesType e(ea[0], ea[1], ea[2]);
+
+ Matrix3 m(e);
+ Vector3 eabis = EulerAnglesType(m, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles();
+
+ // Check that eabis is in range
+ VERIFY(alphaRangeStart <= eabis[0] && eabis[0] <= alphaRangeEnd);
+ VERIFY(betaRangeStart <= eabis[1] && eabis[1] <= betaRangeEnd);
+ VERIFY(gammaRangeStart <= eabis[2] && eabis[2] <= gammaRangeEnd);
+
+ Vector3 eabis2 = m.eulerAngles(i, j, k);
+
+ // Invert the relevant axes
+ eabis2[0] *= iFactor;
+ eabis2[1] *= jFactor;
+ eabis2[2] *= kFactor;
+
+ // Saturate the angles to the correct range
+ if (positiveRangeAlpha && (eabis2[0] < 0))
+ eabis2[0] += Scalar(2 * EIGEN_PI);
+ if (positiveRangeBeta && (eabis2[1] < 0))
+ eabis2[1] += Scalar(2 * EIGEN_PI);
+ if (positiveRangeGamma && (eabis2[2] < 0))
+ eabis2[2] += Scalar(2 * EIGEN_PI);
+
+ VERIFY_IS_APPROX(eabis, eabis2); // Verify that our estimate matches what m.eulerAngles() returns
+
+ Matrix3 mbis(AngleAxisType(eabis[0], I) * AngleAxisType(eabis[1], J) * AngleAxisType(eabis[2], K));
+ VERIFY_IS_APPROX(m, mbis);
+
+ // Tests that are only relevant when no positive range is used
+ if (!(positiveRangeAlpha || positiveRangeBeta || positiveRangeGamma))
+ {
+ /* If I==K and ea[1]==0, there is no unique solution. */
+ /* The same remark applies when I!=K and |ea[1]| is close to pi/2. */
+ if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision<Scalar>())) )
+ VERIFY((ea-eabis).norm() <= test_precision<Scalar>());
+
+ // approx_or_less_than does not work for 0
+ VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1)));
+ }
+
+ // Quaternions
+ QuaternionType q(e);
+ eabis = EulerAnglesType(q, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles();
+ VERIFY_IS_APPROX(eabis, eabis2); // Verify that the Euler angles are still the same
+}
+
+template<typename EulerSystem, typename Scalar>
+void verify_euler(const Matrix<Scalar,3,1>& ea)
+{
+ verify_euler_ranged<EulerSystem>(ea, false, false, false);
+ verify_euler_ranged<EulerSystem>(ea, false, false, true);
+ verify_euler_ranged<EulerSystem>(ea, false, true, false);
+ verify_euler_ranged<EulerSystem>(ea, false, true, true);
+ verify_euler_ranged<EulerSystem>(ea, true, false, false);
+ verify_euler_ranged<EulerSystem>(ea, true, false, true);
+ verify_euler_ranged<EulerSystem>(ea, true, true, false);
+ verify_euler_ranged<EulerSystem>(ea, true, true, true);
+}
+
+template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)
+{
+ verify_euler<EulerSystemXYZ>(ea);
+ verify_euler<EulerSystemXYX>(ea);
+ verify_euler<EulerSystemXZY>(ea);
+ verify_euler<EulerSystemXZX>(ea);
+
+ verify_euler<EulerSystemYZX>(ea);
+ verify_euler<EulerSystemYZY>(ea);
+ verify_euler<EulerSystemYXZ>(ea);
+ verify_euler<EulerSystemYXY>(ea);
+
+ verify_euler<EulerSystemZXY>(ea);
+ verify_euler<EulerSystemZXZ>(ea);
+ verify_euler<EulerSystemZYX>(ea);
+ verify_euler<EulerSystemZYZ>(ea);
+}
+
+template<typename Scalar> void eulerangles()
+{
+ typedef Matrix<Scalar,3,3> Matrix3;
+ typedef Matrix<Scalar,3,1> Vector3;
+ typedef Array<Scalar,3,1> Array3;
+ typedef Quaternion<Scalar> Quaternionx;
+ typedef AngleAxis<Scalar> AngleAxisType;
+
+ Scalar a = internal::random<Scalar>(-Scalar(EIGEN_PI), Scalar(EIGEN_PI));
+ Quaternionx q1;
+ q1 = AngleAxisType(a, Vector3::Random().normalized());
+ Matrix3 m;
+ m = q1;
+
+ Vector3 ea = m.eulerAngles(0,1,2);
+ check_all_var(ea);
+ ea = m.eulerAngles(0,1,0);
+ check_all_var(ea);
+
+ // Check with purely random Quaternion:
+ q1.coeffs() = Quaternionx::Coefficients::Random().normalized();
+ m = q1;
+ ea = m.eulerAngles(0,1,2);
+ check_all_var(ea);
+ ea = m.eulerAngles(0,1,0);
+ check_all_var(ea);
+
+ // Check with random angles in range [0:pi]x[-pi:pi]x[-pi:pi].
+ ea = (Array3::Random() + Array3(1,0,0))*Scalar(EIGEN_PI)*Array3(0.5,1,1);
+ check_all_var(ea);
+
+ ea[2] = ea[0] = internal::random<Scalar>(0,Scalar(EIGEN_PI));
+ check_all_var(ea);
+
+ ea[0] = ea[1] = internal::random<Scalar>(0,Scalar(EIGEN_PI));
+ check_all_var(ea);
+
+ ea[1] = 0;
+ check_all_var(ea);
+
+ ea.head(2).setZero();
+ check_all_var(ea);
+
+ ea.setZero();
+ check_all_var(ea);
+}
+
+void test_EulerAngles()
+{
+ for(int i = 0; i < g_repeat; i++) {
+ CALL_SUBTEST_1( eulerangles<float>() );
+ CALL_SUBTEST_2( eulerangles<double>() );
+ }
+}
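The new test exercises all 12 Euler systems in both angle ranges. For orientation, a minimal round-trip sketch of the module under test, assuming the convenience typedefs the EulerAngles header provides (e.g. EulerAnglesZYXd):

    #include <unsupported/Eigen/EulerAngles>
    #include <iostream>

    int main() {
      using namespace Eigen;
      EulerAnglesZYXd ea(0.3, -0.2, 0.5);  // alpha, beta, gamma
      Matrix3d m = ea.toRotationMatrix();  // angles -> rotation matrix
      EulerAnglesZYXd back(m);             // matrix -> angles (default range)
      std::cout << back.angles().transpose() << std::endl;
      return 0;
    }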
diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp
index d3718e2d2..8b7528fb7 100644
--- a/unsupported/test/FFTW.cpp
+++ b/unsupported/test/FFTW.cpp
@@ -18,11 +18,11 @@ using namespace Eigen;
template < typename T>
-complex<long double> promote(complex<T> x) { return complex<long double>(x.real(),x.imag()); }
+complex<long double> promote(complex<T> x) { return complex<long double>((long double)x.real(),(long double)x.imag()); }
-complex<long double> promote(float x) { return complex<long double>( x); }
-complex<long double> promote(double x) { return complex<long double>( x); }
-complex<long double> promote(long double x) { return complex<long double>( x); }
+complex<long double> promote(float x) { return complex<long double>((long double)x); }
+complex<long double> promote(double x) { return complex<long double>((long double)x); }
+complex<long double> promote(long double x) { return complex<long double>((long double)x); }
template <typename VT1,typename VT2>
@@ -33,7 +33,7 @@ complex<long double> promote(long double x) { return complex<long double>( x);
long double pi = acos((long double)-1 );
for (size_t k0=0;k0<(size_t)fftbuf.size();++k0) {
complex<long double> acc = 0;
- long double phinc = -2.*k0* pi / timebuf.size();
+ long double phinc = (long double)(-2.)*k0* pi / timebuf.size();
for (size_t k1=0;k1<(size_t)timebuf.size();++k1) {
acc += promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
}
@@ -54,8 +54,8 @@ complex<long double> promote(long double x) { return complex<long double>( x);
long double difpower=0;
size_t n = (min)( buf1.size(),buf2.size() );
for (size_t k=0;k<n;++k) {
- totalpower += (numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2.;
- difpower += numext::abs2(buf1[k] - buf2[k]);
+ totalpower += (long double)((numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2);
+ difpower += (long double)(numext::abs2(buf1[k] - buf2[k]));
}
return sqrt(difpower/totalpower);
}
@@ -93,19 +93,19 @@ void test_scalar_generic(int nfft)
fft.SetFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)( (nfft>>1)+1) );
- VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>() );// gross check
+ VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
fft.ClearFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY( (size_t)freqBuf.size() == (size_t)nfft);
- VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>() );// gross check
+ VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
if (nfft&1)
return; // odd FFTs get the wrong size inverse FFT
ScalarVector tbuf2;
fft.inv( tbuf2 , freqBuf);
- VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
@@ -121,12 +121,12 @@ void test_scalar_generic(int nfft)
//for (size_t i=0;i<(size_t) tbuf.size();++i)
// cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " - in=" << tbuf[i] << " => " << (tbuf3[i] - tbuf[i] ) << endl;
- VERIFY( dif_rmse(tbuf,tbuf3) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(tbuf,tbuf3)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( tbuf2 , freqBuf);
- VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
}
template <typename T>
@@ -152,10 +152,10 @@ void test_complex_generic(int nfft)
inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
fft.fwd( outbuf , inbuf);
- VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>() );// gross check
+ VERIFY( T(fft_rmse(outbuf,inbuf)) < test_precision<T>() );// gross check
fft.inv( buf3 , outbuf);
- VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
ComplexVector buf4;
@@ -163,12 +163,12 @@ void test_complex_generic(int nfft)
fft.inv( buf4 , outbuf);
for (int k=0;k<nfft;++k)
buf4[k] *= T(1./nfft);
- VERIFY( dif_rmse(inbuf,buf4) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(inbuf,buf4)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( buf3 , outbuf);
- VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
}
template <typename T>
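The FFTW changes above only add explicit long double casts to silence conversion warnings; the behavior under test is unchanged. For reference, a sketch of the forward/inverse round trip the test checks, using an even transform size since the test notes that odd sizes yield a wrong-size inverse:

    #include <unsupported/Eigen/FFT>
    #include <vector>
    #include <complex>
    #include <cstddef>

    int main() {
      Eigen::FFT<float> fft;
      std::vector<float> time(64);
      for (std::size_t k = 0; k < time.size(); ++k)
        time[k] = static_cast<float>(k % 8) - 3.5f;

      std::vector<std::complex<float> > freq;
      fft.fwd(freq, time);   // real-to-complex forward transform
      std::vector<float> time2;
      fft.inv(time2, freq);  // round trip: time2 ~= time
      return 0;
    }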
diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp
index 374f86df9..85743137e 100644
--- a/unsupported/test/autodiff.cpp
+++ b/unsupported/test/autodiff.cpp
@@ -16,7 +16,8 @@ EIGEN_DONT_INLINE Scalar foo(const Scalar& x, const Scalar& y)
using namespace std;
// return x+std::sin(y);
EIGEN_ASM_COMMENT("mybegin");
- return static_cast<Scalar>(x*2 - 1 + pow(1+x,2) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(-0.5*x*x+0));
+ // pow(float, int) promotes to pow(double, double)
+ return x*2 - 1 + static_cast<Scalar>(pow(1+x,2)) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(Scalar(-0.5)*x*x+0);
//return x+2*y*x;//x*2 -std::pow(x,2);//(2*y/x);// - y*2;
EIGEN_ASM_COMMENT("myend");
}
@@ -104,6 +105,89 @@ struct TestFunc1
}
};
+
+#if EIGEN_HAS_VARIADIC_TEMPLATES
+/* Test functor for the C++11 features. */
+template <typename Scalar>
+struct integratorFunctor
+{
+ typedef Matrix<Scalar, 2, 1> InputType;
+ typedef Matrix<Scalar, 2, 1> ValueType;
+
+ /*
+ * Implementation starts here.
+ */
+ integratorFunctor(const Scalar gain) : _gain(gain) {}
+ integratorFunctor(const integratorFunctor& f) : _gain(f._gain) {}
+ const Scalar _gain;
+
+ template <typename T1, typename T2>
+ void operator() (const T1 &input, T2 *output, const Scalar dt) const
+ {
+ T2 &o = *output;
+
+ /* Integrator to test the AD. */
+ o[0] = input[0] + input[1] * dt * _gain;
+ o[1] = input[1] * _gain;
+ }
+
+ /* Only needed for the test */
+ template <typename T1, typename T2, typename T3>
+ void operator() (const T1 &input, T2 *output, T3 *jacobian, const Scalar dt) const
+ {
+ T2 &o = *output;
+
+ /* Integrator to test the AD. */
+ o[0] = input[0] + input[1] * dt * _gain;
+ o[1] = input[1] * _gain;
+
+ if (jacobian)
+ {
+ T3 &j = *jacobian;
+
+ j(0, 0) = 1;
+ j(0, 1) = dt * _gain;
+ j(1, 0) = 0;
+ j(1, 1) = _gain;
+ }
+ }
+
+};
+
+template<typename Func> void forward_jacobian_cpp11(const Func& f)
+{
+ typedef typename Func::ValueType::Scalar Scalar;
+ typedef typename Func::ValueType ValueType;
+ typedef typename Func::InputType InputType;
+ typedef typename AutoDiffJacobian<Func>::JacobianType JacobianType;
+
+ InputType x = InputType::Random(InputType::RowsAtCompileTime);
+ ValueType y, yref;
+ JacobianType j, jref;
+
+ const Scalar dt = internal::random<double>();
+
+ jref.setZero();
+ yref.setZero();
+ f(x, &yref, &jref, dt);
+
+ //std::cerr << "y, yref, jref: " << "\n";
+ //std::cerr << y.transpose() << "\n\n";
+ //std::cerr << yref << "\n\n";
+ //std::cerr << jref << "\n\n";
+
+ AutoDiffJacobian<Func> autoj(f);
+ autoj(x, &y, &j, dt);
+
+ //std::cerr << "y j (via autodiff): " << "\n";
+ //std::cerr << y.transpose() << "\n\n";
+ //std::cerr << j << "\n\n";
+
+ VERIFY_IS_APPROX(y, yref);
+ VERIFY_IS_APPROX(j, jref);
+}
+#endif
+
template<typename Func> void forward_jacobian(const Func& f)
{
typename Func::InputType x = Func::InputType::Random(f.inputs());
@@ -127,7 +211,6 @@ template<typename Func> void forward_jacobian(const Func& f)
VERIFY_IS_APPROX(j, jref);
}
-
// TODO also check actual derivatives!
template <int>
void test_autodiff_scalar()
@@ -140,6 +223,7 @@ void test_autodiff_scalar()
VERIFY_IS_APPROX(res.value(), foo(p.x(),p.y()));
}
+
// TODO also check actual derivatives!
template <int>
void test_autodiff_vector()
@@ -150,7 +234,7 @@ void test_autodiff_vector()
VectorAD ap = p.cast<AD>();
ap.x().derivatives() = Vector2f::UnitX();
ap.y().derivatives() = Vector2f::UnitY();
-
+
AD res = foo<VectorAD>(ap);
VERIFY_IS_APPROX(res.value(), foo(p));
}
@@ -163,6 +247,9 @@ void test_autodiff_jacobian()
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,2>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,3>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double>(3,3)) ));
+#if EIGEN_HAS_VARIADIC_TEMPLATES
+ CALL_SUBTEST(( forward_jacobian_cpp11(integratorFunctor<double>(10)) ));
+#endif
}
@@ -204,9 +291,64 @@ void test_autodiff_hessian()
VERIFY_IS_APPROX(y.value().derivatives()(1), s4*std::cos(s1*s3+s2*s4));
VERIFY_IS_APPROX(y.derivatives()(0).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s3,s4*s3));
VERIFY_IS_APPROX(y.derivatives()(1).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s4,s4*s4));
+
+ ADD z = x(0)*x(1);
+ VERIFY_IS_APPROX(z.derivatives()(0).derivatives(), Vector2d(0,1));
+ VERIFY_IS_APPROX(z.derivatives()(1).derivatives(), Vector2d(1,0));
+}
+
+double bug_1222() {
+ typedef Eigen::AutoDiffScalar<Eigen::Vector3d> AD;
+ const double _cv1_3 = 1.0;
+ const AD chi_3 = 1.0;
+ // this line did not work, because operator+ returns ADS<DerType&>, which then cannot be converted to ADS<DerType>
+ const AD denom = chi_3 + _cv1_3;
+ return denom.value();
+}
+
+double bug_1223() {
+ using std::min;
+ typedef Eigen::AutoDiffScalar<Eigen::Vector3d> AD;
+
+ const double _cv1_3 = 1.0;
+ const AD chi_3 = 1.0;
+ const AD denom = 1.0;
+
+ // failed because the implementation of min attempts to construct ADS<DerType&> via the constructor AutoDiffScalar(const Real& value)
+ // without initializing m_derivatives (which is a reference in this case)
+ #define EIGEN_TEST_SPACE
+ const AD t = min EIGEN_TEST_SPACE (denom / chi_3, 1.0);
+
+ const AD t2 = min EIGEN_TEST_SPACE (denom / (chi_3 * _cv1_3), 1.0);
+
+ return t.value() + t2.value();
+}
+
+// regression test for some compilation issues with specializations of ScalarBinaryOpTraits
+void bug_1260() {
+ Matrix4d A;
+ Vector4d v;
+ A*v;
}
+// check a compilation issue with numext::max
+double bug_1261() {
+ typedef AutoDiffScalar<Matrix2d> AD;
+ typedef Matrix<AD,2,1> VectorAD;
+
+ VectorAD v;
+ const AD maxVal = v.maxCoeff();
+ const AD minVal = v.minCoeff();
+ return maxVal.value() + minVal.value();
+}
+double bug_1264() {
+ typedef AutoDiffScalar<Vector2d> AD;
+ const AD s;
+ const Matrix<AD, 3, 1> v1;
+ const Matrix<AD, 3, 1> v2 = (s + 3.0) * v1;
+ return v2(0).value();
+}
void test_autodiff()
{
@@ -216,5 +358,10 @@ void test_autodiff()
CALL_SUBTEST_3( test_autodiff_jacobian<1>() );
CALL_SUBTEST_4( test_autodiff_hessian<1>() );
}
+
+ bug_1222();
+ bug_1223();
+ bug_1260();
+ bug_1261();
}
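For readers new to the module these tests cover: AutoDiffScalar carries a value plus a vector of partial derivatives through ordinary scalar code. A minimal sketch (hypothetical example, not from the patch):

    #include <unsupported/Eigen/AutoDiff>
    #include <iostream>

    int main() {
      using namespace Eigen;
      typedef AutoDiffScalar<Vector2d> AD;
      AD x(1.0, 2, 0);  // value 1.0, 2 variables, this is variable #0
      AD y(2.0, 2, 1);  // value 2.0, variable #1
      AD f = x * y + sin(x);
      std::cout << f.value() << "\n";                    // x*y + sin(x)
      std::cout << f.derivatives().transpose() << "\n";  // (y + cos(x), x)
      return 0;
    }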
diff --git a/unsupported/test/autodiff_scalar.cpp b/unsupported/test/autodiff_scalar.cpp
index c631c734a..4df2f5c57 100644
--- a/unsupported/test/autodiff_scalar.cpp
+++ b/unsupported/test/autodiff_scalar.cpp
@@ -36,13 +36,48 @@ template<typename Scalar> void check_atan2()
VERIFY_IS_APPROX(res.derivatives(), x.derivatives());
}
+template<typename Scalar> void check_hyperbolic_functions()
+{
+ using std::sinh;
+ using std::cosh;
+ using std::tanh;
+ typedef Matrix<Scalar, 1, 1> Deriv1;
+ typedef AutoDiffScalar<Deriv1> AD;
+ Deriv1 p = Deriv1::Random();
+ AD val(p.x(),Deriv1::UnitX());
+
+ Scalar cosh_px = std::cosh(p.x());
+ AD res1 = tanh(val);
+ VERIFY_IS_APPROX(res1.value(), std::tanh(p.x()));
+ VERIFY_IS_APPROX(res1.derivatives().x(), Scalar(1.0) / (cosh_px * cosh_px));
+ AD res2 = sinh(val);
+ VERIFY_IS_APPROX(res2.value(), std::sinh(p.x()));
+ VERIFY_IS_APPROX(res2.derivatives().x(), cosh_px);
+ AD res3 = cosh(val);
+ VERIFY_IS_APPROX(res3.value(), cosh_px);
+ VERIFY_IS_APPROX(res3.derivatives().x(), std::sinh(p.x()));
+
+ // Check constant values.
+ const Scalar sample_point = Scalar(1) / Scalar(3);
+ val = AD(sample_point,Deriv1::UnitX());
+ res1 = tanh(val);
+ VERIFY_IS_APPROX(res1.derivatives().x(), Scalar(0.896629559604914));
+
+ res2 = sinh(val);
+ VERIFY_IS_APPROX(res2.derivatives().x(), Scalar(1.056071867829939));
+
+ res3 = cosh(val);
+ VERIFY_IS_APPROX(res3.derivatives().x(), Scalar(0.339540557256150));
+}
void test_autodiff_scalar()
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_1( check_atan2<float>() );
CALL_SUBTEST_2( check_atan2<double>() );
+ CALL_SUBTEST_3( check_hyperbolic_functions<float>() );
+ CALL_SUBTEST_4( check_hyperbolic_functions<double>() );
}
}
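The hard-coded constants in check_hyperbolic_functions() are simply the analytic derivatives evaluated at the sample point x = 1/3: d/dx tanh(x) = 1/cosh^2(x), d/dx sinh(x) = cosh(x), and d/dx cosh(x) = sinh(x). A quick sketch to reproduce them:

    #include <cmath>
    #include <cstdio>

    int main() {
      const double x = 1.0 / 3.0;
      std::printf("%.15f\n", 1.0 / (std::cosh(x) * std::cosh(x)));  // 0.896629559604914
      std::printf("%.15f\n", std::cosh(x));                         // 1.056071867829939
      std::printf("%.15f\n", std::sinh(x));                         // 0.339540557256150
      return 0;
    }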
diff --git a/unsupported/test/cxx11_eventcount.cpp b/unsupported/test/cxx11_eventcount.cpp
index f16cc6f07..3b598bf42 100644
--- a/unsupported/test/cxx11_eventcount.cpp
+++ b/unsupported/test/cxx11_eventcount.cpp
@@ -25,7 +25,8 @@ int rand_reentrant(unsigned int* s) {
static void test_basic_eventcount()
{
- std::vector<EventCount::Waiter> waiters(1);
+ MaxSizeVector<EventCount::Waiter> waiters(1);
+ waiters.resize(1);
EventCount ec(waiters);
EventCount::Waiter& w = waiters[0];
ec.Notify(false);
@@ -81,7 +82,8 @@ static void test_stress_eventcount()
static const int kEvents = 1 << 16;
static const int kQueues = 10;
- std::vector<EventCount::Waiter> waiters(kThreads);
+ MaxSizeVector<EventCount::Waiter> waiters(kThreads);
+ waiters.resize(kThreads);
EventCount ec(waiters);
TestQueue queues[kQueues];
diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp
deleted file mode 100644
index 9a813653c..000000000
--- a/unsupported/test/cxx11_float16.cpp
+++ /dev/null
@@ -1,192 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_float16
-
-#include "main.h"
-#include <Eigen/src/Core/arch/CUDA/Half.h>
-
-using Eigen::half;
-
-void test_conversion()
-{
- // Conversion from float.
- VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00);
- VERIFY_IS_EQUAL(half(0.5f).x, 0x3800);
- VERIFY_IS_EQUAL(half(0.33333f).x, 0x3555);
- VERIFY_IS_EQUAL(half(0.0f).x, 0x0000);
- VERIFY_IS_EQUAL(half(-0.0f).x, 0x8000);
- VERIFY_IS_EQUAL(half(65504.0f).x, 0x7bff);
- VERIFY_IS_EQUAL(half(65536.0f).x, 0x7c00); // Becomes infinity.
-
- // Denormals.
- VERIFY_IS_EQUAL(half(-5.96046e-08f).x, 0x8001);
- VERIFY_IS_EQUAL(half(5.96046e-08f).x, 0x0001);
- VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002);
-
- // Verify round-to-nearest-even behavior.
- float val1 = float(half(__half(0x3c00)));
- float val2 = float(half(__half(0x3c01)));
- float val3 = float(half(__half(0x3c02)));
- VERIFY_IS_EQUAL(half(0.5 * (val1 + val2)).x, 0x3c00);
- VERIFY_IS_EQUAL(half(0.5 * (val2 + val3)).x, 0x3c02);
-
- // Conversion from int.
- VERIFY_IS_EQUAL(half(-1).x, 0xbc00);
- VERIFY_IS_EQUAL(half(0).x, 0x0000);
- VERIFY_IS_EQUAL(half(1).x, 0x3c00);
- VERIFY_IS_EQUAL(half(2).x, 0x4000);
- VERIFY_IS_EQUAL(half(3).x, 0x4200);
-
- // Conversion from bool.
- VERIFY_IS_EQUAL(half(false).x, 0x0000);
- VERIFY_IS_EQUAL(half(true).x, 0x3c00);
-
- // Conversion to float.
- VERIFY_IS_EQUAL(float(half(__half(0x0000))), 0.0f);
- VERIFY_IS_EQUAL(float(half(__half(0x3c00))), 1.0f);
-
- // Denormals.
- VERIFY_IS_APPROX(float(half(__half(0x8001))), -5.96046e-08f);
- VERIFY_IS_APPROX(float(half(__half(0x0001))), 5.96046e-08f);
- VERIFY_IS_APPROX(float(half(__half(0x0002))), 1.19209e-07f);
-
- // NaNs and infinities.
- VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number.
- VERIFY(!(numext::isnan)(float(half(0.0f))));
- VERIFY((numext::isinf)(float(half(__half(0xfc00)))));
- VERIFY((numext::isnan)(float(half(__half(0xfc01)))));
- VERIFY((numext::isinf)(float(half(__half(0x7c00)))));
- VERIFY((numext::isnan)(float(half(__half(0x7c01)))));
-
-#if !EIGEN_COMP_MSVC
- // Visual Studio errors out on divisions by 0
- VERIFY((numext::isnan)(float(half(0.0 / 0.0))));
- VERIFY((numext::isinf)(float(half(1.0 / 0.0))));
- VERIFY((numext::isinf)(float(half(-1.0 / 0.0))));
-#endif
-
- // Exactly same checks as above, just directly on the half representation.
- VERIFY(!(numext::isinf)(half(__half(0x7bff))));
- VERIFY(!(numext::isnan)(half(__half(0x0000))));
- VERIFY((numext::isinf)(half(__half(0xfc00))));
- VERIFY((numext::isnan)(half(__half(0xfc01))));
- VERIFY((numext::isinf)(half(__half(0x7c00))));
- VERIFY((numext::isnan)(half(__half(0x7c01))));
-
-#if !EIGEN_COMP_MSVC
- // Visual Studio errors out on divisions by 0
- VERIFY((numext::isnan)(half(0.0 / 0.0)));
- VERIFY((numext::isinf)(half(1.0 / 0.0)));
- VERIFY((numext::isinf)(half(-1.0 / 0.0)));
-#endif
-}
-
-void test_arithmetic()
-{
- VERIFY_IS_EQUAL(float(half(2) + half(2)), 4);
- VERIFY_IS_EQUAL(float(half(2) + half(-2)), 0);
- VERIFY_IS_APPROX(float(half(0.33333f) + half(0.66667f)), 1.0f);
- VERIFY_IS_EQUAL(float(half(2.0f) * half(-5.5f)), -11.0f);
- VERIFY_IS_APPROX(float(half(1.0f) / half(3.0f)), 0.33333f);
- VERIFY_IS_EQUAL(float(-half(4096.0f)), -4096.0f);
- VERIFY_IS_EQUAL(float(-half(-4096.0f)), 4096.0f);
-}
-
-void test_comparison()
-{
- VERIFY(half(1.0f) > half(0.5f));
- VERIFY(half(0.5f) < half(1.0f));
- VERIFY(!(half(1.0f) < half(0.5f)));
- VERIFY(!(half(0.5f) > half(1.0f)));
-
- VERIFY(!(half(4.0f) > half(4.0f)));
- VERIFY(!(half(4.0f) < half(4.0f)));
-
- VERIFY(!(half(0.0f) < half(-0.0f)));
- VERIFY(!(half(-0.0f) < half(0.0f)));
- VERIFY(!(half(0.0f) > half(-0.0f)));
- VERIFY(!(half(-0.0f) > half(0.0f)));
-
- VERIFY(half(0.2f) > half(-1.0f));
- VERIFY(half(-1.0f) < half(0.2f));
- VERIFY(half(-16.0f) < half(-15.0f));
-
- VERIFY(half(1.0f) == half(1.0f));
- VERIFY(half(1.0f) != half(2.0f));
-
- // Comparisons with NaNs and infinities.
-#if !EIGEN_COMP_MSVC
- // Visual Studio errors out on divisions by 0
- VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0)));
- VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0));
-
- VERIFY(!(half(1.0) == half(0.0 / 0.0)));
- VERIFY(!(half(1.0) < half(0.0 / 0.0)));
- VERIFY(!(half(1.0) > half(0.0 / 0.0)));
- VERIFY(half(1.0) != half(0.0 / 0.0));
-
- VERIFY(half(1.0) < half(1.0 / 0.0));
- VERIFY(half(1.0) > half(-1.0 / 0.0));
-#endif
-}
-
-void test_basic_functions()
-{
- VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f);
- VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f);
-
- VERIFY_IS_EQUAL(float(numext::floor(half(3.5f))), 3.0f);
- VERIFY_IS_EQUAL(float(numext::floor(half(-3.5f))), -4.0f);
-
- VERIFY_IS_EQUAL(float(numext::ceil(half(3.5f))), 4.0f);
- VERIFY_IS_EQUAL(float(numext::ceil(half(-3.5f))), -3.0f);
-
- VERIFY_IS_APPROX(float(numext::sqrt(half(0.0f))), 0.0f);
- VERIFY_IS_APPROX(float(numext::sqrt(half(4.0f))), 2.0f);
-
- VERIFY_IS_APPROX(float(numext::pow(half(0.0f), half(1.0f))), 0.0f);
- VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f);
-
- VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f);
- VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI));
-
- VERIFY_IS_EQUAL(float(numext::log(half(1.0f))), 0.0f);
- VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f);
-}
-
-void test_trigonometric_functions()
-{
- VERIFY_IS_APPROX(numext::cos(half(0.0f)), half(cosf(0.0f)));
- VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI)), half(cosf(EIGEN_PI)));
- //VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI/2)), half(cosf(EIGEN_PI/2)));
- //VERIFY_IS_APPROX(numext::cos(half(3*EIGEN_PI/2)), half(cosf(3*EIGEN_PI/2)));
- VERIFY_IS_APPROX(numext::cos(half(3.5f)), half(cosf(3.5f)));
-
- VERIFY_IS_APPROX(numext::sin(half(0.0f)), half(sinf(0.0f)));
- // VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI)), half(sinf(EIGEN_PI)));
- VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI/2)), half(sinf(EIGEN_PI/2)));
- VERIFY_IS_APPROX(numext::sin(half(3*EIGEN_PI/2)), half(sinf(3*EIGEN_PI/2)));
- VERIFY_IS_APPROX(numext::sin(half(3.5f)), half(sinf(3.5f)));
-
- VERIFY_IS_APPROX(numext::tan(half(0.0f)), half(tanf(0.0f)));
- // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI)), half(tanf(EIGEN_PI)));
- // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI/2)), half(tanf(EIGEN_PI/2)));
- //VERIFY_IS_APPROX(numext::tan(half(3*EIGEN_PI/2)), half(tanf(3*EIGEN_PI/2)));
- VERIFY_IS_APPROX(numext::tan(half(3.5f)), half(tanf(3.5f)));
-}
-
-void test_cxx11_float16()
-{
- CALL_SUBTEST(test_conversion());
- CALL_SUBTEST(test_arithmetic());
- CALL_SUBTEST(test_comparison());
- CALL_SUBTEST(test_basic_functions());
- CALL_SUBTEST(test_trigonometric_functions());
-}
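The bit patterns asserted in the removed test follow directly from the IEEE binary16 layout: 1 sign bit, 5 exponent bits with bias 15, 10 mantissa bits. A sketch of how the expected constants are formed:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // 1.0f: sign 0, biased exponent 15 (unbiased 0), mantissa 0 -> 0x3c00
      std::uint16_t one = (0u << 15) | (15u << 10) | 0u;
      // 0.5f: the exponent field drops by one -> 0x3800
      std::uint16_t half = (0u << 15) | (14u << 10) | 0u;
      std::printf("0x%04x 0x%04x\n", one, half);
      return 0;
    }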
diff --git a/unsupported/test/cxx11_non_blocking_thread_pool.cpp b/unsupported/test/cxx11_non_blocking_thread_pool.cpp
new file mode 100644
index 000000000..5f9bb938b
--- /dev/null
+++ b/unsupported/test/cxx11_non_blocking_thread_pool.cpp
@@ -0,0 +1,107 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+#include "main.h"
+#include "Eigen/CXX11/ThreadPool"
+
+static void test_create_destroy_empty_pool()
+{
+ // Just create and destroy the pool. This spins up and tears down worker
+ // threads; ensure there are no issues in that logic.
+ for (int i = 0; i < 16; ++i) {
+ NonBlockingThreadPool tp(i);
+ }
+}
+
+
+static void test_parallelism()
+{
+ // Test we never-ever fail to match available tasks with idle threads.
+ const int kThreads = 16; // code below expects that this is a multiple of 4
+ NonBlockingThreadPool tp(kThreads);
+ VERIFY_IS_EQUAL(tp.NumThreads(), kThreads);
+ VERIFY_IS_EQUAL(tp.CurrentThreadId(), -1);
+ for (int iter = 0; iter < 100; ++iter) {
+ std::atomic<int> running(0);
+ std::atomic<int> done(0);
+ std::atomic<int> phase(0);
+ // Schedule kThreads tasks and ensure that they all are running.
+ for (int i = 0; i < kThreads; ++i) {
+ tp.Schedule([&]() {
+ const int thread_id = tp.CurrentThreadId();
+ VERIFY_GE(thread_id, 0);
+ VERIFY_LE(thread_id, kThreads - 1);
+ running++;
+ while (phase < 1) {
+ }
+ done++;
+ });
+ }
+ while (running != kThreads) {
+ }
+ running = 0;
+ phase = 1;
+ // Now, while the previous tasks exit, schedule another kThreads tasks and
+ // ensure that they are running.
+ for (int i = 0; i < kThreads; ++i) {
+ tp.Schedule([&, i]() {
+ running++;
+ while (phase < 2) {
+ }
+ // When all tasks are running, half of the tasks exit, a quarter keep
+ // running, and a quarter schedule another 2 tasks each. Concurrently,
+ // the main thread schedules another quarter of the tasks. This gives us
+ // another kThreads tasks, and we ensure that they all are running.
+ if (i < kThreads / 2) {
+ } else if (i < 3 * kThreads / 4) {
+ running++;
+ while (phase < 3) {
+ }
+ done++;
+ } else {
+ for (int j = 0; j < 2; ++j) {
+ tp.Schedule([&]() {
+ running++;
+ while (phase < 3) {
+ }
+ done++;
+ });
+ }
+ }
+ done++;
+ });
+ }
+ while (running != kThreads) {
+ }
+ running = 0;
+ phase = 2;
+ for (int i = 0; i < kThreads / 4; ++i) {
+ tp.Schedule([&]() {
+ running++;
+ while (phase < 3) {
+ }
+ done++;
+ });
+ }
+ while (running != kThreads) {
+ }
+ phase = 3;
+ while (done != 3 * kThreads) {
+ }
+ }
+}
+
+void test_cxx11_non_blocking_thread_pool()
+{
+ CALL_SUBTEST(test_create_destroy_empty_pool());
+ CALL_SUBTEST(test_parallelism());
+}
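A minimal usage sketch of the pool this test stresses, assuming the same ThreadPool header as included above. The pool exposes no join primitive, so completion is observed via an atomic, exactly as the test does:

    #define EIGEN_USE_THREADS
    #include "Eigen/CXX11/ThreadPool"
    #include <atomic>

    int main() {
      Eigen::NonBlockingThreadPool pool(4);
      std::atomic<int> done(0);
      const int kTasks = 100;
      for (int i = 0; i < kTasks; ++i)
        pool.Schedule([&done]() { done++; });
      while (done != kTasks) {
      }  // spin-wait for all tasks, mirroring the test's style
      return 0;
    }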
diff --git a/unsupported/test/cxx11_runqueue.cpp b/unsupported/test/cxx11_runqueue.cpp
index d1770ee1b..91f690114 100644
--- a/unsupported/test/cxx11_runqueue.cpp
+++ b/unsupported/test/cxx11_runqueue.cpp
@@ -33,73 +33,81 @@ void test_basic_runqueue()
VERIFY_IS_EQUAL(0u, q.Size());
VERIFY_IS_EQUAL(0, q.PopFront());
std::vector<int> stolen;
- VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(0u, stolen.size());
// Push one front, pop one front.
VERIFY_IS_EQUAL(0, q.PushFront(1));
- VERIFY_IS_EQUAL(1, q.Size());
+ VERIFY_IS_EQUAL(1u, q.Size());
VERIFY_IS_EQUAL(1, q.PopFront());
- VERIFY_IS_EQUAL(0, q.Size());
+ VERIFY_IS_EQUAL(0u, q.Size());
// Push front to overflow.
VERIFY_IS_EQUAL(0, q.PushFront(2));
- VERIFY_IS_EQUAL(1, q.Size());
+ VERIFY_IS_EQUAL(1u, q.Size());
VERIFY_IS_EQUAL(0, q.PushFront(3));
- VERIFY_IS_EQUAL(2, q.Size());
+ VERIFY_IS_EQUAL(2u, q.Size());
VERIFY_IS_EQUAL(0, q.PushFront(4));
- VERIFY_IS_EQUAL(3, q.Size());
+ VERIFY_IS_EQUAL(3u, q.Size());
VERIFY_IS_EQUAL(0, q.PushFront(5));
- VERIFY_IS_EQUAL(4, q.Size());
+ VERIFY_IS_EQUAL(4u, q.Size());
VERIFY_IS_EQUAL(6, q.PushFront(6));
- VERIFY_IS_EQUAL(4, q.Size());
+ VERIFY_IS_EQUAL(4u, q.Size());
VERIFY_IS_EQUAL(5, q.PopFront());
- VERIFY_IS_EQUAL(3, q.Size());
+ VERIFY_IS_EQUAL(3u, q.Size());
VERIFY_IS_EQUAL(4, q.PopFront());
- VERIFY_IS_EQUAL(2, q.Size());
+ VERIFY_IS_EQUAL(2u, q.Size());
VERIFY_IS_EQUAL(3, q.PopFront());
- VERIFY_IS_EQUAL(1, q.Size());
+ VERIFY_IS_EQUAL(1u, q.Size());
VERIFY_IS_EQUAL(2, q.PopFront());
- VERIFY_IS_EQUAL(0, q.Size());
+ VERIFY_IS_EQUAL(0u, q.Size());
VERIFY_IS_EQUAL(0, q.PopFront());
// Push one back, pop one back.
VERIFY_IS_EQUAL(0, q.PushBack(7));
- VERIFY_IS_EQUAL(1, q.Size());
- VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen));
- VERIFY_IS_EQUAL(1, stolen.size());
+ VERIFY_IS_EQUAL(1u, q.Size());
+ VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(1u, stolen.size());
VERIFY_IS_EQUAL(7, stolen[0]);
- VERIFY_IS_EQUAL(0, q.Size());
+ VERIFY_IS_EQUAL(0u, q.Size());
stolen.clear();
// Push back to overflow.
VERIFY_IS_EQUAL(0, q.PushBack(8));
- VERIFY_IS_EQUAL(1, q.Size());
+ VERIFY_IS_EQUAL(1u, q.Size());
VERIFY_IS_EQUAL(0, q.PushBack(9));
- VERIFY_IS_EQUAL(2, q.Size());
+ VERIFY_IS_EQUAL(2u, q.Size());
VERIFY_IS_EQUAL(0, q.PushBack(10));
- VERIFY_IS_EQUAL(3, q.Size());
+ VERIFY_IS_EQUAL(3u, q.Size());
VERIFY_IS_EQUAL(0, q.PushBack(11));
- VERIFY_IS_EQUAL(4, q.Size());
+ VERIFY_IS_EQUAL(4u, q.Size());
VERIFY_IS_EQUAL(12, q.PushBack(12));
- VERIFY_IS_EQUAL(4, q.Size());
+ VERIFY_IS_EQUAL(4u, q.Size());
// Pop back in halves.
- VERIFY_IS_EQUAL(2, q.PopBackHalf(&stolen));
- VERIFY_IS_EQUAL(2, stolen.size());
+ VERIFY_IS_EQUAL(2u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(2u, stolen.size());
VERIFY_IS_EQUAL(10, stolen[0]);
VERIFY_IS_EQUAL(11, stolen[1]);
- VERIFY_IS_EQUAL(2, q.Size());
+ VERIFY_IS_EQUAL(2u, q.Size());
stolen.clear();
- VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen));
- VERIFY_IS_EQUAL(1, stolen.size());
+ VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(1u, stolen.size());
VERIFY_IS_EQUAL(9, stolen[0]);
- VERIFY_IS_EQUAL(1, q.Size());
+ VERIFY_IS_EQUAL(1u, q.Size());
stolen.clear();
- VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen));
- VERIFY_IS_EQUAL(1, stolen.size());
+ VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(1u, stolen.size());
VERIFY_IS_EQUAL(8, stolen[0]);
stolen.clear();
- VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen));
- VERIFY_IS_EQUAL(0, stolen.size());
+ VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(0u, stolen.size());
// Empty again.
VERIFY(q.Empty());
- VERIFY_IS_EQUAL(0, q.Size());
+ VERIFY_IS_EQUAL(0u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushFront(1));
+ VERIFY_IS_EQUAL(0, q.PushFront(2));
+ VERIFY_IS_EQUAL(0, q.PushFront(3));
+ VERIFY_IS_EQUAL(1, q.PopBack());
+ VERIFY_IS_EQUAL(2, q.PopBack());
+ VERIFY_IS_EQUAL(3, q.PopBack());
+ VERIFY(q.Empty());
+ VERIFY_IS_EQUAL(0u, q.Size());
}
// Empty tests that the queue is not claimed to be empty when it is in fact not.
@@ -130,7 +138,7 @@ void test_empty_runqueue()
stolen.clear();
break;
}
- VERIFY_IS_EQUAL(0, stolen.size());
+ VERIFY_IS_EQUAL(0u, stolen.size());
}
}
}
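The 0 -> 0u changes above fix signed/unsigned comparison warnings: Size() and PopBackHalf() return unsigned values. For reference, a sketch of the RunQueue contract the assertions rely on (push returns 0 on success or the rejected element on overflow; pop returns 0 on an empty queue):

    #define EIGEN_USE_THREADS
    #include "Eigen/CXX11/ThreadPool"
    #include <vector>

    int main() {
      Eigen::RunQueue<int, 4> q;            // fixed power-of-two capacity
      int rejected = q.PushFront(1);        // returns 0 when accepted
      int v = q.PopFront();                 // returns the element, or 0 if empty
      std::vector<int> stolen;
      unsigned n = q.PopBackHalf(&stolen);  // steals up to half, returns the count
      return (rejected == 0 && v == 1 && n == 0u) ? 0 : 1;
    }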
diff --git a/unsupported/test/cxx11_tensor_argmax_cuda.cu b/unsupported/test/cxx11_tensor_argmax_cuda.cu
index 41ccbe974..6fe8982f2 100644
--- a/unsupported/test/cxx11_tensor_argmax_cuda.cu
+++ b/unsupported/test/cxx11_tensor_argmax_cuda.cu
@@ -12,6 +12,9 @@
#define EIGEN_TEST_FUNC cxx11_tensor_cuda
#define EIGEN_USE_GPU
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
diff --git a/unsupported/test/cxx11_tensor_assign.cpp b/unsupported/test/cxx11_tensor_assign.cpp
index e5cf61fe1..8fe85d83c 100644
--- a/unsupported/test/cxx11_tensor_assign.cpp
+++ b/unsupported/test/cxx11_tensor_assign.cpp
@@ -286,7 +286,7 @@ static void test_compound_assign()
}
static void test_std_initializers_tensor() {
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
+#if EIGEN_HAS_VARIADIC_TEMPLATES
Tensor<int, 1> a(3);
a.setValues({0, 1, 2});
VERIFY_IS_EQUAL(a(0), 0);
diff --git a/unsupported/test/cxx11_tensor_broadcasting.cpp b/unsupported/test/cxx11_tensor_broadcasting.cpp
index 2ddf47234..5c0ea5889 100644
--- a/unsupported/test/cxx11_tensor_broadcasting.cpp
+++ b/unsupported/test/cxx11_tensor_broadcasting.cpp
@@ -115,7 +115,7 @@ static void test_static_broadcasting()
Tensor<float, 3, DataLayout> tensor(8,3,5);
tensor.setRandom();
-#ifdef EIGEN_HAS_CONSTEXPR
+#if EIGEN_HAS_CONSTEXPR
Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>, Eigen::type2index<4>> broadcasts;
#else
Eigen::array<int, 3> broadcasts;
diff --git a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
index f22b99de8..88c233994 100644
--- a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
@@ -13,7 +13,9 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
-
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
diff --git a/unsupported/test/cxx11_tensor_complex_cuda.cu b/unsupported/test/cxx11_tensor_complex_cuda.cu
new file mode 100644
index 000000000..f895efd01
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_complex_cuda.cu
@@ -0,0 +1,115 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_FUNC cxx11_tensor_complex
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+void test_cuda_nullary() {
+ Tensor<std::complex<float>, 1, 0, int> in1(2);
+ Tensor<std::complex<float>, 1, 0, int> in2(2);
+ in1.setRandom();
+ in2.setRandom();
+
+ std::size_t float_bytes = in1.size() * sizeof(float);
+ std::size_t complex_bytes = in1.size() * sizeof(std::complex<float>);
+
+ std::complex<float>* d_in1;
+ std::complex<float>* d_in2;
+ float* d_out2;
+ cudaMalloc((void**)(&d_in1), complex_bytes);
+ cudaMalloc((void**)(&d_in2), complex_bytes);
+ cudaMalloc((void**)(&d_out2), float_bytes);
+ cudaMemcpy(d_in1, in1.data(), complex_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in2, in2.data(), complex_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in1(
+ d_in1, 2);
+ Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in2(
+ d_in2, 2);
+ Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_out2(
+ d_out2, 2);
+
+ gpu_in1.device(gpu_device) = gpu_in1.constant(std::complex<float>(3.14f, 2.7f));
+ gpu_out2.device(gpu_device) = gpu_in2.abs();
+
+ Tensor<std::complex<float>, 1, 0, int> new1(2);
+ Tensor<float, 1, 0, int> new2(2);
+
+ assert(cudaMemcpyAsync(new1.data(), d_in1, complex_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaMemcpyAsync(new2.data(), d_out2, float_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 2; ++i) {
+ VERIFY_IS_APPROX(new1(i), std::complex<float>(3.14f, 2.7f));
+ VERIFY_IS_APPROX(new2(i), std::abs(in2(i)));
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+ cudaFree(d_out2);
+}
+
+
+static void test_cuda_sum_reductions() {
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ const int num_rows = internal::random<int>(1024, 5*1024);
+ const int num_cols = internal::random<int>(1024, 5*1024);
+
+ Tensor<std::complex<float>, 2> in(num_rows, num_cols);
+ in.setRandom();
+
+ Tensor<std::complex<float>, 0> full_redux;
+ full_redux = in.sum();
+
+ std::size_t in_bytes = in.size() * sizeof(std::complex<float>);
+ std::size_t out_bytes = full_redux.size() * sizeof(std::complex<float>);
+ std::complex<float>* gpu_in_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(in_bytes));
+ std::complex<float>* gpu_out_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(out_bytes));
+ gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);
+
+ TensorMap<Tensor<std::complex<float>, 2> > in_gpu(gpu_in_ptr, num_rows, num_cols);
+ TensorMap<Tensor<std::complex<float>, 0> > out_gpu(gpu_out_ptr);
+
+ out_gpu.device(gpu_device) = in_gpu.sum();
+
+ Tensor<std::complex<float>, 0> full_redux_gpu;
+ gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
+ gpu_device.synchronize();
+
+ // Check that the CPU and GPU reductions return the same result.
+ VERIFY_IS_APPROX(full_redux(), full_redux_gpu());
+
+ gpu_device.deallocate(gpu_in_ptr);
+ gpu_device.deallocate(gpu_out_ptr);
+}
+
+
+void test_cxx11_tensor_complex()
+{
+ CALL_SUBTEST(test_cuda_nullary());
+ CALL_SUBTEST(test_cuda_sum_reductions());
+}
diff --git a/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu
new file mode 100644
index 000000000..2baf5eaad
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu
@@ -0,0 +1,97 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template<typename T>
+void test_cuda_complex_cwise_ops() {
+ const int kNumItems = 2;
+ std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>);
+
+ std::complex<T>* d_in1;
+ std::complex<T>* d_in2;
+ std::complex<T>* d_out;
+ cudaMalloc((void**)(&d_in1), complex_bytes);
+ cudaMalloc((void**)(&d_in2), complex_bytes);
+ cudaMalloc((void**)(&d_out), complex_bytes);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1(
+ d_in1, kNumItems);
+ Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2(
+ d_in2, kNumItems);
+ Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out(
+ d_out, kNumItems);
+
+ const std::complex<T> a(3.14f, 2.7f);
+ const std::complex<T> b(-10.6f, 1.4f);
+
+ gpu_in1.device(gpu_device) = gpu_in1.constant(a);
+ gpu_in2.device(gpu_device) = gpu_in2.constant(b);
+
+ enum CwiseOp {
+ Add = 0,
+ Sub,
+ Mul,
+ Div
+ };
+
+ Tensor<std::complex<T>, 1, 0, int> actual(kNumItems);
+ for (int op = Add; op <= Div; op++) {
+ std::complex<T> expected;
+ switch (static_cast<CwiseOp>(op)) {
+ case Add:
+ gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
+ expected = a + b;
+ break;
+ case Sub:
+ gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;
+ expected = a - b;
+ break;
+ case Mul:
+ gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;
+ expected = a * b;
+ break;
+ case Div:
+ gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;
+ expected = a / b;
+ break;
+ }
+ assert(cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < kNumItems; ++i) {
+ VERIFY_IS_APPROX(actual(i), expected);
+ }
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+ cudaFree(d_out);
+}
+
+
+void test_cxx11_tensor_complex_cwise_ops()
+{
+ CALL_SUBTEST(test_cuda_complex_cwise_ops<float>());
+ CALL_SUBTEST(test_cuda_complex_cwise_ops<double>());
+}
diff --git a/unsupported/test/cxx11_tensor_contract_cuda.cu b/unsupported/test/cxx11_tensor_contract_cuda.cu
index 6d1ef07f9..767e9c678 100644
--- a/unsupported/test/cxx11_tensor_contract_cuda.cu
+++ b/unsupported/test/cxx11_tensor_contract_cuda.cu
@@ -14,7 +14,9 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
-
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
@@ -84,6 +86,65 @@ void test_cuda_contraction(int m_size, int k_size, int n_size)
cudaFree((void*)d_t_result);
}
+
+template<int DataLayout>
+void test_scalar(int m_size, int k_size, int n_size)
+{
+ std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
+ // Contract over both dimensions, reducing the two inputs to a single
+ // rank-0 (scalar) result on the device.
+ Tensor<float, 2, DataLayout> t_left(m_size, k_size);
+ Tensor<float, 2, DataLayout> t_right(k_size, n_size);
+ Tensor<float, 0, DataLayout> t_result;
+ Tensor<float, 0, DataLayout> t_result_gpu;
+ Eigen::array<DimPair, 2> dims(DimPair(0, 0), DimPair(1, 1));
+
+ t_left.setRandom();
+ t_right.setRandom();
+
+ std::size_t t_left_bytes = t_left.size() * sizeof(float);
+ std::size_t t_right_bytes = t_right.size() * sizeof(float);
+ std::size_t t_result_bytes = sizeof(float);
+
+ float* d_t_left;
+ float* d_t_right;
+ float* d_t_result;
+
+ cudaMalloc((void**)(&d_t_left), t_left_bytes);
+ cudaMalloc((void**)(&d_t_right), t_right_bytes);
+ cudaMalloc((void**)(&d_t_result), t_result_bytes);
+
+ cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
+ gpu_t_left(d_t_left, m_size, k_size);
+ Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
+ gpu_t_right(d_t_right, k_size, n_size);
+ Eigen::TensorMap<Eigen::Tensor<float, 0, DataLayout> >
+ gpu_t_result(d_t_result);
+
+ gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims);
+ t_result = t_left.contract(t_right, dims);
+
+ cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost);
+ if (fabs(t_result() - t_result_gpu()) > 1e-4f &&
+ !Eigen::internal::isApprox(t_result(), t_result_gpu(), 1e-4f)) {
+ std::cout << "mismatch detected: " << t_result()
+ << " vs " << t_result_gpu() << std::endl;
+ assert(false);
+ }
+
+ cudaFree((void*)d_t_left);
+ cudaFree((void*)d_t_right);
+ cudaFree((void*)d_t_result);
+}
+
+
template<int DataLayout>
void test_cuda_contraction_m() {
for (int k = 32; k < 256; k++) {
@@ -138,6 +199,9 @@ void test_cxx11_tensor_cuda()
CALL_SUBTEST_1(test_cuda_contraction<ColMajor>(128, 128, 128));
CALL_SUBTEST_1(test_cuda_contraction<RowMajor>(128, 128, 128));
+ CALL_SUBTEST_1(test_scalar<ColMajor>(128, 128, 128));
+ CALL_SUBTEST_1(test_scalar<RowMajor>(128, 128, 128));
+
CALL_SUBTEST_2(test_cuda_contraction_m<ColMajor>());
CALL_SUBTEST_3(test_cuda_contraction_m<RowMajor>());
diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp
index 0e16308a2..ace97057f 100644
--- a/unsupported/test/cxx11_tensor_contraction.cpp
+++ b/unsupported/test/cxx11_tensor_contraction.cpp
@@ -87,19 +87,14 @@ static void test_scalar()
vec1.setRandom();
vec2.setRandom();
- Tensor<float, 1, DataLayout> scalar(1);
- scalar.setZero();
Eigen::array<DimPair, 1> dims = {{DimPair(0, 0)}};
- typedef TensorEvaluator<decltype(vec1.contract(vec2, dims)), DefaultDevice> Evaluator;
- Evaluator eval(vec1.contract(vec2, dims), DefaultDevice());
- eval.evalTo(scalar.data());
- EIGEN_STATIC_ASSERT(Evaluator::NumDims==1ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ Tensor<float, 0, DataLayout> scalar = vec1.contract(vec2, dims);
float expected = 0.0f;
for (int i = 0; i < 6; ++i) {
expected += vec1(i) * vec2(i);
}
- VERIFY_IS_APPROX(scalar(0), expected);
+ VERIFY_IS_APPROX(scalar(), expected);
}
template<int DataLayout>
@@ -494,6 +489,27 @@ static void test_tensor_product()
}
+template<int DataLayout>
+static void test_const_inputs()
+{
+ Tensor<float, 2, DataLayout> in1(2, 3);
+ Tensor<float, 2, DataLayout> in2(3, 2);
+ in1.setRandom();
+ in2.setRandom();
+
+ TensorMap<Tensor<const float, 2, DataLayout> > mat1(in1.data(), 2, 3);
+ TensorMap<Tensor<const float, 2, DataLayout> > mat2(in2.data(), 3, 2);
+ Tensor<float, 2, DataLayout> mat3(2,2);
+
+ Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
+ mat3 = mat1.contract(mat2, dims);
+
+ VERIFY_IS_APPROX(mat3(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(1,0) + mat1(0,2)*mat2(2,0));
+ VERIFY_IS_APPROX(mat3(0,1), mat1(0,0)*mat2(0,1) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(2,1));
+ VERIFY_IS_APPROX(mat3(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(1,0) + mat1(1,2)*mat2(2,0));
+ VERIFY_IS_APPROX(mat3(1,1), mat1(1,0)*mat2(0,1) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(2,1));
+}
+
void test_cxx11_tensor_contraction()
{
CALL_SUBTEST(test_evals<ColMajor>());
@@ -524,4 +540,6 @@ void test_cxx11_tensor_contraction()
CALL_SUBTEST(test_small_blocking_factors<RowMajor>());
CALL_SUBTEST(test_tensor_product<ColMajor>());
CALL_SUBTEST(test_tensor_product<RowMajor>());
+ CALL_SUBTEST(test_const_inputs<ColMajor>());
+ CALL_SUBTEST(test_const_inputs<RowMajor>());
}
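test_scalar() now assigns the contraction directly to a rank-0 tensor instead of evaluating through a TensorEvaluator into a length-1 vector. A minimal sketch of the same pattern (a dot product):

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      Eigen::Tensor<float, 1> a(6), b(6);
      a.setRandom();
      b.setRandom();
      Eigen::array<Eigen::IndexPair<int>, 1> dims = {{Eigen::IndexPair<int>(0, 0)}};
      Eigen::Tensor<float, 0> dot = a.contract(b, dims);
      std::cout << dot() << std::endl;  // rank-0 tensors are read with no indices
      return 0;
    }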
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
index 4026f48f0..bf216587a 100644
--- a/unsupported/test/cxx11_tensor_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -10,19 +10,65 @@
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_cuda
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
-
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
+void test_cuda_nullary() {
+ Tensor<float, 1, 0, int> in1(2);
+ Tensor<float, 1, 0, int> in2(2);
+ in1.setRandom();
+ in2.setRandom();
+
+ std::size_t tensor_bytes = in1.size() * sizeof(float);
+
+ float* d_in1;
+ float* d_in2;
+ cudaMalloc((void**)(&d_in1), tensor_bytes);
+ cudaMalloc((void**)(&d_in2), tensor_bytes);
+ cudaMemcpy(d_in1, in1.data(), tensor_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in2, in2.data(), tensor_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_in1(
+ d_in1, 2);
+ Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_in2(
+ d_in2, 2);
+
+ gpu_in1.device(gpu_device) = gpu_in1.constant(3.14f);
+ gpu_in2.device(gpu_device) = gpu_in2.random();
+
+ Tensor<float, 1, 0, int> new1(2);
+ Tensor<float, 1, 0, int> new2(2);
+
+ assert(cudaMemcpyAsync(new1.data(), d_in1, tensor_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaMemcpyAsync(new2.data(), d_in2, tensor_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 2; ++i) {
+ VERIFY_IS_APPROX(new1(i), 3.14f);
+ VERIFY_IS_NOT_EQUAL(new2(i), in2(i));
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+}
+
void test_cuda_elementwise_small() {
- Tensor<float, 1> in1(Eigen::array<int, 1>(2));
- Tensor<float, 1> in2(Eigen::array<int, 1>(2));
- Tensor<float, 1> out(Eigen::array<int, 1>(2));
+ Tensor<float, 1> in1(Eigen::array<Eigen::DenseIndex, 1>(2));
+ Tensor<float, 1> in2(Eigen::array<Eigen::DenseIndex, 1>(2));
+ Tensor<float, 1> out(Eigen::array<Eigen::DenseIndex, 1>(2));
in1.setRandom();
in2.setRandom();
@@ -44,11 +90,11 @@ void test_cuda_elementwise_small() {
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in1(
- d_in1, Eigen::array<int, 1>(2));
+ d_in1, Eigen::array<Eigen::DenseIndex, 1>(2));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in2(
- d_in2, Eigen::array<int, 1>(2));
+ d_in2, Eigen::array<Eigen::DenseIndex, 1>(2));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_out(
- d_out, Eigen::array<int, 1>(2));
+ d_out, Eigen::array<Eigen::DenseIndex, 1>(2));
gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
@@ -58,8 +104,8 @@ void test_cuda_elementwise_small() {
for (int i = 0; i < 2; ++i) {
VERIFY_IS_APPROX(
- out(Eigen::array<int, 1>(i)),
- in1(Eigen::array<int, 1>(i)) + in2(Eigen::array<int, 1>(i)));
+ out(Eigen::array<Eigen::DenseIndex, 1>(i)),
+ in1(Eigen::array<Eigen::DenseIndex, 1>(i)) + in2(Eigen::array<Eigen::DenseIndex, 1>(i)));
}
cudaFree(d_in1);
@@ -69,10 +115,10 @@ void test_cuda_elementwise_small() {
void test_cuda_elementwise()
{
- Tensor<float, 3> in1(Eigen::array<int, 3>(72,53,97));
- Tensor<float, 3> in2(Eigen::array<int, 3>(72,53,97));
- Tensor<float, 3> in3(Eigen::array<int, 3>(72,53,97));
- Tensor<float, 3> out(Eigen::array<int, 3>(72,53,97));
+ Tensor<float, 3> in1(Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Tensor<float, 3> in2(Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Tensor<float, 3> in3(Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Tensor<float, 3> out(Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
in1.setRandom();
in2.setRandom();
in3.setRandom();
@@ -98,10 +144,10 @@ void test_cuda_elementwise()
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
- Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<int, 3>(72,53,97));
- Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<int, 3>(72,53,97));
- Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in3(d_in3, Eigen::array<int, 3>(72,53,97));
- Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<int, 3>(72,53,97));
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in3(d_in3, Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
gpu_out.device(gpu_device) = gpu_in1 + gpu_in2 * gpu_in3;
@@ -111,7 +157,7 @@ void test_cuda_elementwise()
for (int i = 0; i < 72; ++i) {
for (int j = 0; j < 53; ++j) {
for (int k = 0; k < 97; ++k) {
- VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * in3(Eigen::array<int, 3>(i,j,k)));
+ VERIFY_IS_APPROX(out(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)), in1(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)) + in2(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)) * in3(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)));
}
}
}
@@ -181,7 +227,7 @@ void test_cuda_reduction()
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, 72,53,97,113);
Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);
- array<int, 2> reduction_axis;
+ array<Eigen::DenseIndex, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 3;
@@ -214,8 +260,8 @@ void test_cuda_contraction()
// more than 30 * 1024, which is the number of threads in blocks on
// a 15 SM GK110 GPU
Tensor<float, 4, DataLayout> t_left(6, 50, 3, 31);
- Tensor<float, 5, DataLayout> t_right(Eigen::array<int, 5>(3, 31, 7, 20, 1));
- Tensor<float, 5, DataLayout> t_result(Eigen::array<int, 5>(6, 50, 7, 20, 1));
+ Tensor<float, 5, DataLayout> t_right(Eigen::array<Eigen::DenseIndex, 5>(3, 31, 7, 20, 1));
+ Tensor<float, 5, DataLayout> t_result(Eigen::array<Eigen::DenseIndex, 5>(6, 50, 7, 20, 1));
t_left.setRandom();
t_right.setRandom();
@@ -299,7 +345,7 @@ void test_cuda_convolution_1d()
Eigen::TensorMap<Eigen::Tensor<float, 1, DataLayout> > gpu_kernel(d_kernel, 4);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out, 74,34,11,137);
- Eigen::array<int, 1> dims(1);
+ Eigen::array<Eigen::DenseIndex, 1> dims(1);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
@@ -352,7 +398,7 @@ void test_cuda_convolution_inner_dim_col_major_1d()
Eigen::TensorMap<Eigen::Tensor<float, 1, ColMajor> > gpu_kernel(d_kernel,4);
Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_out(d_out,71,9,11,7);
- Eigen::array<int, 1> dims(0);
+ Eigen::array<Eigen::DenseIndex, 1> dims(0);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
@@ -405,7 +451,7 @@ void test_cuda_convolution_inner_dim_row_major_1d()
Eigen::TensorMap<Eigen::Tensor<float, 1, RowMajor> > gpu_kernel(d_kernel, 4);
Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_out(d_out, 7,9,11,71);
- Eigen::array<int, 1> dims(3);
+ Eigen::array<Eigen::DenseIndex, 1> dims(3);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
@@ -459,7 +505,7 @@ void test_cuda_convolution_2d()
Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_kernel(d_kernel,3,4);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out,74,35,8,137);
- Eigen::array<int, 2> dims(1,2);
+ Eigen::array<Eigen::DenseIndex, 2> dims(1,2);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
@@ -496,9 +542,9 @@ void test_cuda_convolution_2d()
template<int DataLayout>
void test_cuda_convolution_3d()
{
- Tensor<float, 5, DataLayout> input(Eigen::array<int, 5>(74,37,11,137,17));
+ Tensor<float, 5, DataLayout> input(Eigen::array<Eigen::DenseIndex, 5>(74,37,11,137,17));
Tensor<float, 3, DataLayout> kernel(3,4,2);
- Tensor<float, 5, DataLayout> out(Eigen::array<int, 5>(74,35,8,136,17));
+ Tensor<float, 5, DataLayout> out(Eigen::array<Eigen::DenseIndex, 5>(74,35,8,136,17));
input = input.constant(10.0f) + input.random();
kernel = kernel.constant(7.0f) + kernel.random();
@@ -523,7 +569,7 @@ void test_cuda_convolution_3d()
Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > gpu_kernel(d_kernel,3,4,2);
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_out(d_out,74,35,8,136,17);
- Eigen::array<int, 3> dims(1,2,3);
+ Eigen::array<Eigen::DenseIndex, 3> dims(1,2,3);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
@@ -1019,8 +1065,156 @@ void test_cuda_erfc(const Scalar stddev)
cudaFree(d_out);
}
+template <typename Scalar>
+void test_cuda_betainc()
+{
+ Tensor<Scalar, 1> in_x(125);
+ Tensor<Scalar, 1> in_a(125);
+ Tensor<Scalar, 1> in_b(125);
+ Tensor<Scalar, 1> out(125);
+ Tensor<Scalar, 1> expected_out(125);
+ out.setZero();
+
+ Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+
+ Array<Scalar, 1, Dynamic> x(125);
+ Array<Scalar, 1, Dynamic> a(125);
+ Array<Scalar, 1, Dynamic> b(125);
+ Array<Scalar, 1, Dynamic> v(125);
+
+ a << 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999;
+
+ b << 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999,
+ 999.999, 999.999, 999.999;
+
+ x << -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8,
+ 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5,
+ 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2,
+ 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1,
+ 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1,
+ -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8,
+ 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5,
+ 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2,
+ 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1;
+
+ v << nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
+ nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
+ nan, nan, 0.47972119876364683, 0.5, 0.5202788012363533, nan, nan,
+ 0.9518683957740043, 0.9789663010413743, 0.9931729188073435, nan, nan,
+ 0.999995949033062, 0.9999999999993698, 0.9999999999999999, nan, nan,
+ 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, nan, nan,
+ nan, nan, nan, nan, 0.006827081192655869, 0.0210336989586256,
+ 0.04813160422599567, nan, nan, 0.20014344256217678, 0.5000000000000001,
+ 0.7998565574378232, nan, nan, 0.9991401428435834, 0.999999999698403,
+ 0.9999999999999999, nan, nan, 0.9999999999999999, 0.9999999999999999,
+ 0.9999999999999999, nan, nan, nan, nan, nan, nan, nan,
+ 1.0646600232370887e-25, 6.301722877826246e-13, 4.050966937974938e-06, nan,
+ nan, 7.864342668429763e-23, 3.015969667594166e-10, 0.0008598571564165444,
+ nan, nan, 6.031987710123844e-08, 0.5000000000000007, 0.9999999396801229,
+ nan, nan, 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan,
+ nan, nan, nan, nan, nan, nan, 0.0, 7.029920380986636e-306,
+ 2.2450728208591345e-101, nan, nan, 0.0, 9.275871147869727e-302,
+ 1.2232913026152827e-97, nan, nan, 0.0, 3.0891393081932924e-252,
+ 2.9303043666183996e-60, nan, nan, 2.248913486879199e-196,
+ 0.5000000000004947, 0.9999999999999999, nan;
+
+ for (int i = 0; i < 125; ++i) {
+ in_x(i) = x(i);
+ in_a(i) = a(i);
+ in_b(i) = b(i);
+ expected_out(i) = v(i);
+ }
+
+ std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+ Scalar* d_in_x;
+ Scalar* d_in_a;
+ Scalar* d_in_b;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in_x), bytes);
+ cudaMalloc((void**)(&d_in_a), bytes);
+ cudaMalloc((void**)(&d_in_b), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in_a, in_a.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in_b, in_b.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 125);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_a(d_in_a, 125);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_b(d_in_b, 125);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 125);
+
+ gpu_out.device(gpu_device) = betainc(gpu_in_a, gpu_in_b, gpu_in_x);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 1; i < 125; ++i) {
+ if ((std::isnan)(expected_out(i))) {
+ VERIFY((std::isnan)(out(i)));
+ } else {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+ }
+
+ cudaFree(d_in_x);
+ cudaFree(d_in_a);
+ cudaFree(d_in_b);
+ cudaFree(d_out);
+}
+
+
void test_cxx11_tensor_cuda()
{
+ CALL_SUBTEST_1(test_cuda_nullary());
CALL_SUBTEST_1(test_cuda_elementwise_small());
CALL_SUBTEST_1(test_cuda_elementwise());
CALL_SUBTEST_1(test_cuda_props());
@@ -1086,5 +1280,8 @@ void test_cxx11_tensor_cuda()
CALL_SUBTEST_5(test_cuda_igamma<double>());
CALL_SUBTEST_5(test_cuda_igammac<double>());
+
+ CALL_SUBTEST_6(test_cuda_betainc<float>());
+ CALL_SUBTEST_6(test_cuda_betainc<double>());
#endif
}
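
Most of the new lines here are a betainc test against precomputed reference values. The same special function is usable on the host; a minimal sketch, assuming the ternary array overload from the unsupported SpecialFunctions module:

    #include <unsupported/Eigen/SpecialFunctions>
    Eigen::ArrayXf a(3), b(3), x(3);
    a << 0.5f, 2.0f, 8.0f;
    b << 0.5f, 2.0f, 8.0f;
    x << 0.25f, 0.5f, 0.75f;
    // Regularized incomplete beta I_x(a, b), elementwise. Outside a > 0,
    // b > 0, 0 <= x <= 1 the result is NaN, which is what the nan entries
    // in the CUDA test's expected vector encode.
    Eigen::ArrayXf r = Eigen::betainc(a, b, x);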
diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu
index cbe9e6449..fde20ddf2 100644
--- a/unsupported/test/cxx11_tensor_device.cu
+++ b/unsupported/test/cxx11_tensor_device.cu
@@ -13,7 +13,9 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
-
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
@@ -241,7 +243,7 @@ void test_cpu() {
const float result = out(i,j,k);
const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f) +
(in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f);
- if (fabs(expected) < 1e-4 && fabs(result) < 1e-4) {
+ if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) {
continue;
}
VERIFY_IS_APPROX(expected, result);
@@ -258,7 +260,7 @@ void test_cpu() {
in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f) +
(in1(i+1,j,k) * -1.0f + in1(i+1,j+1,k) * -0.3f +
in1(i+1,j,k+1) * -0.7f + in1(i+1,j+1,k+1) * -0.5f);
- if (fabs(expected) < 1e-4 && fabs(result) < 1e-4) {
+ if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) {
continue;
}
VERIFY_IS_APPROX(expected, result);
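
Apart from the guarded cuda_fp16.h include, the change here only adds f suffixes to the threshold literals. What the suffix avoids, in miniature:

    #include <cmath>
    float result = 1.0e-4f;
    bool cmp_double = std::fabs(result) < 1e-4;   // float promoted to double
    bool cmp_float  = std::fabs(result) < 1e-4f;  // stays single precision
    // Without the suffix the comparison is performed in double, which
    // triggers float-to-double promotion warnings; the f suffix keeps the
    // whole expression in float.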
diff --git a/unsupported/test/cxx11_tensor_dimension.cpp b/unsupported/test/cxx11_tensor_dimension.cpp
index 421e73693..16f168ed4 100644
--- a/unsupported/test/cxx11_tensor_dimension.cpp
+++ b/unsupported/test/cxx11_tensor_dimension.cpp
@@ -21,7 +21,7 @@ static void test_dynamic_size()
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<0>(dimensions), 2);
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<1>(dimensions), 3);
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<2>(dimensions), 7);
- VERIFY_IS_EQUAL(dimensions.TotalSize(), 2*3*7);
+ VERIFY_IS_EQUAL((int)dimensions.TotalSize(), 2*3*7);
VERIFY_IS_EQUAL((int)dimensions[0], 2);
VERIFY_IS_EQUAL((int)dimensions[1], 3);
VERIFY_IS_EQUAL((int)dimensions[2], 7);
@@ -34,12 +34,12 @@ static void test_fixed_size()
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<0>(dimensions), 2);
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<1>(dimensions), 3);
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<2>(dimensions), 7);
- VERIFY_IS_EQUAL(dimensions.TotalSize(), 2*3*7);
+ VERIFY_IS_EQUAL((int)dimensions.TotalSize(), 2*3*7);
}
static void test_match()
{
- Eigen::DSizes<int, 3> dyn(2,3,7);
+ Eigen::DSizes<unsigned int, 3> dyn((unsigned int)2,(unsigned int)3,(unsigned int)7);
Eigen::Sizes<2,3,7> stat;
VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn, stat), true);
@@ -51,13 +51,13 @@ static void test_match()
static void test_rank_zero()
{
Eigen::Sizes<> scalar;
- VERIFY_IS_EQUAL(scalar.TotalSize(), 1);
- VERIFY_IS_EQUAL(scalar.rank(), 0);
- VERIFY_IS_EQUAL(internal::array_prod(scalar), 1);
+ VERIFY_IS_EQUAL((int)scalar.TotalSize(), 1);
+ VERIFY_IS_EQUAL((int)scalar.rank(), 0);
+ VERIFY_IS_EQUAL((int)internal::array_prod(scalar), 1);
Eigen::DSizes<ptrdiff_t, 0> dscalar;
- VERIFY_IS_EQUAL(dscalar.TotalSize(), 1);
- VERIFY_IS_EQUAL(dscalar.rank(), 0);
+ VERIFY_IS_EQUAL((int)dscalar.TotalSize(), 1);
+ VERIFY_IS_EQUAL((int)dscalar.rank(), 0);
}
void test_cxx11_tensor_dimension()
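
TotalSize(), rank() and array_prod() do not return int, so comparing them against int literals mixes signedness; the added casts silence that. The issue in miniature:

    #include <cstddef>
    std::size_t total = 2u * 3u * 7u;  // e.g. dimensions.TotalSize()
    int expected = 2 * 3 * 7;
    // total == expected compares unsigned with signed (-Wsign-compare);
    // (int)total == expected compares int with int and stays quiet.
    bool same = ((int)total == expected);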
diff --git a/unsupported/test/cxx11_tensor_expr.cpp b/unsupported/test/cxx11_tensor_expr.cpp
index 8389e9840..77e24cb67 100644
--- a/unsupported/test/cxx11_tensor_expr.cpp
+++ b/unsupported/test/cxx11_tensor_expr.cpp
@@ -16,8 +16,8 @@ using Eigen::RowMajor;
static void test_1d()
{
- Tensor<float, 1> vec1({6});
- Tensor<float, 1, RowMajor> vec2({6});
+ Tensor<float, 1> vec1(6);
+ Tensor<float, 1, RowMajor> vec2(6);
vec1(0) = 4.0; vec2(0) = 0.0;
vec1(1) = 8.0; vec2(1) = 1.0;
@@ -112,13 +112,13 @@ static void test_3d()
Tensor<float, 3> mat1(2,3,7);
Tensor<float, 3, RowMajor> mat2(2,3,7);
- float val = 1.0;
+ float val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
mat2(i,j,k) = val;
- val += 1.0;
+ val += 1.0f;
}
}
}
@@ -142,7 +142,7 @@ static void test_3d()
Tensor<float, 3, RowMajor> mat11(2,3,7);
mat11 = mat2 / 3.14f;
- val = 1.0;
+ val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
@@ -155,7 +155,7 @@ static void test_3d()
VERIFY_IS_APPROX(mat9(i,j,k), val + 3.14f);
VERIFY_IS_APPROX(mat10(i,j,k), val - 3.14f);
VERIFY_IS_APPROX(mat11(i,j,k), val / 3.14f);
- val += 1.0;
+ val += 1.0f;
}
}
}
@@ -167,25 +167,25 @@ static void test_constants()
Tensor<float, 3> mat2(2,3,7);
Tensor<float, 3> mat3(2,3,7);
- float val = 1.0;
+ float val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
- val += 1.0;
+ val += 1.0f;
}
}
}
mat2 = mat1.constant(3.14f);
mat3 = mat1.cwiseMax(7.3f).exp();
- val = 1.0;
+ val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat2(i,j,k), 3.14f);
VERIFY_IS_APPROX(mat3(i,j,k), expf((std::max)(val, 7.3f)));
- val += 1.0;
+ val += 1.0f;
}
}
}
@@ -228,25 +228,25 @@ static void test_functors()
Tensor<float, 3> mat2(2,3,7);
Tensor<float, 3> mat3(2,3,7);
- float val = 1.0;
+ float val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
- val += 1.0;
+ val += 1.0f;
}
}
}
mat2 = mat1.inverse().unaryExpr(&asinf);
mat3 = mat1.unaryExpr(&tanhf);
- val = 1.0;
+ val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat2(i,j,k), asinf(1.0f / mat1(i,j,k)));
VERIFY_IS_APPROX(mat3(i,j,k), tanhf(mat1(i,j,k)));
- val += 1.0;
+ val += 1.0f;
}
}
}
diff --git a/unsupported/test/cxx11_tensor_fft.cpp b/unsupported/test/cxx11_tensor_fft.cpp
index 89874349f..2f14ebc62 100644
--- a/unsupported/test/cxx11_tensor_fft.cpp
+++ b/unsupported/test/cxx11_tensor_fft.cpp
@@ -205,15 +205,15 @@ static void test_fft_real_input_energy() {
VERIFY_IS_EQUAL(output.dimension(i), input.dimension(i));
}
- float energy_original = 0.0;
- float energy_after_fft = 0.0;
+ RealScalar energy_original = 0.0;
+ RealScalar energy_after_fft = 0.0;
for (int i = 0; i < total_size; ++i) {
- energy_original += pow(std::abs(input(i)), 2);
+ energy_original += numext::abs2(input(i));
}
for (int i = 0; i < total_size; ++i) {
- energy_after_fft += pow(std::abs(output(i)), 2);
+ energy_after_fft += numext::abs2(output(i));
}
if(FFTDirection == FFT_FORWARD) {
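
numext::abs2 replaces pow(std::abs(z), 2): it computes |z|^2 directly in the RealScalar type, which is what the retyped energy accumulators expect. A minimal sketch:

    #include <cmath>
    #include <complex>
    #include <Eigen/Core>
    std::complex<float> z(3.0f, 4.0f);
    float e1 = std::pow(std::abs(z), 2.0f);  // sqrt(re^2+im^2), then squared
    float e2 = Eigen::numext::abs2(z);       // re*re + im*im directly
    // Both are 25.0f, but abs2 skips the abs/pow round trip.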
diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp
index 46d741b05..4c660de65 100644
--- a/unsupported/test/cxx11_tensor_fixed_size.cpp
+++ b/unsupported/test/cxx11_tensor_fixed_size.cpp
@@ -188,13 +188,13 @@ static void test_3d()
// VERIFY_IS_EQUAL((mat1.dimension(1)), 3);
// VERIFY_IS_EQUAL((mat1.dimension(2)), 7);
- float val = 0.0;
+ float val = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
mat2(i,j,k) = val;
- val += 1.0;
+ val += 1.0f;
}
}
}
@@ -210,13 +210,13 @@ static void test_3d()
// VERIFY_IS_EQUAL((mat3.dimension(2)), 7);
- val = 0.0;
+ val = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat3(i,j,k), sqrtf(val));
VERIFY_IS_APPROX(mat4(i,j,k), sqrtf(val));
- val += 1.0;
+ val += 1.0f;
}
}
}
@@ -226,12 +226,12 @@ static void test_3d()
static void test_array()
{
TensorFixedSize<float, Sizes<2, 3, 7> > mat1;
- float val = 0.0;
+ float val = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
- val += 1.0;
+ val += 1.0f;
}
}
}
@@ -239,12 +239,12 @@ static void test_array()
TensorFixedSize<float, Sizes<2, 3, 7> > mat3;
mat3 = mat1.pow(3.5f);
- val = 0.0;
+ val = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat3(i,j,k), powf(val, 3.5f));
- val += 1.0;
+ val += 1.0f;
}
}
}
diff --git a/unsupported/test/cxx11_tensor_image_patch.cpp b/unsupported/test/cxx11_tensor_image_patch.cpp
index 5d6a49181..988b01481 100644
--- a/unsupported/test/cxx11_tensor_image_patch.cpp
+++ b/unsupported/test/cxx11_tensor_image_patch.cpp
@@ -568,13 +568,7 @@ static void test_imagenet_patches()
VERIFY_IS_EQUAL(l_out.dimension(4), 16);
// RowMajor
- Tensor<float, 4, RowMajor> l_in_row_major = l_in.swap_layout();
- VERIFY_IS_EQUAL(l_in.dimension(0), l_in_row_major.dimension(3));
- VERIFY_IS_EQUAL(l_in.dimension(1), l_in_row_major.dimension(2));
- VERIFY_IS_EQUAL(l_in.dimension(2), l_in_row_major.dimension(1));
- VERIFY_IS_EQUAL(l_in.dimension(3), l_in_row_major.dimension(0));
-
- Tensor<float, 5, RowMajor> l_out_row_major = l_in_row_major.extract_image_patches(11, 11);
+ Tensor<float, 5, RowMajor> l_out_row_major = l_in.swap_layout().extract_image_patches(11, 11);
VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 16);
VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 128*128);
VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 11);
@@ -589,10 +583,8 @@ static void test_imagenet_patches()
for (int r = 0; r < 11; ++r) {
for (int d = 0; d < 3; ++d) {
float expected = 0.0f;
- float expected_row_major = 0.0f;
if (r-5+i >= 0 && c-5+j >= 0 && r-5+i < 128 && c-5+j < 128) {
expected = l_in(d, r-5+i, c-5+j, b);
- expected_row_major = l_in_row_major(b, c-5+j, r-5+i, d);
}
// ColMajor
if (l_out(d, r, c, patchId, b) != expected) {
@@ -601,15 +593,13 @@ static void test_imagenet_patches()
VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
// RowMajor
if (l_out_row_major(b, patchId, c, r, d) !=
- expected_row_major) {
+ expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j
<< " r=" << r << " c=" << c << " d=" << d << " b=" << b
<< std::endl;
}
VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d),
- expected_row_major);
- // Check that ColMajor and RowMajor agree.
- VERIFY_IS_EQUAL(expected, expected_row_major);
+ expected);
}
}
}
@@ -628,8 +618,7 @@ static void test_imagenet_patches()
VERIFY_IS_EQUAL(l_out.dimension(4), 32);
// RowMajor
- l_in_row_major = l_in.swap_layout();
- l_out_row_major = l_in_row_major.extract_image_patches(9, 9);
+ l_out_row_major = l_in.swap_layout().extract_image_patches(9, 9);
VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 64*64);
VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 9);
@@ -644,10 +633,8 @@ static void test_imagenet_patches()
for (int r = 0; r < 9; ++r) {
for (int d = 0; d < 16; ++d) {
float expected = 0.0f;
- float expected_row_major = 0.0f;
if (r-4+i >= 0 && c-4+j >= 0 && r-4+i < 64 && c-4+j < 64) {
expected = l_in(d, r-4+i, c-4+j, b);
- expected_row_major = l_in_row_major(b, c-4+j, r-4+i, d);
}
// ColMajor
if (l_out(d, r, c, patchId, b) != expected) {
@@ -655,12 +642,10 @@ static void test_imagenet_patches()
}
VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
// RowMajor
- if (l_out_row_major(b, patchId, c, r, d) != expected_row_major) {
+ if (l_out_row_major(b, patchId, c, r, d) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
- VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected_row_major);
- // Check that ColMajor and RowMajor agree.
- VERIFY_IS_EQUAL(expected, expected_row_major);
+ VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
}
}
}
@@ -679,8 +664,7 @@ static void test_imagenet_patches()
VERIFY_IS_EQUAL(l_out.dimension(4), 32);
// RowMajor
- l_in_row_major = l_in.swap_layout();
- l_out_row_major = l_in_row_major.extract_image_patches(7, 7);
+ l_out_row_major = l_in.swap_layout().extract_image_patches(7, 7);
VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 16*16);
VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 7);
@@ -695,10 +679,8 @@ static void test_imagenet_patches()
for (int r = 0; r < 7; ++r) {
for (int d = 0; d < 32; ++d) {
float expected = 0.0f;
- float expected_row_major = 0.0f;
if (r-3+i >= 0 && c-3+j >= 0 && r-3+i < 16 && c-3+j < 16) {
expected = l_in(d, r-3+i, c-3+j, b);
- expected_row_major = l_in_row_major(b, c-3+j, r-3+i, d);
}
// ColMajor
if (l_out(d, r, c, patchId, b) != expected) {
@@ -706,12 +688,10 @@ static void test_imagenet_patches()
}
VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
// RowMajor
- if (l_out_row_major(b, patchId, c, r, d) != expected_row_major) {
+ if (l_out_row_major(b, patchId, c, r, d) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
- VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected_row_major);
- // Check that ColMajor and RowMajor agree.
- VERIFY_IS_EQUAL(expected, expected_row_major);
+ VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
}
}
}
@@ -730,8 +710,7 @@ static void test_imagenet_patches()
VERIFY_IS_EQUAL(l_out.dimension(4), 32);
// RowMajor
- l_in_row_major = l_in.swap_layout();
- l_out_row_major = l_in_row_major.extract_image_patches(3, 3);
+ l_out_row_major = l_in.swap_layout().extract_image_patches(3, 3);
VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 13*13);
VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 3);
@@ -746,10 +725,8 @@ static void test_imagenet_patches()
for (int r = 0; r < 3; ++r) {
for (int d = 0; d < 64; ++d) {
float expected = 0.0f;
- float expected_row_major = 0.0f;
if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 13 && c-1+j < 13) {
expected = l_in(d, r-1+i, c-1+j, b);
- expected_row_major = l_in_row_major(b, c-1+j, r-1+i, d);
}
// ColMajor
if (l_out(d, r, c, patchId, b) != expected) {
@@ -757,12 +734,10 @@ static void test_imagenet_patches()
}
VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
// RowMajor
- if (l_out_row_major(b, patchId, c, r, d) != expected_row_major) {
+ if (l_out_row_major(b, patchId, c, r, d) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
- VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected_row_major);
- // Check that ColMajor and RowMajor agree.
- VERIFY_IS_EQUAL(expected, expected_row_major);
+ VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
}
}
}
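
The row-major reference tensor is dropped because swap_layout() only reverses the dimension order, so the expected values can be read from the original tensor with reversed indices. In miniature:

    #include <unsupported/Eigen/CXX11/Tensor>
    Eigen::Tensor<float, 4> in(3, 8, 8, 2);  // ColMajor, indexed (d, r, c, b)
    in.setRandom();
    Eigen::Tensor<float, 4, Eigen::RowMajor> rm = in.swap_layout();
    // rm(b, c, r, d) == in(d, r, c, b) for all indices, so every check
    // against l_in_row_major was already a check against l_in.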
diff --git a/unsupported/test/cxx11_tensor_index_list.cpp b/unsupported/test/cxx11_tensor_index_list.cpp
index 4ce8dea20..4cf5df666 100644
--- a/unsupported/test/cxx11_tensor_index_list.cpp
+++ b/unsupported/test/cxx11_tensor_index_list.cpp
@@ -159,6 +159,111 @@ static void test_type2index_list()
}
+static void test_type2indexpair_list()
+{
+ Tensor<float, 5> tensor(2,3,5,7,11);
+ tensor.setRandom();
+ tensor += tensor.constant(10.0f);
+
+ typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>> Dims0;
+ typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::type2indexpair<1,11>, Eigen::type2indexpair<2,12>> Dims2_a;
+ typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<2,12>> Dims2_b;
+ typedef Eigen::IndexPairList<Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<1,11>, Eigen::IndexPair<DenseIndex>> Dims2_c;
+
+ Dims0 d0;
+ Dims2_a d2_a;
+
+ Dims2_b d2_b;
+ d2_b.set(1, Eigen::IndexPair<DenseIndex>(1,11));
+
+ Dims2_c d2_c;
+ d2_c.set(0, Eigen::IndexPair<DenseIndex>(Eigen::IndexPair<DenseIndex>(0,10)));
+ d2_c.set(1, Eigen::IndexPair<DenseIndex>(1,11)); // set the type2indexpair entry to its correct value.
+ d2_c.set(2, Eigen::IndexPair<DenseIndex>(2,12));
+
+ VERIFY_IS_EQUAL(d2_a[0].first, 0);
+ VERIFY_IS_EQUAL(d2_a[0].second, 10);
+ VERIFY_IS_EQUAL(d2_a[1].first, 1);
+ VERIFY_IS_EQUAL(d2_a[1].second, 11);
+ VERIFY_IS_EQUAL(d2_a[2].first, 2);
+ VERIFY_IS_EQUAL(d2_a[2].second, 12);
+
+ VERIFY_IS_EQUAL(d2_b[0].first, 0);
+ VERIFY_IS_EQUAL(d2_b[0].second, 10);
+ VERIFY_IS_EQUAL(d2_b[1].first, 1);
+ VERIFY_IS_EQUAL(d2_b[1].second, 11);
+ VERIFY_IS_EQUAL(d2_b[2].first, 2);
+ VERIFY_IS_EQUAL(d2_b[2].second, 12);
+
+ VERIFY_IS_EQUAL(d2_c[0].first, 0);
+ VERIFY_IS_EQUAL(d2_c[0].second, 10);
+ VERIFY_IS_EQUAL(d2_c[1].first, 1);
+ VERIFY_IS_EQUAL(d2_c[1].second, 11);
+ VERIFY_IS_EQUAL(d2_c[2].first, 2);
+ VERIFY_IS_EQUAL(d2_c[2].second, 12);
+
+ EIGEN_STATIC_ASSERT((d2_a.value_known_statically(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((d2_a.value_known_statically(1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((d2_a.value_known_statically(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((d2_b.value_known_statically(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((d2_b.value_known_statically(1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((d2_b.value_known_statically(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((d2_c.value_known_statically(0) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((d2_c.value_known_statically(1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((d2_c.value_known_statically(2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims0>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims0>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(1, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(0, 0) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(2, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims0>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims0>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(1, 11) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(2, 12) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(1, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(2, 12) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(0, 10) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(1, 11) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(2, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+}
+
+
static void test_dynamic_index_list()
{
Tensor<float, 4> tensor(2,3,5,7);
@@ -273,6 +378,7 @@ void test_cxx11_tensor_index_list()
#ifdef EIGEN_HAS_INDEX_LIST
CALL_SUBTEST(test_static_index_list());
CALL_SUBTEST(test_type2index_list());
+ CALL_SUBTEST(test_type2indexpair_list());
CALL_SUBTEST(test_dynamic_index_list());
CALL_SUBTEST(test_mixed_index_list());
CALL_SUBTEST(test_dim_check());
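
IndexPairList is the pair analogue of IndexList: entries declared with type2indexpair are compile-time constants, the rest are set at run time. A hedged sketch of using one as contraction dimensions (assuming contract() accepts the list, which is what the static checks above are probing):

    #include <unsupported/Eigen/CXX11/Tensor>
    // The pair (1, 0) is baked into the type, so the contraction can
    // exploit it at compile time.
    typedef Eigen::IndexPairList<Eigen::type2indexpair<1, 0> > Dims;
    Eigen::Tensor<float, 2> a(2, 3), b(3, 2);
    a.setRandom();
    b.setRandom();
    Eigen::Tensor<float, 2> c = a.contract(b, Dims());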
diff --git a/unsupported/test/cxx11_tensor_intdiv.cpp b/unsupported/test/cxx11_tensor_intdiv.cpp
index 48aa6d368..8e2b70b75 100644
--- a/unsupported/test/cxx11_tensor_intdiv.cpp
+++ b/unsupported/test/cxx11_tensor_intdiv.cpp
@@ -128,7 +128,7 @@ void test_powers_64bit() {
void test_specific() {
// A particular combination that was previously failing
int64_t div = 209715200;
- int64_t num = 3238002688;
+ int64_t num = 3238002688ll;
Eigen::internal::TensorIntDivisor<int64_t> divider(div);
int64_t result = num/div;
int64_t result_op = divider.divide(num);
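
3238002688 exceeds INT_MAX, so some compilers warn about the bare literal; the ll suffix makes the 64-bit type explicit. In miniature:

    #include <cstdint>
    int64_t div  = 209715200;     // fits in 32 bits, no suffix needed
    int64_t num  = 3238002688ll;  // > 2^31 - 1: spell out long long
    int64_t quot = num / div;     // 15, the value divider.divide(num) must match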
diff --git a/unsupported/test/cxx11_tensor_io.cpp b/unsupported/test/cxx11_tensor_io.cpp
index 8bbcf7089..489960529 100644
--- a/unsupported/test/cxx11_tensor_io.cpp
+++ b/unsupported/test/cxx11_tensor_io.cpp
@@ -14,6 +14,20 @@
template<int DataLayout>
+static void test_output_0d()
+{
+ Tensor<int, 0, DataLayout> tensor;
+ tensor() = 123;
+
+ std::stringstream os;
+ os << tensor;
+
+ std::string expected("123");
+ VERIFY_IS_EQUAL(std::string(os.str()), expected);
+}
+
+
+template<int DataLayout>
static void test_output_1d()
{
Tensor<int, 1, DataLayout> tensor(5);
@@ -26,6 +40,12 @@ static void test_output_1d()
std::string expected("0\n1\n2\n3\n4");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
+
+ Eigen::Tensor<double,1,DataLayout> empty_tensor(0);
+ std::stringstream empty_os;
+ empty_os << empty_tensor;
+ std::string empty_string;
+ VERIFY_IS_EQUAL(std::string(empty_os.str()), empty_string);
}
@@ -101,6 +121,8 @@ static void test_output_const()
void test_cxx11_tensor_io()
{
+ CALL_SUBTEST(test_output_0d<ColMajor>());
+ CALL_SUBTEST(test_output_0d<RowMajor>());
CALL_SUBTEST(test_output_1d<ColMajor>());
CALL_SUBTEST(test_output_1d<RowMajor>());
CALL_SUBTEST(test_output_2d<ColMajor>());
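
The new test_output_0d covers streaming a rank-0 tensor, and test_output_1d gains an empty-tensor case. Both behaviors in miniature:

    #include <sstream>
    #include <unsupported/Eigen/CXX11/Tensor>
    Eigen::Tensor<int, 0> scalar;
    scalar() = 123;
    std::stringstream os;
    os << scalar;                    // streams "123", no dimensions
    Eigen::Tensor<double, 1> empty(0);
    os.str("");
    os << empty;                     // a size-0 tensor streams nothing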
diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp
index eb3b891fd..f7de43110 100644
--- a/unsupported/test/cxx11_tensor_morphing.cpp
+++ b/unsupported/test/cxx11_tensor_morphing.cpp
@@ -13,6 +13,7 @@
using Eigen::Tensor;
+template<typename>
static void test_simple_reshape()
{
Tensor<float, 5> tensor1(2,3,1,7,1);
@@ -40,7 +41,7 @@ static void test_simple_reshape()
}
}
-
+template<typename>
static void test_reshape_in_expr() {
MatrixXf m1(2,3*5*7*11);
MatrixXf m2(3*5*7*11,13);
@@ -65,7 +66,7 @@ static void test_reshape_in_expr() {
}
}
-
+template<typename>
static void test_reshape_as_lvalue()
{
Tensor<float, 3> tensor(2,3,7);
@@ -114,6 +115,7 @@ static void test_simple_slice()
}
}
+template<typename=void>
static void test_const_slice()
{
const float b[1] = {42};
@@ -315,6 +317,128 @@ static void test_slice_raw_data()
VERIFY_IS_EQUAL(slice6.data(), tensor.data());
}
+
+template<int DataLayout>
+static void test_strided_slice()
+{
+ typedef Tensor<float, 5, DataLayout> Tensor5f;
+ typedef Eigen::DSizes<Eigen::DenseIndex, 5> Index5;
+ typedef Tensor<float, 2, DataLayout> Tensor2f;
+ typedef Eigen::DSizes<Eigen::DenseIndex, 2> Index2;
+ Tensor<float, 5, DataLayout> tensor(2,3,5,7,11);
+ Tensor<float, 2, DataLayout> tensor2(7,11);
+ tensor.setRandom();
+ tensor2.setRandom();
+
+ if (true) {
+ Tensor2f slice(2,3);
+ Index2 strides(-2,-1);
+ Index2 indicesStart(5,7);
+ Index2 indicesStop(0,4);
+ slice = tensor2.stridedSlice(indicesStart, indicesStop, strides);
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ VERIFY_IS_EQUAL(slice(j,k), tensor2(5-2*j,7-k));
+ }
+ }
+ }
+
+ if(true) {
+ Tensor2f slice(0,1);
+ Index2 strides(1,1);
+ Index2 indicesStart(5,4);
+ Index2 indicesStop(5,5);
+ slice = tensor2.stridedSlice(indicesStart, indicesStop, strides);
+ }
+
+ if(true) { // test clamped degenerate intervals
+ Tensor2f slice(7,11);
+ Index2 strides(1,-1);
+ Index2 indicesStart(-3,20); // should become 0,10
+ Index2 indicesStop(20,-11); // should become 11, -1
+ slice = tensor2.stridedSlice(indicesStart, indicesStop, strides);
+ for (int j = 0; j < 7; ++j) {
+ for (int k = 0; k < 11; ++k) {
+ VERIFY_IS_EQUAL(slice(j,k), tensor2(j,10-k));
+ }
+ }
+ }
+
+ if(true) {
+ Tensor5f slice1(1,1,1,1,1);
+ Eigen::DSizes<Eigen::DenseIndex, 5> indicesStart(1, 2, 3, 4, 5);
+ Eigen::DSizes<Eigen::DenseIndex, 5> indicesStop(2, 3, 4, 5, 6);
+ Eigen::DSizes<Eigen::DenseIndex, 5> strides(1, 1, 1, 1, 1);
+ slice1 = tensor.stridedSlice(indicesStart, indicesStop, strides);
+ VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5));
+ }
+
+ if(true) {
+ Tensor5f slice(1,1,2,2,3);
+ Index5 start(1, 1, 3, 4, 5);
+ Index5 stop(2, 2, 5, 6, 8);
+ Index5 strides(1, 1, 1, 1, 1);
+ slice = tensor.stridedSlice(start, stop, strides);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ VERIFY_IS_EQUAL(slice(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k));
+ }
+ }
+ }
+ }
+
+ if(true) {
+ Tensor5f slice(1,1,2,2,3);
+ Index5 strides3(1, 1, -2, 1, -1);
+ Index5 indices3Start(1, 1, 4, 4, 7);
+ Index5 indices3Stop(2, 2, 0, 6, 4);
+ slice = tensor.stridedSlice(indices3Start, indices3Stop, strides3);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ VERIFY_IS_EQUAL(slice(0,0,i,j,k), tensor(1,1,4-2*i,4+j,7-k));
+ }
+ }
+ }
+ }
+
+ if(false) { // tests degenerate interval
+ Tensor5f slice(1,1,2,2,3);
+ Index5 strides3(1, 1, 2, 1, 1);
+ Index5 indices3Start(1, 1, 4, 4, 7);
+ Index5 indices3Stop(2, 2, 0, 6, 4);
+ slice = tensor.stridedSlice(indices3Start, indices3Stop, strides3);
+ }
+}
+
+template<int DataLayout>
+static void test_strided_slice_write()
+{
+ typedef Tensor<float, 2, DataLayout> Tensor2f;
+ typedef Eigen::DSizes<Eigen::DenseIndex, 2> Index2;
+
+ Tensor<float, 2, DataLayout> tensor(7,11),tensor2(7,11);
+ tensor.setRandom();
+ tensor2=tensor;
+ Tensor2f slice(2,3);
+
+ slice.setRandom();
+
+ Index2 strides(1,1);
+ Index2 indicesStart(3,4);
+ Index2 indicesStop(5,7);
+ Index2 lengths(2,3);
+
+ tensor.slice(indicesStart,lengths)=slice;
+ tensor2.stridedSlice(indicesStart,indicesStop,strides)=slice;
+
+ for(int i=0;i<7;i++) for(int j=0;j<11;j++){
+ VERIFY_IS_EQUAL(tensor(i,j), tensor2(i,j));
+ }
+}
+
+
template<int DataLayout>
static void test_composition()
{
@@ -337,20 +461,25 @@ static void test_composition()
void test_cxx11_tensor_morphing()
{
- CALL_SUBTEST(test_simple_reshape());
- CALL_SUBTEST(test_reshape_in_expr());
- CALL_SUBTEST(test_reshape_as_lvalue());
-
- CALL_SUBTEST(test_simple_slice<ColMajor>());
- CALL_SUBTEST(test_simple_slice<RowMajor>());
- CALL_SUBTEST(test_const_slice());
- CALL_SUBTEST(test_slice_in_expr<ColMajor>());
- CALL_SUBTEST(test_slice_in_expr<RowMajor>());
- CALL_SUBTEST(test_slice_as_lvalue<ColMajor>());
- CALL_SUBTEST(test_slice_as_lvalue<RowMajor>());
- CALL_SUBTEST(test_slice_raw_data<ColMajor>());
- CALL_SUBTEST(test_slice_raw_data<RowMajor>());
-
- CALL_SUBTEST(test_composition<ColMajor>());
- CALL_SUBTEST(test_composition<RowMajor>());
+ CALL_SUBTEST_1(test_simple_reshape<void>());
+ CALL_SUBTEST_1(test_reshape_in_expr<void>());
+ CALL_SUBTEST_1(test_reshape_as_lvalue<void>());
+
+ CALL_SUBTEST_1(test_simple_slice<ColMajor>());
+ CALL_SUBTEST_1(test_simple_slice<RowMajor>());
+ CALL_SUBTEST_1(test_const_slice());
+ CALL_SUBTEST_2(test_slice_in_expr<ColMajor>());
+ CALL_SUBTEST_3(test_slice_in_expr<RowMajor>());
+ CALL_SUBTEST_4(test_slice_as_lvalue<ColMajor>());
+ CALL_SUBTEST_4(test_slice_as_lvalue<RowMajor>());
+ CALL_SUBTEST_5(test_slice_raw_data<ColMajor>());
+ CALL_SUBTEST_5(test_slice_raw_data<RowMajor>());
+
+ CALL_SUBTEST_6(test_strided_slice_write<ColMajor>());
+ CALL_SUBTEST_6(test_strided_slice<ColMajor>());
+ CALL_SUBTEST_6(test_strided_slice_write<RowMajor>());
+ CALL_SUBTEST_6(test_strided_slice<RowMajor>());
+
+ CALL_SUBTEST_7(test_composition<ColMajor>());
+ CALL_SUBTEST_7(test_composition<RowMajor>());
}
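
stridedSlice takes per-dimension start, stop and stride; negative strides walk backwards from start toward stop (exclusive), and out-of-range bounds are clamped. A sketch mirroring the first new subtest:

    #include <unsupported/Eigen/CXX11/Tensor>
    Eigen::Tensor<float, 2> t(7, 11);
    t.setRandom();
    Eigen::DSizes<Eigen::DenseIndex, 2> start(5, 7), stop(0, 4), stride(-2, -1);
    Eigen::Tensor<float, 2> s = t.stridedSlice(start, stop, stride);
    // s has shape (2, 3) and s(j, k) == t(5 - 2*j, 7 - k)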
diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
index 37fe3e9a4..2f86980a2 100644
--- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu
+++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
@@ -13,14 +13,55 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
-
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
+template<typename>
+void test_cuda_numext() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ bool* d_res_half = (bool*)gpu_device.allocate(num_elem * sizeof(bool));
+ bool* d_res_float = (bool*)gpu_device.allocate(num_elem * sizeof(bool));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
+ d_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_half(
+ d_res_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_float(
+ d_res_float, num_elem);
+
+ gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
+ gpu_res_float.device(gpu_device) = gpu_float.unaryExpr(Eigen::internal::scalar_isnan_op<float>());
+ gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().unaryExpr(Eigen::internal::scalar_isnan_op<Eigen::half>());
+
+ Tensor<bool, 1> half_prec(num_elem);
+ Tensor<bool, 1> full_prec(num_elem);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(bool));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(bool));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking numext " << i << std::endl;
+ VERIFY_IS_EQUAL(full_prec(i), half_prec(i));
+ }
+
+ gpu_device.deallocate(d_float);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+
#ifdef EIGEN_HAS_CUDA_FP16
+template<typename>
void test_cuda_conversion() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
@@ -55,7 +96,7 @@ void test_cuda_conversion() {
gpu_device.deallocate(d_conv);
}
-
+template<typename>
void test_cuda_unary() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
@@ -92,7 +133,7 @@ void test_cuda_unary() {
gpu_device.deallocate(d_res_float);
}
-
+template<typename>
void test_cuda_elementwise() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
@@ -124,8 +165,8 @@ void test_cuda_elementwise() {
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
- std::cout << "Checking elemwise " << i << std::endl;
- VERIFY_IS_APPROX(full_prec(i), half_prec(i));
+ std::cout << "Checking elemwise " << i << ": full prec = " << full_prec(i) << " vs half prec = " << half_prec(i) << std::endl;
+ VERIFY_IS_APPROX(static_cast<Eigen::half>(full_prec(i)), static_cast<Eigen::half>(half_prec(i)));
}
gpu_device.deallocate(d_float1);
@@ -134,6 +175,7 @@ void test_cuda_elementwise() {
gpu_device.deallocate(d_res_float);
}
+template<typename>
void test_cuda_trancendental() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
@@ -141,43 +183,58 @@ void test_cuda_trancendental() {
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
- float* d_res1_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
- float* d_res1_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
- float* d_res2_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
- float* d_res2_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float3 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_res1_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res1_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res2_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res2_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res3_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res3_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(d_float1, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(d_float2, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float3(d_float3, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_half(d_res1_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_float(d_res1_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_half(d_res2_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_half(d_res3_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem);
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(
- d_float1, num_elem);
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(
- d_float2, num_elem);
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res1_half(
- d_res1_half, num_elem);
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res1_float(
- d_res1_float, num_elem);
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res2_half(
- d_res2_half, num_elem);
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res2_float(
- d_res2_float, num_elem);
+ gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
+ gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f);
+ gpu_float3.device(gpu_device) = gpu_float3.random();
+ gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::half>();
+ gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::half>();
+ gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast<Eigen::half>();
- gpu_float1.device(gpu_device) = gpu_float1.random();
- gpu_float2.device(gpu_device) = gpu_float2.random();
- gpu_res1_float.device(gpu_device) = gpu_float1.exp();
- gpu_res2_float.device(gpu_device) = gpu_float2.log();
- gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().exp().cast<float>();
- gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>().log().cast<float>();
+ gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>();
+ gpu_res1_half.device(gpu_device) = gpu_res1_half.exp();
+
+ gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>();
+ gpu_res2_half.device(gpu_device) = gpu_res2_half.log();
+
+ gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>();
+ gpu_res3_half.device(gpu_device) = gpu_res3_half.log1p();
Tensor<float, 1> input1(num_elem);
- Tensor<float, 1> half_prec1(num_elem);
- Tensor<float, 1> full_prec1(num_elem);
+ Tensor<Eigen::half, 1> half_prec1(num_elem);
+ Tensor<Eigen::half, 1> full_prec1(num_elem);
Tensor<float, 1> input2(num_elem);
- Tensor<float, 1> half_prec2(num_elem);
- Tensor<float, 1> full_prec2(num_elem);
+ Tensor<Eigen::half, 1> half_prec2(num_elem);
+ Tensor<Eigen::half, 1> full_prec2(num_elem);
+ Tensor<float, 1> input3(num_elem);
+ Tensor<Eigen::half, 1> half_prec3(num_elem);
+ Tensor<Eigen::half, 1> full_prec3(num_elem);
gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float));
- gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(float));
- gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(float));
- gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(float));
- gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(input3.data(), d_float3, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(half_prec3.data(), d_res3_half, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec3.data(), d_res3_float, num_elem*sizeof(Eigen::half));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
@@ -186,17 +243,27 @@ void test_cuda_trancendental() {
}
for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl;
- VERIFY_IS_APPROX(full_prec2(i), half_prec2(i));
+ if(std::abs(input2(i)-1.f)<0.05f) // log lacks accuracy near 1
+ VERIFY_IS_APPROX(full_prec2(i)+Eigen::half(0.1f), half_prec2(i)+Eigen::half(0.1f));
+ else
+ VERIFY_IS_APPROX(full_prec2(i), half_prec2(i));
+ }
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking elemwise plog1 " << i << " input = " << input3(i) << " full = " << full_prec3(i) << " half = " << half_prec3(i) << std::endl;
+ VERIFY_IS_APPROX(full_prec3(i), half_prec3(i));
}
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_float3);
gpu_device.deallocate(d_res1_half);
gpu_device.deallocate(d_res1_float);
gpu_device.deallocate(d_res2_half);
gpu_device.deallocate(d_res2_float);
+ gpu_device.deallocate(d_res3_float);
+ gpu_device.deallocate(d_res3_half);
}
-
+template<typename>
void test_cuda_contractions() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
@@ -206,36 +273,38 @@ void test_cuda_contractions() {
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
- float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
- float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
d_float1, rows, cols);
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
d_float2, rows, cols);
- Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_res_half(
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_half(
d_res_half, rows, cols);
- Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_res_float(
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_float(
d_res_float, rows, cols);
gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
- gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float1.constant(0.5f);
+ gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float2.constant(0.5f);
typedef Tensor<float, 2>::DimensionPair DimPair;
Eigen::array<DimPair, 1> dims(DimPair(1, 0));
- gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims);
- gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().contract(gpu_float2.cast<Eigen::half>(), dims).cast<float>();
+ gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims).cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().contract(gpu_float2.cast<Eigen::half>(), dims);
- Tensor<float, 2> half_prec(rows, cols);
- Tensor<float, 2> full_prec(rows, cols);
- gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
- gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
+ Tensor<Eigen::half, 2> half_prec(rows, cols);
+ Tensor<Eigen::half, 2> full_prec(rows, cols);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(Eigen::half));
gpu_device.synchronize();
for (int i = 0; i < rows; ++i) {
for (int j = 0; j < cols; ++j) {
- std::cout << "Checking contract " << i << " " << j << std::endl;
- VERIFY_IS_APPROX(full_prec(i, j), half_prec(i, j));
+ std::cout << "Checking contract " << i << " " << j << full_prec(i, j) << " " << half_prec(i, j) << std::endl;
+ if (numext::abs(full_prec(i, j) - half_prec(i, j)) > Eigen::half(1e-2f)) {
+ VERIFY_IS_APPROX(full_prec(i, j), half_prec(i, j));
+ }
}
}
@@ -245,8 +314,69 @@ void test_cuda_contractions() {
gpu_device.deallocate(d_res_float);
}
+template<typename>
+void test_cuda_reductions(int size1, int size2, int redux) {
+ std::cout << "Reducing " << size1 << " by " << size2
+ << " tensor along dim " << redux << std::endl;
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = size1*size2;
+ int result_size = (redux == 1 ? size1 : size2);
+
+ float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(result_size * sizeof(Eigen::half));
+ Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(result_size * sizeof(Eigen::half));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
+ d_float1, size1, size2);
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
+ d_float2, size1, size2);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_half(
+ d_res_half, result_size);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_float(
+ d_res_float, result_size);
+
+ gpu_float1.device(gpu_device) = gpu_float1.random() * 2.0f;
+ gpu_float2.device(gpu_device) = gpu_float2.random() * 2.0f;
+
+ Eigen::array<int, 1> redux_dim = {{redux}};
+ gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim).cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim);
+
+ Tensor<Eigen::half, 1> half_prec(result_size);
+ Tensor<Eigen::half, 1> full_prec(result_size);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, result_size*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, result_size*sizeof(Eigen::half));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < result_size; ++i) {
+ std::cout << "EXPECTED " << full_prec(i) << " GOT " << half_prec(i) << std::endl;
+ VERIFY_IS_APPROX(full_prec(i), half_prec(i));
+ }
+
+ gpu_device.deallocate(d_float1);
+ gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+template<typename>
void test_cuda_reductions() {
+ test_cuda_reductions<void>(13, 13, 0);
+ test_cuda_reductions<void>(13, 13, 1);
+
+ test_cuda_reductions<void>(35, 36, 0);
+ test_cuda_reductions<void>(35, 36, 1);
+
+ test_cuda_reductions<void>(36, 35, 0);
+ test_cuda_reductions<void>(36, 35, 1);
+}
+
+template<typename>
+void test_cuda_full_reductions() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int size = 13;
@@ -254,35 +384,39 @@ void test_cuda_reductions() {
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
- float* d_res_half = (float*)gpu_device.allocate(size * sizeof(float));
- float* d_res_float = (float*)gpu_device.allocate(size * sizeof(float));
+ Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half));
+ Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half));
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
d_float1, size, size);
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
d_float2, size, size);
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
- d_res_half, size);
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
- d_res_float, size);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_half(
+ d_res_half);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_float(
+ d_res_float);
gpu_float1.device(gpu_device) = gpu_float1.random();
gpu_float2.device(gpu_device) = gpu_float2.random();
- Eigen::array<int, 1> redux_dim = {{0}};
- gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim);
- gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim).cast<float>();
+ gpu_res_float.device(gpu_device) = gpu_float1.sum().cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum();
- Tensor<float, 1> half_prec(size);
- Tensor<float, 1> full_prec(size);
- gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, size*sizeof(float));
- gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, size*sizeof(float));
+ Tensor<Eigen::half, 0> half_prec;
+ Tensor<Eigen::half, 0> full_prec;
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::half));
gpu_device.synchronize();
- for (int i = 0; i < size; ++i) {
- std::cout << "Checking redux " << i << std::endl;
- VERIFY_IS_APPROX(full_prec(i), half_prec(i));
- }
+ VERIFY_IS_APPROX(full_prec(), half_prec());
+
+ gpu_res_float.device(gpu_device) = gpu_float1.maximum().cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().maximum();
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::half));
+ gpu_device.synchronize();
+
+ VERIFY_IS_APPROX(full_prec(), half_prec());
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
@@ -290,6 +424,7 @@ void test_cuda_reductions() {
gpu_device.deallocate(d_res_float);
}
+template<typename>
void test_cuda_forced_evals() {
Eigen::CudaStreamDevice stream;
@@ -297,59 +432,62 @@ void test_cuda_forced_evals() {
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
- float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_half1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_half2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
- d_res_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half1(
+ d_res_half1, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Unaligned> gpu_res_half2(
+ d_res_half2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
+ Eigen::array<int, 1> no_bcast;
+ no_bcast[0] = 1;
+
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_res_float.device(gpu_device) = gpu_float.abs();
- gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>();
+ gpu_res_half1.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>();
+ gpu_res_half2.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().broadcast(no_bcast).eval().cast<float>();
- Tensor<float, 1> half_prec(num_elem);
+ Tensor<float, 1> half_prec1(num_elem);
+ Tensor<float, 1> half_prec2(num_elem);
Tensor<float, 1> full_prec(num_elem);
- gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res_half1, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res_half2, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
- std::cout << "Checking unary " << i << std::endl;
- VERIFY_IS_APPROX(full_prec(i), half_prec(i));
+ std::cout << "Checking forced eval " << i << full_prec(i) << " vs " << half_prec1(i) << " vs " << half_prec2(i) << std::endl;
+ VERIFY_IS_APPROX(full_prec(i), half_prec1(i));
+ VERIFY_IS_APPROX(full_prec(i), half_prec2(i));
}
gpu_device.deallocate(d_float);
- gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_half1);
+ gpu_device.deallocate(d_res_half2);
gpu_device.deallocate(d_res_float);
}
-
#endif
void test_cxx11_tensor_of_float16_cuda()
{
+ CALL_SUBTEST_1(test_cuda_numext<void>());
+
#ifdef EIGEN_HAS_CUDA_FP16
- Eigen::CudaStreamDevice stream;
- Eigen::GpuDevice device(&stream);
- if (device.majorDeviceVersion() > 5 ||
- (device.majorDeviceVersion() == 5 && device.minorDeviceVersion() >= 3)) {
- std::cout << "Running test on device with capability " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << std::endl;
-
- CALL_SUBTEST_1(test_cuda_conversion());
- CALL_SUBTEST_1(test_cuda_unary());
- CALL_SUBTEST_1(test_cuda_elementwise());
- CALL_SUBTEST_1(test_cuda_trancendental());
- CALL_SUBTEST_2(test_cuda_contractions());
- CALL_SUBTEST_3(test_cuda_reductions());
- CALL_SUBTEST_4(test_cuda_forced_evals());
- }
- else {
- std::cout << "Half floats require compute capability of at least 5.3. This device only supports " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << ". Skipping the test" << std::endl;
- }
+ CALL_SUBTEST_1(test_cuda_conversion<void>());
+ CALL_SUBTEST_1(test_cuda_unary<void>());
+ CALL_SUBTEST_1(test_cuda_elementwise<void>());
+ CALL_SUBTEST_1(test_cuda_trancendental<void>());
+ CALL_SUBTEST_2(test_cuda_contractions<void>());
+ CALL_SUBTEST_3(test_cuda_reductions<void>());
+ CALL_SUBTEST_4(test_cuda_full_reductions<void>());
+ CALL_SUBTEST_5(test_cuda_forced_evals<void>());
#else
std::cout << "Half floats are not supported by this version of cuda: skipping the test" << std::endl;
#endif
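
Note on the comparison pattern used throughout this file: each test evaluates the same expression twice on the device, once computed in float (then narrowed to half) and once computed natively in half, and verifies the two agree to roughly half precision. A minimal host-only sketch of that idea, assuming just the CXX11 Tensor module; the size, the abs() op, and the 1e-2 tolerance are illustrative, not taken from the tests:

```cpp
#include <Eigen/CXX11/Tensor>
#include <cmath>
#include <iostream>

int main() {
  Eigen::Tensor<float, 1> input(16);
  input.setRandom();

  // "Full precision" path: compute in float, then narrow the result to half.
  Eigen::Tensor<Eigen::half, 1> full_prec = input.abs().cast<Eigen::half>();
  // "Half precision" path: narrow first, then compute entirely in half.
  Eigen::Tensor<Eigen::half, 1> half_prec = input.cast<Eigen::half>().abs();

  for (int i = 0; i < 16; ++i) {
    // fp16 carries roughly 3 decimal digits, hence the loose tolerance.
    float delta = std::fabs(static_cast<float>(full_prec(i)) -
                            static_cast<float>(half_prec(i)));
    if (delta > 1e-2f)
      std::cout << "mismatch at " << i << ": " << delta << std::endl;
  }
  return 0;
}
```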
diff --git a/unsupported/test/cxx11_tensor_random_cuda.cu b/unsupported/test/cxx11_tensor_random_cuda.cu
index 5d091de15..b3be199e1 100644
--- a/unsupported/test/cxx11_tensor_random_cuda.cu
+++ b/unsupported/test/cxx11_tensor_random_cuda.cu
@@ -13,10 +13,61 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <Eigen/CXX11/Tensor>
-static void test_default()
+
+void test_cuda_random_uniform()
+{
+ Tensor<float, 2> out(72,97);
+ out.setZero();
+
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_out;
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);
+
+ gpu_out.device(gpu_device) = gpu_out.random();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ // For now we just check this code doesn't crash.
+ // TODO: come up with a valid test of randomness
+}
+
+
+void test_cuda_random_normal()
+{
+ Tensor<float, 2> out(72,97);
+ out.setZero();
+
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_out;
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);
+
+ Eigen::internal::NormalRandomGenerator<float> gen(true);
+ gpu_out.device(gpu_device) = gpu_out.random(gen);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+}
+
+static void test_complex()
{
Tensor<std::complex<float>, 1> vec(6);
vec.setRandom();
@@ -31,5 +82,7 @@ static void test_default()
void test_cxx11_tensor_random_cuda()
{
- CALL_SUBTEST(test_default());
+ CALL_SUBTEST(test_cuda_random_uniform());
+ CALL_SUBTEST(test_cuda_random_normal());
+ CALL_SUBTEST(test_complex());
}
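
The two new CUDA tests deliberately stop at "runs without crashing" (see the TODO above). If a light statistical sanity check were wanted, a host-side sketch could look like the following; it assumes setRandom() fills float tensors uniformly in [0, 1), as the device generator exercised above does, and the 0.05 bound is a loose illustrative choice:

```cpp
#include <Eigen/CXX11/Tensor>
#include <cassert>
#include <cmath>

int main() {
  Eigen::Tensor<float, 2> out(72, 97);
  out.setRandom();  // uniform draws, assumed in [0, 1) for float

  // With ~7000 samples the mean should sit within a few standard
  // errors of the midpoint 0.5.
  Eigen::Tensor<float, 0> mean = out.mean();
  assert(std::fabs(mean() - 0.5f) < 0.05f);
  return 0;
}
```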
diff --git a/unsupported/test/cxx11_tensor_reduction.cpp b/unsupported/test/cxx11_tensor_reduction.cpp
index 6a128901a..1490ec3da 100644
--- a/unsupported/test/cxx11_tensor_reduction.cpp
+++ b/unsupported/test/cxx11_tensor_reduction.cpp
@@ -239,6 +239,33 @@ static void test_simple_reductions() {
}
}
+
+template <int DataLayout>
+static void test_reductions_in_expr() {
+ Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
+ tensor.setRandom();
+ array<ptrdiff_t, 2> reduction_axis2;
+ reduction_axis2[0] = 1;
+ reduction_axis2[1] = 3;
+
+ Tensor<float, 2, DataLayout> result(2, 5);
+ result = result.constant(1.0f) - tensor.sum(reduction_axis2);
+ VERIFY_IS_EQUAL(result.dimension(0), 2);
+ VERIFY_IS_EQUAL(result.dimension(1), 5);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ float sum = 0.0f;
+ for (int k = 0; k < 3; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ sum += tensor(i, k, j, l);
+ }
+ }
+ VERIFY_IS_APPROX(result(i, j), 1.0f - sum);
+ }
+ }
+}
+
+
template <int DataLayout>
static void test_full_reductions() {
Tensor<float, 2, DataLayout> tensor(2, 3);
@@ -341,7 +368,7 @@ static void test_static_dims() {
Tensor<float, 2, DataLayout> out(72, 97);
in.setRandom();
-#ifndef EIGEN_HAS_CONSTEXPR
+#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 3;
@@ -371,7 +398,7 @@ static void test_innermost_last_dims() {
in.setRandom();
// Reduce on the innermost dimensions.
-#ifndef EIGEN_HAS_CONSTEXPR
+#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 0;
reduction_axis[1] = 1;
@@ -402,7 +429,7 @@ static void test_innermost_first_dims() {
in.setRandom();
// Reduce on the innermost dimensions.
-#ifndef EIGEN_HAS_CONSTEXPR
+#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 2;
reduction_axis[1] = 3;
@@ -433,7 +460,7 @@ static void test_reduce_middle_dims() {
in.setRandom();
// Reduce on the innermost dimensions.
-#ifndef EIGEN_HAS_CONSTEXPR
+#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 2;
@@ -462,6 +489,8 @@ void test_cxx11_tensor_reduction() {
CALL_SUBTEST(test_trivial_reductions<RowMajor>());
CALL_SUBTEST(test_simple_reductions<ColMajor>());
CALL_SUBTEST(test_simple_reductions<RowMajor>());
+ CALL_SUBTEST(test_reductions_in_expr<ColMajor>());
+ CALL_SUBTEST(test_reductions_in_expr<RowMajor>());
CALL_SUBTEST(test_full_reductions<ColMajor>());
CALL_SUBTEST(test_full_reductions<RowMajor>());
CALL_SUBTEST(test_user_defined_reductions<ColMajor>());
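
For reference, the axis convention exercised by test_reductions_in_expr above: summing away axes {1, 3} of a (2, 3, 5, 7) tensor collapses those two dimensions and leaves a (2, 5) result. A self-contained sketch:

```cpp
#include <Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Tensor<float, 4> t(2, 3, 5, 7);
  t.setRandom();

  Eigen::array<ptrdiff_t, 2> axes = {{1, 3}};
  Eigen::Tensor<float, 2> s = t.sum(axes);

  assert(s.dimension(0) == 2);  // surviving axes keep their original order
  assert(s.dimension(1) == 5);
  return 0;
}
```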
diff --git a/unsupported/test/cxx11_tensor_reduction_cuda.cu b/unsupported/test/cxx11_tensor_reduction_cuda.cu
index cad0c08e0..6858b43a7 100644
--- a/unsupported/test/cxx11_tensor_reduction_cuda.cu
+++ b/unsupported/test/cxx11_tensor_reduction_cuda.cu
@@ -12,11 +12,14 @@
#define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda
#define EIGEN_USE_GPU
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
-template<int DataLayout>
+template<typename Type, int DataLayout>
static void test_full_reductions() {
Eigen::CudaStreamDevice stream;
@@ -25,24 +28,24 @@ static void test_full_reductions() {
const int num_rows = internal::random<int>(1024, 5*1024);
const int num_cols = internal::random<int>(1024, 5*1024);
- Tensor<float, 2, DataLayout> in(num_rows, num_cols);
+ Tensor<Type, 2, DataLayout> in(num_rows, num_cols);
in.setRandom();
- Tensor<float, 0, DataLayout> full_redux;
+ Tensor<Type, 0, DataLayout> full_redux;
full_redux = in.sum();
- std::size_t in_bytes = in.size() * sizeof(float);
- std::size_t out_bytes = full_redux.size() * sizeof(float);
- float* gpu_in_ptr = static_cast<float*>(gpu_device.allocate(in_bytes));
- float* gpu_out_ptr = static_cast<float*>(gpu_device.allocate(out_bytes));
+ std::size_t in_bytes = in.size() * sizeof(Type);
+ std::size_t out_bytes = full_redux.size() * sizeof(Type);
+ Type* gpu_in_ptr = static_cast<Type*>(gpu_device.allocate(in_bytes));
+ Type* gpu_out_ptr = static_cast<Type*>(gpu_device.allocate(out_bytes));
gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);
- TensorMap<Tensor<float, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols);
- TensorMap<Tensor<float, 0, DataLayout> > out_gpu(gpu_out_ptr);
+ TensorMap<Tensor<Type, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols);
+ TensorMap<Tensor<Type, 0, DataLayout> > out_gpu(gpu_out_ptr);
out_gpu.device(gpu_device) = in_gpu.sum();
- Tensor<float, 0, DataLayout> full_redux_gpu;
+ Tensor<Type, 0, DataLayout> full_redux_gpu;
gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
gpu_device.synchronize();
@@ -53,7 +56,102 @@ static void test_full_reductions() {
gpu_device.deallocate(gpu_out_ptr);
}
+template<typename Type, int DataLayout>
+static void test_first_dim_reductions() {
+ int dim_x = 33;
+ int dim_y = 1;
+ int dim_z = 128;
+
+ Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z);
+ in.setRandom();
+
+ Eigen::array<int, 1> red_axis;
+ red_axis[0] = 0;
+ Tensor<Type, 2, DataLayout> redux = in.sum(red_axis);
+
+ // Create device
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice dev(&stream);
+
+ // Create data(T)
+ Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type));
+ Type* out_data = (Type*)dev.allocate(dim_z*dim_y*sizeof(Type));
+ Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z);
+ Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_y, dim_z);
+
+ // Perform operation
+ dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type));
+ gpu_out.device(dev) = gpu_in.sum(red_axis);
+ gpu_out.device(dev) += gpu_in.sum(red_axis);
+ Tensor<Type, 2, DataLayout> redux_gpu(dim_y, dim_z);
+ dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type));
+ dev.synchronize();
+
+ // Check that the CPU and GPU reductions return the same result.
+ for (int i = 0; i < gpu_out.size(); ++i) {
+ VERIFY_IS_APPROX(2*redux(i), redux_gpu(i));
+ }
+
+ dev.deallocate(in_data);
+ dev.deallocate(out_data);
+}
+
+template<typename Type, int DataLayout>
+static void test_last_dim_reductions() {
+ int dim_x = 128;
+ int dim_y = 1;
+ int dim_z = 33;
+
+ Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z);
+ in.setRandom();
+
+ Eigen::array<int, 1> red_axis;
+ red_axis[0] = 2;
+ Tensor<Type, 2, DataLayout> redux = in.sum(red_axis);
+
+ // Create device
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice dev(&stream);
+
+ // Create data
+ Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type));
+ Type* out_data = (Type*)dev.allocate(dim_x*dim_y*sizeof(Type));
+ Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z);
+ Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_x, dim_y);
+
+ // Perform operation
+ dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type));
+ gpu_out.device(dev) = gpu_in.sum(red_axis);
+ gpu_out.device(dev) += gpu_in.sum(red_axis);
+ Tensor<Type, 2, DataLayout> redux_gpu(dim_x, dim_y);
+ dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type));
+ dev.synchronize();
+
+ // Check that the CPU and GPU reductions return the same result.
+ for (int i = 0; i < gpu_out.size(); ++i) {
+ VERIFY_IS_APPROX(2*redux(i), redux_gpu(i));
+ }
+
+ dev.deallocate(in_data);
+ dev.deallocate(out_data);
+}
+
+
void test_cxx11_tensor_reduction_cuda() {
- CALL_SUBTEST_1(test_full_reductions<ColMajor>());
- CALL_SUBTEST_2(test_full_reductions<RowMajor>());
+ CALL_SUBTEST_1((test_full_reductions<float, ColMajor>()));
+ CALL_SUBTEST_1((test_full_reductions<double, ColMajor>()));
+ CALL_SUBTEST_2((test_full_reductions<float, RowMajor>()));
+ CALL_SUBTEST_2((test_full_reductions<double, RowMajor>()));
+
+ CALL_SUBTEST_3((test_first_dim_reductions<float, ColMajor>()));
+ CALL_SUBTEST_3((test_first_dim_reductions<double, ColMajor>()));
+ CALL_SUBTEST_4((test_first_dim_reductions<float, RowMajor>()));
+// Outer reductions of doubles aren't supported just yet.
+// CALL_SUBTEST_4((test_first_dim_reductions<double, RowMajor>()))
+
+ CALL_SUBTEST_5((test_last_dim_reductions<float, ColMajor>()));
+// Outer reductions of doubles aren't supported just yet.
+// CALL_SUBTEST_5((test_last_dim_reductions<double, ColMajor>()));
+ CALL_SUBTEST_6((test_last_dim_reductions<float, RowMajor>()));
+ CALL_SUBTEST_6((test_last_dim_reductions<double, RowMajor>()));
}
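
Note that both new device tests assign the reduction and then accumulate the same reduction with +=, which is why their verification loops compare against 2*redux(i). A CPU-only sketch of that assign-then-accumulate pattern; the sizes mirror test_first_dim_reductions and the tolerance is illustrative:

```cpp
#include <Eigen/CXX11/Tensor>
#include <cassert>
#include <cmath>

int main() {
  Eigen::Tensor<float, 3> in(33, 1, 128);
  in.setRandom();
  Eigen::array<int, 1> axis = {{0}};

  Eigen::Tensor<float, 2> out = in.sum(axis);
  out += in.sum(axis);  // second pass accumulates into the first result

  Eigen::Tensor<float, 2> ref = in.sum(axis);
  for (ptrdiff_t i = 0; i < out.size(); ++i)
    assert(std::fabs(out.data()[i] - 2.0f * ref.data()[i]) < 1e-3f);
  return 0;
}
```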
diff --git a/unsupported/test/cxx11_tensor_scan.cpp b/unsupported/test/cxx11_tensor_scan.cpp
new file mode 100644
index 000000000..af59aa3ef
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_scan.cpp
@@ -0,0 +1,110 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Igor Babuschkin <igor@babuschk.in>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include <limits>
+#include <numeric>
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template <int DataLayout, typename Type=float, bool Exclusive = false>
+static void test_1d_scan()
+{
+ int size = 50;
+ Tensor<Type, 1, DataLayout> tensor(size);
+ tensor.setRandom();
+ Tensor<Type, 1, DataLayout> result = tensor.cumsum(0, Exclusive);
+
+ VERIFY_IS_EQUAL(tensor.dimension(0), result.dimension(0));
+
+ Type accum = 0;
+ for (int i = 0; i < size; i++) {
+ if (Exclusive) {
+ VERIFY_IS_EQUAL(result(i), accum);
+ accum += tensor(i);
+ } else {
+ accum += tensor(i);
+ VERIFY_IS_EQUAL(result(i), accum);
+ }
+ }
+
+ accum = 1;
+ result = tensor.cumprod(0, Exclusive);
+ for (int i = 0; i < size; i++) {
+ if (Exclusive) {
+ VERIFY_IS_EQUAL(result(i), accum);
+ accum *= tensor(i);
+ } else {
+ accum *= tensor(i);
+ VERIFY_IS_EQUAL(result(i), accum);
+ }
+ }
+}
+
+template <int DataLayout, typename Type=float>
+static void test_4d_scan()
+{
+ int size = 5;
+ Tensor<Type, 4, DataLayout> tensor(size, size, size, size);
+ tensor.setRandom();
+
+ Tensor<Type, 4, DataLayout> result(size, size, size, size);
+
+ result = tensor.cumsum(0);
+ Type accum = 0;
+ for (int i = 0; i < size; i++) {
+ accum += tensor(i, 1, 2, 3);
+ VERIFY_IS_EQUAL(result(i, 1, 2, 3), accum);
+ }
+ result = tensor.cumsum(1);
+ accum = 0;
+ for (int i = 0; i < size; i++) {
+ accum += tensor(1, i, 2, 3);
+ VERIFY_IS_EQUAL(result(1, i, 2, 3), accum);
+ }
+ result = tensor.cumsum(2);
+ accum = 0;
+ for (int i = 0; i < size; i++) {
+ accum += tensor(1, 2, i, 3);
+ VERIFY_IS_EQUAL(result(1, 2, i, 3), accum);
+ }
+ result = tensor.cumsum(3);
+ accum = 0;
+ for (int i = 0; i < size; i++) {
+ accum += tensor(1, 2, 3, i);
+ VERIFY_IS_EQUAL(result(1, 2, 3, i), accum);
+ }
+}
+
+template <int DataLayout>
+static void test_tensor_maps() {
+ int inputs[20];
+ TensorMap<Tensor<int, 1, DataLayout> > tensor_map(inputs, 20);
+ tensor_map.setRandom();
+
+ Tensor<int, 1, DataLayout> result = tensor_map.cumsum(0);
+
+ int accum = 0;
+ for (int i = 0; i < 20; ++i) {
+ accum += tensor_map(i);
+ VERIFY_IS_EQUAL(result(i), accum);
+ }
+}
+
+void test_cxx11_tensor_scan() {
+ CALL_SUBTEST((test_1d_scan<ColMajor, float, true>()));
+ CALL_SUBTEST((test_1d_scan<ColMajor, float, false>()));
+ CALL_SUBTEST((test_1d_scan<RowMajor, float, true>()));
+ CALL_SUBTEST((test_1d_scan<RowMajor, float, false>()));
+ CALL_SUBTEST(test_4d_scan<ColMajor>());
+ CALL_SUBTEST(test_4d_scan<RowMajor>());
+ CALL_SUBTEST(test_tensor_maps<ColMajor>());
+ CALL_SUBTEST(test_tensor_maps<RowMajor>());
+}
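
A tiny worked example of the inclusive/exclusive distinction these scans exercise: for the input [1, 2, 3], the inclusive cumsum is [1, 3, 6] while the exclusive cumsum is [0, 1, 3]:

```cpp
#include <Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Tensor<float, 1> t(3);
  t.setValues({1.f, 2.f, 3.f});

  Eigen::Tensor<float, 1> inclusive = t.cumsum(0, /*exclusive=*/false);
  Eigen::Tensor<float, 1> exclusive = t.cumsum(0, /*exclusive=*/true);

  assert(inclusive(0) == 1.f && inclusive(1) == 3.f && inclusive(2) == 6.f);
  assert(exclusive(0) == 0.f && exclusive(1) == 1.f && exclusive(2) == 3.f);
  return 0;
}
```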
diff --git a/unsupported/test/cxx11_tensor_scan_cuda.cu b/unsupported/test/cxx11_tensor_scan_cuda.cu
new file mode 100644
index 000000000..761d11fd1
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_scan_cuda.cu
@@ -0,0 +1,79 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_scan_cuda
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+typedef Tensor<float, 1>::DimensionPair DimPair;
+
+template<int DataLayout>
+void test_cuda_cumsum(int m_size, int k_size, int n_size)
+{
+ std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
+ Tensor<float, 3, DataLayout> t_input(m_size, k_size, n_size);
+ Tensor<float, 3, DataLayout> t_result(m_size, k_size, n_size);
+ Tensor<float, 3, DataLayout> t_result_gpu(m_size, k_size, n_size);
+
+ t_input.setRandom();
+
+ std::size_t t_input_bytes = t_input.size() * sizeof(float);
+ std::size_t t_result_bytes = t_result.size() * sizeof(float);
+
+ float* d_t_input;
+ float* d_t_result;
+
+ cudaMalloc((void**)(&d_t_input), t_input_bytes);
+ cudaMalloc((void**)(&d_t_result), t_result_bytes);
+
+ cudaMemcpy(d_t_input, t_input.data(), t_input_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> >
+ gpu_t_input(d_t_input, Eigen::array<int, 3>(m_size, k_size, n_size));
+ Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> >
+ gpu_t_result(d_t_result, Eigen::array<int, 3>(m_size, k_size, n_size));
+
+ gpu_t_result.device(gpu_device) = gpu_t_input.cumsum(1);
+ t_result = t_input.cumsum(1);
+
+ cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost);
+ for (size_t i = 0; i < t_result.size(); i++) {
+ if (fabsf(t_result(i) - t_result_gpu(i)) < 1e-4f) {
+ continue;
+ }
+ if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), 1e-4f)) {
+ continue;
+ }
+ std::cout << "mismatch detected at index " << i << ": " << t_result(i)
+ << " vs " << t_result_gpu(i) << std::endl;
+ assert(false);
+ }
+
+ cudaFree((void*)d_t_input);
+ cudaFree((void*)d_t_result);
+}
+
+
+void test_cxx11_tensor_scan_cuda()
+{
+ CALL_SUBTEST_1(test_cuda_cumsum<ColMajor>(128, 128, 128));
+ CALL_SUBTEST_2(test_cuda_cumsum<RowMajor>(128, 128, 128));
+}
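
The mismatch check above accepts a value pair when either the absolute difference or Eigen::internal::isApprox (a relative comparison) passes. A standalone sketch of that accept-if-close logic, with a hand-rolled relative test standing in for isApprox, which scales by the smaller magnitude:

```cpp
#include <algorithm>
#include <cmath>

// Pass when either the absolute error or the relative error is under tol.
bool approx_ok(float a, float b, float tol = 1e-4f) {
  if (std::fabs(a - b) < tol) return true;          // absolute tolerance
  float scale = std::min(std::fabs(a), std::fabs(b));
  return std::fabs(a - b) <= tol * scale;           // relative tolerance
}
```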
diff --git a/unsupported/test/cxx11_tensor_sugar.cpp b/unsupported/test/cxx11_tensor_sugar.cpp
index a03f75cfe..2f56eb495 100644
--- a/unsupported/test/cxx11_tensor_sugar.cpp
+++ b/unsupported/test/cxx11_tensor_sugar.cpp
@@ -33,7 +33,7 @@ static void test_comparison_sugar() {
}
-static void test_scalar_sugar() {
+static void test_scalar_sugar_add_mul() {
Tensor<float, 3> A(6, 7, 5);
Tensor<float, 3> B(6, 7, 5);
A.setRandom();
@@ -41,21 +41,41 @@ static void test_scalar_sugar() {
const float alpha = 0.43f;
const float beta = 0.21f;
+ const float gamma = 0.14f;
- Tensor<float, 3> R = A * A.constant(alpha) + B * B.constant(beta);
- Tensor<float, 3> S = A * alpha + B * beta;
-
- // TODO: add enough syntactic sugar to support this
- // Tensor<float, 3> T = alpha * A + beta * B;
+ Tensor<float, 3> R = A.constant(gamma) + A * A.constant(alpha) + B * B.constant(beta);
+ Tensor<float, 3> S = A * alpha + B * beta + gamma;
+ Tensor<float, 3> T = gamma + alpha * A + beta * B;
for (int i = 0; i < 6*7*5; ++i) {
VERIFY_IS_APPROX(R(i), S(i));
+ VERIFY_IS_APPROX(R(i), T(i));
}
}
+static void test_scalar_sugar_sub_div() {
+ Tensor<float, 3> A(6, 7, 5);
+ Tensor<float, 3> B(6, 7, 5);
+ A.setRandom();
+ B.setRandom();
+
+ const float alpha = 0.43f;
+ const float beta = 0.21f;
+ const float gamma = 0.14f;
+ const float delta = 0.32f;
+
+ Tensor<float, 3> R = A.constant(gamma) - A / A.constant(alpha)
+ - B.constant(beta) / B - A.constant(delta);
+ Tensor<float, 3> S = gamma - A / alpha - beta / B - delta;
+
+ for (int i = 0; i < 6*7*5; ++i) {
+ VERIFY_IS_APPROX(R(i), S(i));
+ }
+}
void test_cxx11_tensor_sugar()
{
CALL_SUBTEST(test_comparison_sugar());
- CALL_SUBTEST(test_scalar_sugar());
+ CALL_SUBTEST(test_scalar_sugar_add_mul());
+ CALL_SUBTEST(test_scalar_sugar_sub_div());
}
diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
index e46197464..2ef665f30 100644
--- a/unsupported/test/cxx11_tensor_thread_pool.cpp
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -91,7 +91,7 @@ void test_multithread_contraction()
for (ptrdiff_t i = 0; i < t_result.size(); i++) {
VERIFY(&t_result.data()[i] != &m_result.data()[i]);
- if (fabs(t_result(i) - m_result(i)) < 1e-4) {
+ if (fabsf(t_result(i) - m_result(i)) < 1e-4f) {
continue;
}
if (Eigen::internal::isApprox(t_result(i), m_result(i), 1e-4f)) {
@@ -132,7 +132,7 @@ void test_contraction_corner_cases()
for (ptrdiff_t i = 0; i < t_result.size(); i++) {
assert(!(numext::isnan)(t_result.data()[i]));
- if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) {
+ if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
assert(false);
}
@@ -147,7 +147,7 @@ void test_contraction_corner_cases()
m_result = m_left.transpose() * m_right;
for (ptrdiff_t i = 0; i < t_result.size(); i++) {
assert(!(numext::isnan)(t_result.data()[i]));
- if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) {
+ if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
assert(false);
}
@@ -165,7 +165,7 @@ void test_contraction_corner_cases()
m_result = m_left.transpose() * m_right;
for (ptrdiff_t i = 0; i < t_result.size(); i++) {
assert(!(numext::isnan)(t_result.data()[i]));
- if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) {
+ if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
assert(false);
}
@@ -183,7 +183,7 @@ void test_contraction_corner_cases()
m_result = m_left.transpose() * m_right;
for (ptrdiff_t i = 0; i < t_result.size(); i++) {
assert(!(numext::isnan)(t_result.data()[i]));
- if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) {
+ if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
assert(false);
}
@@ -226,7 +226,7 @@ void test_multithread_contraction_agrees_with_singlethread() {
for (ptrdiff_t i = 0; i < st_result.size(); i++) {
// if both of the values are very small, then do nothing (because the test will fail
// due to numerical precision issues when values are small)
- if (fabs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4) {
+ if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) {
VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
}
}
@@ -234,6 +234,42 @@ void test_multithread_contraction_agrees_with_singlethread() {
template<int DataLayout>
+void test_full_contraction() {
+ int contract_size1 = internal::random<int>(1, 500);
+ int contract_size2 = internal::random<int>(1, 500);
+
+ Tensor<float, 2, DataLayout> left(contract_size1,
+ contract_size2);
+ Tensor<float, 2, DataLayout> right(contract_size1,
+ contract_size2);
+ left.setRandom();
+ right.setRandom();
+
+ // add constants to shift values away from 0 for more precision
+ left += left.constant(1.5f);
+ right += right.constant(1.5f);
+
+ typedef Tensor<float, 2>::DimensionPair DimPair;
+ Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}});
+
+ Eigen::ThreadPool tp(internal::random<int>(2, 11));
+ Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
+
+ Tensor<float, 0, DataLayout> st_result;
+ st_result = left.contract(right, dims);
+
+ Tensor<float, 0, DataLayout> tp_result;
+ tp_result.device(thread_pool_device) = left.contract(right, dims);
+
+ VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
+ // if both of the values are very small, then do nothing (because the test will fail
+ // due to numerical precision issues when values are small)
+ if (numext::abs(st_result() - tp_result()) >= 1e-4f) {
+ VERIFY_IS_APPROX(st_result(), tp_result());
+ }
+}
+
+template<int DataLayout>
void test_multithreaded_reductions() {
const int num_threads = internal::random<int>(3, 11);
ThreadPool thread_pool(num_threads);
@@ -324,6 +360,9 @@ void test_cxx11_tensor_thread_pool()
CALL_SUBTEST_4(test_contraction_corner_cases<ColMajor>());
CALL_SUBTEST_4(test_contraction_corner_cases<RowMajor>());
+ CALL_SUBTEST_4(test_full_contraction<ColMajor>());
+ CALL_SUBTEST_4(test_full_contraction<RowMajor>());
+
CALL_SUBTEST_5(test_multithreaded_reductions<ColMajor>());
CALL_SUBTEST_5(test_multithreaded_reductions<RowMajor>());
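
Since test_full_contraction contracts both indices of its rank-2 operands, the result is rank-0 and reduces to the Frobenius inner product of the two tensors:

```latex
\operatorname{contract}\bigl(L,\, R,\, \{(0,0),\,(1,1)\}\bigr) \;=\; \sum_{i}\sum_{j} L_{ij}\, R_{ij}
```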
diff --git a/unsupported/test/kronecker_product.cpp b/unsupported/test/kronecker_product.cpp
index 02411a262..e770049e5 100644
--- a/unsupported/test/kronecker_product.cpp
+++ b/unsupported/test/kronecker_product.cpp
@@ -9,12 +9,12 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifdef EIGEN_TEST_PART_1
#include "sparse.h"
#include <Eigen/SparseExtra>
#include <Eigen/KroneckerProduct>
-
template<typename MatrixType>
void check_dimension(const MatrixType& ab, const int rows, const int cols)
{
@@ -230,3 +230,23 @@ void test_kronecker_product()
VERIFY_IS_APPROX(MatrixXf(sC2),dC);
}
}
+
+#endif
+
+#ifdef EIGEN_TEST_PART_2
+
+// simply check that for a dense kronecker product, sparse module is not needed
+
+#include "main.h"
+#include <Eigen/KroneckerProduct>
+
+void test_kronecker_product()
+{
+ MatrixXd a(2,2), b(3,3), c;
+ a.setRandom();
+ b.setRandom();
+ c = kroneckerProduct(a,b);
+ VERIFY_IS_APPROX(c.block(3,3,3,3), a(1,1)*b);
+}
+
+#endif
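
The block identity checked in part 2 follows directly from the definition of the Kronecker product: with a being 2x2 and b being 3x3, the 3x3 block of c = kroneckerProduct(a,b) starting at row 3, column 3 is exactly a(1,1)*b.

```latex
A \otimes B =
\begin{pmatrix}
a_{11} B & \cdots & a_{1n} B \\
\vdots   & \ddots & \vdots   \\
a_{m1} B & \cdots & a_{mn} B
\end{pmatrix}
```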
diff --git a/unsupported/test/matrix_function.cpp b/unsupported/test/matrix_function.cpp
index 9a995f941..7c9b68a3c 100644
--- a/unsupported/test/matrix_function.cpp
+++ b/unsupported/test/matrix_function.cpp
@@ -113,8 +113,8 @@ void testMatrixLogarithm(const MatrixType& A)
MatrixType scaledA;
RealScalar maxImagPartOfSpectrum = A.eigenvalues().imag().cwiseAbs().maxCoeff();
- if (maxImagPartOfSpectrum >= 0.9 * EIGEN_PI)
- scaledA = A * 0.9 * EIGEN_PI / maxImagPartOfSpectrum;
+ if (maxImagPartOfSpectrum >= RealScalar(0.9L * EIGEN_PI))
+ scaledA = A * RealScalar(0.9L * EIGEN_PI) / maxImagPartOfSpectrum;
else
scaledA = A;
diff --git a/unsupported/test/matrix_functions.h b/unsupported/test/matrix_functions.h
index 150b4c0c5..4e2636404 100644
--- a/unsupported/test/matrix_functions.h
+++ b/unsupported/test/matrix_functions.h
@@ -61,7 +61,7 @@ struct generateTestMatrix<MatrixType,1>
};
template <typename Derived, typename OtherDerived>
-double relerr(const MatrixBase<Derived>& A, const MatrixBase<OtherDerived>& B)
+typename Derived::RealScalar relerr(const MatrixBase<Derived>& A, const MatrixBase<OtherDerived>& B)
{
return std::sqrt((A - B).cwiseAbs2().sum() / (std::min)(A.cwiseAbs2().sum(), B.cwiseAbs2().sum()));
}
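
For reference, relerr computes the relative error below; the change above only generalizes its return type from double to the matrix's own RealScalar so that long double and float specializations are not silently truncated:

```latex
\operatorname{relerr}(A, B) \;=\;
\sqrt{\frac{\sum_{i,j} \lvert A_{ij} - B_{ij} \rvert^{2}}
           {\min\!\left(\sum_{i,j} \lvert A_{ij} \rvert^{2},\ \sum_{i,j} \lvert B_{ij} \rvert^{2}\right)}}
```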
diff --git a/unsupported/test/matrix_power.cpp b/unsupported/test/matrix_power.cpp
index 8e104ed1e..7ccfacfdf 100644
--- a/unsupported/test/matrix_power.cpp
+++ b/unsupported/test/matrix_power.cpp
@@ -10,7 +10,7 @@
#include "matrix_functions.h"
template<typename T>
-void test2dRotation(double tol)
+void test2dRotation(const T& tol)
{
Matrix<T,2,2> A, B, C;
T angle, c, s;
@@ -19,19 +19,19 @@ void test2dRotation(double tol)
MatrixPower<Matrix<T,2,2> > Apow(A);
for (int i=0; i<=20; ++i) {
- angle = pow(10, (i-10) / 5.);
+ angle = std::pow(T(10), (i-10) / T(5.));
c = std::cos(angle);
s = std::sin(angle);
B << c, s, -s, c;
- C = Apow(std::ldexp(angle,1) / EIGEN_PI);
+ C = Apow(std::ldexp(angle,1) / T(EIGEN_PI));
std::cout << "test2dRotation: i = " << i << " error powerm = " << relerr(C,B) << '\n';
VERIFY(C.isApprox(B, tol));
}
}
template<typename T>
-void test2dHyperbolicRotation(double tol)
+void test2dHyperbolicRotation(const T& tol)
{
Matrix<std::complex<T>,2,2> A, B, C;
T angle, ch = std::cosh((T)1);
@@ -53,7 +53,7 @@ void test2dHyperbolicRotation(double tol)
}
template<typename T>
-void test3dRotation(double tol)
+void test3dRotation(const T& tol)
{
Matrix<T,3,1> v;
T angle;
@@ -61,13 +61,13 @@ void test3dRotation(double tol)
for (int i=0; i<=20; ++i) {
v = Matrix<T,3,1>::Random();
v.normalize();
- angle = pow(10, (i-10) / 5.);
+ angle = std::pow(T(10), (i-10) / T(5.));
VERIFY(AngleAxis<T>(angle, v).matrix().isApprox(AngleAxis<T>(1,v).matrix().pow(angle), tol));
}
}
template<typename MatrixType>
-void testGeneral(const MatrixType& m, double tol)
+void testGeneral(const MatrixType& m, const typename MatrixType::RealScalar& tol)
{
typedef typename MatrixType::RealScalar RealScalar;
MatrixType m1, m2, m3, m4, m5;
@@ -97,7 +97,7 @@ void testGeneral(const MatrixType& m, double tol)
}
template<typename MatrixType>
-void testSingular(const MatrixType& m_const, double tol)
+void testSingular(const MatrixType& m_const, const typename MatrixType::RealScalar& tol)
{
// we need to pass by reference in order to prevent errors with
// MSVC for aligned data types ...
@@ -119,18 +119,18 @@ void testSingular(const MatrixType& m_const, double tol)
MatrixPower<MatrixType> mpow(m);
T = T.sqrt();
- VERIFY(mpow(0.5).isApprox(U * (TriangularType(T) * U.adjoint()), tol));
+ VERIFY(mpow(0.5L).isApprox(U * (TriangularType(T) * U.adjoint()), tol));
T = T.sqrt();
- VERIFY(mpow(0.25).isApprox(U * (TriangularType(T) * U.adjoint()), tol));
+ VERIFY(mpow(0.25L).isApprox(U * (TriangularType(T) * U.adjoint()), tol));
T = T.sqrt();
- VERIFY(mpow(0.125).isApprox(U * (TriangularType(T) * U.adjoint()), tol));
+ VERIFY(mpow(0.125L).isApprox(U * (TriangularType(T) * U.adjoint()), tol));
}
}
template<typename MatrixType>
-void testLogThenExp(const MatrixType& m_const, double tol)
+void testLogThenExp(const MatrixType& m_const, const typename MatrixType::RealScalar& tol)
{
// we need to pass by reference in order to prevent errors with
// MSVC for aligned data types ...
@@ -154,14 +154,14 @@ void test_matrix_power()
{
CALL_SUBTEST_2(test2dRotation<double>(1e-13));
CALL_SUBTEST_1(test2dRotation<float>(2e-5)); // was 1e-5, relaxed for clang 2.8 / linux / x86-64
- CALL_SUBTEST_9(test2dRotation<long double>(1e-13));
+ CALL_SUBTEST_9(test2dRotation<long double>(1e-13L));
CALL_SUBTEST_2(test2dHyperbolicRotation<double>(1e-14));
CALL_SUBTEST_1(test2dHyperbolicRotation<float>(1e-5));
- CALL_SUBTEST_9(test2dHyperbolicRotation<long double>(1e-14));
+ CALL_SUBTEST_9(test2dHyperbolicRotation<long double>(1e-14L));
CALL_SUBTEST_10(test3dRotation<double>(1e-13));
CALL_SUBTEST_11(test3dRotation<float>(1e-5));
- CALL_SUBTEST_12(test3dRotation<long double>(1e-13));
+ CALL_SUBTEST_12(test3dRotation<long double>(1e-13L));
CALL_SUBTEST_2(testGeneral(Matrix2d(), 1e-13));
CALL_SUBTEST_7(testGeneral(Matrix3dRowMajor(), 1e-13));
@@ -171,10 +171,10 @@ void test_matrix_power()
CALL_SUBTEST_5(testGeneral(Matrix3cf(), 1e-4));
CALL_SUBTEST_8(testGeneral(Matrix4f(), 1e-4));
CALL_SUBTEST_6(testGeneral(MatrixXf(2,2), 1e-3)); // see bug 614
- CALL_SUBTEST_9(testGeneral(MatrixXe(7,7), 1e-13));
+ CALL_SUBTEST_9(testGeneral(MatrixXe(7,7), 1e-13L));
CALL_SUBTEST_10(testGeneral(Matrix3d(), 1e-13));
CALL_SUBTEST_11(testGeneral(Matrix3f(), 1e-4));
- CALL_SUBTEST_12(testGeneral(Matrix3e(), 1e-13));
+ CALL_SUBTEST_12(testGeneral(Matrix3e(), 1e-13L));
CALL_SUBTEST_2(testSingular(Matrix2d(), 1e-13));
CALL_SUBTEST_7(testSingular(Matrix3dRowMajor(), 1e-13));
@@ -184,10 +184,10 @@ void test_matrix_power()
CALL_SUBTEST_5(testSingular(Matrix3cf(), 1e-4));
CALL_SUBTEST_8(testSingular(Matrix4f(), 1e-4));
CALL_SUBTEST_6(testSingular(MatrixXf(2,2), 1e-3));
- CALL_SUBTEST_9(testSingular(MatrixXe(7,7), 1e-13));
+ CALL_SUBTEST_9(testSingular(MatrixXe(7,7), 1e-13L));
CALL_SUBTEST_10(testSingular(Matrix3d(), 1e-13));
CALL_SUBTEST_11(testSingular(Matrix3f(), 1e-4));
- CALL_SUBTEST_12(testSingular(Matrix3e(), 1e-13));
+ CALL_SUBTEST_12(testSingular(Matrix3e(), 1e-13L));
CALL_SUBTEST_2(testLogThenExp(Matrix2d(), 1e-13));
CALL_SUBTEST_7(testLogThenExp(Matrix3dRowMajor(), 1e-13));
@@ -197,8 +197,8 @@ void test_matrix_power()
CALL_SUBTEST_5(testLogThenExp(Matrix3cf(), 1e-4));
CALL_SUBTEST_8(testLogThenExp(Matrix4f(), 1e-4));
CALL_SUBTEST_6(testLogThenExp(MatrixXf(2,2), 1e-3));
- CALL_SUBTEST_9(testLogThenExp(MatrixXe(7,7), 1e-13));
+ CALL_SUBTEST_9(testLogThenExp(MatrixXe(7,7), 1e-13L));
CALL_SUBTEST_10(testLogThenExp(Matrix3d(), 1e-13));
CALL_SUBTEST_11(testLogThenExp(Matrix3f(), 1e-4));
- CALL_SUBTEST_12(testLogThenExp(Matrix3e(), 1e-13));
+ CALL_SUBTEST_12(testLogThenExp(Matrix3e(), 1e-13L));
}
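
The 2d and 3d rotation tests rest on the fractional-power identity for rotation matrices; assuming A is initialized earlier in each test to the quarter-turn rotation R(pi/2), the exponent std::ldexp(angle,1)/EIGEN_PI, i.e. 2*theta/pi, recovers the rotation by theta that B is built from:

```latex
R(\varphi)^{\,p} = R(p\,\varphi)
\qquad\Longrightarrow\qquad
R\!\left(\tfrac{\pi}{2}\right)^{2\theta/\pi} = R(\theta)
```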
diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h
index 9b0cf7268..8404f1ff8 100644
--- a/unsupported/test/mpreal/mpreal.h
+++ b/unsupported/test/mpreal/mpreal.h
@@ -99,7 +99,7 @@
// Detect support for explicit converters.
#if (__has_feature(cxx_explicit_conversions) || \
- (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GNUC_MINOR >= 5) || __cplusplus >= 201103L || \
+ (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GNUC_MINOR__ >= 5) || __cplusplus >= 201103L || \
(defined(_MSC_VER) && _MSC_VER >= 1800))
#define MPREAL_HAVE_EXPLICIT_CONVERTERS
diff --git a/unsupported/test/mpreal_support.cpp b/unsupported/test/mpreal_support.cpp
index 1aa9e786a..ffa5691eb 100644
--- a/unsupported/test/mpreal_support.cpp
+++ b/unsupported/test/mpreal_support.cpp
@@ -17,6 +17,7 @@ void test_mpreal_support()
std::cerr << "dummy_precision = " << NumTraits<mpreal>::dummy_precision() << "\n";
std::cerr << "highest = " << NumTraits<mpreal>::highest() << "\n";
std::cerr << "lowest = " << NumTraits<mpreal>::lowest() << "\n";
+ std::cerr << "digits10 = " << NumTraits<mpreal>::digits10() << "\n";
for(int i = 0; i < g_repeat; i++) {
int s = Eigen::internal::random<int>(1,100);
diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp
new file mode 100644
index 000000000..057fb3e92
--- /dev/null
+++ b/unsupported/test/special_functions.cpp
@@ -0,0 +1,345 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include "../Eigen/SpecialFunctions"
+
+template<typename X, typename Y>
+void verify_component_wise(const X& x, const Y& y)
+{
+ for(Index i=0; i<x.size(); ++i)
+ {
+ if((numext::isfinite)(y(i)))
+ VERIFY_IS_APPROX( x(i), y(i) );
+ else if((numext::isnan)(y(i)))
+ VERIFY((numext::isnan)(x(i)));
+ else
+ VERIFY_IS_EQUAL( x(i), y(i) );
+ }
+}
+
+template<typename ArrayType> void array_special_functions()
+{
+ using std::abs;
+ using std::sqrt;
+ typedef typename ArrayType::Scalar Scalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+
+ Scalar plusinf = std::numeric_limits<Scalar>::infinity();
+ Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+
+ Index rows = internal::random<Index>(1,30);
+ Index cols = 1;
+
+ // API
+ {
+ ArrayType m1 = ArrayType::Random(rows,cols);
+#if EIGEN_HAS_C99_MATH
+ VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1));
+ VERIFY_IS_APPROX(m1.digamma(), digamma(m1));
+ VERIFY_IS_APPROX(m1.erf(), erf(m1));
+ VERIFY_IS_APPROX(m1.erfc(), erfc(m1));
+#endif // EIGEN_HAS_C99_MATH
+ }
+
+
+#if EIGEN_HAS_C99_MATH
+ // check special functions (comparing against numpy implementation)
+ if (!NumTraits<Scalar>::IsComplex)
+ {
+
+ {
+ ArrayType m1 = ArrayType::Random(rows,cols);
+ ArrayType m2 = ArrayType::Random(rows,cols);
+
+ // Test various properties of igamma & igammac. These are normalized
+ // gamma integrals where
+ // igammac(a, x) = Gamma(a, x) / Gamma(a)
+ // igamma(a, x) = gamma(a, x) / Gamma(a)
+ // where Gamma and gamma are considered the standard unnormalized
+ // upper and lower incomplete gamma functions, respectively.
+ ArrayType a = m1.abs() + 2;
+ ArrayType x = m2.abs() + 2;
+ ArrayType zero = ArrayType::Zero(rows, cols);
+ ArrayType one = ArrayType::Constant(rows, cols, Scalar(1.0));
+ ArrayType a_m1 = a - one;
+ ArrayType Gamma_a_x = Eigen::igammac(a, x) * a.lgamma().exp();
+ ArrayType Gamma_a_m1_x = Eigen::igammac(a_m1, x) * a_m1.lgamma().exp();
+ ArrayType gamma_a_x = Eigen::igamma(a, x) * a.lgamma().exp();
+ ArrayType gamma_a_m1_x = Eigen::igamma(a_m1, x) * a_m1.lgamma().exp();
+
+ // Gamma(a, 0) == Gamma(a)
+ VERIFY_IS_APPROX(Eigen::igammac(a, zero), one);
+
+ // Gamma(a, x) + gamma(a, x) == Gamma(a)
+ VERIFY_IS_APPROX(Gamma_a_x + gamma_a_x, a.lgamma().exp());
+
+ // Gamma(a, x) == (a - 1) * Gamma(a-1, x) + x^(a-1) * exp(-x)
+ VERIFY_IS_APPROX(Gamma_a_x, (a - 1) * Gamma_a_m1_x + x.pow(a-1) * (-x).exp());
+
+ // gamma(a, x) == (a - 1) * gamma(a-1, x) - x^(a-1) * exp(-x)
+ VERIFY_IS_APPROX(gamma_a_x, (a - 1) * gamma_a_m1_x - x.pow(a-1) * (-x).exp());
+ }
+
+ {
+ // Check exact values of igamma and igammac against a third party calculation.
+ Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+ Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+
+ // location i*6+j corresponds to a_s[i], x_s[j].
+ Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan},
+ {0.0, 0.6321205588285578, 0.7768698398515702,
+ 0.9816843611112658, 9.999500016666262e-05, 1.0},
+ {0.0, 0.4275932955291202, 0.608374823728911,
+ 0.9539882943107686, 7.522076445089201e-07, 1.0},
+ {0.0, 0.01898815687615381, 0.06564245437845008,
+ 0.5665298796332909, 4.166333347221828e-18, 1.0},
+ {0.0, 0.9999780593618628, 0.9999899967080838,
+ 0.9999996219837988, 0.9991370418689945, 1.0},
+ {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}};
+ Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan},
+ {1.0, 0.36787944117144233, 0.22313016014842982,
+ 0.018315638888734182, 0.9999000049998333, 0.0},
+ {1.0, 0.5724067044708798, 0.3916251762710878,
+ 0.04601170568923136, 0.9999992477923555, 0.0},
+ {1.0, 0.9810118431238462, 0.9343575456215499,
+ 0.4334701203667089, 1.0, 0.0},
+ {1.0, 2.1940638138146658e-05, 1.0003291916285e-05,
+ 3.7801620118431334e-07, 0.0008629581310054535,
+ 0.0},
+ {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}};
+ for (int i = 0; i < 6; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ if ((std::isnan)(igamma_s[i][j])) {
+ VERIFY((std::isnan)(numext::igamma(a_s[i], x_s[j])));
+ } else {
+ VERIFY_IS_APPROX(numext::igamma(a_s[i], x_s[j]), igamma_s[i][j]);
+ }
+
+ if ((std::isnan)(igammac_s[i][j])) {
+ VERIFY((std::isnan)(numext::igammac(a_s[i], x_s[j])));
+ } else {
+ VERIFY_IS_APPROX(numext::igammac(a_s[i], x_s[j]), igammac_s[i][j]);
+ }
+ }
+ }
+ }
+ }
+#endif // EIGEN_HAS_C99_MATH
+
+ // Check the zeta function against scipy.special.zeta
+ {
+ ArrayType x(7), q(7), res(7), ref(7);
+ x << 1.5, 4, 10.5, 10000.5, 3, 1, 0.9;
+ q << 2, 1.5, 3, 1.0001, -2.5, 1.2345, 1.2345;
+ ref << 1.61237534869, 0.234848505667, 1.03086757337e-5, 0.367879440865, 0.054102025820864097, plusinf, nan;
+ CALL_SUBTEST( verify_component_wise(ref, ref); );
+ CALL_SUBTEST( res = x.zeta(q); verify_component_wise(res, ref); );
+ CALL_SUBTEST( res = zeta(x,q); verify_component_wise(res, ref); );
+ }
+
+ // digamma
+ {
+ ArrayType x(7), res(7), ref(7);
+ x << 1, 1.5, 4, -10.5, 10000.5, 0, -1;
+ ref << -0.5772156649015329, 0.03648997397857645, 1.2561176684318, 2.398239129535781, 9.210340372392849, plusinf, plusinf;
+ CALL_SUBTEST( verify_component_wise(ref, ref); );
+
+ CALL_SUBTEST( res = x.digamma(); verify_component_wise(res, ref); );
+ CALL_SUBTEST( res = digamma(x); verify_component_wise(res, ref); );
+ }
+
+
+#if EIGEN_HAS_C99_MATH
+ {
+ ArrayType n(11), x(11), res(11), ref(11);
+ n << 1, 1, 1, 1.5, 17, 31, 28, 8, 42, 147, 170;
+ x << 2, 3, 25.5, 1.5, 4.7, 11.8, 17.7, 30.2, 15.8, 54.1, 64;
+ ref << 0.644934066848, 0.394934066848, 0.0399946696496, nan, 293.334565435, 0.445487887616, -2.47810300902e-07, -8.29668781082e-09, -0.434562276666, 0.567742190178, -0.0108615497927;
+ CALL_SUBTEST( verify_component_wise(ref, ref); );
+
+ if(sizeof(RealScalar)>=8) { // double
+ // Reason for commented line: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232
+ // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res, ref); );
+ CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res, ref); );
+ }
+ else {
+ // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res.head(8), ref.head(8)); );
+ CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res.head(8), ref.head(8)); );
+ }
+ }
+#endif
+
+#if EIGEN_HAS_C99_MATH
+ {
+ // Inputs and ground truth generated with scipy via:
+ // a = np.logspace(-3, 3, 5) - 1e-3
+ // b = np.logspace(-3, 3, 5) - 1e-3
+ // x = np.linspace(-0.1, 1.1, 5)
+ // (full_a, full_b, full_x) = np.vectorize(lambda a, b, x: (a, b, x))(*np.ix_(a, b, x))
+ // full_a = full_a.flatten().tolist() # same for full_b, full_x
+ // v = scipy.special.betainc(full_a, full_b, full_x).flatten().tolist()
+ //
+ // Note in Eigen, we call betainc with arguments in the order (x, a, b).
+ ArrayType a(125);
+ ArrayType b(125);
+ ArrayType x(125);
+ ArrayType v(125);
+ ArrayType res(125);
+
+ a << 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 999.999;
+
+ b << 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.999, 0.999, 0.999, 0.999,
+ 0.999, 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999,
+ 999.999, 999.999;
+
+ x << -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5,
+ 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2,
+ 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1,
+ 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1,
+ -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8,
+ 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5,
+ 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2,
+ 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1,
+ 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5,
+ 0.8, 1.1;
+
+ v << nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
+ nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
+ nan, nan, nan, 0.47972119876364683, 0.5, 0.5202788012363533, nan, nan,
+ 0.9518683957740043, 0.9789663010413743, 0.9931729188073435, nan, nan,
+ 0.999995949033062, 0.9999999999993698, 0.9999999999999999, nan, nan,
+ 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, nan,
+ nan, nan, nan, nan, nan, 0.006827081192655869, 0.0210336989586256,
+ 0.04813160422599567, nan, nan, 0.20014344256217678, 0.5000000000000001,
+ 0.7998565574378232, nan, nan, 0.9991401428435834, 0.999999999698403,
+ 0.9999999999999999, nan, nan, 0.9999999999999999, 0.9999999999999999,
+ 0.9999999999999999, nan, nan, nan, nan, nan, nan, nan,
+ 1.0646600232370887e-25, 6.301722877826246e-13, 4.050966937974938e-06,
+ nan, nan, 7.864342668429763e-23, 3.015969667594166e-10,
+ 0.0008598571564165444, nan, nan, 6.031987710123844e-08,
+ 0.5000000000000007, 0.9999999396801229, nan, nan, 0.9999999999999999,
+ 0.9999999999999999, 0.9999999999999999, nan, nan, nan, nan, nan, nan,
+ nan, 0.0, 7.029920380986636e-306, 2.2450728208591345e-101, nan, nan,
+ 0.0, 9.275871147869727e-302, 1.2232913026152827e-97, nan, nan, 0.0,
+ 3.0891393081932924e-252, 2.9303043666183996e-60, nan, nan,
+ 2.248913486879199e-196, 0.5000000000004947, 0.9999999999999999, nan;
+
+ CALL_SUBTEST(res = betainc(a, b, x);
+ verify_component_wise(res, v););
+ }
+
+ // Test various properties of betainc
+ {
+ ArrayType m1 = ArrayType::Random(32);
+ ArrayType m2 = ArrayType::Random(32);
+ ArrayType m3 = ArrayType::Random(32);
+ ArrayType one = ArrayType::Constant(32, Scalar(1.0));
+ const Scalar eps = std::numeric_limits<Scalar>::epsilon();
+ ArrayType a = (m1 * 4.0).exp();
+ ArrayType b = (m2 * 4.0).exp();
+ ArrayType x = m3.abs();
+
+ // betainc(a, 1, x) == x**a
+ CALL_SUBTEST(
+ ArrayType test = betainc(a, one, x);
+ ArrayType expected = x.pow(a);
+ verify_component_wise(test, expected););
+
+ // betainc(1, b, x) == 1 - (1 - x)**b
+ CALL_SUBTEST(
+ ArrayType test = betainc(one, b, x);
+ ArrayType expected = one - (one - x).pow(b);
+ verify_component_wise(test, expected););
+
+ // betainc(a, b, x) == 1 - betainc(b, a, 1-x)
+ CALL_SUBTEST(
+ ArrayType test = betainc(a, b, x) + betainc(b, a, one - x);
+ ArrayType expected = one;
+ verify_component_wise(test, expected););
+
+ // betainc(a+1, b, x) = betainc(a, b, x) - x**a * (1 - x)**b / (a * beta(a, b))
+ CALL_SUBTEST(
+ ArrayType num = x.pow(a) * (one - x).pow(b);
+ ArrayType denom = a * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp();
+ // Add eps to rhs and lhs so that component-wise test doesn't result in
+ // nans when both outputs are zeros.
+ ArrayType expected = betainc(a, b, x) - num / denom + eps;
+ ArrayType test = betainc(a + one, b, x) + eps;
+ if (sizeof(Scalar) >= 8) { // double
+ verify_component_wise(test, expected);
+ } else {
+ // Reason for limited test: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232
+ verify_component_wise(test.head(8), expected.head(8));
+ });
+
+ // betainc(a, b+1, x) = betainc(a, b, x) + x**a * (1 - x)**b / (b * beta(a, b))
+ CALL_SUBTEST(
+ // Add eps to rhs and lhs so that component-wise test doesn't result in
+ // nans when both outputs are zeros.
+ ArrayType num = x.pow(a) * (one - x).pow(b);
+ ArrayType denom = b * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp();
+ ArrayType expected = betainc(a, b, x) + num / denom + eps;
+ ArrayType test = betainc(a, b + one, x) + eps;
+ verify_component_wise(test, expected););
+ }
+#endif
+}
+
+void test_special_functions()
+{
+ CALL_SUBTEST_1(array_special_functions<ArrayXf>());
+ CALL_SUBTEST_2(array_special_functions<ArrayXd>());
+}
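
For reference, igamma and igammac compute the normalized lower and upper incomplete gamma functions P and Q, and the first block of checks verifies the complement and recurrence relations below (all restated from the comments in the test body):

```latex
P(a,x) = \frac{\gamma(a,x)}{\Gamma(a)}, \qquad
Q(a,x) = \frac{\Gamma(a,x)}{\Gamma(a)}, \qquad
P(a,x) + Q(a,x) = 1

\Gamma(a,x) = (a-1)\,\Gamma(a-1,x) + x^{a-1} e^{-x}, \qquad
\gamma(a,x) = (a-1)\,\gamma(a-1,x) - x^{a-1} e^{-x}
```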