Diffstat (limited to 'unsupported/test')
43 files changed, 2546 insertions, 538 deletions
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 22442b394..a1823beaa 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -59,6 +59,8 @@ ei_add_test(alignedvector3) ei_add_test(FFT) +ei_add_test(EulerAngles) + find_package(MPFR 2.3.0) find_package(GMP) if(MPFR_FOUND AND EIGEN_COMPILER_SUPPORT_CXX11) @@ -109,10 +111,14 @@ ei_add_test(gmres) ei_add_test(minres) ei_add_test(levenberg_marquardt) ei_add_test(kronecker_product) +ei_add_test(special_functions) # TODO: The following test names are prefixed with the cxx11 string, since historically # the tests depended on c++11. This isn't the case anymore so we ought to rename them. -ei_add_test(cxx11_float16) +# FIXME: Old versions of MSVC fail to compile this code, so we just disable these tests +# when using visual studio. We should make the check more strict to enable the tests for +# newer versions of MSVC. +if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") ei_add_test(cxx11_tensor_dimension) ei_add_test(cxx11_tensor_map) ei_add_test(cxx11_tensor_assign) @@ -130,7 +136,8 @@ ei_add_test(cxx11_tensor_io) if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") # This test requires __uint128_t which is only available on 64bit systems ei_add_test(cxx11_tensor_uint128) -endif() +endif() +endif() if(EIGEN_TEST_CXX11) # It should be safe to always run these tests as there is some fallback code for @@ -139,6 +146,8 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}") + ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}") + ei_add_test(cxx11_meta) ei_add_test(cxx11_tensor_simple) # ei_add_test(cxx11_tensor_symmetry) @@ -174,6 +183,7 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_custom_index) ei_add_test(cxx11_tensor_fft) ei_add_test(cxx11_tensor_ifft) + ei_add_test(cxx11_tensor_scan) endif() @@ -183,37 +193,58 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) # Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor # and -fno-check-new flags since they trigger thousands of compilation warnings # in the CUDA runtime + # Also remove -ansi that is incompatible with std=c++11. 
string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS}) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE) + set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE) endif() if(EIGEN_TEST_CUDA_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_${EIGEN_CUDA_COMPUTE_ARCH}") endif() - set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\"") + set(EIGEN_CUDA_RELAXED_CONSTEXPR "--expt-relaxed-constexpr") + if (${CUDA_VERSION} STREQUAL "7.0") + set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr") + endif() + + if( (NOT EIGEN_TEST_CXX11) OR (CMAKE_VERSION VERSION_LESS 3.3)) + set(EIGEN_CUDA_CXX11_FLAG "-std=c++11") + else() + # otherwise the flag has already been added because of the above set(CMAKE_CXX_STANDARD 11) + set(EIGEN_CUDA_CXX11_FLAG "") + endif() + + set(CUDA_NVCC_FLAGS "${EIGEN_CUDA_CXX11_FLAG} ${EIGEN_CUDA_RELAXED_CONSTEXPR} -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\" ${CUDA_NVCC_FLAGS}") cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") - ei_add_test(cxx11_tensor_device) - ei_add_test(cxx11_tensor_cuda) - ei_add_test(cxx11_tensor_contract_cuda) + ei_add_test(cxx11_tensor_complex_cuda) + ei_add_test(cxx11_tensor_complex_cwise_ops_cuda) ei_add_test(cxx11_tensor_reduction_cuda) ei_add_test(cxx11_tensor_argmax_cuda) ei_add_test(cxx11_tensor_cast_float16_cuda) + ei_add_test(cxx11_tensor_scan_cuda) + + # Contractions require arch 3.0 or higher + if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 29) + ei_add_test(cxx11_tensor_device) + ei_add_test(cxx11_tensor_cuda) + ei_add_test(cxx11_tensor_contract_cuda) + ei_add_test(cxx11_tensor_of_float16_cuda) + endif() # The random number generation code requires arch 3.5 or greater. if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 34) ei_add_test(cxx11_tensor_random_cuda) endif() - ei_add_test(cxx11_tensor_of_float16_cuda) unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) endif() diff --git a/unsupported/test/EulerAngles.cpp b/unsupported/test/EulerAngles.cpp new file mode 100644 index 000000000..a8cb52864 --- /dev/null +++ b/unsupported/test/EulerAngles.cpp @@ -0,0 +1,208 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
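For orientation, the new test below exercises the EulerAngles module roughly as in this minimal sketch; every type and member name here is taken from the test itself, while the ZYX system and the literal angles are merely illustrative:

  #include <unsupported/Eigen/EulerAngles>
  using namespace Eigen;

  typedef EulerAngles<double, EulerSystemZYX> MyEuler; // one of the 12 systems checked below
  MyEuler e(0.1, 0.2, 0.3);        // alpha, beta, gamma
  Matrix<double,3,3> m(e);         // convert to a rotation matrix
  // Recover the angles, requesting the positive range [0, 2*pi] on all three axes:
  Matrix<double,3,1> a = MyEuler(m, true, true, true).angles();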
+ +#include "main.h" + +#include <unsupported/Eigen/EulerAngles> + +using namespace Eigen; + +template<typename EulerSystem, typename Scalar> +void verify_euler_ranged(const Matrix<Scalar,3,1>& ea, + bool positiveRangeAlpha, bool positiveRangeBeta, bool positiveRangeGamma) +{ + typedef EulerAngles<Scalar, EulerSystem> EulerAnglesType; + typedef Matrix<Scalar,3,3> Matrix3; + typedef Matrix<Scalar,3,1> Vector3; + typedef Quaternion<Scalar> QuaternionType; + typedef AngleAxis<Scalar> AngleAxisType; + using std::abs; + + Scalar alphaRangeStart, alphaRangeEnd; + Scalar betaRangeStart, betaRangeEnd; + Scalar gammaRangeStart, gammaRangeEnd; + + if (positiveRangeAlpha) + { + alphaRangeStart = Scalar(0); + alphaRangeEnd = Scalar(2 * EIGEN_PI); + } + else + { + alphaRangeStart = -Scalar(EIGEN_PI); + alphaRangeEnd = Scalar(EIGEN_PI); + } + + if (positiveRangeBeta) + { + betaRangeStart = Scalar(0); + betaRangeEnd = Scalar(2 * EIGEN_PI); + } + else + { + betaRangeStart = -Scalar(EIGEN_PI); + betaRangeEnd = Scalar(EIGEN_PI); + } + + if (positiveRangeGamma) + { + gammaRangeStart = Scalar(0); + gammaRangeEnd = Scalar(2 * EIGEN_PI); + } + else + { + gammaRangeStart = -Scalar(EIGEN_PI); + gammaRangeEnd = Scalar(EIGEN_PI); + } + + const int i = EulerSystem::AlphaAxisAbs - 1; + const int j = EulerSystem::BetaAxisAbs - 1; + const int k = EulerSystem::GammaAxisAbs - 1; + + const int iFactor = EulerSystem::IsAlphaOpposite ? -1 : 1; + const int jFactor = EulerSystem::IsBetaOpposite ? -1 : 1; + const int kFactor = EulerSystem::IsGammaOpposite ? -1 : 1; + + const Vector3 I = EulerAnglesType::AlphaAxisVector(); + const Vector3 J = EulerAnglesType::BetaAxisVector(); + const Vector3 K = EulerAnglesType::GammaAxisVector(); + + EulerAnglesType e(ea[0], ea[1], ea[2]); + + Matrix3 m(e); + Vector3 eabis = EulerAnglesType(m, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles(); + + // Check that eabis in range + VERIFY(alphaRangeStart <= eabis[0] && eabis[0] <= alphaRangeEnd); + VERIFY(betaRangeStart <= eabis[1] && eabis[1] <= betaRangeEnd); + VERIFY(gammaRangeStart <= eabis[2] && eabis[2] <= gammaRangeEnd); + + Vector3 eabis2 = m.eulerAngles(i, j, k); + + // Invert the relevant axes + eabis2[0] *= iFactor; + eabis2[1] *= jFactor; + eabis2[2] *= kFactor; + + // Saturate the angles to the correct range + if (positiveRangeAlpha && (eabis2[0] < 0)) + eabis2[0] += Scalar(2 * EIGEN_PI); + if (positiveRangeBeta && (eabis2[1] < 0)) + eabis2[1] += Scalar(2 * EIGEN_PI); + if (positiveRangeGamma && (eabis2[2] < 0)) + eabis2[2] += Scalar(2 * EIGEN_PI); + + VERIFY_IS_APPROX(eabis, eabis2);// Verify that our estimation is the same as m.eulerAngles() is + + Matrix3 mbis(AngleAxisType(eabis[0], I) * AngleAxisType(eabis[1], J) * AngleAxisType(eabis[2], K)); + VERIFY_IS_APPROX(m, mbis); + + // Tests that are only relevant for no possitive range + if (!(positiveRangeAlpha || positiveRangeBeta || positiveRangeGamma)) + { + /* If I==K, and ea[1]==0, then there no unique solution. */ + /* The remark apply in the case where I!=K, and |ea[1]| is close to pi/2. 
*/ + if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision<Scalar>())) ) + VERIFY((ea-eabis).norm() <= test_precision<Scalar>()); + + // approx_or_less_than does not work for 0 + VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1))); + } + + // Quaternions + QuaternionType q(e); + eabis = EulerAnglesType(q, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles(); + VERIFY_IS_APPROX(eabis, eabis2);// Verify that the euler angles are still the same +} + +template<typename EulerSystem, typename Scalar> +void verify_euler(const Matrix<Scalar,3,1>& ea) +{ + verify_euler_ranged<EulerSystem>(ea, false, false, false); + verify_euler_ranged<EulerSystem>(ea, false, false, true); + verify_euler_ranged<EulerSystem>(ea, false, true, false); + verify_euler_ranged<EulerSystem>(ea, false, true, true); + verify_euler_ranged<EulerSystem>(ea, true, false, false); + verify_euler_ranged<EulerSystem>(ea, true, false, true); + verify_euler_ranged<EulerSystem>(ea, true, true, false); + verify_euler_ranged<EulerSystem>(ea, true, true, true); +} + +template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea) +{ + verify_euler<EulerSystemXYZ>(ea); + verify_euler<EulerSystemXYX>(ea); + verify_euler<EulerSystemXZY>(ea); + verify_euler<EulerSystemXZX>(ea); + + verify_euler<EulerSystemYZX>(ea); + verify_euler<EulerSystemYZY>(ea); + verify_euler<EulerSystemYXZ>(ea); + verify_euler<EulerSystemYXY>(ea); + + verify_euler<EulerSystemZXY>(ea); + verify_euler<EulerSystemZXZ>(ea); + verify_euler<EulerSystemZYX>(ea); + verify_euler<EulerSystemZYZ>(ea); +} + +template<typename Scalar> void eulerangles() +{ + typedef Matrix<Scalar,3,3> Matrix3; + typedef Matrix<Scalar,3,1> Vector3; + typedef Array<Scalar,3,1> Array3; + typedef Quaternion<Scalar> Quaternionx; + typedef AngleAxis<Scalar> AngleAxisType; + + Scalar a = internal::random<Scalar>(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); + Quaternionx q1; + q1 = AngleAxisType(a, Vector3::Random().normalized()); + Matrix3 m; + m = q1; + + Vector3 ea = m.eulerAngles(0,1,2); + check_all_var(ea); + ea = m.eulerAngles(0,1,0); + check_all_var(ea); + + // Check with purely random Quaternion: + q1.coeffs() = Quaternionx::Coefficients::Random().normalized(); + m = q1; + ea = m.eulerAngles(0,1,2); + check_all_var(ea); + ea = m.eulerAngles(0,1,0); + check_all_var(ea); + + // Check with random angles in range [0:pi]x[-pi:pi]x[-pi:pi]. 
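+ // (Array3::Random() is in [-1,1] per component; adding (1,0,0) and scaling by
+ // pi*(0.5,1,1) maps the first component to [0,pi] and keeps the others in [-pi,pi].)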
+ ea = (Array3::Random() + Array3(1,0,0))*Scalar(EIGEN_PI)*Array3(0.5,1,1); + check_all_var(ea); + + ea[2] = ea[0] = internal::random<Scalar>(0,Scalar(EIGEN_PI)); + check_all_var(ea); + + ea[0] = ea[1] = internal::random<Scalar>(0,Scalar(EIGEN_PI)); + check_all_var(ea); + + ea[1] = 0; + check_all_var(ea); + + ea.head(2).setZero(); + check_all_var(ea); + + ea.setZero(); + check_all_var(ea); +} + +void test_EulerAngles() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( eulerangles<float>() ); + CALL_SUBTEST_2( eulerangles<double>() ); + } +} diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp index d3718e2d2..8b7528fb7 100644 --- a/unsupported/test/FFTW.cpp +++ b/unsupported/test/FFTW.cpp @@ -18,11 +18,11 @@ using namespace Eigen; template < typename T> -complex<long double> promote(complex<T> x) { return complex<long double>(x.real(),x.imag()); } +complex<long double> promote(complex<T> x) { return complex<long double>((long double)x.real(),(long double)x.imag()); } -complex<long double> promote(float x) { return complex<long double>( x); } -complex<long double> promote(double x) { return complex<long double>( x); } -complex<long double> promote(long double x) { return complex<long double>( x); } +complex<long double> promote(float x) { return complex<long double>((long double)x); } +complex<long double> promote(double x) { return complex<long double>((long double)x); } +complex<long double> promote(long double x) { return complex<long double>((long double)x); } template <typename VT1,typename VT2> @@ -33,7 +33,7 @@ complex<long double> promote(long double x) { return complex<long double>( x); long double pi = acos((long double)-1 ); for (size_t k0=0;k0<(size_t)fftbuf.size();++k0) { complex<long double> acc = 0; - long double phinc = -2.*k0* pi / timebuf.size(); + long double phinc = (long double)(-2.)*k0* pi / timebuf.size(); for (size_t k1=0;k1<(size_t)timebuf.size();++k1) { acc += promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) ); } @@ -54,8 +54,8 @@ complex<long double> promote(long double x) { return complex<long double>( x); long double difpower=0; size_t n = (min)( buf1.size(),buf2.size() ); for (size_t k=0;k<n;++k) { - totalpower += (numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2.; - difpower += numext::abs2(buf1[k] - buf2[k]); + totalpower += (long double)((numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2); + difpower += (long double)(numext::abs2(buf1[k] - buf2[k])); } return sqrt(difpower/totalpower); } @@ -93,19 +93,19 @@ void test_scalar_generic(int nfft) fft.SetFlag(fft.HalfSpectrum ); fft.fwd( freqBuf,tbuf); VERIFY((size_t)freqBuf.size() == (size_t)( (nfft>>1)+1) ); - VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>() );// gross check + VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check fft.ClearFlag(fft.HalfSpectrum ); fft.fwd( freqBuf,tbuf); VERIFY( (size_t)freqBuf.size() == (size_t)nfft); - VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>() );// gross check + VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check if (nfft&1) return; // odd FFTs get the wrong size inverse FFT ScalarVector tbuf2; fft.inv( tbuf2 , freqBuf); - VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>() );// gross check + VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check // verify that the Unscaled flag takes effect @@ -121,12 +121,12 @@ void test_scalar_generic(int nfft) //for (size_t i=0;i<(size_t) tbuf.size();++i) // cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " - in=" << 
tbuf[i] << " => " << (tbuf3[i] - tbuf[i] ) << endl; - VERIFY( dif_rmse(tbuf,tbuf3) < test_precision<T>() );// gross check + VERIFY( T(dif_rmse(tbuf,tbuf3)) < test_precision<T>() );// gross check // verify that ClearFlag works fft.ClearFlag(fft.Unscaled); fft.inv( tbuf2 , freqBuf); - VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>() );// gross check + VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check } template <typename T> @@ -152,10 +152,10 @@ void test_complex_generic(int nfft) inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) ); fft.fwd( outbuf , inbuf); - VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>() );// gross check + VERIFY( T(fft_rmse(outbuf,inbuf)) < test_precision<T>() );// gross check fft.inv( buf3 , outbuf); - VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>() );// gross check + VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check // verify that the Unscaled flag takes effect ComplexVector buf4; @@ -163,12 +163,12 @@ void test_complex_generic(int nfft) fft.inv( buf4 , outbuf); for (int k=0;k<nfft;++k) buf4[k] *= T(1./nfft); - VERIFY( dif_rmse(inbuf,buf4) < test_precision<T>() );// gross check + VERIFY( T(dif_rmse(inbuf,buf4)) < test_precision<T>() );// gross check // verify that ClearFlag works fft.ClearFlag(fft.Unscaled); fft.inv( buf3 , outbuf); - VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>() );// gross check + VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check } template <typename T> diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp index 374f86df9..85743137e 100644 --- a/unsupported/test/autodiff.cpp +++ b/unsupported/test/autodiff.cpp @@ -16,7 +16,8 @@ EIGEN_DONT_INLINE Scalar foo(const Scalar& x, const Scalar& y) using namespace std; // return x+std::sin(y); EIGEN_ASM_COMMENT("mybegin"); - return static_cast<Scalar>(x*2 - 1 + pow(1+x,2) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(-0.5*x*x+0)); + // pow(float, int) promotes to pow(double, double) + return x*2 - 1 + static_cast<Scalar>(pow(1+x,2)) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(Scalar(-0.5)*x*x+0); //return x+2*y*x;//x*2 -std::pow(x,2);//(2*y/x);// - y*2; EIGEN_ASM_COMMENT("myend"); } @@ -104,6 +105,89 @@ struct TestFunc1 } }; + +#if EIGEN_HAS_VARIADIC_TEMPLATES +/* Test functor for the C++11 features. */ +template <typename Scalar> +struct integratorFunctor +{ + typedef Matrix<Scalar, 2, 1> InputType; + typedef Matrix<Scalar, 2, 1> ValueType; + + /* + * Implementation starts here. + */ + integratorFunctor(const Scalar gain) : _gain(gain) {} + integratorFunctor(const integratorFunctor& f) : _gain(f._gain) {} + const Scalar _gain; + + template <typename T1, typename T2> + void operator() (const T1 &input, T2 *output, const Scalar dt) const + { + T2 &o = *output; + + /* Integrator to test the AD. */ + o[0] = input[0] + input[1] * dt * _gain; + o[1] = input[1] * _gain; + } + + /* Only needed for the test */ + template <typename T1, typename T2, typename T3> + void operator() (const T1 &input, T2 *output, T3 *jacobian, const Scalar dt) const + { + T2 &o = *output; + + /* Integrator to test the AD. 
*/ + o[0] = input[0] + input[1] * dt * _gain; + o[1] = input[1] * _gain; + + if (jacobian) + { + T3 &j = *jacobian; + + j(0, 0) = 1; + j(0, 1) = dt * _gain; + j(1, 0) = 0; + j(1, 1) = _gain; + } + } + +}; + +template<typename Func> void forward_jacobian_cpp11(const Func& f) +{ + typedef typename Func::ValueType::Scalar Scalar; + typedef typename Func::ValueType ValueType; + typedef typename Func::InputType InputType; + typedef typename AutoDiffJacobian<Func>::JacobianType JacobianType; + + InputType x = InputType::Random(InputType::RowsAtCompileTime); + ValueType y, yref; + JacobianType j, jref; + + const Scalar dt = internal::random<double>(); + + jref.setZero(); + yref.setZero(); + f(x, &yref, &jref, dt); + + //std::cerr << "y, yref, jref: " << "\n"; + //std::cerr << y.transpose() << "\n\n"; + //std::cerr << yref << "\n\n"; + //std::cerr << jref << "\n\n"; + + AutoDiffJacobian<Func> autoj(f); + autoj(x, &y, &j, dt); + + //std::cerr << "y j (via autodiff): " << "\n"; + //std::cerr << y.transpose() << "\n\n"; + //std::cerr << j << "\n\n"; + + VERIFY_IS_APPROX(y, yref); + VERIFY_IS_APPROX(j, jref); +} +#endif + template<typename Func> void forward_jacobian(const Func& f) { typename Func::InputType x = Func::InputType::Random(f.inputs()); @@ -127,7 +211,6 @@ template<typename Func> void forward_jacobian(const Func& f) VERIFY_IS_APPROX(j, jref); } - // TODO also check actual derivatives! template <int> void test_autodiff_scalar() @@ -140,6 +223,7 @@ void test_autodiff_scalar() VERIFY_IS_APPROX(res.value(), foo(p.x(),p.y())); } + // TODO also check actual derivatives! template <int> void test_autodiff_vector() @@ -150,7 +234,7 @@ void test_autodiff_vector() VectorAD ap = p.cast<AD>(); ap.x().derivatives() = Vector2f::UnitX(); ap.y().derivatives() = Vector2f::UnitY(); - + AD res = foo<VectorAD>(ap); VERIFY_IS_APPROX(res.value(), foo(p)); } @@ -163,6 +247,9 @@ void test_autodiff_jacobian() CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,2>()) )); CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,3>()) )); CALL_SUBTEST(( forward_jacobian(TestFunc1<double>(3,3)) )); +#if EIGEN_HAS_VARIADIC_TEMPLATES + CALL_SUBTEST(( forward_jacobian_cpp11(integratorFunctor<double>(10)) )); +#endif } @@ -204,9 +291,64 @@ void test_autodiff_hessian() VERIFY_IS_APPROX(y.value().derivatives()(1), s4*std::cos(s1*s3+s2*s4)); VERIFY_IS_APPROX(y.derivatives()(0).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s3,s4*s3)); VERIFY_IS_APPROX(y.derivatives()(1).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s4,s4*s4)); + + ADD z = x(0)*x(1); + VERIFY_IS_APPROX(z.derivatives()(0).derivatives(), Vector2d(0,1)); + VERIFY_IS_APPROX(z.derivatives()(1).derivatives(), Vector2d(1,0)); +} + +double bug_1222() { + typedef Eigen::AutoDiffScalar<Eigen::Vector3d> AD; + const double _cv1_3 = 1.0; + const AD chi_3 = 1.0; + // this line did not work, because operator+ returns ADS<DerType&>, which then cannot be converted to ADS<DerType> + const AD denom = chi_3 + _cv1_3; + return denom.value(); +} + +double bug_1223() { + using std::min; + typedef Eigen::AutoDiffScalar<Eigen::Vector3d> AD; + + const double _cv1_3 = 1.0; + const AD chi_3 = 1.0; + const AD denom = 1.0; + + // failed because implementation of min attempts to construct ADS<DerType&> via constructor AutoDiffScalar(const Real& value) + // without initializing m_derivatives (which is a reference in this case) + #define EIGEN_TEST_SPACE + const AD t = min EIGEN_TEST_SPACE (denom / chi_3, 1.0); + + const AD t2 = min EIGEN_TEST_SPACE (denom / (chi_3 * _cv1_3), 1.0); + + return 
t.value() + t2.value(); +} + +// regression test for some compilation issues with specializations of ScalarBinaryOpTraits +void bug_1260() { + Matrix4d A; + Vector4d v; + A*v; } +// check a compilation issue with numext::max +double bug_1261() { + typedef AutoDiffScalar<Matrix2d> AD; + typedef Matrix<AD,2,1> VectorAD; + + VectorAD v; + const AD maxVal = v.maxCoeff(); + const AD minVal = v.minCoeff(); + return maxVal.value() + minVal.value(); +} +double bug_1264() { + typedef AutoDiffScalar<Vector2d> AD; + const AD s; + const Matrix<AD, 3, 1> v1; + const Matrix<AD, 3, 1> v2 = (s + 3.0) * v1; + return v2(0).value(); +} void test_autodiff() { @@ -216,5 +358,10 @@ void test_autodiff() CALL_SUBTEST_3( test_autodiff_jacobian<1>() ); CALL_SUBTEST_4( test_autodiff_hessian<1>() ); } + + bug_1222(); + bug_1223(); + bug_1260(); + bug_1261(); } diff --git a/unsupported/test/autodiff_scalar.cpp b/unsupported/test/autodiff_scalar.cpp index c631c734a..4df2f5c57 100644 --- a/unsupported/test/autodiff_scalar.cpp +++ b/unsupported/test/autodiff_scalar.cpp @@ -36,13 +36,48 @@ template<typename Scalar> void check_atan2() VERIFY_IS_APPROX(res.derivatives(), x.derivatives()); } +template<typename Scalar> void check_hyperbolic_functions() +{ + using std::sinh; + using std::cosh; + using std::tanh; + typedef Matrix<Scalar, 1, 1> Deriv1; + typedef AutoDiffScalar<Deriv1> AD; + Deriv1 p = Deriv1::Random(); + AD val(p.x(),Deriv1::UnitX()); + + Scalar cosh_px = std::cosh(p.x()); + AD res1 = tanh(val); + VERIFY_IS_APPROX(res1.value(), std::tanh(p.x())); + VERIFY_IS_APPROX(res1.derivatives().x(), Scalar(1.0) / (cosh_px * cosh_px)); + AD res2 = sinh(val); + VERIFY_IS_APPROX(res2.value(), std::sinh(p.x())); + VERIFY_IS_APPROX(res2.derivatives().x(), cosh_px); + AD res3 = cosh(val); + VERIFY_IS_APPROX(res3.value(), cosh_px); + VERIFY_IS_APPROX(res3.derivatives().x(), std::sinh(p.x())); + + // Check constant values. 
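+ // (At x = 1/3 these follow from the closed-form derivatives:
+ // d/dx tanh(x) = 1/cosh(x)^2 ~ 0.8966, d/dx sinh(x) = cosh(x) ~ 1.0561,
+ // d/dx cosh(x) = sinh(x) ~ 0.3395 -- the literals checked below.)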
+ const Scalar sample_point = Scalar(1) / Scalar(3); + val = AD(sample_point,Deriv1::UnitX()); + res1 = tanh(val); + VERIFY_IS_APPROX(res1.derivatives().x(), Scalar(0.896629559604914)); + + res2 = sinh(val); + VERIFY_IS_APPROX(res2.derivatives().x(), Scalar(1.056071867829939)); + + res3 = cosh(val); + VERIFY_IS_APPROX(res3.derivatives().x(), Scalar(0.339540557256150)); +} void test_autodiff_scalar() { for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_1( check_atan2<float>() ); CALL_SUBTEST_2( check_atan2<double>() ); + CALL_SUBTEST_3( check_hyperbolic_functions<float>() ); + CALL_SUBTEST_4( check_hyperbolic_functions<double>() ); } } diff --git a/unsupported/test/cxx11_eventcount.cpp b/unsupported/test/cxx11_eventcount.cpp index f16cc6f07..3b598bf42 100644 --- a/unsupported/test/cxx11_eventcount.cpp +++ b/unsupported/test/cxx11_eventcount.cpp @@ -25,7 +25,8 @@ int rand_reentrant(unsigned int* s) { static void test_basic_eventcount() { - std::vector<EventCount::Waiter> waiters(1); + MaxSizeVector<EventCount::Waiter> waiters(1); + waiters.resize(1); EventCount ec(waiters); EventCount::Waiter& w = waiters[0]; ec.Notify(false); @@ -81,7 +82,8 @@ static void test_stress_eventcount() static const int kEvents = 1 << 16; static const int kQueues = 10; - std::vector<EventCount::Waiter> waiters(kThreads); + MaxSizeVector<EventCount::Waiter> waiters(kThreads); + waiters.resize(kThreads); EventCount ec(waiters); TestQueue queues[kQueues]; diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp deleted file mode 100644 index 9a813653c..000000000 --- a/unsupported/test/cxx11_float16.cpp +++ /dev/null @@ -1,192 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_float16 - -#include "main.h" -#include <Eigen/src/Core/arch/CUDA/Half.h> - -using Eigen::half; - -void test_conversion() -{ - // Conversion from float. - VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00); - VERIFY_IS_EQUAL(half(0.5f).x, 0x3800); - VERIFY_IS_EQUAL(half(0.33333f).x, 0x3555); - VERIFY_IS_EQUAL(half(0.0f).x, 0x0000); - VERIFY_IS_EQUAL(half(-0.0f).x, 0x8000); - VERIFY_IS_EQUAL(half(65504.0f).x, 0x7bff); - VERIFY_IS_EQUAL(half(65536.0f).x, 0x7c00); // Becomes infinity. - - // Denormals. - VERIFY_IS_EQUAL(half(-5.96046e-08f).x, 0x8001); - VERIFY_IS_EQUAL(half(5.96046e-08f).x, 0x0001); - VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002); - - // Verify round-to-nearest-even behavior. - float val1 = float(half(__half(0x3c00))); - float val2 = float(half(__half(0x3c01))); - float val3 = float(half(__half(0x3c02))); - VERIFY_IS_EQUAL(half(0.5 * (val1 + val2)).x, 0x3c00); - VERIFY_IS_EQUAL(half(0.5 * (val2 + val3)).x, 0x3c02); - - // Conversion from int. - VERIFY_IS_EQUAL(half(-1).x, 0xbc00); - VERIFY_IS_EQUAL(half(0).x, 0x0000); - VERIFY_IS_EQUAL(half(1).x, 0x3c00); - VERIFY_IS_EQUAL(half(2).x, 0x4000); - VERIFY_IS_EQUAL(half(3).x, 0x4200); - - // Conversion from bool. - VERIFY_IS_EQUAL(half(false).x, 0x0000); - VERIFY_IS_EQUAL(half(true).x, 0x3c00); - - // Conversion to float. - VERIFY_IS_EQUAL(float(half(__half(0x0000))), 0.0f); - VERIFY_IS_EQUAL(float(half(__half(0x3c00))), 1.0f); - - // Denormals. 
- VERIFY_IS_APPROX(float(half(__half(0x8001))), -5.96046e-08f); - VERIFY_IS_APPROX(float(half(__half(0x0001))), 5.96046e-08f); - VERIFY_IS_APPROX(float(half(__half(0x0002))), 1.19209e-07f); - - // NaNs and infinities. - VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number. - VERIFY(!(numext::isnan)(float(half(0.0f)))); - VERIFY((numext::isinf)(float(half(__half(0xfc00))))); - VERIFY((numext::isnan)(float(half(__half(0xfc01))))); - VERIFY((numext::isinf)(float(half(__half(0x7c00))))); - VERIFY((numext::isnan)(float(half(__half(0x7c01))))); - -#if !EIGEN_COMP_MSVC - // Visual Studio errors out on divisions by 0 - VERIFY((numext::isnan)(float(half(0.0 / 0.0)))); - VERIFY((numext::isinf)(float(half(1.0 / 0.0)))); - VERIFY((numext::isinf)(float(half(-1.0 / 0.0)))); -#endif - - // Exactly same checks as above, just directly on the half representation. - VERIFY(!(numext::isinf)(half(__half(0x7bff)))); - VERIFY(!(numext::isnan)(half(__half(0x0000)))); - VERIFY((numext::isinf)(half(__half(0xfc00)))); - VERIFY((numext::isnan)(half(__half(0xfc01)))); - VERIFY((numext::isinf)(half(__half(0x7c00)))); - VERIFY((numext::isnan)(half(__half(0x7c01)))); - -#if !EIGEN_COMP_MSVC - // Visual Studio errors out on divisions by 0 - VERIFY((numext::isnan)(half(0.0 / 0.0))); - VERIFY((numext::isinf)(half(1.0 / 0.0))); - VERIFY((numext::isinf)(half(-1.0 / 0.0))); -#endif -} - -void test_arithmetic() -{ - VERIFY_IS_EQUAL(float(half(2) + half(2)), 4); - VERIFY_IS_EQUAL(float(half(2) + half(-2)), 0); - VERIFY_IS_APPROX(float(half(0.33333f) + half(0.66667f)), 1.0f); - VERIFY_IS_EQUAL(float(half(2.0f) * half(-5.5f)), -11.0f); - VERIFY_IS_APPROX(float(half(1.0f) / half(3.0f)), 0.33333f); - VERIFY_IS_EQUAL(float(-half(4096.0f)), -4096.0f); - VERIFY_IS_EQUAL(float(-half(-4096.0f)), 4096.0f); -} - -void test_comparison() -{ - VERIFY(half(1.0f) > half(0.5f)); - VERIFY(half(0.5f) < half(1.0f)); - VERIFY(!(half(1.0f) < half(0.5f))); - VERIFY(!(half(0.5f) > half(1.0f))); - - VERIFY(!(half(4.0f) > half(4.0f))); - VERIFY(!(half(4.0f) < half(4.0f))); - - VERIFY(!(half(0.0f) < half(-0.0f))); - VERIFY(!(half(-0.0f) < half(0.0f))); - VERIFY(!(half(0.0f) > half(-0.0f))); - VERIFY(!(half(-0.0f) > half(0.0f))); - - VERIFY(half(0.2f) > half(-1.0f)); - VERIFY(half(-1.0f) < half(0.2f)); - VERIFY(half(-16.0f) < half(-15.0f)); - - VERIFY(half(1.0f) == half(1.0f)); - VERIFY(half(1.0f) != half(2.0f)); - - // Comparisons with NaNs and infinities. 
-#if !EIGEN_COMP_MSVC - // Visual Studio errors out on divisions by 0 - VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0))); - VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0)); - - VERIFY(!(half(1.0) == half(0.0 / 0.0))); - VERIFY(!(half(1.0) < half(0.0 / 0.0))); - VERIFY(!(half(1.0) > half(0.0 / 0.0))); - VERIFY(half(1.0) != half(0.0 / 0.0)); - - VERIFY(half(1.0) < half(1.0 / 0.0)); - VERIFY(half(1.0) > half(-1.0 / 0.0)); -#endif -} - -void test_basic_functions() -{ - VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f); - VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f); - - VERIFY_IS_EQUAL(float(numext::floor(half(3.5f))), 3.0f); - VERIFY_IS_EQUAL(float(numext::floor(half(-3.5f))), -4.0f); - - VERIFY_IS_EQUAL(float(numext::ceil(half(3.5f))), 4.0f); - VERIFY_IS_EQUAL(float(numext::ceil(half(-3.5f))), -3.0f); - - VERIFY_IS_APPROX(float(numext::sqrt(half(0.0f))), 0.0f); - VERIFY_IS_APPROX(float(numext::sqrt(half(4.0f))), 2.0f); - - VERIFY_IS_APPROX(float(numext::pow(half(0.0f), half(1.0f))), 0.0f); - VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f); - - VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f); - VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI)); - - VERIFY_IS_EQUAL(float(numext::log(half(1.0f))), 0.0f); - VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f); -} - -void test_trigonometric_functions() -{ - VERIFY_IS_APPROX(numext::cos(half(0.0f)), half(cosf(0.0f))); - VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI)), half(cosf(EIGEN_PI))); - //VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI/2)), half(cosf(EIGEN_PI/2))); - //VERIFY_IS_APPROX(numext::cos(half(3*EIGEN_PI/2)), half(cosf(3*EIGEN_PI/2))); - VERIFY_IS_APPROX(numext::cos(half(3.5f)), half(cosf(3.5f))); - - VERIFY_IS_APPROX(numext::sin(half(0.0f)), half(sinf(0.0f))); - // VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI)), half(sinf(EIGEN_PI))); - VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI/2)), half(sinf(EIGEN_PI/2))); - VERIFY_IS_APPROX(numext::sin(half(3*EIGEN_PI/2)), half(sinf(3*EIGEN_PI/2))); - VERIFY_IS_APPROX(numext::sin(half(3.5f)), half(sinf(3.5f))); - - VERIFY_IS_APPROX(numext::tan(half(0.0f)), half(tanf(0.0f))); - // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI)), half(tanf(EIGEN_PI))); - // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI/2)), half(tanf(EIGEN_PI/2))); - //VERIFY_IS_APPROX(numext::tan(half(3*EIGEN_PI/2)), half(tanf(3*EIGEN_PI/2))); - VERIFY_IS_APPROX(numext::tan(half(3.5f)), half(tanf(3.5f))); -} - -void test_cxx11_float16() -{ - CALL_SUBTEST(test_conversion()); - CALL_SUBTEST(test_arithmetic()); - CALL_SUBTEST(test_comparison()); - CALL_SUBTEST(test_basic_functions()); - CALL_SUBTEST(test_trigonometric_functions()); -} diff --git a/unsupported/test/cxx11_non_blocking_thread_pool.cpp b/unsupported/test/cxx11_non_blocking_thread_pool.cpp new file mode 100644 index 000000000..5f9bb938b --- /dev/null +++ b/unsupported/test/cxx11_non_blocking_thread_pool.cpp @@ -0,0 +1,107 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com> +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_USE_THREADS +#include "main.h" +#include "Eigen/CXX11/ThreadPool" + +static void test_create_destroy_empty_pool() +{ + // Just create and destroy the pool. 
This will wind up and tear down worker + // threads. Ensure there are no issues in that logic. + for (int i = 0; i < 16; ++i) { + NonBlockingThreadPool tp(i); + } +} + + +static void test_parallelism() +{ + // Test we never-ever fail to match available tasks with idle threads. + const int kThreads = 16; // code below expects that this is a multiple of 4 + NonBlockingThreadPool tp(kThreads); + VERIFY_IS_EQUAL(tp.NumThreads(), kThreads); + VERIFY_IS_EQUAL(tp.CurrentThreadId(), -1); + for (int iter = 0; iter < 100; ++iter) { + std::atomic<int> running(0); + std::atomic<int> done(0); + std::atomic<int> phase(0); + // Schedule kThreads tasks and ensure that they all are running. + for (int i = 0; i < kThreads; ++i) { + tp.Schedule([&]() { + const int thread_id = tp.CurrentThreadId(); + VERIFY_GE(thread_id, 0); + VERIFY_LE(thread_id, kThreads - 1); + running++; + while (phase < 1) { + } + done++; + }); + } + while (running != kThreads) { + } + running = 0; + phase = 1; + // Now, while the previous tasks exit, schedule another kThreads tasks and + // ensure that they are running. + for (int i = 0; i < kThreads; ++i) { + tp.Schedule([&, i]() { + running++; + while (phase < 2) { + } + // When all tasks are running, half of tasks exit, quarter of tasks + // continue running and quarter of tasks schedule another 2 tasks each. + // Concurrently main thread schedules another quarter of tasks. + // This gives us another kThreads tasks and we ensure that they all + // are running. + if (i < kThreads / 2) { + } else if (i < 3 * kThreads / 4) { + running++; + while (phase < 3) { + } + done++; + } else { + for (int j = 0; j < 2; ++j) { + tp.Schedule([&]() { + running++; + while (phase < 3) { + } + done++; + }); + } + } + done++; + }); + } + while (running != kThreads) { + } + running = 0; + phase = 2; + for (int i = 0; i < kThreads / 4; ++i) { + tp.Schedule([&]() { + running++; + while (phase < 3) { + } + done++; + }); + } + while (running != kThreads) { + } + phase = 3; + while (done != 3 * kThreads) { + } + } +} + +void test_cxx11_non_blocking_thread_pool() +{ + CALL_SUBTEST(test_create_destroy_empty_pool()); + CALL_SUBTEST(test_parallelism()); +} diff --git a/unsupported/test/cxx11_runqueue.cpp b/unsupported/test/cxx11_runqueue.cpp index d1770ee1b..91f690114 100644 --- a/unsupported/test/cxx11_runqueue.cpp +++ b/unsupported/test/cxx11_runqueue.cpp @@ -33,73 +33,81 @@ void test_basic_runqueue() VERIFY_IS_EQUAL(0u, q.Size()); VERIFY_IS_EQUAL(0, q.PopFront()); std::vector<int> stolen; - VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen)); VERIFY_IS_EQUAL(0u, stolen.size()); // Push one front, pop one front. VERIFY_IS_EQUAL(0, q.PushFront(1)); - VERIFY_IS_EQUAL(1, q.Size()); + VERIFY_IS_EQUAL(1u, q.Size()); VERIFY_IS_EQUAL(1, q.PopFront()); - VERIFY_IS_EQUAL(0, q.Size()); + VERIFY_IS_EQUAL(0u, q.Size()); // Push front to overflow. 
VERIFY_IS_EQUAL(0, q.PushFront(2)); - VERIFY_IS_EQUAL(1, q.Size()); + VERIFY_IS_EQUAL(1u, q.Size()); VERIFY_IS_EQUAL(0, q.PushFront(3)); - VERIFY_IS_EQUAL(2, q.Size()); + VERIFY_IS_EQUAL(2u, q.Size()); VERIFY_IS_EQUAL(0, q.PushFront(4)); - VERIFY_IS_EQUAL(3, q.Size()); + VERIFY_IS_EQUAL(3u, q.Size()); VERIFY_IS_EQUAL(0, q.PushFront(5)); - VERIFY_IS_EQUAL(4, q.Size()); + VERIFY_IS_EQUAL(4u, q.Size()); VERIFY_IS_EQUAL(6, q.PushFront(6)); - VERIFY_IS_EQUAL(4, q.Size()); + VERIFY_IS_EQUAL(4u, q.Size()); VERIFY_IS_EQUAL(5, q.PopFront()); - VERIFY_IS_EQUAL(3, q.Size()); + VERIFY_IS_EQUAL(3u, q.Size()); VERIFY_IS_EQUAL(4, q.PopFront()); - VERIFY_IS_EQUAL(2, q.Size()); + VERIFY_IS_EQUAL(2u, q.Size()); VERIFY_IS_EQUAL(3, q.PopFront()); - VERIFY_IS_EQUAL(1, q.Size()); + VERIFY_IS_EQUAL(1u, q.Size()); VERIFY_IS_EQUAL(2, q.PopFront()); - VERIFY_IS_EQUAL(0, q.Size()); + VERIFY_IS_EQUAL(0u, q.Size()); VERIFY_IS_EQUAL(0, q.PopFront()); // Push one back, pop one back. VERIFY_IS_EQUAL(0, q.PushBack(7)); - VERIFY_IS_EQUAL(1, q.Size()); - VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen)); - VERIFY_IS_EQUAL(1, stolen.size()); + VERIFY_IS_EQUAL(1u, q.Size()); + VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(1u, stolen.size()); VERIFY_IS_EQUAL(7, stolen[0]); - VERIFY_IS_EQUAL(0, q.Size()); + VERIFY_IS_EQUAL(0u, q.Size()); stolen.clear(); // Push back to overflow. VERIFY_IS_EQUAL(0, q.PushBack(8)); - VERIFY_IS_EQUAL(1, q.Size()); + VERIFY_IS_EQUAL(1u, q.Size()); VERIFY_IS_EQUAL(0, q.PushBack(9)); - VERIFY_IS_EQUAL(2, q.Size()); + VERIFY_IS_EQUAL(2u, q.Size()); VERIFY_IS_EQUAL(0, q.PushBack(10)); - VERIFY_IS_EQUAL(3, q.Size()); + VERIFY_IS_EQUAL(3u, q.Size()); VERIFY_IS_EQUAL(0, q.PushBack(11)); - VERIFY_IS_EQUAL(4, q.Size()); + VERIFY_IS_EQUAL(4u, q.Size()); VERIFY_IS_EQUAL(12, q.PushBack(12)); - VERIFY_IS_EQUAL(4, q.Size()); + VERIFY_IS_EQUAL(4u, q.Size()); // Pop back in halves. - VERIFY_IS_EQUAL(2, q.PopBackHalf(&stolen)); - VERIFY_IS_EQUAL(2, stolen.size()); + VERIFY_IS_EQUAL(2u, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(2u, stolen.size()); VERIFY_IS_EQUAL(10, stolen[0]); VERIFY_IS_EQUAL(11, stolen[1]); - VERIFY_IS_EQUAL(2, q.Size()); + VERIFY_IS_EQUAL(2u, q.Size()); stolen.clear(); - VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen)); - VERIFY_IS_EQUAL(1, stolen.size()); + VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(1u, stolen.size()); VERIFY_IS_EQUAL(9, stolen[0]); - VERIFY_IS_EQUAL(1, q.Size()); + VERIFY_IS_EQUAL(1u, q.Size()); stolen.clear(); - VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen)); - VERIFY_IS_EQUAL(1, stolen.size()); + VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(1u, stolen.size()); VERIFY_IS_EQUAL(8, stolen[0]); stolen.clear(); - VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen)); - VERIFY_IS_EQUAL(0, stolen.size()); + VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(0u, stolen.size()); // Empty again. VERIFY(q.Empty()); - VERIFY_IS_EQUAL(0, q.Size()); + VERIFY_IS_EQUAL(0u, q.Size()); + VERIFY_IS_EQUAL(0, q.PushFront(1)); + VERIFY_IS_EQUAL(0, q.PushFront(2)); + VERIFY_IS_EQUAL(0, q.PushFront(3)); + VERIFY_IS_EQUAL(1, q.PopBack()); + VERIFY_IS_EQUAL(2, q.PopBack()); + VERIFY_IS_EQUAL(3, q.PopBack()); + VERIFY(q.Empty()); + VERIFY_IS_EQUAL(0u, q.Size()); +} // Empty tests that the queue is not claimed to be empty when it is in fact not. 
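The 0 -> 0u changes above make the expected values match the unsigned counts that Size() and PopBackHalf() return, so VERIFY_IS_EQUAL no longer compares operands of mixed signedness. A minimal sketch of the distinction (the exact return type of Size() is an assumption here):

  unsigned int n = q.Size();  // queue sizes are reported as unsigned counts
  VERIFY_IS_EQUAL(1, n);      // signed vs. unsigned operands: -Wsign-compare territory
  VERIFY_IS_EQUAL(1u, n);     // both operands unsigned: clean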
@@ -130,7 +138,7 @@ void test_empty_runqueue() stolen.clear(); break; } - VERIFY_IS_EQUAL(0, stolen.size()); + VERIFY_IS_EQUAL(0u, stolen.size()); } } } diff --git a/unsupported/test/cxx11_tensor_argmax_cuda.cu b/unsupported/test/cxx11_tensor_argmax_cuda.cu index 41ccbe974..6fe8982f2 100644 --- a/unsupported/test/cxx11_tensor_argmax_cuda.cu +++ b/unsupported/test/cxx11_tensor_argmax_cuda.cu @@ -12,6 +12,9 @@ #define EIGEN_TEST_FUNC cxx11_tensor_cuda #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> diff --git a/unsupported/test/cxx11_tensor_assign.cpp b/unsupported/test/cxx11_tensor_assign.cpp index e5cf61fe1..8fe85d83c 100644 --- a/unsupported/test/cxx11_tensor_assign.cpp +++ b/unsupported/test/cxx11_tensor_assign.cpp @@ -286,7 +286,7 @@ static void test_compound_assign() } static void test_std_initializers_tensor() { -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES +#if EIGEN_HAS_VARIADIC_TEMPLATES Tensor<int, 1> a(3); a.setValues({0, 1, 2}); VERIFY_IS_EQUAL(a(0), 0); diff --git a/unsupported/test/cxx11_tensor_broadcasting.cpp b/unsupported/test/cxx11_tensor_broadcasting.cpp index 2ddf47234..5c0ea5889 100644 --- a/unsupported/test/cxx11_tensor_broadcasting.cpp +++ b/unsupported/test/cxx11_tensor_broadcasting.cpp @@ -115,7 +115,7 @@ static void test_static_broadcasting() Tensor<float, 3, DataLayout> tensor(8,3,5); tensor.setRandom(); -#ifdef EIGEN_HAS_CONSTEXPR +#if EIGEN_HAS_CONSTEXPR Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>, Eigen::type2index<4>> broadcasts; #else Eigen::array<int, 3> broadcasts; diff --git a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu index f22b99de8..88c233994 100644 --- a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu @@ -13,7 +13,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU - +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> diff --git a/unsupported/test/cxx11_tensor_complex_cuda.cu b/unsupported/test/cxx11_tensor_complex_cuda.cu new file mode 100644 index 000000000..f895efd01 --- /dev/null +++ b/unsupported/test/cxx11_tensor_complex_cuda.cu @@ -0,0 +1,115 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
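+ // cuda_fp16.h ships only with CUDA 7.5 and newer, hence the __CUDACC_VER__ guard below.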
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_FUNC cxx11_tensor_complex +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +void test_cuda_nullary() { + Tensor<std::complex<float>, 1, 0, int> in1(2); + Tensor<std::complex<float>, 1, 0, int> in2(2); + in1.setRandom(); + in2.setRandom(); + + std::size_t float_bytes = in1.size() * sizeof(float); + std::size_t complex_bytes = in1.size() * sizeof(std::complex<float>); + + std::complex<float>* d_in1; + std::complex<float>* d_in2; + float* d_out2; + cudaMalloc((void**)(&d_in1), complex_bytes); + cudaMalloc((void**)(&d_in2), complex_bytes); + cudaMalloc((void**)(&d_out2), float_bytes); + cudaMemcpy(d_in1, in1.data(), complex_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in2, in2.data(), complex_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in1( + d_in1, 2); + Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in2( + d_in2, 2); + Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_out2( + d_out2, 2); + + gpu_in1.device(gpu_device) = gpu_in1.constant(std::complex<float>(3.14f, 2.7f)); + gpu_out2.device(gpu_device) = gpu_in2.abs(); + + Tensor<std::complex<float>, 1, 0, int> new1(2); + Tensor<float, 1, 0, int> new2(2); + + assert(cudaMemcpyAsync(new1.data(), d_in1, complex_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaMemcpyAsync(new2.data(), d_out2, float_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 2; ++i) { + VERIFY_IS_APPROX(new1(i), std::complex<float>(3.14f, 2.7f)); + VERIFY_IS_APPROX(new2(i), std::abs(in2(i))); + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out2); +} + + +static void test_cuda_sum_reductions() { + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + const int num_rows = internal::random<int>(1024, 5*1024); + const int num_cols = internal::random<int>(1024, 5*1024); + + Tensor<std::complex<float>, 2> in(num_rows, num_cols); + in.setRandom(); + + Tensor<std::complex<float>, 0> full_redux; + full_redux = in.sum(); + + std::size_t in_bytes = in.size() * sizeof(std::complex<float>); + std::size_t out_bytes = full_redux.size() * sizeof(std::complex<float>); + std::complex<float>* gpu_in_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(in_bytes)); + std::complex<float>* gpu_out_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(out_bytes)); + gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes); + + TensorMap<Tensor<std::complex<float>, 2> > in_gpu(gpu_in_ptr, num_rows, num_cols); + TensorMap<Tensor<std::complex<float>, 0> > out_gpu(gpu_out_ptr); + + out_gpu.device(gpu_device) = in_gpu.sum(); + + Tensor<std::complex<float>, 0> full_redux_gpu; + gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes); + gpu_device.synchronize(); + + // Check that the CPU and GPU reductions return the same result. 
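+ // (VERIFY_IS_APPROX compares with test_precision<float>() -- roughly 1e-3 in Eigen's
+ // test harness -- which absorbs reduction-order differences between CPU and GPU.)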
+ VERIFY_IS_APPROX(full_redux(), full_redux_gpu()); + + gpu_device.deallocate(gpu_in_ptr); + gpu_device.deallocate(gpu_out_ptr); +} + + +void test_cxx11_tensor_complex() +{ + CALL_SUBTEST(test_cuda_nullary()); + CALL_SUBTEST(test_cuda_sum_reductions()); +} diff --git a/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu new file mode 100644 index 000000000..2baf5eaad --- /dev/null +++ b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu @@ -0,0 +1,97 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template<typename T> +void test_cuda_complex_cwise_ops() { + const int kNumItems = 2; + std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>); + + std::complex<T>* d_in1; + std::complex<T>* d_in2; + std::complex<T>* d_out; + cudaMalloc((void**)(&d_in1), complex_bytes); + cudaMalloc((void**)(&d_in2), complex_bytes); + cudaMalloc((void**)(&d_out), complex_bytes); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1( + d_in1, kNumItems); + Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2( + d_in2, kNumItems); + Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out( + d_out, kNumItems); + + const std::complex<T> a(3.14f, 2.7f); + const std::complex<T> b(-10.6f, 1.4f); + + gpu_in1.device(gpu_device) = gpu_in1.constant(a); + gpu_in2.device(gpu_device) = gpu_in2.constant(b); + + enum CwiseOp { + Add = 0, + Sub, + Mul, + Div + }; + + Tensor<std::complex<T>, 1, 0, int> actual(kNumItems); + for (int op = Add; op <= Div; op++) { + std::complex<T> expected; + switch (static_cast<CwiseOp>(op)) { + case Add: + gpu_out.device(gpu_device) = gpu_in1 + gpu_in2; + expected = a + b; + break; + case Sub: + gpu_out.device(gpu_device) = gpu_in1 - gpu_in2; + expected = a - b; + break; + case Mul: + gpu_out.device(gpu_device) = gpu_in1 * gpu_in2; + expected = a * b; + break; + case Div: + gpu_out.device(gpu_device) = gpu_in1 / gpu_in2; + expected = a / b; + break; + } + assert(cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < kNumItems; ++i) { + VERIFY_IS_APPROX(actual(i), expected); + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); +} + + +void test_cxx11_tensor_complex_cwise_ops() +{ + CALL_SUBTEST(test_cuda_complex_cwise_ops<float>()); + CALL_SUBTEST(test_cuda_complex_cwise_ops<double>()); +} diff --git a/unsupported/test/cxx11_tensor_contract_cuda.cu b/unsupported/test/cxx11_tensor_contract_cuda.cu index 6d1ef07f9..767e9c678 100644 --- a/unsupported/test/cxx11_tensor_contract_cuda.cu +++ b/unsupported/test/cxx11_tensor_contract_cuda.cu @@ -14,7 +14,9 @@ #define 
EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU - +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> @@ -84,6 +86,65 @@ void test_cuda_contraction(int m_size, int k_size, int n_size) cudaFree((void*)d_t_result); } + +template<int DataLayout> +void test_scalar(int m_size, int k_size, int n_size) +{ + std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; + // with these dimensions, the output has 300 * 140 elements, which is + // more than 30 * 1024, which is the number of threads in blocks on + // a 15 SM GK110 GPU + Tensor<float, 2, DataLayout> t_left(m_size, k_size); + Tensor<float, 2, DataLayout> t_right(k_size, n_size); + Tensor<float, 0, DataLayout> t_result; + Tensor<float, 0, DataLayout> t_result_gpu; + Eigen::array<DimPair, 2> dims(DimPair(0, 0), DimPair(1, 1)); + + t_left.setRandom(); + t_right.setRandom(); + + std::size_t t_left_bytes = t_left.size() * sizeof(float); + std::size_t t_right_bytes = t_right.size() * sizeof(float); + std::size_t t_result_bytes = sizeof(float); + + float* d_t_left; + float* d_t_right; + float* d_t_result; + + cudaMalloc((void**)(&d_t_left), t_left_bytes); + cudaMalloc((void**)(&d_t_right), t_right_bytes); + cudaMalloc((void**)(&d_t_result), t_result_bytes); + + cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > + gpu_t_left(d_t_left, m_size, k_size); + Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > + gpu_t_right(d_t_right, k_size, n_size); + Eigen::TensorMap<Eigen::Tensor<float, 0, DataLayout> > + gpu_t_result(d_t_result); + + gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims); + t_result = t_left.contract(t_right, dims); + + cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost); + if (fabs(t_result() - t_result_gpu()) > 1e-4f && + !Eigen::internal::isApprox(t_result(), t_result_gpu(), 1e-4f)) { + std::cout << "mismatch detected: " << t_result() + << " vs " << t_result_gpu() << std::endl; + assert(false); + } + + cudaFree((void*)d_t_left); + cudaFree((void*)d_t_right); + cudaFree((void*)d_t_result); +} + + template<int DataLayout> void test_cuda_contraction_m() { for (int k = 32; k < 256; k++) { @@ -138,6 +199,9 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST_1(test_cuda_contraction<ColMajor>(128, 128, 128)); CALL_SUBTEST_1(test_cuda_contraction<RowMajor>(128, 128, 128)); + CALL_SUBTEST_1(test_scalar<ColMajor>(128, 128, 128)); + CALL_SUBTEST_1(test_scalar<RowMajor>(128, 128, 128)); + CALL_SUBTEST_2(test_cuda_contraction_m<ColMajor>()); CALL_SUBTEST_3(test_cuda_contraction_m<RowMajor>()); diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp index 0e16308a2..ace97057f 100644 --- a/unsupported/test/cxx11_tensor_contraction.cpp +++ b/unsupported/test/cxx11_tensor_contraction.cpp @@ -87,19 +87,14 @@ static void test_scalar() vec1.setRandom(); vec2.setRandom(); - Tensor<float, 1, DataLayout> scalar(1); - scalar.setZero(); Eigen::array<DimPair, 1> dims = {{DimPair(0, 0)}}; - typedef TensorEvaluator<decltype(vec1.contract(vec2, dims)), DefaultDevice> Evaluator; - Evaluator eval(vec1.contract(vec2, dims), DefaultDevice()); - eval.evalTo(scalar.data()); - 
EIGEN_STATIC_ASSERT(Evaluator::NumDims==1ul, YOU_MADE_A_PROGRAMMING_MISTAKE); + Tensor<float, 0, DataLayout> scalar = vec1.contract(vec2, dims); float expected = 0.0f; for (int i = 0; i < 6; ++i) { expected += vec1(i) * vec2(i); } - VERIFY_IS_APPROX(scalar(0), expected); + VERIFY_IS_APPROX(scalar(), expected); } template<int DataLayout> @@ -494,6 +489,27 @@ static void test_tensor_product() } +template<int DataLayout> +static void test_const_inputs() +{ + Tensor<float, 2, DataLayout> in1(2, 3); + Tensor<float, 2, DataLayout> in2(3, 2); + in1.setRandom(); + in2.setRandom(); + + TensorMap<Tensor<const float, 2, DataLayout> > mat1(in1.data(), 2, 3); + TensorMap<Tensor<const float, 2, DataLayout> > mat2(in2.data(), 3, 2); + Tensor<float, 2, DataLayout> mat3(2,2); + + Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}}; + mat3 = mat1.contract(mat2, dims); + + VERIFY_IS_APPROX(mat3(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(1,0) + mat1(0,2)*mat2(2,0)); + VERIFY_IS_APPROX(mat3(0,1), mat1(0,0)*mat2(0,1) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(2,1)); + VERIFY_IS_APPROX(mat3(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(1,0) + mat1(1,2)*mat2(2,0)); + VERIFY_IS_APPROX(mat3(1,1), mat1(1,0)*mat2(0,1) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(2,1)); +} + void test_cxx11_tensor_contraction() { CALL_SUBTEST(test_evals<ColMajor>()); @@ -524,4 +540,6 @@ void test_cxx11_tensor_contraction() CALL_SUBTEST(test_small_blocking_factors<RowMajor>()); CALL_SUBTEST(test_tensor_product<ColMajor>()); CALL_SUBTEST(test_tensor_product<RowMajor>()); + CALL_SUBTEST(test_const_inputs<ColMajor>()); + CALL_SUBTEST(test_const_inputs<RowMajor>()); } diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 4026f48f0..bf216587a 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -10,19 +10,65 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_cuda -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU - +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> using Eigen::Tensor; +void test_cuda_nullary() { + Tensor<float, 1, 0, int> in1(2); + Tensor<float, 1, 0, int> in2(2); + in1.setRandom(); + in2.setRandom(); + + std::size_t tensor_bytes = in1.size() * sizeof(float); + + float* d_in1; + float* d_in2; + cudaMalloc((void**)(&d_in1), tensor_bytes); + cudaMalloc((void**)(&d_in2), tensor_bytes); + cudaMemcpy(d_in1, in1.data(), tensor_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in2, in2.data(), tensor_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_in1( + d_in1, 2); + Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_in2( + d_in2, 2); + + gpu_in1.device(gpu_device) = gpu_in1.constant(3.14f); + gpu_in2.device(gpu_device) = gpu_in2.random(); + + Tensor<float, 1, 0, int> new1(2); + Tensor<float, 1, 0, int> new2(2); + + assert(cudaMemcpyAsync(new1.data(), d_in1, tensor_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaMemcpyAsync(new2.data(), d_in2, tensor_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 2; ++i) { + VERIFY_IS_APPROX(new1(i), 3.14f); + VERIFY_IS_NOT_EQUAL(new2(i), in2(i)); + } + + 
cudaFree(d_in1); + cudaFree(d_in2); +} + void test_cuda_elementwise_small() { - Tensor<float, 1> in1(Eigen::array<int, 1>(2)); - Tensor<float, 1> in2(Eigen::array<int, 1>(2)); - Tensor<float, 1> out(Eigen::array<int, 1>(2)); + Tensor<float, 1> in1(Eigen::array<Eigen::DenseIndex, 1>(2)); + Tensor<float, 1> in2(Eigen::array<Eigen::DenseIndex, 1>(2)); + Tensor<float, 1> out(Eigen::array<Eigen::DenseIndex, 1>(2)); in1.setRandom(); in2.setRandom(); @@ -44,11 +90,11 @@ void test_cuda_elementwise_small() { Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in1( - d_in1, Eigen::array<int, 1>(2)); + d_in1, Eigen::array<Eigen::DenseIndex, 1>(2)); Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in2( - d_in2, Eigen::array<int, 1>(2)); + d_in2, Eigen::array<Eigen::DenseIndex, 1>(2)); Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_out( - d_out, Eigen::array<int, 1>(2)); + d_out, Eigen::array<Eigen::DenseIndex, 1>(2)); gpu_out.device(gpu_device) = gpu_in1 + gpu_in2; @@ -58,8 +104,8 @@ void test_cuda_elementwise_small() { for (int i = 0; i < 2; ++i) { VERIFY_IS_APPROX( - out(Eigen::array<int, 1>(i)), - in1(Eigen::array<int, 1>(i)) + in2(Eigen::array<int, 1>(i))); + out(Eigen::array<Eigen::DenseIndex, 1>(i)), + in1(Eigen::array<Eigen::DenseIndex, 1>(i)) + in2(Eigen::array<Eigen::DenseIndex, 1>(i))); } cudaFree(d_in1); @@ -69,10 +115,10 @@ void test_cuda_elementwise_small() { void test_cuda_elementwise() { - Tensor<float, 3> in1(Eigen::array<int, 3>(72,53,97)); - Tensor<float, 3> in2(Eigen::array<int, 3>(72,53,97)); - Tensor<float, 3> in3(Eigen::array<int, 3>(72,53,97)); - Tensor<float, 3> out(Eigen::array<int, 3>(72,53,97)); + Tensor<float, 3> in1(Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Tensor<float, 3> in2(Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Tensor<float, 3> in3(Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Tensor<float, 3> out(Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); in1.setRandom(); in2.setRandom(); in3.setRandom(); @@ -98,10 +144,10 @@ void test_cuda_elementwise() Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); - Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<int, 3>(72,53,97)); - Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<int, 3>(72,53,97)); - Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in3(d_in3, Eigen::array<int, 3>(72,53,97)); - Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<int, 3>(72,53,97)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in3(d_in3, Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); gpu_out.device(gpu_device) = gpu_in1 + gpu_in2 * gpu_in3; @@ -111,7 +157,7 @@ void test_cuda_elementwise() for (int i = 0; i < 72; ++i) { for (int j = 0; j < 53; ++j) { for (int k = 0; k < 97; ++k) { - VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * in3(Eigen::array<int, 3>(i,j,k))); + VERIFY_IS_APPROX(out(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)), in1(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)) + in2(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)) * in3(Eigen::array<Eigen::DenseIndex, 3>(i,j,k))); } } } @@ -181,7 
+227,7 @@ void test_cuda_reduction() Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, 72,53,97,113); Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97); - array<int, 2> reduction_axis; + array<Eigen::DenseIndex, 2> reduction_axis; reduction_axis[0] = 1; reduction_axis[1] = 3; @@ -214,8 +260,8 @@ void test_cuda_contraction() // more than 30 * 1024, which is the number of threads in blocks on // a 15 SM GK110 GPU Tensor<float, 4, DataLayout> t_left(6, 50, 3, 31); - Tensor<float, 5, DataLayout> t_right(Eigen::array<int, 5>(3, 31, 7, 20, 1)); - Tensor<float, 5, DataLayout> t_result(Eigen::array<int, 5>(6, 50, 7, 20, 1)); + Tensor<float, 5, DataLayout> t_right(Eigen::array<Eigen::DenseIndex, 5>(3, 31, 7, 20, 1)); + Tensor<float, 5, DataLayout> t_result(Eigen::array<Eigen::DenseIndex, 5>(6, 50, 7, 20, 1)); t_left.setRandom(); t_right.setRandom(); @@ -299,7 +345,7 @@ void test_cuda_convolution_1d() Eigen::TensorMap<Eigen::Tensor<float, 1, DataLayout> > gpu_kernel(d_kernel, 4); Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out, 74,34,11,137); - Eigen::array<int, 1> dims(1); + Eigen::array<Eigen::DenseIndex, 1> dims(1); gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); @@ -352,7 +398,7 @@ void test_cuda_convolution_inner_dim_col_major_1d() Eigen::TensorMap<Eigen::Tensor<float, 1, ColMajor> > gpu_kernel(d_kernel,4); Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_out(d_out,71,9,11,7); - Eigen::array<int, 1> dims(0); + Eigen::array<Eigen::DenseIndex, 1> dims(0); gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); @@ -405,7 +451,7 @@ void test_cuda_convolution_inner_dim_row_major_1d() Eigen::TensorMap<Eigen::Tensor<float, 1, RowMajor> > gpu_kernel(d_kernel, 4); Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_out(d_out, 7,9,11,71); - Eigen::array<int, 1> dims(3); + Eigen::array<Eigen::DenseIndex, 1> dims(3); gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); @@ -459,7 +505,7 @@ void test_cuda_convolution_2d() Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_kernel(d_kernel,3,4); Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out,74,35,8,137); - Eigen::array<int, 2> dims(1,2); + Eigen::array<Eigen::DenseIndex, 2> dims(1,2); gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); @@ -496,9 +542,9 @@ void test_cuda_convolution_2d() template<int DataLayout> void test_cuda_convolution_3d() { - Tensor<float, 5, DataLayout> input(Eigen::array<int, 5>(74,37,11,137,17)); + Tensor<float, 5, DataLayout> input(Eigen::array<Eigen::DenseIndex, 5>(74,37,11,137,17)); Tensor<float, 3, DataLayout> kernel(3,4,2); - Tensor<float, 5, DataLayout> out(Eigen::array<int, 5>(74,35,8,136,17)); + Tensor<float, 5, DataLayout> out(Eigen::array<Eigen::DenseIndex, 5>(74,35,8,136,17)); input = input.constant(10.0f) + input.random(); kernel = kernel.constant(7.0f) + kernel.random(); @@ -523,7 +569,7 @@ void test_cuda_convolution_3d() Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > gpu_kernel(d_kernel,3,4,2); 
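
// Sketch of the convention behind the int -> Eigen::DenseIndex edits in this
// file (not part of the patch): with the EIGEN_DEFAULT_DENSE_INDEX_TYPE
// override removed above, index arrays must match the tensors' real index
// type, which defaults to std::ptrdiff_t.
static void sketch_dense_index_arrays() {
  Eigen::array<Eigen::DenseIndex, 3> sizes{{72, 53, 97}};
  Eigen::Tensor<float, 3> t(sizes);
  Eigen::array<Eigen::DenseIndex, 1> conv_dim{{1}};  // dimension to convolve over
  (void)t; (void)conv_dim;
}
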
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_out(d_out,74,35,8,136,17); - Eigen::array<int, 3> dims(1,2,3); + Eigen::array<Eigen::DenseIndex, 3> dims(1,2,3); gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); @@ -1019,8 +1065,156 @@ void test_cuda_erfc(const Scalar stddev) cudaFree(d_out); } +template <typename Scalar> +void test_cuda_betainc() +{ + Tensor<Scalar, 1> in_x(125); + Tensor<Scalar, 1> in_a(125); + Tensor<Scalar, 1> in_b(125); + Tensor<Scalar, 1> out(125); + Tensor<Scalar, 1> expected_out(125); + out.setZero(); + + Scalar nan = std::numeric_limits<Scalar>::quiet_NaN(); + + Array<Scalar, 1, Dynamic> x(125); + Array<Scalar, 1, Dynamic> a(125); + Array<Scalar, 1, Dynamic> b(125); + Array<Scalar, 1, Dynamic> v(125); + + a << 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999; + + b << 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 
0.999, 0.999, 0.999, 0.999, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999; + + x << -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, + 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, + 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, + -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, + 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1; + + v << nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, 0.47972119876364683, 0.5, 0.5202788012363533, nan, nan, + 0.9518683957740043, 0.9789663010413743, 0.9931729188073435, nan, nan, + 0.999995949033062, 0.9999999999993698, 0.9999999999999999, nan, nan, + 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, nan, nan, + nan, nan, nan, nan, 0.006827081192655869, 0.0210336989586256, + 0.04813160422599567, nan, nan, 0.20014344256217678, 0.5000000000000001, + 0.7998565574378232, nan, nan, 0.9991401428435834, 0.999999999698403, + 0.9999999999999999, nan, nan, 0.9999999999999999, 0.9999999999999999, + 0.9999999999999999, nan, nan, nan, nan, nan, nan, nan, + 1.0646600232370887e-25, 6.301722877826246e-13, 4.050966937974938e-06, nan, + nan, 7.864342668429763e-23, 3.015969667594166e-10, 0.0008598571564165444, + nan, nan, 6.031987710123844e-08, 0.5000000000000007, 0.9999999396801229, + nan, nan, 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, + nan, nan, nan, nan, nan, nan, 0.0, 7.029920380986636e-306, + 2.2450728208591345e-101, nan, nan, 0.0, 9.275871147869727e-302, + 1.2232913026152827e-97, nan, nan, 0.0, 3.0891393081932924e-252, + 2.9303043666183996e-60, nan, nan, 2.248913486879199e-196, + 0.5000000000004947, 0.9999999999999999, nan; + + for (int i = 0; i < 125; ++i) { + in_x(i) = x(i); + in_a(i) = a(i); + in_b(i) = b(i); + expected_out(i) = v(i); + } + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in_x; + Scalar* d_in_a; + Scalar* d_in_b; + Scalar* d_out; + cudaMalloc((void**)(&d_in_x), bytes); + cudaMalloc((void**)(&d_in_a), bytes); + cudaMalloc((void**)(&d_in_b), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_a, in_a.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_b, in_b.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 125); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_a(d_in_a, 125); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_b(d_in_b, 125); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 125); + + gpu_out.device(gpu_device) = 
betainc(gpu_in_a, gpu_in_b, gpu_in_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 1; i < 125; ++i) { + if ((std::isnan)(expected_out(i))) { + VERIFY((std::isnan)(out(i))); + } else { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + } + + cudaFree(d_in_x); + cudaFree(d_in_a); + cudaFree(d_in_b); + cudaFree(d_out); +} + + void test_cxx11_tensor_cuda() { + CALL_SUBTEST_1(test_cuda_nullary()); CALL_SUBTEST_1(test_cuda_elementwise_small()); CALL_SUBTEST_1(test_cuda_elementwise()); CALL_SUBTEST_1(test_cuda_props()); @@ -1086,5 +1280,8 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST_5(test_cuda_igamma<double>()); CALL_SUBTEST_5(test_cuda_igammac<double>()); + + CALL_SUBTEST_6(test_cuda_betainc<float>()); + CALL_SUBTEST_6(test_cuda_betainc<double>()); #endif } diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu index cbe9e6449..fde20ddf2 100644 --- a/unsupported/test/cxx11_tensor_device.cu +++ b/unsupported/test/cxx11_tensor_device.cu @@ -13,7 +13,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU - +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> @@ -241,7 +243,7 @@ void test_cpu() { const float result = out(i,j,k); const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f) + (in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f); - if (fabs(expected) < 1e-4 && fabs(result) < 1e-4) { + if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) { continue; } VERIFY_IS_APPROX(expected, result); @@ -258,7 +260,7 @@ void test_cpu() { in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f) + (in1(i+1,j,k) * -1.0f + in1(i+1,j+1,k) * -0.3f + in1(i+1,j,k+1) * -0.7f + in1(i+1,j+1,k+1) * -0.5f); - if (fabs(expected) < 1e-4 && fabs(result) < 1e-4) { + if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) { continue; } VERIFY_IS_APPROX(expected, result); diff --git a/unsupported/test/cxx11_tensor_dimension.cpp b/unsupported/test/cxx11_tensor_dimension.cpp index 421e73693..16f168ed4 100644 --- a/unsupported/test/cxx11_tensor_dimension.cpp +++ b/unsupported/test/cxx11_tensor_dimension.cpp @@ -21,7 +21,7 @@ static void test_dynamic_size() VERIFY_IS_EQUAL((int)Eigen::internal::array_get<0>(dimensions), 2); VERIFY_IS_EQUAL((int)Eigen::internal::array_get<1>(dimensions), 3); VERIFY_IS_EQUAL((int)Eigen::internal::array_get<2>(dimensions), 7); - VERIFY_IS_EQUAL(dimensions.TotalSize(), 2*3*7); + VERIFY_IS_EQUAL((int)dimensions.TotalSize(), 2*3*7); VERIFY_IS_EQUAL((int)dimensions[0], 2); VERIFY_IS_EQUAL((int)dimensions[1], 3); VERIFY_IS_EQUAL((int)dimensions[2], 7); @@ -34,12 +34,12 @@ static void test_fixed_size() VERIFY_IS_EQUAL((int)Eigen::internal::array_get<0>(dimensions), 2); VERIFY_IS_EQUAL((int)Eigen::internal::array_get<1>(dimensions), 3); VERIFY_IS_EQUAL((int)Eigen::internal::array_get<2>(dimensions), 7); - VERIFY_IS_EQUAL(dimensions.TotalSize(), 2*3*7); + VERIFY_IS_EQUAL((int)dimensions.TotalSize(), 2*3*7); } static void test_match() { - Eigen::DSizes<int, 3> dyn(2,3,7); + Eigen::DSizes<unsigned int, 3> dyn((unsigned int)2,(unsigned int)3,(unsigned int)7); Eigen::Sizes<2,3,7> stat; VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn, stat), true); @@ -51,13 +51,13 @@ static void test_match() static void test_rank_zero() { Eigen::Sizes<> scalar; - VERIFY_IS_EQUAL(scalar.TotalSize(), 1); - VERIFY_IS_EQUAL(scalar.rank(), 0); - 
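
// Host-side sketch of the op the kernel above evaluates (values are
// hypothetical; assumes this file's includes): betainc(a, b, x) is the
// regularized incomplete beta function I_x(a, b), and the same ternary
// expression also evaluates on CPU tensors.
static void sketch_betainc_host() {
  Eigen::Tensor<float, 1> a(3), b(3), x(3), r(3);
  a.setValues({0.5f, 2.0f, 31.6f});
  b.setValues({0.5f, 2.0f, 0.999f});
  x.setValues({0.2f, 0.5f, 0.8f});
  r = Eigen::betainc(a, b, x);  // same op as on the GPU path above
}
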
VERIFY_IS_EQUAL(internal::array_prod(scalar), 1); + VERIFY_IS_EQUAL((int)scalar.TotalSize(), 1); + VERIFY_IS_EQUAL((int)scalar.rank(), 0); + VERIFY_IS_EQUAL((int)internal::array_prod(scalar), 1); Eigen::DSizes<ptrdiff_t, 0> dscalar; - VERIFY_IS_EQUAL(dscalar.TotalSize(), 1); - VERIFY_IS_EQUAL(dscalar.rank(), 0); + VERIFY_IS_EQUAL((int)dscalar.TotalSize(), 1); + VERIFY_IS_EQUAL((int)dscalar.rank(), 0); } void test_cxx11_tensor_dimension() diff --git a/unsupported/test/cxx11_tensor_expr.cpp b/unsupported/test/cxx11_tensor_expr.cpp index 8389e9840..77e24cb67 100644 --- a/unsupported/test/cxx11_tensor_expr.cpp +++ b/unsupported/test/cxx11_tensor_expr.cpp @@ -16,8 +16,8 @@ using Eigen::RowMajor; static void test_1d() { - Tensor<float, 1> vec1({6}); - Tensor<float, 1, RowMajor> vec2({6}); + Tensor<float, 1> vec1(6); + Tensor<float, 1, RowMajor> vec2(6); vec1(0) = 4.0; vec2(0) = 0.0; vec1(1) = 8.0; vec2(1) = 1.0; @@ -112,13 +112,13 @@ static void test_3d() Tensor<float, 3> mat1(2,3,7); Tensor<float, 3, RowMajor> mat2(2,3,7); - float val = 1.0; + float val = 1.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { mat1(i,j,k) = val; mat2(i,j,k) = val; - val += 1.0; + val += 1.0f; } } } @@ -142,7 +142,7 @@ static void test_3d() Tensor<float, 3, RowMajor> mat11(2,3,7); mat11 = mat2 / 3.14f; - val = 1.0; + val = 1.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { @@ -155,7 +155,7 @@ static void test_3d() VERIFY_IS_APPROX(mat9(i,j,k), val + 3.14f); VERIFY_IS_APPROX(mat10(i,j,k), val - 3.14f); VERIFY_IS_APPROX(mat11(i,j,k), val / 3.14f); - val += 1.0; + val += 1.0f; } } } @@ -167,25 +167,25 @@ static void test_constants() Tensor<float, 3> mat2(2,3,7); Tensor<float, 3> mat3(2,3,7); - float val = 1.0; + float val = 1.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { mat1(i,j,k) = val; - val += 1.0; + val += 1.0f; } } } mat2 = mat1.constant(3.14f); mat3 = mat1.cwiseMax(7.3f).exp(); - val = 1.0; + val = 1.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { VERIFY_IS_APPROX(mat2(i,j,k), 3.14f); VERIFY_IS_APPROX(mat3(i,j,k), expf((std::max)(val, 7.3f))); - val += 1.0; + val += 1.0f; } } } @@ -228,25 +228,25 @@ static void test_functors() Tensor<float, 3> mat2(2,3,7); Tensor<float, 3> mat3(2,3,7); - float val = 1.0; + float val = 1.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { mat1(i,j,k) = val; - val += 1.0; + val += 1.0f; } } } mat2 = mat1.inverse().unaryExpr(&asinf); mat3 = mat1.unaryExpr(&tanhf); - val = 1.0; + val = 1.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { VERIFY_IS_APPROX(mat2(i,j,k), asinf(1.0f / mat1(i,j,k))); VERIFY_IS_APPROX(mat3(i,j,k), tanhf(mat1(i,j,k))); - val += 1.0; + val += 1.0f; } } } diff --git a/unsupported/test/cxx11_tensor_fft.cpp b/unsupported/test/cxx11_tensor_fft.cpp index 89874349f..2f14ebc62 100644 --- a/unsupported/test/cxx11_tensor_fft.cpp +++ b/unsupported/test/cxx11_tensor_fft.cpp @@ -205,15 +205,15 @@ static void test_fft_real_input_energy() { VERIFY_IS_EQUAL(output.dimension(i), input.dimension(i)); } - float energy_original = 0.0; - float energy_after_fft = 0.0; + RealScalar energy_original = 0.0; + RealScalar energy_after_fft = 0.0; for (int i = 0; i < total_size; ++i) { - energy_original += pow(std::abs(input(i)), 2); + energy_original += numext::abs2(input(i)); } for (int i = 0; i < total_size; ++i) { - 
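
// The (int) casts above only silence signed/unsigned comparison warnings; the
// underlying invariant is that rank-0 dimension objects report the empty
// product, i.e. a total size of one (sketch, not part of the patch):
static void sketch_rank0_sizes() {
  Eigen::Sizes<> s;               // static rank-0 sizes
  Eigen::DSizes<ptrdiff_t, 0> d;  // dynamic rank-0 sizes
  // s.TotalSize() == 1 and d.TotalSize() == 1, both with rank() == 0.
  (void)s; (void)d;
}
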
energy_after_fft += pow(std::abs(output(i)), 2); + energy_after_fft += numext::abs2(output(i)); } if(FFTDirection == FFT_FORWARD) { diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp index 46d741b05..4c660de65 100644 --- a/unsupported/test/cxx11_tensor_fixed_size.cpp +++ b/unsupported/test/cxx11_tensor_fixed_size.cpp @@ -188,13 +188,13 @@ static void test_3d() // VERIFY_IS_EQUAL((mat1.dimension(1)), 3); // VERIFY_IS_EQUAL((mat1.dimension(2)), 7); - float val = 0.0; + float val = 0.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { mat1(i,j,k) = val; mat2(i,j,k) = val; - val += 1.0; + val += 1.0f; } } } @@ -210,13 +210,13 @@ static void test_3d() // VERIFY_IS_EQUAL((mat3.dimension(2)), 7); - val = 0.0; + val = 0.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { VERIFY_IS_APPROX(mat3(i,j,k), sqrtf(val)); VERIFY_IS_APPROX(mat4(i,j,k), sqrtf(val)); - val += 1.0; + val += 1.0f; } } } @@ -226,12 +226,12 @@ static void test_3d() static void test_array() { TensorFixedSize<float, Sizes<2, 3, 7> > mat1; - float val = 0.0; + float val = 0.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { mat1(i,j,k) = val; - val += 1.0; + val += 1.0f; } } } @@ -239,12 +239,12 @@ static void test_array() TensorFixedSize<float, Sizes<2, 3, 7> > mat3; mat3 = mat1.pow(3.5f); - val = 0.0; + val = 0.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { VERIFY_IS_APPROX(mat3(i,j,k), powf(val, 3.5f)); - val += 1.0; + val += 1.0f; } } } diff --git a/unsupported/test/cxx11_tensor_image_patch.cpp b/unsupported/test/cxx11_tensor_image_patch.cpp index 5d6a49181..988b01481 100644 --- a/unsupported/test/cxx11_tensor_image_patch.cpp +++ b/unsupported/test/cxx11_tensor_image_patch.cpp @@ -568,13 +568,7 @@ static void test_imagenet_patches() VERIFY_IS_EQUAL(l_out.dimension(4), 16); // RowMajor - Tensor<float, 4, RowMajor> l_in_row_major = l_in.swap_layout(); - VERIFY_IS_EQUAL(l_in.dimension(0), l_in_row_major.dimension(3)); - VERIFY_IS_EQUAL(l_in.dimension(1), l_in_row_major.dimension(2)); - VERIFY_IS_EQUAL(l_in.dimension(2), l_in_row_major.dimension(1)); - VERIFY_IS_EQUAL(l_in.dimension(3), l_in_row_major.dimension(0)); - - Tensor<float, 5, RowMajor> l_out_row_major = l_in_row_major.extract_image_patches(11, 11); + Tensor<float, 5, RowMajor> l_out_row_major = l_in.swap_layout().extract_image_patches(11, 11); VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 16); VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 128*128); VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 11); @@ -589,10 +583,8 @@ static void test_imagenet_patches() for (int r = 0; r < 11; ++r) { for (int d = 0; d < 3; ++d) { float expected = 0.0f; - float expected_row_major = 0.0f; if (r-5+i >= 0 && c-5+j >= 0 && r-5+i < 128 && c-5+j < 128) { expected = l_in(d, r-5+i, c-5+j, b); - expected_row_major = l_in_row_major(b, c-5+j, r-5+i, d); } // ColMajor if (l_out(d, r, c, patchId, b) != expected) { @@ -601,15 +593,13 @@ static void test_imagenet_patches() VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); // RowMajor if (l_out_row_major(b, patchId, c, r, d) != - expected_row_major) { + expected) { std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; } VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), - expected_row_major); - // Check that ColMajor and RowMajor agree. 
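
// numext::abs2, as used in the energy sums above, computes x*x for real
// scalars and re^2 + im^2 for complex ones, replacing the pow(std::abs(z), 2)
// round trip (sketch, not part of the patch):
static void sketch_abs2() {
  std::complex<float> z(3.0f, 4.0f);
  float energy = Eigen::numext::abs2(z);  // 25.0f, no sqrt-then-square
  (void)energy;
}
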
- VERIFY_IS_EQUAL(expected, expected_row_major); + expected); } } } @@ -628,8 +618,7 @@ static void test_imagenet_patches() VERIFY_IS_EQUAL(l_out.dimension(4), 32); // RowMajor - l_in_row_major = l_in.swap_layout(); - l_out_row_major = l_in_row_major.extract_image_patches(9, 9); + l_out_row_major = l_in.swap_layout().extract_image_patches(9, 9); VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32); VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 64*64); VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 9); @@ -644,10 +633,8 @@ static void test_imagenet_patches() for (int r = 0; r < 9; ++r) { for (int d = 0; d < 16; ++d) { float expected = 0.0f; - float expected_row_major = 0.0f; if (r-4+i >= 0 && c-4+j >= 0 && r-4+i < 64 && c-4+j < 64) { expected = l_in(d, r-4+i, c-4+j, b); - expected_row_major = l_in_row_major(b, c-4+j, r-4+i, d); } // ColMajor if (l_out(d, r, c, patchId, b) != expected) { @@ -655,12 +642,10 @@ static void test_imagenet_patches() } VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); // RowMajor - if (l_out_row_major(b, patchId, c, r, d) != expected_row_major) { + if (l_out_row_major(b, patchId, c, r, d) != expected) { std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; } - VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected_row_major); - // Check that ColMajor and RowMajor agree. - VERIFY_IS_EQUAL(expected, expected_row_major); + VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected); } } } @@ -679,8 +664,7 @@ static void test_imagenet_patches() VERIFY_IS_EQUAL(l_out.dimension(4), 32); // RowMajor - l_in_row_major = l_in.swap_layout(); - l_out_row_major = l_in_row_major.extract_image_patches(7, 7); + l_out_row_major = l_in.swap_layout().extract_image_patches(7, 7); VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32); VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 16*16); VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 7); @@ -695,10 +679,8 @@ static void test_imagenet_patches() for (int r = 0; r < 7; ++r) { for (int d = 0; d < 32; ++d) { float expected = 0.0f; - float expected_row_major = 0.0f; if (r-3+i >= 0 && c-3+j >= 0 && r-3+i < 16 && c-3+j < 16) { expected = l_in(d, r-3+i, c-3+j, b); - expected_row_major = l_in_row_major(b, c-3+j, r-3+i, d); } // ColMajor if (l_out(d, r, c, patchId, b) != expected) { @@ -706,12 +688,10 @@ static void test_imagenet_patches() } VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); // RowMajor - if (l_out_row_major(b, patchId, c, r, d) != expected_row_major) { + if (l_out_row_major(b, patchId, c, r, d) != expected) { std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; } - VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected_row_major); - // Check that ColMajor and RowMajor agree. 
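
// Sketch of the identity the simplification above relies on (not part of the
// patch): swap_layout() reinterprets the same buffer with reversed dimension
// order, so the row-major patches must equal the col-major ones and the
// separate expected_row_major bookkeeping was redundant.
static void sketch_swap_layout_patches() {
  Eigen::Tensor<float, 4> img(3, 128, 128, 16);  // ColMajor (d, r, c, b)
  img.setRandom();
  Eigen::Tensor<float, 5, Eigen::RowMajor> patches =
      img.swap_layout().extract_image_patches(11, 11);
  (void)patches;
}
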
- VERIFY_IS_EQUAL(expected, expected_row_major); + VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected); } } } @@ -730,8 +710,7 @@ static void test_imagenet_patches() VERIFY_IS_EQUAL(l_out.dimension(4), 32); // RowMajor - l_in_row_major = l_in.swap_layout(); - l_out_row_major = l_in_row_major.extract_image_patches(3, 3); + l_out_row_major = l_in.swap_layout().extract_image_patches(3, 3); VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32); VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 13*13); VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 3); @@ -746,10 +725,8 @@ static void test_imagenet_patches() for (int r = 0; r < 3; ++r) { for (int d = 0; d < 64; ++d) { float expected = 0.0f; - float expected_row_major = 0.0f; if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 13 && c-1+j < 13) { expected = l_in(d, r-1+i, c-1+j, b); - expected_row_major = l_in_row_major(b, c-1+j, r-1+i, d); } // ColMajor if (l_out(d, r, c, patchId, b) != expected) { @@ -757,12 +734,10 @@ static void test_imagenet_patches() } VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); // RowMajor - if (l_out_row_major(b, patchId, c, r, d) != expected_row_major) { + if (l_out_row_major(b, patchId, c, r, d) != expected) { std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; } - VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected_row_major); - // Check that ColMajor and RowMajor agree. - VERIFY_IS_EQUAL(expected, expected_row_major); + VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected); } } } diff --git a/unsupported/test/cxx11_tensor_index_list.cpp b/unsupported/test/cxx11_tensor_index_list.cpp index 4ce8dea20..4cf5df666 100644 --- a/unsupported/test/cxx11_tensor_index_list.cpp +++ b/unsupported/test/cxx11_tensor_index_list.cpp @@ -159,6 +159,111 @@ static void test_type2index_list() } +static void test_type2indexpair_list() +{ + Tensor<float, 5> tensor(2,3,5,7,11); + tensor.setRandom(); + tensor += tensor.constant(10.0f); + + typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>> Dims0; + typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::type2indexpair<1,11>, Eigen::type2indexpair<2,12>> Dims2_a; + typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<2,12>> Dims2_b; + typedef Eigen::IndexPairList<Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<1,11>, Eigen::IndexPair<DenseIndex>> Dims2_c; + + Dims0 d0; + Dims2_a d2_a; + + Dims2_b d2_b; + d2_b.set(1, Eigen::IndexPair<DenseIndex>(1,11)); + + Dims2_c d2_c; + d2_c.set(0, Eigen::IndexPair<DenseIndex>(Eigen::IndexPair<DenseIndex>(0,10))); + d2_c.set(1, Eigen::IndexPair<DenseIndex>(1,11)); // setting type2indexpair to correct value. 
+ d2_c.set(2, Eigen::IndexPair<DenseIndex>(2,12)); + + VERIFY_IS_EQUAL(d2_a[0].first, 0); + VERIFY_IS_EQUAL(d2_a[0].second, 10); + VERIFY_IS_EQUAL(d2_a[1].first, 1); + VERIFY_IS_EQUAL(d2_a[1].second, 11); + VERIFY_IS_EQUAL(d2_a[2].first, 2); + VERIFY_IS_EQUAL(d2_a[2].second, 12); + + VERIFY_IS_EQUAL(d2_b[0].first, 0); + VERIFY_IS_EQUAL(d2_b[0].second, 10); + VERIFY_IS_EQUAL(d2_b[1].first, 1); + VERIFY_IS_EQUAL(d2_b[1].second, 11); + VERIFY_IS_EQUAL(d2_b[2].first, 2); + VERIFY_IS_EQUAL(d2_b[2].second, 12); + + VERIFY_IS_EQUAL(d2_c[0].first, 0); + VERIFY_IS_EQUAL(d2_c[0].second, 10); + VERIFY_IS_EQUAL(d2_c[1].first, 1); + VERIFY_IS_EQUAL(d2_c[1].second, 11); + VERIFY_IS_EQUAL(d2_c[2].first, 2); + VERIFY_IS_EQUAL(d2_c[2].second, 12); + + EIGEN_STATIC_ASSERT((d2_a.value_known_statically(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((d2_a.value_known_statically(1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((d2_a.value_known_statically(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((d2_b.value_known_statically(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((d2_b.value_known_statically(1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((d2_b.value_known_statically(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((d2_c.value_known_statically(0) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((d2_c.value_known_statically(1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((d2_c.value_known_statically(2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims0>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims0>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(1, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(0, 0) == false), 
YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(2, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims0>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims0>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(1, 11) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(2, 12) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(1, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(2, 12) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(0, 10) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(1, 11) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(2, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); +} + + static void test_dynamic_index_list() { Tensor<float, 4> tensor(2,3,5,7); @@ -273,6 +378,7 @@ void test_cxx11_tensor_index_list() #ifdef EIGEN_HAS_INDEX_LIST CALL_SUBTEST(test_static_index_list()); 
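
// Sketch of what the new statically_eq helpers enable (an assumption of this
// sketch is that contract() accepts an IndexPairList, which is what these
// pair lists exist for): contraction dimensions become compile-time constants
// the evaluator can fold instead of reading runtime values.
static void sketch_static_contraction_dims() {
  typedef Eigen::IndexPairList<Eigen::type2indexpair<1, 0> > ContractDims;
  Eigen::Tensor<float, 2> a(2, 3), b(3, 2);
  a.setRandom(); b.setRandom();
  Eigen::Tensor<float, 2> c = a.contract(b, ContractDims());
  (void)c;
}
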
CALL_SUBTEST(test_type2index_list()); + CALL_SUBTEST(test_type2indexpair_list()); CALL_SUBTEST(test_dynamic_index_list()); CALL_SUBTEST(test_mixed_index_list()); CALL_SUBTEST(test_dim_check()); diff --git a/unsupported/test/cxx11_tensor_intdiv.cpp b/unsupported/test/cxx11_tensor_intdiv.cpp index 48aa6d368..8e2b70b75 100644 --- a/unsupported/test/cxx11_tensor_intdiv.cpp +++ b/unsupported/test/cxx11_tensor_intdiv.cpp @@ -128,7 +128,7 @@ void test_powers_64bit() { void test_specific() { // A particular combination that was previously failing int64_t div = 209715200; - int64_t num = 3238002688; + int64_t num = 3238002688ll; Eigen::internal::TensorIntDivisor<int64_t> divider(div); int64_t result = num/div; int64_t result_op = divider.divide(num); diff --git a/unsupported/test/cxx11_tensor_io.cpp b/unsupported/test/cxx11_tensor_io.cpp index 8bbcf7089..489960529 100644 --- a/unsupported/test/cxx11_tensor_io.cpp +++ b/unsupported/test/cxx11_tensor_io.cpp @@ -14,6 +14,20 @@ template<int DataLayout> +static void test_output_0d() +{ + Tensor<int, 0, DataLayout> tensor; + tensor() = 123; + + std::stringstream os; + os << tensor; + + std::string expected("123"); + VERIFY_IS_EQUAL(std::string(os.str()), expected); +} + + +template<int DataLayout> static void test_output_1d() { Tensor<int, 1, DataLayout> tensor(5); @@ -26,6 +40,12 @@ static void test_output_1d() std::string expected("0\n1\n2\n3\n4"); VERIFY_IS_EQUAL(std::string(os.str()), expected); + + Eigen::Tensor<double,1,DataLayout> empty_tensor(0); + std::stringstream empty_os; + empty_os << empty_tensor; + std::string empty_string; + VERIFY_IS_EQUAL(std::string(empty_os.str()), empty_string); } @@ -101,6 +121,8 @@ static void test_output_const() void test_cxx11_tensor_io() { + CALL_SUBTEST(test_output_0d<ColMajor>()); + CALL_SUBTEST(test_output_0d<RowMajor>()); CALL_SUBTEST(test_output_1d<ColMajor>()); CALL_SUBTEST(test_output_1d<RowMajor>()); CALL_SUBTEST(test_output_2d<ColMajor>()); diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp index eb3b891fd..f7de43110 100644 --- a/unsupported/test/cxx11_tensor_morphing.cpp +++ b/unsupported/test/cxx11_tensor_morphing.cpp @@ -13,6 +13,7 @@ using Eigen::Tensor; +template<typename> static void test_simple_reshape() { Tensor<float, 5> tensor1(2,3,1,7,1); @@ -40,7 +41,7 @@ static void test_simple_reshape() } } - +template<typename> static void test_reshape_in_expr() { MatrixXf m1(2,3*5*7*11); MatrixXf m2(3*5*7*11,13); @@ -65,7 +66,7 @@ static void test_reshape_in_expr() { } } - +template<typename> static void test_reshape_as_lvalue() { Tensor<float, 3> tensor(2,3,7); @@ -114,6 +115,7 @@ static void test_simple_slice() } } +template<typename=void> static void test_const_slice() { const float b[1] = {42}; @@ -315,6 +317,128 @@ static void test_slice_raw_data() VERIFY_IS_EQUAL(slice6.data(), tensor.data()); } + +template<int DataLayout> +static void test_strided_slice() +{ + typedef Tensor<float, 5, DataLayout> Tensor5f; + typedef Eigen::DSizes<Eigen::DenseIndex, 5> Index5; + typedef Tensor<float, 2, DataLayout> Tensor2f; + typedef Eigen::DSizes<Eigen::DenseIndex, 2> Index2; + Tensor<float, 5, DataLayout> tensor(2,3,5,7,11); + Tensor<float, 2, DataLayout> tensor2(7,11); + tensor.setRandom(); + tensor2.setRandom(); + + if (true) { + Tensor2f slice(2,3); + Index2 strides(-2,-1); + Index2 indicesStart(5,7); + Index2 indicesStop(0,4); + slice = tensor2.stridedSlice(indicesStart, indicesStop, strides); + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 3; ++k) { + 
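
// The ll suffix added above keeps 3238002688 (which exceeds 2^31 - 1) an
// exact 64-bit literal. For reference, TensorIntDivisor precomputes a
// multiply-and-shift reciprocal so repeated divisions by one runtime divisor
// avoid hardware idiv (sketch, not part of the patch):
static void sketch_int_divisor() {
  int64_t div = 209715200;
  Eigen::internal::TensorIntDivisor<int64_t> divider(div);
  int64_t q = divider.divide(3238002688ll);  // equals 3238002688ll / div
  (void)q;
}
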
VERIFY_IS_EQUAL(slice(j,k), tensor2(5-2*j,7-k)); + } + } + } + + if(true) { + Tensor2f slice(0,1); + Index2 strides(1,1); + Index2 indicesStart(5,4); + Index2 indicesStop(5,5); + slice = tensor2.stridedSlice(indicesStart, indicesStop, strides); + } + + if(true) { // test clamped degenerate intervals + Tensor2f slice(7,11); + Index2 strides(1,-1); + Index2 indicesStart(-3,20); // should become 0,10 + Index2 indicesStop(20,-11); // should become 11, -1 + slice = tensor2.stridedSlice(indicesStart, indicesStop, strides); + for (int j = 0; j < 7; ++j) { + for (int k = 0; k < 11; ++k) { + VERIFY_IS_EQUAL(slice(j,k), tensor2(j,10-k)); + } + } + } + + if(true) { + Tensor5f slice1(1,1,1,1,1); + Eigen::DSizes<Eigen::DenseIndex, 5> indicesStart(1, 2, 3, 4, 5); + Eigen::DSizes<Eigen::DenseIndex, 5> indicesStop(2, 3, 4, 5, 6); + Eigen::DSizes<Eigen::DenseIndex, 5> strides(1, 1, 1, 1, 1); + slice1 = tensor.stridedSlice(indicesStart, indicesStop, strides); + VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5)); + } + + if(true) { + Tensor5f slice(1,1,2,2,3); + Index5 start(1, 1, 3, 4, 5); + Index5 stop(2, 2, 5, 6, 8); + Index5 strides(1, 1, 1, 1, 1); + slice = tensor.stridedSlice(start, stop, strides); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 3; ++k) { + VERIFY_IS_EQUAL(slice(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k)); + } + } + } + } + + if(true) { + Tensor5f slice(1,1,2,2,3); + Index5 strides3(1, 1, -2, 1, -1); + Index5 indices3Start(1, 1, 4, 4, 7); + Index5 indices3Stop(2, 2, 0, 6, 4); + slice = tensor.stridedSlice(indices3Start, indices3Stop, strides3); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 3; ++k) { + VERIFY_IS_EQUAL(slice(0,0,i,j,k), tensor(1,1,4-2*i,4+j,7-k)); + } + } + } + } + + if(false) { // tests degenerate interval + Tensor5f slice(1,1,2,2,3); + Index5 strides3(1, 1, 2, 1, 1); + Index5 indices3Start(1, 1, 4, 4, 7); + Index5 indices3Stop(2, 2, 0, 6, 4); + slice = tensor.stridedSlice(indices3Start, indices3Stop, strides3); + } +} + +template<int DataLayout> +static void test_strided_slice_write() +{ + typedef Tensor<float, 2, DataLayout> Tensor2f; + typedef Eigen::DSizes<Eigen::DenseIndex, 2> Index2; + + Tensor<float, 2, DataLayout> tensor(7,11),tensor2(7,11); + tensor.setRandom(); + tensor2=tensor; + Tensor2f slice(2,3); + + slice.setRandom(); + + Index2 strides(1,1); + Index2 indicesStart(3,4); + Index2 indicesStop(5,7); + Index2 lengths(2,3); + + tensor.slice(indicesStart,lengths)=slice; + tensor2.stridedSlice(indicesStart,indicesStop,strides)=slice; + + for(int i=0;i<7;i++) for(int j=0;j<11;j++){ + VERIFY_IS_EQUAL(tensor(i,j), tensor2(i,j)); + } +} + + template<int DataLayout> static void test_composition() { @@ -337,20 +461,25 @@ static void test_composition() void test_cxx11_tensor_morphing() { - CALL_SUBTEST(test_simple_reshape()); - CALL_SUBTEST(test_reshape_in_expr()); - CALL_SUBTEST(test_reshape_as_lvalue()); - - CALL_SUBTEST(test_simple_slice<ColMajor>()); - CALL_SUBTEST(test_simple_slice<RowMajor>()); - CALL_SUBTEST(test_const_slice()); - CALL_SUBTEST(test_slice_in_expr<ColMajor>()); - CALL_SUBTEST(test_slice_in_expr<RowMajor>()); - CALL_SUBTEST(test_slice_as_lvalue<ColMajor>()); - CALL_SUBTEST(test_slice_as_lvalue<RowMajor>()); - CALL_SUBTEST(test_slice_raw_data<ColMajor>()); - CALL_SUBTEST(test_slice_raw_data<RowMajor>()); - - CALL_SUBTEST(test_composition<ColMajor>()); - CALL_SUBTEST(test_composition<RowMajor>()); + CALL_SUBTEST_1(test_simple_reshape<void>()); + 
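
// Sketch of the stridedSlice(start, stop, strides) semantics exercised above
// (not part of the patch): with unit strides it selects the same half-open
// box as slice(start, lengths), and a negative stride walks a dimension
// backwards from start toward stop, as in the reversed-column checks.
static void sketch_strided_slice() {
  Eigen::Tensor<float, 2> t(7, 11);
  t.setRandom();
  Eigen::DSizes<Eigen::DenseIndex, 2> start(3, 4), stop(5, 7), strides(1, 1);
  Eigen::Tensor<float, 2> box = t.stridedSlice(start, stop, strides);  // 2 x 3
  (void)box;
}
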
CALL_SUBTEST_1(test_reshape_in_expr<void>()); + CALL_SUBTEST_1(test_reshape_as_lvalue<void>()); + + CALL_SUBTEST_1(test_simple_slice<ColMajor>()); + CALL_SUBTEST_1(test_simple_slice<RowMajor>()); + CALL_SUBTEST_1(test_const_slice()); + CALL_SUBTEST_2(test_slice_in_expr<ColMajor>()); + CALL_SUBTEST_3(test_slice_in_expr<RowMajor>()); + CALL_SUBTEST_4(test_slice_as_lvalue<ColMajor>()); + CALL_SUBTEST_4(test_slice_as_lvalue<RowMajor>()); + CALL_SUBTEST_5(test_slice_raw_data<ColMajor>()); + CALL_SUBTEST_5(test_slice_raw_data<RowMajor>()); + + CALL_SUBTEST_6(test_strided_slice_write<ColMajor>()); + CALL_SUBTEST_6(test_strided_slice<ColMajor>()); + CALL_SUBTEST_6(test_strided_slice_write<RowMajor>()); + CALL_SUBTEST_6(test_strided_slice<RowMajor>()); + + CALL_SUBTEST_7(test_composition<ColMajor>()); + CALL_SUBTEST_7(test_composition<RowMajor>()); } diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index 37fe3e9a4..2f86980a2 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -13,14 +13,55 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU - +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> using Eigen::Tensor; +template<typename> +void test_cuda_numext() { + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + bool* d_res_half = (bool*)gpu_device.allocate(num_elem * sizeof(bool)); + bool* d_res_float = (bool*)gpu_device.allocate(num_elem * sizeof(bool)); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float( + d_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_half( + d_res_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_float( + d_res_float, num_elem); + + gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f); + gpu_res_float.device(gpu_device) = gpu_float.unaryExpr(Eigen::internal::scalar_isnan_op<float>()); + gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().unaryExpr(Eigen::internal::scalar_isnan_op<Eigen::half>()); + + Tensor<bool, 1> half_prec(num_elem); + Tensor<bool, 1> full_prec(num_elem); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(bool)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(bool)); + gpu_device.synchronize(); + + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking numext " << i << std::endl; + VERIFY_IS_EQUAL(full_prec(i), half_prec(i)); + } + + gpu_device.deallocate(d_float); + gpu_device.deallocate(d_res_half); + gpu_device.deallocate(d_res_float); +} + + #ifdef EIGEN_HAS_CUDA_FP16 +template<typename> void test_cuda_conversion() { Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); @@ -55,7 +96,7 @@ void test_cuda_conversion() { gpu_device.deallocate(d_conv); } - +template<typename> void test_cuda_unary() { Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); @@ -92,7 +133,7 @@ void test_cuda_unary() { gpu_device.deallocate(d_res_float); } - +template<typename> void test_cuda_elementwise() { Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); @@ -124,8 +165,8 @@ void test_cuda_elementwise() { gpu_device.synchronize(); for (int i = 0; i < num_elem; ++i) { - 
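
// Pattern behind test_cuda_numext above (sketch, not part of the patch): run
// the same predicate in float and in half and require identical booleans.
static void sketch_isnan_half_vs_float() {
  Eigen::Tensor<float, 1> x(8);
  x.setRandom();
  Eigen::Tensor<bool, 1> nan_f =
      x.unaryExpr(Eigen::internal::scalar_isnan_op<float>());
  Eigen::Tensor<bool, 1> nan_h =
      x.cast<Eigen::half>().unaryExpr(Eigen::internal::scalar_isnan_op<Eigen::half>());
  (void)nan_f; (void)nan_h;
}
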
std::cout << "Checking elemwise " << i << std::endl; - VERIFY_IS_APPROX(full_prec(i), half_prec(i)); + std::cout << "Checking elemwise " << i << ": full prec = " << full_prec(i) << " vs half prec = " << half_prec(i) << std::endl; + VERIFY_IS_APPROX(static_cast<Eigen::half>(full_prec(i)), static_cast<Eigen::half>(half_prec(i))); } gpu_device.deallocate(d_float1); @@ -134,6 +175,7 @@ void test_cuda_elementwise() { gpu_device.deallocate(d_res_float); } +template<typename> void test_cuda_trancendental() { Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); @@ -141,43 +183,58 @@ void test_cuda_trancendental() { float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); - float* d_res1_half = (float*)gpu_device.allocate(num_elem * sizeof(float)); - float* d_res1_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); - float* d_res2_half = (float*)gpu_device.allocate(num_elem * sizeof(float)); - float* d_res2_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_float3 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + Eigen::half* d_res1_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res1_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res2_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res2_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res3_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res3_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(d_float1, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(d_float2, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float3(d_float3, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_half(d_res1_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_float(d_res1_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_half(d_res2_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_half(d_res3_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem); - Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1( - d_float1, num_elem); - Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2( - d_float2, num_elem); - Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res1_half( - d_res1_half, num_elem); - Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res1_float( - d_res1_float, num_elem); - Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res2_half( - d_res2_half, num_elem); - Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res2_float( - d_res2_float, num_elem); + gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f); + gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f); + gpu_float3.device(gpu_device) = gpu_float3.random(); + gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::half>(); + 
gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::half>(); + gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast<Eigen::half>(); - gpu_float1.device(gpu_device) = gpu_float1.random(); - gpu_float2.device(gpu_device) = gpu_float2.random(); - gpu_res1_float.device(gpu_device) = gpu_float1.exp(); - gpu_res2_float.device(gpu_device) = gpu_float2.log(); - gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().exp().cast<float>(); - gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>().log().cast<float>(); + gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>(); + gpu_res1_half.device(gpu_device) = gpu_res1_half.exp(); + + gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>(); + gpu_res2_half.device(gpu_device) = gpu_res2_half.log(); + + gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>(); + gpu_res3_half.device(gpu_device) = gpu_res3_half.log1p(); Tensor<float, 1> input1(num_elem); - Tensor<float, 1> half_prec1(num_elem); - Tensor<float, 1> full_prec1(num_elem); + Tensor<Eigen::half, 1> half_prec1(num_elem); + Tensor<Eigen::half, 1> full_prec1(num_elem); Tensor<float, 1> input2(num_elem); - Tensor<float, 1> half_prec2(num_elem); - Tensor<float, 1> full_prec2(num_elem); + Tensor<Eigen::half, 1> half_prec2(num_elem); + Tensor<Eigen::half, 1> full_prec2(num_elem); + Tensor<float, 1> input3(num_elem); + Tensor<Eigen::half, 1> half_prec3(num_elem); + Tensor<Eigen::half, 1> full_prec3(num_elem); gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float)); gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float)); - gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(float)); - gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(float)); - gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(float)); - gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(input3.data(), d_float3, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(half_prec3.data(), d_res3_half, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec3.data(), d_res3_float, num_elem*sizeof(Eigen::half)); gpu_device.synchronize(); for (int i = 0; i < num_elem; ++i) { @@ -186,17 +243,27 @@ void test_cuda_trancendental() { } for (int i = 0; i < num_elem; ++i) { std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl; - VERIFY_IS_APPROX(full_prec2(i), half_prec2(i)); + if(std::abs(input2(i)-1.f)<0.05f) // log lacks accuracy near 1 + VERIFY_IS_APPROX(full_prec2(i)+Eigen::half(0.1f), half_prec2(i)+Eigen::half(0.1f)); + else + VERIFY_IS_APPROX(full_prec2(i), half_prec2(i)); + } + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking elemwise log1p " << i << " input = " << input3(i) << " full = " << full_prec3(i) << " half = " << half_prec3(i) << std::endl; + VERIFY_IS_APPROX(full_prec3(i), half_prec3(i)); } gpu_device.deallocate(d_float1); gpu_device.deallocate(d_float2); + 
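
// Why log1p gets its own code path above (sketch, not part of the patch):
// for x near zero, forming 1 + x rounds away most of x, so log(1 + x) loses
// precision while log1p(x) is computed without that intermediate sum.
static void sketch_log1p_accuracy() {
  float x = 1e-4f;
  float naive  = std::log(1.0f + x);       // 1 + x already rounded
  float stable = Eigen::numext::log1p(x);  // accurate near zero
  (void)naive; (void)stable;
}
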
gpu_device.deallocate(d_float3); gpu_device.deallocate(d_res1_half); gpu_device.deallocate(d_res1_float); gpu_device.deallocate(d_res2_half); gpu_device.deallocate(d_res2_float); + gpu_device.deallocate(d_res3_float); + gpu_device.deallocate(d_res3_half); } - +template<typename> void test_cuda_contractions() { Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); @@ -206,36 +273,38 @@ void test_cuda_contractions() { float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); - float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float)); - float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1( d_float1, rows, cols); Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2( d_float2, rows, cols); - Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_res_half( + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_half( d_res_half, rows, cols); - Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_res_float( + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_float( d_res_float, rows, cols); gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f); - gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float1.constant(0.5f); + gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float2.constant(0.5f); typedef Tensor<float, 2>::DimensionPair DimPair; Eigen::array<DimPair, 1> dims(DimPair(1, 0)); - gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims); - gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().contract(gpu_float2.cast<Eigen::half>(), dims).cast<float>(); + gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims).cast<Eigen::half>(); + gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().contract(gpu_float2.cast<Eigen::half>(), dims); - Tensor<float, 2> half_prec(rows, cols); - Tensor<float, 2> full_prec(rows, cols); - gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float)); - gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float)); + Tensor<Eigen::half, 2> half_prec(rows, cols); + Tensor<Eigen::half, 2> full_prec(rows, cols); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(Eigen::half)); gpu_device.synchronize(); for (int i = 0; i < rows; ++i) { for (int j = 0; j < cols; ++j) { - std::cout << "Checking contract " << i << " " << j << std::endl; - VERIFY_IS_APPROX(full_prec(i, j), half_prec(i, j)); + std::cout << "Checking contract " << i << " " << j << full_prec(i, j) << " " << half_prec(i, j) << std::endl; + if (numext::abs(full_prec(i, j) - half_prec(i, j)) > Eigen::half(1e-2f)) { + VERIFY_IS_APPROX(full_prec(i, j), half_prec(i, j)); + } } } @@ -245,8 +314,69 @@ void test_cuda_contractions() { gpu_device.deallocate(d_res_float); } +template<typename> +void test_cuda_reductions(int size1, int size2, int redux) { + std::cout << "Reducing " << size1 << " by " << size2 + << " tensor along dim " << redux << std::endl; + + Eigen::CudaStreamDevice stream; + 
Eigen::GpuDevice gpu_device(&stream); + int num_elem = size1*size2; + int result_size = (redux == 1 ? size1 : size2); + + float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(result_size * sizeof(Eigen::half)); + Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(result_size * sizeof(Eigen::half)); + + Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1( + d_float1, size1, size2); + Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2( + d_float2, size1, size2); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_half( + d_res_half, result_size); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_float( + d_res_float, result_size); + + gpu_float1.device(gpu_device) = gpu_float1.random() * 2.0f; + gpu_float2.device(gpu_device) = gpu_float2.random() * 2.0f; + + Eigen::array<int, 1> redux_dim = {{redux}}; + gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim).cast<Eigen::half>(); + gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim); + + Tensor<Eigen::half, 1> half_prec(result_size); + Tensor<Eigen::half, 1> full_prec(result_size); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, result_size*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, result_size*sizeof(Eigen::half)); + gpu_device.synchronize(); + + for (int i = 0; i < result_size; ++i) { + std::cout << "EXPECTED " << full_prec(i) << " GOT " << half_prec(i) << std::endl; + VERIFY_IS_APPROX(full_prec(i), half_prec(i)); + } + + gpu_device.deallocate(d_float1); + gpu_device.deallocate(d_float2); + gpu_device.deallocate(d_res_half); + gpu_device.deallocate(d_res_float); +} + +template<typename> void test_cuda_reductions() { + test_cuda_reductions<void>(13, 13, 0); + test_cuda_reductions<void>(13, 13, 1); + + test_cuda_reductions<void>(35, 36, 0); + test_cuda_reductions<void>(35, 36, 1); + + test_cuda_reductions<void>(36, 35, 0); + test_cuda_reductions<void>(36, 35, 1); +} + +template<typename> +void test_cuda_full_reductions() { Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); int size = 13; @@ -254,35 +384,39 @@ void test_cuda_reductions() { float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); - float* d_res_half = (float*)gpu_device.allocate(size * sizeof(float)); - float* d_res_float = (float*)gpu_device.allocate(size * sizeof(float)); + Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half)); + Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half)); Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1( d_float1, size, size); Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2( d_float2, size, size); - Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half( - d_res_half, size); - Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float( - d_res_float, size); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_half( + d_res_half); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_float( + d_res_float); gpu_float1.device(gpu_device) = gpu_float1.random(); gpu_float2.device(gpu_device) = gpu_float2.random(); - Eigen::array<int, 1> 
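
// Host-side shape of the comparison above (sketch, not part of the patch):
// reduce in float and cast, versus reducing natively in half, then require
// the two results to agree approximately.
static void sketch_half_reduction() {
  Eigen::Tensor<float, 2> m(13, 13);
  m.setRandom();
  Eigen::array<int, 1> redux = {{0}};
  Eigen::Tensor<Eigen::half, 1> ref = m.sum(redux).cast<Eigen::half>();
  Eigen::Tensor<Eigen::half, 1> got = m.cast<Eigen::half>().sum(redux);
  (void)ref; (void)got;
}
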
redux_dim = {{0}}; - gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim); - gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim).cast<float>(); + gpu_res_float.device(gpu_device) = gpu_float1.sum().cast<Eigen::half>(); + gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(); - Tensor<float, 1> half_prec(size); - Tensor<float, 1> full_prec(size); - gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, size*sizeof(float)); - gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, size*sizeof(float)); + Tensor<Eigen::half, 0> half_prec; + Tensor<Eigen::half, 0> full_prec; + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::half)); gpu_device.synchronize(); - for (int i = 0; i < size; ++i) { - std::cout << "Checking redux " << i << std::endl; - VERIFY_IS_APPROX(full_prec(i), half_prec(i)); - } + VERIFY_IS_APPROX(full_prec(), half_prec()); + + gpu_res_float.device(gpu_device) = gpu_float1.maximum().cast<Eigen::half>(); + gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().maximum(); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::half)); + gpu_device.synchronize(); + + VERIFY_IS_APPROX(full_prec(), half_prec()); gpu_device.deallocate(d_float1); gpu_device.deallocate(d_float2); @@ -290,6 +424,7 @@ void test_cuda_reductions() { gpu_device.deallocate(d_res_float); } +template<typename> void test_cuda_forced_evals() { Eigen::CudaStreamDevice stream; @@ -297,59 +432,62 @@ void test_cuda_forced_evals() { int num_elem = 101; float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); - float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res_half1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res_half2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float( d_float, num_elem); - Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half( - d_res_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half1( + d_res_half1, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Unaligned> gpu_res_half2( + d_res_half2, num_elem); Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float( d_res_float, num_elem); + Eigen::array<int, 1> no_bcast; + no_bcast[0] = 1; + gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f); gpu_res_float.device(gpu_device) = gpu_float.abs(); - gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>(); + gpu_res_half1.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>(); + gpu_res_half2.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().broadcast(no_bcast).eval().cast<float>(); - Tensor<float, 1> half_prec(num_elem); + Tensor<float, 1> half_prec1(num_elem); + Tensor<float, 1> half_prec2(num_elem); Tensor<float, 1> full_prec(num_elem); - gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res_half1, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res_half2, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float)); gpu_device.synchronize(); for (int i = 0; i < num_elem; ++i) { - std::cout << "Checking unary " << i << std::endl; - VERIFY_IS_APPROX(full_prec(i), half_prec(i)); + std::cout << "Checking forced eval " << i << " " << full_prec(i) << " vs " << half_prec1(i) << " vs " << half_prec2(i) << std::endl; + VERIFY_IS_APPROX(full_prec(i), half_prec1(i)); + VERIFY_IS_APPROX(full_prec(i), half_prec2(i)); } gpu_device.deallocate(d_float); - gpu_device.deallocate(d_res_half); + gpu_device.deallocate(d_res_half1); + gpu_device.deallocate(d_res_half2); gpu_device.deallocate(d_res_float); } - #endif void test_cxx11_tensor_of_float16_cuda() { + CALL_SUBTEST_1(test_cuda_numext<void>()); + #ifdef EIGEN_HAS_CUDA_FP16 - Eigen::CudaStreamDevice stream; - Eigen::GpuDevice device(&stream); - if (device.majorDeviceVersion() > 5 || - (device.majorDeviceVersion() == 5 && device.minorDeviceVersion() >= 3)) { - std::cout << "Running test on device with capability " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << std::endl; - - CALL_SUBTEST_1(test_cuda_conversion()); - CALL_SUBTEST_1(test_cuda_unary()); - CALL_SUBTEST_1(test_cuda_elementwise()); - CALL_SUBTEST_1(test_cuda_trancendental()); - CALL_SUBTEST_2(test_cuda_contractions()); - CALL_SUBTEST_3(test_cuda_reductions()); - CALL_SUBTEST_4(test_cuda_forced_evals()); - } - else { - std::cout << "Half floats require compute capability of at least 5.3. This device only supports " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << ". Skipping the test" << std::endl; - } + CALL_SUBTEST_1(test_cuda_conversion<void>()); + CALL_SUBTEST_1(test_cuda_unary<void>()); + CALL_SUBTEST_1(test_cuda_elementwise<void>()); + CALL_SUBTEST_1(test_cuda_trancendental<void>()); + CALL_SUBTEST_2(test_cuda_contractions<void>()); + CALL_SUBTEST_3(test_cuda_reductions<void>()); + CALL_SUBTEST_4(test_cuda_full_reductions<void>()); + CALL_SUBTEST_5(test_cuda_forced_evals<void>()); #else std::cout << "Half floats are not supported by this version of cuda: skipping the test" << std::endl; #endif diff --git a/unsupported/test/cxx11_tensor_random_cuda.cu b/unsupported/test/cxx11_tensor_random_cuda.cu index 5d091de15..b3be199e1 100644 --- a/unsupported/test/cxx11_tensor_random_cuda.cu +++ b/unsupported/test/cxx11_tensor_random_cuda.cu @@ -13,10 +13,61 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <Eigen/CXX11/Tensor> -static void test_default() + +void test_cuda_random_uniform() +{ + Tensor<float, 2> out(72,97); + out.setZero(); + + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_out; + cudaMalloc((void**)(&d_out), out_bytes); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97); + + gpu_out.device(gpu_device) = gpu_out.random(); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + // For now we just check this code doesn't crash.
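+ // A cheap sanity check for the TODO below, sketched here as a comment only: + // the sample mean of the 72*97 uniform [0, 1) draws should land near 0.5. + // Eigen::Tensor<float, 0> mean = out.mean(); + // VERIFY(mean() > 0.4f && mean() < 0.6f);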
+ // TODO: come up with a valid test of randomness +} + + +void test_cuda_random_normal() +{ + Tensor<float, 2> out(72,97); + out.setZero(); + + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_out; + cudaMalloc((void**)(&d_out), out_bytes); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97); + + Eigen::internal::NormalRandomGenerator<float> gen(true); + gpu_out.device(gpu_device) = gpu_out.random(gen); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); +} + +static void test_complex() { Tensor<std::complex<float>, 1> vec(6); vec.setRandom(); @@ -31,5 +82,7 @@ static void test_default() void test_cxx11_tensor_random_cuda() { - CALL_SUBTEST(test_default()); + CALL_SUBTEST(test_cuda_random_uniform()); + CALL_SUBTEST(test_cuda_random_normal()); + CALL_SUBTEST(test_complex()); } diff --git a/unsupported/test/cxx11_tensor_reduction.cpp b/unsupported/test/cxx11_tensor_reduction.cpp index 6a128901a..1490ec3da 100644 --- a/unsupported/test/cxx11_tensor_reduction.cpp +++ b/unsupported/test/cxx11_tensor_reduction.cpp @@ -239,6 +239,33 @@ static void test_simple_reductions() { } } + +template <int DataLayout> +static void test_reductions_in_expr() { + Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7); + tensor.setRandom(); + array<ptrdiff_t, 2> reduction_axis2; + reduction_axis2[0] = 1; + reduction_axis2[1] = 3; + + Tensor<float, 2, DataLayout> result(2, 5); + result = result.constant(1.0f) - tensor.sum(reduction_axis2); + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_EQUAL(result.dimension(1), 5); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 5; ++j) { + float sum = 0.0f; + for (int k = 0; k < 3; ++k) { + for (int l = 0; l < 7; ++l) { + sum += tensor(i, k, j, l); + } + } + VERIFY_IS_APPROX(result(i, j), 1.0f - sum); + } + } +} + + template <int DataLayout> static void test_full_reductions() { Tensor<float, 2, DataLayout> tensor(2, 3); @@ -341,7 +368,7 @@ static void test_static_dims() { Tensor<float, 2, DataLayout> out(72, 97); in.setRandom(); -#ifndef EIGEN_HAS_CONSTEXPR +#if !EIGEN_HAS_CONSTEXPR array<int, 2> reduction_axis; reduction_axis[0] = 1; reduction_axis[1] = 3; @@ -371,7 +398,7 @@ static void test_innermost_last_dims() { in.setRandom(); // Reduce on the innermost dimensions. -#ifndef EIGEN_HAS_CONSTEXPR +#if !EIGEN_HAS_CONSTEXPR array<int, 2> reduction_axis; reduction_axis[0] = 0; reduction_axis[1] = 1; @@ -402,7 +429,7 @@ static void test_innermost_first_dims() { in.setRandom(); // Reduce on the innermost dimensions. -#ifndef EIGEN_HAS_CONSTEXPR +#if !EIGEN_HAS_CONSTEXPR array<int, 2> reduction_axis; reduction_axis[0] = 2; reduction_axis[1] = 3; @@ -433,7 +460,7 @@ static void test_reduce_middle_dims() { in.setRandom(); // Reduce on the innermost dimensions. 
-#ifndef EIGEN_HAS_CONSTEXPR +#if !EIGEN_HAS_CONSTEXPR array<int, 2> reduction_axis; reduction_axis[0] = 1; reduction_axis[1] = 2; @@ -462,6 +489,8 @@ void test_cxx11_tensor_reduction() { CALL_SUBTEST(test_trivial_reductions<RowMajor>()); CALL_SUBTEST(test_simple_reductions<ColMajor>()); CALL_SUBTEST(test_simple_reductions<RowMajor>()); + CALL_SUBTEST(test_reductions_in_expr<ColMajor>()); + CALL_SUBTEST(test_reductions_in_expr<RowMajor>()); CALL_SUBTEST(test_full_reductions<ColMajor>()); CALL_SUBTEST(test_full_reductions<RowMajor>()); CALL_SUBTEST(test_user_defined_reductions<ColMajor>()); diff --git a/unsupported/test/cxx11_tensor_reduction_cuda.cu b/unsupported/test/cxx11_tensor_reduction_cuda.cu index cad0c08e0..6858b43a7 100644 --- a/unsupported/test/cxx11_tensor_reduction_cuda.cu +++ b/unsupported/test/cxx11_tensor_reduction_cuda.cu @@ -12,11 +12,14 @@ #define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> -template<int DataLayout> +template<typename Type, int DataLayout> static void test_full_reductions() { Eigen::CudaStreamDevice stream; @@ -25,24 +28,24 @@ static void test_full_reductions() { const int num_rows = internal::random<int>(1024, 5*1024); const int num_cols = internal::random<int>(1024, 5*1024); - Tensor<float, 2, DataLayout> in(num_rows, num_cols); + Tensor<Type, 2, DataLayout> in(num_rows, num_cols); in.setRandom(); - Tensor<float, 0, DataLayout> full_redux; + Tensor<Type, 0, DataLayout> full_redux; full_redux = in.sum(); - std::size_t in_bytes = in.size() * sizeof(float); - std::size_t out_bytes = full_redux.size() * sizeof(float); - float* gpu_in_ptr = static_cast<float*>(gpu_device.allocate(in_bytes)); - float* gpu_out_ptr = static_cast<float*>(gpu_device.allocate(out_bytes)); + std::size_t in_bytes = in.size() * sizeof(Type); + std::size_t out_bytes = full_redux.size() * sizeof(Type); + Type* gpu_in_ptr = static_cast<Type*>(gpu_device.allocate(in_bytes)); + Type* gpu_out_ptr = static_cast<Type*>(gpu_device.allocate(out_bytes)); gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes); - TensorMap<Tensor<float, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols); - TensorMap<Tensor<float, 0, DataLayout> > out_gpu(gpu_out_ptr); + TensorMap<Tensor<Type, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols); + TensorMap<Tensor<Type, 0, DataLayout> > out_gpu(gpu_out_ptr); out_gpu.device(gpu_device) = in_gpu.sum(); - Tensor<float, 0, DataLayout> full_redux_gpu; + Tensor<Type, 0, DataLayout> full_redux_gpu; gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes); gpu_device.synchronize(); @@ -53,7 +56,102 @@ static void test_full_reductions() { gpu_device.deallocate(gpu_out_ptr); } +template<typename Type, int DataLayout> +static void test_first_dim_reductions() { + int dim_x = 33; + int dim_y = 1; + int dim_z = 128; + + Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z); + in.setRandom(); + + Eigen::array<int, 1> red_axis; + red_axis[0] = 0; + Tensor<Type, 2, DataLayout> redux = in.sum(red_axis); + + // Create device + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice dev(&stream); + + // Create data(T) + Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type)); + Type* out_data = (Type*)dev.allocate(dim_z*dim_y*sizeof(Type)); + Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z); + Eigen::TensorMap<Eigen::Tensor<Type, 2, 
DataLayout> > gpu_out(out_data, dim_y, dim_z); + + // Perform operation + dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type)); + gpu_out.device(dev) = gpu_in.sum(red_axis); + gpu_out.device(dev) += gpu_in.sum(red_axis); + Tensor<Type, 2, DataLayout> redux_gpu(dim_y, dim_z); + dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type)); + dev.synchronize(); + + // Check that the CPU and GPU reductions return the same result. + for (int i = 0; i < gpu_out.size(); ++i) { + VERIFY_IS_APPROX(2*redux(i), redux_gpu(i)); + } + + dev.deallocate(in_data); + dev.deallocate(out_data); +} + +template<typename Type, int DataLayout> +static void test_last_dim_reductions() { + int dim_x = 128; + int dim_y = 1; + int dim_z = 33; + + Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z); + in.setRandom(); + + Eigen::array<int, 1> red_axis; + red_axis[0] = 2; + Tensor<Type, 2, DataLayout> redux = in.sum(red_axis); + + // Create device + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice dev(&stream); + + // Create data + Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type)); + Type* out_data = (Type*)dev.allocate(dim_x*dim_y*sizeof(Type)); + Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z); + Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_x, dim_y); + + // Perform operation + dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type)); + gpu_out.device(dev) = gpu_in.sum(red_axis); + gpu_out.device(dev) += gpu_in.sum(red_axis); + Tensor<Type, 2, DataLayout> redux_gpu(dim_x, dim_y); + dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type)); + dev.synchronize(); + + // Check that the CPU and GPU reductions return the same result. + for (int i = 0; i < gpu_out.size(); ++i) { + VERIFY_IS_APPROX(2*redux(i), redux_gpu(i)); + } + + dev.deallocate(in_data); + dev.deallocate(out_data); +} + + void test_cxx11_tensor_reduction_cuda() { - CALL_SUBTEST_1(test_full_reductions<ColMajor>()); - CALL_SUBTEST_2(test_full_reductions<RowMajor>()); + CALL_SUBTEST_1((test_full_reductions<float, ColMajor>())); + CALL_SUBTEST_1((test_full_reductions<double, ColMajor>())); + CALL_SUBTEST_2((test_full_reductions<float, RowMajor>())); + CALL_SUBTEST_2((test_full_reductions<double, RowMajor>())); + + CALL_SUBTEST_3((test_first_dim_reductions<float, ColMajor>())); + CALL_SUBTEST_3((test_first_dim_reductions<double, ColMajor>())); + CALL_SUBTEST_4((test_first_dim_reductions<float, RowMajor>())); +// Outer reductions of doubles aren't supported just yet. +// CALL_SUBTEST_4((test_first_dim_reductions<double, RowMajor>())) + + CALL_SUBTEST_5((test_last_dim_reductions<float, ColMajor>())); +// Outer reductions of doubles aren't supported just yet. +// CALL_SUBTEST_5((test_last_dim_reductions<double, ColMajor>())); + CALL_SUBTEST_6((test_last_dim_reductions<float, RowMajor>())); + CALL_SUBTEST_6((test_last_dim_reductions<double, RowMajor>())); } diff --git a/unsupported/test/cxx11_tensor_scan.cpp b/unsupported/test/cxx11_tensor_scan.cpp new file mode 100644 index 000000000..af59aa3ef --- /dev/null +++ b/unsupported/test/cxx11_tensor_scan.cpp @@ -0,0 +1,110 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Igor Babuschkin <igor@babuschk.in> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include <limits> +#include <numeric> +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template <int DataLayout, typename Type=float, bool Exclusive = false> +static void test_1d_scan() +{ + int size = 50; + Tensor<Type, 1, DataLayout> tensor(size); + tensor.setRandom(); + Tensor<Type, 1, DataLayout> result = tensor.cumsum(0, Exclusive); + + VERIFY_IS_EQUAL(tensor.dimension(0), result.dimension(0)); + + Type accum = 0; + for (int i = 0; i < size; i++) { + if (Exclusive) { + VERIFY_IS_EQUAL(result(i), accum); + accum += tensor(i); + } else { + accum += tensor(i); + VERIFY_IS_EQUAL(result(i), accum); + } + } + + accum = 1; + result = tensor.cumprod(0, Exclusive); + for (int i = 0; i < size; i++) { + if (Exclusive) { + VERIFY_IS_EQUAL(result(i), accum); + accum *= tensor(i); + } else { + accum *= tensor(i); + VERIFY_IS_EQUAL(result(i), accum); + } + } +} + +template <int DataLayout, typename Type=float> +static void test_4d_scan() +{ + int size = 5; + Tensor<Type, 4, DataLayout> tensor(size, size, size, size); + tensor.setRandom(); + + Tensor<Type, 4, DataLayout> result(size, size, size, size); + + result = tensor.cumsum(0); + Type accum = 0; + for (int i = 0; i < size; i++) { + accum += tensor(i, 1, 2, 3); + VERIFY_IS_EQUAL(result(i, 1, 2, 3), accum); + } + result = tensor.cumsum(1); + accum = 0; + for (int i = 0; i < size; i++) { + accum += tensor(1, i, 2, 3); + VERIFY_IS_EQUAL(result(1, i, 2, 3), accum); + } + result = tensor.cumsum(2); + accum = 0; + for (int i = 0; i < size; i++) { + accum += tensor(1, 2, i, 3); + VERIFY_IS_EQUAL(result(1, 2, i, 3), accum); + } + result = tensor.cumsum(3); + accum = 0; + for (int i = 0; i < size; i++) { + accum += tensor(1, 2, 3, i); + VERIFY_IS_EQUAL(result(1, 2, 3, i), accum); + } +} + +template <int DataLayout> +static void test_tensor_maps() { + int inputs[20]; + TensorMap<Tensor<int, 1, DataLayout> > tensor_map(inputs, 20); + tensor_map.setRandom(); + + Tensor<int, 1, DataLayout> result = tensor_map.cumsum(0); + + int accum = 0; + for (int i = 0; i < 20; ++i) { + accum += tensor_map(i); + VERIFY_IS_EQUAL(result(i), accum); + } +} + +void test_cxx11_tensor_scan() { + CALL_SUBTEST((test_1d_scan<ColMajor, float, true>())); + CALL_SUBTEST((test_1d_scan<ColMajor, float, false>())); + CALL_SUBTEST((test_1d_scan<RowMajor, float, true>())); + CALL_SUBTEST((test_1d_scan<RowMajor, float, false>())); + CALL_SUBTEST(test_4d_scan<ColMajor>()); + CALL_SUBTEST(test_4d_scan<RowMajor>()); + CALL_SUBTEST(test_tensor_maps<ColMajor>()); + CALL_SUBTEST(test_tensor_maps<RowMajor>()); +} diff --git a/unsupported/test/cxx11_tensor_scan_cuda.cu b/unsupported/test/cxx11_tensor_scan_cuda.cu new file mode 100644 index 000000000..761d11fd1 --- /dev/null +++ b/unsupported/test/cxx11_tensor_scan_cuda.cu @@ -0,0 +1,79 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
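+// A worked example of the scan semantics exercised by the scan tests (both +// here and in cxx11_tensor_scan.cpp above): an inclusive cumsum of {1, 2, 3} +// yields {1, 3, 6}, while the exclusive variant (Exclusive == true) shifts +// the running total by one slot and yields {0, 1, 3}.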
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_scan_cuda +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; +typedef Tensor<float, 1>::DimensionPair DimPair; + +template<int DataLayout> +void test_cuda_cumsum(int m_size, int k_size, int n_size) +{ + std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; + Tensor<float, 3, DataLayout> t_input(m_size, k_size, n_size); + Tensor<float, 3, DataLayout> t_result(m_size, k_size, n_size); + Tensor<float, 3, DataLayout> t_result_gpu(m_size, k_size, n_size); + + t_input.setRandom(); + + std::size_t t_input_bytes = t_input.size() * sizeof(float); + std::size_t t_result_bytes = t_result.size() * sizeof(float); + + float* d_t_input; + float* d_t_result; + + cudaMalloc((void**)(&d_t_input), t_input_bytes); + cudaMalloc((void**)(&d_t_result), t_result_bytes); + + cudaMemcpy(d_t_input, t_input.data(), t_input_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > + gpu_t_input(d_t_input, Eigen::array<int, 3>(m_size, k_size, n_size)); + Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > + gpu_t_result(d_t_result, Eigen::array<int, 3>(m_size, k_size, n_size)); + + gpu_t_result.device(gpu_device) = gpu_t_input.cumsum(1); + t_result = t_input.cumsum(1); + + cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost); + for (size_t i = 0; i < t_result.size(); i++) { + if (fabs(t_result(i) - t_result_gpu(i)) < 1e-4f) { + continue; + } + if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), 1e-4f)) { + continue; + } + std::cout << "mismatch detected at index " << i << ": " << t_result(i) + << " vs " << t_result_gpu(i) << std::endl; + assert(false); + } + + cudaFree((void*)d_t_input); + cudaFree((void*)d_t_result); +} + + +void test_cxx11_tensor_scan_cuda() +{ + CALL_SUBTEST_1(test_cuda_cumsum<ColMajor>(128, 128, 128)); + CALL_SUBTEST_2(test_cuda_cumsum<RowMajor>(128, 128, 128)); +} diff --git a/unsupported/test/cxx11_tensor_sugar.cpp b/unsupported/test/cxx11_tensor_sugar.cpp index a03f75cfe..2f56eb495 100644 --- a/unsupported/test/cxx11_tensor_sugar.cpp +++ b/unsupported/test/cxx11_tensor_sugar.cpp @@ -33,7 +33,7 @@ static void test_comparison_sugar() { } -static void test_scalar_sugar() { +static void test_scalar_sugar_add_mul() { Tensor<float, 3> A(6, 7, 5); Tensor<float, 3> B(6, 7, 5); A.setRandom(); @@ -41,21 +41,41 @@ static void test_scalar_sugar() { const float alpha = 0.43f; const float beta = 0.21f; + const float gamma = 0.14f; - Tensor<float, 3> R = A * A.constant(alpha) + B * B.constant(beta); - Tensor<float, 3> S = A * alpha + B * beta; - - // TODO: add enough syntactic sugar to support this - // Tensor<float, 3> T = alpha * A + beta * B; + Tensor<float, 3> R = A.constant(gamma) + A * A.constant(alpha) + B * B.constant(beta); + Tensor<float, 3> S = A * alpha + B * beta + gamma; + Tensor<float, 3> T = gamma + alpha * A + beta * B; for (int i = 0; i < 6*7*5; ++i) { VERIFY_IS_APPROX(R(i), S(i)); + VERIFY_IS_APPROX(R(i), T(i)); } } +static void test_scalar_sugar_sub_div() { + Tensor<float, 3> A(6, 7, 5); + Tensor<float, 3> B(6, 7, 5); + A.setRandom(); + B.setRandom(); + + const float alpha = 0.43f; + const float beta = 0.21f; + 
const float gamma = 0.14f; + const float delta = 0.32f; + + Tensor<float, 3> R = A.constant(gamma) - A / A.constant(alpha) + - B.constant(beta) / B - A.constant(delta); + Tensor<float, 3> S = gamma - A / alpha - beta / B - delta; + + for (int i = 0; i < 6*7*5; ++i) { + VERIFY_IS_APPROX(R(i), S(i)); + } +} void test_cxx11_tensor_sugar() { CALL_SUBTEST(test_comparison_sugar()); - CALL_SUBTEST(test_scalar_sugar()); + CALL_SUBTEST(test_scalar_sugar_add_mul()); + CALL_SUBTEST(test_scalar_sugar_sub_div()); } diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index e46197464..2ef665f30 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -91,7 +91,7 @@ void test_multithread_contraction() for (ptrdiff_t i = 0; i < t_result.size(); i++) { VERIFY(&t_result.data()[i] != &m_result.data()[i]); - if (fabs(t_result(i) - m_result(i)) < 1e-4) { + if (fabsf(t_result(i) - m_result(i)) < 1e-4f) { continue; } if (Eigen::internal::isApprox(t_result(i), m_result(i), 1e-4f)) { @@ -132,7 +132,7 @@ void test_contraction_corner_cases() for (ptrdiff_t i = 0; i < t_result.size(); i++) { assert(!(numext::isnan)(t_result.data()[i])); - if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { + if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) { std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; assert(false); } @@ -147,7 +147,7 @@ void test_contraction_corner_cases() m_result = m_left.transpose() * m_right; for (ptrdiff_t i = 0; i < t_result.size(); i++) { assert(!(numext::isnan)(t_result.data()[i])); - if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { + if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) { std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; assert(false); } @@ -165,7 +165,7 @@ void test_contraction_corner_cases() m_result = m_left.transpose() * m_right; for (ptrdiff_t i = 0; i < t_result.size(); i++) { assert(!(numext::isnan)(t_result.data()[i])); - if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { + if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) { std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; assert(false); } @@ -183,7 +183,7 @@ void test_contraction_corner_cases() m_result = m_left.transpose() * m_right; for (ptrdiff_t i = 0; i < t_result.size(); i++) { assert(!(numext::isnan)(t_result.data()[i])); - if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { + if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) { std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; assert(false); } @@ -226,7 +226,7 @@ void test_multithread_contraction_agrees_with_singlethread() { for (ptrdiff_t i = 0; i < st_result.size(); i++) { // if both of the values are very small, then do nothing (because the test will fail // due to numerical precision issues when values are small) - if (fabs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4) { + if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) { VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]); } } @@ -234,6 +234,42 @@ void test_multithread_contraction_agrees_with_singlethread() { template<int DataLayout> +void test_full_contraction() { + int contract_size1 = internal::random<int>(1, 500); + int contract_size2 = 
internal::random<int>(1, 500); + + Tensor<float, 2, DataLayout> left(contract_size1, + contract_size2); + Tensor<float, 2, DataLayout> right(contract_size1, + contract_size2); + left.setRandom(); + right.setRandom(); + + // add constants to shift values away from 0 for more precision + left += left.constant(1.5f); + right += right.constant(1.5f); + + typedef Tensor<float, 2>::DimensionPair DimPair; + Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}}); + + Eigen::ThreadPool tp(internal::random<int>(2, 11)); + Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11)); + + Tensor<float, 0, DataLayout> st_result; + st_result = left.contract(right, dims); + + Tensor<float, 0, DataLayout> tp_result; + tp_result.device(thread_pool_device) = left.contract(right, dims); + + VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions())); + // if both of the values are very small, then do nothing (because the test will fail + // due to numerical precision issues when values are small) + if (numext::abs(st_result() - tp_result()) >= 1e-4f) { + VERIFY_IS_APPROX(st_result(), tp_result()); + } +} + +template<int DataLayout> void test_multithreaded_reductions() { const int num_threads = internal::random<int>(3, 11); ThreadPool thread_pool(num_threads); @@ -324,6 +360,9 @@ void test_cxx11_tensor_thread_pool() CALL_SUBTEST_4(test_contraction_corner_cases<ColMajor>()); CALL_SUBTEST_4(test_contraction_corner_cases<RowMajor>()); + CALL_SUBTEST_4(test_full_contraction<ColMajor>()); + CALL_SUBTEST_4(test_full_contraction<RowMajor>()); + CALL_SUBTEST_5(test_multithreaded_reductions<ColMajor>()); CALL_SUBTEST_5(test_multithreaded_reductions<RowMajor>()); diff --git a/unsupported/test/kronecker_product.cpp b/unsupported/test/kronecker_product.cpp index 02411a262..e770049e5 100644 --- a/unsupported/test/kronecker_product.cpp +++ b/unsupported/test/kronecker_product.cpp @@ -9,12 +9,12 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
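+// Recall the block structure that part 2 below relies on: for A (m x n) and +// B (p x q), kroneckerProduct(A, B) is the (m*p) x (n*q) matrix whose (i, j) +// block of size p x q equals A(i, j)*B; with a 2x2 a and 3x3 b, the block of +// the product starting at (3, 3) is therefore exactly a(1,1)*b.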
+#ifdef EIGEN_TEST_PART_1 #include "sparse.h" #include <Eigen/SparseExtra> #include <Eigen/KroneckerProduct> - template<typename MatrixType> void check_dimension(const MatrixType& ab, const int rows, const int cols) { @@ -230,3 +230,23 @@ void test_kronecker_product() VERIFY_IS_APPROX(MatrixXf(sC2),dC); } } + +#endif + +#ifdef EIGEN_TEST_PART_2 + +// simply check that for a dense kronecker product, sparse module is not needed + +#include "main.h" +#include <Eigen/KroneckerProduct> + +void test_kronecker_product() +{ + MatrixXd a(2,2), b(3,3), c; + a.setRandom(); + b.setRandom(); + c = kroneckerProduct(a,b); + VERIFY_IS_APPROX(c.block(3,3,3,3), a(1,1)*b); +} + +#endif diff --git a/unsupported/test/matrix_function.cpp b/unsupported/test/matrix_function.cpp index 9a995f941..7c9b68a3c 100644 --- a/unsupported/test/matrix_function.cpp +++ b/unsupported/test/matrix_function.cpp @@ -113,8 +113,8 @@ void testMatrixLogarithm(const MatrixType& A) MatrixType scaledA; RealScalar maxImagPartOfSpectrum = A.eigenvalues().imag().cwiseAbs().maxCoeff(); - if (maxImagPartOfSpectrum >= 0.9 * EIGEN_PI) - scaledA = A * 0.9 * EIGEN_PI / maxImagPartOfSpectrum; + if (maxImagPartOfSpectrum >= RealScalar(0.9L * EIGEN_PI)) + scaledA = A * RealScalar(0.9L * EIGEN_PI) / maxImagPartOfSpectrum; else scaledA = A; diff --git a/unsupported/test/matrix_functions.h b/unsupported/test/matrix_functions.h index 150b4c0c5..4e2636404 100644 --- a/unsupported/test/matrix_functions.h +++ b/unsupported/test/matrix_functions.h @@ -61,7 +61,7 @@ struct generateTestMatrix<MatrixType,1> }; template <typename Derived, typename OtherDerived> -double relerr(const MatrixBase<Derived>& A, const MatrixBase<OtherDerived>& B) +typename Derived::RealScalar relerr(const MatrixBase<Derived>& A, const MatrixBase<OtherDerived>& B) { return std::sqrt((A - B).cwiseAbs2().sum() / (std::min)(A.cwiseAbs2().sum(), B.cwiseAbs2().sum())); } diff --git a/unsupported/test/matrix_power.cpp b/unsupported/test/matrix_power.cpp index 8e104ed1e..7ccfacfdf 100644 --- a/unsupported/test/matrix_power.cpp +++ b/unsupported/test/matrix_power.cpp @@ -10,7 +10,7 @@ #include "matrix_functions.h" template<typename T> -void test2dRotation(double tol) +void test2dRotation(const T& tol) { Matrix<T,2,2> A, B, C; T angle, c, s; @@ -19,19 +19,19 @@ void test2dRotation(double tol) MatrixPower<Matrix<T,2,2> > Apow(A); for (int i=0; i<=20; ++i) { - angle = pow(10, (i-10) / 5.); + angle = std::pow(T(10), (i-10) / T(5.)); c = std::cos(angle); s = std::sin(angle); B << c, s, -s, c; - C = Apow(std::ldexp(angle,1) / EIGEN_PI); + C = Apow(std::ldexp(angle,1) / T(EIGEN_PI)); std::cout << "test2dRotation: i = " << i << " error powerm = " << relerr(C,B) << '\n'; VERIFY(C.isApprox(B, tol)); } } template<typename T> -void test2dHyperbolicRotation(double tol) +void test2dHyperbolicRotation(const T& tol) { Matrix<std::complex<T>,2,2> A, B, C; T angle, ch = std::cosh((T)1); @@ -53,7 +53,7 @@ void test2dHyperbolicRotation(double tol) } template<typename T> -void test3dRotation(double tol) +void test3dRotation(const T& tol) { Matrix<T,3,1> v; T angle; @@ -61,13 +61,13 @@ void test3dRotation(double tol) for (int i=0; i<=20; ++i) { v = Matrix<T,3,1>::Random(); v.normalize(); - angle = pow(10, (i-10) / 5.); + angle = std::pow(T(10), (i-10) / T(5.)); VERIFY(AngleAxis<T>(angle, v).matrix().isApprox(AngleAxis<T>(1,v).matrix().pow(angle), tol)); } } template<typename MatrixType> -void testGeneral(const MatrixType& m, double tol) +void testGeneral(const MatrixType& m, const typename MatrixType::RealScalar& 
tol) { typedef typename MatrixType::RealScalar RealScalar; MatrixType m1, m2, m3, m4, m5; @@ -97,7 +97,7 @@ void testGeneral(const MatrixType& m, double tol) } template<typename MatrixType> -void testSingular(const MatrixType& m_const, double tol) +void testSingular(const MatrixType& m_const, const typename MatrixType::RealScalar& tol) { // we need to pass by reference in order to prevent errors with // MSVC for aligned data types ... @@ -119,18 +119,18 @@ void testSingular(const MatrixType& m_const, double tol) MatrixPower<MatrixType> mpow(m); T = T.sqrt(); - VERIFY(mpow(0.5).isApprox(U * (TriangularType(T) * U.adjoint()), tol)); + VERIFY(mpow(0.5L).isApprox(U * (TriangularType(T) * U.adjoint()), tol)); T = T.sqrt(); - VERIFY(mpow(0.25).isApprox(U * (TriangularType(T) * U.adjoint()), tol)); + VERIFY(mpow(0.25L).isApprox(U * (TriangularType(T) * U.adjoint()), tol)); T = T.sqrt(); - VERIFY(mpow(0.125).isApprox(U * (TriangularType(T) * U.adjoint()), tol)); + VERIFY(mpow(0.125L).isApprox(U * (TriangularType(T) * U.adjoint()), tol)); } } template<typename MatrixType> -void testLogThenExp(const MatrixType& m_const, double tol) +void testLogThenExp(const MatrixType& m_const, const typename MatrixType::RealScalar& tol) { // we need to pass by reference in order to prevent errors with // MSVC for aligned data types ... @@ -154,14 +154,14 @@ void test_matrix_power() { CALL_SUBTEST_2(test2dRotation<double>(1e-13)); CALL_SUBTEST_1(test2dRotation<float>(2e-5)); // was 1e-5, relaxed for clang 2.8 / linux / x86-64 - CALL_SUBTEST_9(test2dRotation<long double>(1e-13)); + CALL_SUBTEST_9(test2dRotation<long double>(1e-13L)); CALL_SUBTEST_2(test2dHyperbolicRotation<double>(1e-14)); CALL_SUBTEST_1(test2dHyperbolicRotation<float>(1e-5)); - CALL_SUBTEST_9(test2dHyperbolicRotation<long double>(1e-14)); + CALL_SUBTEST_9(test2dHyperbolicRotation<long double>(1e-14L)); CALL_SUBTEST_10(test3dRotation<double>(1e-13)); CALL_SUBTEST_11(test3dRotation<float>(1e-5)); - CALL_SUBTEST_12(test3dRotation<long double>(1e-13)); + CALL_SUBTEST_12(test3dRotation<long double>(1e-13L)); CALL_SUBTEST_2(testGeneral(Matrix2d(), 1e-13)); CALL_SUBTEST_7(testGeneral(Matrix3dRowMajor(), 1e-13)); @@ -171,10 +171,10 @@ void test_matrix_power() CALL_SUBTEST_5(testGeneral(Matrix3cf(), 1e-4)); CALL_SUBTEST_8(testGeneral(Matrix4f(), 1e-4)); CALL_SUBTEST_6(testGeneral(MatrixXf(2,2), 1e-3)); // see bug 614 - CALL_SUBTEST_9(testGeneral(MatrixXe(7,7), 1e-13)); + CALL_SUBTEST_9(testGeneral(MatrixXe(7,7), 1e-13L)); CALL_SUBTEST_10(testGeneral(Matrix3d(), 1e-13)); CALL_SUBTEST_11(testGeneral(Matrix3f(), 1e-4)); - CALL_SUBTEST_12(testGeneral(Matrix3e(), 1e-13)); + CALL_SUBTEST_12(testGeneral(Matrix3e(), 1e-13L)); CALL_SUBTEST_2(testSingular(Matrix2d(), 1e-13)); CALL_SUBTEST_7(testSingular(Matrix3dRowMajor(), 1e-13)); @@ -184,10 +184,10 @@ void test_matrix_power() CALL_SUBTEST_5(testSingular(Matrix3cf(), 1e-4)); CALL_SUBTEST_8(testSingular(Matrix4f(), 1e-4)); CALL_SUBTEST_6(testSingular(MatrixXf(2,2), 1e-3)); - CALL_SUBTEST_9(testSingular(MatrixXe(7,7), 1e-13)); + CALL_SUBTEST_9(testSingular(MatrixXe(7,7), 1e-13L)); CALL_SUBTEST_10(testSingular(Matrix3d(), 1e-13)); CALL_SUBTEST_11(testSingular(Matrix3f(), 1e-4)); - CALL_SUBTEST_12(testSingular(Matrix3e(), 1e-13)); + CALL_SUBTEST_12(testSingular(Matrix3e(), 1e-13L)); CALL_SUBTEST_2(testLogThenExp(Matrix2d(), 1e-13)); CALL_SUBTEST_7(testLogThenExp(Matrix3dRowMajor(), 1e-13)); @@ -197,8 +197,8 @@ void test_matrix_power() CALL_SUBTEST_5(testLogThenExp(Matrix3cf(), 1e-4)); 
CALL_SUBTEST_8(testLogThenExp(Matrix4f(), 1e-4)); CALL_SUBTEST_6(testLogThenExp(MatrixXf(2,2), 1e-3)); - CALL_SUBTEST_9(testLogThenExp(MatrixXe(7,7), 1e-13)); + CALL_SUBTEST_9(testLogThenExp(MatrixXe(7,7), 1e-13L)); CALL_SUBTEST_10(testLogThenExp(Matrix3d(), 1e-13)); CALL_SUBTEST_11(testLogThenExp(Matrix3f(), 1e-4)); - CALL_SUBTEST_12(testLogThenExp(Matrix3e(), 1e-13)); + CALL_SUBTEST_12(testLogThenExp(Matrix3e(), 1e-13L)); } diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h index 9b0cf7268..8404f1ff8 100644 --- a/unsupported/test/mpreal/mpreal.h +++ b/unsupported/test/mpreal/mpreal.h @@ -99,7 +99,7 @@ // Detect support for explicit converters.
#if (__has_feature(cxx_explicit_conversions) || \
- (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GNUC_MINOR >= 5) || __cplusplus >= 201103L || \
+ (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GNUC_MINOR__ >= 5) || __cplusplus >= 201103L || \
(defined(_MSC_VER) && _MSC_VER >= 1800))
#define MPREAL_HAVE_EXPLICIT_CONVERTERS
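The macro fix in the hunk above matters because __GNUC_MINOR (without the trailing underscores) is not a macro GCC defines, and an undefined identifier evaluates to 0 inside #if, so the GCC branch of the old condition could never fire. A minimal sketch of the usual GCC version-check idiom (the MY_HAVE_FEATURE name is illustrative only):

    // __GNUC__ and __GNUC_MINOR__ are the compiler-provided major/minor
    // version macros; guard the major version as well as the minor one.
    #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
    #define MY_HAVE_FEATURE 1
    #endif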
diff --git a/unsupported/test/mpreal_support.cpp b/unsupported/test/mpreal_support.cpp index 1aa9e786a..ffa5691eb 100644 --- a/unsupported/test/mpreal_support.cpp +++ b/unsupported/test/mpreal_support.cpp @@ -17,6 +17,7 @@ void test_mpreal_support() std::cerr << "dummy_precision = " << NumTraits<mpreal>::dummy_precision() << "\n"; std::cerr << "highest = " << NumTraits<mpreal>::highest() << "\n"; std::cerr << "lowest = " << NumTraits<mpreal>::lowest() << "\n"; + std::cerr << "digits10 = " << NumTraits<mpreal>::digits10() << "\n"; for(int i = 0; i < g_repeat; i++) { int s = Eigen::internal::random<int>(1,100); diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp new file mode 100644 index 000000000..057fb3e92 --- /dev/null +++ b/unsupported/test/special_functions.cpp @@ -0,0 +1,345 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include "../Eigen/SpecialFunctions" + +template<typename X, typename Y> +void verify_component_wise(const X& x, const Y& y) +{ + for(Index i=0; i<x.size(); ++i) + { + if((numext::isfinite)(y(i))) + VERIFY_IS_APPROX( x(i), y(i) ); + else if((numext::isnan)(y(i))) + VERIFY((numext::isnan)(x(i))); + else + VERIFY_IS_EQUAL( x(i), y(i) ); + } +} + +template<typename ArrayType> void array_special_functions() +{ + using std::abs; + using std::sqrt; + typedef typename ArrayType::Scalar Scalar; + typedef typename NumTraits<Scalar>::Real RealScalar; + + Scalar plusinf = std::numeric_limits<Scalar>::infinity(); + Scalar nan = std::numeric_limits<Scalar>::quiet_NaN(); + + Index rows = internal::random<Index>(1,30); + Index cols = 1; + + // API + { + ArrayType m1 = ArrayType::Random(rows,cols); +#if EIGEN_HAS_C99_MATH + VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1)); + VERIFY_IS_APPROX(m1.digamma(), digamma(m1)); + VERIFY_IS_APPROX(m1.erf(), erf(m1)); + VERIFY_IS_APPROX(m1.erfc(), erfc(m1)); +#endif // EIGEN_HAS_C99_MATH + } + + +#if EIGEN_HAS_C99_MATH + // check special functions (comparing against numpy implementation) + if (!NumTraits<Scalar>::IsComplex) + { + + { + ArrayType m1 = ArrayType::Random(rows,cols); + ArrayType m2 = ArrayType::Random(rows,cols); + + // Test various properties of igamma & igammac. These are normalized + // gamma integrals where + // igammac(a, x) = Gamma(a, x) / Gamma(a) + // igamma(a, x) = gamma(a, x) / Gamma(a) + // where Gamma and gamma are considered the standard unnormalized + // upper and lower incomplete gamma functions, respectively.
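+ // Equivalently, igamma(a, x) + igammac(a, x) == 1 for a > 0; the + // Gamma(a, x) + gamma(a, x) == Gamma(a) check below is this identity + // multiplied through by Gamma(a) = exp(lgamma(a)).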
+ ArrayType a = m1.abs() + 2; + ArrayType x = m2.abs() + 2; + ArrayType zero = ArrayType::Zero(rows, cols); + ArrayType one = ArrayType::Constant(rows, cols, Scalar(1.0)); + ArrayType a_m1 = a - one; + ArrayType Gamma_a_x = Eigen::igammac(a, x) * a.lgamma().exp(); + ArrayType Gamma_a_m1_x = Eigen::igammac(a_m1, x) * a_m1.lgamma().exp(); + ArrayType gamma_a_x = Eigen::igamma(a, x) * a.lgamma().exp(); + ArrayType gamma_a_m1_x = Eigen::igamma(a_m1, x) * a_m1.lgamma().exp(); + + // Gamma(a, 0) == Gamma(a) + VERIFY_IS_APPROX(Eigen::igammac(a, zero), one); + + // Gamma(a, x) + gamma(a, x) == Gamma(a) + VERIFY_IS_APPROX(Gamma_a_x + gamma_a_x, a.lgamma().exp()); + + // Gamma(a, x) == (a - 1) * Gamma(a-1, x) + x^(a-1) * exp(-x) + VERIFY_IS_APPROX(Gamma_a_x, (a - 1) * Gamma_a_m1_x + x.pow(a-1) * (-x).exp()); + + // gamma(a, x) == (a - 1) * gamma(a-1, x) - x^(a-1) * exp(-x) + VERIFY_IS_APPROX(gamma_a_x, (a - 1) * gamma_a_m1_x - x.pow(a-1) * (-x).exp()); + } + + { + // Check exact values of igamma and igammac against a third party calculation. + Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + + // location i*6+j corresponds to a_s[i], x_s[j]. + Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan}, + {0.0, 0.6321205588285578, 0.7768698398515702, + 0.9816843611112658, 9.999500016666262e-05, 1.0}, + {0.0, 0.4275932955291202, 0.608374823728911, + 0.9539882943107686, 7.522076445089201e-07, 1.0}, + {0.0, 0.01898815687615381, 0.06564245437845008, + 0.5665298796332909, 4.166333347221828e-18, 1.0}, + {0.0, 0.9999780593618628, 0.9999899967080838, + 0.9999996219837988, 0.9991370418689945, 1.0}, + {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}}; + Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan}, + {1.0, 0.36787944117144233, 0.22313016014842982, + 0.018315638888734182, 0.9999000049998333, 0.0}, + {1.0, 0.5724067044708798, 0.3916251762710878, + 0.04601170568923136, 0.9999992477923555, 0.0}, + {1.0, 0.9810118431238462, 0.9343575456215499, + 0.4334701203667089, 1.0, 0.0}, + {1.0, 2.1940638138146658e-05, 1.0003291916285e-05, + 3.7801620118431334e-07, 0.0008629581310054535, + 0.0}, + {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}}; + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + if ((std::isnan)(igamma_s[i][j])) { + VERIFY((std::isnan)(numext::igamma(a_s[i], x_s[j]))); + } else { + VERIFY_IS_APPROX(numext::igamma(a_s[i], x_s[j]), igamma_s[i][j]); + } + + if ((std::isnan)(igammac_s[i][j])) { + VERIFY((std::isnan)(numext::igammac(a_s[i], x_s[j]))); + } else { + VERIFY_IS_APPROX(numext::igammac(a_s[i], x_s[j]), igammac_s[i][j]); + } + } + } + } + } +#endif // EIGEN_HAS_C99_MATH + + // Check the zeta function against scipy.special.zeta + { + ArrayType x(7), q(7), res(7), ref(7); + x << 1.5, 4, 10.5, 10000.5, 3, 1, 0.9; + q << 2, 1.5, 3, 1.0001, -2.5, 1.2345, 1.2345; + ref << 1.61237534869, 0.234848505667, 1.03086757337e-5, 0.367879440865, 0.054102025820864097, plusinf, nan; + CALL_SUBTEST( verify_component_wise(ref, ref); ); + CALL_SUBTEST( res = x.zeta(q); verify_component_wise(res, ref); ); + CALL_SUBTEST( res = zeta(x,q); verify_component_wise(res, ref); ); + } + + // digamma + { + ArrayType x(7), res(7), ref(7); + x << 1, 1.5, 4, -10.5, 10000.5, 0, -1; + ref << -0.5772156649015329, 0.03648997397857645, 1.2561176684318, 2.398239129535781, 9.210340372392849, plusinf, plusinf; + CALL_SUBTEST( verify_component_wise(ref, ref); ); + + CALL_SUBTEST( res = 
x.digamma(); verify_component_wise(res, ref); ); + CALL_SUBTEST( res = digamma(x); verify_component_wise(res, ref); ); + } + + +#if EIGEN_HAS_C99_MATH + { + ArrayType n(11), x(11), res(11), ref(11); + n << 1, 1, 1, 1.5, 17, 31, 28, 8, 42, 147, 170; + x << 2, 3, 25.5, 1.5, 4.7, 11.8, 17.7, 30.2, 15.8, 54.1, 64; + ref << 0.644934066848, 0.394934066848, 0.0399946696496, nan, 293.334565435, 0.445487887616, -2.47810300902e-07, -8.29668781082e-09, -0.434562276666, 0.567742190178, -0.0108615497927; + CALL_SUBTEST( verify_component_wise(ref, ref); ); + + if(sizeof(RealScalar)>=8) { // double + // Reason for commented line: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232 + // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res, ref); ); + CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res, ref); ); + } + else { + // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res.head(8), ref.head(8)); ); + CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res.head(8), ref.head(8)); ); + } + } +#endif + +#if EIGEN_HAS_C99_MATH + { + // Inputs and ground truth generated with scipy via: + // a = np.logspace(-3, 3, 5) - 1e-3 + // b = np.logspace(-3, 3, 5) - 1e-3 + // x = np.linspace(-0.1, 1.1, 5) + // (full_a, full_b, full_x) = np.vectorize(lambda a, b, x: (a, b, x))(*np.ix_(a, b, x)) + // full_a = full_a.flatten().tolist() # same for full_b, full_x + // v = scipy.special.betainc(full_a, full_b, full_x).flatten().tolist() + // + // Note in Eigen, we call betainc with arguments in the order (x, a, b). + ArrayType a(125); + ArrayType b(125); + ArrayType x(125); + ArrayType v(125); + ArrayType res(125); + + a << 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999; + + b << 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, + 999.999, 999.999, 999.999, 999.999, 0.0, 
0.0, 0.0, 0.0, 0.0, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, + 0.999, 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999; + + x << -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, + 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, + -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, + 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, + 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1; + + v << nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, nan, 0.47972119876364683, 0.5, 0.5202788012363533, nan, nan, + 0.9518683957740043, 0.9789663010413743, 0.9931729188073435, nan, nan, + 0.999995949033062, 0.9999999999993698, 0.9999999999999999, nan, nan, + 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, nan, + nan, nan, nan, nan, nan, 0.006827081192655869, 0.0210336989586256, + 0.04813160422599567, nan, nan, 0.20014344256217678, 0.5000000000000001, + 0.7998565574378232, nan, nan, 0.9991401428435834, 0.999999999698403, + 0.9999999999999999, nan, nan, 0.9999999999999999, 0.9999999999999999, + 0.9999999999999999, nan, nan, nan, nan, nan, nan, nan, + 1.0646600232370887e-25, 6.301722877826246e-13, 4.050966937974938e-06, + nan, nan, 7.864342668429763e-23, 3.015969667594166e-10, + 0.0008598571564165444, nan, nan, 6.031987710123844e-08, + 0.5000000000000007, 0.9999999396801229, nan, nan, 0.9999999999999999, + 0.9999999999999999, 0.9999999999999999, nan, nan, nan, nan, nan, nan, + nan, 0.0, 7.029920380986636e-306, 2.2450728208591345e-101, nan, nan, + 0.0, 9.275871147869727e-302, 1.2232913026152827e-97, nan, nan, 0.0, + 3.0891393081932924e-252, 2.9303043666183996e-60, nan, nan, + 2.248913486879199e-196, 0.5000000000004947, 0.9999999999999999, nan; + + CALL_SUBTEST(res = betainc(a, b, x); + verify_component_wise(res, v);); + } + + // Test various properties of betainc + { + ArrayType m1 = ArrayType::Random(32); + ArrayType m2 = ArrayType::Random(32); + ArrayType m3 = 
ArrayType::Random(32); + ArrayType one = ArrayType::Constant(32, Scalar(1.0)); + const Scalar eps = std::numeric_limits<Scalar>::epsilon(); + ArrayType a = (m1 * 4.0).exp(); + ArrayType b = (m2 * 4.0).exp(); + ArrayType x = m3.abs(); + + // betainc(a, 1, x) == x**a + CALL_SUBTEST( + ArrayType test = betainc(a, one, x); + ArrayType expected = x.pow(a); + verify_component_wise(test, expected);); + + // betainc(1, b, x) == 1 - (1 - x)**b + CALL_SUBTEST( + ArrayType test = betainc(one, b, x); + ArrayType expected = one - (one - x).pow(b); + verify_component_wise(test, expected);); + + // betainc(a, b, x) == 1 - betainc(b, a, 1-x) + CALL_SUBTEST( + ArrayType test = betainc(a, b, x) + betainc(b, a, one - x); + ArrayType expected = one; + verify_component_wise(test, expected);); + + // betainc(a+1, b, x) = betainc(a, b, x) - x**a * (1 - x)**b / (a * beta(a, b)) + CALL_SUBTEST( + ArrayType num = x.pow(a) * (one - x).pow(b); + ArrayType denom = a * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp(); + // Add eps to rhs and lhs so that component-wise test doesn't result in + // nans when both outputs are zeros. + ArrayType expected = betainc(a, b, x) - num / denom + eps; + ArrayType test = betainc(a + one, b, x) + eps; + if (sizeof(Scalar) >= 8) { // double + verify_component_wise(test, expected); + } else { + // Reason for limited test: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232 + verify_component_wise(test.head(8), expected.head(8)); + }); + + // betainc(a, b+1, x) = betainc(a, b, x) + x**a * (1 - x)**b / (b * beta(a, b)) + CALL_SUBTEST( + // Add eps to rhs and lhs so that component-wise test doesn't result in + // nans when both outputs are zeros. + ArrayType num = x.pow(a) * (one - x).pow(b); + ArrayType denom = b * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp(); + ArrayType expected = betainc(a, b, x) + num / denom + eps; + ArrayType test = betainc(a, b + one, x) + eps; + verify_component_wise(test, expected);); + } +#endif +} + +void test_special_functions() +{ + CALL_SUBTEST_1(array_special_functions<ArrayXf>()); + CALL_SUBTEST_2(array_special_functions<ArrayXd>()); +}
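All of these suites are registered through ei_add_test, so CTest drives them once they are built. Assuming a configured Eigen build tree (buildtests is the stock build-all-tests target; the -R pattern is only an example):

    make buildtests
    ctest -R "special_functions|cxx11_tensor_scan"

This exercises just the special-functions and scan coverage introduced here.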