From 525da6a464c897d0fe4e401a65851bcd7567fc5a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 16 Jul 2009 14:20:36 +0200 Subject: bugfix in blueNorm --- bench/bench_norm.cpp | 126 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 79 insertions(+), 47 deletions(-) (limited to 'bench/bench_norm.cpp') diff --git a/bench/bench_norm.cpp b/bench/bench_norm.cpp index 76c8c574d..e06d06417 100644 --- a/bench/bench_norm.cpp +++ b/bench/bench_norm.cpp @@ -1,3 +1,4 @@ +#include #include #include "BenchTimer.h" using namespace Eigen; @@ -58,18 +59,23 @@ EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) return ei_sqrt(v(0)); } +#ifdef EIGEN_VECTORIZE Packet4f ei_plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); } Packet2d ei_plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); } Packet4f ei_pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); } Packet2d ei_pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); } +#endif template EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) { + #ifndef EIGEN_VECTORIZE + return v.blueNorm(); + #else typedef typename T::Scalar Scalar; - static int nmax; + static int nmax = 0; static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr; int n; @@ -79,8 +85,8 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) Scalar abig, eps; nbig = std::numeric_limits::max(); // largest integer - ibeta = NumTraits::Base; // base for floating-point numbers - it = NumTraits::Mantissa; // number of base-beta digits in mantissa + ibeta = std::numeric_limits::radix; //NumTraits::Base; // base for floating-point numbers + it = std::numeric_limits::digits; //NumTraits::Mantissa; // number of base-beta digits in mantissa iemin = std::numeric_limits::min_exponent; // minimum exponent iemax = std::numeric_limits::max_exponent; // maximum exponent rbig = std::numeric_limits::max(); // largest floating-point number @@ -92,23 +98,23 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) ei_assert(false && "the algorithm cannot be guaranteed on this computer"); } iexp = -((1-iemin)/2); - b1 = bexp(ibeta, iexp); // lower boundary of midrange + b1 = std::pow(ibeta, iexp); // lower boundary of midrange iexp = (iemax + 1 - it)/2; - b2 = bexp(ibeta,iexp); // upper boundary of midrange + b2 = std::pow(ibeta,iexp); // upper boundary of midrange iexp = (2-iemin)/2; - s1m = bexp(ibeta,iexp); // scaling factor for lower range + s1m = std::pow(ibeta,iexp); // scaling factor for lower range iexp = - ((iemax+it)/2); - s2m = bexp(ibeta,iexp); // scaling factor for upper range + s2m = std::pow(ibeta,iexp); // scaling factor for upper range overfl = rbig*s2m; // overfow boundary for abig - eps = bexp(ibeta, 1-it); + eps = std::pow(ibeta, 1-it); relerr = ei_sqrt(eps); // tolerance for neglecting asml abig = 1.0/eps - 1.0; if (Scalar(nbig)>abig) nmax = abig; // largest safe n else nmax = nbig; } - + typedef typename ei_packet_traits::type Packet; const int ps = ei_packet_traits::size; Packet pasml = ei_pset1(Scalar(0)); @@ -173,6 +179,7 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) return abig; else return abig * ei_sqrt(Scalar(1) + ei_abs2(asml/abig)); + #endif } #define BENCH_PERF(NRM) { \ @@ -196,7 +203,7 @@ void check_accuracy(double basef, double based, int s) double yd = based * ei_abs(ei_random()); VectorXf vf = VectorXf::Ones(s) * yf; VectorXd vd = VectorXd::Ones(s) * yd; - + std::cout << "reference\t" << ei_sqrt(double(s))*yf << "\t" << ei_sqrt(double(s))*yd << "\n"; std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n"; std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n"; @@ -205,55 +212,80 @@ void check_accuracy(double basef, double based, int s) std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n"; } -int main(int argc, char** argv) +void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) +{ + VectorXf vf(s); + VectorXd vd(s); + for (int i=0; i()) * std::pow(double(10), ei_random(ef0,ef1)); + vd[i] = ei_abs(ei_random()) * std::pow(double(10), ei_random(ed0,ed1)); + } + + //std::cout << "reference\t" << ei_sqrt(double(s))*yf << "\t" << ei_sqrt(double(s))*yd << "\n"; + std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast()) << "\t" << sqsumNorm(vd.cast()) << "\n"; + std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast()) << "\t" << hypotNorm(vd.cast()) << "\n"; + std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast()) << "\t" << blueNorm(vd.cast()) << "\n"; + std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast()) << "\t" << blueNorm(vd.cast()) << "\n"; + std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast()) << "\t" << lapackNorm(vd.cast()) << "\n"; +} + +int main(int argc, char** argv) { int tries = 5; int iters = 100000; double y = 1.1345743233455785456788e12 * ei_random(); VectorXf v = VectorXf::Ones(1024) * y; - -// std::cerr << "Performance (out of cache):\n"; -// { -// int iters = 1; -// VectorXf vf = VectorXf::Ones(1024*1024*32) * y; -// VectorXd vd = VectorXd::Ones(1024*1024*32) * y; -// BENCH_PERF(sqsumNorm); -// BENCH_PERF(blueNorm); -// BENCH_PERF(pblueNorm); -// BENCH_PERF(lapackNorm); -// BENCH_PERF(hypotNorm); -// } -// -// std::cerr << "\nPerformance (in cache):\n"; -// { -// int iters = 100000; -// VectorXf vf = VectorXf::Ones(512) * y; -// VectorXd vd = VectorXd::Ones(512) * y; -// BENCH_PERF(sqsumNorm); -// BENCH_PERF(blueNorm); -// BENCH_PERF(pblueNorm); -// BENCH_PERF(lapackNorm); -// BENCH_PERF(hypotNorm); -// } - + + std::cerr << "Performance (out of cache):\n"; + { + int iters = 1; + VectorXf vf = VectorXf::Ones(1024*1024*32) * y; + VectorXd vd = VectorXd::Ones(1024*1024*32) * y; + BENCH_PERF(sqsumNorm); + BENCH_PERF(blueNorm); + BENCH_PERF(pblueNorm); + BENCH_PERF(lapackNorm); + BENCH_PERF(hypotNorm); + } + + std::cerr << "\nPerformance (in cache):\n"; + { + int iters = 100000; + VectorXf vf = VectorXf::Ones(512) * y; + VectorXd vd = VectorXd::Ones(512) * y; + BENCH_PERF(sqsumNorm); + BENCH_PERF(blueNorm); + BENCH_PERF(pblueNorm); + BENCH_PERF(lapackNorm); + BENCH_PERF(hypotNorm); + } + int s = 10000; - double basef_ok = 1.1345743233455785456788e12; - double based_ok = 1.1345743233455785456788e32; - - double basef_under = 1.1345743233455785456788e-23; - double based_under = 1.1345743233455785456788e-180; - + double basef_ok = 1.1345743233455785456788e15; + double based_ok = 1.1345743233455785456788e95; + + double basef_under = 1.1345743233455785456788e-27; + double based_under = 1.1345743233455785456788e-315; + double basef_over = 1.1345743233455785456788e+27; - double based_over = 1.1345743233455785456788e+185; - + double based_over = 1.1345743233455785456788e+302; + std::cout.precision(20); - + std::cerr << "\nNo under/overflow:\n"; check_accuracy(basef_ok, based_ok, s); - + std::cerr << "\nUnderflow:\n"; check_accuracy(basef_under, based_under, 1); - + std::cerr << "\nOverflow:\n"; check_accuracy(basef_over, based_over, s); + + std::cerr << "\nVarying (over):\n"; + for (int k=0; k<5; ++k) + { + check_accuracy_var(20,27,190,302,s); + std::cout << "\n"; + } } -- cgit v1.2.3