From 15ed32dd6ef53111e6e29428418a65e1d3547c12 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 16 Jul 2009 16:21:26 +0200 Subject: add other stable norm impl. in the benchmark --- bench/bench_norm.cpp | 75 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 14 deletions(-) diff --git a/bench/bench_norm.cpp b/bench/bench_norm.cpp index e06d06417..8e4fefdd7 100644 --- a/bench/bench_norm.cpp +++ b/bench/bench_norm.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include "BenchTimer.h" using namespace Eigen; using namespace std; @@ -27,22 +27,54 @@ EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v) { typedef typename T::Scalar Scalar; int n = v.size(); - Scalar scale = 1; - Scalar ssq = 0; + Scalar scale = 0; + Scalar ssq = 1; for (int i=0;i= ax) + { + ssq += ei_abs2(ax/scale); + } + else { ssq = Scalar(1) + ssq * ei_abs2(scale/ax); scale = ax; } - else - ssq += ei_abs2(ax/scale); } return scale * ei_sqrt(ssq); } +template +EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v) +{ + typedef typename T::Scalar Scalar; + Scalar s = v.cwise().abs().maxCoeff(); + return s*(v/s).norm(); +} + +template +EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v) +{ + const int blockSize = 4096; + typedef typename T::Scalar Scalar; + Scalar s = 0; + Scalar ssq = 0; + for (int bi=0; bi::type,Eigen::Dynamic,1,Eigen::ForceAligned> sv(v,bi,0,r,1); + Scalar m = sv.cwise().abs().maxCoeff(); + if (m>s) + { + ssq = ssq * ei_abs2(s/m); + s = m; + } + ssq += (sv/s).squaredNorm(); + } + return s*ei_sqrt(ssq); +} + template EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) { @@ -210,6 +242,8 @@ void check_accuracy(double basef, double based, int s) std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n"; std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\n"; std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n"; + std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\n"; + std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n"; } void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) @@ -228,11 +262,13 @@ void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast()) << "\t" << blueNorm(vd.cast()) << "\n"; std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast()) << "\t" << blueNorm(vd.cast()) << "\n"; std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast()) << "\t" << lapackNorm(vd.cast()) << "\n"; + std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" << twopassNorm(vf.cast()) << "\t" << twopassNorm(vd.cast()) << "\n"; + std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast()) << "\t" << bl2passNorm(vd.cast()) << "\n"; } int main(int argc, char** argv) { - int tries = 5; + int tries = 10; int iters = 100000; double y = 1.1345743233455785456788e12 * ei_random(); VectorXf v = VectorXf::Ones(1024) * y; @@ -240,33 +276,37 @@ int main(int argc, char** argv) std::cerr << "Performance (out of cache):\n"; { int iters = 1; - VectorXf vf = VectorXf::Ones(1024*1024*32) * y; - VectorXd vd = VectorXd::Ones(1024*1024*32) * y; + VectorXf vf = VectorXf::Random(1024*1024*32) * y; + VectorXd vd = VectorXd::Random(1024*1024*32) * y; BENCH_PERF(sqsumNorm); BENCH_PERF(blueNorm); BENCH_PERF(pblueNorm); BENCH_PERF(lapackNorm); BENCH_PERF(hypotNorm); + BENCH_PERF(twopassNorm); + BENCH_PERF(bl2passNorm); } std::cerr << "\nPerformance (in cache):\n"; { int iters = 100000; - VectorXf vf = VectorXf::Ones(512) * y; - VectorXd vd = VectorXd::Ones(512) * y; + VectorXf vf = VectorXf::Random(512) * y; + VectorXd vd = VectorXd::Random(512) * y; BENCH_PERF(sqsumNorm); BENCH_PERF(blueNorm); BENCH_PERF(pblueNorm); BENCH_PERF(lapackNorm); BENCH_PERF(hypotNorm); + BENCH_PERF(twopassNorm); + BENCH_PERF(bl2passNorm); } - +return 0; int s = 10000; double basef_ok = 1.1345743233455785456788e15; double based_ok = 1.1345743233455785456788e95; double basef_under = 1.1345743233455785456788e-27; - double based_under = 1.1345743233455785456788e-315; + double based_under = 1.1345743233455785456788e-303; double basef_over = 1.1345743233455785456788e+27; double based_over = 1.1345743233455785456788e+302; @@ -283,9 +323,16 @@ int main(int argc, char** argv) check_accuracy(basef_over, based_over, s); std::cerr << "\nVarying (over):\n"; - for (int k=0; k<5; ++k) + for (int k=0; k<1; ++k) { check_accuracy_var(20,27,190,302,s); std::cout << "\n"; } + + std::cerr << "\nVarying (under):\n"; + for (int k=0; k<1; ++k) + { + check_accuracy_var(-27,20,-302,-190,s); + std::cout << "\n"; + } } -- cgit v1.2.3