diff options
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/PacketMath.h | 15 | ||||
-rw-r--r-- | Eigen/src/Geometry/Scaling.h | 18 | ||||
-rw-r--r-- | bench/bench_reverse.cpp | 83 |
3 files changed, 105 insertions, 11 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 4de3b5e2e..c5bcf02aa 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -111,7 +111,7 @@ template<> inline v4i ei_pmul(const v4i& a, const v4i& b) USE_CONST_v1i; USE_CONST_v16i_; - // Get the absolute values + // Get the absolute values a1 = vec_abs(a); b1 = vec_abs(b); @@ -146,7 +146,7 @@ template<> inline v4f ei_pdiv(const v4f& a, const v4f& b) { // Altivec does not offer a divide instruction, we have to do a reciprocal approximation y_0 = vec_re(b); - + // Do one Newton-Raphson iteration to get the needed accuracy t = vec_nmsub(y_0, b, v1f); y_1 = vec_madd(y_0, t, y_0); @@ -260,6 +260,17 @@ template<> inline int ei_pfirst(const v4i& a) return ai[0]; } +template<> EIGEN_STRONG_INLINE v4f ei_preverse(const v4f& a) +{ + static const unsigned char __attribute__(aligned(16)) reverse_mask = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; + return (v4f)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask); +} +template<> EIGEN_STRONG_INLINE v4i ei_preverse(const v4i& a) +{ + static const unsigned char __attribute__(aligned(16)) reverse_mask = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; + return (v4i)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask); +} + inline v4f ei_preduxp(const v4f* vecs) { v4f v[4], sum[4]; diff --git a/Eigen/src/Geometry/Scaling.h b/Eigen/src/Geometry/Scaling.h index 435efe60d..22e356964 100644 --- a/Eigen/src/Geometry/Scaling.h +++ b/Eigen/src/Geometry/Scaling.h @@ -120,28 +120,28 @@ public: }; /** Constructs a uniform scaling from scale factor \a s */ -inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); } +static inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); } /** Constructs a uniform scaling from scale factor \a s */ -inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); } +static inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); } /** Constructs a uniform scaling from scale factor \a s */ -template<typename RealScalar> inline UniformScaling<std::complex<RealScalar> > -Scaling(const std::complex<RealScalar>& s) +template<typename RealScalar> +static inline UniformScaling<std::complex<RealScalar> > Scaling(const std::complex<RealScalar>& s) { return UniformScaling<std::complex<RealScalar> >(s); } /** Constructs a 2D axis aligned scaling */ -template<typename Scalar> inline DiagonalMatrix<Scalar,2> -Scaling(Scalar sx, Scalar sy) +template<typename Scalar> +static inline DiagonalMatrix<Scalar,2> Scaling(Scalar sx, Scalar sy) { return DiagonalMatrix<Scalar,2>(sx, sy); } /** Constructs a 3D axis aligned scaling */ -template<typename Scalar> inline DiagonalMatrix<Scalar,3> -Scaling(Scalar sx, Scalar sy, Scalar sz) +template<typename Scalar> +static inline DiagonalMatrix<Scalar,3> Scaling(Scalar sx, Scalar sy, Scalar sz) { return DiagonalMatrix<Scalar,3>(sx, sy, sz); } /** Constructs an axis aligned scaling expression from vector expression \a coeffs * This is an alias for coeffs.asDiagonal() */ template<typename Derived> -inline const DiagonalMatrixWrapper<Derived> Scaling(const MatrixBase<Derived>& coeffs) +static inline const DiagonalMatrixWrapper<Derived> Scaling(const MatrixBase<Derived>& coeffs) { return coeffs.asDiagonal(); } /** \addtogroup Geometry_Module */ diff --git a/bench/bench_reverse.cpp b/bench/bench_reverse.cpp new file mode 100644 index 000000000..2cedc0d3d --- /dev/null +++ b/bench/bench_reverse.cpp @@ -0,0 +1,83 @@ + +#include <Eigen/Array> +#include <bench/BenchUtil.h> +using namespace Eigen; + +#ifndef REPEAT +#define REPEAT 100000 +#endif + +#ifndef TRIES +#define TRIES 20 +#endif + +typedef double Scalar; + +template <typename MatrixType> +__attribute__ ((noinline)) void bench_reverse(const MatrixType& m) +{ + int rows = m.rows(); + int cols = m.cols(); + int size = m.size(); + + int repeats = (REPEAT*1000)/size; + MatrixType a = MatrixType::Random(rows,cols); + MatrixType b = MatrixType::Random(rows,cols); + + BenchTimer timerB, timerH, timerV; + + Scalar acc = 0; + int r = ei_random<int>(0,rows-1); + int c = ei_random<int>(0,cols-1); + for (int t=0; t<TRIES; ++t) + { + timerB.start(); + for (int k=0; k<repeats; ++k) + { + asm("#begin foo"); + b = a.reverse(); + asm("#end foo"); + acc += b.coeff(r,c); + } + timerB.stop(); + } + + if (MatrixType::RowsAtCompileTime==Dynamic) + std::cout << "dyn "; + else + std::cout << "fixed "; + std::cout << rows << " x " << cols << " \t" + << (timerB.value() * REPEAT) / repeats << "s " + << "(" << 1e-6 * size*repeats/timerB.value() << " MFLOPS)\t"; + + std::cout << "\n"; + // make sure the compiler does not optimize too much + if (acc==123) + std::cout << acc; +} + +int main(int argc, char* argv[]) +{ + const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,0}; + std::cout << "size no sqrt standard"; +// #ifdef BENCH_GSL +// std::cout << " GSL (standard + double + ATLAS) "; +// #endif + std::cout << "\n"; + for (uint i=0; dynsizes[i]>0; ++i) + { + bench_reverse(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i])); + bench_reverse(Matrix<Scalar,Dynamic,1>(dynsizes[i]*dynsizes[i])); + } +// bench_reverse(Matrix<Scalar,2,2>()); +// bench_reverse(Matrix<Scalar,3,3>()); +// bench_reverse(Matrix<Scalar,4,4>()); +// bench_reverse(Matrix<Scalar,5,5>()); +// bench_reverse(Matrix<Scalar,6,6>()); +// bench_reverse(Matrix<Scalar,7,7>()); +// bench_reverse(Matrix<Scalar,8,8>()); +// bench_reverse(Matrix<Scalar,12,12>()); +// bench_reverse(Matrix<Scalar,16,16>()); + return 0; +} + |