3 files changed, 105 insertions, 11 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 4de3b5e2e..c5bcf02aa 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -111,7 +111,7 @@ template<> inline v4i  ei_pmul(const v4i&   a, const v4i&   b)
   USE_CONST_v1i;
   USE_CONST_v16i_;
 
-  // Get the absolute values 
+  // Get the absolute values
   a1  = vec_abs(a);
   b1  = vec_abs(b);
 
@@ -146,7 +146,7 @@ template<> inline v4f  ei_pdiv(const v4f&   a, const v4f&   b) {
 
   // Altivec does not offer a divide instruction, we have to do a reciprocal approximation
   y_0 = vec_re(b);
-  
+
   // Do one Newton-Raphson iteration to get the needed accuracy
   t = vec_nmsub(y_0, b, v1f);
   y_1 = vec_madd(y_0, t, y_0);
@@ -260,6 +260,17 @@ template<> inline int    ei_pfirst(const v4i&  a)
   return ai[0];
 }
 
+template<> EIGEN_STRONG_INLINE v4f ei_preverse(const v4f& a)
+{ // Returns a with its four 32-bit elements in reverse order (byte indices 12..15 pick the last element first).
+  static const __vector unsigned char reverse_mask = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
+  return (v4f)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask);
+}
+template<> EIGEN_STRONG_INLINE v4i ei_preverse(const v4i& a)
+{ // Returns a with its four 32-bit elements in reverse order (byte indices 12..15 pick the last element first).
+  static const __vector unsigned char reverse_mask = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
+  return (v4i)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask);
+}
+
 inline v4f ei_preduxp(const v4f* vecs)
 {
   v4f v[4], sum[4];
diff --git a/Eigen/src/Geometry/Scaling.h b/Eigen/src/Geometry/Scaling.h
index 435efe60d..22e356964 100644
--- a/Eigen/src/Geometry/Scaling.h
+++ b/Eigen/src/Geometry/Scaling.h
@@ -120,28 +120,28 @@ public:
 };
 
 /** Constructs a uniform scaling from scale factor \a s */
-inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
+static inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
 /** Constructs a uniform scaling from scale factor \a s */
-inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }
+static inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }
 /** Constructs a uniform scaling from scale factor \a s */
-template<typename RealScalar> inline UniformScaling<std::complex<RealScalar> >
-Scaling(const std::complex<RealScalar>& s)
+template<typename RealScalar>
+static inline UniformScaling<std::complex<RealScalar> > Scaling(const std::complex<RealScalar>& s)
 { return UniformScaling<std::complex<RealScalar> >(s); }
 
 /** Constructs a 2D axis aligned scaling */
-template<typename Scalar> inline DiagonalMatrix<Scalar,2>
-Scaling(Scalar sx, Scalar sy)
+template<typename Scalar>
+static inline DiagonalMatrix<Scalar,2> Scaling(Scalar sx, Scalar sy)
 { return DiagonalMatrix<Scalar,2>(sx, sy); }
 /** Constructs a 3D axis aligned scaling */
-template<typename Scalar> inline DiagonalMatrix<Scalar,3>
-Scaling(Scalar sx, Scalar sy, Scalar sz)
+template<typename Scalar>
+static inline DiagonalMatrix<Scalar,3> Scaling(Scalar sx, Scalar sy, Scalar sz)
 { return DiagonalMatrix<Scalar,3>(sx, sy, sz); }
 
 /** Constructs an axis aligned scaling expression from vector expression \a coeffs
   * This is an alias for coeffs.asDiagonal()
   */
 template<typename Derived>
-inline const DiagonalMatrixWrapper<Derived> Scaling(const MatrixBase<Derived>& coeffs)
+static inline const DiagonalMatrixWrapper<Derived> Scaling(const MatrixBase<Derived>& coeffs)
 { return coeffs.asDiagonal(); }
 
 /** \addtogroup Geometry_Module */
diff --git a/bench/bench_reverse.cpp b/bench/bench_reverse.cpp
new file mode 100644
index 000000000..2cedc0d3d
--- /dev/null
+++ b/bench/bench_reverse.cpp
@@ -0,0 +1,83 @@
+
+#include <Eigen/Array>
+#include <bench/BenchUtil.h>
+using namespace Eigen;
+
+#ifndef REPEAT
+#define REPEAT 100000
+#endif
+
+#ifndef TRIES
+#define TRIES 20
+#endif
+
+typedef double Scalar;
+
+template <typename MatrixType>
+__attribute__ ((noinline)) void bench_reverse(const MatrixType& m)
+{ // Times b = a.reverse() on random matrices that have the same dimensions as m, and prints one result line.
+  int rows = m.rows();
+  int cols = m.cols();
+  int size = m.size();
+  // Fewer inner iterations for larger matrices: repeats is inversely proportional to size.
+  int repeats = (REPEAT*1000)/size;
+  MatrixType a = MatrixType::Random(rows,cols);
+  MatrixType b = MatrixType::Random(rows,cols);
+
+  BenchTimer timerB;
+
+  Scalar acc = 0;
+  int r = ei_random<int>(0,rows-1);
+  int c = ei_random<int>(0,cols-1);
+  for (int t=0; t<TRIES; ++t)
+  {
+    timerB.start();
+    for (int k=0; k<repeats; ++k)
+    {
+      asm("#begin foo"); // presumably a marker to find the assignment in the generated assembly
+      b = a.reverse();
+      asm("#end foo");
+      acc += b.coeff(r,c); // read one randomly chosen coefficient so the result is used
+    }
+    timerB.stop();
+  }
+  // Label the line by whether the row count is dynamic or fixed at compile time, then print the timing.
+  if (MatrixType::RowsAtCompileTime==Dynamic)
+    std::cout << "dyn   ";
+  else
+    std::cout << "fixed ";
+  std::cout << rows << " x " << cols << " \t"
+            << (timerB.value() * REPEAT) / repeats << "s "
+            << "(" << 1e-6 * size*repeats/timerB.value() << " MFLOPS)\t";
+
+  std::cout << "\n";
+  // acc is consumed here so the compiler cannot discard the benchmarked assignments
+  if (acc==123)
+    std::cout << acc;
+}
+
+int main(int argc, char* argv[])
+{
+  const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,0};
+  std::cout << "kind  rows x cols \ttime\t(rate)";
+  // dynsizes ends with a 0 sentinel, so the loop below stops at the first
+  // non-positive entry. The index is a plain int because `uint` is a
+  // non-standard typedef that is not available on every platform.
+  std::cout << "\n";
+  for (int i=0; dynsizes[i]>0; ++i)
+  {
+    bench_reverse(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
+    bench_reverse(Matrix<Scalar,Dynamic,1>(dynsizes[i]*dynsizes[i]));
+  }
+//   bench_reverse(Matrix<Scalar,2,2>());
+//   bench_reverse(Matrix<Scalar,3,3>());
+//   bench_reverse(Matrix<Scalar,4,4>());
+//   bench_reverse(Matrix<Scalar,5,5>());
+//   bench_reverse(Matrix<Scalar,6,6>());
+//   bench_reverse(Matrix<Scalar,7,7>());
+//   bench_reverse(Matrix<Scalar,8,8>());
+//   bench_reverse(Matrix<Scalar,12,12>());
+//   bench_reverse(Matrix<Scalar,16,16>());
+  return 0;
+}
+