From f5f288b741b173a271b9c939ac5231639135dd93 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 22 Nov 2010 18:49:12 +0100
Subject: split level 1 and 2 implementation files into smaller ones and fix a
 couple of numerical and tricky issues discovered by the lapack test suite

---
 blas/common.h           |   4 +-
 blas/complex_double.cpp |   2 +
 blas/complex_single.cpp |   2 +
 blas/double.cpp         |   2 +
 blas/level1_cplx_impl.h | 141 ++++++++++++++
 blas/level1_impl.h      | 242 ++---------------------
 blas/level1_real_impl.h | 115 +++++++++++
 blas/level2_cplx_impl.h | 285 +++++++++++++++++++++++++++
 blas/level2_impl.h      | 505 ++++--------------------------------------------
 blas/level2_real_impl.h | 225 +++++++++++++++++++++
 blas/level3_impl.h      |  64 +++---
 blas/single.cpp         |   2 +
 12 files changed, 874 insertions(+), 715 deletions(-)
 create mode 100644 blas/level1_cplx_impl.h
 create mode 100644 blas/level1_real_impl.h
 create mode 100644 blas/level2_cplx_impl.h
 create mode 100644 blas/level2_real_impl.h

(limited to 'blas')

diff --git a/blas/common.h b/blas/common.h
index 290ae112a..d073361ed 100644
--- a/blas/common.h
+++ b/blas/common.h
@@ -133,7 +133,7 @@ T* get_compact_vector(T* x, int n, int incx)
 {
   if(incx==1)
     return x;
-  
+
   T* ret = new Scalar[n];
   if(incx<0) vector(ret,n) = vector(x,n,-incx).reverse();
   else       vector(ret,n) = vector(x,n, incx);
@@ -145,7 +145,7 @@ T* copy_back(T* x_cpy, T* x, int n, int incx)
 {
   if(x_cpy==x)
     return 0;
-  
+
   if(incx<0) vector(x,n,-incx).reverse() = vector(x_cpy,n);
   else       vector(x,n, incx)           = vector(x_cpy,n);
   return x_cpy;
diff --git a/blas/complex_double.cpp b/blas/complex_double.cpp
index bd7674cda..dad87a60d 100644
--- a/blas/complex_double.cpp
+++ b/blas/complex_double.cpp
@@ -29,5 +29,7 @@
 #define ISCOMPLEX     1
 
 #include "level1_impl.h"
+#include "level1_cplx_impl.h"
 #include "level2_impl.h"
+#include "level2_cplx_impl.h"
 #include "level3_impl.h"
diff --git a/blas/complex_single.cpp b/blas/complex_single.cpp
index 4cf19378f..11bbf629f 100644
--- a/blas/complex_single.cpp
+++ b/blas/complex_single.cpp
@@ -29,5 +29,7 @@
 #define ISCOMPLEX     1
 
 #include "level1_impl.h"
+#include "level1_cplx_impl.h"
 #include "level2_impl.h"
+#include "level2_cplx_impl.h"
 #include "level3_impl.h"
diff --git a/blas/double.cpp b/blas/double.cpp
index 10373d585..c6927ee22 100644
--- a/blas/double.cpp
+++ b/blas/double.cpp
@@ -28,5 +28,7 @@
 #define ISCOMPLEX     0
 
 #include "level1_impl.h"
+#include "level1_real_impl.h"
 #include "level2_impl.h"
+#include "level2_real_impl.h"
 #include "level3_impl.h"
diff --git a/blas/level1_cplx_impl.h b/blas/level1_cplx_impl.h
new file mode 100644
index 000000000..e268bb812
--- /dev/null
+++ b/blas/level1_cplx_impl.h
@@ -0,0 +1,141 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "common.h"
+
+struct scalar_norm1_op {
+  typedef RealScalar result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_norm1_op)
+  inline RealScalar operator() (const Scalar& a) const { return internal::norm1(a); }
+};
+namespace Eigen {
+  namespace internal {
+    template<> struct functor_traits<scalar_norm1_op >
+    {
+      enum { Cost = 3 * NumTraits<Scalar>::AddCost, PacketAccess = 0 };
+    };
+  }
+}
+
+// computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum
+// res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n
+RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),asum_)(int *n, RealScalar *px, int *incx)
+{
+//   std::cerr << "__asum " << *n << " " << *incx << "\n";
+  Complex* x = reinterpret_cast<Complex*>(px);
+
+  if(*n<=0) return 0;
+
+  if(*incx==1)  return vector(x,*n).unaryExpr<scalar_norm1_op>().sum();
+  else          return vector(x,*n,std::abs(*incx)).unaryExpr<scalar_norm1_op>().sum();
+}
+
+// computes a dot product of a conjugated vector with another vector.
+Scalar EIGEN_BLAS_FUNC(dotc)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
+{
+//   std::cerr << "_dotc " << *n << " " << *incx << " " << *incy << "\n";
+
+  if(*n<=0) return 0;
+
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+
+  Scalar res;
+  if(*incx==1 && *incy==1)    res = (vector(x,*n).dot(vector(y,*n)));
+  else if(*incx>0 && *incy>0) res = (vector(x,*n,*incx).dot(vector(y,*n,*incy)));
+  else if(*incx<0 && *incy>0) res = (vector(x,*n,-*incx).reverse().dot(vector(y,*n,*incy)));
+  else if(*incx>0 && *incy<0) res = (vector(x,*n,*incx).dot(vector(y,*n,-*incy).reverse()));
+  else if(*incx<0 && *incy<0) res = (vector(x,*n,-*incx).reverse().dot(vector(y,*n,-*incy).reverse()));
+  return res;
+}
+
+// computes a vector-vector dot product without complex conjugation.
+Scalar EIGEN_BLAS_FUNC(dotu)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
+{
+//   std::cerr << "_dotu " << *n << " " << *incx << " " << *incy << "\n";
+
+  if(*n<=0) return 0;
+
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  Scalar res;
+  if(*incx==1 && *incy==1)    res = (vector(x,*n).cwiseProduct(vector(y,*n))).sum();
+  else if(*incx>0 && *incy>0) res = (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum();
+  else if(*incx<0 && *incy>0) res = (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,*incy))).sum();
+  else if(*incx>0 && *incy<0) res = (vector(x,*n,*incx).cwiseProduct(vector(y,*n,-*incy).reverse())).sum();
+  else if(*incx<0 && *incy<0) res = (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,-*incy).reverse())).sum();
+  return res;
+}
+
+RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),nrm2_)(int *n, RealScalar *px, int *incx)
+{
+//   std::cerr << "__nrm2 " << *n << " " << *incx << "\n";
+  if(*n<=0) return 0;
+
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+
+  if(*incx==1)
+    return vector(x,*n).stableNorm();
+
+  return vector(x,*n,*incx).stableNorm();
+}
+
+int EIGEN_CAT(EIGEN_CAT(SCALAR_SUFFIX,REAL_SCALAR_SUFFIX),rot_)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps)
+{
+  if(*n<=0) return 0;
+
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  RealScalar c = *pc;
+  RealScalar s = *ps;
+
+  StridedVectorType vx(vector(x,*n,std::abs(*incx)));
+  StridedVectorType vy(vector(y,*n,std::abs(*incy)));
+
+  Reverse<StridedVectorType> rvx(vx);
+  Reverse<StridedVectorType> rvy(vy);
+
+  // TODO implement mixed real-scalar rotations
+       if(*incx<0 && *incy>0) internal::apply_rotation_in_the_plane(rvx, vy, JacobiRotation<Scalar>(c,s));
+  else if(*incx>0 && *incy<0) internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation<Scalar>(c,s));
+  else                        internal::apply_rotation_in_the_plane(vx, vy,  JacobiRotation<Scalar>(c,s));
+
+  return 0;
+}
+
+int EIGEN_CAT(EIGEN_CAT(SCALAR_SUFFIX,REAL_SCALAR_SUFFIX),scal_)(int *n, RealScalar *palpha, RealScalar *px, int *incx)
+{
+  if(*n<=0) return 0;
+
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  RealScalar alpha = *palpha;
+
+//   std::cerr << "__scal " << *n << " " << alpha << " " << *incx << "\n";
+
+  if(*incx==1)  vector(x,*n) *= alpha;
+  else          vector(x,*n,std::abs(*incx)) *= alpha;
+
+  return 0;
+}
+
diff --git a/blas/level1_impl.h b/blas/level1_impl.h
index bc99ddb57..fb68abce0 100644
--- a/blas/level1_impl.h
+++ b/blas/level1_impl.h
@@ -41,209 +41,51 @@ int EIGEN_BLAS_FUNC(axpy)(int *n, RealScalar *palpha, RealScalar *px, int *incx,
   return 0;
 }
 
-#if !ISCOMPLEX
-// computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum
-// res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n
-RealScalar EIGEN_BLAS_FUNC(asum)(int *n, RealScalar *px, int *incx)
-{
-//   std::cerr << "_asum " << *n << " " << *incx << "\n";
-
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-
-  if(*n<=0) return 0;
-
-  if(*incx==1)  return vector(x,*n).cwiseAbs().sum();
-  else          return vector(x,*n,std::abs(*incx)).cwiseAbs().sum();
-}
-#else
-
-struct scalar_norm1_op {
-  typedef RealScalar result_type;
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_norm1_op)
-  inline RealScalar operator() (const Scalar& a) const { return internal::norm1(a); }
-};
-namespace Eigen {
-  namespace internal {
-    template<> struct functor_traits<scalar_norm1_op >
-    {
-      enum { Cost = 3 * NumTraits<Scalar>::AddCost, PacketAccess = 0 };
-    };
-  }
-}
-
-RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),asum_)(int *n, RealScalar *px, int *incx)
-{
-//   std::cerr << "__asum " << *n << " " << *incx << "\n";
-
-  Complex* x = reinterpret_cast<Complex*>(px);
-
-  if(*n<=0) return 0;
-
-  if(*incx==1)  return vector(x,*n).unaryExpr<scalar_norm1_op>().sum();
-  else          return vector(x,*n,std::abs(*incx)).unaryExpr<scalar_norm1_op>().sum();
-}
-#endif
-
 int EIGEN_BLAS_FUNC(copy)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
 {
-//   std::cerr << "_copy " << *n << " " << *incx << " " << *incy << "\n";
-
   if(*n<=0) return 0;
 
   Scalar* x = reinterpret_cast<Scalar*>(px);
   Scalar* y = reinterpret_cast<Scalar*>(py);
 
-  if(*incx==1 && *incy==1)    vector(y,*n) = vector(x,*n);
-  else if(*incx>0 && *incy>0) vector(y,*n,*incy) = vector(x,*n,*incx);
-  else if(*incx>0 && *incy<0) vector(y,*n,-*incy).reverse() = vector(x,*n,*incx);
-  else if(*incx<0 && *incy>0) vector(y,*n,*incy) = vector(x,*n,-*incx).reverse();
-  else if(*incx<0 && *incy<0) vector(y,*n,-*incy).reverse() = vector(x,*n,-*incx).reverse();
+  // be carefull, *incx==0 is allowed !!
+  if(*incx==1 && *incy==1)
+    vector(y,*n) = vector(x,*n);
+  else
+  {
+    if(*incx<0) x = x - (*n-1)*(*incx);
+    if(*incy<0) y = y - (*n-1)*(*incy);
+    for(int i=0;i<*n;++i)
+    {
+      *y = *x;
+      x += *incx;
+      y += *incy;
+    }
+  }
 
   return 0;
 }
 
-// computes a vector-vector dot product.
-Scalar EIGEN_BLAS_FUNC(dot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
-{
-//   std::cerr << "_dot " << *n << " " << *incx << " " << *incy << "\n";
-
-  if(*n<=0) return 0;
-
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* y = reinterpret_cast<Scalar*>(py);
-
-  if(*incx==1 && *incy==1)    return (vector(x,*n).cwiseProduct(vector(y,*n))).sum();
-  else if(*incx>0 && *incy>0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum();
-  else if(*incx<0 && *incy>0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,*incy))).sum();
-  else if(*incx>0 && *incy<0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,-*incy).reverse())).sum();
-  else if(*incx<0 && *incy<0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,-*incy).reverse())).sum();
-  else return 0;
-}
-
 int EIGEN_CAT(EIGEN_CAT(i,SCALAR_SUFFIX),amax_)(int *n, RealScalar *px, int *incx)
 {
-//   std::cerr << "i_amax " << *n << " " << *incx << "\n";
-
   if(*n<=0) return 0;
-
   Scalar* x = reinterpret_cast<Scalar*>(px);
 
   DenseIndex ret;
-
   if(*incx==1)  vector(x,*n).cwiseAbs().maxCoeff(&ret);
   else          vector(x,*n,std::abs(*incx)).cwiseAbs().maxCoeff(&ret);
-
   return ret+1;
 }
 
-
-/*
-
-// computes a vector-vector dot product with extended precision.
-Scalar EIGEN_BLAS_FUNC(sdot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
-{
-  // TODO
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* y = reinterpret_cast<Scalar*>(py);
-
-  if(*incx==1 && *incy==1)
-    return vector(x,*n).dot(vector(y,*n));
-
-  return vector(x,*n,*incx).dot(vector(y,*n,*incy));
-}
-
-*/
-
-#if ISCOMPLEX
-
-// computes a dot product of a conjugated vector with another vector.
-Scalar EIGEN_BLAS_FUNC(dotc)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
+int EIGEN_CAT(EIGEN_CAT(i,SCALAR_SUFFIX),amin_)(int *n, RealScalar *px, int *incx)
 {
-//   std::cerr << "_dotc " << *n << " " << *incx << " " << *incy << "\n";
-
   if(*n<=0) return 0;
-
   Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* y = reinterpret_cast<Scalar*>(py);
-
-  Scalar res;
-  if(*incx==1 && *incy==1)    res = (vector(x,*n).dot(vector(y,*n)));
-  else if(*incx>0 && *incy>0) res = (vector(x,*n,*incx).dot(vector(y,*n,*incy)));
-  else if(*incx<0 && *incy>0) res = (vector(x,*n,-*incx).reverse().dot(vector(y,*n,*incy)));
-  else if(*incx>0 && *incy<0) res = (vector(x,*n,*incx).dot(vector(y,*n,-*incy).reverse()));
-  else if(*incx<0 && *incy<0) res = (vector(x,*n,-*incx).reverse().dot(vector(y,*n,-*incy).reverse()));
-  return res;
-}
-
-// computes a vector-vector dot product without complex conjugation.
-Scalar EIGEN_BLAS_FUNC(dotu)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
-{
-//   std::cerr << "_dotu " << *n << " " << *incx << " " << *incy << "\n";
-
-  if(*n<=0) return 0;
-
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* y = reinterpret_cast<Scalar*>(py);
-  Scalar res;
-  if(*incx==1 && *incy==1)    res = (vector(x,*n).cwiseProduct(vector(y,*n))).sum();
-  else if(*incx>0 && *incy>0) res = (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum();
-  else if(*incx<0 && *incy>0) res = (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,*incy))).sum();
-  else if(*incx>0 && *incy<0) res = (vector(x,*n,*incx).cwiseProduct(vector(y,*n,-*incy).reverse())).sum();
-  else if(*incx<0 && *incy<0) res = (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,-*incy).reverse())).sum();
-  return res;
-}
-
-#endif // ISCOMPLEX
-
-#if !ISCOMPLEX
-// computes the Euclidean norm of a vector.
-Scalar EIGEN_BLAS_FUNC(nrm2)(int *n, RealScalar *px, int *incx)
-{
-//   std::cerr << "_nrm2 " << *n << " " << *incx << "\n";
-  if(*n<=0) return 0;
-
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-
-  if(*incx==1)  return vector(x,*n).norm();
-  else          return vector(x,*n,std::abs(*incx)).norm();
-}
-#else
-RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),nrm2_)(int *n, RealScalar *px, int *incx)
-{
-//   std::cerr << "__nrm2 " << *n << " " << *incx << "\n";
-  if(*n<=0) return 0;
-
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-
-  if(*incx==1)
-    return vector(x,*n).norm();
-
-  return vector(x,*n,*incx).norm();
-}
-#endif
-
-int EIGEN_BLAS_FUNC(rot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps)
-{
-//   std::cerr << "_rot " << *n << " " << *incx << " " << *incy << "\n";
-  if(*n<=0) return 0;
-
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* y = reinterpret_cast<Scalar*>(py);
-  Scalar c = *reinterpret_cast<Scalar*>(pc);
-  Scalar s = *reinterpret_cast<Scalar*>(ps);
-
-  StridedVectorType vx(vector(x,*n,std::abs(*incx)));
-  StridedVectorType vy(vector(y,*n,std::abs(*incy)));
-
-  Reverse<StridedVectorType> rvx(vx);
-  Reverse<StridedVectorType> rvy(vy);
-
-       if(*incx<0 && *incy>0) internal::apply_rotation_in_the_plane(rvx, vy, JacobiRotation<Scalar>(c,s));
-  else if(*incx>0 && *incy<0) internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation<Scalar>(c,s));
-  else                        internal::apply_rotation_in_the_plane(vx, vy,  JacobiRotation<Scalar>(c,s));
-
-
-  return 0;
+  
+  DenseIndex ret;
+  if(*incx==1)  vector(x,*n).cwiseAbs().minCoeff(&ret);
+  else          vector(x,*n,std::abs(*incx)).cwiseAbs().minCoeff(&ret);
+  return ret+1;
 }
 
 int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealScalar *ps)
@@ -306,29 +148,6 @@ int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealSc
   return 0;
 }
 
-#if !ISCOMPLEX
-/*
-// performs rotation of points in the modified plane.
-int EIGEN_BLAS_FUNC(rotm)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *param)
-{
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* y = reinterpret_cast<Scalar*>(py);
-
-  // TODO
-
-  return 0;
-}
-
-// computes the modified parameters for a Givens rotation.
-int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealScalar *x2, RealScalar *param)
-{
-  // TODO
-
-  return 0;
-}
-*/
-#endif // !ISCOMPLEX
-
 int EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *palpha, RealScalar *px, int *incx)
 {
   if(*n<=0) return 0;
@@ -336,35 +155,14 @@ int EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *palpha, RealScalar *px, int *incx)
   Scalar* x = reinterpret_cast<Scalar*>(px);
   Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
 
-//   std::cerr << "_scal " << *n << " " << alpha << " " << *incx << "\n";
-
-  if(*incx==1)  vector(x,*n) *= alpha;
-  else          vector(x,*n,std::abs(*incx)) *= alpha;
-
-  return 0;
-}
-
-#if ISCOMPLEX
-int EIGEN_CAT(EIGEN_CAT(SCALAR_SUFFIX,REAL_SCALAR_SUFFIX),scal_)(int *n, RealScalar *palpha, RealScalar *px, int *incx)
-{
-  if(*n<=0) return 0;
-
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  RealScalar alpha = *palpha;
-
-//   std::cerr << "__scal " << *n << " " << alpha << " " << *incx << "\n";
-
   if(*incx==1)  vector(x,*n) *= alpha;
   else          vector(x,*n,std::abs(*incx)) *= alpha;
 
   return 0;
 }
-#endif // ISCOMPLEX
 
 int EIGEN_BLAS_FUNC(swap)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
 {
-//   std::cerr << "_swap " << *n << " " << *incx << " " << *incy << "\n";
-
   if(*n<=0) return 0;
 
   Scalar* x = reinterpret_cast<Scalar*>(px);
diff --git a/blas/level1_real_impl.h b/blas/level1_real_impl.h
new file mode 100644
index 000000000..e18c40f0a
--- /dev/null
+++ b/blas/level1_real_impl.h
@@ -0,0 +1,115 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "common.h"
+
+// computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum
+// res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n
+RealScalar EIGEN_BLAS_FUNC(asum)(int *n, RealScalar *px, int *incx)
+{
+//   std::cerr << "_asum " << *n << " " << *incx << "\n";
+
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+
+  if(*n<=0) return 0;
+
+  if(*incx==1)  return vector(x,*n).cwiseAbs().sum();
+  else          return vector(x,*n,std::abs(*incx)).cwiseAbs().sum();
+}
+
+// computes a vector-vector dot product.
+Scalar EIGEN_BLAS_FUNC(dot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy)
+{
+//   std::cerr << "_dot " << *n << " " << *incx << " " << *incy << "\n";
+
+  if(*n<=0) return 0;
+
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+
+  if(*incx==1 && *incy==1)    return (vector(x,*n).cwiseProduct(vector(y,*n))).sum();
+  else if(*incx>0 && *incy>0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum();
+  else if(*incx<0 && *incy>0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,*incy))).sum();
+  else if(*incx>0 && *incy<0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,-*incy).reverse())).sum();
+  else if(*incx<0 && *incy<0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,-*incy).reverse())).sum();
+  else return 0;
+}
+
+// computes the Euclidean norm of a vector.
+// FIXME
+Scalar EIGEN_BLAS_FUNC(nrm2)(int *n, RealScalar *px, int *incx)
+{
+//   std::cerr << "_nrm2 " << *n << " " << *incx << "\n";
+  if(*n<=0) return 0;
+
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+
+  if(*incx==1)  return vector(x,*n).stableNorm();
+  else          return vector(x,*n,std::abs(*incx)).stableNorm();
+}
+
+int EIGEN_BLAS_FUNC(rot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps)
+{
+//   std::cerr << "_rot " << *n << " " << *incx << " " << *incy << "\n";
+  if(*n<=0) return 0;
+
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  Scalar c = *reinterpret_cast<Scalar*>(pc);
+  Scalar s = *reinterpret_cast<Scalar*>(ps);
+
+  StridedVectorType vx(vector(x,*n,std::abs(*incx)));
+  StridedVectorType vy(vector(y,*n,std::abs(*incy)));
+
+  Reverse<StridedVectorType> rvx(vx);
+  Reverse<StridedVectorType> rvy(vy);
+
+       if(*incx<0 && *incy>0) internal::apply_rotation_in_the_plane(rvx, vy, JacobiRotation<Scalar>(c,s));
+  else if(*incx>0 && *incy<0) internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation<Scalar>(c,s));
+  else                        internal::apply_rotation_in_the_plane(vx, vy,  JacobiRotation<Scalar>(c,s));
+
+
+  return 0;
+}
+
+/*
+// performs rotation of points in the modified plane.
+int EIGEN_BLAS_FUNC(rotm)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *param)
+{
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+
+  // TODO
+
+  return 0;
+}
+
+// computes the modified parameters for a Givens rotation.
+int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealScalar *x2, RealScalar *param)
+{
+  // TODO
+
+  return 0;
+}
+*/
diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h
new file mode 100644
index 000000000..cbbf4f3e9
--- /dev/null
+++ b/blas/level2_cplx_impl.h
@@ -0,0 +1,285 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "common.h"
+
+/**  ZHEMV  performs the matrix-vector  operation
+  *
+  *     y := alpha*A*x + beta*y,
+  *
+  *  where alpha and beta are scalars, x and y are n element vectors and
+  *  A is an n by n hermitian matrix.
+  */
+int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy)
+{
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  Scalar alpha  = *reinterpret_cast<Scalar*>(palpha);
+  Scalar beta   = *reinterpret_cast<Scalar*>(pbeta);
+
+  // check arguments
+  int info = 0;
+  if(UPLO(*uplo)==INVALID)        info = 1;
+  else if(*n<0)                   info = 2;
+  else if(*lda<std::max(1,*n))    info = 5;
+  else if(*incx==0)               info = 7;
+  else if(*incy==0)               info = 10;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"HEMV ",&info,6);
+
+  if(*n==0)
+    return 1;
+
+  Scalar* actual_x = get_compact_vector(x,*n,*incx);
+  Scalar* actual_y = get_compact_vector(y,*n,*incy);
+
+  if(beta!=Scalar(1))
+  {
+    if(beta==Scalar(0)) vector(actual_y, *n).setZero();
+    else                vector(actual_y, *n) *= beta;
+  }
+
+  if(alpha!=Scalar(0))
+  {
+    // TODO performs a direct call to the underlying implementation function
+         if(UPLO(*uplo)==UP) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView<Upper>() * (alpha * vector(actual_x,*n));
+    else if(UPLO(*uplo)==LO) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView<Lower>() * (alpha * vector(actual_x,*n));
+  }
+
+  if(actual_x!=x) delete[] actual_x;
+  if(actual_y!=y) delete[] copy_back(actual_y,y,*n,*incy);
+
+  return 1;
+}
+
+/**  ZHBMV  performs the matrix-vector  operation
+  *
+  *     y := alpha*A*x + beta*y,
+  *
+  *  where alpha and beta are scalars, x and y are n element vectors and
+  *  A is an n by n hermitian band matrix, with k super-diagonals.
+  */
+// int EIGEN_BLAS_FUNC(hbmv)(char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda,
+//                           RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
+// {
+//   return 1;
+// }
+
+/**  ZHPMV  performs the matrix-vector operation
+  *
+  *     y := alpha*A*x + beta*y,
+  *
+  *  where alpha and beta are scalars, x and y are n element vectors and
+  *  A is an n by n hermitian matrix, supplied in packed form.
+  */
+// int EIGEN_BLAS_FUNC(hpmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
+// {
+//   return 1;
+// }
+
+/**  ZHPR    performs the hermitian rank 1 operation
+  *
+  *     A := alpha*x*conjg( x' ) + A,
+  *
+  *  where alpha is a real scalar, x is an n element vector and A is an
+  *  n by n hermitian matrix, supplied in packed form.
+  */
+// int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *alpha, RealScalar *x, int *incx, RealScalar *ap)
+// {
+//   return 1;
+// }
+
+/**  ZHPR2  performs the hermitian rank 2 operation
+  *
+  *     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
+  *
+  *  where alpha is a scalar, x and y are n element vectors and A is an
+  *  n by n hermitian matrix, supplied in packed form.
+  */
+// int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *x, int *incx, RealScalar *y, int *incy, RealScalar *ap)
+// {
+//   return 1;
+// }
+
+/**  ZHER   performs the hermitian rank 1 operation
+  *
+  *     A := alpha*x*conjg( x' ) + A,
+  *
+  *  where alpha is a real scalar, x is an n element vector and A is an
+  *  n by n hermitian matrix.
+  */
+int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda)
+{
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  RealScalar alpha = *reinterpret_cast<RealScalar*>(palpha);
+
+  int info = 0;
+  if(UPLO(*uplo)==INVALID)                                            info = 1;
+  else if(*n<0)                                                       info = 2;
+  else if(*incx==0)                                                   info = 5;
+  else if(*lda<std::max(1,*n))                                        info = 7;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"HER  ",&info,6);
+
+  if(alpha==RealScalar(0))
+    return 1;
+
+  Scalar* x_cpy = get_compact_vector(x, *n, *incx);
+
+  // TODO perform direct calls to underlying implementation
+//   if(UPLO(*uplo)==LO)       matrix(a,*n,*n,*lda).selfadjointView<Lower>().rankUpdate(vector(x_cpy,*n), alpha);
+//   else if(UPLO(*uplo)==UP)  matrix(a,*n,*n,*lda).selfadjointView<Upper>().rankUpdate(vector(x_cpy,*n), alpha);
+
+  if(UPLO(*uplo)==LO)
+    for(int j=0;j<*n;++j)
+      matrix(a,*n,*n,*lda).col(j).tail(*n-j) += alpha * internal::conj(x_cpy[j]) * vector(x_cpy+j,*n-j);
+  else
+    for(int j=0;j<*n;++j)
+      matrix(a,*n,*n,*lda).col(j).head(j+1) += alpha * internal::conj(x_cpy[j]) * vector(x_cpy,j+1);
+
+  matrix(a,*n,*n,*lda).diagonal().imag().setZero();
+
+  if(x_cpy!=x)  delete[] x_cpy;
+
+  return 1;
+}
+
+/**  ZHER2  performs the hermitian rank 2 operation
+  *
+  *     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
+  *
+  *  where alpha is a scalar, x and y are n element vectors and A is an n
+  *  by n hermitian matrix.
+  */
+int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda)
+{
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+
+  int info = 0;
+  if(UPLO(*uplo)==INVALID)                                            info = 1;
+  else if(*n<0)                                                       info = 2;
+  else if(*incx==0)                                                   info = 5;
+  else if(*incy==0)                                                   info = 7;
+  else if(*lda<std::max(1,*n))                                        info = 9;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"HER2 ",&info,6);
+
+  if(alpha==Scalar(0))
+    return 1;
+
+  Scalar* x_cpy = get_compact_vector(x, *n, *incx);
+  Scalar* y_cpy = get_compact_vector(y, *n, *incy);
+
+  // TODO perform direct calls to underlying implementation
+  if(UPLO(*uplo)==LO)       matrix(a,*n,*n,*lda).selfadjointView<Lower>().rankUpdate(vector(x_cpy,*n),vector(y_cpy,*n),alpha);
+  else if(UPLO(*uplo)==UP)  matrix(a,*n,*n,*lda).selfadjointView<Upper>().rankUpdate(vector(x_cpy,*n),vector(y_cpy,*n),alpha);
+
+  matrix(a,*n,*n,*lda).diagonal().imag().setZero();
+
+  if(x_cpy!=x)  delete[] x_cpy;
+  if(y_cpy!=y)  delete[] y_cpy;
+
+  return 1;
+}
+
+/**  ZGERU  performs the rank 1 operation
+  *
+  *     A := alpha*x*y' + A,
+  *
+  *  where alpha is a scalar, x is an m element vector, y is an n element
+  *  vector and A is an m by n matrix.
+  */
+int EIGEN_BLAS_FUNC(geru)(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda)
+{
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+
+  int info = 0;
+       if(*m<0)                                                       info = 1;
+  else if(*n<0)                                                       info = 2;
+  else if(*incx==0)                                                   info = 5;
+  else if(*incy==0)                                                   info = 7;
+  else if(*lda<std::max(1,*m))                                        info = 9;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"GERU ",&info,6);
+
+  if(alpha==Scalar(0))
+    return 1;
+
+  Scalar* x_cpy = get_compact_vector(x,*m,*incx);
+  Scalar* y_cpy = get_compact_vector(y,*n,*incy);
+
+  // TODO perform direct calls to underlying implementation
+  matrix(a,*m,*n,*lda) += alpha * vector(x_cpy,*m) * vector(y_cpy,*n).transpose();
+
+  if(x_cpy!=x)  delete[] x_cpy;
+  if(y_cpy!=y)  delete[] y_cpy;
+
+  return 1;
+}
+
+/**  ZGERC  performs the rank 1 operation
+  *
+  *     A := alpha*x*conjg( y' ) + A,
+  *
+  *  where alpha is a scalar, x is an m element vector, y is an n element
+  *  vector and A is an m by n matrix.
+  */
+int EIGEN_BLAS_FUNC(gerc)(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda)
+{
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+
+  int info = 0;
+       if(*m<0)                                                       info = 1;
+  else if(*n<0)                                                       info = 2;
+  else if(*incx==0)                                                   info = 5;
+  else if(*incy==0)                                                   info = 7;
+  else if(*lda<std::max(1,*m))                                        info = 9;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"GERC ",&info,6);
+
+  if(alpha==Scalar(0))
+    return 1;
+
+  Scalar* x_cpy = get_compact_vector(x,*m,*incx);
+  Scalar* y_cpy = get_compact_vector(y,*n,*incy);
+
+  // TODO perform direct calls to underlying implementation
+  matrix(a,*m,*n,*lda) += alpha * vector(x_cpy,*m) * vector(y_cpy,*n).adjoint();
+
+  if(x_cpy!=x)  delete[] x_cpy;
+  if(y_cpy!=y)  delete[] y_cpy;
+
+  return 1;
+}
diff --git a/blas/level2_impl.h b/blas/level2_impl.h
index 87a4d04df..0e4649f57 100644
--- a/blas/level2_impl.h
+++ b/blas/level2_impl.h
@@ -41,7 +41,7 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca
 
     init = true;
   }
-  
+
   Scalar* a = reinterpret_cast<Scalar*>(pa);
   Scalar* b = reinterpret_cast<Scalar*>(pb);
   Scalar* c = reinterpret_cast<Scalar*>(pc);
@@ -59,7 +59,7 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca
   if(info)
     return xerbla_(SCALAR_SUFFIX_UP"GEMV ",&info,6);
 
-  if(*m==0 || *n==0)
+  if(*m==0 || *n==0 || (alpha==Scalar(0) && beta==Scalar(1)))
     return 0;
 
   int actual_m = *m;
@@ -69,10 +69,13 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca
 
   Scalar* actual_b = get_compact_vector(b,actual_n,*incb);
   Scalar* actual_c = get_compact_vector(c,actual_m,*incc);
-  
+
   if(beta!=Scalar(1))
-    vector(actual_c, actual_m) *= beta;
-  
+  {
+    if(beta==Scalar(0)) vector(actual_c, actual_m).setZero();
+    else                vector(actual_c, actual_m) *= beta;
+  }
+
   int code = OP(*opa);
   func[code](actual_m, actual_n, a, *lda, actual_b, 1, actual_c, 1, alpha);
 
@@ -131,7 +134,7 @@ int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar
   func[code](*n, a, *lda, actual_b);
 
   if(actual_b!=b) delete[] copy_back(actual_b,b,*n,*incb);
-  
+
   return 0;
 }
 
@@ -195,147 +198,8 @@ int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar
 
   copy_back(res.data(),b,*n,*incb);
   if(actual_b!=b) delete[] actual_b;
-  
-  return 0;
-}
-
-// y = alpha*A*x + beta*y
-int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy)
-{
-  Scalar* a = reinterpret_cast<Scalar*>(pa);
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* y = reinterpret_cast<Scalar*>(py);
-  Scalar alpha  = *reinterpret_cast<Scalar*>(palpha);
-  Scalar beta   = *reinterpret_cast<Scalar*>(pbeta);
-  
-  // check arguments
-  int info = 0;
-  if(UPLO(*uplo)==INVALID)        info = 1;
-  else if(*n<0)                   info = 2;
-  else if(*lda<std::max(1,*n))    info = 5;
-  else if(*incx==0)               info = 7;
-  else if(*incy==0)               info = 10;
-  if(info)
-    return xerbla_(SCALAR_SUFFIX_UP"SYMV ",&info,6);
-
-  if(*n==0)
-    return 0;
-
-  Scalar* actual_x = get_compact_vector(x,*n,*incx);
-  Scalar* actual_y = get_compact_vector(y,*n,*incy);
-
-  if(beta!=Scalar(1))
-    vector(actual_y, *n) *= beta;
-
-  // TODO performs a direct call to the underlying implementation function
-       if(UPLO(*uplo)==UP) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView<Upper>() * (alpha * vector(actual_x,*n));
-  else if(UPLO(*uplo)==LO) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView<Lower>() * (alpha * vector(actual_x,*n));
-
-  if(actual_x!=x) delete[] actual_x;
-  if(actual_y!=y) delete[] copy_back(actual_y,y,*n,*incy);
-
-  return 1;
-}
-
-// C := alpha*x*x' + C
-int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pc, int *ldc)
-{
-  
-//   typedef void (*functype)(int, const Scalar *, int, Scalar *, int, Scalar);
-//   static functype func[2];
-
-//   static bool init = false;
-//   if(!init)
-//   {
-//     for(int k=0; k<2; ++k)
-//       func[k] = 0;
-// 
-//     func[UP] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,UpperTriangular>::run);
-//     func[LO] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,LowerTriangular>::run);
-
-//     init = true;
-//   }
-
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* c = reinterpret_cast<Scalar*>(pc);
-  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
-  
-  int info = 0;
-  if(UPLO(*uplo)==INVALID)                                            info = 1;
-  else if(*n<0)                                                       info = 2;
-  else if(*incx==0)                                                   info = 5;
-  else if(*ldc<std::max(1,*n))                                        info = 7;
-  if(info)
-    return xerbla_(SCALAR_SUFFIX_UP"SYR  ",&info,6);
-  
-  if(alpha==Scalar(0))
-    return 1;
-  
-  // if the increment is not 1, let's copy it to a temporary vector to enable vectorization
-  Scalar* x_cpy = get_compact_vector(x,*n,*incx);
-    
-  // TODO perform direct calls to underlying implementation
-  if(UPLO(*uplo)==LO)       matrix(c,*n,*n,*ldc).selfadjointView<Lower>().rankUpdate(vector(x_cpy,*n), alpha);
-  else if(UPLO(*uplo)==UP)  matrix(c,*n,*n,*ldc).selfadjointView<Upper>().rankUpdate(vector(x_cpy,*n), alpha);
-  
-  if(x_cpy!=x)  delete[] x_cpy;
-
-//   func[code](*n, a, *inca, c, *ldc, alpha);
-  return 1;
-}
-
-
-// C := alpha*x*y' + alpha*y*x' + C
-int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, int *ldc)
-{
-//   typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
-//   static functype func[2];
-// 
-//   static bool init = false;
-//   if(!init)
-//   {
-//     for(int k=0; k<2; ++k)
-//       func[k] = 0;
-// 
-//     func[UP] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,UpperTriangular>::run);
-//     func[LO] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,LowerTriangular>::run);
-// 
-//     init = true;
-//   }
-
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* y = reinterpret_cast<Scalar*>(py);
-  Scalar* c = reinterpret_cast<Scalar*>(pc);
-  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
-  
-  int info = 0;
-  if(UPLO(*uplo)==INVALID)                                            info = 1;
-  else if(*n<0)                                                       info = 2;
-  else if(*incx==0)                                                   info = 5;
-  else if(*incy==0)                                                   info = 7;
-  else if(*ldc<std::max(1,*n))                                        info = 9;
-  if(info)
-    return xerbla_(SCALAR_SUFFIX_UP"SYR2 ",&info,6);
-  
-  if(alpha==Scalar(0))
-    return 1;
-  
-  Scalar* x_cpy = get_compact_vector(x,*n,*incx);
-  Scalar* y_cpy = get_compact_vector(y,*n,*incy);
-    
-  // TODO perform direct calls to underlying implementation
-  if(UPLO(*uplo)==LO)       matrix(c,*n,*n,*ldc).selfadjointView<Lower>().rankUpdate(vector(x_cpy,*n), vector(y_cpy,*n), alpha);
-  else if(UPLO(*uplo)==UP)  matrix(c,*n,*n,*ldc).selfadjointView<Upper>().rankUpdate(vector(x_cpy,*n), vector(y_cpy,*n), alpha);
-  
-  if(x_cpy!=x)  delete[] x_cpy;
-  if(y_cpy!=y)  delete[] y_cpy;
-
-//   int code = UPLO(*uplo);
-//   if(code>=2 || func[code]==0)
-//     return 0;
 
-//   func[code](*n, a, *inca, b, *incb, c, *ldc, alpha);
-  return 1;
+  return 0;
 }
 
 /**  DGBMV  performs one of the matrix-vector operations
@@ -345,23 +209,12 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px
   *  where alpha and beta are scalars, x and y are vectors and A is an
   *  m by n band matrix, with kl sub-diagonals and ku super-diagonals.
   */
-int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealScalar *alpha, RealScalar *a, int *lda,
-                          RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
-{
-  return 1;
-}
-/**  DSBMV  performs the matrix-vector  operation
-  *
-  *     y := alpha*A*x + beta*y,
-  *
-  *  where alpha and beta are scalars, x and y are n element vectors and
-  *  A is an n by n symmetric band matrix, with k super-diagonals.
-  */
-int EIGEN_BLAS_FUNC(sbmv)( char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda,
-                           RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
-{
-  return 1;
-}
+// int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealScalar *alpha, RealScalar *a, int *lda,
+//                           RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
+// {
+//   return 1;
+// }
+
 
 /**  DTBMV  performs one of the matrix-vector operations
   *
@@ -370,10 +223,10 @@ int EIGEN_BLAS_FUNC(sbmv)( char *uplo, int *n, int *k, RealScalar *alpha, RealSc
   *  where x is an n element vector and  A is an n by n unit, or non-unit,
   *  upper or lower triangular band matrix, with ( k + 1 ) diagonals.
   */
-int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *trans, char *diag, int *n, int *k, RealScalar *a, int *lda, RealScalar *x, int *incx)
-{
-  return 1;
-}
+// int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *trans, char *diag, int *n, int *k, RealScalar *a, int *lda, RealScalar *x, int *incx)
+// {
+//   return 1;
+// }
 
 /**  DTBSV  solves one of the systems of equations
   *
@@ -386,23 +239,10 @@ int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *trans, char *diag, int *n, int *k, R
   *  No test for singularity or near-singularity is included in this
   *  routine. Such tests must be performed before calling this routine.
   */
-int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *trans, char *diag, int *n, int *k, RealScalar *a, int *lda, RealScalar *x, int *incx)
-{
-  return 1;
-}
-
-/**  DSPMV  performs the matrix-vector operation
-  *
-  *     y := alpha*A*x + beta*y,
-  *
-  *  where alpha and beta are scalars, x and y are n element vectors and
-  *  A is an n by n symmetric matrix, supplied in packed form.
-  *
-  */
-int EIGEN_BLAS_FUNC(spmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
-{
-  return 1;
-}
+// int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *trans, char *diag, int *n, int *k, RealScalar *a, int *lda, RealScalar *x, int *incx)
+// {
+//   return 1;
+// }
 
 /**  DTPMV  performs one of the matrix-vector operations
   *
@@ -411,10 +251,10 @@ int EIGEN_BLAS_FUNC(spmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap,
   *  where x is an n element vector and  A is an n by n unit, or non-unit,
   *  upper or lower triangular matrix, supplied in packed form.
   */
-int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *trans, char *diag, int *n, RealScalar *ap, RealScalar *x, int *incx)
-{
-  return 1;
-}
+// int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *trans, char *diag, int *n, RealScalar *ap, RealScalar *x, int *incx)
+// {
+//   return 1;
+// }
 
 /**  DTPSV  solves one of the systems of equations
   *
@@ -426,10 +266,10 @@ int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *trans, char *diag, int *n, RealScala
   *  No test for singularity or near-singularity is included in this
   *  routine. Such tests must be performed before calling this routine.
   */
-int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *trans, char *diag, int *n, RealScalar *ap, RealScalar *x, int *incx)
-{
-  return 1;
-}
+// int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *trans, char *diag, int *n, RealScalar *ap, RealScalar *x, int *incx)
+// {
+//   return 1;
+// }
 
 /**  DGER   performs the rank 1 operation
   *
@@ -444,7 +284,7 @@ int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx,
   Scalar* y = reinterpret_cast<Scalar*>(py);
   Scalar* a = reinterpret_cast<Scalar*>(pa);
   Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
-  
+
   int info = 0;
        if(*m<0)                                                       info = 1;
   else if(*n<0)                                                       info = 2;
@@ -453,293 +293,20 @@ int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx,
   else if(*lda<std::max(1,*m))                                        info = 9;
   if(info)
     return xerbla_(SCALAR_SUFFIX_UP"GER  ",&info,6);
-  
+
   if(alpha==Scalar(0))
     return 1;
-  
+
   Scalar* x_cpy = get_compact_vector(x,*m,*incx);
   Scalar* y_cpy = get_compact_vector(y,*n,*incy);
-    
-  // TODO perform direct calls to underlying implementation
-  matrix(a,*m,*n,*lda) += alpha * vector(x_cpy,*m) * vector(y_cpy,*n).adjoint();
-  
-  if(x_cpy!=x)  delete[] x_cpy;
-  if(y_cpy!=y)  delete[] y_cpy;
-  
-  return 1;
-}
-
-/**  DSPR    performs the symmetric rank 1 operation
-  *
-  *     A := alpha*x*x' + A,
-  *
-  *  where alpha is a real scalar, x is an n element vector and A is an
-  *  n by n symmetric matrix, supplied in packed form.
-  */
-int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *alpha, Scalar *x, int *incx, Scalar *ap)
-{
-  return 1;
-}
-/**  DSPR2  performs the symmetric rank 2 operation
-  *
-  *     A := alpha*x*y' + alpha*y*x' + A,
-  *
-  *  where alpha is a scalar, x and y are n element vectors and A is an
-  *  n by n symmetric matrix, supplied in packed form.
-  */
-int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *alpha, RealScalar *x, int *incx, RealScalar *y, int *incy, RealScalar *ap)
-{
-  return 1;
-}
-
-#if ISCOMPLEX
-/**  ZHEMV  performs the matrix-vector  operation
-  *
-  *     y := alpha*A*x + beta*y,
-  *
-  *  where alpha and beta are scalars, x and y are n element vectors and
-  *  A is an n by n hermitian matrix.
-  */
-int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy)
-{
-  Scalar* a = reinterpret_cast<Scalar*>(pa);
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* y = reinterpret_cast<Scalar*>(py);
-  Scalar alpha  = *reinterpret_cast<Scalar*>(palpha);
-  Scalar beta   = *reinterpret_cast<Scalar*>(pbeta);
 
-  // check arguments
-  int info = 0;
-  if(UPLO(*uplo)==INVALID)        info = 1;
-  else if(*n<0)                   info = 2;
-  else if(*lda<std::max(1,*n))    info = 5;
-  else if(*incx==0)               info = 7;
-  else if(*incy==0)               info = 10;
-  if(info)
-    return xerbla_(SCALAR_SUFFIX_UP"HEMV ",&info,6);
-
-  if(*n==0)
-    return 1;
-
-  Scalar* actual_x = get_compact_vector(x,*n,*incx);
-  Scalar* actual_y = get_compact_vector(y,*n,*incy);
-
-  if(beta!=Scalar(1))
-    vector(actual_y, *n) *= beta;
-
-  if(alpha!=Scalar(0))
-  {
-    // TODO performs a direct call to the underlying implementation function
-         if(UPLO(*uplo)==UP) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView<Upper>() * (alpha * vector(actual_x,*n));
-    else if(UPLO(*uplo)==LO) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView<Lower>() * (alpha * vector(actual_x,*n));
-  }
-  
-  if(actual_x!=x) delete[] actual_x;
-  if(actual_y!=y) delete[] copy_back(actual_y,y,*n,*incy);
-
-  return 1;
-}
-
-/**  ZHBMV  performs the matrix-vector  operation
-  *
-  *     y := alpha*A*x + beta*y,
-  *
-  *  where alpha and beta are scalars, x and y are n element vectors and
-  *  A is an n by n hermitian band matrix, with k super-diagonals.
-  */
-int EIGEN_BLAS_FUNC(hbmv)(char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda,
-                          RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
-{
-  return 1;
-}
-
-/**  ZHPMV  performs the matrix-vector operation
-  *
-  *     y := alpha*A*x + beta*y,
-  *
-  *  where alpha and beta are scalars, x and y are n element vectors and
-  *  A is an n by n hermitian matrix, supplied in packed form.
-  */
-int EIGEN_BLAS_FUNC(hpmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
-{
-  return 1;
-}
-
-/**  ZHPR    performs the hermitian rank 1 operation
-  *
-  *     A := alpha*x*conjg( x' ) + A,
-  *
-  *  where alpha is a real scalar, x is an n element vector and A is an
-  *  n by n hermitian matrix, supplied in packed form.
-  */
-int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *alpha, RealScalar *x, int *incx, RealScalar *ap)
-{
-  return 1;
-}
-
-/**  ZHPR2  performs the hermitian rank 2 operation
-  *
-  *     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
-  *
-  *  where alpha is a scalar, x and y are n element vectors and A is an
-  *  n by n hermitian matrix, supplied in packed form.
-  */
-int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *x, int *incx, RealScalar *y, int *incy, RealScalar *ap)
-{
-  return 1;
-}
-
-/**  ZHER   performs the hermitian rank 1 operation
-  *
-  *     A := alpha*x*conjg( x' ) + A,
-  *
-  *  where alpha is a real scalar, x is an n element vector and A is an
-  *  n by n hermitian matrix.
-  */
-int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda)
-{
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* a = reinterpret_cast<Scalar*>(pa);
-  RealScalar alpha = *reinterpret_cast<RealScalar*>(palpha);
-  
-  int info = 0;
-  if(UPLO(*uplo)==INVALID)                                            info = 1;
-  else if(*n<0)                                                       info = 2;
-  else if(*incx==0)                                                   info = 5;
-  else if(*lda<std::max(1,*n))                                        info = 7;
-  if(info)
-    return xerbla_(SCALAR_SUFFIX_UP"HER  ",&info,6);
-  
-  if(alpha==RealScalar(0))
-    return 1;
-  
-  Scalar* x_cpy = get_compact_vector(x, *n, *incx);
-  
   // TODO perform direct calls to underlying implementation
-  if(UPLO(*uplo)==LO)       matrix(a,*n,*n,*lda).selfadjointView<Lower>().rankUpdate(vector(x_cpy,*n), alpha);
-  else if(UPLO(*uplo)==UP)  matrix(a,*n,*n,*lda).selfadjointView<Upper>().rankUpdate(vector(x_cpy,*n), alpha);
-  
-  matrix(a,*n,*n,*lda).diagonal().imag().setZero();
-  
-  if(x_cpy!=x)  delete[] x_cpy;
-
-  return 1;
-}
+  matrix(a,*m,*n,*lda) += alpha * vector(x_cpy,*m) * vector(y_cpy,*n).adjoint();
 
-/**  ZHER2  performs the hermitian rank 2 operation
-  *
-  *     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
-  *
-  *  where alpha is a scalar, x and y are n element vectors and A is an n
-  *  by n hermitian matrix.
-  */
-int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda)
-{
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* y = reinterpret_cast<Scalar*>(py);
-  Scalar* a = reinterpret_cast<Scalar*>(pa);
-  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
-  
-  int info = 0;
-  if(UPLO(*uplo)==INVALID)                                            info = 1;
-  else if(*n<0)                                                       info = 2;
-  else if(*incx==0)                                                   info = 5;
-  else if(*incy==0)                                                   info = 7;
-  else if(*lda<std::max(1,*n))                                        info = 9;
-  if(info)
-    return xerbla_(SCALAR_SUFFIX_UP"HER2 ",&info,6);
-  
-  if(alpha==Scalar(0))
-    return 1;
-  
-  Scalar* x_cpy = get_compact_vector(x, *n, *incx);
-  Scalar* y_cpy = get_compact_vector(y, *n, *incy);
-  
-  // TODO perform direct calls to underlying implementation
-  if(UPLO(*uplo)==LO)       matrix(a,*n,*n,*lda).selfadjointView<Lower>().rankUpdate(vector(x_cpy,*n),vector(y_cpy,*n),alpha);
-  else if(UPLO(*uplo)==UP)  matrix(a,*n,*n,*lda).selfadjointView<Upper>().rankUpdate(vector(x_cpy,*n),vector(y_cpy,*n),alpha);
-  
-  matrix(a,*n,*n,*lda).diagonal().imag().setZero();
-  
   if(x_cpy!=x)  delete[] x_cpy;
   if(y_cpy!=y)  delete[] y_cpy;
 
   return 1;
 }
 
-/**  ZGERU  performs the rank 1 operation
-  *
-  *     A := alpha*x*y' + A,
-  *
-  *  where alpha is a scalar, x is an m element vector, y is an n element
-  *  vector and A is an m by n matrix.
-  */
-int EIGEN_BLAS_FUNC(geru)(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda)
-{
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* y = reinterpret_cast<Scalar*>(py);
-  Scalar* a = reinterpret_cast<Scalar*>(pa);
-  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
-  
-  int info = 0;
-       if(*m<0)                                                       info = 1;
-  else if(*n<0)                                                       info = 2;
-  else if(*incx==0)                                                   info = 5;
-  else if(*incy==0)                                                   info = 7;
-  else if(*lda<std::max(1,*m))                                        info = 9;
-  if(info)
-    return xerbla_(SCALAR_SUFFIX_UP"GERU ",&info,6);
-  
-  if(alpha==Scalar(0))
-    return 1;
-  
-  Scalar* x_cpy = get_compact_vector(x,*m,*incx);
-  Scalar* y_cpy = get_compact_vector(y,*n,*incy);
-    
-  // TODO perform direct calls to underlying implementation
-  matrix(a,*m,*n,*lda) += alpha * vector(x_cpy,*m) * vector(y_cpy,*n).transpose();
-  
-  if(x_cpy!=x)  delete[] x_cpy;
-  if(y_cpy!=y)  delete[] y_cpy;
-  
-  return 1;
-}
 
-/**  ZGERC  performs the rank 1 operation
-  *
-  *     A := alpha*x*conjg( y' ) + A,
-  *
-  *  where alpha is a scalar, x is an m element vector, y is an n element
-  *  vector and A is an m by n matrix.
-  */
-int EIGEN_BLAS_FUNC(gerc)(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda)
-{
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* y = reinterpret_cast<Scalar*>(py);
-  Scalar* a = reinterpret_cast<Scalar*>(pa);
-  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
-  
-  int info = 0;
-       if(*m<0)                                                       info = 1;
-  else if(*n<0)                                                       info = 2;
-  else if(*incx==0)                                                   info = 5;
-  else if(*incy==0)                                                   info = 7;
-  else if(*lda<std::max(1,*m))                                        info = 9;
-  if(info)
-    return xerbla_(SCALAR_SUFFIX_UP"GERC ",&info,6);
-  
-  if(alpha==Scalar(0))
-    return 1;
-  
-  Scalar* x_cpy = get_compact_vector(x,*m,*incx);
-  Scalar* y_cpy = get_compact_vector(y,*n,*incy);
-    
-  // TODO perform direct calls to underlying implementation
-  matrix(a,*m,*n,*lda) += alpha * vector(x_cpy,*m) * vector(y_cpy,*n).adjoint();
-  
-  if(x_cpy!=x)  delete[] x_cpy;
-  if(y_cpy!=y)  delete[] y_cpy;
-  
-  return 1;
-}
-#endif // ISCOMPLEX
diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h
new file mode 100644
index 000000000..5cad7694b
--- /dev/null
+++ b/blas/level2_real_impl.h
@@ -0,0 +1,225 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "common.h"
+
+// y = alpha*A*x + beta*y
+int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy)
+{
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  Scalar alpha  = *reinterpret_cast<Scalar*>(palpha);
+  Scalar beta   = *reinterpret_cast<Scalar*>(pbeta);
+
+  // check arguments
+  int info = 0;
+  if(UPLO(*uplo)==INVALID)        info = 1;
+  else if(*n<0)                   info = 2;
+  else if(*lda<std::max(1,*n))    info = 5;
+  else if(*incx==0)               info = 7;
+  else if(*incy==0)               info = 10;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"SYMV ",&info,6);
+
+  if(*n==0)
+    return 0;
+
+  Scalar* actual_x = get_compact_vector(x,*n,*incx);
+  Scalar* actual_y = get_compact_vector(y,*n,*incy);
+
+  if(beta!=Scalar(1))
+  {
+    if(beta==Scalar(0)) vector(actual_y, *n).setZero();
+    else                vector(actual_y, *n) *= beta;
+  }
+
+  // TODO performs a direct call to the underlying implementation function
+       if(UPLO(*uplo)==UP) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView<Upper>() * (alpha * vector(actual_x,*n));
+  else if(UPLO(*uplo)==LO) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView<Lower>() * (alpha * vector(actual_x,*n));
+
+  if(actual_x!=x) delete[] actual_x;
+  if(actual_y!=y) delete[] copy_back(actual_y,y,*n,*incy);
+
+  return 1;
+}
+
+// C := alpha*x*x' + C
+int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pc, int *ldc)
+{
+
+//   typedef void (*functype)(int, const Scalar *, int, Scalar *, int, Scalar);
+//   static functype func[2];
+
+//   static bool init = false;
+//   if(!init)
+//   {
+//     for(int k=0; k<2; ++k)
+//       func[k] = 0;
+//
+//     func[UP] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,UpperTriangular>::run);
+//     func[LO] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,LowerTriangular>::run);
+
+//     init = true;
+//   }
+
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* c = reinterpret_cast<Scalar*>(pc);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+
+  int info = 0;
+  if(UPLO(*uplo)==INVALID)                                            info = 1;
+  else if(*n<0)                                                       info = 2;
+  else if(*incx==0)                                                   info = 5;
+  else if(*ldc<std::max(1,*n))                                        info = 7;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"SYR  ",&info,6);
+
+  if(*n==0 || alpha==Scalar(0)) return 1;
+
+  // if the increment is not 1, let's copy it to a temporary vector to enable vectorization
+  Scalar* x_cpy = get_compact_vector(x,*n,*incx);
+
+  Matrix<Scalar,Dynamic,Dynamic> m2(matrix(c,*n,*n,*ldc));
+  
+  // TODO check why this is not accurate enough for lapack tests
+//   if(UPLO(*uplo)==LO)       matrix(c,*n,*n,*ldc).selfadjointView<Lower>().rankUpdate(vector(x_cpy,*n), alpha);
+//   else if(UPLO(*uplo)==UP)  matrix(c,*n,*n,*ldc).selfadjointView<Upper>().rankUpdate(vector(x_cpy,*n), alpha);
+
+  if(UPLO(*uplo)==LO)
+    for(int j=0;j<*n;++j)
+      matrix(c,*n,*n,*ldc).col(j).tail(*n-j) += alpha * x_cpy[j] * vector(x_cpy+j,*n-j);
+  else
+    for(int j=0;j<*n;++j)
+      matrix(c,*n,*n,*ldc).col(j).head(j+1) += alpha * x_cpy[j] * vector(x_cpy,j+1);
+
+  if(x_cpy!=x)  delete[] x_cpy;
+
+  return 1;
+}
+
+// C := alpha*x*y' + alpha*y*x' + C
+int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, int *ldc)
+{
+//   typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
+//   static functype func[2];
+//
+//   static bool init = false;
+//   if(!init)
+//   {
+//     for(int k=0; k<2; ++k)
+//       func[k] = 0;
+//
+//     func[UP] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,UpperTriangular>::run);
+//     func[LO] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,LowerTriangular>::run);
+//
+//     init = true;
+//   }
+
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  Scalar* c = reinterpret_cast<Scalar*>(pc);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+
+  int info = 0;
+  if(UPLO(*uplo)==INVALID)                                            info = 1;
+  else if(*n<0)                                                       info = 2;
+  else if(*incx==0)                                                   info = 5;
+  else if(*incy==0)                                                   info = 7;
+  else if(*ldc<std::max(1,*n))                                        info = 9;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"SYR2 ",&info,6);
+
+  if(alpha==Scalar(0))
+    return 1;
+
+  Scalar* x_cpy = get_compact_vector(x,*n,*incx);
+  Scalar* y_cpy = get_compact_vector(y,*n,*incy);
+
+  // TODO perform direct calls to underlying implementation
+  if(UPLO(*uplo)==LO)       matrix(c,*n,*n,*ldc).selfadjointView<Lower>().rankUpdate(vector(x_cpy,*n), vector(y_cpy,*n), alpha);
+  else if(UPLO(*uplo)==UP)  matrix(c,*n,*n,*ldc).selfadjointView<Upper>().rankUpdate(vector(x_cpy,*n), vector(y_cpy,*n), alpha);
+
+  if(x_cpy!=x)  delete[] x_cpy;
+  if(y_cpy!=y)  delete[] y_cpy;
+
+//   int code = UPLO(*uplo);
+//   if(code>=2 || func[code]==0)
+//     return 0;
+
+//   func[code](*n, a, *inca, b, *incb, c, *ldc, alpha);
+  return 1;
+}
+
+/**  DSBMV  performs the matrix-vector  operation
+  *
+  *     y := alpha*A*x + beta*y,
+  *
+  *  where alpha and beta are scalars, x and y are n element vectors and
+  *  A is an n by n symmetric band matrix, with k super-diagonals.
+  */
+// int EIGEN_BLAS_FUNC(sbmv)( char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda,
+//                            RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
+// {
+//   return 1;
+// }
+
+
+/**  DSPMV  performs the matrix-vector operation
+  *
+  *     y := alpha*A*x + beta*y,
+  *
+  *  where alpha and beta are scalars, x and y are n element vectors and
+  *  A is an n by n symmetric matrix, supplied in packed form.
+  *
+  */
+// int EIGEN_BLAS_FUNC(spmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
+// {
+//   return 1;
+// }
+
+/**  DSPR    performs the symmetric rank 1 operation
+  *
+  *     A := alpha*x*x' + A,
+  *
+  *  where alpha is a real scalar, x is an n element vector and A is an
+  *  n by n symmetric matrix, supplied in packed form.
+  */
+// int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *alpha, Scalar *x, int *incx, Scalar *ap)
+// {
+//   return 1;
+// }
+
+/**  DSPR2  performs the symmetric rank 2 operation
+  *
+  *     A := alpha*x*y' + alpha*y*x' + A,
+  *
+  *  where alpha is a scalar, x and y are n element vectors and A is an
+  *  n by n symmetric matrix, supplied in packed form.
+  */
+// int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *alpha, RealScalar *x, int *incx, RealScalar *y, int *incy, RealScalar *ap)
+// {
+//   return 1;
+// }
+
diff --git a/blas/level3_impl.h b/blas/level3_impl.h
index 5b28a1d52..6c53dc679 100644
--- a/blas/level3_impl.h
+++ b/blas/level3_impl.h
@@ -52,7 +52,7 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal
   Scalar* c = reinterpret_cast<Scalar*>(pc);
   Scalar alpha  = *reinterpret_cast<Scalar*>(palpha);
   Scalar beta   = *reinterpret_cast<Scalar*>(pbeta);
-  
+
   int info = 0;
   if(OP(*opa)==INVALID)                                               info = 1;
   else if(OP(*opb)==INVALID)                                          info = 2;
@@ -342,13 +342,17 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
   else if(*ldc<std::max(1,*n))                                        info = 10;
   if(info)
     return xerbla_(SCALAR_SUFFIX_UP"SYRK ",&info,6);
-  
+
   int code = OP(*op) | (UPLO(*uplo) << 2);
 
   if(beta!=Scalar(1))
   {
-    if(UPLO(*uplo)==UP) matrix(c, *n, *n, *ldc).triangularView<Upper>() *= beta;
-    else                matrix(c, *n, *n, *ldc).triangularView<Lower>() *= beta;
+    if(UPLO(*uplo)==UP)
+      if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Upper>().setZero();
+      else                matrix(c, *n, *n, *ldc).triangularView<Upper>() *= beta;
+    else
+      if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Lower>().setZero();
+      else                matrix(c, *n, *n, *ldc).triangularView<Lower>() *= beta;
   }
 
   #if ISCOMPLEX
@@ -383,7 +387,7 @@ int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal
   Scalar* c = reinterpret_cast<Scalar*>(pc);
   Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
   Scalar beta  = *reinterpret_cast<Scalar*>(pbeta);
-  
+
   int info = 0;
   if(UPLO(*uplo)==INVALID)                                            info = 1;
   else if(OP(*op)==INVALID)                                           info = 2;
@@ -394,11 +398,15 @@ int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal
   else if(*ldc<std::max(1,*n))                                        info = 12;
   if(info)
     return xerbla_(SCALAR_SUFFIX_UP"SYR2K",&info,6);
-  
+
   if(beta!=Scalar(1))
   {
-    if(UPLO(*uplo)==UP) matrix(c, *n, *n, *ldc).triangularView<Upper>() *= beta;
-    else                matrix(c, *n, *n, *ldc).triangularView<Lower>() *= beta;
+    if(UPLO(*uplo)==UP)
+      if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Upper>().setZero();
+      else                matrix(c, *n, *n, *ldc).triangularView<Upper>() *= beta;
+    else
+      if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Lower>().setZero();
+      else                matrix(c, *n, *n, *ldc).triangularView<Lower>() *= beta;
   }
 
   if(*k==0)
@@ -458,11 +466,9 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
   if(info)
     return xerbla_(SCALAR_SUFFIX_UP"HEMM ",&info,6);
 
-  if(beta==Scalar(0))
-    matrix(c, *m, *n, *ldc).setZero();
-  else if(beta!=Scalar(1))
-    matrix(c, *m, *n, *ldc) *= beta;
-  
+  if(beta==Scalar(0))       matrix(c, *m, *n, *ldc).setZero();
+  else if(beta!=Scalar(1))  matrix(c, *m, *n, *ldc) *= beta;
+
   if(*m==0 || *n==0)
   {
     return 1;
@@ -535,11 +541,18 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
 
   if(beta!=RealScalar(1))
   {
-    if(UPLO(*uplo)==UP) matrix(c, *n, *n, *ldc).triangularView<StrictlyUpper>() *= beta;
-    else                matrix(c, *n, *n, *ldc).triangularView<StrictlyLower>() *= beta;
-
-    matrix(c, *n, *n, *ldc).diagonal().real() *= beta;
-    matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
+    if(UPLO(*uplo)==UP)
+      if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Upper>().setZero();
+      else                matrix(c, *n, *n, *ldc).triangularView<StrictlyUpper>() *= beta;
+    else
+      if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Lower>().setZero();
+      else                matrix(c, *n, *n, *ldc).triangularView<StrictlyLower>() *= beta;
+  
+    if(beta!=Scalar(0))
+    {
+      matrix(c, *n, *n, *ldc).diagonal().real() *= beta;
+      matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
+    }
   }
 
   if(*k>0 && alpha!=RealScalar(0))
@@ -573,11 +586,18 @@ int EIGEN_BLAS_FUNC(her2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal
 
   if(beta!=RealScalar(1))
   {
-    if(UPLO(*uplo)==UP) matrix(c, *n, *n, *ldc).triangularView<StrictlyUpper>() *= beta;
-    else                matrix(c, *n, *n, *ldc).triangularView<StrictlyLower>() *= beta;
+    if(UPLO(*uplo)==UP)
+      if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Upper>().setZero();
+      else                matrix(c, *n, *n, *ldc).triangularView<StrictlyUpper>() *= beta;
+    else
+      if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView<Lower>().setZero();
+      else                matrix(c, *n, *n, *ldc).triangularView<StrictlyLower>() *= beta;
 
-    matrix(c, *n, *n, *ldc).diagonal().real() *= beta;
-    matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
+    if(beta!=Scalar(0))
+    {
+      matrix(c, *n, *n, *ldc).diagonal().real() *= beta;
+      matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
+    }
   }
   else if(*k>0 && alpha!=Scalar(0))
     matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
diff --git a/blas/single.cpp b/blas/single.cpp
index 9ee2b78dd..5816d329b 100644
--- a/blas/single.cpp
+++ b/blas/single.cpp
@@ -28,5 +28,7 @@
 #define ISCOMPLEX     0
 
 #include "level1_impl.h"
+#include "level1_real_impl.h"
 #include "level2_impl.h"
+#include "level2_real_impl.h"
 #include "level3_impl.h"
-- 
cgit v1.2.3