* Started support for unaligned vectorization.

* Introduce a new highly optimized matrix-matrix product for large matrices. The code is still highly experimental and is activated only if you define EIGEN_WIP_PRODUCT at compile time. Currently the third dimension of the product must be a multiple of the packet size (i.e. a multiple of 4 for floats) and the right-hand side matrix must be column major. Moreover, c = a*b; currently computes c += a*b! Therefore, the code is provided for experimentation purposes only. These limitations will be fixed sooner or later so that it can become the default product implementation.
author: Gael Guennebaud <g.gael@free.fr> 2008-05-05 10:23:29 +0000
committer: Gael Guennebaud <g.gael@free.fr> 2008-05-05 10:23:29 +0000
commit: 46fa4c713fa2fdb472e287cad95b5933135e6503 (patch)
tree: 451b02d2b48dda665d7f2c924c30b2ab03cf75cb /Eigen/src/Core/Assign.h
parent: 8c6007f80e773ca3661c5a323d8e28ce49a609b3 (diff)
1 file changed, 6 insertions, 6 deletions
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 3862ae831..384059185 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -78,7 +78,7 @@ struct ei_matrix_assignment_packet_unroller
   {
     ei_matrix_assignment_packet_unroller<Derived1, Derived2,
       Index-ei_packet_traits<typename Derived1::Scalar>::size>::run(dst, src);
-    dst.writePacketCoeff(row, col, src.packetCoeff(row, col));
+    dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col));
   }
 };
 
@@ -87,7 +87,7 @@ struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, 0 >
 {
   static void run(Derived1 &dst, const Derived2 &src)
   {
-    dst.writePacketCoeff(0, 0, src.packetCoeff(0, 0));
+    dst.template writePacketCoeff<Aligned>(0, 0, src.template packetCoeff<Aligned>(0, 0));
   }
 };
 
@@ -211,7 +211,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true, false>
             // FIXME the following is not really efficient
             int i = index/dst.rows();
             int j = index%dst.rows();
-            dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
+            dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
           }
           for(int i = alignedSize/dst.rows(); i < dst.rows(); i++)
             for(int j = alignedSize%dst.rows(); j < dst.cols(); j++)
@@ -222,7 +222,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true, false>
 //           std::cout << "vectorized normal row major\n";
           for(int i = 0; i < dst.rows(); i++)
             for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
-              dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
+              dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
         }
       }
       else
@@ -240,7 +240,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true, false>
             // FIXME the following is not really efficient
             int i = index%dst.rows();
             int j = index/dst.rows();
-            dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
+            dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
           }
           for(int j = alignedSize/dst.rows(); j < dst.cols(); j++)
             for(int i = alignedSize%dst.rows(); i < dst.rows(); i++)
@@ -251,7 +251,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true, false>
 //           std::cout << "vectorized normal col major\n";
           for(int j = 0; j < dst.cols(); j++)
             for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
-              dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
+              dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
         }
       }
     }
author	Gael Guennebaud <g.gael@free.fr>	2008-05-05 10:23:29 +0000
committer	Gael Guennebaud <g.gael@free.fr>	2008-05-05 10:23:29 +0000
commit	46fa4c713fa2fdb472e287cad95b5933135e6503 (patch)
tree	451b02d2b48dda665d7f2c924c30b2ab03cf75cb /Eigen/src/Core/Assign.h
parent	8c6007f80e773ca3661c5a323d8e28ce49a609b3 (diff)