4 files changed, 70 insertions, 0 deletions
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 0f0958d09..72fa0edb7 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -621,6 +621,8 @@ template<typename Derived> class MatrixBase
 
     template<typename OtherDerived>
     PlainMatrixType cross(const MatrixBase<OtherDerived>& other) const;
+    template<typename OtherDerived>
+    PlainMatrixType cross3(const MatrixBase<OtherDerived>& other) const;
     PlainMatrixType unitOrthogonal(void) const;
     Matrix<Scalar,3,1> eulerAngles(int a0, int a1, int a2) const;
     const ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index ab8c25ed5..4c8399b69 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -31,6 +31,7 @@
   * \returns the cross product of \c *this and \a other
   *
   * Here is a very good explanation of cross-product: http://xkcd.com/199/
+  * \sa MatrixBase::cross3()
   */
 template<typename Derived>
 template<typename OtherDerived>
@@ -38,6 +39,7 @@ inline typename MatrixBase<Derived>::PlainMatrixType
 MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3)
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)
 
   // Note that there is no need for an expression here since the compiler
   // optimize such a small temporary very well (even within a complex expression)
@@ -50,6 +52,48 @@ MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
   );
 }
 
+template< int Arch,typename VectorLhs,typename VectorRhs,
+          typename Scalar = typename VectorLhs::Scalar,
+          int Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
+struct ei_cross3_impl {
+  inline static typename ei_plain_matrix_type<VectorLhs>::type
+  run(const VectorLhs& lhs, const VectorRhs& rhs)
+  {
+    return typename ei_plain_matrix_type<VectorLhs>::type(
+      lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1),
+      lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2),
+      lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0),
+      0
+    );
+  }
+};
+
+/** \geometry_module
+  *
+  * \returns the cross product of \c *this and \a other using only the x, y, and z coefficients
+  *
+  * The size of \c *this and \a other must be four. This function is especially useful
+  * when using 4D vectors instead of 3D ones to get advantage of SSE/AltiVec vectorization.
+  *
+  * \sa MatrixBase::cross()
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline typename MatrixBase<Derived>::PlainMatrixType
+MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4)
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,4)
+
+  typedef typename ei_nested<Derived,2>::type DerivedNested;
+  typedef typename ei_nested<OtherDerived,2>::type OtherDerivedNested;
+  const DerivedNested lhs(derived());
+  const OtherDerivedNested rhs(other.derived());
+
+  return ei_cross3_impl<EiArch,typename ei_cleantype<DerivedNested>::type,
+                               typename ei_cleantype<OtherDerivedNested>::type>::run(lhs,rhs);
+}
+
 /** \returns a matrix expression of the cross product of each column or row
   * of the referenced expression with the \a other vector.
   *
diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h
index 9be5ecd6f..db8c7cc1f 100644
--- a/Eigen/src/Geometry/arch/Geometry_SSE.h
+++ b/Eigen/src/Geometry/arch/Geometry_SSE.h
@@ -45,4 +45,19 @@ ei_quaternion_product<EiArch_SSE,float>(const Quaternion<float>& _a, const Quate
   return res;
 }
 
+template<typename VectorLhs,typename VectorRhs>
+struct ei_cross3_impl<EiArch_SSE,VectorLhs,VectorRhs,float,true> {
+  inline static typename ei_plain_matrix_type<VectorLhs>::type
+  run(const VectorLhs& lhs, const VectorRhs& rhs)
+  {
+    __m128 a = lhs.coeffs().packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
+    __m128 b = rhs.coeffs().packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
+    __m128 mul1=_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,3),ei_vec4f_swizzle1(b,2,0,1,3));
+    __m128 mul2=_mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,3),ei_vec4f_swizzle1(b,1,2,0,3));
+    typename ei_plain_matrix_type<VectorLhs>::type res;
+    ei_pstore(&res.x(),_mm_sub_ps(mul1,mul2));
+    return res;
+  }
+};
+
 #endif // EIGEN_GEOMETRY_SSE_H
diff --git a/test/geo_orthomethods.cpp b/test/geo_orthomethods.cpp
index ae9f73518..6f6b7f8d3 100644
--- a/test/geo_orthomethods.cpp
+++ b/test/geo_orthomethods.cpp
@@ -36,6 +36,8 @@ template<typename Scalar> void orthomethods_3()
   typedef Matrix<Scalar,3,3> Matrix3;
   typedef Matrix<Scalar,3,1> Vector3;
 
+  typedef Matrix<Scalar,4,1> Vector4;
+
   Vector3 v0 = Vector3::Random(),
           v1 = Vector3::Random(),
           v2 = Vector3::Random();
@@ -59,6 +61,13 @@ template<typename Scalar> void orthomethods_3()
   mcross = mat3.rowwise().cross(vec3);
   VERIFY_IS_APPROX(mcross.row(i), mat3.row(i).cross(vec3));
 
+  // cross3
+  Vector4 v40 = Vector4::Random(),
+          v41 = Vector4::Random(),
+          v42 = Vector4::Random();
+  v40.w() = v41.w() = v42.w() = 0;
+  v42.template start<3>() = v40.template start<3>().cross(v41.template start<3>());
+  VERIFY_IS_APPROX(v40.cross3(v41), v42);
 }
 
 template<typename Scalar, int Size> void orthomethods(int size=Size)