more refactoring in the level3 products

author: Gael Guennebaud <g.gael@free.fr> 2009-07-22 11:54:58 +0200
committer: Gael Guennebaud <g.gael@free.fr> 2009-07-22 11:54:58 +0200
commit: d6475ea390b5a4beeef64e71a247b3f72573d768 (patch)
tree: 20b734753444747a7a349f8657aa3593d294173d /Eigen/src/Core/util
parent: d6627d540e6a8651ccd8ce4a4520b70fe5def870 (diff)
1 files changed, 157 insertions, 0 deletions
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
new file mode 100644
index 000000000..6e4b21e6a
--- /dev/null
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -0,0 +1,157 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_BLASUTIL_H
+#define EIGEN_BLASUTIL_H
+
+// This file contains many lightweight helper classes used to
+// implement and control fast level 2 and level 3 BLAS-like routines.
+
+// forward declarations
+template<typename Scalar, typename Packet, int PacketSize, int mr, int nr, typename Conj>
+struct ei_gebp_kernel;
+
+template<typename Scalar, int PacketSize, int nr>
+struct ei_gemm_pack_rhs;
+
+template<typename Scalar, int mr, int StorageOrder>
+struct ei_gemm_pack_lhs;
+
+template<
+  typename Scalar,
+  int LhsStorageOrder, bool ConjugateLhs,
+  int RhsStorageOrder, bool ConjugateRhs,
+  int ResStorageOrder>
+struct ei_general_matrix_matrix_product;
+
+template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename RhsType>
+static void ei_cache_friendly_product_colmajor_times_vector(
+  int size, const Scalar* lhs, int lhsStride, const RhsType& rhs, Scalar* res, Scalar alpha);
+
+template<bool ConjugateLhs, bool ConjugateRhs, typename Scalar, typename ResType>
+static void ei_cache_friendly_product_rowmajor_times_vector(
+  const Scalar* lhs, int lhsStride, const Scalar* rhs, int rhsSize, ResType& res, Scalar alpha);
+
+// Provides scalar/packet-wise product and product with accumulation
+// with optional conjugation of the arguments.
+template<bool ConjLhs, bool ConjRhs> struct ei_conj_helper;
+
+template<> struct ei_conj_helper<false,false>
+{
+  template<typename T>
+  EIGEN_STRONG_INLINE T pmadd(const T& x, const T& y, const T& c) const { return  ei_pmadd(x,y,c); }
+  template<typename T>
+  EIGEN_STRONG_INLINE T pmul(const T& x, const T& y) const { return  ei_pmul(x,y); }
+};
+
+template<> struct ei_conj_helper<false,true>
+{
+  template<typename T> std::complex<T>
+  pmadd(const std::complex<T>& x, const std::complex<T>& y, const std::complex<T>& c) const
+  { return c + pmul(x,y); }
+
+  template<typename T> std::complex<T> pmul(const std::complex<T>& x, const std::complex<T>& y) const
+  { return std::complex<T>(ei_real(x)*ei_real(y) + ei_imag(x)*ei_imag(y), ei_imag(x)*ei_real(y) - ei_real(x)*ei_imag(y)); }
+};
+
+template<> struct ei_conj_helper<true,false>
+{
+  template<typename T> std::complex<T>
+  pmadd(const std::complex<T>& x, const std::complex<T>& y, const std::complex<T>& c) const
+  { return c + pmul(x,y); }
+
+  template<typename T> std::complex<T> pmul(const std::complex<T>& x, const std::complex<T>& y) const
+  { return std::complex<T>(ei_real(x)*ei_real(y) + ei_imag(x)*ei_imag(y), ei_real(x)*ei_imag(y) - ei_imag(x)*ei_real(y)); }
+};
+
+template<> struct ei_conj_helper<true,true>
+{
+  template<typename T> std::complex<T>
+  pmadd(const std::complex<T>& x, const std::complex<T>& y, const std::complex<T>& c) const
+  { return c + pmul(x,y); }
+
+  template<typename T> std::complex<T> pmul(const std::complex<T>& x, const std::complex<T>& y) const
+  { return std::complex<T>(ei_real(x)*ei_real(y) - ei_imag(x)*ei_imag(y), - ei_real(x)*ei_imag(y) - ei_imag(x)*ei_real(y)); }
+};
+
+// lightweight helper class to access matrix coefficients
+template<typename Scalar, int StorageOrder>
+class ei_blas_data_mapper
+{
+  public:
+    ei_blas_data_mapper(Scalar* data, int stride) : m_data(data), m_stride(stride) {}
+    EIGEN_STRONG_INLINE Scalar& operator()(int i, int j)
+    { return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; }
+  protected:
+    Scalar* EIGEN_RESTRICT m_data;
+    int m_stride;
+};
+
+// lightweight helper class to access matrix coefficients (const version)
+template<typename Scalar, int StorageOrder>
+class ei_const_blas_data_mapper
+{
+  public:
+    ei_const_blas_data_mapper(const Scalar* data, int stride) : m_data(data), m_stride(stride) {}
+    EIGEN_STRONG_INLINE const Scalar& operator()(int i, int j) const
+    { return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; }
+  protected:
+    const Scalar* EIGEN_RESTRICT m_data;
+    int m_stride;
+};
+
+//
+// template <int L2MemorySize,typename Scalar>
+// struct ei_L2_block_traits {
+//   enum {width = 8 * ei_meta_sqrt<L2MemorySize/(64*sizeof(Scalar))>::ret };
+// };
+
+// Defines various constant controlling level 3 blocking
+template<typename Scalar>
+struct ei_product_blocking_traits
+{
+  typedef typename ei_packet_traits<Scalar>::type PacketType;
+  enum {
+    PacketSize = sizeof(PacketType)/sizeof(Scalar),
+    #if (defined __i386__)
+    HalfRegisterCount = 4,
+    #else
+    HalfRegisterCount = 8,
+    #endif
+
+    // register block size along the N direction
+    nr = HalfRegisterCount/2,
+
+    // register block size along the M direction
+    mr = 2 * PacketSize,
+
+    // max cache block size along the K direction
+    Max_kc = 8 * ei_meta_sqrt<EIGEN_TUNE_FOR_CPU_CACHE_SIZE/(64*sizeof(Scalar))>::ret,
+
+    // max cache block size along the M direction
+    Max_mc = 2*Max_kc
+  };
+};
+
+#endif // EIGEN_BLASUTIL_H
author	Gael Guennebaud <g.gael@free.fr>	2009-07-22 11:54:58 +0200
committer	Gael Guennebaud <g.gael@free.fr>	2009-07-22 11:54:58 +0200
commit	d6475ea390b5a4beeef64e71a247b3f72573d768 (patch)
tree	20b734753444747a7a349f8657aa3593d294173d /Eigen/src/Core/util
parent	d6627d540e6a8651ccd8ce4a4520b70fe5def870 (diff)