From 04dc63776a63e5d0ec0237706cb440152d57769e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 25 Sep 2009 13:08:39 +0200 Subject: add a wip blas library built on top of Eigen. TODO: - write extensive unit tests (maybe these already exist in other projects) - the level2 functions still have to be implemented --- blas/CMakeLists.txt | 10 ++ blas/README.txt | 7 + blas/common.h | 115 +++++++++++++++ blas/complex_double.cpp | 31 ++++ blas/complex_single.cpp | 31 ++++ blas/double.cpp | 31 ++++ blas/level1_impl.h | 225 +++++++++++++++++++++++++++++ blas/level2_impl.h | 214 ++++++++++++++++++++++++++++ blas/level3_impl.h | 365 ++++++++++++++++++++++++++++++++++++++++++++++++ blas/single.cpp | 31 ++++ 10 files changed, 1060 insertions(+) create mode 100644 blas/CMakeLists.txt create mode 100644 blas/README.txt create mode 100644 blas/common.h create mode 100644 blas/complex_double.cpp create mode 100644 blas/complex_single.cpp create mode 100644 blas/double.cpp create mode 100644 blas/level1_impl.h create mode 100644 blas/level2_impl.h create mode 100644 blas/level3_impl.h create mode 100644 blas/single.cpp (limited to 'blas') diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt new file mode 100644 index 000000000..477693bad --- /dev/null +++ b/blas/CMakeLists.txt @@ -0,0 +1,10 @@ + +set(EigenBlas_SRCS single.cpp double.cpp complex_single.cpp complex_double.cpp) + +add_library(eigen_blas SHARED ${EigenBlas_SRCS}) + +install(TARGETS eigen_blas + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) + diff --git a/blas/README.txt b/blas/README.txt new file mode 100644 index 000000000..466a6751c --- /dev/null +++ b/blas/README.txt @@ -0,0 +1,7 @@ + +This directory contains a BLAS library built on top of Eigen. + +This is currently a work in progress which is far from ready for use, +but feel free to contribute to it if you wish. + +If you want to compile it, set the cmake variable EIGEN_BUILD_BLAS to "on". 
diff --git a/blas/common.h b/blas/common.h new file mode 100644 index 000000000..74c3c9f11 --- /dev/null +++ b/blas/common.h @@ -0,0 +1,115 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_BLAS_COMMON_H +#define EIGEN_BLAS_COMMON_H + +#ifndef SCALAR +#error the token SCALAR must be defined to compile this file +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include + +#ifdef __cplusplus +} +#endif + + +#define NOTR 0 +#define TR 1 +#define ADJ 2 + +#define LEFT 0 +#define RIGHT 1 + +#define UP 0 +#define LO 1 + +#define NUNIT 0 +#define UNIT 1 + +#define OP(X) ( ((X)=='N' || (X)=='n') ? NOTR \ + : ((X)=='T' || (X)=='t') ? TR \ + : ((X)=='C' || (X)=='c') ? ADJ \ + : 0xff) + +#define SIDE(X) ( ((X)=='L' || (X)=='l') ? LEFT \ + : ((X)=='R' || (X)=='r') ? RIGHT \ + : 0xff) + +#define UPLO(X) ( ((X)=='U' || (X)=='u') ? UP \ + : ((X)=='L' || (X)=='l') ? LO \ + : 0xff) + +#define DIAG(X) ( ((X)=='N' || (X)=='N') ? 
NUNIT \ + : ((X)=='U' || (X)=='u') ? UNIT \ + : 0xff) + +#include +#include +using namespace Eigen; + +template +Block > >, Dynamic, Dynamic> +matrix(T* data, int rows, int cols, int stride) +{ + return Map >(data, stride, cols).nestByValue().block(0,0,rows,cols); +} + +template +Block > >, Dynamic, 1> +vector(T* data, int size, int incr) +{ + return Map >(data, size, incr).nestByValue().col(0); +} + +template +Map > +vector(T* data, int size) +{ + return Map >(data, size); +} + +typedef SCALAR Scalar; +typedef NumTraits::Real RealScalar; +typedef std::complex Complex; + +enum +{ + IsComplex = Eigen::NumTraits::IsComplex, + Conj = IsComplex +}; + +typedef Block > >, Dynamic, Dynamic> MatrixType; +typedef Block > >, Dynamic, 1> StridedVectorType; +typedef Map > CompactVectorType; + +#define EIGEN_BLAS_FUNC(X) EIGEN_CAT(SCALAR_SUFFIX,X##_) + +#endif // EIGEN_BLAS_COMMON_H diff --git a/blas/complex_double.cpp b/blas/complex_double.cpp new file mode 100644 index 000000000..f51ccb25b --- /dev/null +++ b/blas/complex_double.cpp @@ -0,0 +1,31 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#define SCALAR std::complex +#define SCALAR_SUFFIX c +#define ISCOMPLEX 1 + +#include "level1_impl.h" +#include "level2_impl.h" +#include "level3_impl.h" diff --git a/blas/complex_single.cpp b/blas/complex_single.cpp new file mode 100644 index 000000000..b6617e7b9 --- /dev/null +++ b/blas/complex_single.cpp @@ -0,0 +1,31 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#define SCALAR std::complex +#define SCALAR_SUFFIX z +#define ISCOMPLEX 1 + +#include "level1_impl.h" +#include "level2_impl.h" +#include "level3_impl.h" diff --git a/blas/double.cpp b/blas/double.cpp new file mode 100644 index 000000000..8145696b3 --- /dev/null +++ b/blas/double.cpp @@ -0,0 +1,31 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#define SCALAR double +#define SCALAR_SUFFIX d +#define ISCOMPLEX 0 + +#include "level1_impl.h" +#include "level2_impl.h" +#include "level3_impl.h" diff --git a/blas/level1_impl.h b/blas/level1_impl.h new file mode 100644 index 000000000..c508626db --- /dev/null +++ b/blas/level1_impl.h @@ -0,0 +1,225 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. 
+// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "common.h" + +int EIGEN_BLAS_FUNC(axpy)(int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy) +{ + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + Scalar alpha = *reinterpret_cast(palpha); + + if(*incx==1 && *incy==1) + vector(y,*n) += alpha * vector(x,*n); + else + vector(y,*n,*incy) += alpha * vector(x,*n,*incx); + + return 1; +} + +// computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum +// res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n +RealScalar EIGEN_BLAS_FUNC(asum)(int *n, RealScalar *px, int *incx) +{ + int size = IsComplex ? 2* *n : *n; + + if(*incx==1) + return vector(px,size).cwise().abs().sum(); + else + return vector(px,size,*incx).cwise().abs().sum(); + + return 1; +} + +int EIGEN_BLAS_FUNC(copy)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) +{ + int size = IsComplex ? 2* *n : *n; + + if(*incx==1 && *incy==1) + vector(py,size) = vector(px,size); + else + vector(py,size,*incy) = vector(px,size,*incx); + + return 1; +} + +// computes a vector-vector dot product. +Scalar EIGEN_BLAS_FUNC(dot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) +{ + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + + if(*incx==1 && *incy==1) + return (vector(x,*n).cwise()*vector(y,*n)).sum(); + + return (vector(x,*n,*incx).cwise()*vector(y,*n,*incy)).sum(); +} + +/* + +// computes a vector-vector dot product with extended precision. 
+Scalar EIGEN_BLAS_FUNC(sdot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) +{ + // TODO + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + + if(*incx==1 && *incy==1) + return vector(x,*n).dot(vector(y,*n)); + + return vector(x,*n,*incx).dot(vector(y,*n,*incy)); +} + +*/ + +#if ISCOMPLEX + +// computes a dot product of a conjugated vector with another vector. +Scalar EIGEN_BLAS_FUNC(dotc)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) +{ + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + + if(*incx==1 && *incy==1) + return vector(x,*n).dot(vector(y,*n)); + + return vector(x,*n,*incx).dot(vector(y,*n,*incy)); +} + +// computes a vector-vector dot product without complex conjugation. +Scalar EIGEN_BLAS_FUNC(dotu)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) +{ + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + + if(*incx==1 && *incy==1) + return (vector(x,*n).cwise()*vector(y,*n)).sum(); + + return (vector(x,*n,*incx).cwise()*vector(y,*n,*incy)).sum(); +} + +#endif // ISCOMPLEX + +// computes the Euclidean norm of a vector. 
+Scalar EIGEN_BLAS_FUNC(nrm2)(int *n, RealScalar *px, int *incx) +{ + Scalar* x = reinterpret_cast(px); + + if(*incx==1) + return vector(x,*n).norm(); + + return vector(x,*n,*incx).norm(); +} + +int EIGEN_BLAS_FUNC(rot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps) +{ + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + Scalar c = *reinterpret_cast(pc); + Scalar s = *reinterpret_cast(ps); + + StridedVectorType vx(vector(x,*n,*incx)); + StridedVectorType vy(vector(y,*n,*incy)); + ei_apply_rotation_in_the_plane(vx, vy, PlanarRotation(c,s)); + return 1; +} + +int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealScalar *ps) +{ + Scalar a = *reinterpret_cast(pa); + Scalar b = *reinterpret_cast(pb); + Scalar* c = reinterpret_cast(pc); + Scalar* s = reinterpret_cast(ps); + + PlanarRotation r; + r.makeGivens(a,b); + *c = r.c(); + *s = r.s(); + + return 1; +} + +#if !ISCOMPLEX +/* +// performs rotation of points in the modified plane. +int EIGEN_BLAS_FUNC(rotm)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *param) +{ + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + + // TODO + + return 0; +} + +// computes the modified parameters for a Givens rotation. +int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealScalar *x2, RealScalar *param) +{ + // TODO + + return 0; +} +*/ +#endif // !ISCOMPLEX + +int EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *px, int *incx, RealScalar *palpha) +{ + Scalar* x = reinterpret_cast(px); + Scalar alpha = *reinterpret_cast(palpha); + + if(*incx==1) + vector(x,*n) *= alpha; + else /* strided path only: without this else a contiguous vector was scaled twice (by alpha*alpha) */ + vector(x,*n,*incx) *= alpha; + + return 1; +} + +int EIGEN_BLAS_FUNC(swap)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) +{ + int size = IsComplex ? 
2* *n : *n; + + if(*incx==1 && *incy==1) + vector(py,size).swap(vector(px,size)); + else + vector(py,size,*incy).swap(vector(px,size,*incx)); + + return 1; +} + +#if !ISCOMPLEX + +RealScalar EIGEN_BLAS_FUNC(casum)(int *n, RealScalar *px, int *incx) +{ + Complex* x = reinterpret_cast(px); + + if(*incx==1) + return vector(x,*n).cwise().abs().sum(); + else + return vector(x,*n,*incx).cwise().abs().sum(); + + return 1; +} + +#endif // !ISCOMPLEX diff --git a/blas/level2_impl.h b/blas/level2_impl.h new file mode 100644 index 000000000..5691e8a7f --- /dev/null +++ b/blas/level2_impl.h @@ -0,0 +1,214 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . 
+ +#include "common.h" +/* gemv: scales c by beta (when beta != 1), then adds alpha * op(a) * b, where *opa selects op: 'N' none, 'T' transpose, 'C' adjoint. Returns 0 for an unrecognised *opa, 1 otherwise. */ +int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *incb, RealScalar *pbeta, RealScalar *pc, int *incc) +{ + Scalar* a = reinterpret_cast(pa); + Scalar* b = reinterpret_cast(pb); + Scalar* c = reinterpret_cast(pc); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); + + if(beta!=Scalar(1)) + vector(c, *m, *incc) *= beta; + + if(OP(*opa)==NOTR) + if(*incc==1) + vector(c,*m) += alpha * matrix(a,*m,*n,*lda) * vector(b,*n,*incb); + else + vector(c,*m,*incc) += alpha * matrix(a,*m,*n,*lda) * vector(b,*n,*incb); + else if(OP(*opa)==TR) + if(*incb==1) + vector(c,*m,*incc) += alpha * matrix(a,*n,*m,*lda).transpose() * vector(b,*n); + else + vector(c,*m,*incc) += alpha * matrix(a,*n,*m,*lda).transpose() * vector(b,*n,*incb); + else if(OP(*opa)==ADJ) /* was a second TR test, which made this adjoint branch unreachable */ + if(*incb==1) + vector(c,*m,*incc) += alpha * matrix(a,*n,*m,*lda).adjoint() * vector(b,*n); + else + vector(c,*m,*incc) += alpha * matrix(a,*n,*m,*lda).adjoint() * vector(b,*n,*incb); + else + return 0; + + return 1; +} + +/* +int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) +{ + typedef void (*functype)(int, const Scalar *, int, Scalar *, int); + functype func[16]; + + static bool init = false; + if(!init) + { + for(int k=0; k<16; ++k) + func[k] = 0; + +// func[NOTR | (UP << 2) | (NUNIT << 3)] = (ei_triangular_solve_vector::run); +// func[TR | (UP << 2) | (NUNIT << 3)] = (ei_triangular_solve_vector::run); +// func[ADJ | (UP << 2) | (NUNIT << 3)] = (ei_triangular_solve_vector::run); +// +// func[NOTR | (LO << 2) | (NUNIT << 3)] = (ei_triangular_solve_vector::run); +// func[TR | (LO << 2) | (NUNIT << 3)] = (ei_triangular_solve_vector::run); +// func[ADJ | (LO << 2) | (NUNIT << 3)] = (ei_triangular_solve_vector::run); +// +// func[NOTR | (UP << 3) | (UNIT << 3)] = (ei_triangular_solve_vector::run); +// func[TR | (UP << 2) | (UNIT << 
3)] = (ei_triangular_solve_vector::run); +// func[ADJ | (UP << 2) | (UNIT << 3)] = (ei_triangular_solve_vector::run); +// +// func[NOTR | (LO << 2) | (UNIT << 3)] = (ei_triangular_solve_vector::run); +// func[TR | (LO << 2) | (UNIT << 3)] = (ei_triangular_solve_vector::run); +// func[ADJ | (LO << 2) | (UNIT << 3)] = (ei_triangular_solve_vector::run); + + init = true; + } + + Scalar* a = reinterpret_cast(pa); + Scalar* b = reinterpret_cast(pb); + + int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3); + if(code>=16 || func[code]==0) + return 0; + + func[code](*n, a, *lda, b, *incb); + return 1; +} +*/ + +/* +int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) +{ + // TODO + + typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int); + functype func[16]; + + static bool init = false; + if(!init) + { + for(int k=0; k<16; ++k) + func[k] = 0; + +// func[NOTR | (UP << 2) | (NUNIT << 3)] = (ei_product_triangular_matrix_vector::run); +// func[TR | (UP << 2) | (NUNIT << 3)] = (ei_product_triangular_matrix_vector::run); +// func[ADJ | (UP << 2) | (NUNIT << 3)] = (ei_product_triangular_matrix_vector::run); +// +// func[NOTR | (LO << 2) | (NUNIT << 3)] = (ei_product_triangular_matrix_vector::run); +// func[TR | (LO << 2) | (NUNIT << 3)] = (ei_product_triangular_matrix_vector::run); +// func[ADJ | (LO << 2) | (NUNIT << 3)] = (ei_product_triangular_matrix_vector::run); +// +// func[NOTR | (UP << 2) | (UNIT << 3)] = (ei_product_triangular_matrix_vector::run); +// func[TR | (UP << 2) | (UNIT << 3)] = (ei_product_triangular_matrix_vector::run); +// func[ADJ | (UP << 2) | (UNIT << 3)] = (ei_product_triangular_matrix_vector::run); +// +// func[NOTR | (LO << 2) | (UNIT << 3)] = (ei_product_triangular_matrix_vector::run); +// func[TR | (LO << 2) | (UNIT << 3)] = (ei_product_triangular_matrix_vector::run); +// func[ADJ | (LO << 2) | (UNIT << 3)] = 
(ei_product_triangular_matrix_vector::run); + + init = true; + } + + Scalar* a = reinterpret_cast(pa); + Scalar* b = reinterpret_cast(pb); + + int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3); + if(code>=16 || func[code]==0) + return 0; + + func[code](*n, a, *lda, b, *incb, b, *incb); + return 1; +} +*/ + +/* +int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *inca, RealScalar *pc, int *ldc) +{ + // TODO + typedef void (*functype)(int, const Scalar *, int, Scalar *, int, Scalar); + functype func[2]; + + static bool init = false; + if(!init) + { + for(int k=0; k<2; ++k) + func[k] = 0; + +// func[UP] = (ei_selfadjoint_product::run); +// func[LO] = (ei_selfadjoint_product::run); + + init = true; + } + + Scalar* a = reinterpret_cast(pa); + Scalar* c = reinterpret_cast(pc); + Scalar alpha = *reinterpret_cast(palpha); + + int code = UPLO(*uplo); + if(code>=2 || func[code]==0) + return 0; + + func[code](*n, a, *inca, c, *ldc, alpha); + return 1; +} +*/ + +/* +int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *inca, RealScalar *pb, int *incb, RealScalar *pc, int *ldc) +{ + // TODO + typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar); + functype func[2]; + + static bool init = false; + if(!init) + { + for(int k=0; k<2; ++k) + func[k] = 0; + +// func[UP] = (ei_selfadjoint_product::run); +// func[LO] = (ei_selfadjoint_product::run); + + init = true; + } + + Scalar* a = reinterpret_cast(pa); + Scalar* b = reinterpret_cast(pb); + Scalar* c = reinterpret_cast(pc); + Scalar alpha = *reinterpret_cast(palpha); + + int code = UPLO(*uplo); + if(code>=2 || func[code]==0) + return 0; + + func[code](*n, a, *inca, b, *incb, c, *ldc, alpha); + return 1; +} +*/ + +#if ISCOMPLEX + +#endif // ISCOMPLEX diff --git a/blas/level3_impl.h b/blas/level3_impl.h new file mode 100644 index 000000000..d44de1b5d --- /dev/null +++ b/blas/level3_impl.h @@ -0,0 +1,365 @@ +// This 
file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . 
+ +#include "common.h" + +int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +{ + typedef void (*functype)(int, int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar); + static functype func[12]; /* static: filled only once under the static 'init' guard, so it must persist across calls */ + + static bool init = false; + if(!init) + { + for(int k=0; k<12; ++k) + func[k] = 0; + func[NOTR | (NOTR << 2)] = (ei_general_matrix_matrix_product::run); + func[TR | (NOTR << 2)] = (ei_general_matrix_matrix_product::run); + func[ADJ | (NOTR << 2)] = (ei_general_matrix_matrix_product::run); + func[NOTR | (TR << 2)] = (ei_general_matrix_matrix_product::run); + func[TR | (TR << 2)] = (ei_general_matrix_matrix_product::run); + func[ADJ | (TR << 2)] = (ei_general_matrix_matrix_product::run); + func[NOTR | (ADJ << 2)] = (ei_general_matrix_matrix_product::run); + func[TR | (ADJ << 2)] = (ei_general_matrix_matrix_product::run); + func[ADJ | (ADJ << 2)] = (ei_general_matrix_matrix_product::run); + init = true; + } + + Scalar* a = reinterpret_cast(pa); + Scalar* b = reinterpret_cast(pb); + Scalar* c = reinterpret_cast(pc); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); + + if(beta!=Scalar(1)) + matrix(c, *m, *n, *ldc) *= beta; + + int code = OP(*opa) | (OP(*opb) << 2); + if(code>=12 || func[code]==0) + return 0; + + func[code](*m, *n, *k, a, *lda, b, *ldb, c, *ldc, alpha); + return 1; +} + +int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb) +{ + typedef void (*functype)(int, int, const Scalar *, int, Scalar *, int); + static functype func[32]; /* static: filled only once under the static 'init' guard, so it must persist across calls */ + + static bool init = false; + if(!init) + { + for(int k=0; k<32; ++k) + func[k] = 0; + + func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = 
(ei_triangular_solve_matrix::run); + func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + + func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + + func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + + func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + + + func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + + func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + + func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + + func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = 
(ei_triangular_solve_matrix::run); + + init = true; + } + + Scalar* a = reinterpret_cast(pa); + Scalar* b = reinterpret_cast(pb); + Scalar alpha = *reinterpret_cast(palpha); + + // TODO handle alpha + + int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4); + if(code>=32 || func[code]==0) + return 0; + + func[code](*m, *n, a, *lda, b, *ldb); + return 1; +} + + +// b = alpha*op(a)*b for side = 'L'or'l' +// b = alpha*b*op(a) for side = 'R'or'r' +int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb) +{ + typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar); + static functype func[32]; /* static: filled only once under the static 'init' guard, so it must persist across calls */ + + static bool init = false; + if(!init) + { + for(int k=0; k<32; ++k) + func[k] = 0; + + func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + + func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + + func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + + func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (RIGHT << 2) | (LO << 3) | 
(NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + + func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + + func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + + func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + + func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + + init = true; + } + + Scalar* a = reinterpret_cast(pa); + Scalar* b = reinterpret_cast(pb); + Scalar alpha = *reinterpret_cast(palpha); + + int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4); + if(code>=32 || func[code]==0) + return 0; + + func[code](*m, *n, a, *lda, b, *ldb, b, *ldb, alpha); + return 1; +} + +// c = alpha*a*b + beta*c for side = 'L'or'l' +// c = alpha*b*a + beta*c for side = 'R'or'r +int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +{ + Scalar* a = reinterpret_cast(pa); + Scalar* b = reinterpret_cast(pb); + Scalar* c = reinterpret_cast(pc); + Scalar alpha 
= *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); + + if(beta!=Scalar(1)) + matrix(c, *m, *n, *ldc) *= beta; + + if(SIDE(*side)==LEFT) + if(UPLO(*uplo)==UP) + ei_product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + else if(UPLO(*uplo)==LO) + ei_product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + else + return 0; + else if(SIDE(*side)==RIGHT) + if(UPLO(*uplo)==UP) + ei_product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + else if(UPLO(*uplo)==LO) + ei_product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + else + return 0; + else + return 0; + + return 1; +} + +// c = alpha*a*a' + beta*c for op = 'N'or'n' +// c = alpha*a'*a + beta*c for op = 'T'or't','C'or'c' +int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc) +{ + typedef void (*functype)(int, int, const Scalar *, int, Scalar *, int, Scalar); + static functype func[8]; /* static: filled only once under the static 'init' guard, so it must persist across calls */ + + static bool init = false; + if(!init) + { + for(int k=0; k<8; ++k) + func[k] = 0; + + func[NOTR | (UP << 2)] = (ei_selfadjoint_product::run); + func[TR | (UP << 2)] = (ei_selfadjoint_product::run); + func[ADJ | (UP << 2)] = (ei_selfadjoint_product::run); + + func[NOTR | (LO << 2)] = (ei_selfadjoint_product::run); + func[TR | (LO << 2)] = (ei_selfadjoint_product::run); + func[ADJ | (LO << 2)] = (ei_selfadjoint_product::run); + + init = true; + } + + Scalar* a = reinterpret_cast(pa); + Scalar* c = reinterpret_cast(pc); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); + + int code = OP(*op) | (UPLO(*uplo) << 2); + if(code>=8 || func[code]==0) + return 0; + + if(beta!=Scalar(1)) + matrix(c, *n, *n, *ldc) *= beta; + + func[code](*n, *k, a, *lda, c, *ldc, alpha); + return 1; +} + +// c = alpha*a*b' + alpha*b*a' + beta*c for op = 'N'or'n' +// c = alpha*a'*b + alpha*b'*a + beta*c for op = 'T'or't' +int 
EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +{ + Scalar* a = reinterpret_cast(pa); + Scalar* b = reinterpret_cast(pb); + Scalar* c = reinterpret_cast(pc); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); + + // TODO + + return 0; +} + + +#if ISCOMPLEX + +// c = alpha*a*b + beta*c for side = 'L'or'l' +// c = alpha*b*a + beta*c for side = 'R'or'r +int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +{ + Scalar* a = reinterpret_cast(pa); + Scalar* b = reinterpret_cast(pb); + Scalar* c = reinterpret_cast(pc); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); + + if(beta!=Scalar(1)) + matrix(c, *m, *n, *ldc) *= beta; + + if(SIDE(*side)==LEFT) + if(UPLO(*uplo)==UP) + ei_product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + else if(UPLO(*uplo)==LO) + ei_product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + else + return 0; + else if(SIDE(*side)==RIGHT) + if(UPLO(*uplo)==UP) + ei_product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + else if(UPLO(*uplo)==LO) + ei_product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + else + return 0; + else + return 0; + + return 1; +} + +// c = alpha*a*conj(a') + beta*c for op = 'N'or'n' +// c = alpha*conj(a')*a + beta*c for op = 'C'or'c' +int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc) +{ + typedef void (*functype)(int, int, const Scalar *, int, Scalar *, int, Scalar); + static functype func[8]; /* static: filled only once under the static 'init' guard, so it must persist across calls */ + + static bool init = false; + if(!init) + { + for(int k=0; k<8; ++k) + func[k] = 0; + + func[NOTR | (UP << 2)] = 
(ei_selfadjoint_product::run); + func[ADJ | (UP << 2)] = (ei_selfadjoint_product::run); + + func[NOTR | (LO << 2)] = (ei_selfadjoint_product::run); + func[ADJ | (LO << 2)] = (ei_selfadjoint_product::run); + + init = true; + } + + Scalar* a = reinterpret_cast(pa); + Scalar* c = reinterpret_cast(pc); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); + + int code = OP(*op) | (UPLO(*uplo) << 2); + if(code>=8 || func[code]==0) + return 0; + + if(beta!=Scalar(1)) + matrix(c, *n, *n, *ldc) *= beta; + + func[code](*n, *k, a, *lda, c, *ldc, alpha); + return 1; +} + +// c = alpha*a*conj(b') + conj(alpha)*b*conj(a') + beta*c, for op = 'N'or'n' +// c = alpha*conj(b')*a + conj(alpha)*conj(a')*b + beta*c, for op = 'C'or'c' +int EIGEN_BLAS_FUNC(her2k)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +{ + Scalar* a = reinterpret_cast(pa); + Scalar* b = reinterpret_cast(pb); + Scalar* c = reinterpret_cast(pc); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); + + // TODO + + return 0; +} + +#endif // ISCOMPLEX diff --git a/blas/single.cpp b/blas/single.cpp new file mode 100644 index 000000000..842e104b8 --- /dev/null +++ b/blas/single.cpp @@ -0,0 +1,31 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. 
+// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#define SCALAR float +#define SCALAR_SUFFIX s +#define ISCOMPLEX 0 + +#include "level1_impl.h" +#include "level2_impl.h" +#include "level3_impl.h" -- cgit v1.2.3