author    Gael Guennebaud <g.gael@free.fr>    2015-07-20 10:53:03 +0200
committer Gael Guennebaud <g.gael@free.fr>    2015-07-20 10:53:03 +0200
commit    88e352adac535403db3e5e7efa5fa3cb483c5c2e
tree      0825041d46aa9dc5625f205102bb7d3c40281905
parent    6799c26cd6d5aba6223de287e7ea7e024c9b8eb1
Add support for replicate in CUDA
 Eigen/src/Core/DenseBase.h    |  2 ++
 Eigen/src/Core/Replicate.h    |  5 +++++
 Eigen/src/Core/VectorwiseOp.h |  1 +
 test/cuda_basic.cu            | 22 +++++++++++++++++++++-
 4 files changed, 29 insertions(+), 1 deletion(-)
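
For context, Eigen's replicate() tiles an expression by a row factor and a column factor; this commit only makes the already-existing API callable from CUDA device code. A minimal host-side sketch of the semantics the new test exercises (not part of the commit itself):

#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::Array22f x;
  x << 1, 2,
       3, 4;

  // Tile the 2x2 block twice in each direction: a 4x4 result.
  std::cout << x.replicate(2, 2) << "\n\n";

  // colwise().replicate(3) stacks three copies vertically: 6x2.
  std::cout << x.colwise().replicate(3) << "\n\n";

  // rowwise().replicate(3) lays three copies side by side: 2x6.
  std::cout << x.rowwise().replicate(3) << "\n";
}
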
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 58f682dce..c603b5a6d 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -517,7 +517,9 @@ template<typename Derived> class DenseBase
template<int p> RealScalar lpNorm() const;
template<int RowFactor, int ColFactor>
+ EIGEN_DEVICE_FUNC
const Replicate<Derived,RowFactor,ColFactor> replicate() const;
+ EIGEN_DEVICE_FUNC
const Replicate<Derived,Dynamic,Dynamic> replicate(Index rowFacor,Index colFactor) const;
typedef Reverse<Derived, BothDirections> ReverseReturnType;
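
All three headers get the same treatment: each replicate-related entry point is annotated with EIGEN_DEVICE_FUNC so that nvcc compiles it for both host and device. Roughly, the macro behaves as in the simplified sketch below; the real definition in Eigen/src/Core/util/Macros.h carries additional guards.

// Simplified sketch of EIGEN_DEVICE_FUNC; the actual macro has more guards.
#if defined(__CUDACC__)
  // nvcc: compile annotated functions for both the CPU and the GPU.
  #define EIGEN_DEVICE_FUNC __host__ __device__
#else
  // Ordinary host compilers see no annotation at all.
  #define EIGEN_DEVICE_FUNC
#endif
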
diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h
index d105de62b..518c52e57 100644
--- a/Eigen/src/Core/Replicate.h
+++ b/Eigen/src/Core/Replicate.h
@@ -69,6 +69,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
template<typename OriginalMatrixType>
+ EIGEN_DEVICE_FUNC
inline explicit Replicate(const OriginalMatrixType& matrix)
: m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
{
@@ -78,6 +79,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
}
template<typename OriginalMatrixType>
+ EIGEN_DEVICE_FUNC
inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
: m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
{
@@ -85,9 +87,12 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
}
+ EIGEN_DEVICE_FUNC
inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
+ EIGEN_DEVICE_FUNC
inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
+ EIGEN_DEVICE_FUNC
const _MatrixTypeNested& nestedExpression() const
{
return m_matrix;
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index 5ebcdc100..0cc5eff16 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -461,6 +461,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
*/
// NOTE implemented here because of sunstudio's compilation errors
template<int Factor> const Replicate<ExpressionType,(IsVertical?Factor:1),(IsHorizontal?Factor:1)>
+ EIGEN_DEVICE_FUNC
replicate(Index factor = Factor) const
{
return Replicate<ExpressionType,Direction==Vertical?Factor:1,Direction==Horizontal?Factor:1>
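
The new test below writes its results through Eigen::Map, which views a raw pointer as an Eigen array without copying. A host-side sketch of that idiom, independent of this commit:

#include <Eigen/Core>

int main()
{
  float buffer[16] = {};
  // View 16 raw floats as a dynamically-sized 4x4 array, no copy involved.
  Eigen::Map<Eigen::Array<float,Eigen::Dynamic,Eigen::Dynamic> > m(buffer, 4, 4);

  Eigen::Array22f x;
  x << 1, 2,
       3, 4;
  m = x.replicate(2, 2);  // tile the 2x2 block into the mapped 4x4 storage
}
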
diff --git a/test/cuda_basic.cu b/test/cuda_basic.cu
index 6ec9446c6..571a52299 100644
--- a/test/cuda_basic.cu
+++ b/test/cuda_basic.cu
@@ -48,6 +48,23 @@ struct coeff_wise {
};
template<typename T>
+struct replicate {
+ EIGEN_DEVICE_FUNC
+ void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
+ {
+ using namespace Eigen;
+ T x1(in+i);
+ int step = x1.size() * 4;
+ int stride = 3 * step;
+
+ typedef Map<Array<typename T::Scalar,Dynamic,Dynamic> > MapType;
+ MapType(out+i*stride+0*step, x1.rows()*2, x1.cols()*2) = x1.replicate(2,2);
+ MapType(out+i*stride+1*step, x1.rows()*3, x1.cols()) = in[i] * x1.colwise().replicate(3);
+ MapType(out+i*stride+2*step, x1.rows(), x1.cols()*3) = in[i] * x1.rowwise().replicate(3);
+ }
+};
+
+template<typename T>
struct redux {
EIGEN_DEVICE_FUNC
void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
@@ -117,7 +134,7 @@ void test_cuda_basic()
Eigen::VectorXf in, out;
#ifndef __CUDA_ARCH__
- int data_size = nthreads * 16;
+ int data_size = nthreads * 512;
in.setRandom(data_size);
out.setRandom(data_size);
#endif
@@ -125,6 +142,9 @@ void test_cuda_basic()
CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise<Vector3f>(), nthreads, in, out) );
CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise<Array44f>(), nthreads, in, out) );
+ CALL_SUBTEST( run_and_compare_to_cuda(replicate<Array4f>(), nthreads, in, out) );
+ CALL_SUBTEST( run_and_compare_to_cuda(replicate<Array33f>(), nthreads, in, out) );
+
CALL_SUBTEST( run_and_compare_to_cuda(redux<Array4f>(), nthreads, in, out) );
CALL_SUBTEST( run_and_compare_to_cuda(redux<Matrix3f>(), nthreads, in, out) );
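
run_and_compare_to_cuda (from test/cuda_common.h) evaluates a functor once per thread on both the CPU and the GPU and compares the two output buffers. The device half amounts to something like the hypothetical kernel below; the name run_on_cuda_sketch and the launch geometry are illustrative, not the harness's actual code.

// Hypothetical kernel driving a functor such as replicate<Array4f> on the GPU.
template<typename Functor, typename Scalar>
__global__ void run_on_cuda_sketch(Functor f, int n, const Scalar* in, Scalar* out)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n)
    f(i, in, out);  // each thread evaluates the expression at its own offset
}

// Possible launch:
//   run_on_cuda_sketch<<<(n + 127) / 128, 128>>>(replicate<Eigen::Array4f>(), n, d_in, d_out);

The bump of data_size from nthreads * 16 to nthreads * 512 accounts for the new functor's output footprint: each thread writes three results of step = x1.size() * 4 floats each, so for Array33f the per-thread stride is 3 * 36 = 108 floats, which no longer fits in the old 16-float budget.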