author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-08-31 08:18:53 -0700
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-08-31 08:18:53 -0700
commit | f41831e445f3fdd9dc324561135b2a19eafd9a56 (patch)
tree | 045cb917d62685b342ce129e384e03e63c916898
parent | 2ab603316af7c1bcf1d5e87d9ba50a2589b36e37 (diff)
Added support for argmax/argmin
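
This patch adds TensorIndexTupleOp (which pairs each coefficient with its flattened index), TensorTupleReducerOp (which reduces those pairs and extracts the winning index), the ArgMaxTupleReducer/ArgMinTupleReducer functors, a device-friendly Tuple class that mimics std::pair under nvcc, and argmax()/argmin() methods on TensorBase, together with CPU and CUDA tests.

A minimal usage sketch of the new entry points, mirroring the patterns in the tests added by this patch (the sizes, the planted maximum, and the function name `argmax_usage` are illustrative only):

```cpp
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::Tensor;
using Eigen::DenseIndex;

void argmax_usage() {
  Tensor<float, 4> tensor(2, 3, 5, 7);
  tensor.setRandom();
  tensor(1, 2, 4, 6) = 1000.0f;  // plant a known maximum at the last coefficient

  // argmax()/argmin() over all coefficients yield a rank-1 tensor
  // holding a single flattened index.
  Tensor<DenseIndex, 1> flat_argmax(1);
  flat_argmax = tensor.argmax();  // flat_argmax(0) == 2*3*5*7 - 1 here

  // argmax(dim) reduces a single dimension: the result drops that
  // dimension and stores, for each remaining coordinate, the offset
  // of the maximum along dimension 1.
  Tensor<DenseIndex, 3> argmax_along_dim1(2, 5, 7);
  argmax_along_dim1 = tensor.argmax(1);
}
```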
-rw-r--r-- | unsupported/Eigen/CXX11/Tensor | 1
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h | 288
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 59
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h | 2
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 34
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h | 54
-rw-r--r-- | unsupported/test/CMakeLists.txt | 2
-rw-r--r-- | unsupported/test/cxx11_tensor_argmax.cpp | 294
-rw-r--r-- | unsupported/test/cxx11_tensor_argmax_cuda.cpp | 241
9 files changed, 975 insertions, 0 deletions
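
For orientation while reading the diff: argmax/argmin are implemented as an ordinary tensor reduction over (index, value) tuples. The two building blocks, index_tuples() and the tuple reducers, can also be combined by hand, as test_argmax_tuple_reducer below does. A sketch of that manual form, assuming the same includes and usings as the example above (the function name `tuple_reduce_sketch` is illustrative):

```cpp
void tuple_reduce_sketch() {
  Tensor<float, 4> tensor(2, 3, 5, 7);
  tensor.setRandom();

  // Pair every coefficient with its flattened index.
  Tensor<Eigen::Tuple<DenseIndex, float>, 4> index_tuples(2, 3, 5, 7);
  index_tuples = tensor.index_tuples();

  // Reduce the first three dimensions, keeping for each offset of the
  // last dimension the (index, value) pair with the largest value.
  Eigen::array<DenseIndex, 3> reduce_dims;
  for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
  Tensor<Eigen::Tuple<DenseIndex, float>, 1> reduced(7);
  reduced = index_tuples.reduce(
      reduce_dims,
      Eigen::internal::ArgMaxTupleReducer<Eigen::Tuple<DenseIndex, float> >());

  // reduced(l).first is the flattened index of the maximum for slice l,
  // reduced(l).second is its value.
}
```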
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index d1908a4c3..cbe416602 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -73,6 +73,7 @@ #include "src/Tensor/TensorEvaluator.h" #include "src/Tensor/TensorExpr.h" #include "src/Tensor/TensorReduction.h" +#include "src/Tensor/TensorArgMax.h" #include "src/Tensor/TensorConcatenation.h" #include "src/Tensor/TensorContraction.h" #include "src/Tensor/TensorContractionThreadPool.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h new file mode 100644 index 000000000..ee3bf7fe3 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h @@ -0,0 +1,288 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com> +// Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H +#define EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H + +namespace Eigen { +namespace internal { + +/** \class TensorIndexTuple + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor + Index Tuple class. + * + * + */ +template<typename XprType> +struct traits<TensorIndexTupleOp<XprType> > : public traits<XprType> +{ + typedef traits<XprType> XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef Tuple<Index, typename XprTraits::Scalar> Scalar; + typedef typename XprType::Nested Nested; + typedef typename remove_reference<Nested>::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template<typename XprType> +struct eval<TensorIndexTupleOp<XprType>, Eigen::Dense> +{ + typedef const TensorIndexTupleOp<XprType>& type; +}; + +template<typename XprType> +struct nested<TensorIndexTupleOp<XprType>, 1, + typename eval<TensorIndexTupleOp<XprType> >::type> +{ + typedef TensorIndexTupleOp<XprType> type; +}; + +} // end namespace internal + +template<typename XprType> +class TensorIndexTupleOp : public TensorBase<TensorIndexTupleOp<XprType>, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Scalar Scalar; + typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; + typedef typename Eigen::internal::nested<TensorIndexTupleOp>::type Nested; + typedef typename Eigen::internal::traits<TensorIndexTupleOp>::StorageKind StorageKind; + typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Index Index; + typedef Tuple<Index, typename XprType::CoeffReturnType> CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexTupleOp(const XprType& expr) + : m_xpr(expr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all<typename XprType::Nested>::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; +}; + +// Eval as rvalue +template<typename ArgType, typename Device> +struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device> +{ + typedef TensorIndexTupleOp<ArgType> XprType; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + typedef typename TensorEvaluator<ArgType, Device>::Dimensions 
Dimensions; + static const int NumDims = internal::array_size<Dimensions>::value; + + enum { + IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, + PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false, + BlockAccess = false, + Layout = TensorEvaluator<ArgType, Device>::Layout, + CoordAccess = false, // to be implemented + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return CoeffReturnType(index, m_impl.coeff(index)); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + TensorEvaluator<ArgType, Device> m_impl; +}; + +namespace internal { + +/** \class TensorTupleIndex + * \ingroup CXX11_Tensor_Module + * + * \brief Converts to Tensor<Tuple<Index, Scalar> > and reduces to Tensor<Index>. + * + */ +template<typename ReduceOp, typename Dims, typename XprType> +struct traits<TensorTupleReducerOp<ReduceOp, Dims, XprType> > : public traits<XprType> +{ + typedef traits<XprType> XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef Index Scalar; + typedef typename XprType::Nested Nested; + typedef typename remove_reference<Nested>::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template<typename ReduceOp, typename Dims, typename XprType> +struct eval<TensorTupleReducerOp<ReduceOp, Dims, XprType>, Eigen::Dense> +{ + typedef const TensorTupleReducerOp<ReduceOp, Dims, XprType>& type; +}; + +template<typename ReduceOp, typename Dims, typename XprType> +struct nested<TensorTupleReducerOp<ReduceOp, Dims, XprType>, 1, + typename eval<TensorTupleReducerOp<ReduceOp, Dims, XprType> >::type> +{ + typedef TensorTupleReducerOp<ReduceOp, Dims, XprType> type; +}; + +} // end namespace internal + +template<typename ReduceOp, typename Dims, typename XprType> +class TensorTupleReducerOp : public TensorBase<TensorTupleReducerOp<ReduceOp, Dims, XprType>, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Scalar Scalar; + typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; + typedef typename Eigen::internal::nested<TensorTupleReducerOp>::type Nested; + typedef typename Eigen::internal::traits<TensorTupleReducerOp>::StorageKind StorageKind; + typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Index Index; + typedef Index CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTupleReducerOp(const XprType& expr, + const ReduceOp& reduce_op, + const int return_dim, + const Dims& reduce_dims) + : m_xpr(expr), m_reduce_op(reduce_op), m_return_dim(return_dim), m_reduce_dims(reduce_dims) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all<typename XprType::Nested>::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + const ReduceOp& reduce_op() const { return m_reduce_op; } + + EIGEN_DEVICE_FUNC + const Dims& reduce_dims() const { return m_reduce_dims; } + + EIGEN_DEVICE_FUNC + int return_dim() const { 
return m_return_dim; } + + protected: + typename XprType::Nested m_xpr; + const ReduceOp m_reduce_op; + const int m_return_dim; + const Dims m_reduce_dims; +}; + +// Eval as rvalue +template<typename ReduceOp, typename Dims, typename ArgType, typename Device> +struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Device> +{ + typedef TensorTupleReducerOp<ReduceOp, Dims, ArgType> XprType; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename TensorIndexTupleOp<ArgType>::CoeffReturnType TupleType; + typedef typename TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Dimensions Dimensions; + typedef typename TensorEvaluator<const TensorIndexTupleOp<ArgType> , Device>::Dimensions InputDimensions; + static const int NumDims = internal::array_size<InputDimensions>::value; + typedef array<Index, NumDims> StrideDims; + + enum { + IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, + PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false, + BlockAccess = false, + Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout, + CoordAccess = false, // to be implemented + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_orig_impl(op.expression(), device), + m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device), + m_return_dim(op.return_dim()), + m_strides(gen_strides(m_orig_impl.dimensions())), + m_stride_mod(gen_stride_mod(m_orig_impl.dimensions())), + m_stride_div(gen_stride_div()) { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + const TupleType v = m_impl.coeff(index); + return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div; + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + private: + EIGEN_DEVICE_FUNC StrideDims gen_strides(const InputDimensions& dims) { + StrideDims strides; + if (m_return_dim < 0) return strides; // Won't be using these. + eigen_assert(m_return_dim < NumDims && + "Asking to convert index to a dimension outside of the rank"); + + // Calculate m_stride_div and m_stride_mod, which are used to + // calculate the value of an index w.r.t. the m_return_dim. + if (Layout == static_cast<int>(ColMajor)) { + strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + strides[i] = strides[i-1] * dims[i-1]; + } + } else { + strides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + strides[i] = strides[i+1] * dims[i+1]; + } + } + return strides; + } + + EIGEN_DEVICE_FUNC Index gen_stride_mod(const InputDimensions& dims) { + if (Layout == static_cast<int>(ColMajor)) { + return (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : dims.TotalSize(); + } else { + return (m_return_dim > 0) ? 
m_strides[m_return_dim - 1] : dims.TotalSize(); + } + } + + EIGEN_DEVICE_FUNC Index gen_stride_div() { + return m_strides[m_return_dim]; + } + + protected: + TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device> m_orig_impl; + TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device> m_impl; + const int m_return_dim; + const StrideDims m_strides; + const Index m_stride_mod; + const Index m_stride_div; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 0e5e4b426..477e4a174 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -363,6 +363,58 @@ class TensorBase<Derived, ReadOnlyAccessors> return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MinReducer<CoeffReturnType>()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >, + const array<Index, NumDimensions>, const Derived> + argmax() const { + array<Index, NumDimensions> in_dims; + for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d; + return TensorTupleReducerOp< + internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >, + const array<Index, NumDimensions>, + const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), -1, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >, + const array<Index, NumDimensions>, const Derived> + argmin() const { + array<Index, NumDimensions> in_dims; + for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d; + return TensorTupleReducerOp< + internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >, + const array<Index, NumDimensions>, + const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), -1, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >, + const array<Index, 1>, const Derived> + argmax(const int return_dim) const { + array<Index, 1> in_dims; + in_dims[0] = return_dim; + return TensorTupleReducerOp< + internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >, + const array<Index, 1>, + const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >, + const array<Index, 1>, const Derived> + argmin(const int return_dim) const { + array<Index, 1> in_dims; + in_dims[0] = return_dim; + return TensorTupleReducerOp< + internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >, + const array<Index, 1>, + const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims); + } + template <typename Reducer, typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorReductionOp<Reducer, const Dims, const Derived> reduce(const Dims& dims, const Reducer& reducer) const { @@ -483,6 +535,13 @@ class TensorBase<Derived, ReadOnlyAccessors> return TensorInflationOp<const Strides, const Derived>(derived(), strides); } + // Returns a tensor containing index/value tuples + 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorIndexTupleOp<const Derived> + index_tuples() const { + return TensorIndexTupleOp<const Derived>(derived()); + } + // Support for custom unary and binary operations template <typename CustomUnaryFunc> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 17b0e6153..c22444e6f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -23,6 +23,8 @@ template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp; template<typename BinaryOp, typename LeftXprType, typename RightXprType> class TensorCwiseBinaryOp; template<typename IfXprType, typename ThenXprType, typename ElseXprType> class TensorSelectOp; template<typename Op, typename Dims, typename XprType> class TensorReductionOp; +template<typename XprType> class TensorIndexTupleOp; +template<typename ReduceOp, typename Dims, typename XprType> class TensorTupleReducerOp; template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp; template<typename Dimensions, typename LeftXprType, typename RightXprType> class TensorContractionOp; template<typename TargetType, typename XprType> class TensorConversionOp; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index d9061c216..ed259399b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -219,6 +219,40 @@ template <typename T> struct ProdReducer }; +// Argmin/Argmax reducers +template <typename T> struct ArgMaxTupleReducer +{ + static const bool PacketAccess = false; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + if (t.second > accum->second) { *accum = t; } + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return T(0, NumTraits<typename T::second_type>::lowest()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { + return accum; + } +}; + +template <typename T> struct ArgMinTupleReducer +{ + static const bool PacketAccess = false; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const { + if (t.second < accum->second) { *accum = t; } + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return T(0, NumTraits<typename T::second_type>::highest()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { + return accum; + } +}; + + // Random number generation namespace { #ifdef __CUDA_ARCH__ diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 78feb85cd..7dfa04760 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -31,6 +31,60 @@ template <> struct max_n_1<0> { static const size_t size = 1; }; + + + +#if defined(EIGEN_HAS_CONSTEXPR) +#define EIGEN_CONSTEXPR constexpr +#else +#define EIGEN_CONSTEXPR +#endif + +// Tuple mimics std::pair but works on e.g. nvcc. 
+template <typename U, typename V> struct Tuple { + public: + U first; + V second; + + typedef U first_type; + typedef V second_type; + + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple() : first(), second() {} + + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple(const U& f, const V& s) : first(f), second(s) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple& operator= (const Tuple& rhs) { + if (&rhs == this) return *this; + first = rhs.first; + second = rhs.second; + return *this; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void swap(Tuple& rhs) { + using numext::swap; + swap(first, rhs.first); + swap(second, rhs.second); + } +}; + +template <typename U, typename V> +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +bool operator==(const Tuple<U, V>& x, const Tuple<U, V>& y) { + return (x.first == y.first && x.second == y.second); +} + +template <typename U, typename V> +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +bool operator!=(const Tuple<U, V>& x, const Tuple<U, V>& y) { + return !(x == y); +} + +#undef EIGEN_CONSTEXPR + } // namespace Eigen #endif // EIGEN_CXX11_TENSOR_TENSOR_META_H diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 7c8fb8dde..b161cb370 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -130,6 +130,7 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_image_patch "-std=c++0x") ei_add_test(cxx11_tensor_volume_patch "-std=c++0x") ei_add_test(cxx11_tensor_reduction "-std=c++0x") + ei_add_test(cxx11_tensor_argmax "-std=c++0x") ei_add_test(cxx11_tensor_shuffling "-std=c++0x") ei_add_test(cxx11_tensor_striding "-std=c++0x") ei_add_test(cxx11_tensor_thread_pool "-std=c++0x") @@ -148,5 +149,6 @@ if(EIGEN_TEST_CXX11) # ei_add_test(cxx11_tensor_contract_cuda "-std=c++0x") # ei_add_test(cxx11_tensor_reduction_cuda "-std=c++0x") # ei_add_test(cxx11_tensor_random_cuda "-std=c++0x") +# ei_add_test(cxx11_tensor_argmax_cuda "-std=c++0x") endif() diff --git a/unsupported/test/cxx11_tensor_argmax.cpp b/unsupported/test/cxx11_tensor_argmax.cpp new file mode 100644 index 000000000..4c532409e --- /dev/null +++ b/unsupported/test/cxx11_tensor_argmax.cpp @@ -0,0 +1,294 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo <ebrevdo@google.com> +// Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::array; +using Eigen::Tuple; + +template <int DataLayout> +static void test_simple_index_tuples() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7); + index_tuples = tensor.index_tuples(); + + for (DenseIndex n = 0; n < 2*3*5*7; ++n) { + const Tuple<DenseIndex, float>& v = index_tuples.coeff(n); + VERIFY_IS_EQUAL(v.first, n); + VERIFY_IS_EQUAL(v.second, tensor.coeff(n)); + } +} + +template <int DataLayout> +static void test_index_tuples_dim() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7); + + index_tuples = tensor.index_tuples(); + + for (Eigen::DenseIndex n = 0; n < tensor.size(); ++n) { + const Tuple<DenseIndex, float>& v = index_tuples(n); //(i, j, k, l); + VERIFY_IS_EQUAL(v.first, n); + VERIFY_IS_EQUAL(v.second, tensor(n)); + } +} + +template <int DataLayout> +static void test_argmax_tuple_reducer() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7); + index_tuples = tensor.index_tuples(); + + Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced(1); + DimensionList<DenseIndex, 4> dims; + reduced = index_tuples.reduce( + dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float>>()); + + Tensor<float, 1, DataLayout> maxi = tensor.maximum(); + + VERIFY_IS_EQUAL(maxi(0), reduced(0).second); + + array<DenseIndex, 3> reduce_dims; + for (int d = 0; d < 3; ++d) reduce_dims[d] = d; + Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7); + reduced_by_dims = index_tuples.reduce( + reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float>>()); + + Tensor<float, 1, DataLayout> max_by_dims = tensor.maximum(reduce_dims); + + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(max_by_dims(l), reduced_by_dims(l).second); + } +} + +template <int DataLayout> +static void test_argmin_tuple_reducer() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7); + index_tuples = tensor.index_tuples(); + + Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced(1); + DimensionList<DenseIndex, 4> dims; + reduced = index_tuples.reduce( + dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float>>()); + + Tensor<float, 1, DataLayout> mini = tensor.minimum(); + + VERIFY_IS_EQUAL(mini(0), reduced(0).second); + + array<DenseIndex, 3> reduce_dims; + for (int d = 0; d < 3; ++d) reduce_dims[d] = d; + Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7); + reduced_by_dims = index_tuples.reduce( + reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float>>()); + + Tensor<float, 1, DataLayout> min_by_dims = tensor.minimum(reduce_dims); + + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(min_by_dims(l), reduced_by_dims(l).second); + } +} + +template <int DataLayout> +static void test_simple_argmax() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + tensor(0,0,0,0) = 10.0; + + Tensor<DenseIndex, 1, DataLayout> tensor_argmax(1); + + 
tensor_argmax = tensor.argmax(); + + VERIFY_IS_EQUAL(tensor_argmax(0), 0); + + tensor(1,2,4,6) = 20.0; + + tensor_argmax = tensor.argmax(); + + VERIFY_IS_EQUAL(tensor_argmax(0), 2*3*5*7 - 1); +} + +template <int DataLayout> +static void test_simple_argmin() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + tensor(0,0,0,0) = -10.0; + + Tensor<DenseIndex, 1, DataLayout> tensor_argmin(1); + + tensor_argmin = tensor.argmin(); + + VERIFY_IS_EQUAL(tensor_argmin(0), 0); + + tensor(1,2,4,6) = -20.0; + + tensor_argmin = tensor.argmin(); + + VERIFY_IS_EQUAL(tensor_argmin(0), 2*3*5*7 - 1); +} + +template <int DataLayout> +static void test_argmax_dim() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + std::vector<int> dims {2, 3, 5, 7}; + + for (int dim = 0; dim < 4; ++dim) { + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor<DenseIndex, 3, DataLayout> tensor_argmax; + array<DenseIndex, 4> ix; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != 0) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = 10.0 + tensor(ix) = 10.0; + } + } + } + } + + tensor_argmax = tensor.argmax(dim); + + VERIFY_IS_EQUAL(tensor_argmax.dimensions().TotalSize(), + size_t(2*3*5*7 / tensor.dimension(dim))); + for (size_t n = 0; n < tensor_argmax.dimensions().TotalSize(); ++n) { + // Expect max to be in the first index of the reduced dimension + VERIFY_IS_EQUAL(tensor_argmax.data()[n], 0); + } + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != tensor.dimension(dim) - 1) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0 + tensor(ix) = 20.0; + } + } + } + } + + tensor_argmax = tensor.argmax(dim); + + VERIFY_IS_EQUAL(tensor_argmax.dimensions().TotalSize(), + size_t(2*3*5*7 / tensor.dimension(dim))); + for (size_t n = 0; n < tensor_argmax.dimensions().TotalSize(); ++n) { + // Expect max to be in the last index of the reduced dimension + VERIFY_IS_EQUAL(tensor_argmax.data()[n], tensor.dimension(dim) - 1); + } + } +} + +template <int DataLayout> +static void test_argmin_dim() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + std::vector<int> dims {2, 3, 5, 7}; + + for (int dim = 0; dim < 4; ++dim) { + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor<DenseIndex, 3, DataLayout> tensor_argmin; + array<DenseIndex, 4> ix; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != 0) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = -10.0 + tensor(ix) = -10.0; + } + } + } + } + + tensor_argmin = tensor.argmin(dim); + + VERIFY_IS_EQUAL(tensor_argmin.dimensions().TotalSize(), + size_t(2*3*5*7 / tensor.dimension(dim))); + for (size_t n = 0; n < tensor_argmin.dimensions().TotalSize(); ++n) { + // Expect min to be in the first index of the reduced dimension + VERIFY_IS_EQUAL(tensor_argmin.data()[n], 0); + } + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != 
tensor.dimension(dim) - 1) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = -20.0 + tensor(ix) = -20.0; + } + } + } + } + + tensor_argmin = tensor.argmin(dim); + + VERIFY_IS_EQUAL(tensor_argmin.dimensions().TotalSize(), + size_t(2*3*5*7 / tensor.dimension(dim))); + for (size_t n = 0; n < tensor_argmin.dimensions().TotalSize(); ++n) { + // Expect min to be in the last index of the reduced dimension + VERIFY_IS_EQUAL(tensor_argmin.data()[n], tensor.dimension(dim) - 1); + } + } +} + +void test_cxx11_tensor_argmax() +{ + CALL_SUBTEST(test_simple_index_tuples<RowMajor>()); + CALL_SUBTEST(test_simple_index_tuples<ColMajor>()); + CALL_SUBTEST(test_index_tuples_dim<RowMajor>()); + CALL_SUBTEST(test_index_tuples_dim<ColMajor>()); + CALL_SUBTEST(test_argmax_tuple_reducer<RowMajor>()); + CALL_SUBTEST(test_argmax_tuple_reducer<ColMajor>()); + CALL_SUBTEST(test_argmin_tuple_reducer<RowMajor>()); + CALL_SUBTEST(test_argmin_tuple_reducer<ColMajor>()); + CALL_SUBTEST(test_simple_argmax<RowMajor>()); + CALL_SUBTEST(test_simple_argmax<ColMajor>()); + CALL_SUBTEST(test_simple_argmin<RowMajor>()); + CALL_SUBTEST(test_simple_argmin<ColMajor>()); + CALL_SUBTEST(test_argmax_dim<RowMajor>()); + CALL_SUBTEST(test_argmax_dim<ColMajor>()); + CALL_SUBTEST(test_argmin_dim<RowMajor>()); + CALL_SUBTEST(test_argmin_dim<ColMajor>()); +} diff --git a/unsupported/test/cxx11_tensor_argmax_cuda.cpp b/unsupported/test/cxx11_tensor_argmax_cuda.cpp new file mode 100644 index 000000000..d37490d15 --- /dev/null +++ b/unsupported/test/cxx11_tensor_argmax_cuda.cpp @@ -0,0 +1,241 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// TODO(mdevin): Free the cuda memory. 
+ +#define EIGEN_TEST_FUNC cxx11_tensor_cuda +#define EIGEN_USE_GPU + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template <int Layout> +void test_cuda_simple_argmax() +{ + Tensor<double, 3, Layout> in(Eigen::array<DenseIndex, 3>(72,53,97)); + Tensor<DenseIndex, 1, Layout> out_max(Eigen::array<DenseIndex, 1>(1)); + Tensor<DenseIndex, 1, Layout> out_min(Eigen::array<DenseIndex, 1>(1)); + in.setRandom(); + in *= in.constant(100.0); + in(0, 0, 0) = -1000.0; + in(71, 52, 96) = 1000.0; + + std::size_t in_bytes = in.size() * sizeof(double); + std::size_t out_bytes = out_max.size() * sizeof(DenseIndex); + + double* d_in; + DenseIndex* d_out_max; + DenseIndex* d_out_min; + cudaMalloc((void**)(&d_in), in_bytes); + cudaMalloc((void**)(&d_out_max), out_bytes); + cudaMalloc((void**)(&d_out_min), out_bytes); + + cudaMemcpy(d_in, in.data(), in_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<double, 3, Layout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 3>(72,53,97)); + Eigen::TensorMap<Eigen::Tensor<DenseIndex, 1, Layout>, Aligned > gpu_out_max(d_out_max, Eigen::array<DenseIndex, 1>(1)); + Eigen::TensorMap<Eigen::Tensor<DenseIndex, 1, Layout>, Aligned > gpu_out_min(d_out_min, Eigen::array<DenseIndex, 1>(1)); + + gpu_out_max.device(gpu_device) = gpu_in.argmax(); + gpu_out_min.device(gpu_device) = gpu_in.argmin(); + + assert(cudaMemcpyAsync(out_max.data(), d_out_max, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaMemcpyAsync(out_min.data(), d_out_min, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + VERIFY_IS_EQUAL(out_max(Eigen::array<DenseIndex, 1>(0)), 72*53*97 - 1); + VERIFY_IS_EQUAL(out_min(Eigen::array<DenseIndex, 1>(0)), 0); +} + +template <int DataLayout> +void test_cuda_argmax_dim() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + std::vector<int> dims; + dims.push_back(2); dims.push_back(3); dims.push_back(5); dims.push_back(7); + + for (int dim = 0; dim < 4; ++dim) { + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + array<DenseIndex, 3> out_shape; + for (int d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? 
dims[d] : dims[d+1]; + + Tensor<DenseIndex, 3, DataLayout> tensor_arg(out_shape); + + array<DenseIndex, 4> ix; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != 0) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = 10.0 + tensor(ix) = 10.0; + } + } + } + } + + std::size_t in_bytes = tensor.size() * sizeof(float); + std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex); + + float* d_in; + DenseIndex* d_out; + cudaMalloc((void**)(&d_in), in_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 4>(2, 3, 5, 7)); + Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout>, Aligned > gpu_out(d_out, out_shape); + + gpu_out.device(gpu_device) = gpu_in.argmax(dim); + + assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + VERIFY_IS_EQUAL(tensor_arg.dimensions().TotalSize(), + size_t(2*3*5*7 / tensor.dimension(dim))); + + for (size_t n = 0; n < tensor_arg.dimensions().TotalSize(); ++n) { + // Expect max to be in the first index of the reduced dimension + VERIFY_IS_EQUAL(tensor_arg.data()[n], 0); + } + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != tensor.dimension(dim) - 1) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0 + tensor(ix) = 20.0; + } + } + } + } + + cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice); + + gpu_out.device(gpu_device) = gpu_in.argmax(dim); + + assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (size_t n = 0; n < tensor_arg.dimensions().TotalSize(); ++n) { + // Expect max to be in the last index of the reduced dimension + VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1); + } + } +} + +template <int DataLayout> +void test_cuda_argmin_dim() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + std::vector<int> dims; + dims.push_back(2); dims.push_back(3); dims.push_back(5); dims.push_back(7); + + for (int dim = 0; dim < 4; ++dim) { + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + array<DenseIndex, 3> out_shape; + for (int d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? 
dims[d] : dims[d+1]; + + Tensor<DenseIndex, 3, DataLayout> tensor_arg(out_shape); + + array<DenseIndex, 4> ix; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != 0) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = -10.0 + tensor(ix) = -10.0; + } + } + } + } + + std::size_t in_bytes = tensor.size() * sizeof(float); + std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex); + + float* d_in; + DenseIndex* d_out; + cudaMalloc((void**)(&d_in), in_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 4>(2, 3, 5, 7)); + Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout>, Aligned > gpu_out(d_out, out_shape); + + gpu_out.device(gpu_device) = gpu_in.argmin(dim); + + assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + VERIFY_IS_EQUAL(tensor_arg.dimensions().TotalSize(), + size_t(2*3*5*7 / tensor.dimension(dim))); + + for (size_t n = 0; n < tensor_arg.dimensions().TotalSize(); ++n) { + // Expect min to be in the first index of the reduced dimension + VERIFY_IS_EQUAL(tensor_arg.data()[n], 0); + } + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != tensor.dimension(dim) - 1) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = -20.0 + tensor(ix) = -20.0; + } + } + } + } + + cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice); + + gpu_out.device(gpu_device) = gpu_in.argmin(dim); + + assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (size_t n = 0; n < tensor_arg.dimensions().TotalSize(); ++n) { + // Expect min to be in the last index of the reduced dimension + VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1); + } + } +} + +void test_cxx11_tensor_cuda() +{ + CALL_SUBTEST(test_cuda_simple_argmax<RowMajor>()); + CALL_SUBTEST(test_cuda_simple_argmax<ColMajor>()); + CALL_SUBTEST(test_cuda_argmax_dim<RowMajor>()); + CALL_SUBTEST(test_cuda_argmax_dim<ColMajor>()); + CALL_SUBTEST(test_cuda_argmin_dim<RowMajor>()); + CALL_SUBTEST(test_cuda_argmin_dim<ColMajor>()); +}
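
The least obvious arithmetic in the patch is in TensorTupleReducerOp's evaluator: coeff() recovers the coordinate along return_dim from the flattened index of the winning tuple via (v.first % m_stride_mod) / m_stride_div. Below is a standalone sketch of that computation for the column-major case; `coord_of` is a hypothetical helper written for illustration, not part of the patch:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Recover the coordinate along dimension return_dim from a flattened
// (column-major) index, using the same stride_mod/stride_div trick as
// the TensorTupleReducerOp evaluator above.
std::ptrdiff_t coord_of(std::ptrdiff_t flat_index, int return_dim,
                        const std::vector<std::ptrdiff_t>& dims) {
  const int num_dims = static_cast<int>(dims.size());
  assert(return_dim >= 0 && return_dim < num_dims);

  // Column-major strides: strides[i] = product of dims[0..i-1].
  std::vector<std::ptrdiff_t> strides(num_dims);
  strides[0] = 1;
  for (int i = 1; i < num_dims; ++i) strides[i] = strides[i - 1] * dims[i - 1];

  std::ptrdiff_t total_size = 1;
  for (int i = 0; i < num_dims; ++i) total_size *= dims[i];

  // stride_mod strips the contribution of all higher dimensions;
  // stride_div then divides away all lower ones.
  const std::ptrdiff_t stride_mod =
      (return_dim < num_dims - 1) ? strides[return_dim + 1] : total_size;
  const std::ptrdiff_t stride_div = strides[return_dim];
  return (flat_index % stride_mod) / stride_div;
}

// E.g. dims = {2, 3, 5}: flat index 9 = 1*1 + 1*2 + 1*6, so
// coord_of(9, d, dims) == 1 for every dimension d.
```

For row-major layouts the patch mirrors the stride construction and uses strides[m_return_dim - 1] (or the total size when m_return_dim is 0) as the modulus, per gen_stride_mod above.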