diff options
Diffstat (limited to 'third_party/eigen3/unsupported/Eigen')
62 files changed, 0 insertions, 27888 deletions
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h deleted file mode 100644 index ad6a9dda10..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h +++ /dev/null @@ -1,508 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11META_H -#define EIGEN_CXX11META_H - -namespace Eigen { - -namespace internal { - -/** \internal - * \file CXX11/Core/util/CXX11Meta.h - * This file contains generic metaprogramming classes which are not specifically related to Eigen. - * This file expands upon Core/util/Meta.h and adds support for C++11 specific features. - */ - -template<typename... tt> -struct type_list { constexpr static int count = sizeof...(tt); }; - -template<typename t, typename... tt> -struct type_list<t, tt...> { constexpr static int count = sizeof...(tt) + 1; typedef t first_type; }; - -template<typename T, T... nn> -struct numeric_list { constexpr static std::size_t count = sizeof...(nn); }; - -template<typename T, T n, T... 
nn> -struct numeric_list<T, n, nn...> { constexpr static std::size_t count = sizeof...(nn) + 1; constexpr static T first_value = n; }; - -/* numeric list constructors - * - * equivalencies: - * constructor result - * typename gen_numeric_list<int, 5>::type numeric_list<int, 0,1,2,3,4> - * typename gen_numeric_list_reversed<int, 5>::type numeric_list<int, 4,3,2,1,0> - * typename gen_numeric_list_swapped_pair<int, 5,1,2>::type numeric_list<int, 0,2,1,3,4> - * typename gen_numeric_list_repeated<int, 0, 5>::type numeric_list<int, 0,0,0,0,0> - */ - -template<typename T, std::size_t n, T... ii> struct gen_numeric_list : gen_numeric_list<T, n-1, n-1, ii...> {}; -template<typename T, T... ii> struct gen_numeric_list<T, 0, ii...> { typedef numeric_list<T, ii...> type; }; - -template<typename T, std::size_t n, T... ii> struct gen_numeric_list_reversed : gen_numeric_list_reversed<T, n-1, ii..., n-1> {}; -template<typename T, T... ii> struct gen_numeric_list_reversed<T, 0, ii...> { typedef numeric_list<T, ii...> type; }; - -template<typename T, std::size_t n, T a, T b, T... ii> struct gen_numeric_list_swapped_pair : gen_numeric_list_swapped_pair<T, n-1, a, b, (n-1) == a ? b : ((n-1) == b ? a : (n-1)), ii...> {}; -template<typename T, T a, T b, T... ii> struct gen_numeric_list_swapped_pair<T, 0, a, b, ii...> { typedef numeric_list<T, ii...> type; }; - -template<typename T, std::size_t n, T V, T... nn> struct gen_numeric_list_repeated : gen_numeric_list_repeated<T, n-1, V, V, nn...> {}; -template<typename T, T V, T... nn> struct gen_numeric_list_repeated<T, 0, V, nn...> { typedef numeric_list<T, nn...> type; }; - -/* list manipulation: concatenate */ - -template<class a, class b> struct concat; - -template<typename... as, typename... bs> struct concat<type_list<as...>, type_list<bs...>> { typedef type_list<as..., bs...> type; }; -template<typename T, T... as, T... 
bs> struct concat<numeric_list<T, as...>, numeric_list<T, bs...> > { typedef numeric_list<T, as..., bs...> type; }; - -template<typename... p> struct mconcat; -template<typename a> struct mconcat<a> { typedef a type; }; -template<typename a, typename b> struct mconcat<a, b> : concat<a, b> {}; -template<typename a, typename b, typename... cs> struct mconcat<a, b, cs...> : concat<a, typename mconcat<b, cs...>::type> {}; - -/* list manipulation: extract slices */ - -template<int n, typename x> struct take; -template<int n, typename a, typename... as> struct take<n, type_list<a, as...>> : concat<type_list<a>, typename take<n-1, type_list<as...>>::type> {}; -template<int n> struct take<n, type_list<>> { typedef type_list<> type; }; -template<typename a, typename... as> struct take<0, type_list<a, as...>> { typedef type_list<> type; }; -template<> struct take<0, type_list<>> { typedef type_list<> type; }; - -template<typename T, int n, T a, T... as> struct take<n, numeric_list<T, a, as...>> : concat<numeric_list<T, a>, typename take<n-1, numeric_list<T, as...>>::type> {}; -template<typename T, int n> struct take<n, numeric_list<T>> { typedef numeric_list<T> type; }; -template<typename T, T a, T... as> struct take<0, numeric_list<T, a, as...>> { typedef numeric_list<T> type; }; -template<typename T> struct take<0, numeric_list<T>> { typedef numeric_list<T> type; }; - -template<typename T, int n, T... ii> struct h_skip_helper_numeric; -template<typename T, int n, T i, T... ii> struct h_skip_helper_numeric<T, n, i, ii...> : h_skip_helper_numeric<T, n-1, ii...> {}; -template<typename T, T i, T... ii> struct h_skip_helper_numeric<T, 0, i, ii...> { typedef numeric_list<T, i, ii...> type; }; -template<typename T, int n> struct h_skip_helper_numeric<T, n> { typedef numeric_list<T> type; }; -template<typename T> struct h_skip_helper_numeric<T, 0> { typedef numeric_list<T> type; }; - -template<int n, typename... 
tt> struct h_skip_helper_type; -template<int n, typename t, typename... tt> struct h_skip_helper_type<n, t, tt...> : h_skip_helper_type<n-1, tt...> {}; -template<typename t, typename... tt> struct h_skip_helper_type<0, t, tt...> { typedef type_list<t, tt...> type; }; -template<int n> struct h_skip_helper_type<n> { typedef type_list<> type; }; -template<> struct h_skip_helper_type<0> { typedef type_list<> type; }; - -template<int n> -struct h_skip { - template<typename T, T... ii> - constexpr static inline typename h_skip_helper_numeric<T, n, ii...>::type helper(numeric_list<T, ii...>) { return typename h_skip_helper_numeric<T, n, ii...>::type(); } - template<typename... tt> - constexpr static inline typename h_skip_helper_type<n, tt...>::type helper(type_list<tt...>) { return typename h_skip_helper_type<n, tt...>::type(); } -}; - -template<int n, typename a> struct skip { typedef decltype(h_skip<n>::helper(a())) type; }; - -template<int start, int count, typename a> struct slice : take<count, typename skip<start, a>::type> {}; - -/* list manipulation: retrieve single element from list */ - -template<int n, typename x> struct get; - -template<int n, typename a, typename... as> struct get<n, type_list<a, as...>> : get<n-1, type_list<as...>> {}; -template<typename a, typename... as> struct get<0, type_list<a, as...>> { typedef a type; }; -template<int n EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, as)> struct get<n, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(as)>> { static_assert((n - n) < 0, "meta-template get: The element to extract from a list must be smaller than the size of the list."); }; - -template<typename T, int n, T a, T... as> struct get<n, numeric_list<T, a, as...>> : get<n-1, numeric_list<T, as...>> {}; -template<typename T, T a, T... 
as> struct get<0, numeric_list<T, a, as...>> { constexpr static T value = a; }; -template<typename T, int n EIGEN_TPL_PP_SPEC_HACK_DEFC(T, as)> struct get<n, numeric_list<T EIGEN_TPL_PP_SPEC_HACK_USEC(as)>> { static_assert((n - n) < 0, "meta-template get: The element to extract from a list must be smaller than the size of the list."); }; - -/* always get type, regardless of dummy; good for parameter pack expansion */ - -template<typename T, T dummy, typename t> struct id_numeric { typedef t type; }; -template<typename dummy, typename t> struct id_type { typedef t type; }; - -/* equality checking, flagged version */ - -template<typename a, typename b> struct is_same_gf : is_same<a, b> { constexpr static int global_flags = 0; }; - -/* apply_op to list */ - -template< - bool from_left, // false - template<typename, typename> class op, - typename additional_param, - typename... values -> -struct h_apply_op_helper { typedef type_list<typename op<values, additional_param>::type...> type; }; -template< - template<typename, typename> class op, - typename additional_param, - typename... values -> -struct h_apply_op_helper<true, op, additional_param, values...> { typedef type_list<typename op<additional_param, values>::type...> type; }; - -template< - bool from_left, - template<typename, typename> class op, - typename additional_param -> -struct h_apply_op -{ - template<typename... 
values> - constexpr static typename h_apply_op_helper<from_left, op, additional_param, values...>::type helper(type_list<values...>) - { return typename h_apply_op_helper<from_left, op, additional_param, values...>::type(); } -}; - -template< - template<typename, typename> class op, - typename additional_param, - typename a -> -struct apply_op_from_left { typedef decltype(h_apply_op<true, op, additional_param>::helper(a())) type; }; - -template< - template<typename, typename> class op, - typename additional_param, - typename a -> -struct apply_op_from_right { typedef decltype(h_apply_op<false, op, additional_param>::helper(a())) type; }; - -/* see if an element is in a list */ - -template< - template<typename, typename> class test, - typename check_against, - typename h_list, - bool last_check_positive = false -> -struct contained_in_list; - -template< - template<typename, typename> class test, - typename check_against, - typename h_list -> -struct contained_in_list<test, check_against, h_list, true> -{ - constexpr static bool value = true; -}; - -template< - template<typename, typename> class test, - typename check_against, - typename a, - typename... 
as -> -struct contained_in_list<test, check_against, type_list<a, as...>, false> : contained_in_list<test, check_against, type_list<as...>, test<check_against, a>::value> {}; - -template< - template<typename, typename> class test, - typename check_against - EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty) -> -struct contained_in_list<test, check_against, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, false> { constexpr static bool value = false; }; - -/* see if an element is in a list and check for global flags */ - -template< - template<typename, typename> class test, - typename check_against, - typename h_list, - int default_flags = 0, - bool last_check_positive = false, - int last_check_flags = default_flags -> -struct contained_in_list_gf; - -template< - template<typename, typename> class test, - typename check_against, - typename h_list, - int default_flags, - int last_check_flags -> -struct contained_in_list_gf<test, check_against, h_list, default_flags, true, last_check_flags> -{ - constexpr static bool value = true; - constexpr static int global_flags = last_check_flags; -}; - -template< - template<typename, typename> class test, - typename check_against, - typename a, - typename... as, - int default_flags, - int last_check_flags -> -struct contained_in_list_gf<test, check_against, type_list<a, as...>, default_flags, false, last_check_flags> : contained_in_list_gf<test, check_against, type_list<as...>, default_flags, test<check_against, a>::value, test<check_against, a>::global_flags> {}; - -template< - template<typename, typename> class test, - typename check_against - EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty), - int default_flags, - int last_check_flags -> -struct contained_in_list_gf<test, check_against, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, default_flags, false, last_check_flags> { constexpr static bool value = false; constexpr static int global_flags = default_flags; }; - -/* generic reductions */ - -template< - typename Reducer, - typename... 
Ts -> struct reduce; - -template< - typename Reducer, - typename A, - typename... Ts -> struct reduce<Reducer, A, Ts...> -{ - constexpr static inline A run(A a, Ts...) { return a; } -}; - -template< - typename Reducer, - typename A, - typename B, - typename... Ts -> struct reduce<Reducer, A, B, Ts...> -{ - constexpr static inline auto run(A a, B b, Ts... ts) -> decltype(Reducer::run(a, reduce<Reducer, B, Ts...>::run(b, ts...))) { - return Reducer::run(a, reduce<Reducer, B, Ts...>::run(b, ts...)); - } -}; - -/* generic binary operations */ - -struct sum_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a + b) { return a + b; } }; -struct product_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a * b) { return a * b; } }; - -struct logical_and_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a && b) { return a && b; } }; -struct logical_or_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a || b) { return a || b; } }; - -struct equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a == b) { return a == b; } }; -struct not_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a != b) { return a != b; } }; -struct lesser_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a < b) { return a < b; } }; -struct lesser_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a <= b) { return a <= b; } }; -struct greater_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a > b) { return a > b; } }; -struct greater_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a >= b) { return a >= b; } }; - -/* generic unary operations */ - -struct not_op { 
template<typename A> constexpr static inline auto run(A a) -> decltype(!a) { return !a; } }; -struct negation_op { template<typename A> constexpr static inline auto run(A a) -> decltype(-a) { return -a; } }; -struct greater_equal_zero_op { template<typename A> constexpr static inline auto run(A a) -> decltype(a >= 0) { return a >= 0; } }; - - -/* reductions for lists */ - -// using auto -> return value spec makes ICC 13.0 and 13.1 crash here, so we have to hack it -// together in front... (13.0 doesn't work with array_prod/array_reduce/... anyway, but 13.1 -// does... -template<typename... Ts> -constexpr inline decltype(reduce<product_op, Ts...>::run((*((Ts*)0))...)) arg_prod(Ts... ts) -{ - return reduce<product_op, Ts...>::run(ts...); -} - -template<typename... Ts> -constexpr inline decltype(reduce<sum_op, Ts...>::run((*((Ts*)0))...)) arg_sum(Ts... ts) -{ - return reduce<sum_op, Ts...>::run(ts...); -} - -/* reverse arrays */ - -template<typename Array, int... n> -constexpr inline Array h_array_reverse(Array arr, numeric_list<int, n...>) -{ - return {{array_get<sizeof...(n) - n - 1>(arr)...}}; -} - -template<typename T, std::size_t N> -constexpr inline std::array<T, N> array_reverse(std::array<T, N> arr) -{ - return h_array_reverse(arr, typename gen_numeric_list<int, N>::type()); -} - -/* generic array reductions */ - -// can't reuse standard reduce() interface above because Intel's Compiler -// *really* doesn't like it, so we just reimplement the stuff -// (start from N - 1 and work down to 0 because specialization for -// n == N - 1 also doesn't work in Intel's compiler, so it goes into -// an infinite loop) -template<typename Reducer, typename T, std::size_t N, std::size_t n = N - 1> -struct h_array_reduce { - constexpr static inline auto run(std::array<T, N> arr, T identity) -> decltype(Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr), array_get<n>(arr))) - { - return Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr), array_get<n>(arr)); - 
} -}; - -template<typename Reducer, typename T, std::size_t N> -struct h_array_reduce<Reducer, T, N, 0> -{ - constexpr static inline T run(std::array<T, N> arr, T identity) - { - return array_get<0>(arr); - } -}; - -template<typename Reducer, typename T, std::size_t N> -struct h_array_reduce<Reducer, T, 0> -{ - constexpr static inline T run(std::array<T, 0> arr, T identity) - { - return identity; - } -}; - -template<typename Reducer, typename T, std::size_t N> -constexpr inline auto array_reduce(std::array<T, N> arr, T identity) -> decltype(h_array_reduce<Reducer, T, N>::run(arr)) -{ - return h_array_reduce<Reducer, T, N>::run(arr, identity); -} - -/* standard array reductions */ - -template<typename T, std::size_t N> -constexpr inline auto array_sum(std::array<T, N> arr) -> decltype(array_reduce<sum_op, T, N>(arr)) -{ - return array_reduce<sum_op, T, N>(arr, 0); -} - -template<typename T, std::size_t N> -constexpr inline auto array_prod(std::array<T, N> arr) -> decltype(array_reduce<product_op, T, N>(arr)) -{ - return array_reduce<product_op, T, N>(arr, 1); -} - -/* zip an array */ - -template<typename Op, typename A, typename B, std::size_t N, int... n> -constexpr inline std::array<decltype(Op::run(A(), B())),N> h_array_zip(std::array<A, N> a, std::array<B, N> b, numeric_list<int, n...>) -{ - return std::array<decltype(Op::run(A(), B())),N>{{ Op::run(array_get<n>(a), array_get<n>(b))... }}; -} - -template<typename Op, typename A, typename B, std::size_t N> -constexpr inline std::array<decltype(Op::run(A(), B())),N> array_zip(std::array<A, N> a, std::array<B, N> b) -{ - return h_array_zip<Op>(a, b, typename gen_numeric_list<int, N>::type()); -} - -/* zip an array and reduce the result */ - -template<typename Reducer, typename Op, typename A, typename B, std::size_t N, int... 
n> -constexpr inline auto h_array_zip_and_reduce(std::array<A, N> a, std::array<B, N> b, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...)) -{ - return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...); -} - -template<typename Reducer, typename Op, typename A, typename B, std::size_t N> -constexpr inline auto array_zip_and_reduce(std::array<A, N> a, std::array<B, N> b) -> decltype(h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type())) -{ - return h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type()); -} - -/* apply stuff to an array */ - -template<typename Op, typename A, std::size_t N, int... n> -constexpr inline std::array<decltype(Op::run(A())),N> h_array_apply(std::array<A, N> a, numeric_list<int, n...>) -{ - return std::array<decltype(Op::run(A())),N>{{ Op::run(array_get<n>(a))... }}; -} - -template<typename Op, typename A, std::size_t N> -constexpr inline std::array<decltype(Op::run(A())),N> array_apply(std::array<A, N> a) -{ - return h_array_apply<Op>(a, typename gen_numeric_list<int, N>::type()); -} - -/* apply stuff to an array and reduce */ - -template<typename Reducer, typename Op, typename A, std::size_t N, int... 
n> -constexpr inline auto h_array_apply_and_reduce(std::array<A, N> arr, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...)) -{ - return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...); -} - -template<typename Reducer, typename Op, typename A, std::size_t N> -constexpr inline auto array_apply_and_reduce(std::array<A, N> a) -> decltype(h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type())) -{ - return h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type()); -} - -/* repeat a value n times (and make an array out of it - * usage: - * std::array<int, 16> = repeat<16>(42); - */ - -template<int n> -struct h_repeat -{ - template<typename t, int... ii> - constexpr static inline std::array<t, n> run(t v, numeric_list<int, ii...>) - { - return {{ typename id_numeric<int, ii, t>::type(v)... }}; - } -}; - -template<int n, typename t> -constexpr std::array<t, n> repeat(t v) { return h_repeat<n>::run(v, typename gen_numeric_list<int, n>::type()); } - -/* instantiate a class by a C-style array */ -template<class InstType, typename ArrType, std::size_t N, bool Reverse, typename... Ps> -struct h_instantiate_by_c_array; - -template<class InstType, typename ArrType, std::size_t N, typename... Ps> -struct h_instantiate_by_c_array<InstType, ArrType, N, false, Ps...> -{ - static InstType run(ArrType* arr, Ps... args) - { - return h_instantiate_by_c_array<InstType, ArrType, N - 1, false, Ps..., ArrType>::run(arr + 1, args..., arr[0]); - } -}; - -template<class InstType, typename ArrType, std::size_t N, typename... Ps> -struct h_instantiate_by_c_array<InstType, ArrType, N, true, Ps...> -{ - static InstType run(ArrType* arr, Ps... 
args) - { - return h_instantiate_by_c_array<InstType, ArrType, N - 1, false, ArrType, Ps...>::run(arr + 1, arr[0], args...); - } -}; - -template<class InstType, typename ArrType, typename... Ps> -struct h_instantiate_by_c_array<InstType, ArrType, 0, false, Ps...> -{ - static InstType run(ArrType* arr, Ps... args) - { - (void)arr; - return InstType(args...); - } -}; - -template<class InstType, typename ArrType, typename... Ps> -struct h_instantiate_by_c_array<InstType, ArrType, 0, true, Ps...> -{ - static InstType run(ArrType* arr, Ps... args) - { - (void)arr; - return InstType(args...); - } -}; - -template<class InstType, typename ArrType, std::size_t N, bool Reverse = false> -InstType instantiate_by_c_array(ArrType* arr) -{ - return h_instantiate_by_c_array<InstType, ArrType, N, Reverse>::run(arr); -} - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CXX11META_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h deleted file mode 100644 index a590cf4e18..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h +++ /dev/null @@ -1,116 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11WORKAROUNDS_H -#define EIGEN_CXX11WORKAROUNDS_H - -/* COMPATIBILITY CHECKS - * (so users of compilers that are too old get some realistic error messages) - */ -#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 1310) -#error Intel Compiler only supports required C++ features since version 13.1. 
-// note that most stuff in principle works with 13.0 but when combining -// some features, at some point 13.0 will just fail with an internal assertion -#elif defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6)) -// G++ < 4.6 by default will continue processing the source files - even if we use #error to make -// it error out. For this reason, we use the pragma to make sure G++ aborts at the first error -// it sees. Unfortunately, that is still not our #error directive, but at least the output is -// short enough the user has a chance to see that the compiler version is not sufficient for -// the funky template mojo we use. -#pragma GCC diagnostic error "-Wfatal-errors" -#error GNU C++ Compiler (g++) only supports required C++ features since version 4.6. -#endif - -/* Check that the compiler at least claims to support C++11. It might not be sufficient - * because the compiler may not implement it correctly, but at least we'll know. - */ -#if __cplusplus <= 199711L -#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) -#pragma GCC diagnostic error "-Wfatal-errors" -#endif -#error This library needs at least a C++11 compliant compiler. If you use g++/clang, please enable the -std=c++11 compiler flag. (-std=c++0x on older versions.) 
-#endif - -namespace Eigen { - -// Use std::array as Eigen array -template <typename T, std::size_t N> using array = std::array<T, N>; - -namespace internal { - -/* std::get is only constexpr in C++14, not yet in C++11 - * - libstdc++ from version 4.7 onwards has it nevertheless, - * so use that - * - libstdc++ older versions: use _M_instance directly - * - libc++ all versions so far: use __elems_ directly - * - all other libs: use std::get to be portable, but - * this may not be constexpr - */ -#if defined(__GLIBCXX__) && __GLIBCXX__ < 20120322 -#define STD_GET_ARR_HACK a._M_instance[I] -#elif defined(_LIBCPP_VERSION) -#define STD_GET_ARR_HACK a.__elems_[I] -#else -#define STD_GET_ARR_HACK std::template get<I, T, N>(a) -#endif - -template<std::size_t I, class T, std::size_t N> constexpr inline T& array_get(std::array<T,N>& a) { return (T&) STD_GET_ARR_HACK; } -template<std::size_t I, class T, std::size_t N> constexpr inline T&& array_get(std::array<T,N>&& a) { return (T&&) STD_GET_ARR_HACK; } -template<std::size_t I, class T, std::size_t N> constexpr inline T const& array_get(std::array<T,N> const& a) { return (T const&) STD_GET_ARR_HACK; } - -template<std::size_t I, class T> constexpr inline T& array_get(std::vector<T>& a) { return a[I]; } -template<std::size_t I, class T> constexpr inline T&& array_get(std::vector<T>&& a) { return a[I]; } -template<std::size_t I, class T> constexpr inline T const& array_get(std::vector<T> const& a) { return a[I]; } - -#undef STD_GET_ARR_HACK - -template <typename T> struct array_size; -template<class T, std::size_t N> struct array_size<const std::array<T,N> > { - static const size_t value = N; -}; -template <typename T> struct array_size; -template<class T, std::size_t N> struct array_size<std::array<T,N> > { - static const size_t value = N; -}; - -/* Suppose you have a template of the form - * template<typename T> struct X; - * And you want to specialize it in such a way: - * template<typename S1, typename... 
SN> struct X<Foo<S1, SN...>> { ::: }; - * template<> struct X<Foo<>> { ::: }; - * This will work in Intel's compiler 13.0, but only to some extent in g++ 4.6, since - * g++ can only match templates called with parameter packs if the number of template - * arguments is not a fixed size (so inside the first specialization, referencing - * X<Foo<Sn...>> will fail in g++). On the other hand, g++ will accept the following: - * template<typename S...> struct X<Foo<S...>> { ::: }: - * as an additional (!) specialization, which will then only match the empty case. - * But Intel's compiler 13.0 won't accept that, it will only accept the empty syntax, - * so we have to create a workaround for this. - */ -#if defined(__GNUC__) && !defined(__INTEL_COMPILER) -#define EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n) mt... n -#define EIGEN_TPL_PP_SPEC_HACK_DEFC(mt, n) , EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n) -#define EIGEN_TPL_PP_SPEC_HACK_USE(n) n... -#define EIGEN_TPL_PP_SPEC_HACK_USEC(n) , n... -#else -#define EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n) -#define EIGEN_TPL_PP_SPEC_HACK_DEFC(mt, n) -#define EIGEN_TPL_PP_SPEC_HACK_USE(n) -#define EIGEN_TPL_PP_SPEC_HACK_USEC(n) -#endif - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CXX11WORKAROUNDS_H - -/* - * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; - */ diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h deleted file mode 100644 index a1e1dca8e1..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h +++ /dev/null @@ -1,456 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_EMULATE_CXX11_META_H -#define EIGEN_EMULATE_CXX11_META_H - - - -namespace Eigen { - -// The array class is only available starting with cxx11. Emulate our own here -// if needed -template <typename T, size_t n> class array { - public: - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE T& operator[] (size_t index) { return values[index]; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; } - - static EIGEN_ALWAYS_INLINE std::size_t size() { return n; } - - T values[n]; - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array() { } - explicit EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v) { - EIGEN_STATIC_ASSERT(n==1, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2) { - EIGEN_STATIC_ASSERT(n==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3) { - EIGEN_STATIC_ASSERT(n==3, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, - const T& v4) { - EIGEN_STATIC_ASSERT(n==4, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5) { - EIGEN_STATIC_ASSERT(n==5, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5, const T& v6) { - EIGEN_STATIC_ASSERT(n==6, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - 
values[3] = v4; - values[4] = v5; - values[5] = v6; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5, const T& v6, const T& v7) { - EIGEN_STATIC_ASSERT(n==7, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - values[5] = v6; - values[6] = v7; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array( - const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5, const T& v6, const T& v7, const T& v8) { - EIGEN_STATIC_ASSERT(n==8, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - values[5] = v6; - values[6] = v7; - values[7] = v8; - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - array(std::initializer_list<T> l) { - eigen_assert(l.size() == n); - internal::smart_copy(l.begin(), l.end(), values); - } -#endif -}; - -// Specialize array for zero size -template <typename T> class array<T, 0> { - public: - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE T& operator[] (size_t index) { - eigen_assert(false && "Can't index a zero size array"); - return *static_cast<T*>(NULL); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { - eigen_assert(false && "Can't index a zero size array"); - return *static_cast<const T*>(NULL); - } - - static EIGEN_ALWAYS_INLINE std::size_t size() { return 0; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array() { } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - array(std::initializer_list<T> l) { - eigen_assert(l.size() == 0); - } -#endif -}; - -namespace internal { - -/** \internal - * \file CXX11/Core/util/EmulateCXX11Meta.h - * This file emulates a subset of the functionality provided by CXXMeta.h for - * compilers that don't yet support cxx11 such as nvcc. 
 */

// Sentinel type that terminates every type_list; it contributes 0 to the
// element count.
struct empty_list { static const std::size_t count = 0; };

// Cons-style compile-time list: T is the first element and Tail is the rest of
// the list (empty_list by default). count is 1 plus the count of Tail.
template<typename T, typename Tail=empty_list> struct type_list {
  typedef T HeadType;
  typedef Tail TailType;
  static const T head;
  static const Tail tail;
  static const std::size_t count = 1 + Tail::count;
};

// Placeholder used as the default for the unused make_type_list parameters.
struct null_type { };

// Builds a nested type_list from up to eight types. Each step peels off T1 and
// recurses on the remaining parameters (the freed slot falls back to the
// null_type default).
template<typename T1 = null_type, typename T2 = null_type, typename T3 = null_type,
         typename T4 = null_type, typename T5 = null_type, typename T6 = null_type,
         typename T7 = null_type, typename T8 = null_type>
struct make_type_list {
  typedef typename make_type_list<T2, T3, T4, T5, T6, T7, T8>::type tailresult;

  typedef type_list<T1, tailresult> type;
};

// Recursion terminator: with every parameter defaulted to null_type the result
// is empty_list.
template<> struct make_type_list<> {
  typedef empty_list type;
};


// get_type<index, TList>::type is the type stored at position index of TList.
template <std::size_t index, class TList> struct get_type;

// Index 0 selects the head of the list.
template <class Head, class Tail>
struct get_type<0, type_list<Head, Tail> >
{
  typedef Head type;
};

// Any other index is looked up in the tail with the index decremented.
template <std::size_t i, class Head, class Tail>
struct get_type<i, type_list<Head, Tail> >
{
  typedef typename get_type<i-1, Tail>::type type;
};


/* numeric list */
// Wraps the compile-time constant n of type T as a type, so that numeric lists
// can be expressed as type_lists of type2val elements.
template <typename T, T n>
struct type2val {
  typedef T type;
  static const T value = n;
};


// gen_numeric_list_repeated<T, n, V>::type is a type_list holding n copies of
// type2val<T, V>. Only n = 1 .. 8 are specialized below; the primary template
// is declared but not defined.
template<typename T, size_t n, T V> struct gen_numeric_list_repeated;

template<typename T, T V> struct gen_numeric_list_repeated<T, 1, V> {
  typedef typename make_type_list<type2val<T, V> >::type type;
};

template<typename T, T V> struct gen_numeric_list_repeated<T, 2, V> {
  typedef typename make_type_list<type2val<T, V>, type2val<T, V> >::type type;
};

template<typename T, T V> struct gen_numeric_list_repeated<T, 3, V> {
  typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
};

template<typename T, T V> struct gen_numeric_list_repeated<T, 4, V> {
  typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
};

template<typename T, T V> struct gen_numeric_list_repeated<T, 5, V> {
  typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
};

template<typename T, T V> struct gen_numeric_list_repeated<T, 6, V> {
  typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>,
                                  type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
};

template<typename T, T V> struct gen_numeric_list_repeated<T, 7, V> {
  typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>,
                                  type2val<T, V>, type2val<T, V>, type2val<T, V>,
                                  type2val<T, V> >::type type;
};

template<typename T, T V> struct gen_numeric_list_repeated<T, 8, V> {
  typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>,
                                  type2val<T, V>, type2val<T, V>, type2val<T, V>,
                                  type2val<T, V>, type2val<T, V> >::type type;
};


// get<index, NList>::value is the constant stored at position index of a
// numeric list (a type_list whose elements expose ::type and ::value).
template <std::size_t index, class NList> struct get;

// Lookup in an empty list: type is void, value is '\0', and constructing the
// struct triggers the "index overflow" assertion.
template <std::size_t i>
struct get<i, empty_list>
{
  get() { eigen_assert(false && "index overflow"); }
  typedef void type;
  static const char value = '\0';
};

// Lookup at a generic index i in a one-element list: same placeholder members
// as above. Index 0 is handled by the more specialized template that follows.
template <std::size_t i, class Head>
struct get<i, type_list<Head, empty_list> >
{
  get() { eigen_assert(false && "index overflow"); }
  typedef void type;
  static const char value = '\0';
};

// Index 0 of a one-element list.
// NOTE(review): presumably present to disambiguate between the two partial
// specializations that would otherwise both match get<0, type_list<Head, empty_list> >.
template <class Head>
struct get<0, type_list<Head, empty_list> >
{
  typedef typename Head::type type;
  static const type value = Head::value;
};

// Index 0 of any list: the head's value.
template <class Head, class Tail>
struct get<0, type_list<Head, Tail> >
{
  typedef typename Head::type type;
  static const type value = Head::value;
};

// General case: the value comes from the tail with the index decremented.
// NOTE(review): type is taken from the head of Tail (list element 1), not from
// the element at index i; for lists mixing value types these may differ -- confirm.
template <std::size_t i, class Head, class Tail>
struct get<i, type_list<Head, Tail> >
{
  typedef typename Tail::HeadType::type type;
  static const type value = get<i-1, Tail>::value;
};


// arg_prod<NList>::value is the product of all values in the numeric list,
// typed as the value type of the list's head.
template <class NList> struct arg_prod {
  static const typename NList::HeadType::type value = get<0, NList>::value * arg_prod<typename NList::TailType>::value;
};
// The product over an empty list is 1.
template <> struct arg_prod<empty_list> {
  static const int value = 1;
};


// Returns an array of n elements, each set to v.
template<int n, typename t>
array<t, n> repeat(t v) {
  array<t, n> array;
  array.fill(v);
  return array;
}

// array_get<I>() on a numeric list returns the compile-time value at index I.
// The argument itself is not read; only its type is used. The return type is
// the value type of the list's head.
template<std::size_t I, class Head, class Tail>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(type_list<Head, Tail>& a) {
  return get<I, type_list<Head, Tail> >::value;
}
template<std::size_t I, class Head, class Tail>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(const type_list<Head, Tail>& a) {
  return get<I, type_list<Head, Tail> >::value;
}

// Product of all values of a numeric list; computed from the type alone.
// NOTE(review): the ';' after the closing brace is an empty declaration.
template <class NList>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod(const NList& l) {
  return arg_prod<NList>::value;
};

// Product of all elements of a fixed-size array (1 for n == 0).
template<std::size_t n, typename t>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array<t, n>& a) {
  t prod = 1;
  for (size_t i = 0; i < n; ++i) { prod *= a[i]; }
  return prod;
}

// Product of all elements of a std::vector (1 for an empty vector).
template<typename t>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector<t>& a) {
  t prod = 1;
  for (size_t i = 0; i < a.size(); ++i) { prod *= a[i]; }
  return prod;
}

// array_get<I>() on an array or std::vector returns a reference to element I.
template<std::size_t I, class T, std::size_t N>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(array<T,N>& a) {
  return a[I];
}
template<std::size_t I, class T, std::size_t N>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array<T,N>& a) {
  return a[I];
}

template<std::size_t I, class T>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(std::vector<T>& a) {
  return a[I];
}
template<std::size_t I, class T>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const std::vector<T>& a) {
  return a[I];
}

// array_size<X>::value is N for array<T, N>, with or without const and/or a
// reference. The primary template is only declared (the declaration is
// repeated before each specialization).
template <typename T> struct array_size;
template<class T, std::size_t N> struct array_size<array<T,N> > {
  static const size_t value = N;
};
template <typename T> struct array_size;
template<class T, std::size_t N> struct array_size<array<T,N>& > {
  static const size_t value = N;
};
template <typename T> struct array_size;
template<class T, std::size_t N> struct array_size<const array<T,N> > {
  static const size_t value = N;
};
template <typename T> struct array_size;
template<class T, std::size_t N> struct array_size<const array<T,N>& > {
  static const size_t value = N;
};

// Stateless functors exposing a static run(); used as the Op / Reducer
// arguments of the array_*_and_reduce helpers below.
// NOTE(review): sum_op, product_op and negation_op declare a bool return type,
// so the arithmetic result is converted to bool on return -- confirm intended.
struct sum_op {
  template<typename A, typename B> static inline bool run(A a, B b) { return a + b; }
};
struct product_op {
  template<typename A, typename B> static inline bool run(A a, B b) { return a * b; }
};

struct logical_and_op {
  template<typename A, typename B> static inline bool run(A a, B b) { return a && b; }
};
struct logical_or_op {
  template<typename A, typename B> static inline bool run(A a, B b) { return a || b; }
};

struct equal_op {
  template<typename A, typename B> static inline bool run(A a, B b) { return a == b; }
};
struct not_equal_op {
  template<typename A, typename B> static inline bool run(A a, B b) { return a != b; }
};
struct lesser_op {
  template<typename A, typename B> static inline bool run(A a, B b) { return a < b; }
};
struct lesser_equal_op {
  template<typename A, typename B> static inline bool run(A a, B b) { return a <= b; }
};

struct greater_op {
  template<typename A, typename B> static inline bool run(A a, B b) { return a > b; }
};
struct greater_equal_op {
  template<typename A, typename B> static inline bool run(A a, B b) { return a >= b; }
};

struct not_op {
  template<typename A> static inline bool run(A a) { return !a; }
};
struct negation_op {
  template<typename A> static inline bool run(A a) { return -a; }
};
struct greater_equal_zero_op {
  template<typename A> static inline bool run(A a) { return a >= 0; }
};


// Applies the unary Op to every element of a and folds the results, left to
// right, with the binary Reducer; the running result is held in a bool.
// The primary template requires N >= 2 (static assertion); N == 1 is covered
// by the specialization that follows.
template<typename Reducer, typename Op, typename A, std::size_t N>
struct ArrayApplyAndReduce {
  static inline bool run(const array<A, N>& a) {
    EIGEN_STATIC_ASSERT(N >= 2, YOU_MADE_A_PROGRAMMING_MISTAKE);
    bool result = Reducer::run(Op::run(a[0]), Op::run(a[1]));
    for (size_t i = 2; i < N; ++i) {
      result = Reducer::run(result, Op::run(a[i]));
    }
    return result;
  }
};

// Single-element case: nothing to reduce, return Op applied to the element.
template<typename Reducer, typename Op, typename A>
struct ArrayApplyAndReduce<Reducer, Op, A, 1>  {
  static inline bool run(const array<A, 1>& a) {
    return Op::run(a[0]);
  }
};

// Convenience wrapper that forwards to ArrayApplyAndReduce.
template<typename Reducer, typename Op, typename A, std::size_t N>
inline bool array_apply_and_reduce(const array<A, N>& a) {
  return ArrayApplyAndReduce<Reducer, Op, A, N>::run(a);
}

// Applies the binary Op to each pair (a[i], b[i]) and folds the results, left
// to right, with the binary Reducer; the running result is held in a bool.
// The primary template requires N >= 2; N == 1 is specialized below.
template<typename Reducer, typename Op, typename A, typename B, std::size_t N>
struct ArrayZipAndReduce {
  static inline bool run(const array<A, N>& a, const array<B, N>& b) {
    EIGEN_STATIC_ASSERT(N >= 2, YOU_MADE_A_PROGRAMMING_MISTAKE);
    bool result = Reducer::run(Op::run(a[0], b[0]), Op::run(a[1], b[1]));
    for (size_t i = 2; i < N; ++i) {
      result = Reducer::run(result, Op::run(a[i], b[i]));
    }
    return result;
  }
};

// Single-pair case: return Op applied to the only pair.
template<typename Reducer, typename Op, typename A, typename B>
struct ArrayZipAndReduce<Reducer, Op, A, B, 1> {
  static inline bool run(const array<A, 1>& a, const array<B, 1>& b) {
    return Op::run(a[0], b[0]);
  }
};

// Convenience wrapper that forwards to ArrayZipAndReduce.
template<typename Reducer, typename Op, typename A, typename B, std::size_t N>
inline bool array_zip_and_reduce(const array<A, N>& a, const array<B, N>& b) {
  return ArrayZipAndReduce<Reducer, Op, A, B, N>::run(a, b);
}

}  // end namespace internal

}  // end namespace Eigen



#endif  // EIGEN_EMULATE_CXX11_META_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/FixedSizeVector.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/FixedSizeVector.h
deleted file mode 100644
index c68119aa03..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Core/util/FixedSizeVector.h
+++ /dev/null
@@ -1,128 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
-// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_FIXEDSIZEVECTOR_H -#define EIGEN_FIXEDSIZEVECTOR_H - -namespace Eigen { - -/** \class FixedSizeVector - * \ingroup Core - * - * \brief The FixedSizeVector class. - * - * The %FixedSizeVector provides a subset of std::vector functionality. - * - * The goal is to provide basic std::vector operations when using - * std::vector is not an option (e.g. on GPU or when compiling using - * FMA/AVX, as this can cause either compilation failures or illegal - * instruction failures). - * - */ -template <typename T> -class FixedSizeVector { - public: - // Construct a new FixedSizeVector, reserve n elements. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - explicit FixedSizeVector(size_t n) - : reserve_(n), size_(0), - data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) { - for (size_t i = 0; i < n; ++i) { new (&data_[i]) T; } - } - - // Construct a new FixedSizeVector, reserve and resize to n. - // Copy the init value to all elements. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - explicit FixedSizeVector(size_t n, const T& init) - : reserve_(n), size_(n), - data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) { - for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - ~FixedSizeVector() { - for (size_t i = 0; i < size_; ++i) { - data_[i].~T(); - } - internal::aligned_free(data_); - } - - // Append new elements (up to reserved size). 
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void push_back(const T& t) { - eigen_assert(size_ < reserve_); - data_[size_++] = t; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const T& operator[] (size_t i) const { - eigen_assert(i < size_); - return data_[i]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - T& operator[] (size_t i) { - eigen_assert(i < size_); - return data_[i]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - T& back() { - eigen_assert(size_ > 0); - return data_[size_ - 1]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const T& back() const { - eigen_assert(size_ > 0); - return data_[size_ - 1]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void pop_back() { - // NOTE: This does not destroy the value at the end the way - // std::vector's version of pop_back() does. That happens when - // the Vector is destroyed. - eigen_assert(size_ > 0); - size_--; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t size() const { return size_; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - bool empty() const { return size_ == 0; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - T* data() { return data_; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const T* data() const { return data_; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - T* begin() { return data_; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - T* end() { return data_ + size_; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const T* begin() const { return data_; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const T* end() const { return data_ + size_; } - - private: - size_t reserve_; - size_t size_; - T* data_; -}; - -} // namespace Eigen - -#endif // EIGEN_FIXEDSIZEVECTOR_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h deleted file mode 100644 index 9db0d2698f..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ /dev/null @@ -1,461 +0,0 @@ -// This file is part of Eigen, a 
lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_H -#define EIGEN_CXX11_TENSOR_TENSOR_H - -namespace Eigen { - -/** \class Tensor - * \ingroup CXX11_Tensor_Module - * - * \brief The tensor class. - * - * The %Tensor class is the work-horse for all \em dense tensors within Eigen. - * - * The %Tensor class encompasses only dynamic-size objects so far. - * - * The first two template parameters are required: - * \tparam Scalar_ \anchor tensor_tparam_scalar Numeric type, e.g. float, double, int or std::complex<float>. - * User defined scalar types are supported as well (see \ref user_defined_scalars "here"). - * \tparam NumIndices_ Number of indices (i.e. rank of the tensor) - * - * The remaining template parameters are optional -- in most cases you don't have to worry about them. - * \tparam Options_ \anchor tensor_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either - * \b #AutoAlign or \b #DontAlign. - * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required - * for vectorization. It defaults to aligning tensors. Note that tensors currently do not support any operations that profit from vectorization. - * Support for such operations (i.e. adding two tensors etc.) is planned. - * - * You can access elements of tensors using normal subscripting: - * - * \code - * Eigen::Tensor<double, 4> t(10, 10, 10, 10); - * t(0, 1, 2, 3) = 42.0; - * \endcode - * - * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN. 
- * - * <i><b>Some notes:</b></i> - * - * <dl> - * <dt><b>Relation to other parts of Eigen:</b></dt> - * <dd>The midterm developement goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that - * taking blocks or using tensors in expressions is easily possible, including an interface with the vector/matrix code - * by providing .asMatrix() and .asVector() (or similar) methods for rank 2 and 1 tensors. However, currently, the %Tensor - * class does not provide any of these features and is only available as a stand-alone class that just allows for - * coefficient access. Also, when fixed-size tensors are implemented, the number of template arguments is likely to - * change dramatically.</dd> - * </dl> - * - * \ref TopicStorageOrders - */ - -template<typename Scalar_, std::size_t NumIndices_, int Options_, typename IndexType_> -class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > -{ - public: - typedef Tensor<Scalar_, NumIndices_, Options_, IndexType_> Self; - typedef TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > Base; - typedef typename Eigen::internal::nested<Self>::type Nested; - typedef typename internal::traits<Self>::StorageKind StorageKind; - typedef typename internal::traits<Self>::Index Index; - typedef Scalar_ Scalar; - typedef typename internal::packet_traits<Scalar>::type Packet; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef typename Base::CoeffReturnType CoeffReturnType; - typedef typename Base::PacketReturnType PacketReturnType; - - enum { - IsAligned = bool(EIGEN_ALIGN) & !(Options_ & DontAlign), - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - BlockAccess = false, - Layout = Options_ & RowMajor ? 
RowMajor : ColMajor, - CoordAccess = true, - }; - - static const int Options = Options_; - static const std::size_t NumIndices = NumIndices_; - typedef DSizes<Index, NumIndices_> Dimensions; - - protected: - TensorStorage<Scalar, Dimensions, Options_> m_storage; - - public: - // Metadata - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } - - // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - // work, because that uses base().coeffRef() - and we don't yet - // implement a similar class hierarchy - inline Self& base() { return *this; } - inline const Self& base() const { return *this; } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return coeff(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); - } -#endif - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const - { - eigen_internal_assert(checkIndexRange(indices)); - return m_storage.data()[linearizedIndex(indices)]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff() const - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return m_storage.data()[0]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const - { - eigen_internal_assert(index >= 0 && index < size()); - return m_storage.data()[index]; - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - inline Scalar& coeffRef(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return coeffRef(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); - } -#endif - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices) - { - eigen_internal_assert(checkIndexRange(indices)); - return m_storage.data()[linearizedIndex(indices)]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef() - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) - return m_storage.data()[0]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - eigen_internal_assert(index >= 0 && index < size()); - return m_storage.data()[index]; - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - inline const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... 
otherIndices) const - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return this->operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); - } -#else - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const - { - return coeff(array<Index, 2>(i0, i1)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const - { - return coeff(array<Index, 3>(i0, i1, i2)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const - { - return coeff(array<Index, 4>(i0, i1, i2, i3)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const - { - return coeff(array<Index, 5>(i0, i1, i2, i3, i4)); - } -#endif - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const - { - eigen_assert(checkIndexRange(indices)); - return coeff(indices); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()() const - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return coeff(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const - { - eigen_internal_assert(index >= 0 && index < size()); - return coeff(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const - { - // The bracket operator is only for vectors, use the parenthesis operator instead. - EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE); - return coeff(index); - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - inline Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... 
otherIndices) - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); - } -#else - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) - { - return coeffRef(array<Index, 2>(i0, i1)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) - { - return coeffRef(array<Index, 3>(i0, i1, i2)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) - { - return coeffRef(array<Index, 4>(i0, i1, i2, i3)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) - { - return coeffRef(array<Index, 5>(i0, i1, i2, i3, i4)); - } -#endif - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices) - { - eigen_assert(checkIndexRange(indices)); - return coeffRef(indices); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()() - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return coeffRef(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) - { - eigen_assert(index >= 0 && index < size()); - return coeffRef(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator[](Index index) - { - // The bracket operator is only for vectors, use the parenthesis operator instead - EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - return coeffRef(index); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Tensor() - : m_storage() - { - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Tensor(const Self& other) - : m_storage(other.m_storage) - { - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... 
IndexTypes> - inline Tensor(Index firstDimension, IndexTypes... otherDimensions) - : m_storage(internal::array_prod(array<Index, NumIndices>{{firstDimension, otherDimensions...}}), array<Index, NumIndices>{{firstDimension, otherDimensions...}}) - { - // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - } -#else - inline explicit Tensor(Index dim1) - : m_storage(dim1, array<Index, 1>(dim1)) - { - EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - inline explicit Tensor(Index dim1, Index dim2) - : m_storage(dim1*dim2, array<Index, 2>(dim1, dim2)) - { - EIGEN_STATIC_ASSERT(2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - inline explicit Tensor(Index dim1, Index dim2, Index dim3) - : m_storage(dim1*dim2*dim3, array<Index, 3>(dim1, dim2, dim3)) - { - EIGEN_STATIC_ASSERT(3 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - inline explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4) - : m_storage(dim1*dim2*dim3*dim4, array<Index, 4>(dim1, dim2, dim3, dim4)) - { - EIGEN_STATIC_ASSERT(4 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - inline explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) - : m_storage(dim1*dim2*dim3*dim4*dim5, array<Index, 4>(dim1, dim2, dim3, dim4, dim5)) - { - EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - } -#endif - - inline explicit Tensor(const array<Index, NumIndices>& dimensions) - : m_storage(internal::array_prod(dimensions), dimensions) - { - EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) - { - typedef TensorAssignOp<Tensor, const OtherDerived> Assign; - Assign assign(*this, other.derived()); - resize(TensorEvaluator<const Assign, DefaultDevice>(assign, 
DefaultDevice()).dimensions()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - } - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, WriteAccessors>& other) - { - typedef TensorAssignOp<Tensor, const OtherDerived> Assign; - Assign assign(*this, other.derived()); - resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Tensor& operator=(const Tensor& other) - { - typedef TensorAssignOp<Tensor, const Tensor> Assign; - Assign assign(*this, other); - resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - template<typename Other> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Tensor& operator=(const Other& other) - { - typedef TensorAssignOp<Tensor, const Other> Assign; - Assign assign(*this, other); - resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - void resize(Index firstDimension, IndexTypes... otherDimensions) - { - // The number of dimensions used to resize a tensor must be equal to the rank of the tensor. 
- EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - resize(array<Index, NumIndices>{firstDimension, otherDimensions...}); - } -#endif - - EIGEN_DEVICE_FUNC - void resize() - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - // Nothing to do: rank 0 tensors have fixed size - } - - EIGEN_DEVICE_FUNC - void resize(const array<Index, NumIndices>& dimensions) - { - Index size = Index(1); - for (size_t i = 0; i < NumIndices; i++) { - internal::check_rows_cols_for_overflow<Dynamic>::run(size, dimensions[i]); - size *= dimensions[i]; - } - #ifdef EIGEN_INITIALIZE_COEFFS - bool size_changed = size != this->size(); - m_storage.resize(size, dimensions); - if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - #else - m_storage.resize(size, dimensions); - #endif - } - - EIGEN_DEVICE_FUNC - void resize(const DSizes<Index, NumIndices>& dimensions) { - array<Index, NumIndices> dims; - for (int i = 0; i < NumIndices; ++i) { - dims[i] = dimensions[i]; - } - resize(dims); - } - -#ifndef EIGEN_EMULATE_CXX11_META_H - template <typename std::size_t... 
Indices> - EIGEN_DEVICE_FUNC - void resize(const Sizes<Indices...>& dimensions) { - array<Index, NumIndices> dims; - for (int i = 0; i < NumIndices; ++i) { - dims[i] = dimensions[i]; - } - resize(dims); - } -#else - template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> - EIGEN_DEVICE_FUNC - void resize(const Sizes<V1, V2, V3, V4, V5>& dimensions) { - array<Index, NumIndices> dims; - for (int i = 0; i < NumIndices; ++i) { - dims[i] = dimensions[i]; - } - resize(dims); - } -#endif - - protected: - - bool checkIndexRange(const array<Index, NumIndices>& indices) const - { - using internal::array_apply_and_reduce; - using internal::array_zip_and_reduce; - using internal::greater_equal_zero_op; - using internal::logical_and_op; - using internal::lesser_op; - - return - // check whether the indices are all >= 0 - array_apply_and_reduce<logical_and_op, greater_equal_zero_op>(indices) && - // check whether the indices fit in the dimensions - array_zip_and_reduce<logical_and_op, lesser_op>(indices, m_storage.dimensions()); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index linearizedIndex(const array<Index, NumIndices>& indices) const - { - if (Options&RowMajor) { - return m_storage.dimensions().IndexOfRowMajor(indices); - } else { - return m_storage.dimensions().IndexOfColMajor(indices); - } - } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h deleted file mode 100644 index ee3bf7fe34..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +++ /dev/null @@ -1,288 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
//
// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com>
//                    Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H
#define EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H

namespace Eigen {
namespace internal {

/** \class TensorIndexTuple
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor + Index Tuple class.
  *
  * The traits below inherit from the traits of the wrapped expression and
  * replace Scalar with Tuple<Index, Scalar>.
  */
template<typename XprType>
struct traits<TensorIndexTupleOp<XprType> > : public traits<XprType>
{
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef Tuple<Index, typename XprTraits::Scalar> Scalar;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
};

// Evaluating a dense TensorIndexTupleOp yields a const reference to the op.
template<typename XprType>
struct eval<TensorIndexTupleOp<XprType>, Eigen::Dense>
{
  typedef const TensorIndexTupleOp<XprType>& type;
};

// When nested inside another expression, the op is stored by value.
template<typename XprType>
struct nested<TensorIndexTupleOp<XprType>, 1,
              typename eval<TensorIndexTupleOp<XprType> >::type>
{
  typedef TensorIndexTupleOp<XprType> type;
};

}  // end namespace internal

// Expression node wrapping another tensor expression; its coefficients are
// Tuple<Index, value> pairs (see the evaluator below).
template<typename XprType>
class TensorIndexTupleOp : public TensorBase<TensorIndexTupleOp<XprType>, ReadOnlyAccessors>
{
  public:
  typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename Eigen::internal::nested<TensorIndexTupleOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorIndexTupleOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Index Index;
  typedef Tuple<Index, typename XprType::CoeffReturnType> CoeffReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexTupleOp(const XprType& expr)
      : m_xpr(expr) {}

  // The wrapped expression.
  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
};

// Eval as rvalue
// Evaluator for TensorIndexTupleOp: forwards dimensions and sub-expression
// evaluation to the wrapped expression's evaluator; coeff(index) pairs the
// index passed in with the wrapped coefficient at that index.
template<typename ArgType, typename Device>
struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
{
  typedef TensorIndexTupleOp<ArgType> XprType;
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;

  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  static const int NumDims = internal::array_size<Dimensions>::value;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
    PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device) { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return m_impl.dimensions();
  }

  // Lets the wrapped evaluator prepare itself (no destination buffer is
  // offered to it) and always returns true.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return CoeffReturnType(index, m_impl.coeff(index));
  }

  // No directly addressable buffer is exposed.
  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }

 protected:
  TensorEvaluator<ArgType, Device> m_impl;
};

namespace internal {

/** \class TensorTupleIndex
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Converts to Tensor<Tuple<Index, Scalar> > and reduces to Tensor<Index>.
  *
  */
// Traits inherit from the wrapped expression's traits and use Index as Scalar.
template<typename ReduceOp, typename Dims, typename XprType>
struct traits<TensorTupleReducerOp<ReduceOp, Dims, XprType> > : public traits<XprType>
{
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef Index Scalar;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
};

// Evaluating a dense TensorTupleReducerOp yields a const reference to the op.
template<typename ReduceOp, typename Dims, typename XprType>
struct eval<TensorTupleReducerOp<ReduceOp, Dims, XprType>, Eigen::Dense>
{
  typedef const TensorTupleReducerOp<ReduceOp, Dims, XprType>& type;
};

// When nested inside another expression, the op is stored by value.
template<typename ReduceOp, typename Dims, typename XprType>
struct nested<TensorTupleReducerOp<ReduceOp, Dims, XprType>, 1,
              typename eval<TensorTupleReducerOp<ReduceOp, Dims, XprType> >::type>
{
  typedef TensorTupleReducerOp<ReduceOp, Dims, XprType> type;
};

}  // end namespace internal

// Expression node that stores a wrapped expression together with a reducer,
// the dimensions to reduce over and a return_dim value; its coefficients are
// of type Index.
template<typename ReduceOp, typename Dims, typename XprType>
class TensorTupleReducerOp : public TensorBase<TensorTupleReducerOp<ReduceOp, Dims, XprType>, ReadOnlyAccessors>
{
  public:
  typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename Eigen::internal::nested<TensorTupleReducerOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorTupleReducerOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Index Index;
  typedef Index CoeffReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTupleReducerOp(const XprType& expr,
                                                          const ReduceOp& reduce_op,
                                                          const int return_dim,
                                                          const Dims& reduce_dims)
      : m_xpr(expr), m_reduce_op(reduce_op), m_return_dim(return_dim), m_reduce_dims(reduce_dims) {}

  // Accessors for the stored operands.
  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const { return m_xpr; }

  EIGEN_DEVICE_FUNC
  const ReduceOp& reduce_op() const { return m_reduce_op; }

  EIGEN_DEVICE_FUNC
  const Dims& reduce_dims() const { return m_reduce_dims; }

  EIGEN_DEVICE_FUNC
  int return_dim() const { return m_return_dim; }

  protected:
    typename XprType::Nested m_xpr;
    const ReduceOp m_reduce_op;
    const int m_return_dim;
    const Dims m_reduce_dims;
};

// Eval as rvalue
template<typename ReduceOp, typename Dims, typename ArgType, typename Device>
struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Device>
{
  typedef TensorTupleReducerOp<ReduceOp, Dims, ArgType> XprType;
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename TensorIndexTupleOp<ArgType>::CoeffReturnType TupleType;
  typedef typename TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Dimensions Dimensions;
  typedef typename TensorEvaluator<const TensorIndexTupleOp<ArgType> , Device>::Dimensions InputDimensions;
  static const int NumDims = internal::array_size<InputDimensions>::value;
  typedef array<Index, NumDims> StrideDims;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
    PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
    BlockAccess = false,
    Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout,
    CoordAccess = false,  // to be implemented
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_orig_impl(op.expression(), device),
        m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device),
        m_return_dim(op.return_dim()),
        m_strides(gen_strides(m_orig_impl.dimensions())),
m_stride_mod(gen_stride_mod(m_orig_impl.dimensions())), - m_stride_div(gen_stride_div()) { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { - return m_impl.dimensions(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - const TupleType v = m_impl.coeff(index); - return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div; - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - private: - EIGEN_DEVICE_FUNC StrideDims gen_strides(const InputDimensions& dims) { - StrideDims strides; - if (m_return_dim < 0) return strides; // Won't be using these. - eigen_assert(m_return_dim < NumDims && - "Asking to convert index to a dimension outside of the rank"); - - // Calculate m_stride_div and m_stride_mod, which are used to - // calculate the value of an index w.r.t. the m_return_dim. - if (Layout == static_cast<int>(ColMajor)) { - strides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - strides[i] = strides[i-1] * dims[i-1]; - } - } else { - strides[NumDims-1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - strides[i] = strides[i+1] * dims[i+1]; - } - } - return strides; - } - - EIGEN_DEVICE_FUNC Index gen_stride_mod(const InputDimensions& dims) { - if (Layout == static_cast<int>(ColMajor)) { - return (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : dims.TotalSize(); - } else { - return (m_return_dim > 0) ? 
m_strides[m_return_dim - 1] : dims.TotalSize(); - } - } - - EIGEN_DEVICE_FUNC Index gen_stride_div() { - return m_strides[m_return_dim]; - } - - protected: - TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device> m_orig_impl; - TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device> m_impl; - const int m_return_dim; - const StrideDims m_strides; - const Index m_stride_mod; - const Index m_stride_div; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h deleted file mode 100644 index fdb943e713..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ /dev/null @@ -1,179 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H -#define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H - -namespace Eigen { - -/** \class TensorAssign - * \ingroup CXX11_Tensor_Module - * - * \brief The tensor assignment class. - * - * This class is represents the assignment of the values resulting from the evaluation of - * the rhs expression to the memory locations denoted by the lhs expression. 
- */ -namespace internal { -template<typename LhsXprType, typename RhsXprType> -struct traits<TensorAssignOp<LhsXprType, RhsXprType> > -{ - typedef typename LhsXprType::Scalar Scalar; - typedef typename traits<LhsXprType>::StorageKind StorageKind; - typedef typename promote_index_type<typename traits<LhsXprType>::Index, - typename traits<RhsXprType>::Index>::type Index; - typedef typename LhsXprType::Nested LhsNested; - typedef typename RhsXprType::Nested RhsNested; - typedef typename remove_reference<LhsNested>::type _LhsNested; - typedef typename remove_reference<RhsNested>::type _RhsNested; - static const std::size_t NumDimensions = internal::traits<LhsXprType>::NumDimensions; - static const int Layout = internal::traits<LhsXprType>::Layout; - - enum { - Flags = 0, - }; -}; - -template<typename LhsXprType, typename RhsXprType> -struct eval<TensorAssignOp<LhsXprType, RhsXprType>, Eigen::Dense> -{ - typedef const TensorAssignOp<LhsXprType, RhsXprType>& type; -}; - -template<typename LhsXprType, typename RhsXprType> -struct nested<TensorAssignOp<LhsXprType, RhsXprType>, 1, typename eval<TensorAssignOp<LhsXprType, RhsXprType> >::type> -{ - typedef TensorAssignOp<LhsXprType, RhsXprType> type; -}; - -} // end namespace internal - - - -template<typename LhsXprType, typename RhsXprType> -class TensorAssignOp : public TensorBase<TensorAssignOp<LhsXprType, RhsXprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorAssignOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename LhsXprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::traits<TensorAssignOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorAssignOp>::Index Index; - static const std::size_t NumDims = Eigen::internal::traits<TensorAssignOp>::NumDimensions; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorAssignOp(LhsXprType& lhs, const RhsXprType& rhs) - : m_lhs_xpr(lhs), m_rhs_xpr(rhs) {} - - /** 
\returns the nested expressions */ - EIGEN_DEVICE_FUNC - typename internal::remove_all<typename LhsXprType::Nested>::type& - lhsExpression() const { return *((typename internal::remove_all<typename LhsXprType::Nested>::type*)&m_lhs_xpr); } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename RhsXprType::Nested>::type& - rhsExpression() const { return m_rhs_xpr; } - - protected: - typename internal::remove_all<typename LhsXprType::Nested>::type& m_lhs_xpr; - const typename internal::remove_all<typename RhsXprType::Nested>::type& m_rhs_xpr; -}; - - -template<typename LeftArgType, typename RightArgType, typename Device> -struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device> -{ - typedef TensorAssignOp<LeftArgType, RightArgType> XprType; - - enum { - IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & - TensorEvaluator<RightArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & - TensorEvaluator<RightArgType, Device>::PacketAccess, - BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess & - TensorEvaluator<RightArgType, Device>::BlockAccess, - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - }; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : - m_leftImpl(op.lhsExpression(), device), - m_rightImpl(op.rhsExpression(), device) - { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); - } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename TensorEvaluator<RightArgType, Device>::Dimensions Dimensions; - static const std::size_t NumDims = XprType::NumDims; - - typedef typename internal::TensorBlock< - Index, 
typename internal::remove_const<Scalar>::type, NumDims, Layout> - TensorBlock; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const - { - // TODO: use left impl instead if right impl dimensions are known at compile time. - return m_rightImpl.dimensions(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { - eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); - m_leftImpl.evalSubExprsIfNeeded(NULL); - // If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non - // null value), attempt to evaluate the rhs expression in place. Returns true iff in place - // evaluation isn't supported and the caller still needs to manually assign the values generated - // by the rhs to the lhs. - return m_rightImpl.evalSubExprsIfNeeded(m_leftImpl.data()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_leftImpl.cleanup(); - m_rightImpl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { - m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { - const int LhsStoreMode = TensorEvaluator<LeftArgType, Device>::IsAligned ? Aligned : Unaligned; - const int RhsLoadMode = TensorEvaluator<RightArgType, Device>::IsAligned ? 
Aligned : Unaligned; - m_leftImpl.template writePacket<LhsStoreMode>(i, m_rightImpl.template packet<RhsLoadMode>(i)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector<internal::TensorOpResourceRequirements>* resources) const { - m_leftImpl.getResourceRequirements(resources); - m_rightImpl.getResourceRequirements(resources); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock(TensorBlock* block) { - m_rightImpl.block(block); - m_leftImpl.writeBlock(*block); - } - - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const - { - return m_leftImpl.coeff(index); - } - template<int LoadMode> - EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const - { - return m_leftImpl.template packet<LoadMode>(index); - } - - private: - TensorEvaluator<LeftArgType, Device> m_leftImpl; - TensorEvaluator<RightArgType, Device> m_rightImpl; -}; - -} - - -#endif // EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h deleted file mode 100644 index 723f17c264..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ /dev/null @@ -1,958 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_BASE_H -#define EIGEN_CXX11_TENSOR_TENSOR_BASE_H - -// clang-format off - -namespace Eigen { - -/** \class TensorBase - * \ingroup CXX11_Tensor_Module - * - * \brief The tensor base class. 
- * - * This class is the common parent of the Tensor and TensorMap class, thus - * making it possible to use either class interchangably in expressions. - */ - -template<typename Derived> -class TensorBase<Derived, ReadOnlyAccessors> -{ - public: - typedef internal::traits<Derived> DerivedTraits; - typedef typename DerivedTraits::Scalar Scalar; - typedef typename DerivedTraits::Index Index; - typedef typename internal::remove_const<Scalar>::type CoeffReturnType; - typedef typename internal::packet_traits<CoeffReturnType>::type PacketReturnType; - static const int NumDimensions = DerivedTraits::NumDimensions; - - // Generic nullary operation support. - template <typename CustomNullaryOp> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<CustomNullaryOp, const Derived> - nullaryExpr(const CustomNullaryOp& func) const { - return TensorCwiseNullaryOp<CustomNullaryOp, const Derived>(derived(), func); - } - - // Coefficient-wise nullary operators - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> - constant(const Scalar& value) const { - return nullaryExpr(internal::scalar_constant_op<Scalar>(value)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<internal::UniformRandomGenerator<Scalar>, const Derived> - random() const { - return nullaryExpr(internal::UniformRandomGenerator<Scalar>()); - } - template <typename RandomGenerator> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<RandomGenerator, const Derived> - random(const RandomGenerator& gen = RandomGenerator()) const { - return nullaryExpr(gen); - } - - // Tensor generation - template <typename Generator> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorGeneratorOp<Generator, const Derived> - generate(const Generator& generator) const { - return TensorGeneratorOp<Generator, const Derived>(derived(), generator); - } - - // Generic unary operation support. 
- template <typename CustomUnaryOp> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<CustomUnaryOp, const Derived> - unaryExpr(const CustomUnaryOp& func) const { - return TensorCwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func); - } - - // Coefficient-wise unary operators - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> - operator-() const { - return unaryExpr(internal::scalar_opposite_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> - sign() const { - return unaryExpr(internal::scalar_sign_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> - sqrt() const { - return unaryExpr(internal::scalar_sqrt_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_rsqrt_op<Scalar>, const Derived> - rsqrt() const { - return unaryExpr(internal::scalar_rsqrt_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> - square() const { - return unaryExpr(internal::scalar_square_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> - cube() const { - return unaryExpr(internal::scalar_cube_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> - inverse() const { - return unaryExpr(internal::scalar_inverse_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> - tanh() const { - return unaryExpr(internal::scalar_tanh_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const 
Derived> - lgamma() const { - return unaryExpr(internal::scalar_lgamma_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> - erf() const { - return unaryExpr(internal::scalar_erf_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> - erfc() const { - return unaryExpr(internal::scalar_erfc_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived> - sigmoid() const { - return unaryExpr(internal::scalar_sigmoid_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived> - exp() const { - return unaryExpr(internal::scalar_exp_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived> - log() const { - return unaryExpr(internal::scalar_log_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> - abs() const { - return unaryExpr(internal::scalar_abs_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived> - pow(Scalar exponent) const { - return unaryExpr(internal::scalar_pow_op<Scalar>(exponent)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_add_op<Scalar>, const Derived> - operator+ (Scalar rhs) const { - return unaryExpr(internal::scalar_add_op<Scalar>(rhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sub_op<Scalar>, const Derived> - operator- (Scalar rhs) const { - EIGEN_STATIC_ASSERT((std::numeric_limits<Scalar>::is_signed || internal::is_same<Scalar, const std::complex<float> >::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - 
return unaryExpr(internal::scalar_sub_op<Scalar>(rhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Derived> - operator* (Scalar rhs) const { - return unaryExpr(internal::scalar_multiple_op<Scalar>(rhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_quotient1_op<Scalar>, const Derived> - operator/ (Scalar rhs) const { - // EIGEN_STATIC_ASSERT(!std::numeric_limits<Scalar>::is_integer, YOU_MADE_A_PROGRAMMING_MISTAKE); - return unaryExpr(internal::scalar_quotient1_op<Scalar>(rhs)); - } - - template <typename Scale> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_multiple2_op<Scalar, Scale>, const Derived> - scale (Scale rhs) const { - return unaryExpr(internal::scalar_multiple2_op<Scalar, Scale>(rhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_mod_op<Scalar>, const Derived> - operator% (Scalar rhs) const { - EIGEN_STATIC_ASSERT(std::numeric_limits<Scalar>::is_integer, YOU_MADE_A_PROGRAMMING_MISTAKE_TRY_MOD); - return unaryExpr(internal::scalar_mod_op<Scalar>(rhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_fmod_op<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - mod(Scalar rhs) const { - EIGEN_STATIC_ASSERT(!std::numeric_limits<Scalar>::is_integer, YOU_MADE_A_PROGRAMMING_MISTAKE_FMOD_IS_NOT_FOR_INTEGERS); - return mod(constant(rhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - cwiseMax(Scalar threshold) const { - return cwiseMax(constant(threshold)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const 
TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - cwiseMin(Scalar threshold) const { - return cwiseMin(constant(threshold)); - } - - template <typename NewType> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorConversionOp<NewType, const Derived> - cast() const { - return TensorConversionOp<NewType, const Derived>(derived()); - } - - // Generic binary operation support. - template <typename CustomBinaryOp, typename OtherDerived> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived> - binaryExpr(const OtherDerived& other, const CustomBinaryOp& func) const { - return TensorCwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>(derived(), other, func); - } - - // Coefficient-wise binary operators. - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const Derived, const OtherDerived> - operator+(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_sum_op<Scalar>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const Derived, const OtherDerived> - operator-(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_difference_op<Scalar>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_product_op<Scalar>, const Derived, const OtherDerived> - operator*(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_product_op<Scalar>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived> - operator/(const OtherDerived& other) const { - return binaryExpr(other.derived(), 
internal::scalar_quotient_op<Scalar>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_fmod_op<Scalar>, const Derived, const OtherDerived> - mod(const OtherDerived& other) const { - EIGEN_STATIC_ASSERT(!std::numeric_limits<Scalar>::is_integer, YOU_MADE_A_PROGRAMMING_MISTAKE_FMOD_IS_NOT_FOR_INTEGERS); - return binaryExpr(other.derived(), internal::scalar_fmod_op<Scalar>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const OtherDerived> - cwiseMax(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_max_op<Scalar>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const OtherDerived> - cwiseMin(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_min_op<Scalar>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived> - operator&&(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_boolean_and_op()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived> - operator||(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_boolean_or_op()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived> - operator^(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_boolean_xor_op()); - } - - // Comparisons and tests. 
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const OtherDerived> - operator<(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_LT>()); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LE>, const Derived, const OtherDerived> - operator<=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_LE>()); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GT>, const Derived, const OtherDerived> - operator>(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_GT>()); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GE>, const Derived, const OtherDerived> - operator>=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_GE>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_EQ>, const Derived, const OtherDerived> - operator==(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_EQ>()); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>, const Derived, const OtherDerived> - operator!=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>()); - } - - // comparisons and 
tests for Scalars - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - operator<(Scalar threshold) const { - return operator<(constant(threshold)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - operator<=(Scalar threshold) const { - return operator<=(constant(threshold)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - operator>(Scalar threshold) const { - return operator>(constant(threshold)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - operator>=(Scalar threshold) const { - return operator>=(constant(threshold)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_EQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - operator==(Scalar threshold) const { - return operator==(constant(threshold)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - operator!=(Scalar threshold) const { - return operator!=(constant(threshold)); - } - - // Coefficient-wise ternary operators. 
- template<typename ThenDerived, typename ElseDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorSelectOp<const Derived, const ThenDerived, const ElseDerived> - select(const ThenDerived& thenTensor, const ElseDerived& elseTensor) const { - return TensorSelectOp<const Derived, const ThenDerived, const ElseDerived>(derived(), thenTensor.derived(), elseTensor.derived()); - } - - // Contractions. - typedef Eigen::IndexPair<Index> DimensionPair; - - template<typename OtherDerived, typename Dimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorContractionOp<const Dimensions, const Derived, const OtherDerived> - contract(const OtherDerived& other, const Dimensions& dims) const { - return TensorContractionOp<const Dimensions, const Derived, const OtherDerived>(derived(), other.derived(), dims); - } - - // Convolutions. - template<typename KernelDerived, typename Dimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorConvolutionOp<const Dimensions, const Derived, const KernelDerived> - convolve(const KernelDerived& kernel, const Dimensions& dims) const { - return TensorConvolutionOp<const Dimensions, const Derived, const KernelDerived>(derived(), kernel.derived(), dims); - } - - // Convolutions by fft. - template<typename KernelDerived, typename Dimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorConvolutionByFFTOp<const Dimensions, const Derived, const KernelDerived> - convolvebyfft(const KernelDerived& kernel, const Dimensions& dims) const { - return TensorConvolutionByFFTOp<const Dimensions, const Derived, const KernelDerived>(derived(), kernel.derived(), dims); - } - - // Reductions. 
- template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::SumReducer<CoeffReturnType>, const Dims, const Derived> - sum(const Dims& dims) const { - return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::SumReducer<CoeffReturnType>()); - } - - const TensorReductionOp<internal::SumReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived> - sum() const { - DimensionList<Index, NumDimensions> in_dims; - return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::SumReducer<CoeffReturnType>()); - } - - template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const Dims, const Derived> - mean(const Dims& dims) const { - return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MeanReducer<CoeffReturnType>()); - } - - const TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived> - mean() const { - DimensionList<Index, NumDimensions> in_dims; - return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MeanReducer<CoeffReturnType>()); - } - - template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const Dims, const Derived> - prod(const Dims& dims) const { - return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::ProdReducer<CoeffReturnType>()); - } - - const TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived> - prod() const { - DimensionList<Index, NumDimensions> in_dims; - 
return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::ProdReducer<CoeffReturnType>()); - } - - template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const Dims, const Derived> - maximum(const Dims& dims) const { - return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MaxReducer<CoeffReturnType>()); - } - - const TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived> - maximum() const { - DimensionList<Index, NumDimensions> in_dims; - return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MaxReducer<CoeffReturnType>()); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorTupleReducerOp< - internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, NumDimensions>, const Derived> - argmax() const { - array<Index, NumDimensions> in_dims; - for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d; - return TensorTupleReducerOp< - internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, NumDimensions>, - const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), -1, in_dims); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorTupleReducerOp< - internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, NumDimensions>, const Derived> - argmin() const { - array<Index, NumDimensions> in_dims; - for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d; - return TensorTupleReducerOp< - internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, NumDimensions>, - const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), 
-1, in_dims); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorTupleReducerOp< - internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, 1>, const Derived> - argmax(const int return_dim) const { - array<Index, 1> in_dims; - in_dims[0] = return_dim; - return TensorTupleReducerOp< - internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, 1>, - const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorTupleReducerOp< - internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, 1>, const Derived> - argmin(const int return_dim) const { - array<Index, 1> in_dims; - in_dims[0] = return_dim; - return TensorTupleReducerOp< - internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, 1>, - const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims); - } - - template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::MinReducer<CoeffReturnType>, const Dims, const Derived> - minimum(const Dims& dims) const { - return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MinReducer<CoeffReturnType>()); - } - - const TensorReductionOp<internal::MinReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived> - minimum() const { - DimensionList<Index, NumDimensions> in_dims; - return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MinReducer<CoeffReturnType>()); - } - - // This does not short-circuit, so is potentially very inefficient. 
- template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::AndReducer, const Dims, const TensorConversionOp<bool, const Derived> > - all(const Dims& dims) const { - return cast<bool>().reduce(dims, internal::AndReducer()); - } - - // This does not short-circuit, so is potentially very inefficient. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::AndReducer, const DimensionList<Index, NumDimensions>, const TensorConversionOp<bool, const Derived> > - all() const { - DimensionList<Index, NumDimensions> in_dims; - return cast<bool>().reduce(in_dims, internal::AndReducer()); - } - - // This does not short-circuit, so is potentially very inefficient. - template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::OrReducer, const Dims, const TensorConversionOp<bool, const Derived> > - any(const Dims& dims) const { - return cast<bool>().reduce(dims, internal::OrReducer()); - } - - // This does not short-circuit, so is potentially very inefficient. 
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::OrReducer, const DimensionList<Index, NumDimensions>, const TensorConversionOp<bool, const Derived> > - any() const { - DimensionList<Index, NumDimensions> in_dims; - return cast<bool>().reduce(in_dims, internal::OrReducer()); - } - - template <typename Reducer, typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<Reducer, const Dims, const Derived> - reduce(const Dims& dims, const Reducer& reducer) const { - return TensorReductionOp<Reducer, const Dims, const Derived>(derived(), dims, reducer); - } - - template <typename Broadcast> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorBroadcastingOp<const Broadcast, const Derived> - broadcast(const Broadcast& broadcast) const { - return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), broadcast); - } - - template <int FFTDataType, int FFTDirection, typename FFT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection> - fft(const FFT& fft) const { - return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), fft); - } - - template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorConcatenationOp<Axis, const Derived, const OtherDerived> - concatenate(const OtherDerived& other, Axis axis) const { - return TensorConcatenationOp<Axis, const Derived, const OtherDerived>(derived(), other.derived(), axis); - } - - template <typename PatchDims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorPatchOp<const PatchDims, const Derived> - extract_patches(const PatchDims& patch_dims) const { - return TensorPatchOp<const PatchDims, const Derived>(derived(), patch_dims); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived> - extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols, - const Index 
plane_stride = 1, const Index row_stride = 1, const Index col_stride = 1, - const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = 0) const { - return TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, 1, 1, 1, padding_type, padding_value); - } - - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived> - extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols, - const Index plane_stride, const Index row_stride, const Index col_stride, - const Index plane_inflate_stride, const Index row_inflate_stride, const Index col_inflate_stride, - const Index padding_top_z, const Index padding_bottom_z, - const Index padding_top, const Index padding_bottom, - const Index padding_left, const Index padding_right, const Scalar padding_value = 0) const { - return TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, plane_inflate_stride, row_inflate_stride, col_inflate_stride, padding_top_z, padding_bottom_z, padding_top, padding_bottom, padding_left, padding_right, padding_value); - } - - template <Index Rows, Index Cols> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorImagePatchOp<Rows, Cols, const Derived> - extract_image_patches() const { - return TensorImagePatchOp<Rows, Cols, const Derived>(derived(), Rows, Cols, 1, 1, 1, 1, 1, 1, PADDING_SAME, 0); - } - - template <Index Rows, Index Cols> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorImagePatchOp<Rows, Cols, const Derived> - extract_image_patches(const PaddingType padding_type) const { - return TensorImagePatchOp<Rows, Cols, const Derived>(derived(), Rows, Cols, 1, 1, 1, 1, 1, 1, padding_type, 0); - } - - template <Index Rows, Index Cols> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const 
TensorImagePatchOp<Rows, Cols, const Derived> - extract_image_patches(const Index stride, const PaddingType padding_type) const { - return TensorImagePatchOp<Rows, Cols, const Derived>(derived(), Rows, Cols, stride, stride, 1, 1, 1, 1, padding_type, 0); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorImagePatchOp<Dynamic, Dynamic, const Derived> - extract_image_patches(const Index patch_rows, const Index patch_cols, - const Index row_stride = 1, const Index col_stride = 1) const { - return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride, - 1, 1, 1, 1, PADDING_SAME, 0); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorImagePatchOp<Dynamic, Dynamic, const Derived> - extract_image_patches(const Index patch_rows, const Index patch_cols, - const Index row_stride, const Index col_stride, - const PaddingType padding_type) const { - return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride, - 1, 1, 1, 1, padding_type, 0); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorImagePatchOp<Dynamic, Dynamic, const Derived> - extract_image_patches(const Index patch_rows, const Index patch_cols, - const Index row_stride, const Index col_stride, - const PaddingType padding_type, const Scalar padding_value) const { - return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride, - 1, 1, 1, 1, padding_type, padding_value); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorImagePatchOp<Dynamic, Dynamic, const Derived> - extract_image_patches(const Index patch_rows, const Index patch_cols, - const Index row_stride, const Index col_stride, - const Index in_row_stride, const Index in_col_stride) const { - return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride, - in_row_stride, in_col_stride, 1, 1, PADDING_SAME, 
0); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorImagePatchOp<Dynamic, Dynamic, const Derived> - extract_image_patches(const Index patch_rows, const Index patch_cols, - const Index row_stride, const Index col_stride, - const Index in_row_stride, const Index in_col_stride, - const PaddingType padding_type) const { - return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride, - in_row_stride, in_col_stride, 1, 1, padding_type, 0); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorImagePatchOp<Dynamic, Dynamic, const Derived> - extract_image_patches(const Index patch_rows, const Index patch_cols, - const Index row_stride, const Index col_stride, - const Index in_row_stride, const Index in_col_stride, - const PaddingType padding_type, const Scalar padding_value) const { - return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride, - in_row_stride, in_col_stride, 1, 1, padding_type, padding_value); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorImagePatchOp<Dynamic, Dynamic, const Derived> - extract_image_patches(const Index patch_rows, const Index patch_cols, - const Index row_stride, const Index col_stride, - const Index in_row_stride, const Index in_col_stride, - const Index row_inflate_stride, const Index col_inflate_stride, - const PaddingType padding_type, const Scalar padding_value) const { - return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride, - in_row_stride, in_col_stride, row_inflate_stride, col_inflate_stride, - padding_type, padding_value); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorImagePatchOp<Dynamic, Dynamic, const Derived> - extract_image_patches(const Index patch_rows, const Index patch_cols, - const Index row_stride, const Index col_stride, - const Index in_row_stride, const Index in_col_stride, - const Index 
row_inflate_stride, const Index col_inflate_stride, - const Index padding_top, const Index padding_bottom, - const Index padding_left,const Index padding_right, - const Scalar padding_value) const { - return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride, - in_row_stride, in_col_stride, row_inflate_stride, col_inflate_stride, - padding_top, padding_bottom, padding_left, padding_right, padding_value); - } - - // Morphing operators. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorLayoutSwapOp<const Derived> - swap_layout() const { - return TensorLayoutSwapOp<const Derived>(derived()); - } - template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReshapingOp<const NewDimensions, const Derived> - reshape(const NewDimensions& newDimensions) const { - return TensorReshapingOp<const NewDimensions, const Derived>(derived(), newDimensions); - } - template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorSlicingOp<const StartIndices, const Sizes, const Derived> - slice(const StartIndices& startIndices, const Sizes& sizes) const { - return TensorSlicingOp<const StartIndices, const Sizes, const Derived>(derived(), startIndices, sizes); - } - template <Index DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorChippingOp<DimId, const Derived> - chip(const Index offset) const { - return TensorChippingOp<DimId, const Derived>(derived(), offset, DimId); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorChippingOp<Dynamic, const Derived> - chip(const Index offset, const Index dim) const { - return TensorChippingOp<Dynamic, const Derived>(derived(), offset, dim); - } - template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReverseOp<const ReverseDimensions, const Derived> - reverse(const ReverseDimensions& rev) const { - return TensorReverseOp<const ReverseDimensions, const Derived>(derived(), 
rev); - } - template <typename PaddingDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorPaddingOp<const PaddingDimensions, const Derived> - pad(const PaddingDimensions& padding) const { - return TensorPaddingOp<const PaddingDimensions, const Derived>(derived(), padding, Scalar(0)); - } - template <typename PaddingDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorPaddingOp<const PaddingDimensions, const Derived> - pad (const PaddingDimensions& padding, const Scalar padding_value) const { - return TensorPaddingOp<const PaddingDimensions, const Derived>(derived(), padding, padding_value); - } - template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorShufflingOp<const Shuffle, const Derived> - shuffle(const Shuffle& shuffle) const { - return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle); - } - template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorStridingOp<const Strides, const Derived> - stride(const Strides& strides) const { - return TensorStridingOp<const Strides, const Derived>(derived(), strides); - } - template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorInflationOp<const Strides, const Derived> - inflate(const Strides& strides) const { - return TensorInflationOp<const Strides, const Derived>(derived(), strides); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorTrueIndicesOp<const Derived> - true_indices(const Index& not_true_value = -1) const { - return TensorTrueIndicesOp<const Derived>(derived(), not_true_value); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorIndexTupleOp<const Derived> - index_tuples() const { - return TensorIndexTupleOp<const Derived>(derived()); - } - template <typename CustomUnaryFunc> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCustomUnaryOp<const CustomUnaryFunc, const Derived> customOp(const CustomUnaryFunc& op) const { - return TensorCustomUnaryOp<const CustomUnaryFunc, const 
Derived>(derived(), op); - } - template <typename OtherDerived, typename CustomBinaryFunc> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCustomBinaryOp<const CustomBinaryFunc, const Derived, const OtherDerived> customOp(const OtherDerived& other, const CustomBinaryFunc& op) const { - return TensorCustomBinaryOp<const CustomBinaryFunc, const Derived, const OtherDerived>(derived(), other, op); - } - - // Force the evaluation of the expression. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorForcedEvalOp<const Derived> eval() const { - return TensorForcedEvalOp<const Derived>(derived()); - } - - protected: - template <typename Scalar, std::size_t NumIndices, int Options, typename IndexType> friend class Tensor; - template <typename Scalar, int Option, typename IndexTypes> friend class TensorVarDim; - template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize; - template <typename OtherDerived, int AccessLevel> friend class TensorBase; - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); } -}; - -template<typename Derived> -class TensorBase<Derived, WriteAccessors> : public TensorBase<Derived, ReadOnlyAccessors> { - public: - typedef internal::traits<Derived> DerivedTraits; - typedef typename DerivedTraits::Scalar Scalar; - typedef typename DerivedTraits::Index Index; - typedef Scalar CoeffReturnType; - typedef typename internal::packet_traits<Scalar>::type PacketReturnType; - static const int NumDimensions = DerivedTraits::NumDimensions; - - template <typename Scalar, std::size_t NumIndices, int Options, typename IndexType> friend class Tensor; - template <typename Scalar, int Options, typename IndexType> friend class TensorVarDim; - template <typename OtherDerived, int AccessLevel> friend class TensorBase; - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Derived& setZero() { - return setConstant(Scalar(0)); - } - EIGEN_DEVICE_FUNC - 
EIGEN_STRONG_INLINE Derived& setConstant(const Scalar& val) { - return derived() = this->constant(val); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Derived& setRandom() { - return derived() = this->random(); - } - template <typename RandomGenerator> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Derived& setRandom() { - return derived() = this->template random<RandomGenerator>(); - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Derived& setValues( - const typename internal::Initializer<Derived, NumDimensions>::InitList& vals) { - TensorEvaluator<Derived, DefaultDevice> eval(derived(), DefaultDevice()); - internal::initialize_tensor<Derived, NumDimensions>(eval, vals); - return derived(); - } -#endif // EIGEN_HAS_VARIADIC_TEMPLATES - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Derived& operator+=(const OtherDerived& other) { - return derived() = derived() + other.derived(); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Derived& operator-=(const OtherDerived& other) { - return derived() = derived() - other.derived(); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Derived& operator*=(const OtherDerived& other) { - return derived() = derived() * other.derived(); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Derived& operator/=(const OtherDerived& other) { - return derived() = derived() / other.derived(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorLayoutSwapOp<const Derived> - swap_layout() const { - return TensorLayoutSwapOp<const Derived>(derived()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorLayoutSwapOp<Derived> - swap_layout() { - return TensorLayoutSwapOp<Derived>(derived()); - } - - template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorConcatenationOp<const Axis, const Derived, const OtherDerived> - concatenate(const OtherDerived& other, 
const Axis& axis) const { - return TensorConcatenationOp<const Axis, const Derived, const OtherDerived>(derived(), other, axis); - } - template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorConcatenationOp<const Axis, Derived, OtherDerived> - concatenate(const OtherDerived& other, const Axis& axis) { - return TensorConcatenationOp<const Axis, Derived, OtherDerived>(derived(), other, axis); - } - - template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReshapingOp<const NewDimensions, const Derived> - reshape(const NewDimensions& newDimensions) const { - return TensorReshapingOp<const NewDimensions, const Derived>(derived(), newDimensions); - } - template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorReshapingOp<const NewDimensions, Derived> - reshape(const NewDimensions& newDimensions) { - return TensorReshapingOp<const NewDimensions, Derived>(derived(), newDimensions); - } - - template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorSlicingOp<const StartIndices, const Sizes, const Derived> - slice(const StartIndices& startIndices, const Sizes& sizes) const { - return TensorSlicingOp<const StartIndices, const Sizes, const Derived>(derived(), startIndices, sizes); - } - template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorSlicingOp<const StartIndices, const Sizes, Derived> - slice(const StartIndices& startIndices, const Sizes& sizes) { - return TensorSlicingOp<const StartIndices, const Sizes, Derived>(derived(), startIndices, sizes); - } - - template <DenseIndex DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorChippingOp<DimId, const Derived> - chip(const Index offset) const { - return TensorChippingOp<DimId, const Derived>(derived(), offset, DimId); - } - template <Index DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorChippingOp<DimId, Derived> - chip(const Index offset) { 
- return TensorChippingOp<DimId, Derived>(derived(), offset, DimId); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorChippingOp<Dynamic, const Derived> - chip(const Index offset, const Index dim) const { - return TensorChippingOp<Dynamic, const Derived>(derived(), offset, dim); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorChippingOp<Dynamic, Derived> - chip(const Index offset, const Index dim) { - return TensorChippingOp<Dynamic, Derived>(derived(), offset, dim); - } - - template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReverseOp<const ReverseDimensions, const Derived> - reverse(const ReverseDimensions& rev) const { - return TensorReverseOp<const ReverseDimensions, const Derived>(derived(), rev); - } - template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorReverseOp<const ReverseDimensions, Derived> - reverse(const ReverseDimensions& rev) { - return TensorReverseOp<const ReverseDimensions, Derived>(derived(), rev); - } - - template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorShufflingOp<const Shuffle, const Derived> - shuffle(const Shuffle& shuffle) const { - return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle); - } - template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorShufflingOp<const Shuffle, Derived> - shuffle(const Shuffle& shuffle) { - return TensorShufflingOp<const Shuffle, Derived>(derived(), shuffle); - } - - template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorStridingOp<const Strides, const Derived> - stride(const Strides& strides) const { - return TensorStridingOp<const Strides, const Derived>(derived(), strides); - } - template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorStridingOp<const Strides, Derived> - stride(const Strides& strides) { - return TensorStridingOp<const Strides, Derived>(derived(), strides); - } - - // Select the device on which to 
evaluate the expression. - template <typename DeviceType> - TensorDevice<Derived, DeviceType> device(const DeviceType& device) { - return TensorDevice<Derived, DeviceType>(device, derived()); - } - - protected: - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Derived& derived() { return *static_cast<Derived*>(this); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_BASE_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h deleted file mode 100644 index ac428b169f..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +++ /dev/null @@ -1,627 +0,0 @@ -#ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H -#define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H - -namespace Eigen { - -/** \class TensorBlock - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor block class. - * - * This class represents a tensor block specified by the index of the - * first block coefficient, and the size of the block in each dimension. 
- * - */ - -namespace internal { - -template <typename Index, typename Scalar, std::size_t NumDims, int Layout> -class TensorBlock { - public: - typedef DSizes<Index, NumDims> Dimensions; - - TensorBlock(const Index first_coeff_index, - const Dimensions& block_sizes, - const Dimensions& block_strides, - const Dimensions& tensor_strides, - Scalar* data) - : m_first_coeff_index(first_coeff_index), - m_block_sizes(block_sizes), - m_block_strides(block_strides), - m_tensor_strides(tensor_strides), - m_data(data) {} - - Index first_coeff_index() const { return m_first_coeff_index; } - - const Dimensions& block_sizes() const { return m_block_sizes; } - - const Dimensions& block_strides() const { return m_block_strides; } - - const Dimensions& tensor_strides() const { return m_tensor_strides; } - - Scalar* data() { return m_data; } - - const Scalar* data() const { return m_data; } - - private: - Index m_first_coeff_index; - Dimensions m_block_sizes; - Dimensions m_block_strides; - Dimensions m_tensor_strides; - Scalar* m_data; // Not owned. -}; - -template <typename Index, typename Scalar, bool Vectorizable> -struct TensorBlockCopyOp { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - const Index num_coeff_to_copy, const Index dst_index, - const Index dst_stride, Scalar* EIGEN_RESTRICT dst_data, const Index src_index, - const Index src_stride, const Scalar* EIGEN_RESTRICT src_data) { - for (Index i = 0; i < num_coeff_to_copy; ++i) { - dst_data[dst_index + i * dst_stride] = - src_data[src_index + i * src_stride]; - } - } -}; - -// NOTE: Benchmarks run on an implementation of this that broke each of the -// loops in these conditionals into it's own template specialization (to -// avoid conditionals in the caller's loop) did not show an improvement. 
-template <typename Index, typename Scalar> -struct TensorBlockCopyOp<Index, Scalar, true> { - typedef typename packet_traits<Scalar>::type Packet; - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - const Index num_coeff_to_copy, const Index dst_index, - const Index dst_stride, Scalar* EIGEN_RESTRICT dst_data, - const Index src_index, const Index src_stride, - const Scalar* EIGEN_RESTRICT src_data) { - if (src_stride == 1) { - const Index packet_size = internal::unpacket_traits<Packet>::size; - const Index vectorized_size = - (num_coeff_to_copy / packet_size) * packet_size; - if (dst_stride == 1) { - // LINEAR - for (Index i = 0; i < vectorized_size; i += packet_size) { - Packet p = internal::ploadt<Packet, Unaligned>( - src_data + src_index + i); - internal::pstoret<Scalar, Packet, Unaligned>( - dst_data + dst_index + i, p); - } - for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) { - dst_data[dst_index + i] = src_data[src_index + i]; - } - } else { - // SCATTER - for (Index i = 0; i < vectorized_size; i += packet_size) { - Packet p = internal::ploadt<Packet, Unaligned>( - src_data + src_index + i); - internal::pscatter<Scalar, Packet>( - dst_data + dst_index + i * dst_stride, p, dst_stride); - } - for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) { - dst_data[dst_index + i * dst_stride] = src_data[src_index + i]; - } - } - } else { - if (dst_stride == 1) { - // GATHER - const Index packet_size = internal::unpacket_traits<Packet>::size; - const Index vectorized_size = - (num_coeff_to_copy / packet_size) * packet_size; - for (Index i = 0; i < vectorized_size; i += packet_size) { - Packet p = internal::pgather<Scalar, Packet>( - src_data + src_index + i * src_stride, src_stride); - internal::pstoret<Scalar, Packet, Unaligned>( - dst_data + dst_index + i, p); - } - for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) { - dst_data[dst_index + i] = src_data[src_index + i * src_stride]; - } - } else { - // RANDOM - for (Index i = 0; i < 
num_coeff_to_copy; ++i) { - dst_data[dst_index + i * dst_stride] = - src_data[src_index + i * src_stride]; - } - } - } - } -}; - -/** \class TensorBlockIO - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor block IO class. - * - * This class is responsible for copying data between a tensor and a tensor - * block. - * - */ -template <typename Index, typename Scalar, std::size_t NumDims, int Layout, - bool Vectorizable, bool BlockRead> -class TensorBlockIO { - public: - typedef typename internal::TensorBlock<Index, Scalar, NumDims, Layout> - TensorBlock; - typedef typename internal::TensorBlockCopyOp<Index, Scalar, Vectorizable> - TensorBlockCopyOp; - - protected: - struct BlockIteratorState { - Index input_stride; - Index output_stride; - Index input_span; - Index output_span; - Index size; - Index count; - }; - - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy( - const TensorBlock& block, Index first_coeff_index, - const array<Index, NumDims>& tensor_to_block_dim_map, - const array<Index, NumDims>& tensor_strides, const Scalar* src_data, - Scalar* dst_data) { - // Calculate strides and dimensions. - const Index block_dim_for_tensor_stride1_dim = - NumDims == 0 ? 1 : - tensor_to_block_dim_map[static_cast<int>(Layout) == - static_cast<int>(ColMajor) - ? 0 - : NumDims - 1]; - const size_t block_inner_dim_size = - NumDims == 0 ? 1 : - block.block_sizes()[block_dim_for_tensor_stride1_dim]; - const size_t block_outer_dim_size = - NumDims == 0 ? 1 : - block.block_sizes().TotalSize() / block_inner_dim_size; - - Index inputIndex; - Index outputIndex; - Index input_stride; - Index output_stride; - - // Setup strides to read/write along the tensor's stride1 dimension. - if (BlockRead) { - inputIndex = first_coeff_index; - outputIndex = 0; - input_stride = 1; - output_stride = NumDims == 0 ? 1 - : block.block_strides()[block_dim_for_tensor_stride1_dim]; - } else { - inputIndex = 0; - outputIndex = first_coeff_index; - input_stride = NumDims == 0 ? 
1 - : block.block_strides()[block_dim_for_tensor_stride1_dim]; - output_stride = 1; - } - - const std::size_t at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1; - array<BlockIteratorState, at_least_1_dim> block_iter_state; - - // Initialize block iterator state. - for (int i = 0; i < static_cast<int>(NumDims) - 1; ++i) { - const int dim = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? i + 1 - : NumDims - i - 2; - block_iter_state[i].size = - block.block_sizes()[tensor_to_block_dim_map[dim]]; - if (BlockRead) { - block_iter_state[i].input_stride = tensor_strides[dim]; - block_iter_state[i].output_stride = - block.block_strides()[tensor_to_block_dim_map[dim]]; - } else { - block_iter_state[i].input_stride = - block.block_strides()[tensor_to_block_dim_map[dim]]; - block_iter_state[i].output_stride = tensor_strides[dim]; - } - block_iter_state[i].input_span = - block_iter_state[i].input_stride * (block_iter_state[i].size - 1); - block_iter_state[i].output_span = - block_iter_state[i].output_stride * (block_iter_state[i].size - 1); - block_iter_state[i].count = 0; - } - - // Iterate copying data from src to dst. - for (Index i = 0; i < block_outer_dim_size; ++i) { - TensorBlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride, - dst_data, inputIndex, input_stride, src_data); - // Update index. - for (int i = 0; i < static_cast<int>(NumDims) - 1; ++i) { - if (++block_iter_state[i].count < block_iter_state[i].size) { - inputIndex += block_iter_state[i].input_stride; - outputIndex += block_iter_state[i].output_stride; - break; - } - block_iter_state[i].count = 0; - inputIndex -= block_iter_state[i].input_span; - outputIndex -= block_iter_state[i].output_span; - } - } - } -}; - -/** \class TensorBlockReader - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor block reader class. - * - * This class is responsible for reading a tensor block. 
- * - */ - -template <typename Index, typename Scalar, std::size_t NumDims, int Layout, - bool Vectorizable> -class TensorBlockReader : public TensorBlockIO<Index, Scalar, NumDims, - Layout, Vectorizable, true> { - public: - typedef typename internal::TensorBlock<Index, Scalar, NumDims, Layout> - TensorBlock; - typedef TensorBlockIO<Index, Scalar, NumDims, Layout, Vectorizable, true> - Base; - - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - TensorBlock* block, const Scalar* src_data) { - array<Index, NumDims> tensor_to_block_dim_map; - for (int i = 0; i < NumDims; ++i) { - tensor_to_block_dim_map[i] = i; - } - Base::Copy(*block, block->first_coeff_index(), tensor_to_block_dim_map, - block->tensor_strides(), src_data, block->data()); - } - - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - TensorBlock* block, Index first_coeff_index, - const array<Index, NumDims>& tensor_to_block_dim_map, - const array<Index, NumDims>& tensor_strides, const Scalar* src_data) { - Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map, - tensor_strides, src_data, block->data()); - } -}; - -/** \class TensorBlockWriter - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor block writer class. - * - * This class is responsible for writing a tensor block. 
- * - */ - -template <typename Index, typename Scalar, std::size_t NumDims, int Layout, - bool Vectorizable> -class TensorBlockWriter : public TensorBlockIO<Index, Scalar, NumDims, - Layout, Vectorizable, false> { - public: - typedef typename internal::TensorBlock<Index, Scalar, NumDims, Layout> - TensorBlock; - typedef TensorBlockIO<Index, Scalar, NumDims, Layout, Vectorizable, false> - Base; - - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - const TensorBlock& block, Scalar* dst_data) { - array<Index, NumDims> tensor_to_block_dim_map; - for (int i = 0; i < NumDims; ++i) { - tensor_to_block_dim_map[i] = i; - } - Base::Copy(block, block.first_coeff_index(), tensor_to_block_dim_map, - block.tensor_strides(), block.data(), dst_data); - } - - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - const TensorBlock& block, Index first_coeff_index, - const array<Index, NumDims>& tensor_to_block_dim_map, - const array<Index, NumDims>& tensor_strides, Scalar* dst_data) { - Base::Copy(block, first_coeff_index, tensor_to_block_dim_map, - tensor_strides, block.data(), dst_data); - } -}; - -enum TensorBlockShapeType { - kUniformAllDims, - kSkewedInnerDims, -}; - -struct TensorOpResourceRequirements { - TensorBlockShapeType block_shape; - std::size_t block_total_size; - // TODO(andydavis) Add 'target_num_threads' to support communication of - // thread-resource requirements. This will allow ops deep in the - // expression tree (like reductions) to communicate resources - // requirements based on local state (like the total number of reductions - // to be computed). - TensorOpResourceRequirements(internal::TensorBlockShapeType shape, - const std::size_t size) - : block_shape(shape), block_total_size(size) {} -}; - -/** \class TensorBlockMapper - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor block mapper class. - * - * This class is responsible for iterating over the blocks of a tensor. 
- * - */ - -template <typename Index, typename Scalar, std::size_t NumDims, int Layout> -class TensorBlockMapper { - public: - typedef typename internal::TensorBlock<Index, Scalar, NumDims, Layout> - TensorBlock; - - TensorBlockMapper(const Eigen::DSizes<Index, NumDims>& dims, - const TensorBlockShapeType block_shape, - const size_t max_coeff_count) - : m_dimensions(dims), m_block_dim_sizes(dims), m_total_block_count(1) { - if (m_block_dim_sizes.TotalSize() > max_coeff_count) { - if (block_shape == kUniformAllDims) { - // Tensor will not fit within 'max_coeff_count' budget: calculate tensor - // block dimension sizes based on "square" dimension size target. - const size_t dim_size_target = - std::pow(static_cast<float>(max_coeff_count), - 1.0 / static_cast<float>(m_block_dim_sizes.rank())); - for (size_t i = 0; i < m_block_dim_sizes.rank(); ++i) { - // TODO(andydavis) Adjust the inner most 'm_block_dim_size' to make it - // a multiple of the packet size. Note that reducing 'm_block_dim_size' - // in this manner can increase the number of blocks, and so will - // amplify any per-block overhead. - m_block_dim_sizes[i] = - numext::mini(dim_size_target, static_cast<size_t>(m_dimensions[i])); - } - // Add any un-allocated coefficients to inner dimension(s). - Index total_size = m_block_dim_sizes.TotalSize(); - for (int i = 0; i < NumDims; ++i) { - const int dim = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? i : NumDims - i - 1; - if (m_block_dim_sizes[dim] < m_dimensions[dim]) { - const Index total_size_other_dims = total_size / - m_block_dim_sizes[dim]; - const Index alloc_avail = max_coeff_count / total_size_other_dims; - if (alloc_avail == m_block_dim_sizes[dim]) { - // Insufficient excess coefficients to allocate. 
- break; - } - m_block_dim_sizes[dim] = numext::mini(m_dimensions[dim], alloc_avail); - total_size = total_size_other_dims * m_block_dim_sizes[dim]; - } - } - } else { - eigen_assert(block_shape == kSkewedInnerDims); - Index coeff_to_allocate = max_coeff_count; - for (int i = 0; i < NumDims; ++i) { - const int dim = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? i : NumDims - i - 1; - m_block_dim_sizes[dim] = numext::mini(coeff_to_allocate, - m_dimensions[dim]); - coeff_to_allocate /= numext::maxi(static_cast<Index>(1), - m_block_dim_sizes[dim]); - } - } - } - - // Calculate block counts by dimension and total block count. - DSizes<Index, NumDims> block_count; - for (size_t i = 0; i < block_count.rank(); ++i) { - block_count[i] = - (m_dimensions[i] + m_block_dim_sizes[i] - 1) / m_block_dim_sizes[i]; - } - m_total_block_count = array_prod(block_count); - - // Calculate block strides (used for enumerating blocks). - if (NumDims > 0) { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_block_strides[0] = 1; - m_tensor_strides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_block_strides[i] = m_block_strides[i - 1] * block_count[i - 1]; - m_tensor_strides[i] = m_tensor_strides[i - 1] * m_dimensions[i - 1]; - } - } else { - m_block_strides[NumDims - 1] = 1; - m_tensor_strides[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_block_strides[i] = m_block_strides[i + 1] * block_count[i + 1]; - m_tensor_strides[i] = m_tensor_strides[i + 1] * m_dimensions[i + 1]; - } - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock - GetBlockForIndex(Index block_index, Scalar* data) const { - Index first_coeff_index = 0; - DSizes<Index, NumDims> coords; - DSizes<Index, NumDims> sizes; - DSizes<Index, NumDims> strides; - if (NumDims > 0) { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = block_index / m_block_strides[i]; - coords[i] = idx * 
m_block_dim_sizes[i]; - sizes[i] = - numext::mini((m_dimensions[i] - coords[i]), m_block_dim_sizes[i]); - block_index -= idx * m_block_strides[i]; - first_coeff_index += coords[i] * m_tensor_strides[i]; - } - coords[0] = block_index * m_block_dim_sizes[0]; - sizes[0] = - numext::mini((m_dimensions[0] - coords[0]), m_block_dim_sizes[0]); - first_coeff_index += coords[0] * m_tensor_strides[0]; - - strides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - strides[i] = strides[i - 1] * sizes[i - 1]; - } - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = block_index / m_block_strides[i]; - coords[i] = idx * m_block_dim_sizes[i]; - sizes[i] = - numext::mini((m_dimensions[i] - coords[i]), m_block_dim_sizes[i]); - block_index -= idx * m_block_strides[i]; - first_coeff_index += coords[i] * m_tensor_strides[i]; - } - coords[NumDims - 1] = block_index * m_block_dim_sizes[NumDims - 1]; - sizes[NumDims - 1] = - numext::mini((m_dimensions[NumDims - 1] - coords[NumDims - 1]), - m_block_dim_sizes[NumDims - 1]); - first_coeff_index += coords[NumDims - 1] * m_tensor_strides[NumDims - 1]; - - strides[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - strides[i] = strides[i + 1] * sizes[i + 1]; - } - } - } - - return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides, - data); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index total_block_count() const { - return m_total_block_count; - } - - private: - DSizes<Index, NumDims> m_dimensions; - DSizes<Index, NumDims> m_block_dim_sizes; - DSizes<Index, NumDims> m_block_strides; - DSizes<Index, NumDims> m_tensor_strides; - Index m_total_block_count; -}; - -/** \class TensorSliceBlockMapper - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor slice block mapper class. - * - * This class is responsible for iterating over the blocks of - * a slice of a tensor. Supports shuffling of the block strides - * for callers that want to reduce strides for dimensions to be - * processed together. 
- * - */ - -template <typename Index, typename Scalar, std::size_t NumDims, int Layout> -class TensorSliceBlockMapper { - public: - typedef typename internal::TensorBlock<Index, Scalar, NumDims, Layout> - TensorBlock; - typedef DSizes<Index, NumDims> Dimensions; - - TensorSliceBlockMapper(const Dimensions& tensor_dims, - const Dimensions& tensor_slice_offsets, - const Dimensions& tensor_slice_extents, - const Dimensions& block_dim_sizes, - const Dimensions& block_stride_order) - : m_tensor_dimensions(tensor_dims), - m_tensor_slice_offsets(tensor_slice_offsets), - m_tensor_slice_extents(tensor_slice_extents), - m_block_dim_sizes(block_dim_sizes), - m_block_stride_order(block_stride_order), - m_total_block_count(1) { - // Calculate block counts by dimension and total block count. - DSizes<Index, NumDims> block_count; - for (size_t i = 0; i < block_count.rank(); ++i) { - block_count[i] = (m_tensor_slice_extents[i] + m_block_dim_sizes[i] - 1) / - m_block_dim_sizes[i]; - } - m_total_block_count = array_prod(block_count); - - // Calculate block strides (used for enumerating blocks). 
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_block_strides[0] = 1; - m_tensor_strides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_block_strides[i] = m_block_strides[i - 1] * block_count[i - 1]; - m_tensor_strides[i] = m_tensor_strides[i - 1] * - m_tensor_dimensions[i - 1]; - } - } else { - m_block_strides[NumDims - 1] = 1; - m_tensor_strides[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_block_strides[i] = m_block_strides[i + 1] * block_count[i + 1]; - m_tensor_strides[i] = m_tensor_strides[i + 1] * - m_tensor_dimensions[i + 1]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock - GetBlockForIndex(Index block_index, Scalar* data) const { - Index first_coeff_index = 0; - DSizes<Index, NumDims> coords; - DSizes<Index, NumDims> sizes; - DSizes<Index, NumDims> strides; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = block_index / m_block_strides[i]; - coords[i] = m_tensor_slice_offsets[i] + idx * m_block_dim_sizes[i]; - sizes[i] = numext::mini(m_tensor_slice_offsets[i] + m_tensor_slice_extents[i] - coords[i], - m_block_dim_sizes[i]); - block_index -= idx * m_block_strides[i]; - first_coeff_index += coords[i] * m_tensor_strides[i]; - } - coords[0] = m_tensor_slice_offsets[0] + - block_index * m_block_dim_sizes[0]; - sizes[0] = numext::mini(m_tensor_slice_offsets[0] + m_tensor_slice_extents[0] - coords[0], - m_block_dim_sizes[0]); - first_coeff_index += coords[0] * m_tensor_strides[0]; - - Index prev_dim = m_block_stride_order[0]; - strides[prev_dim] = 1; - for (int i = 1; i < NumDims; ++i) { - const Index curr_dim = m_block_stride_order[i]; - strides[curr_dim] = strides[prev_dim] * sizes[prev_dim]; - prev_dim = curr_dim; - } - } else { - for (int i = 0; i < static_cast<int>(NumDims) - 1; ++i) { - const Index idx = block_index / m_block_strides[i]; - coords[i] = m_tensor_slice_offsets[i] + idx * m_block_dim_sizes[i]; - sizes[i] = 
numext::mini(m_tensor_slice_offsets[i] + m_tensor_slice_extents[i] - coords[i], - m_block_dim_sizes[i]); - block_index -= idx * m_block_strides[i]; - first_coeff_index += coords[i] * m_tensor_strides[i]; - } - coords[NumDims - 1] = m_tensor_slice_offsets[NumDims - 1] + - block_index * m_block_dim_sizes[NumDims - 1]; - sizes[NumDims - 1] = numext::mini( - m_tensor_slice_offsets[NumDims - 1] + m_tensor_slice_extents[NumDims - 1] - coords[NumDims - 1], - m_block_dim_sizes[NumDims - 1]); - first_coeff_index += coords[NumDims - 1] * m_tensor_strides[NumDims - 1]; - - Index prev_dim = m_block_stride_order[NumDims - 1]; - strides[prev_dim] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - const Index curr_dim = m_block_stride_order[i]; - strides[curr_dim] = strides[prev_dim] * sizes[prev_dim]; - prev_dim = curr_dim; - } - } - - return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides, - data); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index total_block_count() const { - return m_total_block_count; - } - - private: - Dimensions m_tensor_dimensions; - Dimensions m_tensor_slice_offsets; - Dimensions m_tensor_slice_extents; - Dimensions m_tensor_strides; - Dimensions m_block_dim_sizes; - Dimensions m_block_stride_order; - Dimensions m_block_strides; - Index m_total_block_count; -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h deleted file mode 100644 index 7e6d00fad6..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ /dev/null @@ -1,352 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H -#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H - -namespace Eigen { - -/** \class TensorBroadcasting - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor broadcasting class. - * - * - */ -namespace internal { -template<typename Broadcast, typename XprType> -struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename Broadcast, typename XprType> -struct eval<TensorBroadcastingOp<Broadcast, XprType>, Eigen::Dense> -{ - typedef const TensorBroadcastingOp<Broadcast, XprType>& type; -}; - -template<typename Broadcast, typename XprType> -struct nested<TensorBroadcastingOp<Broadcast, XprType>, 1, typename eval<TensorBroadcastingOp<Broadcast, XprType> >::type> -{ - typedef TensorBroadcastingOp<Broadcast, XprType> type; -}; - -} // end namespace internal - - - -template<typename Broadcast, typename XprType> -class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef 
typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename Eigen::internal::nested<TensorBroadcastingOp>::type Nested; - typedef typename Eigen::internal::traits<TensorBroadcastingOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType& expr, const Broadcast& broadcast) - : m_xpr(expr), m_broadcast(broadcast) {} - - EIGEN_DEVICE_FUNC - const Broadcast& broadcast() const { return m_broadcast; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const Broadcast m_broadcast; -}; - - -// Eval as rvalue -template<typename Broadcast, typename ArgType, typename Device> -struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> -{ - typedef TensorBroadcastingOp<Broadcast, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions; - EIGEN_STATIC_ASSERT(NumDims == internal::array_size<Broadcast>::value, "Broadcast cannot change rank") - - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - const Broadcast& broadcast = op.broadcast(); - for (int i = 0; i < NumDims; ++i) { - 
eigen_assert(input_dims[i] > 0); - m_dimensions[i] = input_dims[i] * broadcast[i]; - } - - if (NumDims > 0) { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputStrides[0] = 1; - m_outputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - } - } else { - // NumDims is always > 0 here, but use max to avoid compiler warning - m_inputStrides[numext::maxi(0, NumDims-1)] = 1; - m_outputStrides[numext::maxi(0, NumDims-1)] = 1; - for (int i = NumDims-2; i >= 0; --i) { - m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; - m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; - } - } - } - } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const - { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - return coeffColMajor(index); - } else { - return coeffRowMajor(index); - } - } - - // TODO: attempt to speed this up. 
The integer divisions and modulo are slow - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const - { - Index inputIndex = 0; - if (NumDims > 0) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq<Broadcast>()(i, 1)) { - eigen_assert(idx < m_impl.dimensions()[i]); - inputIndex += idx * m_inputStrides[i]; - } else { - if (internal::index_statically_eq<InputDimensions>()(i, 1)) { - eigen_assert(idx % m_impl.dimensions()[i] == 0); - } else { - inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; - } - } - index -= idx * m_outputStrides[i]; - } - if (internal::index_statically_eq<Broadcast>()(0, 1)) { - eigen_assert(index < m_impl.dimensions()[0]); - inputIndex += index; - } else { - if (internal::index_statically_eq<InputDimensions>()(0, 1)) { - eigen_assert(index % m_impl.dimensions()[0] == 0); - } else { - inputIndex += (index % m_impl.dimensions()[0]); - } - } - } - return m_impl.coeff(inputIndex); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const - { - Index inputIndex = 0; - if (NumDims > 0) { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq<Broadcast>()(i, 1)) { - eigen_assert(idx < m_impl.dimensions()[i]); - inputIndex += idx * m_inputStrides[i]; - } else { - if (internal::index_statically_eq<InputDimensions>()(i, 1)) { - eigen_assert(idx % m_impl.dimensions()[i] == 0); - } else { - inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; - } - } - index -= idx * m_outputStrides[i]; - } - if (internal::index_statically_eq<Broadcast>()(NumDims-1, 1)) { - eigen_assert(index < m_impl.dimensions()[NumDims-1]); - inputIndex += index; - } else { - if (internal::index_statically_eq<InputDimensions>()(NumDims-1, 1)) { - eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); - } else { - inputIndex += (index % 
m_impl.dimensions()[NumDims-1]); - } - } - } - return m_impl.coeff(inputIndex); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const - { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - return packetColMajor<LoadMode>(index); - } else { - return packetRowMajor<LoadMode>(index); - } - } - - // Ignore the LoadMode and always use unaligned loads since we can't guarantee - // the alignment at compile time. - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - const Index originalIndex = index; - - Index inputIndex = 0; - Index innermostLoc = 0; - if (NumDims > 0) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq<Broadcast>()(i, 1)) { - eigen_assert(idx < m_impl.dimensions()[i]); - inputIndex += idx * m_inputStrides[i]; - } else { - if (internal::index_statically_eq<InputDimensions>()(i, 1)) { - eigen_assert(idx % m_impl.dimensions()[i] == 0); - } else { - inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; - } - } - index -= idx * m_outputStrides[i]; - } - if (internal::index_statically_eq<Broadcast>()(0, 1)) { - eigen_assert(index < m_impl.dimensions()[0]); - innermostLoc = index; - } else { - if (internal::index_statically_eq<InputDimensions>()(0, 1)) { - eigen_assert(innermostLoc % m_impl.dimensions()[0] == 0); - innermostLoc = 0; - } else { - innermostLoc = index % m_impl.dimensions()[0]; - } - } - inputIndex += innermostLoc; - } - - // Todo: this could be extended to the second dimension if we're not - // broadcasting alongside the first dimension, and so on. 
- if (innermostLoc + packetSize <= m_impl.dimensions()[0]) { - return m_impl.template packet<Unaligned>(inputIndex); - } else { - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - values[0] = m_impl.coeff(inputIndex); - for (int i = 1; i < packetSize; ++i) { - values[i] = coeffColMajor(originalIndex+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - const Index originalIndex = index; - - Index inputIndex = 0; - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq<Broadcast>()(i, 1)) { - eigen_assert(idx < m_impl.dimensions()[i]); - inputIndex += idx * m_inputStrides[i]; - } else { - if (internal::index_statically_eq<InputDimensions>()(i, 1)) { - eigen_assert(idx % m_impl.dimensions()[i] == 0); - } else { - inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; - } - } - index -= idx * m_outputStrides[i]; - } - Index innermostLoc; - if (internal::index_statically_eq<Broadcast>()(NumDims-1, 1)) { - eigen_assert(index < m_impl.dimensions()[NumDims-1]); - innermostLoc = index; - } else { - if (internal::index_statically_eq<InputDimensions>()(NumDims-1, 1)) { - eigen_assert(innermostLoc % m_impl.dimensions()[NumDims-1] == 0); - innermostLoc = 0; - } else { - innermostLoc = index % m_impl.dimensions()[NumDims-1]; - } - } - inputIndex += innermostLoc; - - // Todo: this could be extended to the second dimension if we're not - // broadcasting alongside the first dimension, and so on. 
- if (innermostLoc + packetSize <= m_impl.dimensions()[NumDims-1]) { - return m_impl.template packet<Unaligned>(inputIndex); - } else { - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - values[0] = m_impl.coeff(inputIndex); - for (int i = 1; i < packetSize; ++i) { - values[i] = coeffRowMajor(originalIndex+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - } - - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - Dimensions m_dimensions; - array<Index, NumDims> m_outputStrides; - array<Index, NumDims> m_inputStrides; - TensorEvaluator<ArgType, Device> m_impl; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h deleted file mode 100644 index 36c436a613..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ /dev/null @@ -1,510 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H -#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H - -namespace Eigen { - -/** \class TensorKChippingReshaping - * \ingroup CXX11_Tensor_Module - * - * \brief A chip is a thin slice, corresponding to a column or a row in a 2-d tensor. 
- * - * - */ - -namespace internal { -template<DenseIndex DimId, typename XprType> -struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions - 1; - static const int Layout = XprTraits::Layout; -}; - -template<DenseIndex DimId, typename XprType> -struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense> -{ - typedef const TensorChippingOp<DimId, XprType>& type; -}; - -template<DenseIndex DimId, typename XprType> -struct nested<TensorChippingOp<DimId, XprType>, 1, typename eval<TensorChippingOp<DimId, XprType> >::type> -{ - typedef TensorChippingOp<DimId, XprType> type; -}; - -template <DenseIndex DimId> -struct DimensionId -{ - DimensionId(DenseIndex dim) { - eigen_assert(dim == DimId); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { - return DimId; - } -}; -template <> -struct DimensionId<Dynamic> -{ - DimensionId(DenseIndex dim) : actual_dim(dim) { - eigen_assert(dim >= 0); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { - return actual_dim; - } - private: - const DenseIndex actual_dim; -}; - - -} // end namespace internal - - - -template<DenseIndex DimId, typename XprType> -class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested; - typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind; - typedef typename 
Eigen::internal::traits<TensorChippingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim) - : m_xpr(expr), m_offset(offset), m_dim(dim) { - } - - EIGEN_DEVICE_FUNC - const Index offset() const { return m_offset; } - EIGEN_DEVICE_FUNC - const Index dim() const { return m_dim.actualDim(); } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorChippingOp& operator = (const TensorChippingOp& other) - { - typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorChippingOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; - const Index m_offset; - const internal::DimensionId<DimId> m_dim; -}; - - -// Eval as rvalue -template<DenseIndex DimId, typename ArgType, typename Device> -struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> -{ - typedef TensorChippingOp<DimId, ArgType> XprType; - static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - static const int NumDims = NumInputDims-1; - typedef typename XprType::Index Index; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename internal::remove_const<Scalar>::type ScalarNonConst; - - enum { - // Alignment can't be guaranteed at compile time since it depends on the - // slice offsets. 
- IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - typedef internal::TensorBlock<Index, ScalarNonConst, NumInputDims, Layout> - InputTensorBlock; - typedef internal::TensorBlock<Index, ScalarNonConst, NumDims, Layout> - OutputTensorBlock; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device) - { - EIGEN_STATIC_ASSERT(NumInputDims >= 1, YOU_MADE_A_PROGRAMMING_MISTAKE); - eigen_assert(NumInputDims > m_dim.actualDim()); - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - eigen_assert(op.offset() < input_dims[m_dim.actualDim()]); - - int j = 0; - for (int i = 0; i < NumInputDims; ++i) { - if (i != m_dim.actualDim()) { - m_dimensions[j] = input_dims[i]; - ++j; - } - } - - m_stride = 1; - m_inputStride = 1; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < m_dim.actualDim(); ++i) { - m_stride *= input_dims[i]; - m_inputStride *= input_dims[i]; - } - } else { - for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) { - m_stride *= input_dims[i]; - m_inputStride *= input_dims[i]; - } - } - m_inputStride *= input_dims[m_dim.actualDim()]; - m_inputOffset = m_stride * op.offset(); - - if (BlockAccess) { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputStrides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - m_inputStrides[i] = m_inputStrides[i - 1] * input_dims[i - 1]; - } - } else { - m_inputStrides[NumInputDims - 1] = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1]; - } - } - - m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1), - device.lastLevelCacheSize() / - 
sizeof(Scalar)); - } - } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_impl.coeff(srcCoeff(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && - m_dim.actualDim() == 0) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && - m_dim.actualDim() == NumInputDims - 1)) { - // m_stride is equal to 1, so let's avoid the integer division. - eigen_assert(m_stride == 1); - Index inputIndex = index * m_inputStride + m_inputOffset; - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = m_impl.coeff(inputIndex); - inputIndex += m_inputStride; - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && - m_dim.actualDim() == NumInputDims - 1) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && - m_dim.actualDim() == 0)) { - // m_stride is aways greater than index, so let's avoid the integer division. 
- eigen_assert(m_stride > index); - return m_impl.template packet<LoadMode>(index + m_inputOffset); - } else { - const Index idx = index / m_stride; - const Index rem = index - idx * m_stride; - if (rem + packetSize <= m_stride) { - Index inputIndex = idx * m_inputStride + m_inputOffset + rem; - return m_impl.template packet<LoadMode>(inputIndex); - } else { - // Cross the stride boundary. Fallback to slow path. - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index); - ++index; - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector<internal::TensorOpResourceRequirements>* resources) const { - resources->push_back(internal::TensorOpResourceRequirements( - internal::kSkewedInnerDims, m_block_total_size_max)); - m_impl.getResourceRequirements(resources); - } - - // TODO(andydavis) Reduce the overhead of this function (experiment with - // using a fixed block size). - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( - OutputTensorBlock* output_block) const { - // Calculate input block sizes. - const DSizes<Index, NumDims>& output_block_sizes = - output_block->block_sizes(); - const DSizes<Index, NumDims>& output_block_strides = - output_block->block_strides(); - const Index chip_dim = m_dim.actualDim(); - DSizes<Index, NumInputDims> input_block_sizes; - DSizes<Index, NumInputDims> input_block_strides; - for (Index i = 0; i < NumInputDims; ++i) { - if (i < chip_dim) { - input_block_sizes[i] = output_block_sizes[i]; - input_block_strides[i] = output_block_strides[i]; - } else if (i > chip_dim) { - input_block_sizes[i] = output_block_sizes[i - 1]; - input_block_strides[i] = output_block_strides[i - 1]; - } else { - input_block_sizes[i] = 1; - } - } - // Fix up input_block_stride for chip dimension. 
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - if (chip_dim == 0) { - input_block_strides[chip_dim] = 1; - } else { - input_block_strides[chip_dim] = input_block_strides[chip_dim - 1] * - input_block_sizes[chip_dim - 1]; - } - } else { - if (chip_dim == NumInputDims - 1) { - input_block_strides[chip_dim] = 1; - } else { - input_block_strides[chip_dim] = input_block_strides[chip_dim + 1] * - input_block_sizes[chip_dim + 1]; - } - } - // Instantiate and read input block from input tensor. - InputTensorBlock input_block(srcCoeff(output_block->first_coeff_index()), - input_block_sizes, - input_block_strides, - m_inputStrides, - output_block->data()); - m_impl.block(&input_block); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { - CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data()); - if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && - m_dim.actualDim() == NumDims) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && - m_dim.actualDim() == 0)) && - result) { - return result + m_inputOffset; - } else { - return NULL; - } - } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const - { - Index inputIndex; - if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && - m_dim.actualDim() == 0) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && - m_dim.actualDim() == NumInputDims - 1)) { - // m_stride is equal to 1, so let's avoid the integer division. - eigen_assert(m_stride == 1); - inputIndex = index * m_inputStride + m_inputOffset; - } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && - m_dim.actualDim() == NumInputDims - 1) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && - m_dim.actualDim() == 0)) { - // m_stride is aways greater than index, so let's avoid the integer division. 
- eigen_assert(m_stride > index); - inputIndex = index + m_inputOffset; - } else { - const Index idx = index / m_stride; - inputIndex = idx * m_inputStride + m_inputOffset; - index -= idx * m_stride; - inputIndex += index; - } - return inputIndex; - } - - Dimensions m_dimensions; - Index m_stride; - Index m_inputOffset; - Index m_inputStride; - DSizes<Index, NumInputDims> m_inputStrides; - TensorEvaluator<ArgType, Device> m_impl; - const internal::DimensionId<DimId> m_dim; - const Device& m_device; - std::size_t m_block_total_size_max; -}; - - -// Eval as lvalue -template<DenseIndex DimId, typename ArgType, typename Device> -struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device> - : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> -{ - typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base; - typedef TensorChippingOp<DimId, ArgType> XprType; - static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - static const int NumDims = NumInputDims-1; - typedef typename XprType::Index Index; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : Base(op, device) - { } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename internal::remove_const<Scalar>::type ScalarNonConst; - typedef internal::TensorBlock<Index, ScalarNonConst, NumInputDims, Layout> - InputTensorBlock; - typedef internal::TensorBlock<Index, ScalarNonConst, NumDims, Layout> - OutputTensorBlock; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
CoeffReturnType& coeffRef(Index index) - { - return this->m_impl.coeffRef(this->srcCoeff(index)); - } - - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - static const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - - if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && - this->m_dim.actualDim() == 0) || - (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && - this->m_dim.actualDim() == NumInputDims - 1)) { - // m_stride is equal to 1, so let's avoid the integer division. - eigen_assert(this->m_stride == 1); - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - internal::pstore<CoeffReturnType, PacketReturnType>(values, x); - Index inputIndex = index * this->m_inputStride + this->m_inputOffset; - for (int i = 0; i < packetSize; ++i) { - this->m_impl.coeffRef(inputIndex) = values[i]; - inputIndex += this->m_inputStride; - } - } else if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && - this->m_dim.actualDim() == NumInputDims - 1) || - (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && - this->m_dim.actualDim() == 0)) { - // m_stride is aways greater than index, so let's avoid the integer division. - eigen_assert(this->m_stride > index); - this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x); - } else { - const Index idx = index / this->m_stride; - const Index rem = index - idx * this->m_stride; - if (rem + packetSize <= this->m_stride) { - const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem; - this->m_impl.template writePacket<StoreMode>(inputIndex, x); - } else { - // Cross stride boundary. Fallback to slow path. 
- EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - internal::pstore<CoeffReturnType, PacketReturnType>(values, x); - for (int i = 0; i < packetSize; ++i) { - this->coeffRef(index) = values[i]; - ++index; - } - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock( - const OutputTensorBlock& output_block) { - // Calculate input block sizes. - const DSizes<Index, NumDims>& output_block_sizes = - output_block.block_sizes(); - const DSizes<Index, NumDims>& output_block_strides = - output_block.block_strides(); - const Index chip_dim = this->m_dim.actualDim(); - DSizes<Index, NumInputDims> input_block_sizes; - DSizes<Index, NumInputDims> input_block_strides; - for (Index i = 0; i < NumInputDims; ++i) { - if (i < chip_dim) { - input_block_sizes[i] = output_block_sizes[i]; - input_block_strides[i] = output_block_strides[i]; - } else if (i > chip_dim) { - input_block_sizes[i] = output_block_sizes[i - 1]; - input_block_strides[i] = output_block_strides[i - 1]; - } else { - input_block_sizes[i] = 1; - } - } - // Fix up input_block_stride for chip dimension. - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - if (chip_dim == 0) { - input_block_strides[chip_dim] = 1; - } else { - input_block_strides[chip_dim] = input_block_strides[chip_dim - 1] * - input_block_sizes[chip_dim - 1]; - } - } else { - if (chip_dim == NumInputDims - 1) { - input_block_strides[chip_dim] = 1; - } else { - input_block_strides[chip_dim] = input_block_strides[chip_dim - 1] * - input_block_sizes[chip_dim - 1]; - } - } - // Write input block. 
- this->m_impl.writeBlock( - InputTensorBlock(this->srcCoeff(output_block.first_coeff_index()), - input_block_sizes, - input_block_strides, - this->m_inputStrides, - const_cast<ScalarNonConst*>(output_block.data()))); - } - -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h deleted file mode 100644 index 54d9e5f2c8..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +++ /dev/null @@ -1,350 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H - -namespace Eigen { - -/** \class TensorConcatenationOp - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor concatenation class. - * - * - */ -namespace internal { -template<typename Axis, typename LhsXprType, typename RhsXprType> -struct traits<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs are different. 
- typedef typename promote_storage_type<typename LhsXprType::Scalar, - typename RhsXprType::Scalar>::ret Scalar; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind, - typename traits<RhsXprType>::StorageKind>::ret StorageKind; - typedef typename promote_index_type<typename traits<LhsXprType>::Index, - typename traits<RhsXprType>::Index>::type Index; - typedef typename LhsXprType::Nested LhsNested; - typedef typename RhsXprType::Nested RhsNested; - typedef typename remove_reference<LhsNested>::type _LhsNested; - typedef typename remove_reference<RhsNested>::type _RhsNested; - static const int NumDimensions = traits<LhsXprType>::NumDimensions; - static const int Layout = traits<LhsXprType>::Layout; - enum { Flags = 0 }; -}; - -template<typename Axis, typename LhsXprType, typename RhsXprType> -struct eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, Eigen::Dense> -{ - typedef const TensorConcatenationOp<Axis, LhsXprType, RhsXprType>& type; -}; - -template<typename Axis, typename LhsXprType, typename RhsXprType> -struct nested<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, 1, typename eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >::type> -{ - typedef TensorConcatenationOp<Axis, LhsXprType, RhsXprType> type; -}; - -} // end namespace internal - - -template<typename Axis, typename LhsXprType, typename RhsXprType> -class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, WriteAccessors> -{ - public: - typedef typename internal::traits<TensorConcatenationOp>::Scalar Scalar; - typedef typename internal::traits<TensorConcatenationOp>::Packet Packet; - typedef typename internal::traits<TensorConcatenationOp>::StorageKind StorageKind; - typedef typename internal::traits<TensorConcatenationOp>::Index Index; - typedef typename internal::nested<TensorConcatenationOp>::type Nested; - typedef typename 
internal::promote_storage_type<typename LhsXprType::CoeffReturnType, - typename RhsXprType::CoeffReturnType>::ret CoeffReturnType; - typedef typename internal::promote_storage_type<typename LhsXprType::PacketReturnType, - typename RhsXprType::PacketReturnType>::ret PacketReturnType; - typedef typename NumTraits<Scalar>::Real RealScalar; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConcatenationOp(const LhsXprType& lhs, const RhsXprType& rhs, Axis axis) - : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_axis(axis) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename LhsXprType::Nested>::type& - lhsExpression() const { return m_lhs_xpr; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename RhsXprType::Nested>::type& - rhsExpression() const { return m_rhs_xpr; } - - EIGEN_DEVICE_FUNC const Axis& axis() const { return m_axis; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const TensorConcatenationOp& other) - { - typedef TensorAssignOp<TensorConcatenationOp, const TensorConcatenationOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorConcatenationOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - protected: - typename LhsXprType::Nested m_lhs_xpr; - typename RhsXprType::Nested m_rhs_xpr; - const Axis m_axis; -}; - - -// Eval as rvalue -template<typename Axis, typename LeftArgType, typename RightArgType, typename Device> -struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> -{ - typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType; - typedef typename 
XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value; - static const int RightNumDims = internal::array_size<typename TensorEvaluator<RightArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & - TensorEvaluator<RightArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device), m_axis(op.axis()) - { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || NumDims == 1), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT(NumDims == RightNumDims, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(0 <= m_axis && m_axis < NumDims); - const Dimensions& lhs_dims = m_leftImpl.dimensions(); - const Dimensions& rhs_dims = m_rightImpl.dimensions(); - int i = 0; - for (; i < m_axis; ++i) { - eigen_assert(lhs_dims[i] > 0); - eigen_assert(lhs_dims[i] == rhs_dims[i]); - m_dimensions[i] = lhs_dims[i]; - } - eigen_assert(lhs_dims[i] > 0); // Now i == m_axis. 
- eigen_assert(rhs_dims[i] > 0); - m_dimensions[i] = lhs_dims[i] + rhs_dims[i]; - for (++i; i < NumDims; ++i) { - eigen_assert(lhs_dims[i] > 0); - eigen_assert(lhs_dims[i] == rhs_dims[i]); - m_dimensions[i] = lhs_dims[i]; - } - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_leftStrides[0] = 1; - m_rightStrides[0] = 1; - m_outputStrides[0] = 1; - - for (int i = 1; i < NumDims; ++i) { - m_leftStrides[i] = m_leftStrides[i-1] * lhs_dims[i-1]; - m_rightStrides[i] = m_rightStrides[i-1] * rhs_dims[i-1]; - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - } - } else { - m_leftStrides[NumDims - 1] = 1; - m_rightStrides[NumDims - 1] = 1; - m_outputStrides[NumDims - 1] = 1; - - for (int i = NumDims - 2; i >= 0; --i) { - m_leftStrides[i] = m_leftStrides[i+1] * lhs_dims[i+1]; - m_rightStrides[i] = m_rightStrides[i+1] * rhs_dims[i+1]; - m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - // TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear? - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) - { - m_leftImpl.evalSubExprsIfNeeded(NULL); - m_rightImpl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() - { - m_leftImpl.cleanup(); - m_rightImpl.cleanup(); - } - - // TODO(phli): attempt to speed this up. The integer divisions and modulo are slow. - // See CL/76180724 comments for more ideas. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - // Collect dimension-wise indices (subs). 
- array<Index, NumDims> subs; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - subs[i] = index / m_outputStrides[i]; - index -= subs[i] * m_outputStrides[i]; - } - subs[0] = index; - } else { - for (int i = 0; i < NumDims - 1; ++i) { - subs[i] = index / m_outputStrides[i]; - index -= subs[i] * m_outputStrides[i]; - } - subs[NumDims - 1] = index; - } - - const Dimensions& left_dims = m_leftImpl.dimensions(); - if (subs[m_axis] < left_dims[m_axis]) { - Index left_index; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - left_index = subs[0]; - for (int i = 1; i < NumDims; ++i) { - left_index += (subs[i] % left_dims[i]) * m_leftStrides[i]; - } - } else { - left_index = subs[NumDims - 1]; - for (int i = NumDims - 2; i >= 0; --i) { - left_index += (subs[i] % left_dims[i]) * m_leftStrides[i]; - } - } - return m_leftImpl.coeff(left_index); - } else { - subs[m_axis] -= left_dims[m_axis]; - const Dimensions& right_dims = m_rightImpl.dimensions(); - Index right_index; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - right_index = subs[0]; - for (int i = 1; i < NumDims; ++i) { - right_index += (subs[i] % right_dims[i]) * m_rightStrides[i]; - } - } else { - right_index = subs[NumDims - 1]; - for (int i = NumDims - 2; i >= 0; --i) { - right_index += (subs[i] % right_dims[i]) * m_rightStrides[i]; - } - } - return m_rightImpl.coeff(right_index); - } - } - - // TODO(phli): Add a real vectorization. 
- template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - static const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index + packetSize - 1 < dimensions().TotalSize()); - - EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - Dimensions m_dimensions; - array<Index, NumDims> m_outputStrides; - array<Index, NumDims> m_leftStrides; - array<Index, NumDims> m_rightStrides; - TensorEvaluator<LeftArgType, Device> m_leftImpl; - TensorEvaluator<RightArgType, Device> m_rightImpl; - const Axis m_axis; -}; - -// Eval as lvalue -template<typename Axis, typename LeftArgType, typename RightArgType, typename Device> - struct TensorEvaluator<TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> - : public TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> -{ - typedef TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> Base; - typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType; - typedef typename Base::Dimensions Dimensions; - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & - TensorEvaluator<RightArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(XprType& op, const Device& device) - : Base(op, device) - { - EIGEN_STATIC_ASSERT((static_cast<int>(Layout) == static_cast<int>(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE); - } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef 
typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) - { - // Collect dimension-wise indices (subs). - array<Index, Base::NumDims> subs; - for (int i = Base::NumDims - 1; i > 0; --i) { - subs[i] = index / this->m_outputStrides[i]; - index -= subs[i] * this->m_outputStrides[i]; - } - subs[0] = index; - - const Dimensions& left_dims = this->m_leftImpl.dimensions(); - if (subs[this->m_axis] < left_dims[this->m_axis]) { - Index left_index = subs[0]; - for (int i = 1; i < Base::NumDims; ++i) { - left_index += (subs[i] % left_dims[i]) * this->m_leftStrides[i]; - } - return this->m_leftImpl.coeffRef(left_index); - } else { - subs[this->m_axis] -= left_dims[this->m_axis]; - const Dimensions& right_dims = this->m_rightImpl.dimensions(); - Index right_index = subs[0]; - for (int i = 1; i < Base::NumDims; ++i) { - right_index += (subs[i] % right_dims[i]) * this->m_rightStrides[i]; - } - return this->m_rightImpl.coeffRef(right_index); - } - } - - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - static const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize()); - - EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize]; - internal::pstore<CoeffReturnType, PacketReturnType>(values, x); - for (int i = 0; i < packetSize; ++i) { - coeffRef(index+i) = values[i]; - } - } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h deleted file mode 100644 index 7fb384c65e..0000000000 --- 
a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ /dev/null @@ -1,635 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Eric Martin <eric@ericmart.in> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H - -namespace Eigen { - -/** \class TensorContraction - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor contraction class. - * - * - */ -namespace internal { -template<typename Dimensions, typename LhsXprType, typename RhsXprType> -struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs are different. - typedef typename scalar_product_traits<typename LhsXprType::Scalar, typename RhsXprType::Scalar>::ReturnType Scalar; - - typedef typename scalar_product_traits<typename traits<LhsXprType>::StorageKind, - typename traits<RhsXprType>::StorageKind>::ReturnType StorageKind; - typedef typename promote_index_type<typename traits<LhsXprType>::Index, - typename traits<RhsXprType>::Index>::type Index; - typedef typename LhsXprType::Nested LhsNested; - typedef typename RhsXprType::Nested RhsNested; - typedef typename remove_reference<LhsNested>::type _LhsNested; - typedef typename remove_reference<RhsNested>::type _RhsNested; - - // From NumDims below. 
- static const int NumDimensions = traits<RhsXprType>::NumDimensions + traits<RhsXprType>::NumDimensions - 2 * array_size<Dimensions>::value; - static const int Layout = traits<LhsXprType>::Layout; - - enum { - Flags = 0, - }; -}; - -template<typename Dimensions, typename LhsXprType, typename RhsXprType> -struct eval<TensorContractionOp<Dimensions, LhsXprType, RhsXprType>, Eigen::Dense> -{ - typedef const TensorContractionOp<Dimensions, LhsXprType, RhsXprType>& type; -}; - -template<typename Dimensions, typename LhsXprType, typename RhsXprType> -struct nested<TensorContractionOp<Dimensions, LhsXprType, RhsXprType>, 1, typename eval<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> >::type> -{ - typedef TensorContractionOp<Dimensions, LhsXprType, RhsXprType> type; -}; - -template<typename Indices_, typename LeftArgType_, typename RightArgType_, typename Device_> -struct traits<TensorEvaluator<const TensorContractionOp<Indices_, LeftArgType_, RightArgType_>, Device_> > { - typedef Indices_ Indices; - typedef LeftArgType_ LeftArgType; - typedef RightArgType_ RightArgType; - typedef Device_ Device; - - // From NumDims below. 
- static const int NumDimensions = traits<LeftArgType_>::NumDimensions + traits<RightArgType_>::NumDimensions - 2 * array_size<Indices_>::value; -}; - -} // end namespace internal - -template<typename Indices, typename LhsXprType, typename RhsXprType> -class TensorContractionOp : public TensorBase<TensorContractionOp<Indices, LhsXprType, RhsXprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorContractionOp>::Scalar Scalar; - typedef typename internal::scalar_product_traits<typename LhsXprType::CoeffReturnType, - typename RhsXprType::CoeffReturnType>::ReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorContractionOp>::type Nested; - typedef typename Eigen::internal::traits<TensorContractionOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorContractionOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionOp( - const LhsXprType& lhs, const RhsXprType& rhs, const Indices& dims) - : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_indices(dims) {} - - EIGEN_DEVICE_FUNC const Indices& indices() const { return m_indices; } - - /** \returns the nested expressions */ - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename LhsXprType::Nested>::type& - lhsExpression() const { return m_lhs_xpr; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename RhsXprType::Nested>::type& - rhsExpression() const { return m_rhs_xpr; } - - protected: - typename LhsXprType::Nested m_lhs_xpr; - typename RhsXprType::Nested m_rhs_xpr; - const Indices m_indices; -}; - - -template<typename Derived> -struct TensorContractionEvaluatorBase -{ - typedef typename internal::traits<Derived>::Indices Indices; - typedef typename internal::traits<Derived>::LeftArgType LeftArgType; - typedef typename internal::traits<Derived>::RightArgType RightArgType; - typedef typename internal::traits<Derived>::Device Device; - - typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; - typedef 
typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef typename XprType::Index Index; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - enum { - IsAligned = true, - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - BlockAccess = false, - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - // Most of the code is assuming that both input tensors are ColMajor. If the - // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: - // If we want to compute A * B = C, where A is LHS and B is RHS, the code - // will pretend B is LHS and A is RHS. - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; - - static const int LDims = - internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value; - static const int RDims = - internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value; - static const int ContractDims = internal::array_size<Indices>::value; - static const int NumDims = LDims + RDims - 2 * ContractDims; - - typedef array<Index, LDims> left_dim_mapper_t; - typedef array<Index, RDims> right_dim_mapper_t; - typedef array<Index, ContractDims> contract_t; - typedef array<Index, LDims - ContractDims> left_nocontract_t; - typedef array<Index, RDims - ContractDims> right_nocontract_t; - - typedef DSizes<Index, NumDims> Dimensions; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorContractionEvaluatorBase(const XprType& op, const Device& device) - : m_leftImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(), - op.lhsExpression(), op.rhsExpression()), 
device), - m_rightImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(), - op.rhsExpression(), op.lhsExpression()), device), - m_device(device), - m_result(NULL) { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == - static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)), - YOU_MADE_A_PROGRAMMING_MISTAKE); - - // At least one index pair must be contracted. The count is read through - // array_size<> because contract_t::size names a member function, not a value. - eigen_assert((internal::array_size<contract_t>::value > 0) && "Must contract on some indices"); - - - DSizes<Index, LDims> eval_left_dims; - DSizes<Index, RDims> eval_right_dims; - array<IndexPair<Index>, ContractDims> eval_op_indices; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - // For ColMajor, we keep using the existing dimensions - for (int i = 0; i < LDims; i++) { - eval_left_dims[i] = m_leftImpl.dimensions()[i]; - } - for (int i = 0; i < RDims; i++) { - eval_right_dims[i] = m_rightImpl.dimensions()[i]; - } - // We keep the pairs of contracting indices. - for (int i = 0; i < ContractDims; i++) { - eval_op_indices[i].first = op.indices()[i].first; - eval_op_indices[i].second = op.indices()[i].second; - } - } else { - // For RowMajor, we need to reverse the existing dimensions - for (int i = 0; i < LDims; i++) { - eval_left_dims[i] = m_leftImpl.dimensions()[LDims - i - 1]; - } - for (int i = 0; i < RDims; i++) { - eval_right_dims[i] = m_rightImpl.dimensions()[RDims - i - 1]; - } - // We need to flip all the pairs of contracting indices as well as - // reversing the dimensions. 
- for (int i = 0; i < ContractDims; i++) { - eval_op_indices[i].first = LDims - 1 - op.indices()[ContractDims - 1 - i].second; - eval_op_indices[i].second = RDims - 1 - op.indices()[ContractDims - 1 - i].first; - } - } - - array<Index, LDims> lhs_strides; - if (LDims > 0) { - lhs_strides[0] = 1; - for (int i = 0; i < LDims-1; ++i) { - lhs_strides[i+1] = lhs_strides[i] * eval_left_dims[i]; - } - } - - array<Index, RDims> rhs_strides; - if (RDims > 0) { - rhs_strides[0] = 1; - for (int i = 0; i < RDims-1; ++i) { - rhs_strides[i+1] = rhs_strides[i] * eval_right_dims[i]; - } - } - - if (m_i_strides.size() > 0) m_i_strides[0] = 1; - if (m_j_strides.size() > 0) m_j_strides[0] = 1; - if (m_k_strides.size() > 0) m_k_strides[0] = 1; - - m_i_size = 1; - m_j_size = 1; - m_k_size = 1; - - // To compute the dimension, we simply concatenate the non-contracting - // dimensions of the left and then the right tensor. Additionally, I also - // want to compute the cumulative products of the left non-contracting - // dimensions, right non-contracting dimensions, and the contracting - // dimensions (in the order of the contraction) to aid in the later - // computation of tensor indices for matrix indices. 
- m_lhs_inner_dim_contiguous = true; - int dim_idx = 0; - int nocontract_idx = 0; - - for (int i = 0; i < LDims; i++) { - // find if we are contracting on index i of left tensor - bool contracting = false; - for (int j = 0; j < ContractDims; j++) { - if (eval_op_indices[j].first == i) { - contracting = true; - break; - } - } - if (!contracting) { - // add dimension size to output dimensions - m_dimensions[dim_idx] = eval_left_dims[i]; - m_left_nocontract_strides[nocontract_idx] = lhs_strides[i]; - if (dim_idx != i) { - m_lhs_inner_dim_contiguous = false; - } - if (nocontract_idx+1 < internal::array_size<left_nocontract_t>::value) { - m_i_strides[nocontract_idx+1] = - m_i_strides[nocontract_idx] * eval_left_dims[i]; - } else { - m_i_size = m_i_strides[nocontract_idx] * eval_left_dims[i]; - } - dim_idx++; - nocontract_idx++; - } - } - - nocontract_idx = 0; - for (int i = 0; i < RDims; i++) { - bool contracting = false; - // find if we are contracting on index i of right tensor - for (int j = 0; j < ContractDims; j++) { - if (eval_op_indices[j].second == i) { - contracting = true; - break; - } - } - if (!contracting) { - m_dimensions[dim_idx] = eval_right_dims[i]; - if (nocontract_idx+1 < internal::array_size<right_nocontract_t>::value) { - m_j_strides[nocontract_idx+1] = - m_j_strides[nocontract_idx] * eval_right_dims[i]; - } else { - m_j_size = m_j_strides[nocontract_idx] * eval_right_dims[i]; - } - m_right_nocontract_strides[nocontract_idx] = rhs_strides[i]; - dim_idx++; - nocontract_idx++; - } - } - - // now build contraction cumprod. We assumed above that non-contracting axes - // are represented in the same order in the matrix as they are in the tensor. - // This is not the case for contracting axes. As the contracting axes must be - // of the same size in each tensor, I'll only look at the first tensor here. 
- m_rhs_inner_dim_contiguous = true; - m_rhs_inner_dim_reordered = false; - for (int i = 0; i < ContractDims; i++) { - Index left = eval_op_indices[i].first; - Index right = eval_op_indices[i].second; - - Index size = eval_left_dims[left]; - eigen_assert(size == eval_right_dims[right] && - "Contraction axes must be same size"); - - if (i+1 < internal::array_size<contract_t>::value) { - m_k_strides[i+1] = m_k_strides[i] * size; - } else { - m_k_size = m_k_strides[i] * size; - } - m_left_contracting_strides[i] = lhs_strides[left]; - m_right_contracting_strides[i] = rhs_strides[right]; - - if (i > 0 && right < eval_op_indices[i-1].second) { - m_rhs_inner_dim_reordered = true; - } - if (right != i) { - m_rhs_inner_dim_contiguous = false; - } - } - - // If the layout is RowMajor, we need to reverse the m_dimensions - if (static_cast<int>(Layout) == static_cast<int>(RowMajor)) { - for (int i = 0, j = NumDims - 1; i < j; i++, j--) { - numext::swap(m_dimensions[i], m_dimensions[j]); - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { - m_leftImpl.evalSubExprsIfNeeded(NULL); - m_rightImpl.evalSubExprsIfNeeded(NULL); - if (data) { - evalTo(data); - return false; - } else { - m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); - evalTo(m_result); - return true; - } - } - - EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const { - if (this->m_lhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_reordered) { - static_cast<const Derived*>(this)->template evalProduct<true, true, true, Unaligned>(buffer); - } - else { - static_cast<const Derived*>(this)->template evalProduct<true, true, false, Unaligned>(buffer); - } - } - else { - if (this->m_rhs_inner_dim_reordered) { - static_cast<const Derived*>(this)->template evalProduct<true, false, true, 
Unaligned>(buffer); - } - else { - static_cast<const Derived*>(this)->template evalProduct<true, false, false, Unaligned>(buffer); - } - } - } - else { - if (this->m_rhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_reordered) { - static_cast<const Derived*>(this)->template evalProduct<false, true, true, Unaligned>(buffer); - } - else { - static_cast<const Derived*>(this)->template evalProduct<false, true, false, Unaligned>(buffer); - } - } - else { - if (this->m_rhs_inner_dim_reordered) { - static_cast<const Derived*>(this)->template evalProduct<false, false, true, Unaligned>(buffer); - } - else { - static_cast<const Derived*>(this)->template evalProduct<false, false, false, Unaligned>(buffer); - } - } - } - } - - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - void evalGemv(Scalar* buffer) const { - const Index rows = m_i_size; - const Index cols = m_k_size; - - typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar; - typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar; - typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator; - typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator; - const int lhs_packet_size = PacketType<LhsScalar, Device>::size; - const int rhs_packet_size = PacketType<RhsScalar, Device>::size; - typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs, - LeftEvaluator, left_nocontract_t, - contract_t, lhs_packet_size, - lhs_inner_dim_contiguous, - false, Unaligned> LhsMapper; - - typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs, - RightEvaluator, right_nocontract_t, - contract_t, rhs_packet_size, - rhs_inner_dim_contiguous, - rhs_inner_dim_reordered, Unaligned> RhsMapper; - - LhsMapper lhs(m_leftImpl, m_left_nocontract_strides, m_i_strides, - m_left_contracting_strides, m_k_strides); - RhsMapper rhs(m_rightImpl, 
m_right_nocontract_strides, m_j_strides, - m_right_contracting_strides, m_k_strides); - - const RhsScalar alpha(1); - const Index resIncr(1); - - // zero out the result buffer (which must be of size at least rows * sizeof(Scalar)) - m_device.memset(buffer, 0, rows * sizeof(Scalar)); - - internal::general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,false,RhsScalar,RhsMapper,false>::run( - rows, cols, lhs, rhs, - buffer, resIncr, alpha); - } - - // Releases both operand evaluators and frees m_result when it is non-NULL. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_leftImpl.cleanup(); - m_rightImpl.cleanup(); - - if (m_result != NULL) { - m_device.deallocate(m_result); - m_result = NULL; - } - } - - // coeff(), packet() and data() all read straight from m_result. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_result[index]; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { - return internal::ploadt<PacketReturnType, LoadMode>(m_result + index); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return m_result; } - - protected: - // Note: nvcc doesn't like implicit copy constructor. If this is needed anywhere, - // then we'll have to write an explicit copy constructor... 
- //TensorContractionEvaluatorBase(const TensorContractionEvaluatorBase&); - - TensorContractionEvaluatorBase& operator = (const TensorContractionEvaluatorBase&); - Dimensions m_dimensions; - - contract_t m_k_strides; - contract_t m_left_contracting_strides; - contract_t m_right_contracting_strides; - - bool m_lhs_inner_dim_contiguous; - bool m_rhs_inner_dim_contiguous; - bool m_rhs_inner_dim_reordered; - - left_nocontract_t m_i_strides; - right_nocontract_t m_j_strides; - left_nocontract_t m_left_nocontract_strides; - right_nocontract_t m_right_nocontract_strides; - - Index m_i_size; - Index m_j_size; - Index m_k_size; - - TensorEvaluator<EvalLeftArgType, Device> m_leftImpl; - TensorEvaluator<EvalRightArgType, Device> m_rightImpl; - const Device& m_device; - Scalar* m_result; -}; - - -// evaluator for default device -template<typename Indices, typename LeftArgType, typename RightArgType, typename Device> -struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> : - public TensorContractionEvaluatorBase< - TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> > { - typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self; - typedef TensorContractionEvaluatorBase<Self> Base; - - typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef typename XprType::Index Index; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - enum { - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - }; - - // Most of the code is assuming that both input tensors are ColMajor. If the - // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: - // If we want to compute A * B = C, where A is LHS and B is RHS, the code - // will pretend B is LHS and A is RHS. 
- typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; - - static const int LDims = - internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value; - static const int RDims = - internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value; - static const int ContractDims = internal::array_size<Indices>::value; - - typedef array<Index, LDims> left_dim_mapper_t; - typedef array<Index, RDims> right_dim_mapper_t; - - typedef array<Index, ContractDims> contract_t; - typedef array<Index, LDims - ContractDims> left_nocontract_t; - typedef array<Index, RDims - ContractDims> right_nocontract_t; - - static const int NumDims = LDims + RDims - 2 * ContractDims; - - // Could we use NumDimensions here? 
- typedef DSizes<Index, NumDims> Dimensions; - - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : - Base(op, device) { } - - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - void evalProduct(Scalar* buffer) const { - if (this->m_j_size == 1) { - this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer); - return; - } - - evalGemm<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer); - } - - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - EIGEN_DEVICE_FUNC void evalGemm(Scalar* buffer) const { - // columns in left side, rows in right side (the contracted extent) - const Index k = this->m_k_size; - - // rows in left side (left non-contracted extent) - const Index m = this->m_i_size; - - // columns in right side (right non-contracted extent) - const Index n = this->m_j_size; - - // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar)) - this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); - - // define mr, nr, and all of my data mapper types - typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar; - typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar; - typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits; - - const Index nr = Traits::nr; - const Index mr = Traits::mr; - - typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator; - typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator; - - const int lhs_packet_size = internal::packet_traits<LhsScalar>::size; - const int rhs_packet_size = internal::packet_traits<RhsScalar>::size; - - typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs, - LeftEvaluator, left_nocontract_t, - contract_t, lhs_packet_size, - lhs_inner_dim_contiguous, - false, Unaligned> LhsMapper; - - 
typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs, - RightEvaluator, right_nocontract_t, - contract_t, rhs_packet_size, - rhs_inner_dim_contiguous, - rhs_inner_dim_reordered, Unaligned> RhsMapper; - - typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper; - - // declare GEBP packing and kernel structs - // TODO: packing could be faster sometimes if we supported row major tensor mappers - internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, mr, Traits::LhsProgress, ColMajor> pack_lhs; - internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, nr, ColMajor> pack_rhs; - - // TODO: replace false, false with conjugate values? - internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper, mr, nr, false, false> gebp; - - // initialize data mappers - LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, - this->m_left_contracting_strides, this->m_k_strides); - - RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, - this->m_right_contracting_strides, this->m_k_strides); - - OutputMapper output(buffer, m); - - // TODO: refine arguments here (am I row or col major, etc) - typedef typename internal::gemm_blocking_space<ColMajor, LhsScalar, RhsScalar, Dynamic, Dynamic, Dynamic> BlockingType; - - // compute block sizes (which depend on number of threads) - - // last parameter is true to use L3 blocking, 2nd to last parameter is 1 to - // indicate 1 thread - BlockingType blocking(m, n, k, 1, true); - - const Index kc = blocking.kc(); - const Index mc = (std::min<Index>)(m, blocking.mc()); - const Index nc = (std::min<Index>)(n, blocking.nc()); - - // sizes of submatrices to live in cache. see Goto paper. 
- const Index sizeA = blocking.mc() * kc; - const Index sizeB = kc * blocking.nc(); - // Panel sizes are kept as Index (kc is an Index) so the products are not narrowed to int. - - // note: m_device.allocate should return 16 byte aligned pointers, but if blockA and blockB - // aren't 16 byte aligned segfaults will happen due to SIMD instructions - LhsScalar* blockA = static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar))); - RhsScalar* blockB = static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar))); - - for(Index i2=0; i2<m; i2+=mc) - { - const Index actual_mc = numext::mini(i2+mc,m)-i2; - for (Index k2 = 0; k2 < k; k2 += kc) { - // make sure we don't overshoot right edge of left matrix, then pack vertical panel - const Index actual_kc = numext::mini(k2 + kc, k) - k2; - pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc, 0, 0); - - // series of horizontal blocks - for (Index j2 = 0; j2 < n; j2 += nc) { - // make sure we don't overshoot right edge of right matrix, then pack block - const Index actual_nc = numext::mini(j2 + nc, n) - j2; - pack_rhs(blockB, rhs.getSubMapper(k2, j2), actual_kc, actual_nc, 0, 0); - - // call gebp (matrix kernel) - // The parameters here are copied from Eigen's GEMM implementation - gebp(output.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, Scalar(1), -1, -1, 0, 0); - } - } - } - - this->m_device.deallocate(blockA); - this->m_device.deallocate(blockB); - } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h deleted file mode 100644 index f05746f298..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +++ /dev/null @@ -1,1387 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2014 Eric Martin <eric@ericmart.in> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H - -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) - -namespace Eigen { - -template<typename Scalar, typename Index, typename LhsMapper, - typename RhsMapper, typename OutputMapper, bool needs_edge_check> -__device__ EIGEN_STRONG_INLINE void -EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, - const OutputMapper output, volatile Scalar* lhs_shmem, volatile Scalar* rhs_shmem, - const Index m_size, const Index n_size, const Index k_size) { - - const Index m_block_idx = blockIdx.x; - const Index n_block_idx = blockIdx.y; - - const Index base_m = 64 * m_block_idx; - const Index base_n = 64 * n_block_idx; - - // declare and initialize 64 registers for output 8x8 block - - // prefetch registers - Scalar lhs_pf0; - Scalar lhs_pf1; - Scalar lhs_pf2; - Scalar lhs_pf3; - Scalar lhs_pf4; - Scalar lhs_pf5; - Scalar lhs_pf6; - Scalar lhs_pf7; - - Scalar rhs_pf0; - Scalar rhs_pf1; - Scalar rhs_pf2; - Scalar rhs_pf3; - Scalar rhs_pf4; - Scalar rhs_pf5; - Scalar rhs_pf6; - Scalar rhs_pf7; - - // shared memory is formatted - // (contract idx in block, nocontract idx in block, block idx) - // where block idx is column major. This transposition limits the number of - // bank conflicts when reading the LHS. The core idea is that since the contracting - // index is shared by both sides, then the contracting index should be in threadIdx.x. - - // On the LHS, we pad each row inside of each block with an extra element. This makes - // each block 8 rows of 9 elements, which is 72 elements. This gives no bank conflicts - // on writes and very few 2-way conflicts on reads. 
There is an 8x8 grid of these blocks. - - // On the RHS we just add 8 padding elements to the end of each block. This gives no bank - // conflicts on writes and also none on reads. - - // storage indices (576 = 8 * 72, so consecutive slots _0.._7 are eight 72-element blocks apart) - const Index lhs_store_idx_base = threadIdx.y * 72 + threadIdx.x * 9 + threadIdx.z; - const Index rhs_store_idx_base = threadIdx.y * 72 + threadIdx.z * 8 + threadIdx.x; - - const Index lhs_store_idx_0 = lhs_store_idx_base + 576 * 0; - const Index lhs_store_idx_1 = lhs_store_idx_base + 576 * 1; - const Index lhs_store_idx_2 = lhs_store_idx_base + 576 * 2; - const Index lhs_store_idx_3 = lhs_store_idx_base + 576 * 3; - const Index lhs_store_idx_4 = lhs_store_idx_base + 576 * 4; - const Index lhs_store_idx_5 = lhs_store_idx_base + 576 * 5; - const Index lhs_store_idx_6 = lhs_store_idx_base + 576 * 6; - const Index lhs_store_idx_7 = lhs_store_idx_base + 576 * 7; - - const Index rhs_store_idx_0 = rhs_store_idx_base + 576 * 0; - const Index rhs_store_idx_1 = rhs_store_idx_base + 576 * 1; - const Index rhs_store_idx_2 = rhs_store_idx_base + 576 * 2; - const Index rhs_store_idx_3 = rhs_store_idx_base + 576 * 3; - const Index rhs_store_idx_4 = rhs_store_idx_base + 576 * 4; - const Index rhs_store_idx_5 = rhs_store_idx_base + 576 * 5; - const Index rhs_store_idx_6 = rhs_store_idx_base + 576 * 6; - const Index rhs_store_idx_7 = rhs_store_idx_base + 576 * 7; - - // in the loading code, the following variables are important: - // threadIdx.x: the vertical position in an 8x8 block - // threadIdx.y: the vertical index of the 8x8 block in the grid - // threadIdx.z: the horizontal position in an 8x8 block - // k: the horizontal index of the 8x8 block in the grid - // - // The k parameter is implicit (it was the loop counter for a loop that went - // from 0 to <8, but now that loop is unrolled in the code below). 
- - const Index load_idx_vert = threadIdx.x + 8 * threadIdx.y; - const Index lhs_vert = base_m + load_idx_vert; - -#define prefetchIntoRegisters(base_k) \ - { \ - lhs_pf0 = Scalar(0); \ - lhs_pf1 = Scalar(0); \ - lhs_pf2 = Scalar(0); \ - lhs_pf3 = Scalar(0); \ - lhs_pf4 = Scalar(0); \ - lhs_pf5 = Scalar(0); \ - lhs_pf6 = Scalar(0); \ - lhs_pf7 = Scalar(0); \ - \ - rhs_pf0 = Scalar(0); \ - rhs_pf1 = Scalar(0); \ - rhs_pf2 = Scalar(0); \ - rhs_pf3 = Scalar(0); \ - rhs_pf4 = Scalar(0); \ - rhs_pf5 = Scalar(0); \ - rhs_pf6 = Scalar(0); \ - rhs_pf7 = Scalar(0); \ - \ - if (!needs_edge_check || lhs_vert < m_size) { \ - const Index lhs_horiz_0 = base_k + threadIdx.z + 0 * 8; \ - const Index lhs_horiz_1 = base_k + threadIdx.z + 1 * 8; \ - const Index lhs_horiz_2 = base_k + threadIdx.z + 2 * 8; \ - const Index lhs_horiz_3 = base_k + threadIdx.z + 3 * 8; \ - const Index lhs_horiz_4 = base_k + threadIdx.z + 4 * 8; \ - const Index lhs_horiz_5 = base_k + threadIdx.z + 5 * 8; \ - const Index lhs_horiz_6 = base_k + threadIdx.z + 6 * 8; \ - const Index lhs_horiz_7 = base_k + threadIdx.z + 7 * 8; \ - \ - if (!needs_edge_check || lhs_horiz_7 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ - lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ - lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ - lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ - lhs_pf6 = lhs(lhs_vert, lhs_horiz_6); \ - lhs_pf7 = lhs(lhs_vert, lhs_horiz_7); \ - } else if (lhs_horiz_6 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ - lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ - lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ - lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ - lhs_pf6 = lhs(lhs_vert, lhs_horiz_6); \ - } else if (lhs_horiz_5 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ - lhs_pf3 
= lhs(lhs_vert, lhs_horiz_3); \ - lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ - lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ - } else if (lhs_horiz_4 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ - lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ - lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ - } else if (lhs_horiz_3 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ - lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ - } else if (lhs_horiz_2 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ - } else if (lhs_horiz_1 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - } else if (lhs_horiz_0 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - } \ - } \ - \ - const Index rhs_vert = base_k + load_idx_vert; \ - if (!needs_edge_check || rhs_vert < k_size) { \ - const Index rhs_horiz_0 = base_n + threadIdx.z + 0 * 8; \ - const Index rhs_horiz_1 = base_n + threadIdx.z + 1 * 8; \ - const Index rhs_horiz_2 = base_n + threadIdx.z + 2 * 8; \ - const Index rhs_horiz_3 = base_n + threadIdx.z + 3 * 8; \ - const Index rhs_horiz_4 = base_n + threadIdx.z + 4 * 8; \ - const Index rhs_horiz_5 = base_n + threadIdx.z + 5 * 8; \ - const Index rhs_horiz_6 = base_n + threadIdx.z + 6 * 8; \ - const Index rhs_horiz_7 = base_n + threadIdx.z + 7 * 8; \ - \ - if (rhs_horiz_7 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ - rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ - rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ - rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ - rhs_pf6 = rhs(rhs_vert, rhs_horiz_6); \ - rhs_pf7 = rhs(rhs_vert, rhs_horiz_7); \ - } else if (rhs_horiz_6 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - 
rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ - rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ - rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ - rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ - rhs_pf6 = rhs(rhs_vert, rhs_horiz_6); \ - } else if (rhs_horiz_5 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ - rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ - rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ - rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ - } else if (rhs_horiz_4 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ - rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ - rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ - } else if (rhs_horiz_3 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ - rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ - } else if (rhs_horiz_2 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ - } else if (rhs_horiz_1 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - } else if (rhs_horiz_0 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - } \ - } \ - } \ - -#define writeRegToShmem(_) \ - lhs_shmem[lhs_store_idx_0] = lhs_pf0; \ - rhs_shmem[rhs_store_idx_0] = rhs_pf0; \ - \ - lhs_shmem[lhs_store_idx_1] = lhs_pf1; \ - rhs_shmem[rhs_store_idx_1] = rhs_pf1; \ - \ - lhs_shmem[lhs_store_idx_2] = lhs_pf2; \ - rhs_shmem[rhs_store_idx_2] = rhs_pf2; \ - \ - lhs_shmem[lhs_store_idx_3] = lhs_pf3; \ - rhs_shmem[rhs_store_idx_3] = rhs_pf3; \ - \ - lhs_shmem[lhs_store_idx_4] = lhs_pf4; \ - rhs_shmem[rhs_store_idx_4] = rhs_pf4; \ - \ - lhs_shmem[lhs_store_idx_5] = lhs_pf5; \ - rhs_shmem[rhs_store_idx_5] = rhs_pf5; \ - \ - lhs_shmem[lhs_store_idx_6] = lhs_pf6; \ - 
rhs_shmem[rhs_store_idx_6] = rhs_pf6; \ - \ - lhs_shmem[lhs_store_idx_7] = lhs_pf7; \ - rhs_shmem[rhs_store_idx_7] = rhs_pf7; \ - - // declare and initialize result array -#define res(i, j) _res_##i##j -#define initResultRow(i) \ - Scalar res(i, 0) = Scalar(0); \ - Scalar res(i, 1) = Scalar(0); \ - Scalar res(i, 2) = Scalar(0); \ - Scalar res(i, 3) = Scalar(0); \ - Scalar res(i, 4) = Scalar(0); \ - Scalar res(i, 5) = Scalar(0); \ - Scalar res(i, 6) = Scalar(0); \ - Scalar res(i, 7) = Scalar(0); \ - - initResultRow(0); - initResultRow(1); - initResultRow(2); - initResultRow(3); - initResultRow(4); - initResultRow(5); - initResultRow(6); - initResultRow(7); -#undef initResultRow - - for (Index base_k = 0; base_k < k_size; base_k += 64) { - // wait for previous iteration to finish with shmem. Despite common sense, - // the code is a bit faster with this here than at the bottom of the loop - __syncthreads(); - - prefetchIntoRegisters(base_k); - writeRegToShmem(); - - #undef prefetchIntoRegisters - #undef writeRegToShmem - // (preprocessor directives: undefining the macros here, after their single use above, has no runtime effect) - - // wait for shared mem packing to be done before starting computation - __syncthreads(); - - // compute 8x8 matrix product by outer product. This involves packing one column - // of LHS and one row of RHS into registers (takes 16 registers). 
- -#define lcol(i) _lcol##i - Scalar lcol(0); - Scalar lcol(1); - Scalar lcol(2); - Scalar lcol(3); - Scalar lcol(4); - Scalar lcol(5); - Scalar lcol(6); - Scalar lcol(7); - -#define rrow(j) _rrow##j - Scalar rrow(0); - Scalar rrow(1); - Scalar rrow(2); - Scalar rrow(3); - Scalar rrow(4); - Scalar rrow(5); - Scalar rrow(6); - Scalar rrow(7); - - // Now x corresponds to k, y to m, and z to n - const volatile Scalar* lhs_block = &lhs_shmem[threadIdx.x + 9 * threadIdx.y]; - const volatile Scalar* rhs_block = &rhs_shmem[threadIdx.x + 8 * threadIdx.z]; - -#define lhs_element(i, j) lhs_block[72 * ((i) + 8 * (j))] -#define rhs_element(i, j) rhs_block[72 * ((i) + 8 * (j))] - -#define loadData(i, j) \ - lcol(0) = lhs_element(0, j); \ - rrow(0) = rhs_element(i, 0); \ - lcol(1) = lhs_element(1, j); \ - rrow(1) = rhs_element(i, 1); \ - lcol(2) = lhs_element(2, j); \ - rrow(2) = rhs_element(i, 2); \ - lcol(3) = lhs_element(3, j); \ - rrow(3) = rhs_element(i, 3); \ - lcol(4) = lhs_element(4, j); \ - rrow(4) = rhs_element(i, 4); \ - lcol(5) = lhs_element(5, j); \ - rrow(5) = rhs_element(i, 5); \ - lcol(6) = lhs_element(6, j); \ - rrow(6) = rhs_element(i, 6); \ - lcol(7) = lhs_element(7, j); \ - rrow(7) = rhs_element(i, 7); \ - -#define computeCol(j) \ - res(0, j) += lcol(0) * rrow(j); \ - res(1, j) += lcol(1) * rrow(j); \ - res(2, j) += lcol(2) * rrow(j); \ - res(3, j) += lcol(3) * rrow(j); \ - res(4, j) += lcol(4) * rrow(j); \ - res(5, j) += lcol(5) * rrow(j); \ - res(6, j) += lcol(6) * rrow(j); \ - res(7, j) += lcol(7) * rrow(j); \ - -#define computePass(i) \ - loadData(i, i); \ - \ - computeCol(0); \ - computeCol(1); \ - computeCol(2); \ - computeCol(3); \ - computeCol(4); \ - computeCol(5); \ - computeCol(6); \ - computeCol(7); \ - - computePass(0); - computePass(1); - computePass(2); - computePass(3); - computePass(4); - computePass(5); - computePass(6); - computePass(7); - -#undef lcol -#undef rrow -#undef lhs_element -#undef rhs_element -#undef loadData -#undef computeCol 
-#undef computePass - } // end loop over k - - // we've now iterated over all of the large (ie width 64) k blocks and - // accumulated results in registers. At this point thread (x, y, z) contains - // the sum across all big k blocks of the product of little k block of index (x, y) - // with block of index (y, z). To compute the final output, we need to reduce - // the 8 threads over y by summation. -#define shuffleInc(i, j, mask) res(i, j) += __shfl_xor(res(i, j), mask) - -#define reduceRow(i, mask) \ - shuffleInc(i, 0, mask); \ - shuffleInc(i, 1, mask); \ - shuffleInc(i, 2, mask); \ - shuffleInc(i, 3, mask); \ - shuffleInc(i, 4, mask); \ - shuffleInc(i, 5, mask); \ - shuffleInc(i, 6, mask); \ - shuffleInc(i, 7, mask); \ - -#define reduceMatrix(mask) \ - reduceRow(0, mask); \ - reduceRow(1, mask); \ - reduceRow(2, mask); \ - reduceRow(3, mask); \ - reduceRow(4, mask); \ - reduceRow(5, mask); \ - reduceRow(6, mask); \ - reduceRow(7, mask); \ - - // actually perform the reduction, now each thread of index (_, y, z) - // contains the correct values in its registers that belong in the output - // block - reduceMatrix(1); - reduceMatrix(2); - reduceMatrix(4); - -#undef shuffleInc -#undef reduceRow -#undef reduceMatrix - - // now we need to copy the 64 values into main memory. We can't split work - // among threads because all variables are in registers. There's 2 ways - // to do this: - // (1) have 1 thread do 64 writes from registers into global memory - // (2) have 1 thread do 64 writes into shared memory, and then 8 threads - // each do 8 writes into global memory. We can just overwrite the shared - // memory from the problem we just solved. 
- // (2) is slightly faster than (1) due to less branching and more ILP - - // TODO: won't yield much gain, but could just use currently unused shared mem - // and then we won't have to sync - // wait for shared mem to be out of use - __syncthreads(); - -#define writeResultShmem(i, j) \ - lhs_shmem[i + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j] = res(i, j); \ - -#define writeRow(i) \ - writeResultShmem(i, 0); \ - writeResultShmem(i, 1); \ - writeResultShmem(i, 2); \ - writeResultShmem(i, 3); \ - writeResultShmem(i, 4); \ - writeResultShmem(i, 5); \ - writeResultShmem(i, 6); \ - writeResultShmem(i, 7); \ - - if (threadIdx.x == 0) { - writeRow(0); - writeRow(1); - writeRow(2); - writeRow(3); - writeRow(4); - writeRow(5); - writeRow(6); - writeRow(7); - } -#undef writeResultShmem -#undef writeRow - - const int max_i_write = (min)((int)((m_size - base_m - threadIdx.y + 7) / 8), 8); - const int max_j_write = (min)((int)((n_size - base_n - threadIdx.z + 7) / 8), 8); - - if (threadIdx.x < max_i_write) { - if (max_j_write == 8) { - // TODO: can i trade bank conflicts for coalesced writes? 
- Scalar val0 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 0]; - Scalar val1 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 1]; - Scalar val2 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 2]; - Scalar val3 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 3]; - Scalar val4 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 4]; - Scalar val5 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 5]; - Scalar val6 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 6]; - Scalar val7 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 7]; - - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 0) = val0; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 1) = val1; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 2) = val2; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 3) = val3; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 4) = val4; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 5) = val5; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 6) = val6; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 7) = val7; - } else { -#pragma unroll 7 - for (int j = 0; j < max_j_write; j++) { - Scalar val = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j]; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * j) = val; - } - } - } -#undef res -} - - -template<typename Scalar, typename Index, typename LhsMapper, - typename RhsMapper, typename OutputMapper> -__global__ void -__launch_bounds__(512) -EigenContractionKernel(const LhsMapper lhs, const RhsMapper rhs, - const OutputMapper output, - const Index m_size, const Index n_size, const Index k_size) { - __shared__ volatile 
Scalar lhs_shmem[72 * 64]; - __shared__ volatile Scalar rhs_shmem[72 * 64]; - - const Index m_block_idx = blockIdx.x; - const Index n_block_idx = blockIdx.y; - - const Index base_m = 64 * m_block_idx; - const Index base_n = 64 * n_block_idx; - - if (base_m + 63 < m_size && base_n + 63 < n_size) { - EigenContractionKernelInternal<Scalar, Index, LhsMapper, RhsMapper, OutputMapper, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size); - } else { - EigenContractionKernelInternal<Scalar, Index, LhsMapper, RhsMapper, OutputMapper, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size); - } -} - - -template<typename Index, typename LhsMapper, - typename RhsMapper, typename OutputMapper, bool CHECK_LHS_BOUNDARY, - bool CHECK_RHS_BOUNDARY> -__device__ EIGEN_STRONG_INLINE void -EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rhs, - const OutputMapper output, float2 lhs_shmem2[][16], - float2 rhs_shmem2[][8], const Index m_size, - const Index n_size, const Index k_size, - const Index base_m, const Index base_n) { - typedef float Scalar; - - // prefetch registers - float4 lhs_pf0, rhs_pf0; - - float4 results[4]; - for (int i = 0; i < 4; i++) { - results[i].x = results[i].y = results[i].z = results[i].w = 0; - } - - -#define prefetch_lhs(reg, row, col) \ - if (!CHECK_LHS_BOUNDARY) { \ - if (col < k_size) { \ - reg =lhs.loadPacket(row, col); \ - } \ - } else { \ - if (col < k_size) { \ - if (row + 3 < m_size) { \ - reg =lhs.loadPacket(row, col); \ - } else if (row + 2 < m_size) { \ - reg.x =lhs(row + 0, col); \ - reg.y =lhs(row + 1, col); \ - reg.z =lhs(row + 2, col); \ - } else if (row + 1 < m_size) { \ - reg.x =lhs(row + 0, col); \ - reg.y =lhs(row + 1, col); \ - } else if (row < m_size) { \ - reg.x =lhs(row + 0, col); \ - } \ - } \ - } \ - - - Index lhs_vert = base_m+threadIdx.x*4; - - for (Index k = 0; k < k_size; k += 16) { - lhs_pf0 = internal::pset1<float4>(0); - rhs_pf0 = internal::pset1<float4>(0); - - Index 
lhs_horiz = threadIdx.y+k; - prefetch_lhs(lhs_pf0, lhs_vert, lhs_horiz) - - Index rhs_vert = k+(threadIdx.x%4)*4; - Index rhs_horiz0 = (threadIdx.x>>2)+threadIdx.y*4+base_n; - - if (!CHECK_RHS_BOUNDARY) { - if ((rhs_vert + 3) < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); - } else if (rhs_vert + 2 < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); - } else if (rhs_vert + 1 < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - } else if (rhs_vert < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - } - } else { - if (rhs_horiz0 < n_size) { - if ((rhs_vert + 3) < k_size) { - rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); - } else if ((rhs_vert + 2) < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); - } else if ((rhs_vert + 1) < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - } else if (rhs_vert < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - } - } - } - float x1, x2 ; - // the following can be a bitwise operation..... some day. - if((threadIdx.x%8) < 4) { - x1 = rhs_pf0.y; - x2 = rhs_pf0.w; - } else { - x1 = rhs_pf0.x; - x2 = rhs_pf0.z; - } - x1 = __shfl_xor(x1, 4); - x2 = __shfl_xor(x2, 4); - if((threadIdx.x%8) < 4) { - rhs_pf0.y = x1; - rhs_pf0.w = x2; - } else { - rhs_pf0.x = x1; - rhs_pf0.z = x2; - } - - // We have 64 features. - // Row 0 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 0, 1. - // Row 1 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 2, 3. - // ... - // Row 31 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 62, 63 - // Row 32 -> times (2, 6, 10, 14, 3, 7, 11, 15) for features 0, 1 - // ... 
- rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2][threadIdx.x%8] = make_float2(rhs_pf0.x, rhs_pf0.y); - rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2+32][threadIdx.x%8] = make_float2(rhs_pf0.z, rhs_pf0.w); - - // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) - // Row 1 (time 1) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) - // ... - // Row 15 (time 15) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) - // Row 16 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) - // ... - - lhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(lhs_pf0.x, lhs_pf0.y); - lhs_shmem2[threadIdx.y+16][threadIdx.x] = make_float2(lhs_pf0.z, lhs_pf0.w); - - -#define add_vals(fl1, fl2, fr1, fr2)\ - results[0].x += fl1.x * fr1.x;\ - results[0].y += fl1.y * fr1.x;\ - results[0].z += fl2.x * fr1.x;\ - results[0].w += fl2.y * fr1.x;\ -\ - results[1].x += fl1.x * fr1.y;\ - results[1].y += fl1.y * fr1.y;\ - results[1].z += fl2.x * fr1.y;\ - results[1].w += fl2.y * fr1.y;\ -\ - results[2].x += fl1.x * fr2.x;\ - results[2].y += fl1.y * fr2.x;\ - results[2].z += fl2.x * fr2.x;\ - results[2].w += fl2.y * fr2.x;\ -\ - results[3].x += fl1.x * fr2.y;\ - results[3].y += fl1.y * fr2.y;\ - results[3].z += fl2.x * fr2.y;\ - results[3].w += fl2.y * fr2.y;\ - - __syncthreads(); - - // Do the multiplies. - #pragma unroll - for (int koff = 0; koff < 16; koff ++) { - // 32 x threads. 
- float2 fl1 = lhs_shmem2[koff][threadIdx.x]; - float2 fl2 = lhs_shmem2[koff + 16][threadIdx.x]; - - int start_feature = threadIdx.y * 4; - float2 fr1 = rhs_shmem2[(start_feature>>1) + 32*((koff%4)/2)][koff/4 + (koff%2)*4]; - float2 fr2 = rhs_shmem2[(start_feature>>1) + 1 + 32*((koff%4)/2)][koff/4 + (koff%2)*4]; - - add_vals(fl1, fl2, fr1, fr2) - } - __syncthreads(); - } - -#undef prefetch_lhs -#undef add_vals - - Index horiz_base = threadIdx.y*4+base_n; - if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) { - for (int i = 0; i < 4; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } else if (!CHECK_RHS_BOUNDARY) { - // CHECK LHS - if (lhs_vert + 3 < m_size) { - for (int i = 0; i < 4; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } else if (lhs_vert + 2 < m_size) { - for (int i = 0; i < 4; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - } - } else if (lhs_vert + 1 < m_size) { - for (int i = 0; i < 4; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - } - } else if (lhs_vert < m_size) { - for (int i = 0; i < 4; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - } - } - } else if (!CHECK_LHS_BOUNDARY) { - // CHECK RHS - /* - int ncols_rem = fminf(n_size- horiz_base, 4); - for (int i = 0; i < ncols_rem; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - 
}*/ - for (int i = 0; i < 4; i++) { - if (horiz_base+i < n_size) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } - } else { - // CHECK both boundaries. - for (int i = 0; i < 4; i++) { - if (horiz_base+i < n_size) { - if (lhs_vert < m_size) - output(lhs_vert, horiz_base + i) = results[i].x; - if (lhs_vert + 1 < m_size) - output(lhs_vert + 1, horiz_base + i) = results[i].y; - if (lhs_vert + 2 < m_size) - output(lhs_vert + 2, horiz_base + i) = results[i].z; - if (lhs_vert + 3 < m_size) - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } - } -} - - -template<typename Index, typename LhsMapper, - typename RhsMapper, typename OutputMapper, bool CHECK_LHS_BOUNDARY, - bool CHECK_RHS_BOUNDARY> -__device__ EIGEN_ALWAYS_INLINE void -EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, - const OutputMapper output, float2 lhs_shmem2[][32], - float2 rhs_shmem2[][8], const Index m_size, - const Index n_size, const Index k_size, - const Index base_m, const Index base_n) { - typedef float Scalar; - - // prefetch registers - float4 lhs_pf0, lhs_pf1, lhs_pf2, lhs_pf3; - float4 rhs_pf0, rhs_pf1; - - float4 results[8]; - for (int i=0; i < 8; i++) { - results[i].x = results[i].y = results[i].z = results[i].w = 0; - } - - - Index lhs_vert = base_m+threadIdx.x*4+(threadIdx.y%4)*32; - for (Index k = 0; k < k_size; k += 32) { - lhs_pf0 = internal::pset1<float4>(0); - lhs_pf1 = internal::pset1<float4>(0); - lhs_pf2 = internal::pset1<float4>(0); - lhs_pf3 = internal::pset1<float4>(0); - - rhs_pf0 = internal::pset1<float4>(0); - rhs_pf1 = internal::pset1<float4>(0); - - if (!CHECK_LHS_BOUNDARY) { - if ((threadIdx.y/4+k+24) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); - lhs_pf2 =lhs.loadPacket(lhs_vert, 
(threadIdx.y/4+k+16)); - lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24)); - } else if ((threadIdx.y/4+k+16) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); - lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); - } else if ((threadIdx.y/4+k+8) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); - } else if ((threadIdx.y/4+k) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - } - } else { - // just CHECK_LHS_BOUNDARY - if (lhs_vert + 3 < m_size) { - if ((threadIdx.y/4+k+24) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); - lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); - lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24)); - } else if ((threadIdx.y/4+k+16) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); - lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); - } else if ((threadIdx.y/4+k+8) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); - } else if ((threadIdx.y/4+k) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - } - } else if (lhs_vert + 2 < m_size) { - if ((threadIdx.y/4+k+24) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); - lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); - lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); - lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); - lhs_pf2.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+16)); - lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); - 
lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24)); - lhs_pf3.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+24)); - } else if ((threadIdx.y/4+k+16) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); - lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); - lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); - lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); - lhs_pf2.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+16)); - } else if ((threadIdx.y/4+k+8) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); - lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); - } else if ((threadIdx.y/4+k) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); - } - } else if (lhs_vert + 1 < m_size) { - if ((threadIdx.y/4+k+24) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); - lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); - lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); - lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); - lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24)); - } else if ((threadIdx.y/4+k+16) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); - lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); - 
lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); - } else if ((threadIdx.y/4+k+8) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); - } else if ((threadIdx.y/4+k) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - } - } else if (lhs_vert < m_size) { - if ((threadIdx.y/4+k+24) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); - lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); - } else if ((threadIdx.y/4+k+16) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); - } else if ((threadIdx.y/4+k+8) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - } else if ((threadIdx.y/4+k) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - } - } - } - __syncthreads(); - Index rhs_vert = k+threadIdx.x*4; - Index rhs_horiz0 = threadIdx.y*2+base_n; - Index rhs_horiz1 = threadIdx.y*2+1+base_n; - if (!CHECK_RHS_BOUNDARY) { - if ((rhs_vert + 3) < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); - rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1); - } else if (rhs_vert + 2 < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); - rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); - rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); - rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1); - } else if (rhs_vert + 1 < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf1.x = 
rhs(rhs_vert, rhs_horiz1); - rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); - } else if (rhs_vert < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); - } - } else { - if (rhs_horiz1 < n_size) { - if ((rhs_vert + 3) < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); - rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1); - } else if (rhs_vert + 2 < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); - rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); - rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); - rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1); - } else if (k+threadIdx.x*4 + 1 < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); - rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); - } else if (k+threadIdx.x*4 < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); - } - } else if (rhs_horiz0 < n_size) { - if ((rhs_vert + 3) < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); - } else if ((rhs_vert + 2) < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); - } else if ((rhs_vert + 1) < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - } else if (rhs_vert < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - } - } - } - __syncthreads(); - // Loaded. Do computation - // Row 0 -> times (0, 4, 8, .. 28) for features 0, 1. - // Row 1 -> times (0, 4, 8, .. 28) for features 2, 3. - // .. - // Row 31 -> times (0, 4, 8, .. 28) for features 62, 63 - rhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(rhs_pf0.x, rhs_pf1.x); - // Row 32 -> times (1, 5, 9, .. 29) for features 0, 1. 
- // Row 33 -> times (1, 5, 9, .. 29) for features 2, 3. - // .. - rhs_shmem2[threadIdx.y+32][threadIdx.x] = make_float2(rhs_pf0.y, rhs_pf1.y); - // Row 64 -> times (2, 6, 10, .. 30) for features 0, 1. - // Row 65 -> times (2, 6, 10, .. 30) for features 2, 3. - rhs_shmem2[threadIdx.y+64][threadIdx.x] = make_float2(rhs_pf0.z, rhs_pf1.z); - // Row 96 -> times (3, 7, 11, .. 31) for features 0, 1. - // Row 97 -> times (3, 7, 11, .. 31) for features 2, 3. - rhs_shmem2[threadIdx.y+96][threadIdx.x] = make_float2(rhs_pf0.w, rhs_pf1.w); - - // LHS. - // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) .. (124, 125) - // Row 1 (time 1) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) .. (124, 125) - // ... - // Row 8 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) .. (126, 127) - // Row 15 (time 7) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) .. (126, 127) - - -#define add_vals(a_feat1, a_feat2, f1, f2, f3, f4)\ - results[0].x += a_feat1.x * f1.x;\ - results[1].x += a_feat1.x * f1.y;\ - results[2].x += a_feat1.x * f2.x;\ - results[3].x += a_feat1.x * f2.y;\ - results[4].x += a_feat1.x * f3.x;\ - results[5].x += a_feat1.x * f3.y;\ - results[6].x += a_feat1.x * f4.x;\ - results[7].x += a_feat1.x * f4.y;\ -\ - results[0].y += a_feat1.y * f1.x;\ - results[1].y += a_feat1.y * f1.y;\ - results[2].y += a_feat1.y * f2.x;\ - results[3].y += a_feat1.y * f2.y;\ - results[4].y += a_feat1.y * f3.x;\ - results[5].y += a_feat1.y * f3.y;\ - results[6].y += a_feat1.y * f4.x;\ - results[7].y += a_feat1.y * f4.y;\ -\ - results[0].z += a_feat2.x * f1.x;\ - results[1].z += a_feat2.x * f1.y;\ - results[2].z += a_feat2.x * f2.x;\ - results[3].z += a_feat2.x * f2.y;\ - results[4].z += a_feat2.x * f3.x;\ - results[5].z += a_feat2.x * f3.y;\ - results[6].z += a_feat2.x * f4.x;\ - results[7].z += a_feat2.x * f4.y;\ -\ - results[0].w += a_feat2.y * f1.x;\ - results[1].w += a_feat2.y * f1.y;\ - results[2].w += a_feat2.y 
* f2.x;\ - results[3].w += a_feat2.y * f2.y;\ - results[4].w += a_feat2.y * f3.x;\ - results[5].w += a_feat2.y * f3.y;\ - results[6].w += a_feat2.y * f4.x;\ - results[7].w += a_feat2.y * f4.y;\ - - lhs_shmem2[threadIdx.y/4][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.x, lhs_pf0.y); - lhs_shmem2[threadIdx.y/4+8][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.x, lhs_pf1.y); - lhs_shmem2[threadIdx.y/4+16][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf2.x, lhs_pf2.y); - lhs_shmem2[threadIdx.y/4+24][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.x, lhs_pf3.y); - - lhs_shmem2[threadIdx.y/4 + 32][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.z, lhs_pf0.w); - lhs_shmem2[threadIdx.y/4 + 40][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.z, lhs_pf1.w); - lhs_shmem2[threadIdx.y/4 + 48][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf2.z, lhs_pf2.w); - lhs_shmem2[threadIdx.y/4 + 56][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.z, lhs_pf3.w); - - __syncthreads(); - - // Do the multiplies. - #pragma unroll - for (int koff = 0; koff < 32; koff ++) { - float2 a3 = lhs_shmem2[koff][threadIdx.x + (threadIdx.y % 4) * 8]; - float2 a4 = lhs_shmem2[koff + 32][threadIdx.x + (threadIdx.y % 4) * 8]; - - // first feature is at (threadIdx.y/4) * 8 last is at start + 8. 
- int start_feature = (threadIdx.y / 4) * 8; - - float2 br1 = rhs_shmem2[start_feature/2 + (koff % 4) * 32][koff/4]; - float2 br2 = rhs_shmem2[start_feature/2 + 1 + (koff % 4) * 32][koff/4]; - float2 br3 = rhs_shmem2[start_feature/2 + 2 + (koff % 4) * 32][koff/4]; - float2 br4 = rhs_shmem2[start_feature/2 + 3 + (koff % 4) * 32][koff/4]; - - add_vals(a3, a4, br1, br2, br3, br4) - } - __syncthreads(); - } // end loop over k - - - __syncthreads(); - Index horiz_base = (threadIdx.y/4)*8+base_n; - if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) { - #pragma unroll - for (int i = 0; i < 8; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } else if (!CHECK_RHS_BOUNDARY) { - if (lhs_vert + 3 < m_size) { - #pragma unroll - for (int i = 0; i < 8; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } else if (lhs_vert + 2 < m_size) { - #pragma unroll - for (int i = 0; i < 8; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - } - } else if (lhs_vert + 1 < m_size) { - #pragma unroll - for (int i = 0; i < 8; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - } - } else if (lhs_vert < m_size) { - #pragma unroll - for (int i = 0; i < 8; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - } - } - } else if (!CHECK_LHS_BOUNDARY) { - // CHECK BOUNDARY_B - #pragma unroll - for (int i = 0; i < 8; i++) { - if (horiz_base + i < n_size) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - 
output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } - } else { - // CHECK both boundaries. - #pragma unroll - for (int i = 0; i < 8; i++) { - if (horiz_base + i < n_size) { - if (lhs_vert < m_size) - output(lhs_vert, horiz_base + i) = results[i].x; - if (lhs_vert + 1 < m_size) - output(lhs_vert + 1, horiz_base + i) = results[i].y; - if (lhs_vert + 2 < m_size) - output(lhs_vert + 2, horiz_base + i) = results[i].z; - if (lhs_vert + 3 < m_size) - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } - } -} - - -template<typename Index, typename LhsMapper, - typename RhsMapper, typename OutputMapper> -__global__ void -__launch_bounds__(256) -EigenFloatContractionKernel(const LhsMapper lhs, const RhsMapper rhs, - const OutputMapper output, - const Index m_size, const Index n_size, const Index k_size) { - __shared__ float2 lhs_shmem[64*32]; - __shared__ float2 rhs_shmem[128*8]; - - typedef float2 LHS_MEM[64][32]; - typedef float2 RHS_MEM[128][8]; - - typedef float2 LHS_MEM16x16[32][16]; - typedef float2 RHS_MEM16x16[64][8]; - - const Index m_block_idx = blockIdx.x; - const Index n_block_idx = blockIdx.y; - - const Index base_m = 128 * m_block_idx; - const Index base_n = 64 * n_block_idx; - - const bool check_rhs = (base_n + 63) >= n_size; - const bool check_lhs128 = (base_m + 127) >= m_size; - - if (!check_rhs) { - if (!check_lhs128) { - // >= 128 rows left - EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, false, false>( - lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); - } else { - EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, true, false>( - lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); - } - } else { - if (!check_lhs128) { - // >= 128 rows left - EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, 
OutputMapper, false, true>( - lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); - } else { - EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, true, true>( - lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); - } - } -} - -template<typename Index, typename LhsMapper, - typename RhsMapper, typename OutputMapper> -__global__ void -__launch_bounds__(256) -EigenFloatContractionKernel16x16(const LhsMapper lhs, const RhsMapper rhs, - const OutputMapper output, - const Index m_size, const Index n_size, const Index k_size) { - __shared__ float2 lhs_shmem[32][16]; - __shared__ float2 rhs_shmem[64][8]; - - const Index m_block_idx = blockIdx.x; - const Index n_block_idx = blockIdx.y; - - const Index base_m = 64 * m_block_idx; - const Index base_n = 64 * n_block_idx; - - if (base_m + 63 < m_size) { - if (base_n + 63 < n_size) { - EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, false, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); - } else { - EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, false, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); - } - } else { - if (base_n + 63 < n_size) { - EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, true, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); - } else { - EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, true, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); - } - } -} - - -template<typename Indices, typename LeftArgType, typename RightArgType> -struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> : - public 
TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> > { - - typedef GpuDevice Device; - - typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self; - typedef TensorContractionEvaluatorBase<Self> Base; - - typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef typename XprType::Index Index; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, GpuDevice>::type PacketReturnType; - - enum { - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - }; - - // Most of the code is assuming that both input tensors are ColMajor. If the - // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: - // If we want to compute A * B = C, where A is LHS and B is RHS, the code - // will pretend B is LHS and A is RHS. - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; - - static const int LDims = - internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value; - static const int RDims = - internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value; - static const int ContractDims = internal::array_size<Indices>::value; - - typedef array<Index, LDims> left_dim_mapper_t; - typedef array<Index, RDims> right_dim_mapper_t; - - typedef array<Index, ContractDims> contract_t; - typedef array<Index, LDims - ContractDims> left_nocontract_t; - typedef array<Index, RDims - ContractDims> right_nocontract_t; - - static const int NumDims = LDims + RDims - 2 * ContractDims; - - typedef DSizes<Index, NumDims> 
Dimensions; - - // typedefs needed in evalTo - typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar; - typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar; - - typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator; - typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator; - - typedef typename LeftEvaluator::Dimensions LeftDimensions; - typedef typename RightEvaluator::Dimensions RightDimensions; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : - Base(op, device) {} - - // We need to redefine this method to make nvcc happy - EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { - this->m_leftImpl.evalSubExprsIfNeeded(NULL); - this->m_rightImpl.evalSubExprsIfNeeded(NULL); - if (data) { - evalTo(data); - return false; - } else { - this->m_result = static_cast<Scalar *>(this->m_device.allocate(this->dimensions().TotalSize() * sizeof(Scalar))); - evalTo(this->m_result); - return true; - } - } - - void evalTo(Scalar* buffer) const { - if (this->m_lhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_reordered) { - evalTyped<true, true, true, Unaligned>(buffer); - } - else { - evalTyped<true, true, false, Unaligned>(buffer); - } - } - else { - if (this->m_rhs_inner_dim_reordered) { - evalTyped<true, false, true, Unaligned>(buffer); - } - else { - evalTyped<true, false, false, Unaligned>(buffer); - } - } - } - else { - if (this->m_rhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_reordered) { - evalTyped<false, true, true, Unaligned>(buffer); - } - else { - evalTyped<false, true, false, Unaligned>(buffer); - } - } - else { - if (this->m_rhs_inner_dim_reordered) { - evalTyped<false, false, true, Unaligned>(buffer); - } - else { - evalTyped<false, false, false, Unaligned>(buffer); - } - } - } - } - - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool 
rhs_inner_dim_reordered, int Alignment> - void evalTyped(Scalar* buffer) const { - // columns in left side, rows in right side - const Index k = this->m_k_size; - - // rows in left side - const Index m = this->m_i_size; - - // columns in right side - const Index n = this->m_j_size; - - // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) - this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); - - typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs, - LeftEvaluator, left_nocontract_t, - contract_t, 4, - lhs_inner_dim_contiguous, - false, Unaligned> LhsMapper; - - typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs, - RightEvaluator, right_nocontract_t, - contract_t, 4, - rhs_inner_dim_contiguous, - rhs_inner_dim_reordered, Unaligned> RhsMapper; - - typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper; - - - // initialize data mappers - LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, - this->m_left_contracting_strides, this->m_k_strides); - - RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, - this->m_right_contracting_strides, this->m_k_strides); - - OutputMapper output(buffer, m); - - setCudaSharedMemConfig(cudaSharedMemBankSizeEightByte); - if (internal::is_same<LhsScalar, float>::value && - internal::is_same<RhsScalar, float>::value) { - if (m < 768 || n < 768) { - const Index m_blocks = (m + 63) / 64; - const Index n_blocks = (n + 63) / 64; - const dim3 num_blocks(m_blocks, n_blocks, 1); - const dim3 block_size(16, 16, 1); - LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel16x16<Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, this->m_device, lhs, rhs, output, m, n, k); - } else { - const Index m_blocks = (m + 127) / 128; - const Index n_blocks = (n + 63) / 64; - const dim3 num_blocks(m_blocks, n_blocks, 1); - const dim3 block_size(8, 32, 1); - 
LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel<Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, this->m_device, lhs, rhs, output, m, n, k); - } - } else { - const Index m_blocks = (m + 63) / 64; - const Index n_blocks = (n + 63) / 64; - const dim3 num_blocks(m_blocks, n_blocks, 1); - const dim3 block_size(8, 8, 8); - LAUNCH_CUDA_KERNEL((EigenContractionKernel<Scalar, Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, this->m_device, lhs, rhs, output, m, n, k); - } - } -}; - -} // end namespace Eigen - -#endif // EIGEN_USE_GPU and __CUDACC__ -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMappers.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMappers.h deleted file mode 100644 index b5b09bf41e..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMappers.h +++ /dev/null @@ -1,383 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Eric Martin <eric@ericmart.in> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPERS_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPERS_H - -// NOTE: The file has strong column major bias/assumptions, which is pointed out -// in comments. As of right now, this code will only work the column major packing -// routines. - -/* - * A tensor contraction can be represented by a matrix multiplication. We don't - * want to actually reshape the tensor into a matrix (because this involves a - * full copy of the tensor), so the reshaping operation is implicit in a sense. 
- * This means we need a collection of methods take a matrix index and return - * the element of the tensor that would be at that index if we were to actually - * reshape the matrix. This file consists of these methods. - */ - -namespace Eigen { -namespace internal { - -enum { - Rhs = 0, - Lhs = 1, -}; - -/* - * Used to lookup the tensor index when working with the left and right - * arguments to a tensor contraction. - */ -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - size_t packet_size, bool inner_dim_contiguous> -class SimpleTensorContractionMapper { - public: - EIGEN_DEVICE_FUNC - SimpleTensorContractionMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) : - m_tensor(tensor), - m_nocontract_strides(nocontract_strides), - m_ij_strides(ij_strides), - m_contract_strides(contract_strides), - m_k_strides(k_strides) { } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void prefetch(int i) { } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar operator()(Index row) const { - // column major assumption - return operator()(row, 0); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar operator()(Index row, Index col) const { - return m_tensor.coeff(computeIndex(row, col)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index computeIndex(Index row, Index col) const { - const bool left = (side == Lhs); - Index nocontract_val = left ? 
row : col; - Index linidx = 0; - for (int i = array_size<nocontract_t>::value - 1; i > 0; i--) { - const Index idx = nocontract_val / m_ij_strides[i]; - linidx += idx * m_nocontract_strides[i]; - nocontract_val -= idx * m_ij_strides[i]; - } - if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) { - if (side == Lhs && inner_dim_contiguous) { - eigen_assert(m_nocontract_strides[0] == 1); - linidx += nocontract_val; - } else { - linidx += nocontract_val * m_nocontract_strides[0]; - } - } - - Index contract_val = left ? col : row; - for (int i = array_size<contract_t>::value - 1; i > 0; i--) { - const Index idx = contract_val / m_k_strides[i]; - linidx += idx * m_contract_strides[i]; - contract_val -= idx * m_k_strides[i]; - } - EIGEN_STATIC_ASSERT(array_size<contract_t>::value > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - if (side == Rhs && inner_dim_contiguous) { - eigen_assert(m_contract_strides[0] == 1); - linidx += contract_val; - } else { - linidx += contract_val * m_contract_strides[0]; - } - - return linidx; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE IndexPair<Index> computeIndexPair(Index row, Index col, const Index distance) const { - const bool left = (side == Lhs); - Index nocontract_val[2] = {left ? row : col, left ? 
row + distance : col}; - Index linidx[2] = {0, 0}; - for (int i = array_size<nocontract_t>::value - 1; i > 0; i--) { - const Index idx0 = nocontract_val[0] / m_ij_strides[i]; - const Index idx1 = nocontract_val[1] / m_ij_strides[i]; - linidx[0] += idx0 * m_nocontract_strides[i]; - linidx[1] += idx1 * m_nocontract_strides[i]; - nocontract_val[0] -= idx0 * m_ij_strides[i]; - nocontract_val[1] -= idx1 * m_ij_strides[i]; - } - if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) { - if (side == Lhs && inner_dim_contiguous) { - eigen_assert(m_nocontract_strides[0] == 1); - linidx[0] += nocontract_val[0]; - linidx[1] += nocontract_val[1]; - } else { - linidx[0] += nocontract_val[0] * m_nocontract_strides[0]; - linidx[1] += nocontract_val[1] * m_nocontract_strides[0]; - } - } - - Index contract_val[2] = {left ? col : row, left ? col : row + distance}; - for (int i = array_size<contract_t>::value - 1; i > 0; i--) { - const Index idx0 = contract_val[0] / m_k_strides[i]; - const Index idx1 = contract_val[1] / m_k_strides[i]; - linidx[0] += idx0 * m_contract_strides[i]; - linidx[1] += idx1 * m_contract_strides[i]; - contract_val[0] -= idx0 * m_k_strides[i]; - contract_val[1] -= idx1 * m_k_strides[i]; - } - EIGEN_STATIC_ASSERT(array_size<contract_t>::value > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - if (side == Rhs && inner_dim_contiguous) { - eigen_assert(m_contract_strides[0] == 1); - linidx[0] += contract_val[0]; - linidx[1] += contract_val[1]; - } else { - linidx[0] += contract_val[0] * m_contract_strides[0]; - linidx[1] += contract_val[1] * m_contract_strides[0]; - } - return IndexPair<Index>(linidx[0], linidx[1]); - } - - Index firstAligned(Index size) const { - return size; - } - Index stride() const { - return 1; - } - - protected: - const Tensor m_tensor; - const nocontract_t m_nocontract_strides; - const nocontract_t m_ij_strides; - const contract_t m_contract_strides; - const contract_t m_k_strides; -}; - - - -template<typename Scalar, 
typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - size_t packet_size, bool inner_dim_contiguous, - bool inner_dim_reordered, int Alignment> - class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous> -{ - public: - typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous> ParentMapper; - - EIGEN_DEVICE_FUNC - BaseTensorContractionMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) : - ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } - - typedef typename packet_traits<Scalar>::type Packet; - typedef typename packet_traits<Scalar>::half HalfPacket; - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { - // whole method makes column major assumption - - // don't need to add offsets for now (because operator handles that) - // current code assumes packet size must be a multiple of 2 - EIGEN_STATIC_ASSERT(packet_size % 2 == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - - if (Tensor::PacketAccess && inner_dim_contiguous && !inner_dim_reordered) { - const Index index = this->computeIndex(i, j); - eigen_assert(this->computeIndex(i+packet_size-1, j) == index + packet_size-1); - return this->m_tensor.template packet<Alignment>(index); - } - - const IndexPair<Index> indexPair = this->computeIndexPair(i, j, packet_size - 1); - const Index first = indexPair.first; - const Index last = indexPair.second; - - // We can always do optimized packet reads from left hand side right now, because - // the vertical matrix dimension on the left hand side is never contracting. - // On the right hand side we need to check if the contracting dimensions may have - // been shuffled first. 
- if (Tensor::PacketAccess && - (side == Lhs || internal::array_size<contract_t>::value <= 1 || !inner_dim_reordered) && - (last - first) == (packet_size - 1)) { - - return this->m_tensor.template packet<Alignment>(first); - } - - EIGEN_ALIGN_DEFAULT Scalar data[packet_size]; - - data[0] = this->m_tensor.coeff(first); - for (Index k = 1; k < packet_size - 1; k += 2) { - const IndexPair<Index> internal_pair = this->computeIndexPair(i + k, j, 1); - data[k] = this->m_tensor.coeff(internal_pair.first); - data[k + 1] = this->m_tensor.coeff(internal_pair.second); - } - data[packet_size - 1] = this->m_tensor.coeff(last); - - return pload<Packet>(data); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { - // whole method makes column major assumption - - // don't need to add offsets for now (because operator handles that) - const Index half_packet_size = unpacket_traits<HalfPacket>::size; - if (half_packet_size == packet_size) { - return loadPacket(i, j); - } - EIGEN_ALIGN_DEFAULT Scalar data[half_packet_size]; - for (Index k = 0; k < half_packet_size; k++) { - data[k] = operator()(i + k, j); - } - return pload<HalfPacket>(data); - } -}; - - -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - bool inner_dim_contiguous, - bool inner_dim_reordered, int Alignment> -class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment> : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous> -{ - public: - typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous> ParentMapper; - - EIGEN_DEVICE_FUNC - BaseTensorContractionMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const 
contract_t& k_strides) : - ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } - - typedef typename packet_traits<Scalar>::type Packet; - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { - EIGEN_ALIGN_DEFAULT Scalar data[1]; - data[0] = this->m_tensor.coeff(this->computeIndex(i, j)); - return pload<typename packet_traits<Scalar>::type>(data); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const { - return loadPacket(i, j); - } -}; - -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - size_t packet_size, - bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> -class TensorContractionInputMapper; - -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - size_t packet_size, - bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> -class TensorContractionSubMapper { - public: - typedef typename packet_traits<Scalar>::type Packet; - typedef typename packet_traits<Scalar>::half HalfPacket; - - typedef TensorContractionInputMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> ParentMapper; - typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self; - typedef Self LinearMapper; - - EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) - : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { - return m_base_mapper(i + m_vert_offset, m_horiz_offset); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const { - return m_base_mapper(i + 
m_vert_offset, j + m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { - return m_base_mapper.loadPacket(i + m_vert_offset, m_horiz_offset); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { - return m_base_mapper.loadPacket(i + m_vert_offset, j + m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { - return m_base_mapper.loadHalfPacket(i + m_vert_offset, m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const { - m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { - return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset); - } - - template <typename PacketT, int AlignmentType> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const { - EIGEN_STATIC_ASSERT((internal::is_same<PacketT, Packet>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((AlignmentType == Aligned || Alignment == Unaligned), YOU_MADE_A_PROGRAMMING_MISTAKE); - return loadPacket(i); - } - - template <typename Packet> - EIGEN_DEVICE_FUNC bool aligned(Index i) const { - return false; - } - - private: - const ParentMapper& m_base_mapper; - const Index m_vert_offset; - const Index m_horiz_offset; -}; - - -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - size_t packet_size, - bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> -class TensorContractionInputMapper - : public BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> { - - public: - typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, 
Alignment> Base; - typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper; - typedef SubMapper VectorMapper; - - EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) - : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { - return SubMapper(*this, i, j); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { - return VectorMapper(*this, i, j); - } -}; - - -} // end namespace internal -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPERS_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h deleted file mode 100644 index c335086902..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ /dev/null @@ -1,713 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H - -namespace Eigen { -namespace internal { - -// Specify blocking strategy for thread pool by cols -template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index> -struct ComputeGemmByColBlockingSizes { - void operator()(Index& k, Index& m, Index& n, Index num_threads = 1) - { - computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n, num_threads); - } -}; - -// Specify blocking strategy for thread pool by rows -template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index> -struct ComputeGemmByRowBlockingSizes { - void operator()(Index& k, Index& m, Index& n, Index num_threads = 1) - { - if (!k || !m || !n) { - return; - } - m = (((m / num_threads) + 15) / 16) * 16; - } -}; - -} // namespace internal -} // namespace Eigen - -// evaluator for thread pool device -#ifdef EIGEN_USE_THREADS - -namespace Eigen { -namespace internal { - -template<typename LhsScalar, typename LhsMapper, typename Index> -struct packLhsArg { - LhsScalar* blockA; - const LhsMapper& lhs; - const Index m_start; - const Index k_start; - const Index mc; - const Index kc; -}; - -template<typename LhsScalar, typename RhsScalar, typename RhsMapper, typename OutputMapper, typename Index> -struct packRhsAndKernelArg { - const FixedSizeVector<LhsScalar*>* blockAs; - RhsScalar* blockB; - const RhsMapper& rhs; - OutputMapper& output; - const Index m; - const Index k; - const Index n; - const Index mc; - const Index kc; - const Index nc; - const Index num_threads; - const Index num_blockAs; - const Index max_m; - const Index k_block_idx; - const Index m_block_idx; - const Index n_block_idx; - const Index m_blocks; - const Index n_blocks; - FixedSizeVector<Notification*>* kernel_notifications; - const FixedSizeVector<Notification*>* lhs_notifications; - const bool need_to_pack; -}; - -template<typename RhsScalar, typename RhsMapper, typename Index> 
-struct packRhsArg { - RhsScalar* blockB; - const RhsMapper& rhs; - const Index n_start; - const Index k_start; - const Index nc; - const Index kc; -}; - -template<typename LhsScalar, typename RhsScalar, typename LhsMapper, typename OutputMapper, typename Index> -struct packLhsAndKernelArg { - const FixedSizeVector<RhsScalar*>* blockBs; - LhsScalar* blockA; - const LhsMapper& lhs; - OutputMapper& output; - const Index m; - const Index k; - const Index n; - const Index mc; - const Index kc; - const Index nc; - const Index num_threads; - const Index num_blockBs; - const Index max_n; - const Index k_block_idx; - const Index m_block_idx; - const Index n_block_idx; - const Index m_blocks; - const Index n_blocks; - FixedSizeVector<Notification*>* kernel_notifications; - const FixedSizeVector<Notification*>* rhs_notifications; - const bool need_to_pack; -}; - -} // end namespace internal - - -template<typename Indices, typename LeftArgType, typename RightArgType> -struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, ThreadPoolDevice> : - public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, ThreadPoolDevice> > { - - typedef ThreadPoolDevice Device; - - typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self; - typedef TensorContractionEvaluatorBase<Self> Base; - - typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef typename XprType::Index Index; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, ThreadPoolDevice>::type PacketReturnType; - - enum { - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - }; - - // Most of the code is assuming that both input tensors are ColMajor. 
If the - // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: - // If we want to compute A * B = C, where A is LHS and B is RHS, the code - // will pretend B is LHS and A is RHS. - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; - - static const int LDims = - internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value; - static const int RDims = - internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value; - static const int ContractDims = internal::array_size<Indices>::value; - - typedef array<Index, LDims> left_dim_mapper_t; - typedef array<Index, RDims> right_dim_mapper_t; - - typedef array<Index, ContractDims> contract_t; - typedef array<Index, LDims - ContractDims> left_nocontract_t; - typedef array<Index, RDims - ContractDims> right_nocontract_t; - - static const int NumDims = LDims + RDims - 2 * ContractDims; - - typedef DSizes<Index, NumDims> Dimensions; - - // typedefs needed in evalTo - typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar; - typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar; - typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits; - - typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator; - typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator; - - TensorEvaluator(const XprType& op, const Device& device) : - Base(op, device) {} - - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - void evalProduct(Scalar* buffer) const { - // Disable Gemv on ARM/AVX or if multiple threads are in use -#if !defined(EIGEN_VECTORIZE_NEON) && 
!defined(EIGEN_VECTORIZE_AVX) - if (this->m_j_size == 1 && this->m_device.numThreads() == 1) { - this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer); - return; - } -#endif - - if (this->m_j_size / this->m_device.numThreads() < Traits::nr && - this->m_i_size / this->m_device.numThreads() >= Traits::mr) { - evalGemmByRows<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer); - } else { - evalGemmByCols<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer); - } - } - - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - void evalGemmByCols(Scalar* buffer) const { - // columns in left side, rows in right side - const Index k = this->m_k_size; - - // rows in left side - const Index m = this->m_i_size; - - // columns in right side - const Index n = this->m_j_size; - - // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) - this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); - - - const int lhs_packet_size = PacketType<LhsScalar, Device>::size; - const int rhs_packet_size = PacketType<RhsScalar, Device>::size; - - typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs, - LeftEvaluator, left_nocontract_t, - contract_t, lhs_packet_size, - lhs_inner_dim_contiguous, - false, Unaligned> LhsMapper; - - typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs, - RightEvaluator, right_nocontract_t, - contract_t, rhs_packet_size, - rhs_inner_dim_contiguous, - rhs_inner_dim_reordered, Unaligned> RhsMapper; - - typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper; - - // TODO: packing could be faster sometimes if we supported row major tensor mappers - typedef internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, Traits::mr, - Traits::LhsProgress, 
ColMajor> LhsPacker; - typedef internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> RhsPacker; - - // TODO: replace false, false with conjugate values? - typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper, - Traits::mr, Traits::nr, false, false> GebpKernel; - - typedef internal::packLhsArg<LhsScalar, LhsMapper, Index> packLArg; - typedef internal::packRhsAndKernelArg<LhsScalar, RhsScalar, RhsMapper, OutputMapper, Index> packRKArg; - - // initialize data mappers - LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, - this->m_left_contracting_strides, this->m_k_strides); - - RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, - this->m_right_contracting_strides, this->m_k_strides); - - OutputMapper output(buffer, m); - - LhsPacker pack_lhs; - - // compute block sizes (which depend on number of threads) - const Index num_threads = this->m_device.numThreads(); - Index mc = m; - Index nc = n; - Index kc = k; - internal::ComputeGemmByColBlockingSizes<LhsScalar,RhsScalar,1,Index> block; - block(kc, mc, nc, num_threads); - eigen_assert(mc <= m); - eigen_assert(nc <= n); - eigen_assert(kc <= k); - -#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b)) - const Index k_blocks = CEIL_DIV(k, kc); - const Index n_blocks = CEIL_DIV(n, nc); - const Index m_blocks = CEIL_DIV(m, mc); -#undef CEIL_DIV - - const int sizeA = mc * kc; - const int sizeB = kc * nc; - - /* cout << "m: " << m << " n: " << n << " k: " << k << endl; - cout << "mc: " << mc << " nc: " << nc << " kc: " << kc << endl; - cout << "m_blocks: " << m_blocks << " n_blocks: " << n_blocks << " k_blocks: " << k_blocks << endl; - cout << "num threads: " << num_threads << endl; - */ - - // note: m_device.allocate should return 16 byte aligned pointers, but if blockA and blockB - // aren't 16 byte aligned segfaults will happen due to SIMD instructions - // note: You can get away with allocating just a 
single blockA and offsets and meet the - // the alignment requirements with the assumption that - // (Traits::mr * sizeof(ResScalar)) % 16 == 0 - const Index numBlockAs = (std::min)(num_threads, m_blocks); - FixedSizeVector<LhsScalar *> blockAs(num_threads); - for (int i = 0; i < num_threads; i++) { - blockAs.push_back(static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar)))); - } - - // To circumvent alignment issues, I'm just going to separately allocate the memory for each thread - // TODO: is this too much memory to allocate? This simplifies coding a lot, but is wasteful. - // Other options: (1) reuse memory when a thread finishes. con: tricky - // (2) allocate block B memory in each thread. con: overhead - FixedSizeVector<RhsScalar *> blockBs(n_blocks); - for (int i = 0; i < n_blocks; i++) { - blockBs.push_back(static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar)))); - } - - // lhs_notifications starts with all null Notifications - FixedSizeVector<Notification*> lhs_notifications(num_threads, nullptr); - - // this should really be numBlockAs * n_blocks; - const Index num_kernel_notifications = num_threads * n_blocks; - FixedSizeVector<Notification*> kernel_notifications(num_kernel_notifications, - nullptr); - - for (Index k_block_idx = 0; k_block_idx < k_blocks; k_block_idx++) { - const Index k_start = k_block_idx * kc; - // make sure we don't overshoot right edge of left matrix - const Index actual_kc = (std::min)(k_start + kc, k) - k_start; - - for (Index m_block_idx = 0; m_block_idx < m_blocks; m_block_idx += numBlockAs) { - const int num_blocks = (std::min)(m_blocks-m_block_idx, numBlockAs); - - for (Index mt_block_idx = m_block_idx; mt_block_idx < m_block_idx+num_blocks; mt_block_idx++) { - const Index m_start = mt_block_idx * mc; - const Index actual_mc = (std::min)(m_start + mc, m) - m_start; - eigen_assert(actual_mc > 0); - - int blockAId = (k_block_idx * m_blocks + mt_block_idx) % num_threads; - - // Wait for 
previous RHS kernels to complete. - for (int i = 0; i < n_blocks; ++i) { - int notification_id = (blockAId * n_blocks + i); - - // Wait for any current kernels using this slot to complete - // before using it. - if (kernel_notifications[notification_id]) { - wait_until_ready(kernel_notifications[notification_id]); - delete kernel_notifications[notification_id]; - } - kernel_notifications[notification_id] = new Notification(); - } - const packLArg arg = { - blockAs[blockAId], // blockA - lhs, // lhs - m_start, // m - k_start, // k - actual_mc, // mc - actual_kc, // kc - }; - - // Delete any existing notification since we may be - // replacing it. The algorithm should ensure that there are - // no existing waiters on this notification. - delete lhs_notifications[blockAId]; - lhs_notifications[blockAId] = - this->m_device.enqueue(&Self::packLhs<packLArg, LhsPacker>, arg); - } - - // now start kernels. - const Index m_base_start = m_block_idx * mc; - const bool need_to_pack = m_block_idx == 0; - - for (Index n_block_idx = 0; n_block_idx < n_blocks; n_block_idx++) { - const Index n_start = n_block_idx * nc; - const Index actual_nc = (std::min)(n_start + nc, n) - n_start; - - // first make sure the previous kernels are all done before overwriting rhs. Also wait if - // we're going to start new k. In both cases need_to_pack is true. 
- if (need_to_pack) { - for (int i = num_blocks; i < num_threads; ++i) { - Index blockAId = (k_block_idx * m_blocks + i + m_block_idx) % num_threads; - Index future_id = (blockAId * n_blocks + n_block_idx); - wait_until_ready(kernel_notifications[future_id]); - } - } - - packRKArg arg = { - &blockAs, // blockA - blockBs[n_block_idx], // blockB - rhs, // rhs - output, // output - m_base_start, // m - k_start, // k - n_start, // n - mc, // mc - actual_kc, // kc - actual_nc, // nc - num_threads, - numBlockAs, - m, - k_block_idx, - m_block_idx, - n_block_idx, // n_block_idx - m_blocks, // m_blocks - n_blocks, // n_blocks - &kernel_notifications, // kernel_notifications - &lhs_notifications, // lhs_notifications - need_to_pack, // need_to_pack - }; - - // We asynchronously kick off this function, which ends up - // notifying the appropriate kernel_notifications objects, - // which this thread waits on before exiting. - // - // The wait for kernel_notifications below ensures that we - // don't have to keep track of the launch of this work. - this->m_device.enqueue_and_forget(&Self::packRhsAndKernel<packRKArg, RhsPacker, GebpKernel>, arg); - } - } - } - - // Make sure all the kernels are done. - for (int i = 0; i < kernel_notifications.size(); ++i) { - wait_until_ready(kernel_notifications[i]); - delete kernel_notifications[i]; - } - - // No need to wait for lhs notifications since they should have - // already been waited on. Just clean them up. - for (int i = 0; i < lhs_notifications.size(); ++i) { - delete lhs_notifications[i]; - } - - // deallocate all of the memory for both A and B's - for (int i = 0; i < blockAs.size(); i++) { - this->m_device.deallocate(blockAs[i]); - } - for (int i = 0; i < blockBs.size(); i++) { - this->m_device.deallocate(blockBs[i]); - } - } - - /* - * Packs a LHS block of size (mt, kc) starting at lhs(m, k). 
Before packing - * the LHS block, check that all of the kernels that worked on the same - * mt_block_idx in the previous m_block are done. - */ - template <typename packLArg, typename LhsPacker> - static void packLhs(const packLArg arg) { - // perform actual packing - LhsPacker pack_lhs; - pack_lhs(arg.blockA, arg.lhs.getSubMapper(arg.m_start, arg.k_start), arg.kc, arg.mc); - } - - /* - * Packs a RHS block of size (kc, nc) starting at (k, n) after checking that - * all kernels in the previous block are done. - * Then for each LHS future, we wait on the future and then call GEBP - * on the area packed by the future (which starts at - * blockA + future_idx * mt * kc) on the LHS and with the full packed - * RHS block. - * The output of this GEBP is written to output(m + i * mt, n). - */ - template <typename packRKArg, typename RhsPacker, typename GebpKernel> - static void packRhsAndKernel(packRKArg arg) { - if (arg.need_to_pack) { - RhsPacker pack_rhs; - pack_rhs(arg.blockB, arg.rhs.getSubMapper(arg.k, arg.n), arg.kc, arg.nc); - } - - GebpKernel gebp; - for (Index mt_block_idx = 0; mt_block_idx < arg.num_blockAs; mt_block_idx++) { - const Index m_base_start = arg.m + arg.mc*mt_block_idx; - if (m_base_start < arg.max_m) { - int blockAId = (arg.k_block_idx * arg.m_blocks + mt_block_idx + arg.m_block_idx) % arg.num_threads; - wait_until_ready((*arg.lhs_notifications)[blockAId]); - const Index actual_mc = (std::min)(m_base_start + arg.mc, arg.max_m) - m_base_start; - gebp(arg.output.getSubMapper(m_base_start, arg.n), - (*arg.blockAs)[blockAId], arg.blockB, - actual_mc, arg.kc, arg.nc, Scalar(1), -1, -1, 0, 0); - - // Notify that the kernel is done. 
- const Index set_idx = blockAId * arg.n_blocks + arg.n_block_idx; - (*arg.kernel_notifications)[set_idx]->Notify(); - } - } - } - - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - void evalGemmByRows(Scalar* buffer) const { - // columns in left side, rows in right side - const Index k = this->m_k_size; - - // rows in left side - const Index m = this->m_i_size; - - // columns in right side - const Index n = this->m_j_size; - - // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) - this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); - - const int lhs_packet_size = PacketType<LhsScalar, ThreadPoolDevice>::size; - const int rhs_packet_size = PacketType<RhsScalar, ThreadPoolDevice>::size; - - typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs, - LeftEvaluator, left_nocontract_t, - contract_t, lhs_packet_size, - lhs_inner_dim_contiguous, - false, Unaligned> LhsMapper; - - typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs, - RightEvaluator, right_nocontract_t, - contract_t, rhs_packet_size, - rhs_inner_dim_contiguous, - rhs_inner_dim_reordered, Unaligned> RhsMapper; - - typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper; - - // TODO: packing could be faster sometimes if we supported row major tensor mappers - typedef internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, Traits::mr, - Traits::LhsProgress, ColMajor> LhsPacker; - typedef internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> RhsPacker; - - // TODO: replace false, false with conjugate values? 
- typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper, - Traits::mr, Traits::nr, false, false> GebpKernel; - - typedef internal::packRhsArg<RhsScalar, RhsMapper, Index> packRArg; - typedef internal::packLhsAndKernelArg<LhsScalar, RhsScalar, LhsMapper, OutputMapper, Index> packLKArg; - - // initialize data mappers - LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, - this->m_left_contracting_strides, this->m_k_strides); - - RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, - this->m_right_contracting_strides, this->m_k_strides); - - OutputMapper output(buffer, m); - - RhsPacker pack_rhs; - - // compute block sizes (which depend on number of threads) - const Index num_threads = this->m_device.numThreads(); - Index mc = m; - Index nc = n; - Index kc = k; - internal::ComputeGemmByRowBlockingSizes<LhsScalar,RhsScalar,1,Index> block; - block(kc, mc, nc, num_threads); - eigen_assert(mc <= m); - eigen_assert(nc <= n); - eigen_assert(kc <= k); - -#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b)) - const Index k_blocks = CEIL_DIV(k, kc); - const Index n_blocks = CEIL_DIV(n, nc); - const Index m_blocks = CEIL_DIV(m, mc); -#undef CEIL_DIV - - - const int sizeA = mc * kc; - const int sizeB = kc * nc; - - const Index numBlockBs = (std::min)(num_threads, n_blocks); - FixedSizeVector<RhsScalar *> blockBs(num_threads); - for (int i = 0; i < num_threads; i++) { - blockBs.push_back(static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar)))); - } - - FixedSizeVector<LhsScalar *> blockAs(m_blocks); - for (int i = 0; i < m_blocks; i++) { - blockAs.push_back(static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar)))); - } - - // lhs_notifications starts with all null Notifications - FixedSizeVector<Notification*> rhs_notifications(num_threads, nullptr); - - // this should really be numBlockBs * m_blocks; - const Index num_kernel_notifications = num_threads * 
m_blocks; - FixedSizeVector<Notification*> kernel_notifications(num_kernel_notifications, - nullptr); - - for (Index k_block_idx = 0; k_block_idx < k_blocks; k_block_idx++) { - const Index k_start = k_block_idx * kc; - // make sure we don't overshoot right edge of left matrix - const Index actual_kc = (std::min)(k_start + kc, k) - k_start; - - for (Index n_block_idx = 0; n_block_idx < n_blocks; n_block_idx += numBlockBs) { - const int num_blocks = (std::min)(n_blocks-n_block_idx, numBlockBs); - - for (Index nt_block_idx = n_block_idx; nt_block_idx < n_block_idx+num_blocks; nt_block_idx++) { - const Index n_start = nt_block_idx * nc; - const Index actual_nc = (std::min)(n_start + nc, n) - n_start; - eigen_assert(actual_nc > 0); - - int blockBId = (k_block_idx * n_blocks + nt_block_idx) % num_threads; - // Wait for previous RHS kernels to complete. - for (int i = 0; i < m_blocks; ++i) { - int notification_id = (blockBId * m_blocks + i); - - // Wait for any current kernels using this slot to complete - // before using it. - if (kernel_notifications[notification_id]) { - wait_until_ready(kernel_notifications[notification_id]); - delete kernel_notifications[notification_id]; - } - kernel_notifications[notification_id] = new Notification(); - } - const packRArg arg = { - blockBs[blockBId], // blockB - rhs, // rhs - n_start, // n - k_start, // k - actual_nc, // nc - actual_kc, // kc - }; - - // Delete any existing notification since we may be - // replacing it. The algorithm should ensure that there are - // no existing waiters on this notification. - delete rhs_notifications[blockBId]; - rhs_notifications[blockBId] = - this->m_device.enqueue(&Self::packRhs<packRArg, RhsPacker>, arg); - } - - // now start kernels. 
- const Index n_base_start = n_block_idx * nc; - const bool need_to_pack = n_block_idx == 0; - - for (Index m_block_idx = 0; m_block_idx < m_blocks; m_block_idx++) { - const Index m_start = m_block_idx * mc; - const Index actual_mc = (std::min)(m_start + mc, m) - m_start; - - // first make sure the previous kernels are all done before overwriting rhs. Also wait if - // we're going to start new k. In both cases need_to_pack is true. - if (need_to_pack) { - for (int i = num_blocks; i < num_threads; ++i) { - Index blockBId = (k_block_idx * n_blocks + i + n_block_idx) % num_threads; - Index future_id = (blockBId * m_blocks + m_block_idx); - wait_until_ready(kernel_notifications[future_id]); - } - } - - packLKArg arg = { - &blockBs, // blockB - blockAs[m_block_idx], // blockA - lhs, // lhs - output, // output - m_start, // m - k_start, // k - n_base_start, // n - actual_mc, // mc - actual_kc, // kc - nc, // nc - num_threads, - numBlockBs, - n, - k_block_idx, - m_block_idx, - n_block_idx, - m_blocks, - n_blocks, - &kernel_notifications, - &rhs_notifications, - need_to_pack, - }; - - // We asynchronously kick off this function, which ends up - // notifying the appropriate kernel_notifications objects, - // which this thread waits on before exiting. - // - // The wait for kernel_notifications below ensures that we - // don't have to keep track of the launch of this work. - this->m_device.enqueue_and_forget(&Self::packLhsAndKernel<packLKArg, LhsPacker, GebpKernel>, arg); - } - } - } - - // Make sure all the kernels are done. - for (int i = 0; i < kernel_notifications.size(); ++i) { - wait_until_ready(kernel_notifications[i]); - delete kernel_notifications[i]; - } - - // No need to wait for lhs notifications since they should have - // already been waited on. Just clean them up. 
- for (int i = 0; i < rhs_notifications.size(); ++i) { - delete rhs_notifications[i]; - } - - // deallocate all of the memory for both A and B's - for (int i = 0; i < blockAs.size(); i++) { - this->m_device.deallocate(blockAs[i]); - } - for (int i = 0; i < blockBs.size(); i++) { - this->m_device.deallocate(blockBs[i]); - } - } - - template <typename packRArg, typename RhsPacker> - static void packRhs(const packRArg arg) { - // perform actual packing - RhsPacker pack_rhs; - pack_rhs(arg.blockB, arg.rhs.getSubMapper(arg.k_start, arg.n_start), arg.kc, arg.nc); - } - - template <typename packLKArg, typename LhsPacker, typename GebpKernel> - static void packLhsAndKernel(packLKArg arg) { - if (arg.need_to_pack) { - LhsPacker pack_lhs; - pack_lhs(arg.blockA, arg.lhs.getSubMapper(arg.m, arg.k), arg.kc, arg.mc); - } - - GebpKernel gebp; - for (Index nt_block_idx = 0; nt_block_idx < arg.num_blockBs; nt_block_idx++) { - const Index n_base_start = arg.n + arg.nc*nt_block_idx; - if (n_base_start < arg.max_n) { - int blockBId = (arg.k_block_idx * arg.n_blocks + nt_block_idx + arg.n_block_idx) % arg.num_threads; - wait_until_ready((*arg.rhs_notifications)[blockBId]); - const Index actual_nc = (std::min)(n_base_start + arg.nc, arg.max_n) - n_base_start; - gebp(arg.output.getSubMapper(arg.m, n_base_start), - arg.blockA, (*arg.blockBs)[blockBId], - arg.mc, arg.kc, actual_nc, Scalar(1), -1, -1, 0, 0); - - // Notify that the kernel is done. 
- const Index set_idx = blockBId * arg.m_blocks + arg.m_block_idx; - (*arg.kernel_notifications)[set_idx]->Notify(); - } - } - } -}; - -} // end namespace Eigen - -#endif // EIGEN_USE_THREADS -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h deleted file mode 100644 index d54091fa1c..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ /dev/null @@ -1,226 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H - -namespace Eigen { - -/** \class TensorConversionOp - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor conversion class. This class makes it possible to vectorize - * type casting operations when the number of scalars per packet in the source - * and the destination type differ - */ -namespace internal { -template<typename TargetType, typename XprType> -struct traits<TensorConversionOp<TargetType, XprType> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs are different. 
- typedef TargetType Scalar; - typedef typename traits<XprType>::StorageKind StorageKind; - typedef typename traits<XprType>::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = traits<XprType>::NumDimensions; - static const int Layout = traits<XprType>::Layout; - enum { Flags = 0 }; -}; - -template<typename TargetType, typename XprType> -struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense> -{ - typedef const TensorConversionOp<TargetType, XprType>& type; -}; - -template<typename TargetType, typename XprType> -struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type> -{ - typedef TensorConversionOp<TargetType, XprType> type; -}; - -} // end namespace internal - - -template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio> -struct PacketConverter { - PacketConverter(const TensorEvaluator& impl) - : m_impl(impl) {} - - template<int LoadMode, typename Index> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { - return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index)); - } - - private: - const TensorEvaluator& m_impl; -}; - - -template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> -struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> { - PacketConverter(const TensorEvaluator& impl) - : m_impl(impl) {} - - template<int LoadMode, typename Index> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { - const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; - - SrcPacket src1 = m_impl.template packet<LoadMode>(index); - SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize); - TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2); - return result; - } - - private: - const TensorEvaluator& 
m_impl; -}; - -template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> -struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> { - PacketConverter(const TensorEvaluator& impl) - : m_impl(impl) {} - - template<int LoadMode, typename Index> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { - const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; - - SrcPacket src1 = m_impl.template packet<LoadMode>(index); - SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize); - SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize); - SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize); - TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4); - return result; - } - - private: - const TensorEvaluator& m_impl; -}; - - -template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> -struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> { - PacketConverter(const TensorEvaluator& impl) - : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {} - - template<int LoadMode, typename Index> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { - const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; - if (index + SrcPacketSize < m_maxIndex) { - return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index)); - } else { - const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size; - EIGEN_ALIGN_DEFAULT typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize]; - for (int i = 0; i < TgtPacketSize; ++i) { - values[i] = m_impl.coeff(index+i); - } - TgtPacket rslt = internal::pload<TgtPacket>(values); - return rslt; - } - } - - private: - const TensorEvaluator& m_impl; - const typename TensorEvaluator::Index m_maxIndex; -}; - -template<typename TargetType, typename XprType> -class TensorConversionOp : public 
TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename internal::traits<TensorConversionOp>::Scalar Scalar; - typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind; - typedef typename internal::traits<TensorConversionOp>::Index Index; - typedef typename internal::nested<TensorConversionOp>::type Nested; - typedef Scalar CoeffReturnType; - typedef typename NumTraits<Scalar>::Real RealScalar; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr) - : m_xpr(xpr) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; -}; - - - - -// Eval as rvalue -template<typename TargetType, typename ArgType, typename Device> -struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device> -{ - typedef TensorConversionOp<TargetType, ArgType> XprType; - typedef typename XprType::Index Index; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - typedef TargetType Scalar; - typedef TargetType CoeffReturnType; - typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename PacketType<SrcType, Device>::type PacketSourceType; - - enum { - IsAligned = false, - PacketAccess = - TensorEvaluator<ArgType, Device>::PacketAccess && - internal::type_casting_traits<SrcType, TargetType>::VectorizedCast, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* 
data) - { - if (internal::is_same<TargetType, SrcType>::value) { - return m_impl.evalSubExprsIfNeeded((SrcType*)data); - } - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() - { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - internal::scalar_cast_op<SrcType, TargetType> converter; - return converter(m_impl.coeff(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio; - const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio; - PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType, - SrcCoeffRatio, TgtCoeffRatio> converter(m_impl); - return converter.template packet<LoadMode>(index); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - TensorEvaluator<ArgType, Device> m_impl; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h deleted file mode 100644 index 58cae7162c..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ /dev/null @@ -1,1076 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H - -namespace Eigen { - -/** \class TensorConvolution - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor convolution class. - * - * - */ -namespace internal { - -template <typename Index, typename InputDims, size_t NumKernelDims, int Layout> -class IndexMapper { - public: - IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims, - const array<Index, NumKernelDims>& indices) { - - array<Index, NumDims> dimensions = input_dims; - for (int i = 0; i < NumKernelDims; ++i) { - const Index index = indices[i]; - const Index input_dim = input_dims[index]; - const Index kernel_dim = kernel_dims[i]; - const Index result_dim = input_dim - kernel_dim + 1; - dimensions[index] = result_dim; - } - - array<Index, NumDims> inputStrides; - array<Index, NumDims> outputStrides; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - inputStrides[0] = 1; - outputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - inputStrides[i] = inputStrides[i-1] * input_dims[i-1]; - outputStrides[i] = outputStrides[i-1] * dimensions[i-1]; - } - } else { - inputStrides[NumDims - 1] = 1; - outputStrides[NumDims - 1] = 1; - for (int i = static_cast<int>(NumDims) - 2; i >= 0; --i) { - inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1]; - outputStrides[i] = outputStrides[i + 1] * dimensions[i + 1]; - } - } - - array<Index, NumDims> cudaInputDimensions; - array<Index, NumDims> cudaOutputDimensions; - array<Index, NumDims> tmp = dimensions; - array<Index, NumDims> ordering; - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 
0 - : NumDims - NumKernelDims; - for (int i = 0; i < NumKernelDims; ++i) { - const Index index = i + offset; - ordering[index] = indices[i]; - tmp[indices[i]] = -1; - cudaInputDimensions[index] = input_dims[indices[i]]; - cudaOutputDimensions[index] = dimensions[indices[i]]; - } - - int written = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? NumKernelDims - : 0; - for (int i = 0; i < NumDims; ++i) { - if (tmp[i] >= 0) { - ordering[written] = i; - cudaInputDimensions[written] = input_dims[i]; - cudaOutputDimensions[written] = dimensions[i]; - ++written; - } - } - - for (int i = 0; i < NumDims; ++i) { - m_inputStrides[i] = inputStrides[ordering[i]]; - m_outputStrides[i] = outputStrides[ordering[i]]; - } - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < NumDims; ++i) { - if (i > NumKernelDims) { - m_cudaInputStrides[i] = - m_cudaInputStrides[i - 1] * cudaInputDimensions[i - 1]; - m_cudaOutputStrides[i] = - m_cudaOutputStrides[i - 1] * cudaOutputDimensions[i - 1]; - } else { - m_cudaInputStrides[i] = 1; - m_cudaOutputStrides[i] = 1; - } - } - } else { - for (int i = NumDims - 1; i >= 0; --i) { - if (i + 1 < offset) { - m_cudaInputStrides[i] = - m_cudaInputStrides[i + 1] * cudaInputDimensions[i + 1]; - m_cudaOutputStrides[i] = - m_cudaOutputStrides[i + 1] * cudaOutputDimensions[i + 1]; - } else { - m_cudaInputStrides[i] = 1; - m_cudaOutputStrides[i] = 1; - } - } - } - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputPlaneToTensorInputOffset(Index p) const { - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int d = NumDims - 1; d > NumKernelDims; --d) { - const Index idx = p / m_cudaInputStrides[d]; - inputIndex += idx * m_inputStrides[d]; - p -= idx * m_cudaInputStrides[d]; - } - inputIndex += p * m_inputStrides[NumKernelDims]; - } else { - int limit = 0; - if (NumKernelDims < NumDims) { - limit = NumDims - NumKernelDims - 1; - } - for (int d = 0; d < limit; ++d) 
{ - const Index idx = p / m_cudaInputStrides[d]; - inputIndex += idx * m_inputStrides[d]; - p -= idx * m_cudaInputStrides[d]; - } - inputIndex += p * m_inputStrides[limit]; - } - return inputIndex; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputPlaneToTensorOutputOffset(Index p) const { - Index outputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int d = NumDims - 1; d > NumKernelDims; --d) { - const Index idx = p / m_cudaOutputStrides[d]; - outputIndex += idx * m_outputStrides[d]; - p -= idx * m_cudaOutputStrides[d]; - } - outputIndex += p * m_outputStrides[NumKernelDims]; - } else { - int limit = 0; - if (NumKernelDims < NumDims) { - limit = NumDims - NumKernelDims - 1; - } - for (int d = 0; d < limit; ++d) { - const Index idx = p / m_cudaOutputStrides[d]; - outputIndex += idx * m_outputStrides[d]; - p -= idx * m_cudaOutputStrides[d]; - } - outputIndex += p * m_outputStrides[limit]; - } - return outputIndex; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i) const { - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 0 - : NumDims - NumKernelDims; - return i * m_inputStrides[offset]; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i) const { - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 0 - : NumDims - NumKernelDims; - return i * m_outputStrides[offset]; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j) const { - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 0 - : NumDims - NumKernelDims; - return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1]; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j) const { - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 
0 - : NumDims - NumKernelDims; - return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1]; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j, Index k) const { - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 0 - : NumDims - NumKernelDims; - return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1] + - k * m_inputStrides[offset + 2]; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j, Index k) const { - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 0 - : NumDims - NumKernelDims; - return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1] + - k * m_outputStrides[offset + 2]; - } - - private: - static const size_t NumDims = internal::array_size<InputDims>::value; - array<Index, NumDims> m_inputStrides; - array<Index, NumDims> m_outputStrides; - array<Index, NumDims> m_cudaInputStrides; - array<Index, NumDims> m_cudaOutputStrides; -}; - - - -template<typename Dimensions, typename InputXprType, typename KernelXprType> -struct traits<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs are different. 
- typedef typename promote_storage_type<typename InputXprType::Scalar, - typename KernelXprType::Scalar>::ret Scalar; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename promote_storage_type<typename traits<InputXprType>::StorageKind, - typename traits<KernelXprType>::StorageKind>::ret StorageKind; - typedef typename promote_index_type<typename traits<InputXprType>::Index, - typename traits<KernelXprType>::Index>::type Index; - typedef typename InputXprType::Nested LhsNested; - typedef typename KernelXprType::Nested RhsNested; - typedef typename remove_reference<LhsNested>::type _LhsNested; - typedef typename remove_reference<RhsNested>::type _RhsNested; - static const int NumDimensions = traits<InputXprType>::NumDimensions; - static const int Layout = traits<InputXprType>::Layout; - - enum { - Flags = 0, - }; -}; - -template<typename Dimensions, typename InputXprType, typename KernelXprType> -struct eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, Eigen::Dense> -{ - typedef const TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>& type; -}; - -template<typename Dimensions, typename InputXprType, typename KernelXprType> -struct nested<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, 1, typename eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >::type> -{ - typedef TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> type; -}; - -} // end namespace internal - - - -template<typename Indices, typename InputXprType, typename KernelXprType> -class TensorConvolutionOp : public TensorBase<TensorConvolutionOp<Indices, InputXprType, KernelXprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorConvolutionOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorConvolutionOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::promote_storage_type<typename InputXprType::CoeffReturnType, - typename 
KernelXprType::CoeffReturnType>::ret CoeffReturnType; - typedef typename internal::promote_storage_type<typename InputXprType::PacketReturnType, - typename KernelXprType::PacketReturnType>::ret PacketReturnType; - typedef typename Eigen::internal::nested<TensorConvolutionOp>::type Nested; - typedef typename Eigen::internal::traits<TensorConvolutionOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorConvolutionOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConvolutionOp(const InputXprType& input, const KernelXprType& kernel, const Indices& dims) - : m_input_xpr(input), m_kernel_xpr(kernel), m_indices(dims) {} - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Indices& indices() const { return m_indices; } - - /** \returns the nested expressions */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const typename internal::remove_all<typename InputXprType::Nested>::type& - inputExpression() const { return m_input_xpr; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const typename internal::remove_all<typename KernelXprType::Nested>::type& - kernelExpression() const { return m_kernel_xpr; } - - protected: - typename InputXprType::Nested m_input_xpr; - typename KernelXprType::Nested m_kernel_xpr; - const Indices m_indices; -}; - - -template<typename Indices, typename InputArgType, typename KernelArgType, typename Device> -struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Device> -{ - typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType; - - static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, Device>::Dimensions>::value; - static const int NumKernelDims = internal::array_size<Indices>::value; - typedef typename XprType::Index Index; - typedef DSizes<Index, NumDims> Dimensions; - - enum { - IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & - TensorEvaluator<KernelArgType, Device>::IsAligned, - PacketAccess = 
TensorEvaluator<InputArgType, Device>::PacketAccess & - TensorEvaluator<KernelArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<InputArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device) - { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); - - const typename TensorEvaluator<InputArgType, Device>::Dimensions& input_dims = m_inputImpl.dimensions(); - const typename TensorEvaluator<KernelArgType, Device>::Dimensions& kernel_dims = m_kernelImpl.dimensions(); - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputStride[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1]; - } - } else { - m_inputStride[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1]; - } - } - - m_dimensions = m_inputImpl.dimensions(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < NumKernelDims; ++i) { - const Index index = op.indices()[i]; - const Index input_dim = input_dims[index]; - const Index kernel_dim = kernel_dims[i]; - const Index result_dim = input_dim - kernel_dim + 1; - m_dimensions[index] = result_dim; - if (i > 0) { - m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1]; - } else { - m_kernelStride[0] = 1; - } - m_indexStride[i] = m_inputStride[index]; - } - - m_outputStride[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1]; - } - } else { - for (int i = 
NumKernelDims - 1; i >= 0; --i) { - const Index index = op.indices()[i]; - const Index input_dim = input_dims[index]; - const Index kernel_dim = kernel_dims[i]; - const Index result_dim = input_dim - kernel_dim + 1; - m_dimensions[index] = result_dim; - if (i < NumKernelDims - 1) { - m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1]; - } else { - m_kernelStride[NumKernelDims - 1] = 1; - } - m_indexStride[i] = m_inputStride[index]; - } - - m_outputStride[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1]; - } - } - } - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { - m_inputImpl.evalSubExprsIfNeeded(NULL); - preloadKernel(); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_inputImpl.cleanup(); - if (m_local_kernel) { - m_device.deallocate((void*)m_kernel); - m_local_kernel = false; - } - m_kernel = NULL; - } - - void evalTo(typename XprType::Scalar* buffer) { - evalSubExprsIfNeeded(NULL); - for (int i = 0; i < dimensions().TotalSize(); ++i) { - buffer[i] += coeff(i); - } - cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - CoeffReturnType result = CoeffReturnType(0); - convolve(firstInput(index), 0, NumKernelDims-1, result); - return result; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC PacketReturnType packet(const Index index) const - { - const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - Index indices[2] = {index, index+PacketSize-1}; - Index startInputs[2] = {0, 0}; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; 
--i) { - const Index idx0 = indices[0] / m_outputStride[i]; - const Index idx1 = indices[1] / m_outputStride[i]; - startInputs[0] += idx0 * m_inputStride[i]; - startInputs[1] += idx1 * m_inputStride[i]; - indices[0] -= idx0 * m_outputStride[i]; - indices[1] -= idx1 * m_outputStride[i]; - } - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx0 = indices[0] / m_outputStride[i]; - const Index idx1 = indices[1] / m_outputStride[i]; - startInputs[0] += idx0 * m_inputStride[i]; - startInputs[1] += idx1 * m_inputStride[i]; - indices[0] -= idx0 * m_outputStride[i]; - indices[1] -= idx1 * m_outputStride[i]; - } - } - startInputs[0] += indices[0]; - startInputs[1] += indices[1]; - - if (startInputs[1]-startInputs[0] == PacketSize-1) { - PacketReturnType result = internal::pset1<PacketReturnType>(0); - convolvePacket(startInputs[0], 0, NumKernelDims-1, result); - return result; - } else { - EIGEN_ALIGN_DEFAULT Scalar data[PacketSize]; - data[0] = Scalar(0); - convolve(startInputs[0], 0, NumKernelDims-1, data[0]); - for (int i = 1; i < PacketSize-1; ++i) { - data[i] = Scalar(0); - convolve(firstInput(index+i), 0, NumKernelDims-1, data[i]); - } - data[PacketSize-1] = Scalar(0); - convolve(startInputs[1], 0, NumKernelDims-1, data[PacketSize-1]); - return internal::pload<PacketReturnType>(data); - } - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - private: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { - Index startInput = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_outputStride[i]; - startInput += idx * m_inputStride[i]; - index -= idx * m_outputStride[i]; - } - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_outputStride[i]; - startInput += idx * m_inputStride[i]; - index -= idx * m_outputStride[i]; - } - } - startInput += index; - return startInput; - } - - EIGEN_DEVICE_FUNC void 
convolve(Index firstIndex, Index firstKernel, int DimIndex, CoeffReturnType& accum) const { - for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) { - const Index input = firstIndex + j * m_indexStride[DimIndex]; - const Index kernel = firstKernel + j * m_kernelStride[DimIndex]; - if (DimIndex > 0) { - convolve(input, kernel, DimIndex-1, accum); - } else { - accum += m_inputImpl.coeff(input) * m_kernel[kernel]; - } - } - } - - template <typename Packet> - EIGEN_DEVICE_FUNC void convolvePacket(Index firstIndex, Index firstKernel, int DimIndex, Packet& accum) const { - for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) { - const Index input = firstIndex + j * m_indexStride[DimIndex]; - const Index kernel = firstKernel + j * m_kernelStride[DimIndex]; - if (DimIndex > 0) { - convolvePacket(input, kernel, DimIndex-1, accum); - } else { - accum = internal::pmadd<Packet>(m_inputImpl.template packet<Unaligned>(input), internal::pset1<Packet>(m_kernel[kernel]), accum); - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() { - // Don't make a local copy of the kernel unless we have to (i.e. 
it's an - // expression that needs to be evaluated) - const Scalar* in_place = m_kernelImpl.data(); - if (in_place) { - m_kernel = in_place; - m_local_kernel = false; - } else { - size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar); - Scalar* local = (Scalar*)m_device.allocate(kernel_sz); - typedef TensorEvalToOp<const KernelArgType> EvalTo; - EvalTo evalToTmp(local, m_kernelArg); - const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value; - const bool BlockAccess = false; - internal::TensorExecutor<const EvalTo, Device, PacketAccess, BlockAccess>::run(evalToTmp, m_device); - - m_kernel = local; - m_local_kernel = true; - } - } - - array<Index, NumDims> m_inputStride; - array<Index, NumDims> m_outputStride; - - array<Index, NumKernelDims> m_indexStride; - array<Index, NumKernelDims> m_kernelStride; - TensorEvaluator<InputArgType, Device> m_inputImpl; - TensorEvaluator<KernelArgType, Device> m_kernelImpl; - Dimensions m_dimensions; - - KernelArgType m_kernelArg; - const Scalar* m_kernel; - bool m_local_kernel; - const Device& m_device; -}; - - - - -// Use an optimized implementation of the evaluation code for GPUs whenever possible. 
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) - -template <int StaticKernelSize> -struct GetKernelSize { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int /*kernelSize*/) const { - return StaticKernelSize; - } -}; -template <> -struct GetKernelSize<Dynamic> { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int kernelSize) const { - return kernelSize; - } -}; - -template <typename InputEvaluator, typename Index, typename InputDims, - int StaticKernelSize> -__global__ void EigenConvolutionKernel1D( - InputEvaluator eval, - const internal::IndexMapper<Index, InputDims, 1, InputEvaluator::Layout> - indexMapper, - const float* __restrict kernel, const int numPlanes, const int numX, - const int maxX, const int kernelSize, float* buffer) { - extern __shared__ float s[]; - - const int first_x = blockIdx.x * maxX; - const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1; - const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSize>()(kernelSize); - const int num_x_output = last_x - first_x + 1; - - const int first_plane = blockIdx.y * blockDim.y; - const int plane_stride = blockDim.y * gridDim.y; - - for (int p = first_plane + threadIdx.y; p < numPlanes; p += plane_stride) { - // Load inputs to shared memory - const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p); - const int plane_kernel_offset = threadIdx.y * num_x_input; - #pragma unroll - for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { - const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x); - s[i + plane_kernel_offset] = eval.coeff(tensor_index); - } - - __syncthreads(); - - // Compute the convolution - const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p); - - #pragma unroll - for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { - const int kernel_offset = plane_kernel_offset + i; - float result = 0.0f; - #pragma 
unroll - for (int k = 0; k < GetKernelSize<StaticKernelSize>()(kernelSize); ++k) { - result += s[k + kernel_offset] * kernel[k]; - } - const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x); - buffer[tensor_index] = result; - } - __syncthreads(); - } -}; - -template <typename InputEvaluator, typename Index, typename InputDims, - int StaticKernelSizeX, int StaticKernelSizeY> -__global__ __launch_bounds__(1024, 1) void EigenConvolutionKernel2D( - InputEvaluator eval, - const internal::IndexMapper<Index, InputDims, 2, InputEvaluator::Layout> - indexMapper, - const float* __restrict kernel, const int numPlanes, const int numX, - const int maxX, const int numY, const int maxY, const int kernelSizeX, - const int kernelSizeY, float* buffer) { - extern __shared__ float s[]; - - const int first_x = blockIdx.x * maxX; - const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1; - const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSizeX>()(kernelSizeX); - const int num_x_output = last_x - first_x + 1; - - const int first_y = blockIdx.y * maxY; - const int last_y = (first_y + maxY < numY ? 
first_y + maxY : numY) - 1; - const int num_y_input = last_y - first_y + GetKernelSize<StaticKernelSizeY>()(kernelSizeY); - const int num_y_output = last_y - first_y + 1; - - const int first_plane = blockIdx.z * blockDim.z; - const int plane_stride = blockDim.z * gridDim.z; - - for (int p = first_plane + threadIdx.z; p < numPlanes; p += plane_stride) { - - const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p); - const int plane_kernel_offset = threadIdx.z * num_y_input; - - // Load inputs to shared memory - #pragma unroll - for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) { - const int input_offset = num_x_input * (j + plane_kernel_offset); - #pragma unroll - for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { - const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x, j+first_y); - s[i + input_offset] = eval.coeff(tensor_index); - } - } - - __syncthreads(); - - // Convolution - const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p); - - #pragma unroll - for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) { - #pragma unroll - for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { - float result = 0.0f; - #pragma unroll - for (int l = 0; l < GetKernelSize<StaticKernelSizeY>()(kernelSizeY); ++l) { - const int kernel_offset = kernelSizeX * l; - const int input_offset = i + num_x_input * (j + l + plane_kernel_offset); - #pragma unroll - for (int k = 0; k < GetKernelSize<StaticKernelSizeX>()(kernelSizeX); ++k) { - result += s[k + input_offset] * kernel[k + kernel_offset]; - } - } - const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x, j+first_y); - buffer[tensor_index] = result; - } - } - - __syncthreads(); - } -}; - -template <typename InputEvaluator, typename Index, typename InputDims> -__global__ void EigenConvolutionKernel3D( - InputEvaluator eval, - const 
internal::IndexMapper<Index, InputDims, 3, InputEvaluator::Layout> - indexMapper, - const float* __restrict kernel, const size_t numPlanes, const size_t numX, - const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ, - const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY, - const size_t kernelSizeZ, float* buffer) { - extern __shared__ float s[]; - - // Load inputs to shared memory - const int first_x = blockIdx.x * maxX; - const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1; - const int num_x_input = last_x - first_x + kernelSizeX; - - const int first_y = blockIdx.y * maxY; - const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1; - const int num_y_input = last_y - first_y + kernelSizeY; - - const int first_z = blockIdx.z * maxZ; - const int last_z = (first_z + maxZ < numZ ? first_z + maxZ : numZ) - 1; - const int num_z_input = last_z - first_z + kernelSizeZ; - - for (int p = 0; p < numPlanes; ++p) { - - const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p); - const int plane_kernel_offset = 0; - - for (int k = threadIdx.z; k < num_z_input; k += blockDim.z) { - for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) { - for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { - const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x, j+first_y, k+first_z); - s[i + num_x_input * (j + num_y_input * (k + plane_kernel_offset))] = eval.coeff(tensor_index); - } - } - } - - __syncthreads(); - - // Convolution - const int num_z_output = last_z - first_z + 1; - const int num_y_output = last_y - first_y + 1; - const int num_x_output = last_x - first_x + 1; - const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p); - - for (int k = threadIdx.z; k < num_z_output; k += blockDim.z) { - for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) { - for (int i = threadIdx.x; i < 
num_x_output; i += blockDim.x) { - float result = 0.0f; - for (int n = 0; n < kernelSizeZ; ++n) { - for (int m = 0; m < kernelSizeY; ++m) { - for (int l = 0; l < kernelSizeX; ++l) { - result += s[i + l + num_x_input * (j + m + num_y_input * (k + n + plane_kernel_offset))] * kernel[l + kernelSizeX * (m + kernelSizeY * n)]; - } - } - } - const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x, j+first_y, k+first_z); - buffer[tensor_index] = result; - } - } - } - __syncthreads(); - } -}; - - - -template<typename Indices, typename InputArgType, typename KernelArgType> -struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, GpuDevice> -{ - typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType; - - static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions>::value; - static const int NumKernelDims = internal::array_size<Indices>::value; - typedef typename XprType::Index Index; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions KernelDimensions; - - enum { - IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned & - TensorEvaluator<KernelArgType, GpuDevice>::IsAligned, - PacketAccess = false, - BlockAccess = false, - Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device) - : m_inputImpl(op.inputExpression(), device), m_kernelArg(op.kernelExpression()), m_kernelImpl(op.kernelExpression(), device), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device) - { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); - - const typename 
TensorEvaluator<InputArgType, GpuDevice>::Dimensions& input_dims = m_inputImpl.dimensions(); - const typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions& kernel_dims = m_kernelImpl.dimensions(); - - m_dimensions = m_inputImpl.dimensions(); - for (int i = 0; i < NumKernelDims; ++i) { - const Index index = op.indices()[i]; - const Index input_dim = input_dims[index]; - const Index kernel_dim = kernel_dims[i]; - const Index result_dim = input_dim - kernel_dim + 1; - m_dimensions[index] = result_dim; - } - } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename InputArgType::Scalar Scalar; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { - preloadKernel(); - m_inputImpl.evalSubExprsIfNeeded(NULL); - if (data) { - executeEval(data); - return false; - } else { - m_buf = (Scalar*)m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)); - executeEval(m_buf); - return true; - } - } - - EIGEN_STRONG_INLINE void cleanup() { - m_inputImpl.cleanup(); - if (m_buf) { - m_device.deallocate(m_buf); - m_buf = NULL; - } - if (m_local_kernel) { - m_device.deallocate((void*)m_kernel); - m_local_kernel = false; - } - m_kernel = NULL; - } - - EIGEN_STRONG_INLINE void preloadKernel() { - // Don't make a local copy of the kernel unless we have to (i.e. 
it's an - // expression that needs to be evaluated) - const Scalar* in_place = m_kernelImpl.data(); - if (in_place) { - m_kernel = in_place; - m_local_kernel = false; - } else { - size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar); - Scalar* local = (Scalar*)m_device.allocate(kernel_sz); - typedef TensorEvalToOp<const KernelArgType> EvalTo; - EvalTo evalToTmp(local, m_kernelArg); - const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value; - const bool BlockAccess = false; - internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess, BlockAccess>::run(evalToTmp, m_device); - - m_kernel = local; - m_local_kernel = true; - } - } - - static unsigned int ceil(unsigned int num, unsigned int denom) { - const unsigned int rounded_toward_zero = num / denom; - if (num > rounded_toward_zero * denom) { - return rounded_toward_zero + 1; - } - return rounded_toward_zero; - } - - void executeEval(Scalar* data) const { - typedef typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions InputDims; - - const int maxSharedMem = m_device.sharedMemPerBlock(); - const int maxThreadsPerBlock = m_device.maxCudaThreadsPerBlock(); - const int maxBlocksPerProcessor = m_device.maxCudaThreadsPerMultiProcessor() / maxThreadsPerBlock; - const int numMultiProcessors = m_device.getNumCudaMultiProcessors(); - const int warpSize = 32; - - switch (NumKernelDims) { - case 1: { - const int kernel_size = m_kernelImpl.dimensions().TotalSize(); - - const int numX = dimensions()[m_indices[0]]; - const int numP = dimensions().TotalSize() / numX; - int maxX; - dim3 block_size; - - const int single_stride_dim = - static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 
0 - : m_inputImpl.dimensions().rank() - 1; - if (m_indices[0] == single_stride_dim) { - // Maximum the reuse - const int inner_dim = ((maxSharedMem / (sizeof(Scalar)) - kernel_size + 1 + 31) / 32) * 32; - maxX = (std::min<int>)(inner_dim, numX); - const int maxP = (std::min<int>)(maxSharedMem / ((kernel_size - 1 + maxX) * sizeof(Scalar)), numP); - block_size.x = numext::mini(maxThreadsPerBlock, maxX); - block_size.y = (std::min<int>)(maxThreadsPerBlock / block_size.x, maxP); - } - else { - // Read as much as possible alongside the inner most dimension, that is the plane - const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar)); - const int maxP = (std::min<int>)(inner_dim, numP); - maxX = (std::min<int>)(maxSharedMem / (inner_dim * sizeof(Scalar)) - kernel_size + 1, numX); - - block_size.x = numext::mini(warpSize, maxX); - block_size.y = (std::min<int>)(maxThreadsPerBlock/block_size.x, maxP); - } - - const int shared_mem = block_size.y * (maxX + kernel_size - 1) * sizeof(Scalar); - assert(shared_mem <= maxSharedMem); - - const int num_x_blocks = ceil(numX, maxX); - const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem); - const int num_y_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks); - - dim3 num_blocks(num_x_blocks, std::min<int>(num_y_blocks, ceil(numP, block_size.y))); - - - //cout << "launching 1D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " maxX: " << maxX << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; - - const array<Index, 1> indices(m_indices[0]); - const array<Index, 1> kernel_dims(m_kernelImpl.dimensions()[0]); - internal::IndexMapper<Index, InputDims, 1, Layout> indexMapper( - m_inputImpl.dimensions(), kernel_dims, indices); - switch(kernel_size) { - case 4: { - 
LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 4, data); - break; - } - case 7: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 7, data); - break; - } - default: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, kernel_size, data); - } - } - break; - } - - case 2: { - const int idxX = - static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 1; - const int idxY = - static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 0; - const int kernel_size_x = m_kernelImpl.dimensions()[idxX]; - const int kernel_size_y = m_kernelImpl.dimensions()[idxY]; - - const int numX = dimensions()[m_indices[idxX]]; - const int numY = dimensions()[m_indices[idxY]]; - const int numP = dimensions().TotalSize() / (numX*numY); - - const float scaling_factor = sqrtf(static_cast<float>(maxSharedMem) / (sizeof(Scalar) * kernel_size_y * kernel_size_x)); - - // Snap maxX to warp size - int inner_dim = ((static_cast<int>(scaling_factor * kernel_size_x) - kernel_size_x + 1 + 32) / 32) * 32; - const int maxX = (std::min<int>)(inner_dim, numX); - const int maxY = (std::min<int>)(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1)) - kernel_size_y + 1, numY); - const int maxP = (std::min<int>)(maxSharedMem / ((kernel_size_x - 1 + maxX) * (kernel_size_y - 1 + maxY) * sizeof(Scalar)), numP); - - dim3 block_size; - block_size.x = numext::mini(1024, maxX); - block_size.y = (std::min<int>)(1024/block_size.x, maxY); - block_size.z = (std::min<int>)(1024/(block_size.x*block_size.y), maxP); - - const 
int shared_mem = block_size.z * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * sizeof(Scalar); - assert(shared_mem <= maxSharedMem); - - const int num_x_blocks = ceil(numX, maxX); - const int num_y_blocks = ceil(numY, maxY); - const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem); - const int num_z_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks * num_y_blocks); - - dim3 num_blocks(num_x_blocks, num_y_blocks, std::min<int>(num_z_blocks, ceil(numP, block_size.z))); - - - //cout << "launching 2D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; - - const array<Index, 2> indices(m_indices[idxX], m_indices[idxY]); - const array<Index, 2> kernel_dims(m_kernelImpl.dimensions()[idxX], - m_kernelImpl.dimensions()[idxY]); - internal::IndexMapper<Index, InputDims, 2, Layout> indexMapper( - m_inputImpl.dimensions(), kernel_dims, indices); - switch (kernel_size_x) { - case 4: { - switch (kernel_size_y) { - case 7: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, 7, data); - break; - } - default: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, kernel_size_y, data); - break; - } - } - break; - } - case 7: { - switch (kernel_size_y) { - case 4: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, 
GpuDevice>, Index, InputDims, 7, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, 4, data); - break; - } - default: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, kernel_size_y, data); - break; - } - } - break; - } - default: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, kernel_size_x, kernel_size_y, data); - break; - } - } - break; - } - - case 3: { - const int idxX = - static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 2; - const int idxY = - static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 1; - const int idxZ = - static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 
2 : 0; - - const int kernel_size_x = m_kernelImpl.dimensions()[idxX]; - const int kernel_size_y = m_kernelImpl.dimensions()[idxY]; - const int kernel_size_z = m_kernelImpl.dimensions()[idxZ]; - - const int numX = dimensions()[m_indices[idxX]]; - const int numY = dimensions()[m_indices[idxY]]; - const int numZ = dimensions()[m_indices[idxZ]]; - const int numP = dimensions().TotalSize() / (numX*numY*numZ); - - const int maxX = (std::min<int>)(128, (std::min<int>)(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX)); - const int maxY = (std::min<int>)(128, (std::min<int>)(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * kernel_size_z) - kernel_size_y + 1, numY)); - const int maxZ = (std::min<int>)(128, (std::min<int>)(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1)) - kernel_size_z + 1, numZ)); - - dim3 block_size; - block_size.x = numext::mini(32, maxX); - block_size.y = numext::mini(32, maxY); - block_size.z = (std::min<int>)(1024/(block_size.x*block_size.y), maxZ); - dim3 num_blocks(ceil(numX, maxX), ceil(numY, maxY), ceil(numZ, maxZ)); - - const int shared_mem = (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * (maxZ + kernel_size_z - 1) * sizeof(Scalar); - assert(shared_mem <= maxSharedMem); - - //cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; - const array<Index, 3> indices(m_indices[idxX], m_indices[idxY], - m_indices[idxZ]); - const array<Index, 3> kernel_dims(m_kernelImpl.dimensions()[idxX], - m_kernelImpl.dimensions()[idxY], - m_kernelImpl.dimensions()[idxZ]); - internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper( - m_inputImpl.dimensions(), kernel_dims, indices); - - 
LAUNCH_CUDA_KERNEL((EigenConvolutionKernel3D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, numZ, maxZ, kernel_size_x, kernel_size_y, kernel_size_z, data); - break; - } - - default: { - EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE); - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - eigen_assert(m_buf); - eigen_assert(index < m_dimensions.TotalSize()); - return m_buf[index]; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(const Index index) const - { - eigen_assert(m_buf); - eigen_assert(index < m_dimensions.TotalSize()); - return internal::ploadt<PacketReturnType, LoadMode>(m_buf+index); - } - - private: - // No assignment (copies are needed by the kernels) - TensorEvaluator& operator = (const TensorEvaluator&); - - TensorEvaluator<InputArgType, GpuDevice> m_inputImpl; - TensorEvaluator<KernelArgType, GpuDevice> m_kernelImpl; - KernelArgType m_kernelArg; - Indices m_indices; - Dimensions m_dimensions; - Scalar* m_buf; - const Scalar* m_kernel; - bool m_local_kernel; - - const GpuDevice& m_device; -}; -#endif - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h deleted file mode 100644 index dc39565d6b..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +++ /dev/null @@ -1,302 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H -#define EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H - -namespace Eigen { - -/** \class TensorCustomUnaryOp - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor custom class. - * - * - */ -namespace internal { -template<typename CustomUnaryFunc, typename XprType> -struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> > -{ - typedef typename XprType::Scalar Scalar; - typedef typename XprType::StorageKind StorageKind; - typedef typename XprType::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = traits<XprType>::NumDimensions; - static const int Layout = traits<XprType>::Layout; -}; - -template<typename CustomUnaryFunc, typename XprType> -struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense> -{ - typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>& type; -}; - -template<typename CustomUnaryFunc, typename XprType> -struct nested<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, 1, typename eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >::type> -{ - typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> type; -}; - -} // end namespace internal - - - -template<typename CustomUnaryFunc, typename XprType> -class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename internal::traits<TensorCustomUnaryOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename internal::nested<TensorCustomUnaryOp>::type Nested; - typedef typename internal::traits<TensorCustomUnaryOp>::StorageKind StorageKind; - typedef typename internal::traits<TensorCustomUnaryOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
TensorCustomUnaryOp(const XprType& expr, const CustomUnaryFunc& func) - : m_expr(expr), m_func(func) {} - - EIGEN_DEVICE_FUNC - const CustomUnaryFunc& func() const { return m_func; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_expr; } - - protected: - typename XprType::Nested m_expr; - const CustomUnaryFunc m_func; -}; - - -// Eval as rvalue -template<typename CustomUnaryFunc, typename XprType, typename Device> -struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Device> -{ - typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> ArgType; - typedef typename internal::traits<ArgType>::Index Index; - static const int NumDims = internal::traits<ArgType>::NumDimensions; - typedef DSizes<Index, NumDims> Dimensions; - typedef - typename internal::remove_const<typename ArgType::Scalar>::type Scalar; - - enum { - IsAligned = false, - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - BlockAccess = false, - Layout = TensorEvaluator<XprType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device) - : m_op(op), m_device(device), m_result(NULL) - { - m_dimensions = op.func().dimensions(op.expression()); - } - - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { - if (data) { - evalTo(data); - return false; - } else { - m_result = static_cast<CoeffReturnType*>( - m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); - evalTo(m_result); - return true; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - if (m_result 
!= NULL) { - m_device.deallocate(m_result); - m_result = NULL; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_result[index]; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { - return internal::ploadt<PacketReturnType, LoadMode>(m_result + index); - } - - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; } - - protected: - EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { - TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result( - data, m_dimensions); - m_op.func().eval(m_op.expression(), result, m_device); - } - - Dimensions m_dimensions; - const ArgType m_op; - const Device& m_device; - CoeffReturnType* m_result; -}; - - - -/** \class TensorCustomBinaryOp - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor custom class. - * - * - */ -namespace internal { -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> -struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> > -{ - typedef typename internal::promote_storage_type<typename LhsXprType::Scalar, - typename RhsXprType::Scalar>::ret Scalar; - typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType, - typename RhsXprType::CoeffReturnType>::ret CoeffReturnType; - typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind, - typename traits<RhsXprType>::StorageKind>::ret StorageKind; - typedef typename promote_index_type<typename traits<LhsXprType>::Index, - typename traits<RhsXprType>::Index>::type Index; - typedef typename LhsXprType::Nested LhsNested; - typedef typename RhsXprType::Nested RhsNested; - typedef typename remove_reference<LhsNested>::type _LhsNested; - typedef typename remove_reference<RhsNested>::type _RhsNested; - static const int NumDimensions = traits<LhsXprType>::NumDimensions; - static const int Layout = traits<LhsXprType>::Layout; -}; - -template<typename CustomBinaryFunc, 
typename LhsXprType, typename RhsXprType> -struct eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Eigen::Dense> -{ - typedef const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>& type; -}; - -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> -struct nested<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, 1, typename eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >::type> -{ - typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> type; -}; - -} // end namespace internal - - - -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> -class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, ReadOnlyAccessors> -{ - public: - typedef typename internal::traits<TensorCustomBinaryOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::traits<TensorCustomBinaryOp>::CoeffReturnType CoeffReturnType; - typedef typename internal::nested<TensorCustomBinaryOp>::type Nested; - typedef typename internal::traits<TensorCustomBinaryOp>::StorageKind StorageKind; - typedef typename internal::traits<TensorCustomBinaryOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const CustomBinaryFunc& func) - - : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_func(func) {} - - EIGEN_DEVICE_FUNC - const CustomBinaryFunc& func() const { return m_func; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename LhsXprType::Nested>::type& - lhsExpression() const { return m_lhs_xpr; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename RhsXprType::Nested>::type& - rhsExpression() const { return m_rhs_xpr; } - - protected: - typename LhsXprType::Nested m_lhs_xpr; - typename RhsXprType::Nested m_rhs_xpr; - const CustomBinaryFunc m_func; -}; - - -// Eval as rvalue 
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, typename Device> -struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Device> -{ - typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> XprType; - typedef typename internal::traits<XprType>::Index Index; - static const int NumDims = internal::traits<XprType>::NumDimensions; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - - enum { - IsAligned = false, - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - BlockAccess = false, - Layout = TensorEvaluator<LhsXprType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_op(op), m_device(device), m_result(NULL) - { - m_dimensions = op.func().dimensions(op.lhsExpression(), op.rhsExpression()); - } - - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { - if (data) { - evalTo(data); - return false; - } else { - m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); - evalTo(m_result); - return true; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - if (m_result != NULL) { - m_device.deallocate(m_result); - m_result = NULL; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_result[index]; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { - return internal::ploadt<PacketReturnType, LoadMode>(m_result + index); - } - - EIGEN_DEVICE_FUNC CoeffReturnType* 
data() const { return m_result; } - - protected: - EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { - TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions); - m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device); - } - - Dimensions m_dimensions; - const XprType m_op; - const Device& m_device; - CoeffReturnType* m_result; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h deleted file mode 100644 index 3c33015bc4..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ /dev/null @@ -1,154 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H -#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H - -namespace Eigen { - -/** \class TensorDevice - * \ingroup CXX11_Tensor_Module - * - * \brief Pseudo expression providing an operator = that will evaluate its argument - * on the specified computing 'device' (GPU, thread pool, ...) - * - * Example: - * C.device(EIGEN_GPU) = A + B; - * - * Todo: thread pools. - * Todo: operator +=, -=, *= and so on. 
- */ - -template <typename ExpressionType, typename DeviceType> class TensorDevice { - public: - TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {} - - template<typename OtherDerived> - EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { - typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign; - Assign assign(m_expression, other); - internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device); - return *this; - } - - template<typename OtherDerived> - EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum; - Sum sum(m_expression, other); - typedef TensorAssignOp<ExpressionType, const Sum> Assign; - Assign assign(m_expression, sum); - internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device); - return *this; - } - - template<typename OtherDerived> - EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference; - Difference difference(m_expression, other); - typedef TensorAssignOp<ExpressionType, const Difference> Assign; - Assign assign(m_expression, difference); - internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device); - return *this; - } - - protected: - const DeviceType& m_device; - ExpressionType& m_expression; -}; - - -#ifdef EIGEN_USE_THREADS -template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPoolDevice> { - public: - TensorDevice(const ThreadPoolDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {} - - template<typename OtherDerived> - EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& 
other) { - typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign; - Assign assign(m_expression, other); - internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device); - return *this; - } - - template<typename OtherDerived> - EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum; - Sum sum(m_expression, other); - typedef TensorAssignOp<ExpressionType, const Sum> Assign; - Assign assign(m_expression, sum); - internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device); - return *this; - } - - template<typename OtherDerived> - EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference; - Difference difference(m_expression, other); - typedef TensorAssignOp<ExpressionType, const Difference> Assign; - Assign assign(m_expression, difference); - internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device); - return *this; - } - - protected: - const ThreadPoolDevice& m_device; - ExpressionType& m_expression; -}; -#endif - -#if defined(EIGEN_USE_GPU) -template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice> -{ - public: - TensorDevice(const GpuDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {} - - template<typename OtherDerived> - EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { - typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign; - Assign assign(m_expression, other); - internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device); - return *this; - } - - template<typename OtherDerived> - EIGEN_STRONG_INLINE TensorDevice& 
operator+=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum; - Sum sum(m_expression, other); - typedef TensorAssignOp<ExpressionType, const Sum> Assign; - Assign assign(m_expression, sum); - internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device); - return *this; - } - - template<typename OtherDerived> - EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference; - Difference difference(m_expression, other); - typedef TensorAssignOp<ExpressionType, const Difference> Assign; - Assign assign(m_expression, difference); - internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device); - return *this; - } - - protected: - const GpuDevice& m_device; - ExpressionType& m_expression; -}; -#endif - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h deleted file mode 100644 index ac2b2633ff..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h +++ /dev/null @@ -1,935 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H -#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H - -namespace Eigen { - -// Default device for the machine (typically a single cpu core) -struct DefaultDevice { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - return internal::aligned_malloc(num_bytes); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - internal::aligned_free(buffer); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { - ::memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { - ::memset(buffer, c, n); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { -#ifndef __CUDA_ARCH__ - // Running on the host CPU - return 1; -#else - // Running on a CUDA device - return 32; -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t memcpyThreshold() const { - return 2 * numThreads(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { -#ifndef __CUDA_ARCH__ - // Running on the host CPU - return l1CacheSize(); -#else - // Running on a CUDA device, return the amount of shared memory available. 
- return 48*1024; -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { -#ifndef __CUDA_ARCH__ - // Running single threaded on the host CPU - return l3CacheSize(); -#else - // Running on a CUDA device - return firstLevelCacheSize(); -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { -#ifndef __CUDA_ARCH__ - // Running single threaded on the host CPU - // Should return an enum that encodes the ISA supported by the CPU - return 1; -#else - // Running on a CUDA device - return __CUDA_ARCH__ / 100; -#endif - } -}; - -// Multiple cpu cores -#ifdef EIGEN_USE_THREADS - -#if __cplusplus > 199711 -// This defines an interface that ThreadPoolDevice can take to use -// custom thread pools underneath. -class ThreadPoolInterface { - public: - virtual void Schedule(std::function<void()> fn) = 0; - - virtual ~ThreadPoolInterface() {} -}; -#endif - -// The implementation of the ThreadPool type ensures that the Schedule method -// runs the functions it is provided in FIFO order when the scheduling is done -// by a single thread. -#ifdef EIGEN_USE_CUSTOM_THREAD_POOL -class ThreadPool : public ThreadPoolInterface { - public: - // Construct a pool that contains "num_threads" threads. - explicit ThreadPool(int num_threads) : threads_(num_threads), waiters_(num_threads) { - for (int i = 0; i < num_threads; i++) { - threads_.push_back(new std::thread([this]() { WorkerLoop(); })); - } - } - - // Wait until all scheduled work has finished and then destroy the - // set of threads. - ~ThreadPool() { - { - // Wait for all work to get done. - std::unique_lock<std::mutex> l(mu_); - while (!pending_.empty()) { - empty_.wait(l); - } - exiting_ = true; - - // Wakeup all waiters. - for (auto w : waiters_) { - w->ready = true; - w->work = nullptr; - w->cv.notify_one(); - } - } - - // Wait for threads to finish. - for (auto t : threads_) { - t->join(); - delete t; - } - } - - // Schedule fn() for execution in the pool of threads. 
The functions are - // executed in the order in which they are scheduled. - void Schedule(std::function<void()> fn) final { - std::unique_lock<std::mutex> l(mu_); - if (waiters_.empty()) { - pending_.push_back(fn); - } else { - Waiter* w = waiters_.back(); - waiters_.pop_back(); - w->ready = true; - w->work = fn; - w->cv.notify_one(); - } - } - - protected: - void WorkerLoop() { - std::unique_lock<std::mutex> l(mu_); - Waiter w; - while (!exiting_) { - std::function<void()> fn; - if (pending_.empty()) { - // Wait for work to be assigned to me - w.ready = false; - waiters_.push_back(&w); - while (!w.ready) { - w.cv.wait(l); - } - fn = w.work; - w.work = nullptr; - } else { - // Pick up pending work - fn = pending_.front(); - pending_.pop_front(); - if (pending_.empty()) { - empty_.notify_all(); - } - } - if (fn) { - mu_.unlock(); - fn(); - mu_.lock(); - } - } - } - - private: - struct Waiter { - std::condition_variable cv; - std::function<void()> work; - bool ready; - }; - - std::mutex mu_; - FixedSizeVector<std::thread*> threads_; // All threads - FixedSizeVector<Waiter*> waiters_; // Stack of waiting threads. - std::deque<std::function<void()>> pending_; // Queue of pending work - std::condition_variable empty_; // Signaled on pending_.empty() - bool exiting_ = false; -}; - - -// Notification is an object that allows a user to to wait for another -// thread to signal a notification that an event has occurred. -// -// Multiple threads can wait on the same Notification object. -// but only one caller must call Notify() on the object. 
-class Notification { - public: - Notification() : notified_(false) {} - ~Notification() {} - - void Notify() { - std::unique_lock<std::mutex> l(mu_); - eigen_assert(!notified_); - notified_ = true; - cv_.notify_all(); - } - - void WaitForNotification() { - std::unique_lock<std::mutex> l(mu_); - while (!notified_) { - cv_.wait(l); - } - } - - private: - std::mutex mu_; - std::condition_variable cv_; - bool notified_; -}; - -#else - -// Notification is an object that allows a user to to wait for another -// thread to signal a notification that an event has occurred. -// -// Multiple threads can wait on the same Notification object. -// but only one caller must call Notify() on the object. -class Notification { - public: - Notification() : notified_(false) {} - ~Notification() {} - - void Notify() { - tensorflow::mutex_lock l(mu_); - eigen_assert(!notified_); - notified_ = true; - cv_.notify_all(); - } - - void WaitForNotification() { - tensorflow::mutex_lock l(mu_); - while (!notified_) { - cv_.wait(l); - } - } - - private: - tensorflow::mutex mu_; - tensorflow::condition_variable cv_; - bool notified_; -}; -#endif - -// Runs an arbitrary function and then calls Notify() on the passed in -// Notification. -template <typename Function, typename... Args> struct FunctionWrapper -{ - static void run(Notification* n, Function f, Args... 
args) { - f(args...); - n->Notify(); - } -}; - -static EIGEN_STRONG_INLINE void wait_until_ready(Notification* n) { - if (n) { - n->WaitForNotification(); - } -} - - -struct MemcpyExecutor { - typedef MemcpyExecutor Self; - - MemcpyExecutor(void *dst, const void *src) : - m_dst(static_cast<char *>(dst)), m_src(static_cast<const char *>(src)) { } - - static EIGEN_STRONG_INLINE void run(const MemcpyExecutor* exec, size_t idx, size_t block_size) { - ::memcpy(&(exec->m_dst[idx]), &(exec->m_src[idx]), block_size); - } - - private: - char* m_dst; - const char* m_src; -}; - -struct MemsetExecutor { - typedef MemsetExecutor Self; - - MemsetExecutor(void *buffer, int val) : - m_buffer(static_cast<char *>(buffer)), m_val(val) { } - - static EIGEN_STRONG_INLINE void run(const MemsetExecutor* exec, size_t idx, size_t block_size) { - ::memset(&(exec->m_buffer[idx]), exec->m_val, block_size); - } - - private: - char* m_buffer; - const int m_val; -}; - - -struct ThreadPoolDevice { - // The ownership of the thread pool remains with the caller. 
- ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) - : pool_(pool), num_threads_(num_cores) {} - - EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - return internal::aligned_malloc(num_bytes); - } - - EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - internal::aligned_free(buffer); - } - - EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { -#ifdef __ANDROID__ - ::memcpy(dst, src, n); -#else - if (n <= 32768) { - ::memcpy(dst, src, n); - } else { - MemcpyExecutor memcpy_executor(dst, src); - execute(memcpy_executor, n); - } -#endif - } - - EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - - EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - - EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { -#ifdef __ANDROID__ - ::memset(buffer, c, n); -#else - if (n <= 32768) { - ::memset(buffer, c, n); - } else { - MemsetExecutor memset_executor(buffer, c); - execute(memset_executor, n); - } -#endif - } - - EIGEN_STRONG_INLINE size_t numThreads() const { - return num_threads_; - } - - EIGEN_STRONG_INLINE size_t memcpyThreshold() const { - return 2 * numThreads(); - } - - EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { - return l1CacheSize(); - } - - EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { - // The l3 cache size is shared between all the cores. - return l3CacheSize() / num_threads_; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { - // Should return an enum that encodes the ISA supported by the CPU - return 1; - } - - template <class Function, class... Args> - EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... 
args) const { - Notification* n = new Notification(); - std::function<void()> func = - std::bind(&FunctionWrapper<Function, Args...>::run, n, f, args...); - pool_->Schedule(func); - return n; - } - - template <class Function, class... Args> - EIGEN_STRONG_INLINE void enqueue_and_forget(Function&& f, Args&&... args) const { - std::function<void()> func = std::bind(f, args...); - pool_->Schedule(func); - } - - private: - template<typename Executor> - EIGEN_STRONG_INLINE void execute(const Executor& exec, size_t n) const { - // don't spawn a thread to process fewer than 1024 bytes (chosen by small amount of - // experimentation) - // TODO: make block_size a multiple of packet_size and align everything - const size_t block_size = numext::maxi(static_cast<size_t>(1024), n / numThreads()); - const size_t block_count = n / block_size; - eigen_assert(block_count <= numThreads()); - - FixedSizeVector<Notification*> results(block_count); - for (size_t block_idx = 0; block_idx < block_count; block_idx++) { - results.push_back(enqueue(&Executor::run, &exec, block_idx * block_size, block_size)); - } - - if (block_count * block_size < n) { - Executor::run(&exec, block_count * block_size, n - block_count * block_size); - } - - // wait for threads to finish - for (size_t block_idx = 0; block_idx < block_count; block_idx++) { - results[block_idx]->WaitForNotification(); - delete results[block_idx]; - } - } - - // todo: NUMA, ... - size_t num_threads_; - ThreadPoolInterface* pool_; -}; -#endif - - -// GPU offloading -#ifdef EIGEN_USE_GPU - -// An interface abstracting away device specific memory allocator. -class Allocator { - public: - virtual ~Allocator() {} - EIGEN_DEVICE_FUNC virtual void* allocate(size_t num_bytes) const = 0; - EIGEN_DEVICE_FUNC virtual void deallocate(void* buffer) const = 0; -}; - -#if !defined(__GCUDACC__) && !defined(__GCUDACC_HOST__) - -// This defines an interface that GPUDevice can take to use -// CUDA streams underneath. 
-class StreamInterface { - public: - virtual ~StreamInterface() {} - - virtual const cudaStream_t& stream() const = 0; - virtual const cudaDeviceProp& deviceProperties() const = 0; - - // Allocate memory on the actual device where the computation will run - virtual void* allocate(size_t num_bytes) const = 0; - virtual void deallocate(void* buffer) const = 0; -}; - -static cudaDeviceProp* m_deviceProperties; -static bool m_devicePropInitialized = false; - -#ifndef __CUDA_ARCH__ -static tensorflow::mutex m_devicePropInitMutex(tensorflow::LINKER_INITIALIZED); - -static void initializeDeviceProp() { - if (!m_devicePropInitialized) { - tensorflow::mutex_lock l(m_devicePropInitMutex); - if (!m_devicePropInitialized) { - int num_devices; - cudaError_t status = cudaGetDeviceCount(&num_devices); - eigen_check(status == cudaSuccess); - m_deviceProperties = new cudaDeviceProp[num_devices]; - for (int i = 0; i < num_devices; ++i) { - status = cudaGetDeviceProperties(&m_deviceProperties[i], i); - eigen_check(status == cudaSuccess); - } - m_devicePropInitialized = true; - } - } -} -#else -static void initializeDeviceProp() { - assert(false && "This function should never be called from within a CUDA kernel"); -} -#endif // __CUDA_ARCH__ - -static const cudaStream_t default_stream = cudaStreamDefault; - -class CudaStreamDevice : public StreamInterface { - public: - // Use the default stream on the current device - CudaStreamDevice() : stream_(&default_stream) { - cudaGetDevice(&device_); - initializeDeviceProp(); - } - // Use the default stream on the specified device - CudaStreamDevice(int device) : stream_(&default_stream), device_(device) { - initializeDeviceProp(); - } - // Use the specified stream. Note that it's the - // caller responsibility to ensure that the stream can run on - // the specified device. If no device is specified the code - // assumes that the stream is associated to the current gpu device. 
- CudaStreamDevice(const cudaStream_t* stream, int device = -1) - : stream_(stream), device_(device) { - if (device < 0) { - cudaGetDevice(&device_); - } else { - int num_devices; - cudaError_t err = cudaGetDeviceCount(&num_devices); - eigen_check(err == cudaSuccess); - eigen_check(device < num_devices); - device_ = device; - } - initializeDeviceProp(); - } - - const cudaStream_t& stream() const { return *stream_; } - const cudaDeviceProp& deviceProperties() const { - return m_deviceProperties[device_]; - } - virtual void* allocate(size_t num_bytes) const { - cudaError_t err = cudaSetDevice(device_); - eigen_check(err == cudaSuccess); - void* result; - err = cudaMalloc(&result, num_bytes); - eigen_check(err == cudaSuccess); - eigen_check(result != NULL); - return result; - } - virtual void deallocate(void* buffer) const { - cudaError_t err = cudaSetDevice(device_); - eigen_check(err == cudaSuccess); - assert(buffer != NULL); - err = cudaFree(buffer); - assert(err == cudaSuccess); - } - - private: - const cudaStream_t* stream_; - int device_; -}; - -static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) { - cudaError_t status = cudaDeviceSetSharedMemConfig(config); - eigen_check(status == cudaSuccess); -} - -struct GpuDevice { - // Neither the cudastream nor the allocator is not owned: the caller is - // responsible for their initialization and eventual destruction. - explicit GpuDevice(const StreamInterface* stream) : stream_(stream) { - eigen_assert(stream); - } - - // TODO(bsteiner): This is an internal API, we should not expose it. 
- EIGEN_STRONG_INLINE const cudaStream_t& stream() const { - return stream_->stream(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { -#ifndef __CUDA_ARCH__ - return stream_->allocate(num_bytes); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); - return NULL; -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { -#ifndef __CUDA_ARCH__ - stream_->deallocate(buffer); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { -#ifndef __CUDA_ARCH__ - cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, - stream_->stream()); - assert(err == cudaSuccess); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { -#ifndef __CUDA_ARCH__ - cudaError_t err = - cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream()); - assert(err == cudaSuccess); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { -#ifndef __CUDA_ARCH__ - cudaError_t err = - cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream()); - assert(err == cudaSuccess); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { -#ifndef __CUDA_ARCH__ - cudaError_t err = cudaMemsetAsync(buffer, c, n, stream_->stream()); - assert(err == cudaSuccess); -#else - eigen_assert(false && "The default device should be 
used instead to generate kernel code"); -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { - // FIXME - return 32; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t memcpyThreshold() const { - return 4 * 1024 * 1024; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { - // FIXME - return 48*1024; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { - // We won't try to take advantage of the l2 cache for the time being, and - // there is no l3 cache on cuda devices. - return firstLevelCacheSize(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const { -#ifndef __CUDA_ARCH__ - cudaError_t err = cudaStreamSynchronize(stream_->stream()); - assert(err == cudaSuccess); -#else - assert(false && "The default device should be used instead to generate kernel code"); -#endif - } - - inline int getNumCudaMultiProcessors() const { - return stream_->deviceProperties().multiProcessorCount; - } - inline int maxCudaThreadsPerBlock() const { - return stream_->deviceProperties().maxThreadsPerBlock; - } - inline int maxCudaThreadsPerMultiProcessor() const { - return stream_->deviceProperties().maxThreadsPerMultiProcessor; - } - inline int sharedMemPerBlock() const { - return stream_->deviceProperties().sharedMemPerBlock; - } - inline int majorDeviceVersion() const { - return stream_->deviceProperties().major; - } - - // This function checks if the CUDA runtime recorded an error for the - // underlying stream device. 
- inline bool ok() const { - cudaError_t error = cudaStreamQuery(stream_->stream()); - return (error == cudaSuccess) || (error == cudaErrorNotReady); - } - - private: - const StreamInterface* stream_; -}; - -inline void assertCudaOk() { - cudaError_t err = cudaGetLastError(); - - assert(err != cudaErrorMissingConfiguration); - assert(err != cudaErrorMemoryAllocation); - assert(err != cudaErrorInitializationError); - assert(err != cudaErrorLaunchFailure); - assert(err != cudaErrorPriorLaunchFailure); - assert(err != cudaErrorLaunchTimeout); - assert(err != cudaErrorLaunchOutOfResources); - assert(err != cudaErrorInvalidDeviceFunction); - assert(err != cudaErrorInvalidConfiguration); - assert(err != cudaErrorInvalidDevice); - assert(err != cudaErrorInvalidValue); - assert(err != cudaErrorInvalidPitchValue); - assert(err != cudaErrorInvalidSymbol); - assert(err != cudaErrorMapBufferObjectFailed); - assert(err != cudaErrorUnmapBufferObjectFailed); - assert(err != cudaErrorInvalidHostPointer); - assert(err != cudaErrorInvalidDevicePointer); - assert(err != cudaErrorInvalidTexture); - assert(err != cudaErrorInvalidTextureBinding); - assert(err != cudaErrorInvalidChannelDescriptor); - assert(err != cudaErrorInvalidMemcpyDirection); - assert(err != cudaErrorAddressOfConstant); - assert(err != cudaErrorTextureFetchFailed); - assert(err != cudaErrorTextureNotBound); - assert(err != cudaErrorSynchronizationError); - assert(err != cudaErrorInvalidFilterSetting); - assert(err != cudaErrorInvalidNormSetting); - assert(err != cudaErrorMixedDeviceExecution); - assert(err != cudaErrorCudartUnloading); - assert(err != cudaErrorUnknown); - assert(err != cudaErrorNotYetImplemented); - assert(err != cudaErrorMemoryValueTooLarge); - assert(err != cudaErrorInvalidResourceHandle); - assert(err != cudaErrorNotReady); - assert(err != cudaErrorInsufficientDriver); - assert(err != cudaErrorSetOnActiveProcess); - assert(err != cudaErrorInvalidSurface); - assert(err != cudaErrorNoDevice); - 
assert(err != cudaErrorECCUncorrectable); - assert(err != cudaErrorSharedObjectSymbolNotFound); - assert(err != cudaErrorSharedObjectInitFailed); - assert(err != cudaErrorUnsupportedLimit); - assert(err != cudaErrorDuplicateVariableName); - assert(err != cudaErrorDuplicateTextureName); - assert(err != cudaErrorDuplicateSurfaceName); - assert(err != cudaErrorDevicesUnavailable); - assert(err != cudaErrorInvalidKernelImage); - assert(err != cudaErrorNoKernelImageForDevice); - assert(err != cudaErrorIncompatibleDriverContext); - assert(err != cudaErrorPeerAccessAlreadyEnabled); - assert(err != cudaErrorPeerAccessNotEnabled); - assert(err != cudaErrorDeviceAlreadyInUse); - assert(err != cudaErrorProfilerDisabled); - assert(err != cudaErrorProfilerNotInitialized); - assert(err != cudaErrorProfilerAlreadyStarted); - assert(err != cudaErrorProfilerAlreadyStopped); - assert(err != cudaErrorAssert); - assert(err != cudaErrorTooManyPeers); - assert(err != cudaErrorHostMemoryAlreadyRegistered); - assert(err != cudaErrorHostMemoryNotRegistered); - assert(err != cudaErrorOperatingSystem); - assert(err != cudaErrorStartupFailure); - assert(err != cudaErrorApiFailureBase); - - // catch errors types introduced after this function was written - assert(err == cudaSuccess); -} - -#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, \ - ...) \ - do { \ - (kernel)<<<(gridsize), (blocksize), (sharedmem), (device).stream()>>>( \ - __VA_ARGS__); \ - assertCudaOk(); \ - } while (false) - -#else // __GCUDACC__ - -// The following is the version of GpuDevice for StreamExecutor -// (go/gpuexecutor) a GPU runtime that supports both CUDA and OpenCL. -// StreamExecutor is being developed as an open-source replacement for the CUDA -// runtime and is the runtime used when compiling with gcudacc. Differences -// between the CUDA runtime and StreamExecutor are abstracted away behind -// GpuDevice. - -// TODO(jpienaar): Temporary workaround until b/18409724 is addressed. 
-enum cudaSharedMemConfig -{ - cudaSharedMemBankSizeDefault = 0, - cudaSharedMemBankSizeFourByte = 1, - cudaSharedMemBankSizeEightByte = 2 -}; - -static inline void setCudaSharedMemConfig(cudaSharedMemConfig cache_config) { - // TODO(jpienaar): fix when implemented (b/18409724) -} - -struct GpuDevice { - // Default constructor: Get [cached] device 0 and its default stream. - GpuDevice() : allocator_(nullptr) { - perftools::gputools::Platform* platform = - perftools::gputools::MultiPlatformManager::PlatformWithName("cuda") - .ValueOrDie(); - stream_exec_ = platform->ExecutorForDevice(0).ValueOrDie(); - // TODO(rspringer): If we ever pull from an executor aside from 0, this will - // need to be preceded by a call to SetDevice(N); - stream_ = platforms::gpus::gcudacc::GetDefaultStream(); - device_descr_ = &(stream_exec_->GetDeviceDescription()); - } - - GpuDevice(perftools::gputools::Stream* stream, - const Allocator* alloc = nullptr) - : stream_(stream), - allocator_(alloc), - stream_exec_(stream_->parent()), - device_descr_(&(stream_exec_->GetDeviceDescription())) {} - - EIGEN_STRONG_INLINE perftools::gputools::Stream* stream() const { - return stream_; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - if (allocator_ != nullptr) return allocator_->allocate(num_bytes); -#ifndef __CUDA_ARCH__ - perftools::gputools::DeviceMemory<char> mem = - stream_exec_->AllocateArray<char>(num_bytes); - return mem.opaque(); -#else - assert(false && - "The default device should be used instead to generate kernel code"); - return nullptr; -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - if (allocator_ != nullptr) { - allocator_->deallocate(buffer); - return; - } -#ifndef __CUDA_ARCH__ - perftools::gputools::DeviceMemoryBase gpu_mem(buffer); - stream_exec_->Deallocate(&gpu_mem); -#else - assert(false && - "The default device should be used instead to generate kernel code"); -#endif - } - - 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, - size_t n) const { -#ifndef __CUDA_ARCH__ - perftools::gputools::DeviceMemoryBase gpu_to(dst); - if (!stream_->ThenMemcpy(&gpu_to, perftools::gputools::DeviceMemoryBase( - const_cast<void*>(src)), - n).ok()) { - assert(false && - "failed during enqueue of 'copy perftools::gputools to " - "perftools::gputools'"); - } -#else - assert(false && - "The default device should be used instead to generate kernel code"); -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { -#ifndef __CUDA_ARCH__ - perftools::gputools::DeviceMemoryBase gpu_to(dst); - if (!stream_->ThenMemcpy(&gpu_to, src, n).ok()) { - assert(false && "failed while enqueuing memcpy from host to device"); - } -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { -#ifndef __CUDA_ARCH__ - if (!stream_->ThenMemcpy(dst, perftools::gputools::DeviceMemoryBase( - const_cast<void*>(src)), - n).ok()) { - assert(false && "failed while enqueuing memcpy from device to host"); - } -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif - } - - EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { -#ifndef __CUDA_ARCH__ - perftools::gputools::DeviceMemoryBase gpu_buffer{buffer}; - if (!stream_exec_->Memset32(stream_, &gpu_buffer, c, n)) { - assert(false && "GPU memset failed."); - } -#else - assert(false && - "The default device should be used instead to generate kernel code"); -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { - // FIXME - return 32; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t memcpyThreshold() const { - return 4 * 1024 * 1024; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t 
firstLevelCacheSize() const { - // FIXME - return 48*1024; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { - // We won't try to take advantage of the l2 cache for the time being, and - // there is no l3 cache on cuda devices. - return firstLevelCacheSize(); - } - - EIGEN_STRONG_INLINE void synchronize() const { - stream_->BlockHostUntilDone(); - } - - EIGEN_DEVICE_FUNC inline int getNumCudaMultiProcessors() const { - return device_descr_->core_count(); - } - - EIGEN_DEVICE_FUNC inline int maxCudaThreadsPerBlock() const { - return device_descr_->threads_per_block_limit(); - } - - EIGEN_DEVICE_FUNC inline int maxCudaThreadsPerMultiProcessor() const { - return device_descr_->threads_per_core_limit(); - } - - EIGEN_DEVICE_FUNC inline int sharedMemPerBlock() const { - return device_descr_->shared_memory_per_block(); - } - - EIGEN_DEVICE_FUNC inline int majorDeviceVersion() const { - int major, minor; - if (device_descr_->cuda_compute_capability(&major, &minor)) { - return major; - } else { - return 0; - } - } - - inline bool ok() const { return stream_->ok(); } - - private: - perftools::gputools::Stream* stream_; - perftools::gputools::StreamExecutor* stream_exec_; - const perftools::gputools::DeviceDescription* device_descr_; - const Allocator* allocator_; -}; - -#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...)\ - (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \ - CHECK((device).stream()->ok()); -#endif // __GCUDACC__ - -#endif // EIGEN_USE_GPU -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h deleted file mode 100644 index 19e922f92f..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +++ /dev/null @@ -1,235 +0,0 @@ -// This file 
is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H -#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H - -namespace Eigen { - -/** \internal - * - * \class TensorDimensionList - * \ingroup CXX11_Tensor_Module - * - * \brief Special case of tensor index list used to list all the dimensions of a tensor of rank n. - * - * \sa Tensor - */ - -template <typename Index, std::size_t Rank> struct DimensionList { - const Index operator[] (const Index i) const { return i; } -}; - -namespace internal { - -template<typename Index, std::size_t Rank> struct array_size<DimensionList<Index, Rank> > { - static const size_t value = Rank; -}; -template<typename Index, std::size_t Rank> struct array_size<const DimensionList<Index, Rank> > { - static const size_t value = Rank; -}; - -template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(DimensionList<Index, Rank>& a) { - return n; -} -template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(const DimensionList<Index, Rank>& a) { - return n; -} - - -#if defined(EIGEN_HAS_CONSTEXPR) -template <typename Index, std::size_t Rank> -struct index_known_statically<DimensionList<Index, Rank> > { - constexpr bool operator() (const DenseIndex) const { - return true; - } -}; -template <typename Index, std::size_t Rank> -struct index_known_statically<const DimensionList<Index, Rank> > { - constexpr bool operator() (const DenseIndex) const { - return true; - } -}; - -template <typename Index, std::size_t Rank> -struct all_indices_known_statically<DimensionList<Index, Rank> > { - constexpr bool operator() () const { - return true; - } -}; -template 
<typename Index, std::size_t Rank> -struct all_indices_known_statically<const DimensionList<Index, Rank> > { - constexpr bool operator() () const { - return true; - } -}; - -template <typename Index, std::size_t Rank> -struct indices_statically_known_to_increase<DimensionList<Index, Rank> > { - constexpr bool operator() () const { - return true; - } -}; -template <typename Index, std::size_t Rank> -struct indices_statically_known_to_increase<const DimensionList<Index, Rank> > { - constexpr bool operator() () const { - return true; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_eq<DimensionList<Index, Rank> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return i == value; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_eq<const DimensionList<Index, Rank> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return i == value; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_ne<DimensionList<Index, Rank> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return i != value; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_ne<const DimensionList<Index, Rank> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return i != value; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_gt<DimensionList<Index, Rank> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return i > value; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_gt<const DimensionList<Index, Rank> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return i > value; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_lt<DimensionList<Index, Rank> > { - constexpr bool operator() (const 
DenseIndex i, const DenseIndex value) const { - return i < value; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_lt<const DimensionList<Index, Rank> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return i < value; - } -}; - -#else -template <typename Index, std::size_t Rank> -struct index_known_statically<DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const { - return true; - } -}; -template <typename Index, std::size_t Rank> -struct index_known_statically<const DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const { - return true; - } -}; - -template <typename Index, std::size_t Rank> -struct all_indices_known_statically<DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() () const { - return true; - } -}; -template <typename Index, std::size_t Rank> -struct all_indices_known_statically<const DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() () const { - return true; - } -}; - -template <typename Index, std::size_t Rank> -struct indices_statically_known_to_increase<DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() () const { - return true; - } -}; -template <typename Index, std::size_t Rank> -struct indices_statically_known_to_increase<const DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() () const { - return true; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_eq<DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const { - return false; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_eq<const DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const { - return false; - } -}; - -template <typename Index, std::size_t Rank> -struct 
index_statically_ne<DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const { - return false; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_ne<const DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const { - return false; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_gt<DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const { - return false; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_gt<const DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const { - return false; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_lt<DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const { - return false; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_lt<const DimensionList<Index, Rank> > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex i, const DenseIndex value) const { - return false; - } -}; -#endif - -} // end namespace internal -} // end namespace Eigen - - -#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h deleted file mode 100644 index 8bf5272ec8..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ /dev/null @@ -1,597 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H -#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H - - -namespace Eigen { - -/** \internal - * - * \class TensorDimensions - * \ingroup CXX11_Tensor_Module - * - * \brief Set of classes used to encode and store the dimensions of a Tensor. - * - * The Sizes class encodes as part of the type the number of dimensions and the - * sizes corresponding to each dimension. It uses no storage space since it is - * entirely known at compile time. - * The DSizes class is its dynamic sibling: the number of dimensions is known - * at compile time but the sizes are set during execution. - * - * \sa Tensor - */ - -// Can't use std::pairs on cuda devices -template <typename Index> struct IndexPair { - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair() : first(0), second(0) { } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair(Index f, Index s) : first(f), second(s) { } - Index first; - Index second; -}; - -// Boilerplate code -namespace internal { - -template<std::size_t n, typename Dimension> struct dget { - static const std::size_t value = get<n, typename Dimension::Base>::value; -}; - - -template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> -struct fixed_size_tensor_index_linearization_helper -{ - template <typename Dimensions> EIGEN_DEVICE_FUNC - static inline Index run(array<Index, NumIndices> const& indices, - const Dimensions& dimensions) - { - return array_get<RowMajor ? n - 1 : (NumIndices - n)>(indices) + - dget<RowMajor ? 
n - 1 : (NumIndices - n), Dimensions>::value * - fixed_size_tensor_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions); - } -}; - -template<typename Index, std::size_t NumIndices, bool RowMajor> -struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor> -{ - template <typename Dimensions> EIGEN_DEVICE_FUNC - static inline Index run(array<Index, NumIndices> const& indices, - const Dimensions&) - { - return 0; - } -}; - -template<typename Index, std::size_t n> -struct fixed_size_tensor_index_extraction_helper -{ - template <typename Dimensions> EIGEN_DEVICE_FUNC - static inline Index run(const Index index, - const Dimensions& dimensions) - { - const Index mult = (index == n) ? 1 : 0; - return array_get<n>(dimensions) * mult + - fixed_size_tensor_index_extraction_helper<Index, n - 1>::run(index, dimensions); - } -}; - -template<typename Index> -struct fixed_size_tensor_index_extraction_helper<Index, 0> -{ - template <typename Dimensions> EIGEN_DEVICE_FUNC - static inline Index run(const Index index, - const Dimensions& dimensions) - { - const Index mult = (index == 0) ? 1 : 0; - return array_get<0>(dimensions) * mult; - } -}; - -} // end namespace internal - - -// Fixed size -#ifndef EIGEN_EMULATE_CXX11_META_H -template <typename std::size_t... Indices> -struct Sizes : internal::numeric_list<std::size_t, Indices...> { - typedef internal::numeric_list<std::size_t, Indices...> Base; - static const std::size_t total_size = internal::arg_prod(Indices...); - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { - return Base::count; - } - - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t TotalSize() { - return internal::arg_prod(Indices...); - } - - Sizes() { } - template <typename DenseIndex> - explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) { - // todo: add assertion - } -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template <typename... DenseIndex> Sizes(DenseIndex...) 
{ } - explicit Sizes(std::initializer_list<std::size_t> /*l*/) { - // todo: add assertion - } -#endif - - template <typename T> Sizes& operator = (const T& /*other*/) { - // add assertion failure if the size of other is different - return *this; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const int index) const { - return internal::fixed_size_tensor_index_extraction_helper<std::ptrdiff_t, Base::count - 1>::run(index, *this); - } - - template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { - return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *static_cast<const Base*>(this)); - } - template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { - return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *static_cast<const Base*>(this)); - } -}; - -namespace internal { -template <typename std::size_t... 
Indices> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<Indices...>&) { - return Sizes<Indices...>::total_size; -} -} - -#else - -template <std::size_t n> -struct non_zero_size { - typedef internal::type2val<std::size_t, n> type; -}; -template <> -struct non_zero_size<0> { - typedef internal::null_type type; -}; - -template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0, std::size_t V5=0> struct Sizes { - typedef typename internal::make_type_list<typename non_zero_size<V1>::type, typename non_zero_size<V2>::type, typename non_zero_size<V3>::type, typename non_zero_size<V4>::type, typename non_zero_size<V5>::type >::type Base; - static const size_t count = Base::count; - static const std::size_t total_size = internal::arg_prod<Base>::value; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { - return count; - } - - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() { - return internal::arg_prod<Base>::value; - } - - Sizes() { } - template <typename DenseIndex> - explicit Sizes(const array<DenseIndex, Base::count>& indices) { - // todo: add assertion - } -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template <typename... DenseIndex> Sizes(DenseIndex... 
indices) { } - explicit Sizes(std::initializer_list<std::size_t> l) { - // todo: add assertion - } -#else - EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0) { - } - EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1) { - } - EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) { - } - EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) { - } - EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) { - } -#endif - - template <typename T> Sizes& operator = (const T& other) { - // to do: check the size of other - return *this; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator[] (const int index) const { - switch (index) { - case 0: - return internal::get<0, Base>::value; - case 1: - return internal::get<1, Base>::value; - case 2: - return internal::get<2, Base>::value; - case 3: - return internal::get<3, Base>::value; - case 4: - return internal::get<4, Base>::value; - default: - eigen_assert(false && "index overflow"); - return static_cast<std::size_t>(-1); - } - } - - template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { - return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *this); - } - template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { - return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *this); - } -}; - -namespace internal { -template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, 
V5>&) { - return Sizes<V1, V2, V3, V4, V5>::total_size; -} -} - -#endif - -// Boilerplate -namespace internal { -template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> -struct tensor_index_linearization_helper -{ - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Index run(array<Index, NumIndices> const& indices, array<Index, NumIndices> const& dimensions) - { - return array_get<RowMajor ? n : (NumIndices - n - 1)>(indices) + - array_get<RowMajor ? n : (NumIndices - n - 1)>(dimensions) * - tensor_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions); - } -}; - -template<typename Index, std::size_t NumIndices, bool RowMajor> -struct tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor> -{ - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Index run(array<Index, NumIndices> const& indices, array<Index, NumIndices> const&) - { - return array_get<RowMajor ? 0 : NumIndices - 1>(indices); - } -}; -} // end namespace internal - - - -// Dynamic size -template <typename DenseIndex, std::size_t NumDims> -struct DSizes : array<DenseIndex, NumDims> { - typedef array<DenseIndex, NumDims> Base; - static const std::size_t count = NumDims; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { - return NumDims; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() const { - return internal::array_prod(*static_cast<const Base*>(this)); - } - - EIGEN_DEVICE_FUNC DSizes() { - for (int i = 0 ; i < NumDims; ++i) { - (*this)[i] = 0; - } - } - EIGEN_DEVICE_FUNC DSizes(const array<DenseIndex, NumDims>& a) : Base(a) { } - - EIGEN_DEVICE_FUNC DSizes(const DimensionList<DenseIndex, NumDims>& a) { - for (int i = 0 ; i < NumDims; ++i) { - (*this)[i] = a[i]; - } - } - -#ifndef EIGEN_EMULATE_CXX11_META_H - template <typename std::size_t... 
Indices> - EIGEN_DEVICE_FUNC DSizes(const Sizes<Indices...>& a) { - for (int i = 0 ; i < NumDims; ++i) { - (*this)[i] = a[i]; - } - } -#else - template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> - EIGEN_DEVICE_FUNC DSizes(const Sizes<V1, V2, V3, V4, V5>& a) { - for (int i = 0 ; i < NumDims; ++i) { - (*this)[i] = a[i]; - } - } -#endif - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, IndexTypes... otherDimensions) { - EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE) - (*this) = array<DenseIndex, NumDims>{{firstDimension, otherDimensions...}}; - } -#else - EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0) { - eigen_assert(NumDims == 1); - (*this)[0] = i0; - } - EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0, const DenseIndex i1) { - eigen_assert(NumDims == 2); - (*this)[0] = i0; - (*this)[1] = i1; - } - EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) { - eigen_assert(NumDims == 3); - (*this)[0] = i0; - (*this)[1] = i1; - (*this)[2] = i2; - } - EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) { - eigen_assert(NumDims == 4); - (*this)[0] = i0; - (*this)[1] = i1; - (*this)[2] = i2; - (*this)[3] = i3; - } - EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) { - eigen_assert(NumDims == 5); - (*this)[0] = i0; - (*this)[1] = i1; - (*this)[2] = i2; - (*this)[3] = i3; - (*this)[4] = i4; - } -#endif - - EIGEN_DEVICE_FUNC DSizes& operator = (const array<DenseIndex, NumDims>& other) { - *static_cast<Base*>(this) = other; - return *this; - } - - // A constexpr would be so much better here - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfColMajor(const array<DenseIndex, 
NumDims>& indices) const { - return internal::tensor_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, false>::run(indices, *static_cast<const Base*>(this)); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfRowMajor(const array<DenseIndex, NumDims>& indices) const { - return internal::tensor_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, true>::run(indices, *static_cast<const Base*>(this)); - } -}; - - - - -// Boilerplate -namespace internal { -template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> -struct tensor_vsize_index_linearization_helper -{ - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const& dimensions) - { - return array_get<RowMajor ? n : (NumIndices - n - 1)>(indices) + - array_get<RowMajor ? n : (NumIndices - n - 1)>(dimensions) * - tensor_vsize_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions); - } -}; - -template<typename Index, std::size_t NumIndices, bool RowMajor> -struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor> -{ - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const&) - { - return array_get<RowMajor ? 
0 : NumIndices - 1>(indices); - } -}; -} // end namespace internal - - -template <typename DenseIndex> -struct VSizes : std::vector<DenseIndex> { - typedef std::vector<DenseIndex> Base; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { - return Base::size(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() const { - return internal::array_prod(*static_cast<const Base*>(this)); - } - - EIGEN_DEVICE_FUNC VSizes() { } - EIGEN_DEVICE_FUNC explicit VSizes(const std::vector<DenseIndex>& a) : Base(a) { } - - template <std::size_t NumDims> - EIGEN_DEVICE_FUNC explicit VSizes(const array<DenseIndex, NumDims>& a) { - this->resize(NumDims); - for (int i = 0; i < NumDims; ++i) { - (*this)[i] = a[i]; - } - } - template <std::size_t NumDims> - EIGEN_DEVICE_FUNC explicit VSizes(const DSizes<DenseIndex, NumDims>& a) { - this->resize(NumDims); - for (int i = 0; i < NumDims; ++i) { - (*this)[i] = a[i]; - } - } - - EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0) { - this->resize(1); - (*this)[0] = i0; - } - EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1) { - this->resize(2); - (*this)[0] = i0; - (*this)[1] = i1; - } - EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) { - this->resize(3); - (*this)[0] = i0; - (*this)[1] = i1; - (*this)[2] = i2; - } - EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) { - this->resize(4); - (*this)[0] = i0; - (*this)[1] = i1; - (*this)[2] = i2; - (*this)[3] = i3; - } - EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) { - this->resize(5); - (*this)[0] = i0; - (*this)[1] = i1; - (*this)[2] = i2; - (*this)[3] = i3; - (*this)[4] = i4; - } - - EIGEN_DEVICE_FUNC VSizes& operator = (const std::vector<DenseIndex>& other) { - *static_cast<Base*>(this) = other; - return *this; - } - template 
<std::size_t NumDims> - EIGEN_DEVICE_FUNC VSizes& operator = (const array<DenseIndex, NumDims>& a) { - this->resize(NumDims); - for (int i = 0; i < NumDims; ++i) { - (*this)[i] = a[i]; - } - return *this; - } - template <std::size_t NumDims> - EIGEN_DEVICE_FUNC VSizes& operator = (const DSizes<DenseIndex, NumDims>& a) { - this->resize(NumDims); - for (int i = 0; i < NumDims; ++i) { - (*this)[i] = a[i]; - } - return *this; - } - - // A constexpr would be so much better here - template <std::size_t NumDims> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfColMajor(const array<DenseIndex, NumDims>& indices) const { - return internal::tensor_vsize_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, false>::run(indices, *static_cast<const Base*>(this)); - } - template <std::size_t NumDims> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfRowMajor(const array<DenseIndex, NumDims>& indices) const { - return internal::tensor_vsize_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, true>::run(indices, *static_cast<const Base*>(this)); - } -}; - - -// Boilerplate -namespace internal { -template <typename DenseIndex> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex array_prod(const VSizes<DenseIndex>& sizes) { - DenseIndex total_size = 1; - for (int i = 0; i < sizes.size(); ++i) { - total_size *= sizes[i]; - } - return total_size; -}; -} - -namespace internal { - -template <typename DenseIndex, std::size_t NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > { - static const size_t value = NumDims; -}; -template <typename DenseIndex, std::size_t NumDims> struct array_size<DSizes<DenseIndex, NumDims> > { - static const size_t value = NumDims; -}; -template <typename DenseIndex> -struct array_size<VSizes<DenseIndex> > { - static const ptrdiff_t value = -1; -}; -#ifndef EIGEN_EMULATE_CXX11_META_H -template <typename std::size_t... 
Indices> struct array_size<const Sizes<Indices...> > { -static const size_t value = Sizes<Indices...>::count; -}; -template <typename std::size_t... Indices> struct array_size<Sizes<Indices...> > { -static const size_t value = Sizes<Indices...>::count; -}; -template <std::size_t n, typename std::size_t... Indices> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<Indices...>&) { - return get<n, internal::numeric_list<std::size_t, Indices...> >::value; -} -#else -template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > { - static const size_t value = Sizes<V1,V2,V3,V4,V5>::count; -}; -template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > { - static const size_t value = Sizes<V1,V2,V3,V4,V5>::count; -}; -template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<V1,V2,V3,V4,V5>& a) { - return get<n, typename Sizes<V1,V2,V3,V4,V5>::Base>::value; -} - -#endif - - -template <typename Dims1, typename Dims2, size_t n, size_t m> -struct sizes_match_below_dim { - static inline bool run(Dims1& dims1, Dims2& dims2) { - return false; - } -}; -template <typename Dims1, typename Dims2, size_t n> -struct sizes_match_below_dim<Dims1, Dims2, n, n> { - static inline bool run(Dims1& dims1, Dims2& dims2) { - return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) & - sizes_match_below_dim<Dims1, Dims2, n-1, n-1>::run(dims1, dims2); - } -}; -template <typename Dims1, typename Dims2> -struct sizes_match_below_dim<Dims1, Dims2, 0, 0> { - static inline bool run(Dims1& dims1, Dims2& dims2) { - return true; - } -}; - -} // end namespace internal - - -template <typename Dims1, typename Dims2> -bool dimensions_match(Dims1& dims1, Dims2& dims2) { - return internal::sizes_match_below_dim<Dims1, 
Dims2, internal::array_size<Dims1>::value, internal::array_size<Dims2>::value>::run(dims1, dims2); -} - -template <typename IndexType, typename Dims2> -bool dimensions_match(const VSizes<IndexType>& dims1, Dims2& dims2) { - if (dims1.size() != internal::array_size<Dims2>::value) { - return false; - } - for (int i = 0; i < internal::array_size<Dims2>::value; ++i) { - if (dims1[i] != dims2[i]) { - return false; - } - } - return true; -} - -template <typename Dims1, typename IndexType> -bool dimensions_match(Dims1& dims1, const VSizes<IndexType>& dims2) { - if (internal::array_size<Dims1>::value != dims2.size()) { - return false; - } - for (int i = 0; i < internal::array_size<Dims1>::value; ++i) { - if (dims1[i] != dims2[i]) { - return false; - } - } - return true; -} - -template <typename IndexType> -bool dimensions_match(const VSizes<IndexType>& dims1, const VSizes<IndexType>& dims2) { - return dims1 == dims2; -} - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h deleted file mode 100644 index 4ad431abae..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ /dev/null @@ -1,151 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H -#define EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H - -namespace Eigen { - -/** \class TensorForcedEval - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor reshaping class. 
- * - * - */ -namespace internal { -template<typename XprType> -struct traits<TensorEvalToOp<XprType> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs are different. - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; - - enum { - Flags = 0, - }; -}; - -template<typename XprType> -struct eval<TensorEvalToOp<XprType>, Eigen::Dense> -{ - typedef const TensorEvalToOp<XprType>& type; -}; - -template<typename XprType> -struct nested<TensorEvalToOp<XprType>, 1, typename eval<TensorEvalToOp<XprType> >::type> -{ - typedef TensorEvalToOp<XprType> type; -}; - -} // end namespace internal - - - - -template<typename XprType> -class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorEvalToOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename Eigen::internal::nested<TensorEvalToOp>::type Nested; - typedef typename Eigen::internal::traits<TensorEvalToOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorEvalToOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(CoeffReturnType* buffer, const XprType& expr) - : m_xpr(expr), m_buffer(buffer) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC CoeffReturnType* buffer() const { return m_buffer; } - - protected: - typename XprType::Nested m_xpr; - CoeffReturnType* m_buffer; -}; - - - -template<typename 
ArgType, typename Device> -struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device> -{ - typedef TensorEvalToOp<ArgType> XprType; - typedef typename ArgType::Scalar Scalar; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - - enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_device(device), m_buffer(op.buffer()) - { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() { - } - - typedef typename XprType::Index Index; - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) { - assert(scalar == NULL); - return m_impl.evalSubExprsIfNeeded(m_buffer); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { - m_buffer[i] = m_impl.coeff(i); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { - internal::pstoret<CoeffReturnType, PacketReturnType, Aligned>(m_buffer + i, m_impl.template packet<TensorEvaluator<ArgType, Device>::IsAligned ? 
Aligned : Unaligned>(i)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_buffer[index]; - } - - template<int LoadMode> - EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index); - } - - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_buffer; } - - private: - TensorEvaluator<ArgType, Device> m_impl; - const Device& m_device; - CoeffReturnType* m_buffer; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h deleted file mode 100644 index f2ef2d85c1..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ /dev/null @@ -1,505 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H -#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H - -namespace Eigen { - -/** \class TensorEvaluator - * \ingroup CXX11_Tensor_Module - * - * \brief The tensor evaluator classes. - * - * These classes are responsible for the evaluation of the tensor expression. - * - * TODO: add support for more types of expressions, in particular expressions - * leading to lvalues (slicing, reshaping, etc...) 
- */ - -// Generic evaluator -template<typename Derived, typename Device> -struct TensorEvaluator -{ - typedef typename Derived::Index Index; - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename Derived::Dimensions Dimensions; - - // NumDimensions is -1 for variable dim tensors - static const int NumCoords = internal::traits<Derived>::NumDimensions; - static const int SafeNumCoords = NumCoords >= 0 ? NumCoords : 0; - - enum { - IsAligned = Derived::IsAligned, - PacketAccess = Derived::PacketAccess, - BlockAccess = internal::is_arithmetic< - typename internal::remove_const<Scalar>::type>::value && - NumCoords >= 0, - Layout = Derived::Layout, - CoordAccess = NumCoords >= 0, - }; - - typedef typename internal::TensorBlock< - Index, typename internal::remove_const<Scalar>::type, SafeNumCoords, Layout> - TensorBlock; - typedef typename internal::TensorBlockReader< - Index, typename internal::remove_const<Scalar>::type, SafeNumCoords, Layout, - PacketAccess> TensorBlockReader; - typedef typename internal::TensorBlockWriter< - Index, typename internal::remove_const<Scalar>::type, SafeNumCoords, Layout, - PacketAccess> TensorBlockWriter; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorEvaluator(const Derived& m, const Device& device) - : m_data(const_cast<Scalar*>(m.data())), - m_dims(m.dimensions()), - m_device(device) {} - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) { - if (dest) { - m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize()); - return false; - } - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - eigen_assert(m_data); - return m_data[index]; - } 
- - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - eigen_assert(m_data); - return m_data[index]; - } - - template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - PacketReturnType packet(Index index) const - { - return internal::ploadt<PacketReturnType, LoadMode>(m_data + index); - } - - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, SafeNumCoords>& coords) const { - eigen_assert(m_data); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - return m_data[m_dims.IndexOfColMajor(coords)]; - } else { - return m_data[m_dims.IndexOfRowMajor(coords)]; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, SafeNumCoords>& coords) { - eigen_assert(m_data); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - return m_data[m_dims.IndexOfColMajor(coords)]; - } else { - return m_data[m_dims.IndexOfRowMajor(coords)]; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector<internal::TensorOpResourceRequirements>* resources) const { - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const { - assert(m_data != NULL); - TensorBlockReader::Run(block, m_data); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock( - const TensorBlock& block) { - assert(m_data != NULL); - TensorBlockWriter::Run(block, m_data); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } - - protected: - Scalar* m_data; - Dimensions m_dims; - const Device& m_device; -}; - - -namespace { -template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -T loadConstant(const T* address) { - return *address; - -} -// Use the texture cache on CUDA devices whenever possible -#if defined(__CUDA_ARCH__) && 
__CUDA_ARCH__ >= 350 -template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -float loadConstant(const float* address) { - return __ldg(address); -} -template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -double loadConstant(const double* address) { - return __ldg(address); - - -} -#endif -} - - -// Default evaluator for rvalues -template<typename Derived, typename Device> -struct TensorEvaluator<const Derived, Device> -{ - typedef typename Derived::Index Index; - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename Derived::Dimensions Dimensions; - - // NumDimensions is -1 for variable dim tensors - static const int NumCoords = internal::traits<Derived>::NumDimensions; - static const int SafeNumCoords = NumCoords >= 0 ? NumCoords : 0; - - enum { - IsAligned = Derived::IsAligned, - PacketAccess = Derived::PacketAccess, - BlockAccess = internal::is_arithmetic< - typename internal::remove_const<Scalar>::type>::value && - NumCoords >= 0, - Layout = Derived::Layout, - CoordAccess = NumCoords >= 0, - }; - - // TODO(andydavis) Add block/writeBlock accessors to Tensor and TensorMap so - // we can default BlockAccess to true above. 
- typedef typename internal::TensorBlock< - Index, typename internal::remove_const<Scalar>::type, SafeNumCoords, Layout> - TensorBlock; - typedef typename internal::TensorBlockReader< - Index, typename internal::remove_const<Scalar>::type, SafeNumCoords, Layout, - PacketAccess> TensorBlockReader; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) - : m_data(m.data()), m_dims(m.dimensions()), m_device(device) - { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { - if (internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value && data) { - m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar)); - return false; - } - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - eigen_assert(m_data); - return loadConstant(m_data+index); - } - - template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - PacketReturnType packet(Index index) const - { - return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, SafeNumCoords>& coords) const { - eigen_assert(m_data); - const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 
m_dims.IndexOfColMajor(coords) - : m_dims.IndexOfRowMajor(coords); - return loadConstant(m_data+index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector<internal::TensorOpResourceRequirements>* resources) const { - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const { - assert(m_data != NULL); - TensorBlockReader::Run(block, m_data); - } - - EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; } - - protected: - const Scalar* m_data; - Dimensions m_dims; - const Device& m_device; -}; - - - - -// -------------------- CwiseNullaryOp -------------------- - -template<typename NullaryOp, typename ArgType, typename Device> -struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device> -{ - typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType; - - enum { - IsAligned = true, - PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC - TensorEvaluator(const XprType& op, const Device& device) - : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device) - { } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename internal::traits<XprType>::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } - - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const - { - return m_functor(index); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) 
const - { - return m_functor.packetOp(index); - } - - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } - - private: - const NullaryOp m_functor; - TensorEvaluator<ArgType, Device> m_argImpl; -}; - - - -// -------------------- CwiseUnaryOp -------------------- - -template<typename UnaryOp, typename ArgType, typename Device> -struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device> -{ - typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType; - - enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess & - internal::functor_traits<UnaryOp>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_functor(op.functor()), - m_argImpl(op.nestedExpression(), device) - { } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename internal::traits<XprType>::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { - m_argImpl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_argImpl.cleanup(); - } - - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const - { - return m_functor(m_argImpl.coeff(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index)); - } - - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } - - private: - const UnaryOp m_functor; - 
TensorEvaluator<ArgType, Device> m_argImpl; -}; - - -// -------------------- CwiseBinaryOp -------------------- - -template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device> -struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device> -{ - typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType; - - enum { - IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & - TensorEvaluator<RightArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & - TensorEvaluator<RightArgType, Device>::PacketAccess & - internal::functor_traits<BinaryOp>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_functor(op.functor()), - m_leftImpl(op.lhsExpression(), device), - m_rightImpl(op.rhsExpression(), device) - { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); - eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); - } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename internal::traits<XprType>::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const - { - // TODO: use right impl instead if right impl dimensions are known at compile time. 
- return m_leftImpl.dimensions(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { - m_leftImpl.evalSubExprsIfNeeded(NULL); - m_rightImpl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_leftImpl.cleanup(); - m_rightImpl.cleanup(); - } - - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const - { - return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index)); - } - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index)); - } - - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } - - private: - const BinaryOp m_functor; - TensorEvaluator<LeftArgType, Device> m_leftImpl; - TensorEvaluator<RightArgType, Device> m_rightImpl; -}; - - -// -------------------- SelectOp -------------------- - -template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device> -struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device> -{ - typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType; - typedef typename XprType::Scalar Scalar; - - enum { - IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned & - TensorEvaluator<ElseArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & - TensorEvaluator<ElseArgType, Device>::PacketAccess & - internal::packet_traits<Scalar>::HasBlend, - BlockAccess = false, - Layout = TensorEvaluator<IfArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_condImpl(op.ifExpression(), device), - m_thenImpl(op.thenExpression(), device), - m_elseImpl(op.elseExpression(), device) - { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, 
Device>::Layout) == static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); - eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions())); - eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions())); - } - - typedef typename XprType::Index Index; - typedef typename internal::traits<XprType>::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const - { - // TODO: use then or else impl instead if they happen to be known at compile time. - return m_condImpl.dimensions(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { - m_condImpl.evalSubExprsIfNeeded(NULL); - m_thenImpl.evalSubExprsIfNeeded(NULL); - m_elseImpl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_condImpl.cleanup(); - m_thenImpl.cleanup(); - m_elseImpl.cleanup(); - } - - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const - { - return m_condImpl.coeff(index) ? 
m_thenImpl.coeff(index) : m_elseImpl.coeff(index); - } - template<int LoadMode> - EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const - { - const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - internal::Selector<PacketSize> select; - for (Index i = 0; i < PacketSize; ++i) { - select.select[i] = m_condImpl.coeff(index+i); - } - return internal::pblend(select, - m_thenImpl.template packet<LoadMode>(index), - m_elseImpl.template packet<LoadMode>(index)); - } - - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } - - private: - TensorEvaluator<IfArgType, Device> m_condImpl; - TensorEvaluator<ThenArgType, Device> m_thenImpl; - TensorEvaluator<ElseArgType, Device> m_elseImpl; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h deleted file mode 100644 index b7cea143ff..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ /dev/null @@ -1,465 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H -#define EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H - -namespace Eigen { - -/** \class TensorExecutor - * \ingroup CXX11_Tensor_Module - * - * \brief The tensor executor class. - * - * This class is responsible for launch the evaluation of the expression on - * the specified computing device. - */ -namespace internal { - -// Default strategy: the expression is evaluated with a single cpu thread. 
-template <typename Expression, typename Device, - bool Vectorizable, bool Tileable> -class TensorExecutor { - public: - typedef typename Expression::Index Index; - EIGEN_DEVICE_FUNC static inline void run(const Expression& expr, const Device& device = Device()) - { - TensorEvaluator<Expression, Device> evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) - { - const Index size = array_prod(evaluator.dimensions()); - for (Index i = 0; i < size; ++i) { - evaluator.evalScalar(i); - } - } - evaluator.cleanup(); - } -}; - -template <typename Expression> -class TensorExecutor<Expression, DefaultDevice, true, false> { - public: - typedef typename Expression::Index Index; - EIGEN_DEVICE_FUNC - static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice()) - { - TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) - { - const Index size = array_prod(evaluator.dimensions()); - const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size; - - // Manually unroll this loop since compilers don't do it. 
- const Index UnrolledSize = (size / (4 * PacketSize)) * 4 * PacketSize; - for (Index i = 0; i < UnrolledSize; i += 4*PacketSize) { - evaluator.evalPacket(i); - evaluator.evalPacket(i+PacketSize); - evaluator.evalPacket(i+2*PacketSize); - evaluator.evalPacket(i+3*PacketSize); - } - const Index VectorizedSize = (size / PacketSize) * PacketSize; - for (Index i = UnrolledSize; i < VectorizedSize; i += PacketSize) { - evaluator.evalPacket(i); - } - for (Index i = VectorizedSize; i < size; ++i) { - evaluator.evalScalar(i); - } - } - evaluator.cleanup(); - } -}; - -template <typename Expression, bool Vectorizable> -class TensorExecutor<Expression, DefaultDevice, Vectorizable, true> { - public: - typedef typename Expression::Index Index; - EIGEN_DEVICE_FUNC - static inline void run(const Expression& expr, - const DefaultDevice& device = DefaultDevice()) { - typedef TensorEvaluator<Expression, DefaultDevice> Evaluator; - typedef typename traits<Expression>::Scalar Scalar; - typedef typename traits<Expression>::Index Index; - const std::size_t NumDims = traits<Expression>::NumDimensions; - - typedef TensorBlockMapper<Index, - typename internal::remove_const<Scalar>::type, - NumDims, Evaluator::Layout> TensorBlockMapper; - typedef TensorBlock<Index, typename internal::remove_const<Scalar>::type, - NumDims, Evaluator::Layout> TensorBlock; - - Evaluator evaluator(expr, device); - std::size_t total_size = array_prod(evaluator.dimensions()); - std::size_t cache_size = device.firstLevelCacheSize() / sizeof(Scalar); - if (total_size < cache_size) { - // TODO(andydavis) Reduce block management overhead for small tensors. - internal::TensorExecutor<Expression, DefaultDevice, Vectorizable, - false>::run(expr, device); - return; - } - - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) { - // Size tensor blocks to fit in cache (or requested target block size). 
- size_t block_total_size = numext::mini(cache_size, total_size); - TensorBlockShapeType block_shape = kUniformAllDims; - // Query expression tree for desired block size/shape. - std::vector<internal::TensorOpResourceRequirements> resources; - evaluator.getResourceRequirements(&resources); - if (!resources.empty()) { - // TODO(andydavis) Implement different policies (i.e. revert to a - // default policy if block shapes/sizes conflict). - block_shape = resources[0].block_shape; - block_total_size = resources[0].block_total_size; - } - - TensorBlockMapper block_mapper(evaluator.dimensions(), - block_shape, - block_total_size); - - Scalar* data = static_cast<Scalar*>(device.allocate( - block_total_size * sizeof(Scalar))); - - const Index total_block_count = block_mapper.total_block_count(); - for (Index i = 0; i < total_block_count; ++i) { - TensorBlock block = block_mapper.GetBlockForIndex(i, data); - evaluator.evalBlock(&block); - } - device.deallocate(data); - } - evaluator.cleanup(); - } -}; - -// Multicore strategy: the index space is partitioned and each partition is executed on a single core -#ifdef EIGEN_USE_THREADS -template <typename Evaluator, typename Index, bool Vectorizable> -struct EvalRange { - static void run(Evaluator evaluator, const Index first, const Index last) { - eigen_assert(last > first); - for (Index i = first; i < last; ++i) { - evaluator.evalScalar(i); - } - } -}; - -template <typename Evaluator, typename Index> -struct EvalRange<Evaluator, Index, true> { - static void run(Evaluator evaluator, const Index first, const Index last) { - eigen_assert(last > first); - - Index i = first; - static const int PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size; - if (last - first >= PacketSize) { - eigen_assert(first % PacketSize == 0); - Index lastPacket = last - (last % PacketSize); - for (; i < lastPacket; i += PacketSize) { - evaluator.evalPacket(i); - } - } - - for (; i < last; ++i) { - evaluator.evalScalar(i); - } - } -}; 
- -template <typename Expression, bool Vectorizable, bool Tileable> -class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> { - public: - typedef typename Expression::Index Index; - static inline void run(const Expression& expr, const ThreadPoolDevice& device) - { - if (device.numThreads() <= 1) { - DefaultDevice dd; - TensorExecutor<Expression, DefaultDevice, Vectorizable, Tileable>::run(expr, dd); - return; - } - - typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator; - Evaluator evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) - { - const Index size = array_prod(evaluator.dimensions()); - - static const Index PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1; - Index blocksz = std::ceil<Index>(static_cast<float>(size)/device.numThreads()) + PacketSize - 1; - const Index blocksize = numext::maxi<Index>(PacketSize, (blocksz - (blocksz % PacketSize))); - const Index numblocks = size / blocksize; - - Index i = 0; - FixedSizeVector<Notification*> results(numblocks); - for (int i = 0; i < numblocks; ++i) { - results.push_back(device.enqueue(&EvalRange<Evaluator, Index, Vectorizable>::run, evaluator, i*blocksize, (i+1)*blocksize)); - } - - if (numblocks * blocksize < size) { - EvalRange<Evaluator, Index, Vectorizable>::run(evaluator, numblocks * blocksize, size); - } - - for (int i = 0; i < numblocks; ++i) { - wait_until_ready(results[i]); - delete results[i]; - } - } - evaluator.cleanup(); - } -}; - -template <typename Index, typename Scalar> -struct BlockRange { - BlockRange(Index s, Index l, Scalar* d) - : index_start(s), index_limit(l), data(d) {} - const Index index_start; - const Index index_limit; - Scalar* data; -}; - -template <typename Evaluator, typename Index, typename Scalar, - std::size_t NumDims> -struct EvalBlockRange { - typedef TensorBlockMapper<Index, Scalar, NumDims, Evaluator::Layout> - BlockMapper; - - static void 
run(Evaluator evaluator, const BlockMapper& block_mapper, - BlockRange<Index, Scalar> block_range) { - typedef TensorBlock<Index, Scalar, NumDims, Evaluator::Layout> - TensorBlock; - eigen_assert(block_range.index_limit > block_range.index_start); - - for (Index i = block_range.index_start; i < block_range.index_limit; ++i) { - TensorBlock block = block_mapper.GetBlockForIndex(i, block_range.data); - evaluator.evalBlock(&block); - } - } -}; - -template <typename Expression, bool Vectorizable> -class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, true> { - public: - typedef typename Expression::Index Index; - static inline void run(const Expression& expr, - const ThreadPoolDevice& device) { - typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator; - typedef typename internal::remove_const< - typename traits<Expression>::Scalar>::type Scalar; - typedef typename traits<Expression>::Index Index; - static const std::size_t NumDims = traits<Expression>::NumDimensions; - typedef TensorBlockMapper<Index, Scalar, NumDims, Evaluator::Layout> - TensorBlockMapper; - typedef TensorBlock<Index, Scalar, NumDims, Evaluator::Layout> - TensorBlock; - typedef BlockRange<Index, Scalar> BlockRange; - - Evaluator evaluator(expr, device); - std::size_t total_size = array_prod(evaluator.dimensions()); - std::size_t cache_size = device.firstLevelCacheSize() / sizeof(Scalar); - if (total_size < cache_size || device.numThreads() <= 1) { - // TODO(andydavis) Reduce block management overhead for small tensors. - DefaultDevice dd; - internal::TensorExecutor<Expression, DefaultDevice, Vectorizable, false>::run(expr, dd); - return; - } - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) { - TensorBlockShapeType block_shape = kUniformAllDims; - size_t block_total_size = 0; - // Query expression tree for desired block size/shape. 
- std::vector<internal::TensorOpResourceRequirements> resources; - evaluator.getResourceRequirements(&resources); - if (!resources.empty()) { - // TODO(andydavis) Implement different shape/size policies. - block_shape = resources[0].block_shape; - block_total_size = resources[0].block_total_size; - } - - // Divide the tensor coefficients across the number of threads, subject - // to min/max block size constraints. - const size_t min_block_size = - device.firstLevelCacheSize() / sizeof(Scalar); - const size_t max_block_size = block_total_size > 0 ? block_total_size : - device.lastLevelCacheSize() / sizeof(Scalar); - const size_t target_block_size = numext::maxi( - min_block_size, - numext::mini(static_cast<size_t>(array_prod(evaluator.dimensions())) / device.numThreads(), - max_block_size)); - - TensorBlockMapper block_mapper(evaluator.dimensions(), - block_shape, - target_block_size); - - const Index block_partition_size = - (block_mapper.total_block_count() + device.numThreads() - 1) / - device.numThreads(); - const Index block_partition_count = - (block_mapper.total_block_count() + block_partition_size - 1) / - block_partition_size; - - if (block_partition_count == 1) { - // Avoid thread hop if no parallelism is possible. - Scalar* data = static_cast<Scalar*>( - device.allocate(target_block_size * sizeof(Scalar))); - EvalBlockRange<Evaluator, Index, Scalar, NumDims>::run( - evaluator, block_mapper, - BlockRange(0, block_mapper.total_block_count(), data)); - device.deallocate(data); - } else { - // Multi-threaded case. - struct ThreadState { - Notification* done; - Scalar* data; - }; - FixedSizeVector<ThreadState> thread_state(block_partition_count, - ThreadState()); - - // Dispatch threads. 
- for (int i = 0; i < block_partition_count; ++i) { - thread_state[i].data = static_cast<Scalar*>( - device.allocate(target_block_size * sizeof(Scalar))); - thread_state[i].done = device.enqueue( - &EvalBlockRange<Evaluator, Index, Scalar, NumDims>::run, - evaluator, block_mapper, - BlockRange(i * block_partition_size, - numext::mini((i + 1) * block_partition_size, - block_mapper.total_block_count()), - thread_state[i].data)); - } - - // Join threads. - for (int i = 0; i < block_partition_count; ++i) { - wait_until_ready(thread_state[i].done); - delete thread_state[i].done; - device.deallocate(thread_state[i].data); - } - } - } - evaluator.cleanup(); - } -}; - -#endif - - -// GPU: the evaluation of the expression is offloaded to a GPU. -#if defined(EIGEN_USE_GPU) - -template <typename Expression, bool Tileable> -class TensorExecutor<Expression, GpuDevice, false, Tileable> { - public: - typedef typename Expression::Index Index; - static void run(const Expression& expr, const GpuDevice& device); -}; - -template <typename Expression, bool Tileable> -class TensorExecutor<Expression, GpuDevice, true, Tileable> { - public: - typedef typename Expression::Index Index; - static void run(const Expression& expr, const GpuDevice& device); -}; - -#if defined(__CUDACC__) -template <typename Evaluator, typename Index> -__global__ void -__launch_bounds__(1024) - EigenMetaKernel_NonVectorizable(Evaluator memcopied_eval, Index size) { - - const Index first_index = blockIdx.x * blockDim.x + threadIdx.x; - const Index step_size = blockDim.x * gridDim.x; - - // Cuda memcopies the kernel arguments. That's fine for POD, but for more - // complex types such as evaluators we should really conform to the C++ - // standard and call a proper copy constructor. 
- Evaluator eval(memcopied_eval); - - // Use the scalar path - for (Index i = first_index; i < size; i += step_size) { - eval.evalScalar(i); - } -} - -template <typename Evaluator, typename Index> -__global__ void -__launch_bounds__(1024) - EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) { - - const Index first_index = blockIdx.x * blockDim.x + threadIdx.x; - const Index step_size = blockDim.x * gridDim.x; - - // Cuda memcopies the kernel arguments. That's fine for POD, but for more - // complex types such as evaluators we should really conform to the C++ - // standard and call a proper copy constructor. - Evaluator eval(memcopied_eval); - - // Use the vector path - const Index PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size; - const Index vectorized_step_size = step_size * PacketSize; - const Index vectorized_size = (size / PacketSize) * PacketSize; - for (Index i = first_index * PacketSize; i < vectorized_size; - i += vectorized_step_size) { - eval.evalPacket(i); - } - for (Index i = vectorized_size + first_index; i < size; i += step_size) { - eval.evalScalar(i); - } -} - -/*static*/ -template <typename Expression, bool Tileable> -inline void TensorExecutor<Expression, GpuDevice, false, Tileable>::run( - const Expression& expr, const GpuDevice& device) { - TensorEvaluator<Expression, GpuDevice> evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) { - const int block_size = device.maxCudaThreadsPerBlock(); - const int max_blocks = device.getNumCudaMultiProcessors() * - device.maxCudaThreadsPerMultiProcessor() / block_size; - const Index size = array_prod(evaluator.dimensions()); - // Create a least one block to ensure we won't crash when tensorflow calls with tensors of size 0. 
- const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1); - - LAUNCH_CUDA_KERNEL( - (EigenMetaKernel_NonVectorizable<TensorEvaluator<Expression, GpuDevice>, - Index>), - num_blocks, block_size, 0, device, evaluator, size); - } - evaluator.cleanup(); -} - -/*static*/ -template <typename Expression, bool Tileable> -inline void TensorExecutor<Expression, GpuDevice, true, Tileable>::run( - const Expression& expr, const GpuDevice& device) { - TensorEvaluator<Expression, GpuDevice> evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) { - const int block_size = device.maxCudaThreadsPerBlock(); - const int max_blocks = device.getNumCudaMultiProcessors() * - device.maxCudaThreadsPerMultiProcessor() / block_size; - const Index size = array_prod(evaluator.dimensions()); - // Create a least one block to ensure we won't crash when tensorflow calls with tensors of size 0. - const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1); - - LAUNCH_CUDA_KERNEL( - (EigenMetaKernel_Vectorizable<TensorEvaluator<Expression, GpuDevice>, - Index>), - num_blocks, block_size, 0, device, evaluator, size); - } - evaluator.cleanup(); -} - -#endif // __CUDACC__ -#endif // EIGEN_USE_GPU - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h deleted file mode 100644 index 49d849e233..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +++ /dev/null @@ -1,291 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXPR_H -#define EIGEN_CXX11_TENSOR_TENSOR_EXPR_H - -namespace Eigen { - -/** \class TensorExpr - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor expression classes. - * - * The TensorCwiseNullaryOp class applies a nullary operators to an expression. - * This is typically used to generate constants. - * - * The TensorCwiseUnaryOp class represents an expression where a unary operator - * (e.g. cwiseSqrt) is applied to an expression. - * - * The TensorCwiseBinaryOp class represents an expression where a binary - * operator (e.g. addition) is applied to a lhs and a rhs expression. - * - */ -namespace internal { -template<typename NullaryOp, typename XprType> -struct traits<TensorCwiseNullaryOp<NullaryOp, XprType> > - : traits<XprType> -{ - typedef traits<XprType> XprTraits; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::Nested XprTypeNested; - typedef typename remove_reference<XprTypeNested>::type _XprTypeNested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; - - enum { - Flags = 0, - }; -}; - -} // end namespace internal - - - -template<typename NullaryOp, typename XprType> -class TensorCwiseNullaryOp : public TensorBase<TensorCwiseNullaryOp<NullaryOp, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef TensorCwiseNullaryOp<NullaryOp, XprType> Nested; - typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::StorageKind StorageKind; - typedef typename 
Eigen::internal::traits<TensorCwiseNullaryOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseNullaryOp(const XprType& xpr, const NullaryOp& func = NullaryOp()) - : m_xpr(xpr), m_functor(func) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - nestedExpression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - const NullaryOp& functor() const { return m_functor; } - - protected: - typename XprType::Nested m_xpr; - const NullaryOp m_functor; -}; - - - -namespace internal { -template<typename UnaryOp, typename XprType> -struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> > - : traits<XprType> -{ - // TODO(phli): Add InputScalar, InputPacket. Check references to - // current Scalar/Packet to see if the intent is Input or Output. - typedef typename result_of<UnaryOp(typename XprType::Scalar)>::type Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprType::Nested XprTypeNested; - typedef typename remove_reference<XprTypeNested>::type _XprTypeNested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename UnaryOp, typename XprType> -struct eval<TensorCwiseUnaryOp<UnaryOp, XprType>, Eigen::Dense> -{ - typedef const TensorCwiseUnaryOp<UnaryOp, XprType>& type; -}; - -template<typename UnaryOp, typename XprType> -struct nested<TensorCwiseUnaryOp<UnaryOp, XprType>, 1, typename eval<TensorCwiseUnaryOp<UnaryOp, XprType> >::type> -{ - typedef TensorCwiseUnaryOp<UnaryOp, XprType> type; -}; - -} // end namespace internal - - - -template<typename UnaryOp, typename XprType> -class TensorCwiseUnaryOp : public TensorBase<TensorCwiseUnaryOp<UnaryOp, XprType>, ReadOnlyAccessors> -{ - public: - // TODO(phli): Add InputScalar, InputPacket. Check references to - // current Scalar/Packet to see if the intent is Input or Output. 
- typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef Scalar CoeffReturnType; - typedef typename Eigen::internal::nested<TensorCwiseUnaryOp>::type Nested; - typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) - : m_xpr(xpr), m_functor(func) {} - - EIGEN_DEVICE_FUNC - const UnaryOp& functor() const { return m_functor; } - - /** \returns the nested expression */ - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - nestedExpression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const UnaryOp m_functor; -}; - - -namespace internal { -template<typename BinaryOp, typename LhsXprType, typename RhsXprType> -struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs - // are different. - // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to - // current Scalar/Packet to see if the intent is Inputs or Output. 
- typedef typename result_of< - BinaryOp(typename LhsXprType::Scalar, - typename RhsXprType::Scalar)>::type Scalar; - typedef traits<LhsXprType> XprTraits; - typedef typename promote_storage_type< - typename traits<LhsXprType>::StorageKind, - typename traits<RhsXprType>::StorageKind>::ret StorageKind; - typedef typename promote_index_type< - typename traits<LhsXprType>::Index, - typename traits<RhsXprType>::Index>::type Index; - typedef typename LhsXprType::Nested LhsNested; - typedef typename RhsXprType::Nested RhsNested; - typedef typename remove_reference<LhsNested>::type _LhsNested; - typedef typename remove_reference<RhsNested>::type _RhsNested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; - - enum { - Flags = 0, - }; -}; - -template<typename BinaryOp, typename LhsXprType, typename RhsXprType> -struct eval<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, Eigen::Dense> -{ - typedef const TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>& type; -}; - -template<typename BinaryOp, typename LhsXprType, typename RhsXprType> -struct nested<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, 1, typename eval<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >::type> -{ - typedef TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> type; -}; - -} // end namespace internal - - - -template<typename BinaryOp, typename LhsXprType, typename RhsXprType> -class TensorCwiseBinaryOp : public TensorBase<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, ReadOnlyAccessors> -{ - public: - // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to - // current Scalar/Packet to see if the intent is Inputs or Output. 
- typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef Scalar CoeffReturnType; - typedef typename Eigen::internal::nested<TensorCwiseBinaryOp>::type Nested; - typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp()) - : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_functor(func) {} - - EIGEN_DEVICE_FUNC - const BinaryOp& functor() const { return m_functor; } - - /** \returns the nested expressions */ - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename LhsXprType::Nested>::type& - lhsExpression() const { return m_lhs_xpr; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename RhsXprType::Nested>::type& - rhsExpression() const { return m_rhs_xpr; } - - protected: - typename LhsXprType::Nested m_lhs_xpr; - typename RhsXprType::Nested m_rhs_xpr; - const BinaryOp m_functor; -}; - - -namespace internal { -template<typename IfXprType, typename ThenXprType, typename ElseXprType> -struct traits<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> > - : traits<ThenXprType> -{ - typedef typename traits<ThenXprType>::Scalar Scalar; - typedef traits<ThenXprType> XprTraits; - typedef typename promote_storage_type<typename traits<ThenXprType>::StorageKind, - typename traits<ElseXprType>::StorageKind>::ret StorageKind; - typedef typename promote_index_type<typename traits<ElseXprType>::Index, - typename traits<ThenXprType>::Index>::type Index; - typedef typename IfXprType::Nested IfNested; - typedef typename ThenXprType::Nested ThenNested; - typedef typename ElseXprType::Nested ElseNested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename 
IfXprType, typename ThenXprType, typename ElseXprType> -struct eval<TensorSelectOp<IfXprType, ThenXprType, ElseXprType>, Eigen::Dense> -{ - typedef const TensorSelectOp<IfXprType, ThenXprType, ElseXprType>& type; -}; - -template<typename IfXprType, typename ThenXprType, typename ElseXprType> -struct nested<TensorSelectOp<IfXprType, ThenXprType, ElseXprType>, 1, typename eval<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> >::type> -{ - typedef TensorSelectOp<IfXprType, ThenXprType, ElseXprType> type; -}; - -} // end namespace internal - - -template<typename IfXprType, typename ThenXprType, typename ElseXprType> -class TensorSelectOp : public TensorBase<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorSelectOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::promote_storage_type<typename ThenXprType::CoeffReturnType, - typename ElseXprType::CoeffReturnType>::ret CoeffReturnType; - typedef typename Eigen::internal::nested<TensorSelectOp>::type Nested; - typedef typename Eigen::internal::traits<TensorSelectOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorSelectOp>::Index Index; - - EIGEN_DEVICE_FUNC - TensorSelectOp(const IfXprType& a_condition, - const ThenXprType& a_then, - const ElseXprType& a_else) - : m_condition(a_condition), m_then(a_then), m_else(a_else) - { } - - EIGEN_DEVICE_FUNC - const IfXprType& ifExpression() const { return m_condition; } - - EIGEN_DEVICE_FUNC - const ThenXprType& thenExpression() const { return m_then; } - - EIGEN_DEVICE_FUNC - const ElseXprType& elseExpression() const { return m_else; } - - protected: - typename IfXprType::Nested m_condition; - typename ThenXprType::Nested m_then; - typename ElseXprType::Nested m_else; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_EXPR_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h 
b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h deleted file mode 100644 index ac73366762..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ /dev/null @@ -1,846 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Jianwei Cui <thucjw@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H -#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H -namespace Eigen { - -/** \class TensorFFT - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor FFT class. - * - * TODO: - * Vectorize the Cooley Tukey and the Bluestein algorithm - * Add support for multithreaded evaluation - * Improve the performance on GPU - */ - -template <bool NeedUprade> struct MakeComplex { - template <typename T> - #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && !defined(__GCUDACC__) - EIGEN_DEVICE_FUNC - #endif - T operator() (const T& val) const { return val; } -}; - -template <> struct MakeComplex<true> { - template <typename T> - #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && !defined(__GCUDACC__) - EIGEN_DEVICE_FUNC - #endif - std::complex<T> operator() (const T& val) const { return std::complex<T>(val, 0); } -}; - -template <> struct MakeComplex<false> { - template <typename T> - #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && !defined(__GCUDACC__) - EIGEN_DEVICE_FUNC - #endif - std::complex<T> operator() (const std::complex<T>& val) const { return val; } -}; - -template <int ResultType> struct PartOf { - template <typename T> T operator() (const T& val) const { return val; } -}; - -template <> struct PartOf<RealPart> { - template <typename T> T operator() (const std::complex<T>& val) const { return val.real(); } -}; - -template <> struct PartOf<ImagPart> { - 
template <typename T> T operator() (const std::complex<T>& val) const { return val.imag(); } -}; - -namespace internal { -template <typename FFT, typename XprType, int FFTResultType, int FFTDir> -struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> { - typedef traits<XprType> XprTraits; - typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar; - typedef typename std::complex<RealScalar> ComplexScalar; - typedef typename XprTraits::Scalar InputScalar; - typedef typename conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template <typename FFT, typename XprType, int FFTResultType, int FFTDirection> -struct eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, Eigen::Dense> { - typedef const TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>& type; -}; - -template <typename FFT, typename XprType, int FFTResultType, int FFTDirection> -struct nested<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, 1, typename eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> >::type> { - typedef TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> type; -}; - -} // end namespace internal - -template <typename FFT, typename XprType, int FFTResultType, int FFTDir> -class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir>, ReadOnlyAccessors> { - public: - typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename std::complex<RealScalar> ComplexScalar; - typedef typename internal::conditional<FFTResultType == 
RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar; - typedef OutputScalar CoeffReturnType; - typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested; - typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorFFTOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft) - : m_xpr(expr), m_fft(fft) {} - - EIGEN_DEVICE_FUNC - const FFT& fft() const { return m_fft; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& expression() const { - return m_xpr; - } - - protected: - typename XprType::Nested m_xpr; - const FFT m_fft; -}; - -// Eval as rvalue -template <typename FFT, typename ArgType, typename Device, int FFTResultType, int FFTDir> -struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> { - typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename std::complex<RealScalar> ComplexScalar; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions; - typedef internal::traits<XprType> XprTraits; - typedef typename XprTraits::Scalar InputScalar; - typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar; - typedef OutputScalar CoeffReturnType; - typedef typename PacketType<OutputScalar, Device>::type PacketReturnType; - - enum { - IsAligned = false, - PacketAccess = true, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, - }; - - EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_data(NULL), m_impl(op.expression(), device), m_fft(op.fft()), m_device(device) { - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - for (int i = 0; i < NumDims; ++i) { - eigen_assert(input_dims[i] > 0); - m_dimensions[i] = input_dims[i]; - } - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_strides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; - } - } else { - m_strides[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; - } - } - m_size = m_dimensions.TotalSize(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { - return m_dimensions; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) { - m_impl.evalSubExprsIfNeeded(NULL); - if (data) { - evalToBuf(data); - return false; - } else { - m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size); - evalToBuf(m_data); - return true; - } - } - - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - if (m_data) { - m_device.deallocate(m_data); - m_data = NULL; - } - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const { - return m_data[index]; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const { - return internal::ploadt<PacketReturnType, LoadMode>(m_data + index); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } - - - private: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) { - const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value; - ComplexScalar* buf = write_to_out ? 
(ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size); - - for (int i = 0; i < m_size; ++i) { - buf[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(m_impl.coeff(i)); - } - - for (int i = 0; i < m_fft.size(); ++i) { - int dim = m_fft[i]; - eigen_assert(dim >= 0 && dim < NumDims); - Index line_len = m_dimensions[dim]; - eigen_assert(line_len >= 1); - ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len); - const bool is_power_of_two = isPowerOfTwo(line_len); - const int good_composite = is_power_of_two ? 0 : findGoodComposite(line_len); - const int log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite); - - ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); - ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); - ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1)); - if (!is_power_of_two) { - ComplexScalar pos_j_base = ComplexScalar(std::cos(M_PI/line_len), std::sin(M_PI/line_len)); - for (int i = 0; i < line_len + 1; ++i) { - pos_j_base_powered[i] = std::pow(pos_j_base, i * i); - } - } - - for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) { - Index base_offset = getBaseOffsetFromIndex(partial_index, dim); - - // get data into line_buf - for (int j = 0; j < line_len; ++j) { - Index offset = getIndexFromOffset(base_offset, dim, j); - line_buf[j] = buf[offset]; - } - - // processs the line - if (is_power_of_two) { - processDataLineCooleyTukey(line_buf, line_len, log_len); - } - else { - processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered); - } - - // write back - for (int j = 0; j < line_len; ++j) { - const ComplexScalar div_factor = (FFTDir == FFT_FORWARD) ? 
ComplexScalar(1, 0) : ComplexScalar(line_len, 0); - Index offset = getIndexFromOffset(base_offset, dim, j); - buf[offset] = line_buf[j] / div_factor; - } - } - m_device.deallocate(line_buf); - if (!pos_j_base_powered) { - m_device.deallocate(a); - m_device.deallocate(b); - m_device.deallocate(pos_j_base_powered); - } - } - - if(!write_to_out) { - for (int i = 0; i < m_size; ++i) { - data[i] = PartOf<FFTResultType>()(buf[i]); - } - m_device.deallocate(buf); - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(int x) { - eigen_assert(x > 0); - return !(x & (x - 1)); - } - - //the composite number for padding, used in Bluestein's FFT algorithm - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int findGoodComposite(int n) { - int i = 2; - while (i < 2 * n - 1) i *= 2; - return i; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int getLog2(int m) { - int log2m = 0; - while (m >>= 1) log2m++; - return log2m; - } - - // Call Cooley Tukey algorithm directly, data length must be power of 2 - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, int line_len, int log_len) { - eigen_assert(isPowerOfTwo(line_len)); - scramble_FFT(line_buf, line_len); - compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len); - } - - // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, int line_len, int good_composite, int log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) { - int n = line_len; - int m = good_composite; - ComplexScalar* data = line_buf; - - for (int i = 0; i < n; ++i) { - if(FFTDir == FFT_FORWARD) { - a[i] = data[i] * std::conj(pos_j_base_powered[i]); - } - else { - a[i] = data[i] * pos_j_base_powered[i]; - } - } - for (int i = n; i < m; ++i) { - a[i] = ComplexScalar(0, 0); - } - - for (int i = 0; i < n; ++i) { - 
if(FFTDir == FFT_FORWARD) { - b[i] = pos_j_base_powered[i]; - } - else { - b[i] = std::conj(pos_j_base_powered[i]); - } - } - for (int i = n; i < m - n; ++i) { - b[i] = ComplexScalar(0, 0); - } - for (int i = m - n; i < m; ++i) { - if(FFTDir == FFT_FORWARD) { - b[i] = pos_j_base_powered[m-i]; - } - else { - b[i] = std::conj(pos_j_base_powered[m-i]); - } - } - - scramble_FFT(a, m); - compute_1D_Butterfly<FFT_FORWARD>(a, m, log_len); - - scramble_FFT(b, m); - compute_1D_Butterfly<FFT_FORWARD>(b, m, log_len); - - for (int i = 0; i < m; ++i) { - a[i] *= b[i]; - } - - scramble_FFT(a, m); - compute_1D_Butterfly<FFT_REVERSE>(a, m, log_len); - - //Do the scaling after ifft - for (int i = 0; i < m; ++i) { - a[i] /= m; - } - - for (int i = 0; i < n; ++i) { - if(FFTDir == FFT_FORWARD) { - data[i] = a[i] * std::conj(pos_j_base_powered[i]); - } - else { - data[i] = a[i] * pos_j_base_powered[i]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, int n) { - eigen_assert(isPowerOfTwo(n)); - int j = 1; - for (int i = 1; i < n; ++i){ - if (j > i) { - std::swap(data[j-1], data[i-1]); - } - int m = n >> 1; - while (m >= 2 && j > m) { - j -= m; - m >>= 1; - } - j += m; - } - } - - template<int Dir> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(ComplexScalar* data, int n, int n_power_of_2) { - eigen_assert(isPowerOfTwo(n)); - if (n == 1) { - return; - } - else if (n == 2) { - ComplexScalar tmp = data[1]; - data[1] = data[0] - data[1]; - data[0] += tmp; - return; - } - else if (n == 4) { - ComplexScalar tmp[4]; - tmp[0] = data[0] + data[1]; - tmp[1] = data[0] - data[1]; - tmp[2] = data[2] + data[3]; - if(Dir == FFT_FORWARD) { - tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]); - } - else { - tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]); - } - data[0] = tmp[0] + tmp[2]; - data[1] = tmp[1] + tmp[3]; - data[2] = tmp[0] - tmp[2]; - data[3] = tmp[1] - tmp[3]; - return; - } - else if (n == 8) { - 
ComplexScalar tmp_1[8]; - ComplexScalar tmp_2[8]; - - tmp_1[0] = data[0] + data[1]; - tmp_1[1] = data[0] - data[1]; - tmp_1[2] = data[2] + data[3]; - if (Dir == FFT_FORWARD) { - tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1); - } - else { - tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1); - } - tmp_1[4] = data[4] + data[5]; - tmp_1[5] = data[4] - data[5]; - tmp_1[6] = data[6] + data[7]; - if (Dir == FFT_FORWARD) { - tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1); - } - else { - tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1); - } - tmp_2[0] = tmp_1[0] + tmp_1[2]; - tmp_2[1] = tmp_1[1] + tmp_1[3]; - tmp_2[2] = tmp_1[0] - tmp_1[2]; - tmp_2[3] = tmp_1[1] - tmp_1[3]; - tmp_2[4] = tmp_1[4] + tmp_1[6]; - // SQRT2DIV2 = sqrt(2)/2 - #define SQRT2DIV2 0.7071067811865476 - if (Dir == FFT_FORWARD) { - tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2); - tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1); - tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2); - } - else { - tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2); - tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1); - tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2); - } - data[0] = tmp_2[0] + tmp_2[4]; - data[1] = tmp_2[1] + tmp_2[5]; - data[2] = tmp_2[2] + tmp_2[6]; - data[3] = tmp_2[3] + tmp_2[7]; - data[4] = tmp_2[0] - tmp_2[4]; - data[5] = tmp_2[1] - tmp_2[5]; - data[6] = tmp_2[2] - tmp_2[6]; - data[7] = tmp_2[3] - tmp_2[7]; - - return; - } - else { - compute_1D_Butterfly<Dir>(data, n/2, n_power_of_2 - 1); - compute_1D_Butterfly<Dir>(data + n/2, n/2, n_power_of_2 - 1); - //Original code: - //RealScalar wtemp = std::sin(M_PI/n); - //RealScalar wpi = -std::sin(2 * M_PI/n); - RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2]; - RealScalar wpi; - if (Dir == FFT_FORWARD) { - wpi = m_minus_sin_2_PI_div_n_LUT[n_power_of_2]; - } - else { - wpi = 0 - m_minus_sin_2_PI_div_n_LUT[n_power_of_2]; - } - - 
const ComplexScalar wp(wtemp, wpi); - ComplexScalar w(1.0, 0.0); - for(int i = 0; i < n/2; i++) { - ComplexScalar temp(data[i + n/2] * w); - data[i + n/2] = data[i] - temp; - data[i] += temp; - w += w * wp; - } - return; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const { - Index result = 0; - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > omitted_dim; --i) { - const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; - const Index idx = index / partial_m_stride; - index -= idx * partial_m_stride; - result += idx * m_strides[i]; - } - result += index; - } - else { - for (int i = 0; i < omitted_dim; ++i) { - const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; - const Index idx = index / partial_m_stride; - index -= idx * partial_m_stride; - result += idx * m_strides[i]; - } - result += index; - } - // Value of index_coords[omitted_dim] is not determined to this step - return result; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const { - Index result = base + offset * m_strides[omitted_dim] ; - return result; - } - - protected: - int m_size; - const FFT& m_fft; - Dimensions m_dimensions; - array<Index, NumDims> m_strides; - TensorEvaluator<ArgType, Device> m_impl; - CoeffReturnType* m_data; - const Device& m_device; - - // This will support a maximum FFT size of 2^32 for each dimension - // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2; - RealScalar m_sin_PI_div_n_LUT[32] = { - 0.0, - -2, - -0.999999999999999, - -0.292893218813453, - -0.0761204674887130, - -0.0192147195967696, - -0.00481527332780311, - -0.00120454379482761, - -3.01181303795779e-04, - -7.52981608554592e-05, - -1.88247173988574e-05, - -4.70619042382852e-06, - -1.17654829809007e-06, - -2.94137117780840e-07, - -7.35342821488550e-08, - -1.83835707061916e-08, - 
-4.59589268710903e-09, - -1.14897317243732e-09, - -2.87243293150586e-10, - -7.18108232902250e-11, - -1.79527058227174e-11, - -4.48817645568941e-12, - -1.12204411392298e-12, - -2.80511028480785e-13, - -7.01277571201985e-14, - -1.75319392800498e-14, - -4.38298482001247e-15, - -1.09574620500312e-15, - -2.73936551250781e-16, - -6.84841378126949e-17, - -1.71210344531737e-17, - -4.28025861329343e-18 - }; - - // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i)); - RealScalar m_minus_sin_2_PI_div_n_LUT[32] = { - 0.0, - 0.0, - -1.00000000000000e+00, - -7.07106781186547e-01, - -3.82683432365090e-01, - -1.95090322016128e-01, - -9.80171403295606e-02, - -4.90676743274180e-02, - -2.45412285229123e-02, - -1.22715382857199e-02, - -6.13588464915448e-03, - -3.06795676296598e-03, - -1.53398018628477e-03, - -7.66990318742704e-04, - -3.83495187571396e-04, - -1.91747597310703e-04, - -9.58737990959773e-05, - -4.79368996030669e-05, - -2.39684498084182e-05, - -1.19842249050697e-05, - -5.99211245264243e-06, - -2.99605622633466e-06, - -1.49802811316901e-06, - -7.49014056584716e-07, - -3.74507028292384e-07, - -1.87253514146195e-07, - -9.36267570730981e-08, - -4.68133785365491e-08, - -2.34066892682746e-08, - -1.17033446341373e-08, - -5.85167231706864e-09, - -2.92583615853432e-09 - }; -}; - -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && !defined(__GCUDACC__) - -template<typename OutputScalar, typename RealScalar, typename ComplexScalar, int ResultType> -struct writeToDeviceData { - void operator()(OutputScalar* d_data, ComplexScalar* data_buf, size_t size) { - } -}; - -template<typename OutputScalar, typename RealScalar, typename ComplexScalar> -struct writeToDeviceData<OutputScalar, RealScalar, ComplexScalar, Eigen::BothParts> { - void operator()(OutputScalar* d_data, ComplexScalar* data_buf, size_t size) { - cudaMemcpy(d_data, data_buf, size * sizeof(ComplexScalar), cudaMemcpyDeviceToDevice); - } -}; - -template<typename OutputScalar, typename RealScalar, typename 
ComplexScalar> -struct writeToDeviceData<OutputScalar, RealScalar, ComplexScalar, Eigen::RealPart> { - void operator()(OutputScalar* d_data, ComplexScalar* data_buf, size_t size) { - cudaMemcpy2D(d_data, sizeof(RealScalar), (RealScalar*) data_buf, 2 * sizeof(RealScalar), sizeof(RealScalar), size, cudaMemcpyDeviceToDevice); - } -}; - -template<typename OutputScalar, typename RealScalar, typename ComplexScalar> -struct writeToDeviceData<OutputScalar, RealScalar, ComplexScalar, Eigen::ImagPart> { - void operator()(OutputScalar* d_data, ComplexScalar* data_buf, size_t size) { - RealScalar* data_buf_offset = &(((RealScalar*) data_buf)[1]); - cudaMemcpy2D(d_data, sizeof(RealScalar), data_buf_offset, 2 * sizeof(RealScalar), sizeof(RealScalar), size, cudaMemcpyDeviceToDevice); - } -}; - -template <typename InputScalar, typename RealScalar, typename ComplexScalar, typename InputEvaluator> -__global__ void copyValues(ComplexScalar* d_data, InputEvaluator eval, int total_size) { - int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i < total_size) { - d_data[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(eval.coeff(i)); - } -} - -template<typename Scalar, typename Index, int NumDims> -__global__ void fillLineBuf(Scalar* line_buf, Scalar* data_buf, int line_len, - array<Index, NumDims> coords, array<Index, NumDims> m_strides, int dim) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - if(j < line_len) { - coords[dim] = j; - Index index = 0; - for (int i = 0; i < NumDims; ++i) { - index += coords[i] * m_strides[i]; - } - line_buf[j] = data_buf[index]; - } -} - -template<typename ComplexScalar, typename RealScalar, typename Index, int NumDims> -__global__ void writebackLineBuf(ComplexScalar* line_buf, ComplexScalar* data_buf, int line_len, - array<Index, NumDims> coords, array<Index, NumDims> m_strides, int dim, RealScalar div_factor) { - int j = blockIdx.x * blockDim.x + threadIdx.x; - if(j < line_len) { - coords[dim] = j; - Index index = 0; - for (int 
i = 0; i < NumDims; ++i) { - index += coords[i] * m_strides[i]; - } - - data_buf[index] = line_buf[j]; - ((RealScalar*) data_buf)[2*index] /= div_factor; - ((RealScalar*) data_buf)[2*index + 1] /= div_factor; - } -} - -template <typename FFT, typename ArgType, int FFTResultType, int FFTDir> -struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, GpuDevice> { - typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, GpuDevice>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::Scalar InputScalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename std::complex<RealScalar> ComplexScalar; - typedef typename internal::conditional<FFTResultType == Eigen::BothParts, std::complex<RealScalar>, RealScalar>::type OutputScalar; - typedef typename TensorEvaluator<ArgType, GpuDevice>::Dimensions InputDimensions; - typedef OutputScalar CoeffReturnType; - typedef typename PacketType<OutputScalar, GpuDevice>::type PacketReturnType; - - enum { - IsAligned = false, - PacketAccess = false, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, GpuDevice>::Layout, - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const GpuDevice& device) : m_data_buf(NULL), m_impl(op.expression(), device), m_fft(op.fft()) { - const typename TensorEvaluator<ArgType, GpuDevice>::Dimensions& input_dims = m_impl.dimensions(); - for (int i = 0; i < NumDims; ++i) { - eigen_assert(input_dims[i] > 0); - m_dimensions[i] = input_dims[i]; - } - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_strides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; - } - } else { - m_strides[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - 
m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; - } - } - m_size = m_dimensions.TotalSize(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { - return m_dimensions; - } - - EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* d_data) { - m_impl.evalSubExprsIfNeeded(NULL); - if (d_data) { - evalToDeviceData(d_data); - return false; - } else { - evalToSelfDataBuf(); - return true; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromCoords(const array<Index, NumDims> & coords) const { - Index result = 0; - for (int i = 0; i < NumDims; ++i) { - result += coords[i] * m_strides[i]; - } - return result; - } - - EIGEN_STRONG_INLINE array<Index, NumDims> getPartialCoordsFromIndex(Index index, Index omitted_dim) const { - array<Index, NumDims> partial_m_strides = m_strides; - array<Index, NumDims> index_coords; - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (Index i = omitted_dim + 1; i < NumDims; ++i) { - partial_m_strides[i] /= m_dimensions[omitted_dim]; - } - for (int i = NumDims - 1; i > 0; --i) { - if(omitted_dim == i) { - } - else { - const Index idx = index / partial_m_strides[i]; - index -= idx * partial_m_strides[i]; - index_coords[i] = idx; - } - } - index_coords[0] = index; - } - else { - for (Index i = omitted_dim - 1; i >= 0; --i) { - partial_m_strides[i] /= m_dimensions[omitted_dim]; - } - for (int i = 0; i < NumDims - 1; ++i) { - if(omitted_dim == i) { - } - else { - const Index idx = index / partial_m_strides[i]; - index -= idx * partial_m_strides[i]; - index_coords[i] = idx; - } - } - index_coords[NumDims - 1] = index; - } - // Value of index_coords[omitted_dim] is not determined to this step - return index_coords; - } - - void evalToSelfDataBuf() { - cudaMalloc((void**) &m_data_buf, sizeof(OutputScalar) * m_size); - evalToDeviceData(m_data_buf); - } - - EIGEN_STRONG_INLINE void evalToDeviceData(OutputScalar* d_data) { - ComplexScalar* data_buf; - cudaMalloc((void**) 
&data_buf, sizeof(ComplexScalar) * m_size); - - int block_size = 128; - int grid_size = m_size / block_size + 1; - - copyValues<InputScalar, RealScalar, ComplexScalar, TensorEvaluator<ArgType, GpuDevice> > <<<grid_size, block_size>>>(data_buf, m_impl, m_size); - - for (int i = 0; i < m_fft.size(); ++i) { - int dim = m_fft[i]; - eigen_assert(dim >= 0 && dim < NumDims); - int line_len = m_dimensions[dim]; - ComplexScalar* line_buf; - cudaMalloc((void**) &line_buf, sizeof(ComplexScalar) * line_len); - - cufftHandle plan; - cufftPlan1d(&plan, line_len, CUFFT_C2C, 1); - - for (Index partial_index = 0; partial_index < m_size/line_len; ++partial_index) { - array<Index, NumDims> coords = getPartialCoordsFromIndex(partial_index, dim); - // get data into line_buf - int block_size = 128; - int grid_size = line_len / block_size + 1; - fillLineBuf<ComplexScalar, Index, NumDims> <<<grid_size, block_size>>>(line_buf, data_buf, line_len, coords, m_strides, dim); - - if(FFTDir == Eigen::FFT_FORWARD) { - cufftExecC2C(plan, reinterpret_cast<cufftComplex *>(line_buf), reinterpret_cast<cufftComplex*>(line_buf), CUFFT_FORWARD); - } - else { - cufftExecC2C(plan, reinterpret_cast<cufftComplex*>(line_buf), reinterpret_cast<cufftComplex*>(line_buf), CUFFT_INVERSE); - } - // write back - RealScalar div_factor = (FFTDir == FFT_FORWARD) ? 
1.0 : line_len; - writebackLineBuf<ComplexScalar, RealScalar, Index, NumDims> <<<grid_size, block_size>>>(line_buf, data_buf, line_len, coords, m_strides, dim, div_factor); - cudaDeviceSynchronize(); - - } - cufftDestroy(plan); - cudaFree(line_buf); - } - writeToDeviceData<OutputScalar, RealScalar, ComplexScalar, FFTResultType>()(d_data, data_buf, m_size); - cudaFree(data_buf); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - if(m_data_buf != NULL) cudaFree(m_data_buf); - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const { - return m_data_buf[index]; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const { - return internal::ploadt<PacketReturnType, LoadMode>(m_data_buf + index); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return m_data_buf; } - - protected: - int m_size; - const FFT& m_fft; - Dimensions m_dimensions; - array<Index, NumDims> m_strides; - TensorEvaluator<ArgType, GpuDevice> m_impl; - OutputScalar* m_data_buf; - -}; -#endif - -} // end namespace Eigen -#endif //EIGEN_CXX11_TENSOR_TENSOR_FFT_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h deleted file mode 100644 index a7af67230f..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +++ /dev/null @@ -1,277 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H -#define EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H - -namespace Eigen { - -/** \class TensorFixedSize - * \ingroup CXX11_Tensor_Module - * - * \brief The fixed sized version of the tensor class. - * - * The fixed sized equivalent of - * Eigen::Tensor<float, 3> t(3, 5, 7); - * is - * Eigen::TensorFixedSize<float, Sizes<3,5,7>> t; - */ - -template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType> -class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> > -{ - public: - typedef TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> Self; - typedef TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> > Base; - typedef typename Eigen::internal::nested<Self>::type Nested; - typedef typename internal::traits<Self>::StorageKind StorageKind; - typedef typename internal::traits<Self>::Index Index; - typedef Scalar_ Scalar; - typedef typename internal::packet_traits<Scalar>::type Packet; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef typename Base::CoeffReturnType CoeffReturnType; - - static const int Options = Options_; - - enum { - IsAligned = bool(EIGEN_ALIGN), - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - BlockAccess = false, - Layout = Options_ & RowMajor ? 
RowMajor : ColMajor, - CoordAccess = true, - }; - - typedef Dimensions_ Dimensions; - static const std::size_t NumIndices = Dimensions::count; - - protected: - TensorStorage<Scalar, Dimensions, Options> m_storage; - - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } - - // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - // work, because that uses base().coeffRef() - and we don't yet - // implement a similar class hierarchy - inline Self& base() { return *this; } - inline const Self& base() const { return *this; } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return coeff(array<Index, NumIndices>{{firstIndex, otherIndices...}}); - } -#endif - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const - { - eigen_internal_assert(checkIndexRange(indices)); - return m_storage.data()[linearizedIndex(indices)]; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& coeff() const - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return m_storage.data()[0]; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const - { - eigen_internal_assert(index >= 0 && index < size()); - return m_storage.data()[index]; - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - inline Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return coeffRef(array<Index, NumIndices>{{firstIndex, otherIndices...}}); - } -#endif - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices) - { - eigen_internal_assert(checkIndexRange(indices)); - return m_storage.data()[linearizedIndex(indices)]; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef() - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return m_storage.data()[0]; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - eigen_internal_assert(index >= 0 && index < size()); - return m_storage.data()[index]; - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - inline const Scalar& operator()(Index firstIndex, IndexTypes... 
otherIndices) const - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return this->operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}}); - } -#endif - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const - { - eigen_assert(checkIndexRange(indices)); - return coeff(indices); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()() const - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return coeff(); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const - { - eigen_internal_assert(index >= 0 && index < size()); - return coeff(index); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const - { - // The bracket operator is only for vectors, use the parenthesis operator instead. - EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE); - return coeff(index); - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - inline Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}}); - } -#endif - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices) - { - eigen_assert(checkIndexRange(indices)); - return coeffRef(indices); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()() - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return coeffRef(); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index index) - { - eigen_assert(index >= 0 && index < size()); - return coeffRef(index); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator[](Index index) - { - // The bracket operator is only for vectors, use the parenthesis operator instead - EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - return coeffRef(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFixedSize() { } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorFixedSize(const Self& other) - : m_storage(other.m_storage) - { - } - -#ifdef EIGEN_HAVE_RVALUE_REFERENCES - inline TensorFixedSize(Self&& other) - : m_storage(other.m_storage) - { - } -#endif - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) - { - typedef TensorAssignOp<TensorFixedSize, const OtherDerived> Assign; - Assign assign(*this, other.derived()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - } - - template<typename Other> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorFixedSize& operator=(const Other& other) - { - // FIXME: check that the dimensions of other match the dimensions of *this. - // Unfortunately this isn't possible yet when the rhs is an expression. 
- typedef TensorAssignOp<Self, const Other> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - protected: - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE bool checkIndexRange(const array<Index, NumIndices>& /*indices*/) const - { - using internal::array_apply_and_reduce; - using internal::array_zip_and_reduce; - using internal::greater_equal_zero_op; - using internal::logical_and_op; - using internal::lesser_op; - - return true; - // check whether the indices are all >= 0 - /* array_apply_and_reduce<logical_and_op, greater_equal_zero_op>(indices) && - // check whether the indices fit in the dimensions - array_zip_and_reduce<logical_and_op, lesser_op>(indices, m_storage.dimensions());*/ - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index linearizedIndex(const array<Index, NumIndices>& indices) const - { - if (Options&RowMajor) { - return m_storage.dimensions().IndexOfRowMajor(indices); - } else { - return m_storage.dimensions().IndexOfColMajor(indices); - } - } -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h deleted file mode 100644 index 1d1ce47174..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ /dev/null @@ -1,150 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H -#define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H - -namespace Eigen { - -/** \class TensorForcedEval - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor reshaping class. - * - * - */ -namespace internal { -template<typename XprType> -struct traits<TensorForcedEvalOp<XprType> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs are different. - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename traits<XprType>::StorageKind StorageKind; - typedef typename traits<XprType>::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; - - enum { - Flags = 0, - }; -}; - -template<typename XprType> -struct eval<TensorForcedEvalOp<XprType>, Eigen::Dense> -{ - typedef const TensorForcedEvalOp<XprType>& type; -}; - -template<typename XprType> -struct nested<TensorForcedEvalOp<XprType>, 1, typename eval<TensorForcedEvalOp<XprType> >::type> -{ - typedef TensorForcedEvalOp<XprType> type; -}; - -} // end namespace internal - - - -template<typename XprType> -class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename Eigen::internal::nested<TensorForcedEvalOp>::type Nested; - typedef typename Eigen::internal::traits<TensorForcedEvalOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorForcedEvalOp(const XprType& expr) - : m_xpr(expr) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename 
XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; -}; - - -template<typename ArgType, typename Device> -struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device> -{ - typedef TensorForcedEvalOp<ArgType> XprType; - typedef typename ArgType::Scalar Scalar; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - - enum { - IsAligned = true, - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - }; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL) - { } - - typedef typename XprType::Index Index; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { - m_impl.evalSubExprsIfNeeded(NULL); - const Index numValues = m_impl.dimensions().TotalSize(); - m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType)); - // Should initialize the memory in case we're dealing with non POD types. 
- if (!internal::is_arithmetic<CoeffReturnType>::value) { - for (Index i = 0; i < numValues; ++i) { - new(m_buffer+i) CoeffReturnType(); - } - } - typedef TensorEvalToOp<const ArgType> EvalTo; - EvalTo evalToTmp(m_buffer, m_op); - const bool PacketAccess = internal::IsVectorizable<Device, ArgType>::value; - const bool BlockAccess = false; - internal::TensorExecutor<const EvalTo, Device, PacketAccess, BlockAccess>::run(evalToTmp, m_device); - m_impl.cleanup(); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_device.deallocate(m_buffer); - m_buffer = NULL; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_buffer[index]; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return m_buffer; } - - private: - TensorEvaluator<ArgType, Device> m_impl; - const ArgType m_op; - const Device& m_device; - CoeffReturnType* m_buffer; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h deleted file mode 100644 index e11d5ed22e..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ /dev/null @@ -1,104 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H -#define EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H - -namespace Eigen { - -template<typename Scalar_, std::size_t NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor; -template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize; -template<typename Scalar_, int Options_ = 0, typename IndexType = DenseIndex> class TensorVarDim; -template<typename PlainObjectType, int Options_ = Unaligned> class TensorMap; -template<typename PlainObjectType> class TensorRef; -template<typename Derived, int AccessLevel = internal::accessors_level<Derived>::value> class TensorBase; - -template<typename NullaryOp, typename PlainObjectType> class TensorCwiseNullaryOp; -template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp; -template<typename BinaryOp, typename LeftXprType, typename RightXprType> class TensorCwiseBinaryOp; -template<typename IfXprType, typename ThenXprType, typename ElseXprType> class TensorSelectOp; -template<typename Op, typename Dims, typename XprType> class TensorReductionOp; -template<typename XprType> class TensorIndexTupleOp; -template<typename ReduceOp, typename Dims, typename XprType> class TensorTupleReducerOp; -template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp; -template<typename Dimensions, typename LeftXprType, typename RightXprType> class TensorContractionOp; -template<typename TargetType, typename XprType> class TensorConversionOp; -template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp; -template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionByFFTOp; -template<typename FFT, typename XprType, int FFTDataType, int FFTDirection> class TensorFFTOp; -template<typename IFFT, typename XprType, int ResultType> class TensorIFFTOp; -template<typename DFT, typename 
XprType, int ResultType> class TensorDFTOp; -template<typename IDFT, typename XprType, int ResultType> class TensorIDFTOp; -template<typename PatchDim, typename XprType> class TensorPatchOp; -template<DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorImagePatchOp; -template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorVolumePatchOp; -template<typename Broadcast, typename XprType> class TensorBroadcastingOp; -template<DenseIndex DimId, typename XprType> class TensorChippingOp; -template<typename NewDimensions, typename XprType> class TensorReshapingOp; -template<typename XprType> class TensorLayoutSwapOp; -template<typename StartIndices, typename Sizes, typename XprType> class TensorSlicingOp; -template<typename ReverseDimensions, typename XprType> class TensorReverseOp; -template<typename XprType> class TensorTrueIndicesOp; -template<typename PaddingDimensions, typename XprType> class TensorPaddingOp; -template<typename Shuffle, typename XprType> class TensorShufflingOp; -template<typename Strides, typename XprType> class TensorStridingOp; -template<typename Strides, typename XprType> class TensorInflationOp; -template<typename Generator, typename XprType> class TensorGeneratorOp; -template<typename LeftXprType, typename RightXprType> class TensorAssignOp; - -template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp; -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp; - -template<typename XprType> class TensorEvalToOp; -template<typename XprType> class TensorForcedEvalOp; - -template<typename ExpressionType, typename DeviceType> class TensorDevice; -template<typename Derived, typename Device> struct TensorEvaluator; - -class DefaultDevice; -class ThreadPoolDevice; -class GpuDevice; - -enum DFTResultType { - RealPart = 0, - ImagPart = 1, - BothParts = 2 -}; - -enum FFTDirection { - FFT_FORWARD = 0, - FFT_REVERSE = 1 -}; - -namespace 
internal { -template <typename Device, typename Expression> -struct IsVectorizable { - static const bool value = TensorEvaluator<Expression, Device>::PacketAccess; -}; - -template <typename Expression> -struct IsVectorizable<GpuDevice, Expression> { - static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess && - TensorEvaluator<Expression, GpuDevice>::IsAligned; -}; - -template <typename Device, typename Expression> -struct IsTileable { - static const bool value = TensorEvaluator<Expression, Device>::BlockAccess; -}; - -template <typename Expression, typename Device, - bool Vectorizable = IsVectorizable<Device, Expression>::value, - bool Tileable = IsTileable<Device, Expression>::value> -class TensorExecutor; -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h deleted file mode 100644 index 526301ad5b..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ /dev/null @@ -1,706 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H -#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H - -namespace Eigen { -namespace internal { - -namespace { -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__) -__device__ int get_random_seed() { - return clock(); -} -#else -int get_random_seed() { -#ifdef _WIN32 - SYSTEMTIME st; - GetSystemTime(&st); - return st.wSecond + 1000 * st.wMilliseconds; -#elif __APPLE__ - return mach_absolute_time(); -#else - timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - return ts.tv_nsec; -#endif -} -#endif -} - - -// Standard reduction functors -template <typename T> struct SumReducer -{ - static const bool PacketAccess = true; - static const bool IsStateful = false; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - (*accum) += t; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { - (*accum) = padd<Packet>(*accum, p); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return static_cast<T>(0); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(0); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { - return accum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { - return vaccum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - return saccum + predux(vaccum); - } -}; - -template <typename T> struct MeanReducer -{ - static const bool PacketAccess = true; - static const bool IsStateful = true; - - MeanReducer() : scalarCount_(0), packetCount_(0) { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { - (*accum) += t; - scalarCount_++; - } - template <typename Packet> - EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) { - (*accum) = padd<Packet>(*accum, p); - packetCount_++; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return static_cast<T>(0); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(0); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { - return accum / scalarCount_; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { - return pdiv(vaccum, pset1<Packet>(packetCount_)); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits<Packet>::size); - } - - protected: - int scalarCount_; - int packetCount_; -}; - -struct AndReducer -{ - static const bool PacketAccess = false; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { - *accum = *accum && t; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { - return accum; - } -}; - -struct OrReducer { - static const bool PacketAccess = false; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { - *accum = *accum || t; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { - return false; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { - return accum; - } -}; - -template <typename T> struct MaxReducer -{ - static const bool PacketAccess = true; - static const bool IsStateful = false; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - if (t > *accum) { *accum = t; } - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const 
Packet& p, Packet* accum) const { - (*accum) = pmax<Packet>(*accum, p); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return Eigen::NumTraits<T>::lowest(); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(initialize()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { - return accum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { - return vaccum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - return numext::maxi(saccum, predux_max(vaccum)); - } -}; - -template <typename T> struct MinReducer -{ - static const bool PacketAccess = true; - static const bool IsStateful = false; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - if (t < *accum) { *accum = t; } - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { - (*accum) = pmin<Packet>(*accum, p); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return Eigen::NumTraits<T>::highest(); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(initialize()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { - return accum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { - return vaccum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - return numext::mini(saccum, predux_min(vaccum)); - } -}; - - -template <typename T> struct ProdReducer -{ - static const bool PacketAccess = true; - static const bool IsStateful = false; - - 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - (*accum) *= t; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { - (*accum) = pmul<Packet>(*accum, p); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return static_cast<T>(1); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(1); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { - return accum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { - return vaccum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - return saccum * predux_mul(vaccum); - } -}; - -#if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__) -// We're not compiling a cuda kernel -template <typename T> class UniformRandomGenerator { - - public: - static const bool PacketAccess = true; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. - UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) { - seed = seed ? 
seed : get_random_seed(); - srand(seed); - } - UniformRandomGenerator(const UniformRandomGenerator& other) { - m_seed = other.m_seed; - } - - template<typename Index> - T operator()(Index, Index = 0) const { - return random<T>(); - } - template<typename Index> - typename internal::packet_traits<T>::type packetOp(Index i, Index j = 0) const { - const int packetSize = internal::packet_traits<T>::size; - EIGEN_ALIGN_DEFAULT T values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = random<T>(); - } - return internal::pload<typename internal::packet_traits<T>::type>(values); - } - - private: - unsigned int m_seed; -}; - -#if __cplusplus > 199711 -template <> class UniformRandomGenerator<float> { - public: - static const bool PacketAccess = true; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. - UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) { - seed = seed ? seed : get_random_seed(); - m_generator.seed(seed); - } - UniformRandomGenerator(const UniformRandomGenerator<float>& other) { - m_generator.seed(other(0, 0) * UINT_MAX); - m_seed = other.m_seed; - } - - template<typename Index> - float operator()(Index, Index = 0) const { - return m_distribution(m_generator); - } - template<typename Index> - typename internal::packet_traits<float>::type packetOp(Index i, Index j = 0) const { - const int packetSize = internal::packet_traits<float>::size; - EIGEN_ALIGN_DEFAULT float values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = this->operator()(i, j); - } - return internal::pload<typename internal::packet_traits<float>::type>(values); - } - - private: - UniformRandomGenerator& operator = (const UniformRandomGenerator&); - // Make sure m_seed comes first to match the layout of the cpu - // version of the code. 
- unsigned int m_seed; - mutable std::mt19937 m_generator; - mutable std::uniform_real_distribution<float> m_distribution; -}; - -template <> class UniformRandomGenerator<double> { - public: - static const bool PacketAccess = true; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. - UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) { - seed = seed ? seed : get_random_seed(); - m_generator.seed(seed); - } - UniformRandomGenerator(const UniformRandomGenerator<double>& other) { - m_generator.seed(other(0, 0) * UINT_MAX); - m_seed = other.m_seed; - } - - template<typename Index> - double operator()(Index, Index = 0) const { - return m_distribution(m_generator); - } - template<typename Index> - typename internal::packet_traits<double>::type packetOp(Index i, Index j = 0) const { - const int packetSize = internal::packet_traits<double>::size; - EIGEN_ALIGN_DEFAULT double values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = this->operator()(i, j); - } - return internal::pload<typename internal::packet_traits<double>::type>(values); - } - - private: - UniformRandomGenerator& operator = (const UniformRandomGenerator&); - // Make sure m_seed comes first to match the layout of the cpu - // version of the code. - unsigned int m_seed; - mutable std::mt19937 m_generator; - mutable std::uniform_real_distribution<double> m_distribution; -}; -#endif - -#else - -// We're compiling a cuda kernel -template <typename T> class UniformRandomGenerator; - -template <> class UniformRandomGenerator<float> { - public: - static const bool PacketAccess = true; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. - __device__ UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - seed = seed ? 
seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_seed = other.m_seed; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int seed = m_seed ? m_seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - - template<typename Index> - __device__ float operator()(Index, Index = 0) const { - return curand_uniform(&m_state); - } - template<typename Index> - __device__ float4 packetOp(Index, Index = 0) const { - return curand_uniform4(&m_state); - } - - private: - unsigned int m_seed; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator<double> { - public: - static const bool PacketAccess = true; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. - __device__ UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - seed = seed ? seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_seed = other.m_seed; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int seed = m_seed ? m_seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ double operator()(Index, Index = 0) const { - return curand_uniform_double(&m_state); - } - template<typename Index> - __device__ double2 packetOp(Index, Index = 0) const { - return curand_uniform2_double(&m_state); - } - - private: - unsigned int m_seed; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator<std::complex<float> > { - public: - static const bool PacketAccess = false; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. 
- __device__ UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - seed = seed ? seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_seed = other.m_seed; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int seed = m_seed ? m_seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ std::complex<float> operator()(Index, Index = 0) const { - float4 vals = curand_uniform4(&m_state); - return std::complex<float>(vals.x, vals.y); - } - - private: - unsigned int m_seed; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator<std::complex<double> > { - public: - static const bool PacketAccess = false; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. - __device__ UniformRandomGenerator(unsigned int seed = 0) : m_seed(seed) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - seed = seed ? seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_seed = other.m_seed; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int seed = m_seed ? 
m_seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ std::complex<double> operator()(Index, Index = 0) const { - double2 vals = curand_uniform2_double(&m_state); - return std::complex<double>(vals.x, vals.y); - } - - private: - unsigned int m_seed; - mutable curandStatePhilox4_32_10_t m_state; -}; - -#endif - - -#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && __cplusplus > 199711 -// We're not compiling a cuda kernel -template <typename T> class NormalRandomGenerator { - public: - static const bool PacketAccess = true; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. - NormalRandomGenerator(unsigned int seed = 0) : m_distribution(0, 1), m_seed(seed) { - seed = seed ? seed : get_random_seed(); - m_generator.seed(seed); - } - NormalRandomGenerator(const NormalRandomGenerator& other) - : m_distribution(other.m_distribution), m_seed(other.m_seed) { - m_generator.seed(other(0, 0) * UINT_MAX); - } - - template<typename Index> - T operator()(Index, Index = 0) const { - return m_distribution(m_generator); - } - template<typename Index> - typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const { - const int packetSize = internal::packet_traits<T>::size; - EIGEN_ALIGN_DEFAULT T values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = m_distribution(m_generator); - } - return internal::pload<typename internal::packet_traits<T>::type>(values); - } - - private: - unsigned int m_seed; - mutable std::normal_distribution<T> m_distribution; - mutable std::mt19937 m_generator; -}; - -#elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__) - -// We're compiling a cuda kernel -template <typename T> class NormalRandomGenerator; - -template <> class NormalRandomGenerator<float> { - public: - static const bool PacketAccess = true; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. 
- __device__ NormalRandomGenerator(unsigned int seed = 0) : m_seed(seed) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - seed = seed ? seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator<float>& other) { - m_seed = other.m_seed; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int seed = m_seed ? m_seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ float operator()(Index, Index = 0) const { - return curand_normal(&m_state); - } - template<typename Index> - __device__ float4 packetOp(Index, Index = 0) const { - return curand_normal4(&m_state); - } - - private: - unsigned int m_seed; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator<double> { - public: - static const bool PacketAccess = true; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. - __device__ NormalRandomGenerator(unsigned int seed = 0) : m_seed(seed) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - seed = seed ? seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator<double>& other) { - m_seed = other.m_seed; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int seed = m_seed ? 
m_seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ double operator()(Index, Index = 0) const { - return curand_normal_double(&m_state); - } - template<typename Index> - __device__ double2 packetOp(Index, Index = 0) const { - return curand_normal2_double(&m_state); - } - - private: - unsigned int m_seed; - mutable curandStatePhilox4_32_10_t m_state; -}; - - -template <> class NormalRandomGenerator<std::complex<float> > { - public: - static const bool PacketAccess = false; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. - __device__ NormalRandomGenerator(unsigned int seed = 0) : m_seed(seed) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - seed = seed ? seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_seed = other.m_seed; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int seed = m_seed ? m_seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ std::complex<float> operator()(Index, Index = 0) const { - float4 vals = curand_normal4(&m_state); - return std::complex<float>(vals.x, vals.y); - } - - private: - unsigned int m_seed; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator<std::complex<double> > { - public: - static const bool PacketAccess = false; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. - __device__ NormalRandomGenerator(unsigned int seed = 0) : m_seed(seed) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - seed = seed ? seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_seed = other.m_seed; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int seed = m_seed ? 
m_seed : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ std::complex<double> operator()(Index, Index = 0) const { - double2 vals = curand_normal2_double(&m_state); - return std::complex<double>(vals.x, vals.y); - } - - private: - unsigned int m_seed; - mutable curandStatePhilox4_32_10_t m_state; -}; -#else - -template <typename T> class NormalRandomGenerator { - public: - // Uses the given "seed" if non-zero, otherwise uses a random seed. - NormalRandomGenerator(unsigned int seed = 0) : m_seed(seed) {} - - private: - unsigned int m_seed; -}; - -#endif - - -template <typename T, typename Index, size_t NumDims> -class GaussianGenerator { - public: - static const bool PacketAccess = false; - - EIGEN_DEVICE_FUNC GaussianGenerator(const array<T, NumDims>& means, - const array<T, NumDims>& std_devs) - : m_means(means) { - for (int i = 0; i < NumDims; ++i) { - m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2; - } - } - - T operator()(const array<Index, NumDims>& coordinates) const { - T tmp = T(0); - for (int i = 0; i < NumDims; ++i) { - T offset = coordinates[i] - m_means[i]; - tmp += offset * offset / m_two_sigmas[i]; - } - return std::exp(-tmp); - } - - private: - array<T, NumDims> m_means; - array<T, NumDims> m_two_sigmas; -}; - -template <typename T> struct ArgMaxTupleReducer -{ - static const bool PacketAccess = false; - static const bool IsStateful = false; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - if (t.second > accum->second) { *accum = t; } - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return T(0, NumTraits<typename T::second_type>::lowest()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { - return accum; - } -}; - -template <typename T> struct ArgMinTupleReducer -{ - static const bool PacketAccess = false; - static const bool IsStateful = false; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, 
T* accum) const { - if (t.second < accum->second) { *accum = t; } - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return T(0, NumTraits<typename T::second_type>::highest()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { - return accum; - } -}; - -} // end namespace internal -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h deleted file mode 100644 index 91a73669a4..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +++ /dev/null @@ -1,185 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H -#define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H - -namespace Eigen { - -/** \class TensorGenerator - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor generator class. 
- * - * - */ -namespace internal { -template<typename Generator, typename XprType> -struct traits<TensorGeneratorOp<Generator, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename Generator, typename XprType> -struct eval<TensorGeneratorOp<Generator, XprType>, Eigen::Dense> -{ - typedef const TensorGeneratorOp<Generator, XprType>& type; -}; - -template<typename Generator, typename XprType> -struct nested<TensorGeneratorOp<Generator, XprType>, 1, typename eval<TensorGeneratorOp<Generator, XprType> >::type> -{ - typedef TensorGeneratorOp<Generator, XprType> type; -}; - -} // end namespace internal - - - -template<typename Generator, typename XprType> -class TensorGeneratorOp : public TensorBase<TensorGeneratorOp<Generator, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorGeneratorOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorGeneratorOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename Eigen::internal::nested<TensorGeneratorOp>::type Nested; - typedef typename Eigen::internal::traits<TensorGeneratorOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorGeneratorOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorGeneratorOp(const XprType& expr, const Generator& generator) - : m_xpr(expr), m_generator(generator) {} - - EIGEN_DEVICE_FUNC - const 
Generator& generator() const { return m_generator; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const Generator m_generator; -}; - - -// Eval as rvalue -template<typename Generator, typename ArgType, typename Device> -struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device> -{ - typedef TensorGeneratorOp<Generator, ArgType> XprType; - typedef typename XprType::Index Index; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - static const int NumDims = internal::array_size<Dimensions>::value; - typedef typename XprType::Scalar Scalar; - - enum { - IsAligned = false, - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_generator(op.generator()) - { - TensorEvaluator<ArgType, Device> impl(op.expression(), device); - m_dimensions = impl.dimensions(); - - if (NumDims > 0) { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_strides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; - } - } else { - m_strides[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; - } - } - } - } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType 
coeff(Index index) const - { - array<Index, NumDims> coords; - extract_coordinates(index, coords); - return m_generator(coords); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void extract_coordinates(Index index, array<Index, NumDims>& coords) const { - if (NumDims > 0) { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_strides[i]; - index -= idx * m_strides[i]; - coords[i] = idx; - } - coords[0] = index; - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_strides[i]; - index -= idx * m_strides[i]; - coords[i] = idx; - } - coords[NumDims-1] = index; - } - } - } - - Dimensions m_dimensions; - array<Index, NumDims> m_strides; - Generator m_generator; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h deleted file mode 100644 index 53dc0b04aa..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +++ /dev/null @@ -1,56 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_IO_H -#define EIGEN_CXX11_TENSOR_TENSOR_IO_H - -namespace Eigen { - -namespace internal { -template<> -struct significant_decimals_impl<std::string> - : significant_decimals_default_impl<std::string, true> -{}; -} - - -template <typename T> -std::ostream& operator << (std::ostream& os, const TensorBase<T, ReadOnlyAccessors>& expr) { - // Evaluate the expression if needed - TensorForcedEvalOp<const T> eval = expr.eval(); - TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> tensor(eval, DefaultDevice()); - tensor.evalSubExprsIfNeeded(NULL); - - typedef typename internal::remove_const<typename T::Scalar>::type Scalar; - typedef typename T::Index Index; - typedef typename TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice>::Dimensions Dimensions; - const Index total_size = internal::array_prod(tensor.dimensions()); - - // Print the tensor as a 1d vector or a 2d matrix. - static const int rank = internal::array_size<Dimensions>::value; - if (rank == 0) { - os << tensor.coeff(0); - } else if (rank == 1) { - Map<const Array<Scalar, Dynamic, 1> > array(const_cast<Scalar*>(tensor.data()), total_size); - os << array; - } else { - const Index first_dim = tensor.dimensions()[0]; - static const int layout = TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice>::Layout; - Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(const_cast<Scalar*>(tensor.data()), first_dim, total_size/first_dim); - os << matrix; - } - - // Cleanup. 
- tensor.cleanup(); - return os; -} - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_IO_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h deleted file mode 100644 index a1d33d964e..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +++ /dev/null @@ -1,757 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H -#define EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H - -namespace Eigen { - -/** \class TensorImagePatch - * \ingroup CXX11_Tensor_Module - * - * \brief Patch extraction specialized for image processing. - * This assumes that the input has a least 3 dimensions ordered as follow: - * 1st dimension: channels (of size d) - * 2nd dimension: rows (of size r) - * 3rd dimension: columns (of size c) - * There can be additional dimensions such as time (for video) or batch (for - * bulk processing after the first 3. - * Calling the image patch code with patch_rows and patch_cols is equivalent - * to calling the regular patch extraction code with parameters d, patch_rows, - * patch_cols, and 1 for all the additional dimensions. 
- */ -namespace internal { -template<DenseIndex Rows, DenseIndex Cols, typename XprType> -struct traits<TensorImagePatchOp<Rows, Cols, XprType> > : public traits<XprType> -{ - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef traits<XprType> XprTraits; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions + 1; - static const int Layout = XprTraits::Layout; -}; - -template<DenseIndex Rows, DenseIndex Cols, typename XprType> -struct eval<TensorImagePatchOp<Rows, Cols, XprType>, Eigen::Dense> -{ - typedef const TensorImagePatchOp<Rows, Cols, XprType>& type; -}; - -template<DenseIndex Rows, DenseIndex Cols, typename XprType> -struct nested<TensorImagePatchOp<Rows, Cols, XprType>, 1, typename eval<TensorImagePatchOp<Rows, Cols, XprType> >::type> -{ - typedef TensorImagePatchOp<Rows, Cols, XprType> type; -}; - -template <typename Self, bool Vectorizable> -struct ImagePatchCopyOp { - typedef typename Self::Index Index; - typedef typename Self::Scalar Scalar; - typedef typename Self::Impl Impl; - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - const Self& self, const Index num_coeff_to_copy, const Index dst_index, - Scalar* dst_data, const Index src_index) { - const Impl& impl = self.impl(); - for (Index i = 0; i < num_coeff_to_copy; ++i) { - dst_data[dst_index + i] = impl.coeff(src_index + i); - } - } -}; - -template <typename Self> -struct ImagePatchCopyOp<Self, true> { - typedef typename Self::Index Index; - typedef typename Self::Scalar Scalar; - typedef typename Self::Impl Impl; - typedef typename packet_traits<Scalar>::type Packet; - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - const Self& self, const Index num_coeff_to_copy, const Index dst_index, - 
Scalar* dst_data, const Index src_index) { - const Impl& impl = self.impl(); - const Index packet_size = internal::unpacket_traits<Packet>::size; - const Index vectorized_size = (num_coeff_to_copy / packet_size) * - packet_size; - for (Index i = 0; i < vectorized_size; i += packet_size) { - Packet p = impl.template packet<Unaligned>(src_index + i); - internal::pstoret<Scalar, Packet, Unaligned>(dst_data + dst_index + i, p); - } - for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) { - dst_data[dst_index + i] = impl.coeff(src_index + i); - } - } -}; - -template <typename Self> -struct ImagePatchPaddingOp { - typedef typename Self::Index Index; - typedef typename Self::Scalar Scalar; - typedef typename packet_traits<Scalar>::type Packet; - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - const Index num_coeff_to_pad, const Scalar padding_value, - const Index dst_index, Scalar* dst_data) { - const Index packet_size = internal::unpacket_traits<Packet>::size; - const Packet padded_packet = internal::pset1<Packet>(padding_value); - const Index vectorized_size = (num_coeff_to_pad / packet_size) * - packet_size; - for (Index i = 0; i < vectorized_size; i += packet_size) { - internal::pstoret<Scalar, Packet, Unaligned>(dst_data + dst_index + i, - padded_packet); - } - for (Index i = vectorized_size; i < num_coeff_to_pad; ++i) { - dst_data[dst_index + i] = padding_value; - } - } -}; - -} // end namespace internal - -template<DenseIndex Rows, DenseIndex Cols, typename XprType> -class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorImagePatchOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorImagePatchOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename 
Eigen::internal::nested<TensorImagePatchOp>::type Nested; - typedef typename Eigen::internal::traits<TensorImagePatchOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorImagePatchOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols, - DenseIndex row_strides, DenseIndex col_strides, - DenseIndex in_row_strides, DenseIndex in_col_strides, - DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, - PaddingType padding_type, Scalar padding_value) - : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols), - m_row_strides(row_strides), m_col_strides(col_strides), - m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), - m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), - m_padding_explicit(false), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0), - m_padding_type(padding_type), m_padding_value(padding_value) {} - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols, - DenseIndex row_strides, DenseIndex col_strides, - DenseIndex in_row_strides, DenseIndex in_col_strides, - DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, - DenseIndex padding_top, DenseIndex padding_bottom, - DenseIndex padding_left, DenseIndex padding_right, - Scalar padding_value) - : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols), - m_row_strides(row_strides), m_col_strides(col_strides), - m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), - m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), - m_padding_explicit(true), m_padding_top(padding_top), m_padding_bottom(padding_bottom), - m_padding_left(padding_left), m_padding_right(padding_right), - m_padding_type(PADDING_VALID), m_padding_value(padding_value) {} - - EIGEN_DEVICE_FUNC - 
DenseIndex patch_rows() const { return m_patch_rows; } - EIGEN_DEVICE_FUNC - DenseIndex patch_cols() const { return m_patch_cols; } - EIGEN_DEVICE_FUNC - DenseIndex row_strides() const { return m_row_strides; } - EIGEN_DEVICE_FUNC - DenseIndex col_strides() const { return m_col_strides; } - EIGEN_DEVICE_FUNC - DenseIndex in_row_strides() const { return m_in_row_strides; } - EIGEN_DEVICE_FUNC - DenseIndex in_col_strides() const { return m_in_col_strides; } - EIGEN_DEVICE_FUNC - DenseIndex row_inflate_strides() const { return m_row_inflate_strides; } - EIGEN_DEVICE_FUNC - DenseIndex col_inflate_strides() const { return m_col_inflate_strides; } - EIGEN_DEVICE_FUNC - bool padding_explicit() const { return m_padding_explicit; } - EIGEN_DEVICE_FUNC - DenseIndex padding_top() const { return m_padding_top; } - EIGEN_DEVICE_FUNC - DenseIndex padding_bottom() const { return m_padding_bottom; } - EIGEN_DEVICE_FUNC - DenseIndex padding_left() const { return m_padding_left; } - EIGEN_DEVICE_FUNC - DenseIndex padding_right() const { return m_padding_right; } - EIGEN_DEVICE_FUNC - PaddingType padding_type() const { return m_padding_type; } - EIGEN_DEVICE_FUNC - Scalar padding_value() const { return m_padding_value; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const DenseIndex m_patch_rows; - const DenseIndex m_patch_cols; - const DenseIndex m_row_strides; - const DenseIndex m_col_strides; - const DenseIndex m_in_row_strides; - const DenseIndex m_in_col_strides; - const DenseIndex m_row_inflate_strides; - const DenseIndex m_col_inflate_strides; - const bool m_padding_explicit; - const DenseIndex m_padding_top; - const DenseIndex m_padding_bottom; - const DenseIndex m_padding_left; - const DenseIndex m_padding_right; - const PaddingType m_padding_type; - const Scalar m_padding_value; -}; - -// Eval as rvalue -template<DenseIndex Rows, 
DenseIndex Cols, typename ArgType, typename Device> -struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device> -{ - typedef TensorImagePatchOp<Rows, Cols, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - static const int NumDims = NumInputDims + 1; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, - Device> Self; - typedef TensorEvaluator<ArgType, Device> Impl; - - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = true, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = NumDims == 5, - }; - - typedef typename internal::TensorBlock<Index, Scalar, NumDims, Layout> - OutputTensorBlock; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - EIGEN_STATIC_ASSERT(NumDims >= 4, YOU_MADE_A_PROGRAMMING_MISTAKE); - - m_paddingValue = op.padding_value(); - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - - // Caches a few variables. 
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputDepth = input_dims[0]; - m_inputRows = input_dims[1]; - m_inputCols = input_dims[2]; - } else { - m_inputDepth = input_dims[NumInputDims-1]; - m_inputRows = input_dims[NumInputDims-2]; - m_inputCols = input_dims[NumInputDims-3]; - } - - m_row_strides = op.row_strides(); - m_col_strides = op.col_strides(); - - // Input strides and effective input/patch size - m_in_row_strides = op.in_row_strides(); - m_in_col_strides = op.in_col_strides(); - m_row_inflate_strides = op.row_inflate_strides(); - m_col_inflate_strides = op.col_inflate_strides(); - // The "effective" input rows and input cols are the input rows and cols - // after inflating them with zeros. - // For examples, a 2x3 matrix with row_inflate_strides and - // col_inflate_strides of 2 comes from: - // A B C - // D E F - // - // to a matrix is 3 x 5: - // - // A . B . C - // . . . . . - // D . E . F - - m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1; - m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1; - m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1); - m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1); - - if (op.padding_explicit()) { - m_outputRows = ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides)); - m_outputCols = ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides)); - m_rowPaddingTop = op.padding_top(); - m_colPaddingLeft = op.padding_left(); - } else { - // Computing padding from the type - switch (op.padding_type()) { - case PADDING_VALID: - m_outputRows = ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides)); - m_outputCols = ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides)); - // Calculate the padding - 
m_rowPaddingTop = ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2; - m_colPaddingLeft = ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2; - break; - case PADDING_SAME: - m_outputRows = ceil(m_input_rows_eff / static_cast<float>(m_row_strides)); - m_outputCols = ceil(m_input_cols_eff / static_cast<float>(m_col_strides)); - // Calculate the padding - m_rowPaddingTop = ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2; - m_colPaddingLeft = ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2; - break; - default: - eigen_assert(false && "unexpected padding"); - } - } - eigen_assert(m_outputRows > 0); - eigen_assert(m_outputCols > 0); - - // Dimensions for result of extraction. - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - // ColMajor - // 0: depth - // 1: patch_rows - // 2: patch_cols - // 3: number of patches - // 4 and beyond: anything else (such as batch). - m_dimensions[0] = input_dims[0]; - m_dimensions[1] = op.patch_rows(); - m_dimensions[2] = op.patch_cols(); - m_dimensions[3] = m_outputRows * m_outputCols; - for (int i = 4; i < NumDims; ++i) { - m_dimensions[i] = input_dims[i-1]; - } - } else { - // RowMajor - // NumDims-1: depth - // NumDims-2: patch_rows - // NumDims-3: patch_cols - // NumDims-4: number of patches - // NumDims-5 and beyond: anything else (such as batch). - m_dimensions[NumDims-1] = input_dims[NumInputDims-1]; - m_dimensions[NumDims-2] = op.patch_rows(); - m_dimensions[NumDims-3] = op.patch_cols(); - m_dimensions[NumDims-4] = m_outputRows * m_outputCols; - for (int i = NumDims-5; i >= 0; --i) { - m_dimensions[i] = input_dims[i]; - } - } - - // Strides for moving the patch in various dimensions. 
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_colStride = m_dimensions[1]; - m_patchStride = m_colStride * m_dimensions[2] * m_dimensions[0]; - m_otherStride = m_patchStride * m_dimensions[3]; - } else { - m_colStride = m_dimensions[NumDims-2]; - m_patchStride = m_colStride * m_dimensions[NumDims-3] * m_dimensions[NumDims-1]; - m_otherStride = m_patchStride * m_dimensions[NumDims-4]; - } - - // Strides for navigating through the input tensor. - m_rowInputStride = m_inputDepth; - m_colInputStride = m_inputDepth * m_inputRows; - m_patchInputStride = m_inputDepth * m_inputRows * m_inputCols; - - // Fast representations of different variables. - m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride); - m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride); - m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride); - m_fastInputRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides); - m_fastInputColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides); - m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff); - - // Number of patches in the width dimension. 
- m_fastOutputRows = internal::TensorIntDivisor<Index>(m_outputRows); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]); - } else { - m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]); - } - - m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1), - device.lastLevelCacheSize() / - sizeof(Scalar)); - } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - // Patch index corresponding to the passed in index. - const Index patchIndex = index / m_fastPatchStride; - // Find the offset of the element wrt the location of the first element. - const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth; - - // Other ways to index this element. - const Index otherIndex = (NumDims == 4) ? 0 : index / m_fastOtherStride; - const Index patch2DIndex = (NumDims == 4) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride; - - // Calculate col index in the input original tensor. - const Index colIndex = patch2DIndex / m_fastOutputRows; - const Index colOffset = patchOffset / m_fastColStride; - const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft; - const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? 
(inputCol / m_fastInputColStride) : 0); - if (inputCol < 0 || inputCol >= m_input_cols_eff || - ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) { - return Scalar(m_paddingValue); - } - - // Calculate row index in the original input tensor. - const Index rowIndex = patch2DIndex - colIndex * m_outputRows; - const Index rowOffset = patchOffset - colOffset * m_colStride; - const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop; - const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0); - if (inputRow < 0 || inputRow >= m_input_rows_eff || - ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { - return Scalar(m_paddingValue); - } - - const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1; - const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; - - const Index inputIndex = depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex * m_patchInputStride; - return m_impl.coeff(inputIndex); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const Index packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) { - return packetWithPossibleZero(index); - } - - const Index indices[2] = {index, index + packetSize - 1}; - const Index patchIndex = indices[0] / m_fastPatchStride; - if (patchIndex != indices[1] / m_fastPatchStride) { - return packetWithPossibleZero(index); - } - const Index otherIndex = (NumDims == 4) ? 
0 : indices[0] / m_fastOtherStride; - eigen_assert(otherIndex == indices[1] / m_fastOtherStride); - - // Find the offset of the element wrt the location of the first element. - const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth, - (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth}; - - const Index patch2DIndex = (NumDims == 4) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride; - eigen_assert(patch2DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride); - - const Index colIndex = patch2DIndex / m_fastOutputRows; - const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, patchOffsets[1] / m_fastColStride}; - - // Calculate col indices in the original input tensor. - const Index inputCols[2] = {colIndex * m_col_strides + colOffsets[0] - - m_colPaddingLeft, colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft}; - if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) { - return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); - } - - if (inputCols[0] == inputCols[1]) { - const Index rowIndex = patch2DIndex - colIndex * m_outputRows; - const Index rowOffsets[2] = {patchOffsets[0] - colOffsets[0]*m_colStride, patchOffsets[1] - colOffsets[1]*m_colStride}; - eigen_assert(rowOffsets[0] <= rowOffsets[1]); - // Calculate col indices in the original input tensor. - const Index inputRows[2] = {rowIndex * m_row_strides + rowOffsets[0] - - m_rowPaddingTop, rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop}; - - if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) { - return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); - } - - if (inputRows[0] >= 0 && inputRows[1] < m_inputRows) { - // no padding - const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 
0 : NumDims - 1; - const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; - const Index inputIndex = depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex * m_patchInputStride; - return m_impl.template packet<Unaligned>(inputIndex); - } - } - - return packetWithPossibleZero(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector<internal::TensorOpResourceRequirements>* resources) const { - resources->push_back(internal::TensorOpResourceRequirements( - internal::kSkewedInnerDims, m_block_total_size_max)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( - OutputTensorBlock* output_block) const { - typedef typename internal::ImagePatchCopyOp<Self, PacketAccess> - ImagePatchCopyOp; - typedef typename internal::ImagePatchPaddingOp<Self> ImagePatchPaddingOp; - - // Calculate loop limits and various input/output dim sizes. - const DSizes<Index, NumDims>& block_sizes = output_block->block_sizes(); - const bool col_major = - static_cast<int>(Layout) == static_cast<int>(ColMajor); - const Index depth_dim_size = block_sizes[col_major ? 0 : NumDims - 1]; - const Index output_depth_dim_size = m_dimensions[ - col_major ? 0 : NumDims - 1]; - const Index row_dim_size = block_sizes[col_major ? 1 : NumDims - 2]; - const Index output_row_dim_size = m_dimensions[col_major ? 1 : NumDims - 2]; - const Index col_dim_size = block_sizes[col_major ? 2 : NumDims - 3]; - const Index block_col_stride = row_dim_size * depth_dim_size; - const Index patch_index_dim_size = block_sizes[col_major ? 3 : NumDims - 4]; - const Index outer_dim_size = block_sizes.TotalSize() / - (depth_dim_size * row_dim_size * col_dim_size * patch_index_dim_size); - - const Index patch_size = row_dim_size * col_dim_size * depth_dim_size; - const Index batch_size = patch_size * patch_index_dim_size; - - Index output_index = output_block->first_coeff_index(); - - // Loop through outer dimensions. 
- for (Index outer_dim_index = 0; - outer_dim_index < outer_dim_size; - ++outer_dim_index) { - const Index outer_output_base_index = outer_dim_index * batch_size; - // Find the offset of the element wrt the location of the first element. - const Index patchIndexStart = output_index / m_fastPatchStride; - const Index patchOffset = - (output_index - patchIndexStart * m_patchStride) / m_fastOutputDepth; - const Index colOffsetStart = patchOffset / m_fastColStride; - // Other ways to index this element. - const Index otherIndex = (NumDims == 4) ? - 0 : output_index / m_fastOtherStride; - const Index patch2DIndexStart = (NumDims == 4) ? - 0 : (output_index - otherIndex * m_otherStride) / m_fastPatchStride; - // Calculate starting depth index. - const Index depth = output_index - (output_index / m_fastOutputDepth) * - output_depth_dim_size; - const Index patch_input_base_index = depth + otherIndex * - m_patchInputStride; - - // Loop through patches. - for (Index patch_index_dim_index = 0; - patch_index_dim_index < patch_index_dim_size; - ++patch_index_dim_index) { - const Index patch_output_base_index = outer_output_base_index + - patch_index_dim_index * patch_size; - // Patch index corresponding to the passed in index. - const Index patchIndex = patchIndexStart + patch_index_dim_index; - const Index patch2DIndex = (NumDims == 4) ? - patchIndex : patch2DIndexStart + patch_index_dim_index; - const Index colIndex = patch2DIndex / m_fastOutputRows; - const Index input_col_base = colIndex * m_col_strides; - const Index row_offset_base = (patch2DIndex - colIndex * m_outputRows) * - m_row_strides - m_rowPaddingTop; - - // Loop through columns. - for (Index col_dim_index = 0; - col_dim_index < col_dim_size; - ++col_dim_index) { - const Index col_output_base_index = patch_output_base_index + - col_dim_index * block_col_stride; - - // Calculate col index in the input original tensor. 
- Index colOffset = colOffsetStart + col_dim_index; - Index inputCol = input_col_base + colOffset * m_in_col_strides - - m_colPaddingLeft; - Index origInputCol = (m_col_inflate_strides == 1) ? - inputCol : ((inputCol >= 0) ? - (inputCol / m_fastInputColStride) : 0); - - bool pad_column = false; - if (inputCol < 0 || inputCol >= m_input_cols_eff || - ((m_col_inflate_strides != 1) && - (inputCol != origInputCol * m_col_inflate_strides))) { - pad_column = true; - } - - const Index col_input_base_index = patch_input_base_index + - origInputCol * m_colInputStride; - const Index input_row_base = row_offset_base + - ((patchOffset + col_dim_index * output_row_dim_size) - - colOffset * m_colStride) * m_in_row_strides; - // Loop through rows. - for (Index row_dim_index = 0; - row_dim_index < row_dim_size; - ++row_dim_index) { - const Index output_base_index = col_output_base_index + - row_dim_index * depth_dim_size; - bool pad_row = false; - Index inputIndex; - if (!pad_column) { - Index inputRow = input_row_base + row_dim_index * - m_in_row_strides; - Index origInputRow = (m_row_inflate_strides == 1) ? - inputRow : ((inputRow >= 0) ? - (inputRow / m_fastInputRowStride) : 0); - if (inputRow < 0 || inputRow >= m_input_rows_eff || - ((m_row_inflate_strides != 1) && - (inputRow != origInputRow * m_row_inflate_strides))) { - pad_row = true; - } else { - inputIndex = col_input_base_index + origInputRow * - m_rowInputStride; - } - } - // Copy (or pad) along depth dimension. 
- if (pad_column || pad_row) { - ImagePatchPaddingOp::Run(depth_dim_size, Scalar(m_paddingValue), - output_base_index, output_block->data()); - } else { - ImagePatchCopyOp::Run(*this, depth_dim_size, - output_base_index, output_block->data(), - inputIndex); - } - } - } - } - output_index += m_otherStride; - } - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } - - Index rowPaddingTop() const { return m_rowPaddingTop; } - Index colPaddingLeft() const { return m_colPaddingLeft; } - Index outputRows() const { return m_outputRows; } - Index outputCols() const { return m_outputCols; } - Index userRowStride() const { return m_row_strides; } - Index userColStride() const { return m_col_strides; } - Index userInRowStride() const { return m_in_row_strides; } - Index userInColStride() const { return m_in_col_strides; } - Index rowInflateStride() const { return m_row_inflate_strides; } - Index colInflateStride() const { return m_col_inflate_strides; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const - { - // Location of the first element of the patch. - // ColMajor - // 0: d, 1: patch_rows, 2: patch_cols, 3: number of patches, 4: number of batches - // RowMajor - // 0: number of batches, 1: number of patches, 2: patch_cols , 3: patch_rows, 4: d - const Index patch2DIndex = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 3 : 1]; - - array<Index, NumDims-1> inputCoords; - Index input_col_idx = patch2DIndex / m_fastInputColsEff; - Index inputCol = input_col_idx + coords[1] * m_in_row_strides - m_rowPaddingTop; - Index inputRow = patch2DIndex - input_col_idx * m_input_cols_eff + coords[2] * m_in_col_strides - m_colPaddingLeft; - const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0); - const Index origInputRow = (m_row_inflate_strides == 1) ? 
inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - inputCoords[0] = coords[0]; // depth - inputCoords[1] = origInputCol; - inputCoords[2] = origInputRow; - inputCoords[3] = coords[4]; // batch - } else { - inputCoords[3] = coords[4]; // depth - inputCoords[2] = origInputCol; - inputCoords[1] = origInputRow; - inputCoords[0] = coords[0]; // batch - } - // If the computed coordinates are outside the original image perimeter, return 0. - if (inputCol < 0 || inputCol >= m_input_cols_eff || inputRow < 0 || inputRow >= m_input_rows_eff || - ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides)) || - ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { - return Scalar(m_paddingValue); - } - if (TensorEvaluator<ArgType, Device>::CoordAccess) { - return m_impl.coeff(inputCoords); - } else { - Index inputIndex; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - inputIndex = - inputCoords[3] * m_patchInputStride + - inputCoords[2] * m_colInputStride + - inputCoords[1] * m_rowInputStride + - inputCoords[0]; - } else { - inputIndex = - inputCoords[1] * m_patchInputStride + - inputCoords[2] * m_colInputStride + - inputCoords[3] * m_rowInputStride + - inputCoords[4]; - } - return m_impl.coeff(inputIndex); - } - } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - Dimensions m_dimensions; - - Index m_otherStride; - Index m_patchStride; - Index m_colStride; - Index m_row_strides; - Index m_col_strides; - - Index 
m_in_row_strides; - Index m_in_col_strides; - Index m_row_inflate_strides; - Index m_col_inflate_strides; - - Index m_input_rows_eff; - Index m_input_cols_eff; - Index m_patch_rows_eff; - Index m_patch_cols_eff; - - internal::TensorIntDivisor<Index> m_fastOtherStride; - internal::TensorIntDivisor<Index> m_fastPatchStride; - internal::TensorIntDivisor<Index> m_fastColStride; - internal::TensorIntDivisor<Index> m_fastInputRowStride; - internal::TensorIntDivisor<Index> m_fastInputColStride; - internal::TensorIntDivisor<Index> m_fastInputColsEff; - - Index m_rowInputStride; - Index m_colInputStride; - Index m_patchInputStride; - - Index m_inputDepth; - Index m_inputRows; - Index m_inputCols; - - Index m_outputRows; - Index m_outputCols; - - Index m_rowPaddingTop; - Index m_colPaddingLeft; - - internal::TensorIntDivisor<Index> m_fastOutputRows; - internal::TensorIntDivisor<Index> m_fastOutputDepth; - - Scalar m_paddingValue; - std::size_t m_block_total_size_max; - - TensorEvaluator<ArgType, Device> m_impl; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h deleted file mode 100644 index 7631b54f2f..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +++ /dev/null @@ -1,421 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H -#define EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H - -#if defined(EIGEN_HAS_CONSTEXPR) && defined(EIGEN_HAS_VARIADIC_TEMPLATES) - -#define EIGEN_HAS_INDEX_LIST - -namespace Eigen { - -/** \internal - * - * \class TensorIndexList - * \ingroup CXX11_Tensor_Module - * - * \brief Set of classes used to encode a set of Tensor dimensions/indices. - * - * The indices in the list can be known at compile time or at runtime. A mix - * of static and dynamic indices can also be provided if needed. The tensor - * code will attempt to take advantage of the indices that are known at - * compile time to optimize the code it generates. - * - * This functionality requires a c++11 compliant compiler. If your compiler - * is older you need to use arrays of indices instead. - * - * Several examples are provided in the cxx11_tensor_index_list.cpp file. - * - * \sa Tensor - */ - -template <DenseIndex n> -struct type2index { - static const DenseIndex value = n; - constexpr operator DenseIndex() const { return n; } - void set(DenseIndex val) { - eigen_assert(val == n); - } -}; - -namespace internal { -template <typename T> -void update_value(T& val, DenseIndex new_val) { - val = new_val; -} -template <DenseIndex n> -void update_value(type2index<n>& val, DenseIndex new_val) { - val.set(new_val); -} - -template <typename T> -struct is_compile_time_constant { - static constexpr bool value = false; -}; - -template <DenseIndex idx> -struct is_compile_time_constant<type2index<idx> > { - static constexpr bool value = true; -}; -template <DenseIndex idx> -struct is_compile_time_constant<const type2index<idx> > { - static constexpr bool value = true; -}; -template <DenseIndex idx> -struct is_compile_time_constant<type2index<idx>& > { - static constexpr bool value = true; -}; -template <DenseIndex idx> -struct is_compile_time_constant<const type2index<idx>& > { - static constexpr bool value = true; -}; - -template <DenseIndex Idx> -struct tuple_coeff { 
- template <typename... T> - static constexpr DenseIndex get(const DenseIndex i, const std::tuple<T...>& t) { - return std::get<Idx>(t) * (i == Idx) + tuple_coeff<Idx-1>::get(i, t) * (i != Idx); - } - template <typename... T> - static void set(const DenseIndex i, std::tuple<T...>& t, const DenseIndex value) { - if (i == Idx) { - update_value(std::get<Idx>(t), value); - } else { - tuple_coeff<Idx-1>::set(i, t, value); - } - } - - template <typename... T> - static constexpr bool value_known_statically(const DenseIndex i, const std::tuple<T...>& t) { - return ((i == Idx) & is_compile_time_constant<typename std::tuple_element<Idx, std::tuple<T...> >::type>::value) || - tuple_coeff<Idx-1>::value_known_statically(i, t); - } - - template <typename... T> - static constexpr bool values_up_to_known_statically(const std::tuple<T...>& t) { - return is_compile_time_constant<typename std::tuple_element<Idx, std::tuple<T...> >::type>::value && - tuple_coeff<Idx-1>::values_up_to_known_statically(t); - } - - template <typename... T> - static constexpr bool values_up_to_statically_known_to_increase(const std::tuple<T...>& t) { - return is_compile_time_constant<typename std::tuple_element<Idx, std::tuple<T...> >::type>::value && - is_compile_time_constant<typename std::tuple_element<Idx-1, std::tuple<T...> >::type>::value && - std::get<Idx>(t) > std::get<Idx-1>(t) && - tuple_coeff<Idx-1>::values_up_to_statically_known_to_increase(t); - } -}; - -template <> -struct tuple_coeff<0> { - template <typename... T> - static constexpr DenseIndex get(const DenseIndex i, const std::tuple<T...>& t) { - // eigen_assert (i == 0); // gcc fails to compile assertions in constexpr - return std::get<0>(t) * (i == 0); - } - template <typename... T> - static void set(const DenseIndex i, std::tuple<T...>& t, const DenseIndex value) { - eigen_assert (i == 0); - update_value(std::get<0>(t), value); - } - template <typename... 
T> - static constexpr bool value_known_statically(const DenseIndex i, const std::tuple<T...>& t) { - // eigen_assert (i == 0); // gcc fails to compile assertions in constexpr - return is_compile_time_constant<typename std::tuple_element<0, std::tuple<T...> >::type>::value & (i == 0); - } - - template <typename... T> - static constexpr bool values_up_to_known_statically(const std::tuple<T...>& t) { - return is_compile_time_constant<typename std::tuple_element<0, std::tuple<T...> >::type>::value; - } - - template <typename... T> - static constexpr bool values_up_to_statically_known_to_increase(const std::tuple<T...>& t) { - return true; - } -}; -} // namespace internal - - -template<typename FirstType, typename... OtherTypes> -struct IndexList : std::tuple<FirstType, OtherTypes...> { - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex operator[] (const DenseIndex i) const { - return internal::tuple_coeff<std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value-1>::get(i, *this); - } - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const DenseIndex value) { - return internal::tuple_coeff<std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value-1>::set(i, *this, value); - } - - constexpr IndexList(const std::tuple<FirstType, OtherTypes...>& other) : std::tuple<FirstType, OtherTypes...>(other) { } - constexpr IndexList() : std::tuple<FirstType, OtherTypes...>() { } - - constexpr bool value_known_statically(const DenseIndex i) const { - return internal::tuple_coeff<std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value-1>::value_known_statically(i, *this); - } - constexpr bool all_values_known_statically() const { - return internal::tuple_coeff<std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value-1>::values_up_to_known_statically(*this); - } - - constexpr bool values_statically_known_to_increase() const { - return internal::tuple_coeff<std::tuple_size<std::tuple<FirstType, OtherTypes...> 
>::value-1>::values_up_to_statically_known_to_increase(*this); - } -}; - - -template<typename FirstType, typename... OtherTypes> -constexpr IndexList<FirstType, OtherTypes...> make_index_list(FirstType val1, OtherTypes... other_vals) { - return std::make_tuple(val1, other_vals...); -} - - -namespace internal { - -template<typename FirstType, typename... OtherTypes> size_t array_prod(const IndexList<FirstType, OtherTypes...>& sizes) { - size_t result = 1; - for (int i = 0; i < array_size<IndexList<FirstType, OtherTypes...> >::value; ++i) { - result *= sizes[i]; - } - return result; -}; - -template<typename FirstType, typename... OtherTypes> struct array_size<IndexList<FirstType, OtherTypes...> > { - static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value; -}; -template<typename FirstType, typename... OtherTypes> struct array_size<const IndexList<FirstType, OtherTypes...> > { - static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value; -}; - -template<DenseIndex n, typename FirstType, typename... OtherTypes> constexpr DenseIndex array_get(IndexList<FirstType, OtherTypes...>& a) { - return std::get<n>(a); -} -template<DenseIndex n, typename FirstType, typename... OtherTypes> constexpr DenseIndex array_get(const IndexList<FirstType, OtherTypes...>& a) { - return std::get<n>(a); -} - -template <typename T> -struct index_known_statically { - constexpr bool operator() (DenseIndex) const { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_known_statically<IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() (const DenseIndex i) const { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i); - } -}; - -template <typename FirstType, typename... 
OtherTypes> -struct index_known_statically<const IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() (const DenseIndex i) const { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i); - } -}; - -template <typename T> -struct all_indices_known_statically { - constexpr bool operator() () const { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct all_indices_known_statically<IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() () const { - return IndexList<FirstType, OtherTypes...>().all_values_known_statically(); - } -}; - -template <typename FirstType, typename... OtherTypes> -struct all_indices_known_statically<const IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() () const { - return IndexList<FirstType, OtherTypes...>().all_values_known_statically(); - } -}; - -template <typename T> -struct indices_statically_known_to_increase { - constexpr bool operator() () const { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct indices_statically_known_to_increase<IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() () const { - return IndexList<FirstType, OtherTypes...>().values_statically_known_to_increase(); - } -}; - -template <typename FirstType, typename... OtherTypes> -struct indices_statically_known_to_increase<const IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() () const { - return IndexList<FirstType, OtherTypes...>().values_statically_known_to_increase(); - } -}; - -template <typename Tx> -struct index_statically_eq { - constexpr bool operator() (DenseIndex, DenseIndex) const { - return false; - } -}; - -template <typename FirstType, typename... 
OtherTypes> -struct index_statically_eq<IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) && - IndexList<FirstType, OtherTypes...>()[i] == value; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_statically_eq<const IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) && - IndexList<FirstType, OtherTypes...>()[i] == value; - } -}; - -template <typename T> -struct index_statically_ne { - constexpr bool operator() (DenseIndex, DenseIndex) const { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_statically_ne<IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) && - IndexList<FirstType, OtherTypes...>()[i] != value; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_statically_ne<const IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) && - IndexList<FirstType, OtherTypes...>()[i] != value; - } -}; - - -template <typename T> -struct index_statically_gt { - constexpr bool operator() (DenseIndex, DenseIndex) const { - return false; - } -}; - -template <typename FirstType, typename... 
OtherTypes> -struct index_statically_gt<IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) && - IndexList<FirstType, OtherTypes...>()[i] > value; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_statically_gt<const IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) && - IndexList<FirstType, OtherTypes...>()[i] > value; - } -}; - -template <typename T> -struct index_statically_lt { - constexpr bool operator() (DenseIndex, DenseIndex) const { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_statically_lt<IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) && - IndexList<FirstType, OtherTypes...>()[i] < value; - } -}; - -template <typename FirstType, typename... 
OtherTypes> -struct index_statically_lt<const IndexList<FirstType, OtherTypes...> > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) && - IndexList<FirstType, OtherTypes...>()[i] < value; - } -}; - -} // end namespace internal -} // end namespace Eigen - -#else - -namespace Eigen { -namespace internal { - -// No C++11 support -template <typename T> -struct index_known_statically { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex) const{ - return false; - } -}; - -template <typename T> -struct all_indices_known_statically { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() () const { - return false; - } -}; - -template <typename T> -struct indices_statically_known_to_increase { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() () const { - return false; - } -}; - -template <typename T> -struct index_statically_eq { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{ - return false; - } -}; - -template <typename T> -struct index_statically_ne { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{ - return false; - } -}; - -template <typename T> -struct index_statically_gt { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{ - return false; - } -}; - -template <typename T> -struct index_statically_lt { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{ - return false; - } -}; - -} // end namespace internal -} // end namespace Eigen - -#endif - -#endif // EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h deleted file mode 100644 index 40a50e4662..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +++ 
/dev/null @@ -1,219 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Ke Yang <yangke@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H -#define EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H - -namespace Eigen { - -/** \class TensorInflation - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor inflation class. - * - * - */ -namespace internal { -template<typename Strides, typename XprType> -struct traits<TensorInflationOp<Strides, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename Strides, typename XprType> -struct eval<TensorInflationOp<Strides, XprType>, Eigen::Dense> -{ - typedef const TensorInflationOp<Strides, XprType>& type; -}; - -template<typename Strides, typename XprType> -struct nested<TensorInflationOp<Strides, XprType>, 1, typename eval<TensorInflationOp<Strides, XprType> >::type> -{ - typedef TensorInflationOp<Strides, XprType> type; -}; - -} // end namespace internal - -template<typename Strides, typename XprType> -class TensorInflationOp : public TensorBase<TensorInflationOp<Strides, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorInflationOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorInflationOp>::Packet Packet; - typedef typename 
Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename Eigen::internal::nested<TensorInflationOp>::type Nested; - typedef typename Eigen::internal::traits<TensorInflationOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorInflationOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorInflationOp(const XprType& expr, const Strides& strides) - : m_xpr(expr), m_strides(strides) {} - - EIGEN_DEVICE_FUNC - const Strides& strides() const { return m_strides; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const Strides m_strides; -}; - -// Eval as rvalue -template<typename Strides, typename ArgType, typename Device> -struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device> -{ - typedef TensorInflationOp<Strides, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - - enum { - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_strides(op.strides()) - { - m_dimensions = m_impl.dimensions(); - // Expand each dimension to the inflated dimension. - for (int i = 0; i < NumDims; ++i) { - m_dimensions[i] = (m_dimensions[i] - 1) * op.strides()[i] + 1; - } - - // Remember the strides for fast division. 
- for (int i = 0; i < NumDims; ++i) { - m_fastStrides[i] = internal::TensorIntDivisor<Index>(m_strides[i]); - } - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_outputStrides[0] = 1; - m_inputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - } - } else { // RowMajor - m_outputStrides[NumDims-1] = 1; - m_inputStrides[NumDims-1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; - m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; - } - } - } - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - // Computes the input index given the output index. Returns true if the output - // index doesn't fall into a hole. 
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool getInputIndex(Index index, Index* inputIndex) const - { - eigen_assert(index < dimensions().TotalSize()); - *inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_outputStrides[i]; - if (idx != idx / m_fastStrides[i] * m_strides[i]) { - return false; - } - *inputIndex += idx / m_strides[i] * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - if (index != index / m_fastStrides[0] * m_strides[0]) { - return false; - } - *inputIndex += index / m_strides[0]; - return true; - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_outputStrides[i]; - if (idx != idx / m_fastStrides[i] * m_strides[i]) { - return false; - } - *inputIndex += idx / m_strides[i] * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - if (index != index / m_fastStrides[NumDims-1] * m_strides[NumDims-1]) { - return false; - } - *inputIndex += index / m_strides[NumDims - 1]; - } - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - Index inputIndex = 0; - if (getInputIndex(index, &inputIndex)) { - return m_impl.coeff(inputIndex); - } else { - return Scalar(0); - } - } - - // TODO(yangke): optimize this function so that we can detect and produce - // all-zero packets - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - EIGEN_DEVICE_FUNC 
Scalar* data() const { return NULL; } - - protected: - Dimensions m_dimensions; - array<Index, NumDims> m_outputStrides; - array<Index, NumDims> m_inputStrides; - TensorEvaluator<ArgType, Device> m_impl; - const Strides m_strides; - array<internal::TensorIntDivisor<Index>, NumDims> m_fastStrides; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h deleted file mode 100644 index 375c763152..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +++ /dev/null @@ -1,82 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H -#define EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - -#include <initializer_list> - -namespace Eigen { - -/** \class TensorInitializer - * \ingroup CXX11_Tensor_Module - * - * \brief Helper template to initialize Tensors from std::initializer_lists. 
- */ -namespace internal { - -template <typename Derived, int N> -struct Initializer { - typedef std::initializer_list< - typename Initializer<Derived, N - 1>::InitList> InitList; - - static void run(TensorEvaluator<Derived, DefaultDevice>& tensor, - Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>* indices, - const InitList& vals) { - int i = 0; - for (auto v : vals) { - (*indices)[traits<Derived>::NumDimensions - N] = i++; - Initializer<Derived, N - 1>::run(tensor, indices, v); - } - } -}; - -template <typename Derived> -struct Initializer<Derived, 1> { - typedef std::initializer_list<typename traits<Derived>::Scalar> InitList; - - static void run(TensorEvaluator<Derived, DefaultDevice>& tensor, - Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>* indices, - const InitList& vals) { - int i = 0; - // There is likely a faster way to do that than iterating. - for (auto v : vals) { - (*indices)[traits<Derived>::NumDimensions - 1] = i++; - tensor.coeffRef(*indices) = v; - } - } -}; - -template <typename Derived> -struct Initializer<Derived, Dynamic> { - typedef std::initializer_list<typename traits<Derived>::Scalar> InitList; - - static void run(TensorEvaluator<Derived, DefaultDevice>& tensor, - Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>* indices, - const InitList& vals) { - // Static initialization not implemented for VarDims tensors. 
- eigen_assert(false); - } -}; - -template <typename Derived, int N> -void initialize_tensor(TensorEvaluator<Derived, DefaultDevice>& tensor, - const typename Initializer<Derived, traits<Derived>::NumDimensions>::InitList& vals) { - Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions> indices; - Initializer<Derived, traits<Derived>::NumDimensions>::run(tensor, &indices, vals); -} - -} // namespace internal -} // namespace Eigen - -#endif // EIGEN_HAS_VARIADIC_TEMPLATES - -#endif // EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h deleted file mode 100644 index 8330f65dde..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ /dev/null @@ -1,351 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H -#define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H - - -namespace Eigen { - -/** \internal - * - * \class TensorIntDiv - * \ingroup CXX11_Tensor_Module - * - * \brief Fast integer division by a constant. - * - * See the paper from Granlund and Montgomery for explanation. 
- * (at http://dx.doi.org/10.1145/773473.178249) - * - * \sa Tensor - */ - -namespace internal { - -#if !defined(__GCUDACC__) && !defined(__GCUDACC_HOST__) - -namespace { - // Note: result is undefined if val == 0 - template <typename T> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int count_leading_zeros(const T val) - { -#ifdef __CUDA_ARCH__ - if (sizeof(T) == 8) { - return __clzll(val); - } - return __clz(val); -#elif EIGEN_COMP_MSVC - DWORD leading_zeros = 0; - if (sizeof(T) == 8) { - _BitScanReverse64(&leading_zero, val); - } - else { - _BitScanReverse(&leading_zero, val); - } -#else - if (sizeof(T) == 8) { - return __builtin_clzl(static_cast<uint64_t>(val)); - } - return __builtin_clz(static_cast<uint32_t>(val)); -#endif - } - - - template <typename T> - struct DividerTraits { - typedef typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type type; - static const int N = sizeof(T) * 8; - }; - - - template <typename T> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) { -#if defined(__CUDA_ARCH__) - return __umulhi(a, b); -#else - return (static_cast<uint64_t>(a) * b) >> 32; -#endif - } - - template <typename T> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { -#if defined(__CUDA_ARCH__) - return __umul64hi(a, b); -#elif defined(__SIZEOF_INT128__) - __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b); - return static_cast<uint64_t>(v >> 64); -#else - return (TensorUInt128<static_val<0>, uint64_t>(a) * TensorUInt128<static_val<0>, uint64_t>(b)).upper(); -#endif - } - - template <int N, typename T> - struct DividerHelper { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier(const int log_div, const T divider) { - EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE); - return (static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1; - } - }; - - template <typename T> - struct DividerHelper<64, T> { - static 
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { -#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__) - return ((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1); -#else - const uint64_t shift = 1ULL << log_div; - TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1)); - return static_cast<uint64_t>(result); -#endif - } - }; - -} - - -template <typename T, bool div_gt_one = false> -struct TensorIntDivisor { - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { - multiplier = 0; - shift1 = 0; - shift2 = 0; - } - - // Must have 0 < divider < 2^31. This is relaxed to - // 0 < divider < 2^63 when using 64-bit indices on platforms that support - // the __uint128_t type. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) { - const int N = DividerTraits<T>::N; - eigen_assert(divider < NumTraits<UnsignedType>::highest()/2); - eigen_assert(divider > 0); - - // fast ln2 - const int leading_zeros = count_leading_zeros(static_cast<UnsignedType>(divider)); - int log_div = N - leading_zeros; - // if divider is a power of two then log_div is 1 more than it should be. - if ((1ull << (log_div-1)) == divider) - log_div--; - - multiplier = DividerHelper<N, T>::computeMultiplier(log_div, divider); - shift1 = log_div > 1 ? 1 : log_div; - shift2 = log_div > 1 ? log_div-1 : 0; - } - - // Must have 0 <= numerator. On platforms that dont support the __uint128_t - // type numerator should also be less than 2^32-1. 
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const { - eigen_assert(numerator < NumTraits<UnsignedType>::highest()/2); - eigen_assert(numerator >= 0); - - UnsignedType t1 = muluh(multiplier, numerator); - UnsignedType t = (static_cast<UnsignedType>(numerator) - t1) >> shift1; - return (t1 + t) >> shift2; - } - - private: - typedef typename DividerTraits<T>::type UnsignedType; - UnsignedType multiplier; - int32_t shift1; - int32_t shift2; -}; - - -// Optimized version for signed 32 bit integers. -// Derived from Hacker's Delight. -template <> -class TensorIntDivisor<int32_t, true> { - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { - magic = 0; - shift = 0; - } - // Must have 2 <= divider - EIGEN_DEVICE_FUNC TensorIntDivisor(int32_t divider) { - eigen_assert(divider >= 2); - calcMagic(divider); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const { -#ifdef __CUDA_ARCH__ - return (__umulhi(magic, n) >> shift); -#else - uint64_t v = static_cast<uint64_t>(magic) * static_cast<uint64_t>(n); - return (static_cast<uint32_t>(v >> 32) >> shift); -#endif - } - -private: - // Compute the magic numbers. See Hacker's Delight section 10 for an in - // depth explanation. - EIGEN_DEVICE_FUNC void calcMagic(int32_t d) { - const unsigned two31 = 0x80000000; // 2**31. - unsigned ad = d; - unsigned t = two31 + (ad >> 31); - unsigned anc = t - 1 - t%ad; // Absolute value of nc. - int p = 31; // Init. p. - unsigned q1 = two31/anc; // Init. q1 = 2**p/|nc|. - unsigned r1 = two31 - q1*anc; // Init. r1 = rem(2**p, |nc|). - unsigned q2 = two31/ad; // Init. q2 = 2**p/|d|. - unsigned r2 = two31 - q2*ad; // Init. r2 = rem(2**p, |d|). - unsigned delta = 0; - do { - p = p + 1; - q1 = 2*q1; // Update q1 = 2**p/|nc|. - r1 = 2*r1; // Update r1 = rem(2**p, |nc|). - if (r1 >= anc) { // (Must be an unsigned - q1 = q1 + 1; // comparison here). - r1 = r1 - anc;} - q2 = 2*q2; // Update q2 = 2**p/|d|. 
- r2 = 2*r2; // Update r2 = rem(2**p, |d|). - if (r2 >= ad) { // (Must be an unsigned - q2 = q2 + 1; // comparison here). - r2 = r2 - ad;} - delta = ad - r2; - } while (q1 < delta || (q1 == delta && r1 == 0)); - - magic = (unsigned)(q2 + 1); - shift = p - 32; - } - - uint32_t magic; - int32_t shift; -}; - - -template <typename T, bool div_gt_one> -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) { - return divisor.divide(numerator); -} - - -#else -// Reverse to the old code since gcudacc doesn't support the code above. -template <typename T, bool div_gt_one = false> -struct TensorIntDivisor { - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { - multiplier = 0; - shift1 = 0; - shift2 = 0; - } - - // Must have 1 <= divider <= 2^31-1 - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) { - const int N = 32; - eigen_assert(divider > 0); - eigen_assert(divider < (1ull<<(N-1))); - - // fast ln2 -#ifndef __CUDA_ARCH__ - const int leading_zeros = __builtin_clz(divider); -#else - const int leading_zeros = __clz(divider); -#endif - int log_div = N - leading_zeros; - // if divider is a power of two then log_div is 1 more than it should be. - if ((1ull << (log_div-1)) == divider) - log_div--; - - multiplier = (static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1; - shift1 = log_div > 1 ? 1 : log_div; - shift2 = log_div > 1 ? 
log_div-1 : 0; - } - - // Must have 0 <= numerator <= 2^32-1 - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const { - const int N = 32; - eigen_assert(numerator >= 0); - eigen_assert(static_cast<uint64_t>(numerator) < 1ull<<N); - - uint32_t t1 = (multiplier * numerator) >> N; - uint32_t t = (static_cast<uint32_t>(numerator) - t1) >> shift1; - return (t1 + t) >> shift2; - } - - private: - uint64_t multiplier; - int32_t shift1; - int32_t shift2; -}; - - -// Optimized version for signed 32 bit integers. -// Derived from Hacker's Delight. -template <> -class TensorIntDivisor<int, true> { - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { - magic = 0; - shift = 0; - } - // Must have 2 <= divider - EIGEN_DEVICE_FUNC TensorIntDivisor(int divider) { - eigen_assert(divider >= 2); - calcMagic(divider); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int n) const { -#ifdef __CUDA_ARCH__ - return (__umulhi(magic, n) >> shift); -#else - uint64_t v = static_cast<uint64_t>(magic) * static_cast<uint64_t>(n); - return (static_cast<unsigned int>(v >> 32) >> shift); -#endif - } - -private: - // Compute the magic numbers. See Hacker's Delight section 10 for an in - // depth explanation. - EIGEN_DEVICE_FUNC void calcMagic(int d) { - const unsigned two31 = 0x80000000; // 2**31. - unsigned ad = d; - unsigned t = two31 + (ad >> 31); - unsigned anc = t - 1 - t%ad; // Absolute value of nc. - int p = 31; // Init. p. - unsigned q1 = two31/anc; // Init. q1 = 2**p/|nc|. - unsigned r1 = two31 - q1*anc; // Init. r1 = rem(2**p, |nc|). - unsigned q2 = two31/ad; // Init. q2 = 2**p/|d|. - unsigned r2 = two31 - q2*ad; // Init. r2 = rem(2**p, |d|). - unsigned delta = 0; - do { - p = p + 1; - q1 = 2*q1; // Update q1 = 2**p/|nc|. - r1 = 2*r1; // Update r1 = rem(2**p, |nc|). - if (r1 >= anc) { // (Must be an unsigned - q1 = q1 + 1; // comparison here). - r1 = r1 - anc;} - q2 = 2*q2; // Update q2 = 2**p/|d|. - r2 = 2*r2; // Update r2 = rem(2**p, |d|). 
- if (r2 >= ad) { // (Must be an unsigned - q2 = q2 + 1; // comparison here). - r2 = r2 - ad;} - delta = ad - r2; - } while (q1 < delta || (q1 == delta && r1 == 0)); - - magic = (unsigned)(q2 + 1); - shift = p - 32; - } - - unsigned int magic; - int shift; -}; - - -template <typename T, bool div_gt_one> -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) { - return divisor.divide(numerator); -} - -#endif - -} // end namespace internal -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h deleted file mode 100644 index bd795d54b0..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +++ /dev/null @@ -1,217 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H -#define EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H - -namespace Eigen { - -/** \class TensorLayoutSwap - * \ingroup CXX11_Tensor_Module - * - * \brief Swap the layout from col-major to row-major, or row-major - * to col-major, and invert the order of the dimensions. - * - * Beware: the dimensions are reversed by this operation. If you want to - * preserve the ordering of the dimensions, you need to combine this - * operation with a shuffle. 
- * - * \example: - * Tensor<float, 2, ColMajor> input(2, 4); - * Tensor<float, 2, RowMajor> output = input.swap_layout(); - * eigen_assert(output.dimension(0) == 4); - * eigen_assert(output.dimension(1) == 2); - * - * array<int, 2> shuffle(1, 0); - * output = input.swap_layout().shuffle(shuffle); - * eigen_assert(output.dimension(0) == 2); - * eigen_assert(output.dimension(1) == 4); - * - */ -namespace internal { -template<typename XprType> -struct traits<TensorLayoutSwapOp<XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = traits<XprType>::NumDimensions; - static const int Layout = (static_cast<int>(traits<XprType>::Layout) == static_cast<int>(ColMajor)) ? 
RowMajor : ColMajor; -}; - -template<typename XprType> -struct eval<TensorLayoutSwapOp<XprType>, Eigen::Dense> -{ - typedef const TensorLayoutSwapOp<XprType>& type; -}; - -template<typename XprType> -struct nested<TensorLayoutSwapOp<XprType>, 1, typename eval<TensorLayoutSwapOp<XprType> >::type> -{ - typedef TensorLayoutSwapOp<XprType> type; -}; - -} // end namespace internal - - - -template<typename XprType> -class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename internal::remove_const<typename XprType::PacketReturnType>::type PacketReturnType; - typedef typename Eigen::internal::nested<TensorLayoutSwapOp>::type Nested; - typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorLayoutSwapOp(const XprType& expr) - : m_xpr(expr) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const TensorLayoutSwapOp& other) - { - typedef TensorAssignOp<TensorLayoutSwapOp, const TensorLayoutSwapOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorLayoutSwapOp, const OtherDerived> Assign; - Assign assign(*this, other); - 
internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; -}; - - -// Eval as rvalue -template<typename ArgType, typename Device> -struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> -{ - typedef TensorLayoutSwapOp<ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - - enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == - static_cast<int>(ColMajor)) - ? RowMajor - : ColMajor, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - for(int i = 0; i < NumDims; ++i) { - m_dimensions[i] = m_impl.dimensions()[NumDims-1-i]; - } - } - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { - return m_impl.evalSubExprsIfNeeded(data); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_impl.coeff(index); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return m_impl.template packet<LoadMode>(index); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return m_impl.data(); } - - const 
TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } - - protected: - TensorEvaluator<ArgType, Device> m_impl; - Dimensions m_dimensions; -}; - - -// Eval as lvalue -template<typename ArgType, typename Device> - struct TensorEvaluator<TensorLayoutSwapOp<ArgType>, Device> - : public TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> -{ - typedef TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> Base; - typedef TensorLayoutSwapOp<ArgType> XprType; - - enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == - static_cast<int>(ColMajor)) - ? RowMajor - : ColMajor, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : Base(op, device) - { } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - return this->m_impl.coeffRef(index); - } - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - this->m_impl.template writePacket<StoreMode>(index, x); - } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h deleted file mode 100644 index 908bdc38ad..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ /dev/null @@ -1,320 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_MAP_H -#define EIGEN_CXX11_TENSOR_TENSOR_MAP_H - -namespace Eigen { - -/** \class TensorMap - * \ingroup CXX11_Tensor_Module - * - * \brief A tensor expression mapping an existing array of data. - * - */ - -template<typename PlainObjectType, int Options_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_> > -{ - public: - typedef TensorMap<PlainObjectType, Options_> Self; - typedef typename PlainObjectType::Base Base; - typedef typename Eigen::internal::nested<Self>::type Nested; - typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind; - typedef typename internal::traits<PlainObjectType>::Index Index; - typedef typename internal::traits<PlainObjectType>::Scalar Scalar; - typedef typename internal::packet_traits<Scalar>::type Packet; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef typename Base::CoeffReturnType CoeffReturnType; - - /* typedef typename internal::conditional< - bool(internal::is_lvalue<PlainObjectType>::value), - Scalar *, - const Scalar *>::type - PointerType;*/ - typedef Scalar* PointerType; - typedef PointerType PointerArgType; - - static const int Options = Options_; - - static const Index NumIndices = PlainObjectType::NumIndices; - typedef typename PlainObjectType::Dimensions Dimensions; - - enum { - IsAligned = ((int(Options_) & Aligned) == Aligned), - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - BlockAccess = false, - Layout = PlainObjectType::Layout, - CoordAccess = true, - }; - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr) : m_data(dataPtr), m_dimensions() { - // The number of dimensions used to construct a tensor must be 
equal to the rank of the tensor. - EIGEN_STATIC_ASSERT((0 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) { - // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) - } -#else - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(firstDimension) { - // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT((1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2) : m_data(dataPtr), m_dimensions(dim1, dim2) { - EIGEN_STATIC_ASSERT(2 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3) { - EIGEN_STATIC_ASSERT(3 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4) { - EIGEN_STATIC_ASSERT(4 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4, dim5) { - 
EIGEN_STATIC_ASSERT(5 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) - } -#endif - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const array<Index, NumIndices>& dimensions) - : m_data(dataPtr), m_dimensions(dimensions) - { } - - template <typename Dimensions> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const Dimensions& dimensions) - : m_data(dataPtr), m_dimensions(dimensions) - { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PlainObjectType& tensor) - : m_data(tensor.data()), m_dimensions(tensor.dimensions()) - { } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index rank() const { return m_dimensions.rank(); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_dimensions[n]; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar* data() { return m_data; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar* data() const { return m_data; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const - { - // eigen_assert(checkIndexRange(indices)); - if (PlainObjectType::Options&RowMajor) { - const Index index = m_dimensions.IndexOfRowMajor(indices); - return m_data[index]; - } else { - const Index index = m_dimensions.IndexOfColMajor(indices); - return m_data[index]; - } - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()() const - { - EIGEN_STATIC_ASSERT(NumIndices == 0 || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); - eigen_assert(rank() == 0); - return m_data[0]; - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... 
IndexTypes> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const - { - static_assert(sizeof...(otherIndices) + 1 == NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); - if (PlainObjectType::Options&RowMajor) { - const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumIndices>{{firstIndex, otherIndices...}}); - return m_data[index]; - } else { - const Index index = m_dimensions.IndexOfColMajor(array<Index, NumIndices>{{firstIndex, otherIndices...}}); - return m_data[index]; - } - } -#else - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const - { - eigen_internal_assert(index >= 0 && index < size()); - return m_data[index]; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i1 + i0 * m_dimensions[0]; - return m_data[index]; - } else { - const Index index = i0 + i1 * m_dimensions[0]; - return m_data[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i2 + m_dimensions[1] * (i1 + m_dimensions[0] * i0); - return m_data[index]; - } else { - const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); - return m_data[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)); - return m_data[index]; - } else { - const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3)); - return m_data[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index 
i3, Index i4) const - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0))); - return m_data[index]; - } else { - const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4))); - return m_data[index]; - } - } -#endif - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices) - { - // eigen_assert(checkIndexRange(indices)); - if (PlainObjectType::Options&RowMajor) { - const Index index = m_dimensions.IndexOfRowMajor(indices); - return m_data[index]; - } else { - const Index index = m_dimensions.IndexOfColMajor(indices); - return m_data[index]; - } - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()() - { - static_assert(NumIndices == 0 || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); - eigen_internal_assert(rank() == 0); - return m_data[0]; - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... 
otherIndices) - { - static_assert(sizeof...(otherIndices) + 1 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); - const std::size_t NumDims = sizeof...(otherIndices) + 1; - if (PlainObjectType::Options&RowMajor) { - const array<Index, NumDims> dims = {firstIndex, otherIndices...}; - const Index index = m_dimensions.IndexOfRowMajor(dims); - return m_data[index]; - } else { - const array<Index, NumDims> dims = {firstIndex, otherIndices...}; - const Index index = m_dimensions.IndexOfColMajor(dims); - return m_data[index]; - } - } -#else - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index index) - { - eigen_internal_assert(index >= 0 && index < size()); - return m_data[index]; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i1 + i0 * m_dimensions[0]; - return m_data[index]; - } else { - const Index index = i0 + i1 * m_dimensions[0]; - return m_data[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i2 + m_dimensions[1] * (i1 + m_dimensions[0] * i0); - return m_data[index]; - } else { - const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); - return m_data[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)); - return m_data[index]; - } else { - const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3)); - return m_data[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) - { - if 
(PlainObjectType::Options&RowMajor) { - const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0))); - return m_data[index]; - } else { - const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4))); - return m_data[index]; - } - } -#endif - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Self& operator=(const Self& other) - { - typedef TensorAssignOp<Self, const Self> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Self& operator=(const OtherDerived& other) - { - typedef TensorAssignOp<Self, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - private: - Scalar* m_data; - Dimensions m_dimensions; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_MAP_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h deleted file mode 100644 index 4dd9af6f92..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ /dev/null @@ -1,103 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_H -#define EIGEN_CXX11_TENSOR_TENSOR_META_H - -namespace Eigen { - -template<bool cond> struct Cond {}; - -template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -const T1& choose(Cond<true>, const T1& first, const T2&) { - return first; -} - -template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -const T2& choose(Cond<false>, const T1&, const T2& second) { - return second; -} - - -// Default packet types -template <typename Scalar, typename Device> -struct PacketType { - typedef typename internal::packet_traits<Scalar>::type type; - static const int size = internal::unpacket_traits<type>::size; -}; - -// For CUDA packet types when using a GpuDevice -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) -template <> -struct PacketType<float, GpuDevice> { - typedef float4 type; - static const int size = 4; -}; -template <> -struct PacketType<double, GpuDevice> { - typedef double2 type; - static const int size = 2; -}; -#endif - - -#if defined(EIGEN_HAS_CONSTEXPR) -#define EIGEN_CONSTEXPR constexpr -#else -#define EIGEN_CONSTEXPR -#endif - -// Tuple mimics std::pair but works on e.g. nvcc. 
-template <typename U, typename V> struct Tuple { - public: - U first; - V second; - - typedef U first_type; - typedef V second_type; - - EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Tuple() : first(), second() {} - - EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Tuple(const U& f, const V& s) : first(f), second(s) {} - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Tuple& operator= (const Tuple& rhs) { - if (&rhs == this) return *this; - first = rhs.first; - second = rhs.second; - return *this; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void swap(Tuple& rhs) { - using numext::swap; - swap(first, rhs.first); - swap(second, rhs.second); - } -}; - -template <typename U, typename V> -EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -bool operator==(const Tuple<U, V>& x, const Tuple<U, V>& y) { - return (x.first == y.first && x.second == y.second); -} - -template <typename U, typename V> -EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -bool operator!=(const Tuple<U, V>& x, const Tuple<U, V>& y) { - return !(x == y); -} - -#undef EIGEN_CONSTEXPR - -} // namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_META_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h deleted file mode 100644 index e67f3da31a..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ /dev/null @@ -1,817 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H -#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H - -namespace Eigen { - -/** \class TensorReshaping - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor reshaping class. - * - * - */ -namespace internal { -template<typename NewDimensions, typename XprType> -struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = array_size<NewDimensions>::value; - static const int Layout = XprTraits::Layout; -}; - -template<typename NewDimensions, typename XprType> -struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense> -{ - typedef const TensorReshapingOp<NewDimensions, XprType>& type; -}; - -template<typename NewDimensions, typename XprType> -struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type> -{ - typedef TensorReshapingOp<NewDimensions, XprType> type; -}; - -} // end namespace internal - - - -template<typename NewDimensions, typename XprType> -class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorReshapingOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename internal::remove_const<typename XprType::PacketReturnType>::type PacketReturnType; - typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested; - typedef 
typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims) - : m_xpr(expr), m_dims(dims) {} - - EIGEN_DEVICE_FUNC - const NewDimensions& dimensions() const { return m_dims; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other) - { - typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; - const NewDimensions m_dims; -}; - - -// Eval as rvalue -template<typename NewDimensions, typename ArgType, typename Device> -struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> -{ - typedef TensorReshapingOp<NewDimensions, ArgType> XprType; - typedef NewDimensions Dimensions; - - enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - // TODO(andydavis) Re-enable BlockAccess when the performance issue - // with block-based reshape is resolved. 
- BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_dimensions(op.dimensions()) - { - // The total size of the reshaped tensor must be equal to the total size - // of the input tensor. - eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions())); - - if (BlockAccess) { - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = - m_impl.dimensions(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_outputStrides[0] = 1; - for (int i = 1; i < NumOutputDims; ++i) { - m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; - } - m_inputStrides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - m_inputStrides[i] = m_inputStrides[i - 1] * input_dims[i - 1]; - } - } else { -#ifdef __CUDACC__ - // TODO(andydavis) Remove the following line of code when associated - // nvcc bug b/22973013 is fixed. 
- for (int i = 0; i < 1; ++i) {} -#endif - m_outputStrides[NumOutputDims - 1] = 1; - for (int i = NumOutputDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; - } - m_inputStrides[NumInputDims - 1] = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1]; - } - } - } - } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - static const std::size_t NumOutputDims = - internal::array_size<Dimensions>::value; - static const std::size_t NumInputDims = internal::array_size< - typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef typename internal::TensorBlock< - Index, typename internal::remove_const<Scalar>::type, NumOutputDims, Layout> - OutputTensorBlock; - typedef typename internal::TensorBlock< - Index, typename internal::remove_const<Scalar>::type, NumInputDims, Layout> - InputTensorBlock; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { - return m_impl.evalSubExprsIfNeeded(data); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_impl.coeff(index); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return m_impl.template packet<LoadMode>(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector<internal::TensorOpResourceRequirements>* resources) const { - m_impl.getResourceRequirements(resources); - } - - // TODO(andydavis) Reduce the overhead of this function. 
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( - OutputTensorBlock* output_block) const { - // Calculate output block unit-stride inner dimension length. - const DSizes<Index, NumOutputDims>& output_block_sizes = - output_block->block_sizes(); - Index output_inner_dim_size = 1; - Index output_outer_dim_start = NumOutputDims; - for (Index i = 0; i < NumOutputDims; ++i) { - const Index dim = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? i : NumOutputDims - i - 1; - output_inner_dim_size *= output_block_sizes[dim]; - if (output_block_sizes[dim] < m_dimensions[dim]) { - output_outer_dim_start = i + 1; - break; - } - } - - // Initialize output block iterator state. - struct BlockIteratorState { - Index stride; - Index span; - Index size; - Index count; - }; - array<BlockIteratorState, NumOutputDims> block_iter_state; - - for (Index i = 0; i < NumOutputDims; ++i) { - const Index dim = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? i : NumOutputDims - i - 1; - block_iter_state[i].size = output_block_sizes[dim]; - block_iter_state[i].stride = m_outputStrides[dim]; - block_iter_state[i].span = - block_iter_state[i].stride * (block_iter_state[i].size - 1); - block_iter_state[i].count = 0; - } - - const Index output_outer_dim_size = output_block_sizes.TotalSize() / - output_inner_dim_size; - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = - m_impl.dimensions(); - - Index index = output_block->first_coeff_index(); - for (Index outer_idx = 0; outer_idx < output_outer_dim_size; ++outer_idx) { - Index inner_idx = 0; - while (inner_idx < output_inner_dim_size) { - // Calculate input coords based on 'index'. 
- array<Index, NumInputDims> input_coords; - Index idx = index; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumInputDims - 1; i > 0; --i) { - input_coords[i] = idx / m_inputStrides[i]; - idx -= input_coords[i] * m_inputStrides[i]; - } - input_coords[0] = idx; - } else { - for (int i = 0; i < NumInputDims - 1; ++i) { - input_coords[i] = idx / m_inputStrides[i]; - idx -= input_coords[i] * m_inputStrides[i]; - } - input_coords[NumInputDims - 1] = idx; - } - - // Calculate target input block shape, using at most - // 'output_inner_dim_size' coefficients along the input block's inner - // dimensions. - DSizes<Index, NumInputDims> input_block_sizes; - Index num_to_allocate = output_inner_dim_size - inner_idx; - for (Index i = 0; i < NumInputDims; ++i) { - const Index dim = - static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? i : NumInputDims - i - 1; - input_block_sizes[dim] = numext::mini( - num_to_allocate, (static_cast<Index>(input_dims[dim]) - - input_coords[dim])); - if (input_coords[dim] == 0) { - num_to_allocate /= input_block_sizes[dim]; - } else { - num_to_allocate = 1; - } - } - - // Calculate input block strides. - DSizes<Index, NumInputDims> input_block_strides; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - input_block_strides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - input_block_strides[i] = input_block_strides[i - 1] * - input_block_sizes[i - 1]; - } - } else { - input_block_strides[NumInputDims - 1] = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - input_block_strides[i] = input_block_strides[i + 1] * - input_block_sizes[i + 1]; - } - } - - // Instantiate and read input block from input tensor. 
- InputTensorBlock input_block(index, input_block_sizes, - input_block_strides, m_inputStrides, - output_block->data() + outer_idx * - output_inner_dim_size + inner_idx); - - m_impl.block(&input_block); - - const Index input_block_total_size = input_block_sizes.TotalSize(); - index += input_block_total_size; - inner_idx += input_block_total_size; - } - eigen_assert(inner_idx == output_inner_dim_size); - index -= output_inner_dim_size; - // Update index. - for (Index i = output_outer_dim_start; i < NumOutputDims; ++i) { - if (++block_iter_state[i].count < block_iter_state[i].size) { - index += block_iter_state[i].stride; - break; - } - block_iter_state[i].count = 0; - index -= block_iter_state[i].span; - } - } - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); } - - EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } - - protected: - TensorEvaluator<ArgType, Device> m_impl; - NewDimensions m_dimensions; - DSizes<Index, NumOutputDims> m_outputStrides; - DSizes<Index, NumInputDims> m_inputStrides; -}; - - -// Eval as lvalue -template<typename NewDimensions, typename ArgType, typename Device> - struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device> - : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> - -{ - typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base; - typedef TensorReshapingOp<NewDimensions, ArgType> XprType; - typedef NewDimensions Dimensions; - - enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : Base(op, device) - { } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar 
Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) - { - return this->m_impl.coeffRef(index); - } - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - this->m_impl.template writePacket<StoreMode>(index, x); - } -}; - - -/** \class TensorSlicing - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor slicing class. - * - * - */ -namespace internal { -template<typename StartIndices, typename Sizes, typename XprType> -struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = array_size<StartIndices>::value; - static const int Layout = XprTraits::Layout; -}; - -template<typename StartIndices, typename Sizes, typename XprType> -struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense> -{ - typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type; -}; - -template<typename StartIndices, typename Sizes, typename XprType> -struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type> -{ - typedef TensorSlicingOp<StartIndices, Sizes, XprType> type; -}; - -} // end namespace internal - - - -template<typename StartIndices, typename Sizes, typename XprType> -class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar; - typedef typename 
Eigen::internal::traits<TensorSlicingOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested; - typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes) - : m_xpr(expr), m_indices(indices), m_sizes(sizes) {} - - EIGEN_DEVICE_FUNC - const StartIndices& startIndices() const { return m_indices; } - EIGEN_DEVICE_FUNC - const Sizes& sizes() const { return m_sizes; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other) - { - typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; - const StartIndices m_indices; - const Sizes m_sizes; -}; - - -// Eval as rvalue -template<typename StartIndices, typename Sizes, typename ArgType, typename Device> -struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> -{ - typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType; - static 
const int NumDims = internal::array_size<Sizes>::value; - - enum { - // Alignment can't be guaranteed at compile time since it depends on the - // slice offsets and sizes. - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess, - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices()) - { - for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) { - eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]); - } - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - const Sizes& output_dims = op.sizes(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - } - - // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. 
- m_outputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; - m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]); - } - } else { - m_inputStrides[NumDims-1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; - } - - m_outputStrides[NumDims-1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; - m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]); - } - } - - m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1), - device.lastLevelCacheSize() / - sizeof(Scalar)); - } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename internal::remove_const<Scalar>::type ScalarNonConst; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - typedef Sizes Dimensions; - typedef internal::TensorBlock<Index, ScalarNonConst, NumDims, Layout> - TensorBlock; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { - m_impl.evalSubExprsIfNeeded(NULL); - if (internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value && data && m_impl.data()) { - Index contiguous_values = 1; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < NumDims; ++i) { - contiguous_values *= dimensions()[i]; - if (dimensions()[i] != m_impl.dimensions()[i]) { - break; - } - } - } else { - for (int i = NumDims-1; i >= 0; --i) { - contiguous_values *= dimensions()[i]; - if (dimensions()[i] != m_impl.dimensions()[i]) { - break; - } - } - } - // Use memcpy if it's going to be faster than using the regular evaluation. 
- if (contiguous_values > m_device.memcpyThreshold()) { - Scalar* src = (Scalar*)m_impl.data(); - for (int i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) { - Index offset = srcCoeff(i); - m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar)); - } - return false; - } - } - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_impl.coeff(srcCoeff(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < internal::array_prod(dimensions())); - - Index inputIndices[] = {0, 0}; - Index indices[] = {index, index + packetSize - 1}; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx0 = indices[0] / m_fastOutputStrides[i]; - const Index idx1 = indices[1] / m_fastOutputStrides[i]; - inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; - inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; - indices[0] -= idx0 * m_outputStrides[i]; - indices[1] -= idx1 * m_outputStrides[i]; - } - inputIndices[0] += (indices[0] + m_offsets[0]); - inputIndices[1] += (indices[1] + m_offsets[0]); - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx0 = indices[0] / m_fastOutputStrides[i]; - const Index idx1 = indices[1] / m_fastOutputStrides[i]; - inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; - inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; - indices[0] -= idx0 * m_outputStrides[i]; - indices[1] -= idx1 * m_outputStrides[i]; - } - inputIndices[0] += (indices[0] + m_offsets[NumDims-1]); - inputIndices[1] += (indices[1] + 
m_offsets[NumDims-1]); - } - if (inputIndices[1] - inputIndices[0] == packetSize - 1) { - PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]); - return rslt; - } - else { - typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - values[0] = m_impl.coeff(inputIndices[0]); - values[packetSize-1] = m_impl.coeff(inputIndices[1]); - for (int i = 1; i < packetSize-1; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) - { - array<Index, NumDims> inputCoords; - for (int i = 0; i < NumDims; ++i) { - inputCoords = coords[i] + this->m_offsets[i]; - } - return m_impl.coeff(inputCoords); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector<internal::TensorOpResourceRequirements>* resources) const { - resources->push_back(internal::TensorOpResourceRequirements( - internal::kSkewedInnerDims, m_block_total_size_max)); - m_impl.getResourceRequirements(resources); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( - TensorBlock* output_block) const { - TensorBlock input_block(srcCoeff(output_block->first_coeff_index()), - output_block->block_sizes(), - output_block->block_strides(), - m_inputStrides, - output_block->data()); - m_impl.block(&input_block); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { - Scalar* result = m_impl.data(); - if (result) { - Index offset = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < NumDims; ++i) { - if (m_dimensions[i] != m_impl.dimensions()[i]) { - offset += m_offsets[i] * m_inputStrides[i]; - for (int j = i+1; j < NumDims; ++j) { - if (m_dimensions[j] > 1) { - return NULL; - } - offset += m_offsets[j] * m_inputStrides[j]; - } - break; - } - } - } else { - for (int i = NumDims - 1; i >= 0; --i) { - if (m_dimensions[i] != 
m_impl.dimensions()[i]) { - offset += m_offsets[i] * m_inputStrides[i]; - for (int j = i-1; j >= 0; --j) { - if (m_dimensions[j] > 1) { - return NULL; - } - offset += m_offsets[j] * m_inputStrides[j]; - } - break; - } - } - } - return result + offset; - } - return NULL; - } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const - { - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_fastOutputStrides[i]; - inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - inputIndex += (index + m_offsets[0]); - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_fastOutputStrides[i]; - inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - inputIndex += (index + m_offsets[NumDims-1]); - } - return inputIndex; - } - - array<Index, NumDims> m_outputStrides; - array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides; - array<Index, NumDims> m_inputStrides; - TensorEvaluator<ArgType, Device> m_impl; - const Device& m_device; - Dimensions m_dimensions; - const StartIndices m_offsets; - std::size_t m_block_total_size_max; -}; - - -// Eval as lvalue -template<typename StartIndices, typename Sizes, typename ArgType, typename Device> -struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> - : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> -{ - typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base; - typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType; - static const int NumDims = internal::array_size<Sizes>::value; - - enum { - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = TensorEvaluator<ArgType, 
Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess, - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : Base(op, device) - { } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename internal::remove_const<Scalar>::type ScalarNonConst; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - typedef Sizes Dimensions; - typedef internal::TensorBlock<Index, ScalarNonConst, NumDims, Layout> - TensorBlock; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) - { - return this->m_impl.coeffRef(this->srcCoeff(index)); - } - - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - Index inputIndices[] = {0, 0}; - Index indices[] = {index, index + packetSize - 1}; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; - const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; - inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; - inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i]; - indices[0] -= idx0 * this->m_outputStrides[i]; - indices[1] -= idx1 * this->m_outputStrides[i]; - } - inputIndices[0] += (indices[0] + this->m_offsets[0]); - inputIndices[1] += (indices[1] + this->m_offsets[0]); - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; - const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; - inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; - inputIndices[1] += (idx1 + 
this->m_offsets[i]) * this->m_inputStrides[i]; - indices[0] -= idx0 * this->m_outputStrides[i]; - indices[1] -= idx1 * this->m_outputStrides[i]; - } - inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]); - inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]); - } - if (inputIndices[1] - inputIndices[0] == packetSize - 1) { - this->m_impl.template writePacket<StoreMode>(inputIndices[0], x); - } - else { - EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize]; - internal::pstore<CoeffReturnType, PacketReturnType>(values, x); - this->m_impl.coeffRef(inputIndices[0]) = values[0]; - this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1]; - for (int i = 1; i < packetSize-1; ++i) { - this->coeffRef(index+i) = values[i]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(const array<Index, NumDims>& coords) - { - array<Index, NumDims> inputCoords; - for (int i = 0; i < NumDims; ++i) { - inputCoords = coords[i] + this->m_offsets[i]; - } - return this->m_impl.coeffRef(inputCoords); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock( - const TensorBlock& block) { - this->m_impl.writeBlock( - TensorBlock(this->srcCoeff(block.first_coeff_index()), - block.block_sizes(), - block.block_strides(), - this->m_inputStrides, - const_cast<ScalarNonConst*>(block.data()))); - - } -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h deleted file mode 100644 index d1dff3f38b..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ /dev/null @@ -1,388 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 
2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H -#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H - -namespace Eigen { - -/** \class TensorPadding - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor padding class. - * At the moment only padding with a constant value is supported. - * - */ -namespace internal { -template<typename PaddingDimensions, typename XprType> -struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename PaddingDimensions, typename XprType> -struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense> -{ - typedef const TensorPaddingOp<PaddingDimensions, XprType>& type; -}; - -template<typename PaddingDimensions, typename XprType> -struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type> -{ - typedef TensorPaddingOp<PaddingDimensions, XprType> type; -}; - -} // end namespace internal - - - -template<typename PaddingDimensions, typename XprType> -class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorPaddingOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename 
XprType::PacketReturnType PacketReturnType; - typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested; - typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, - const Scalar padding_value) - : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {} - - EIGEN_DEVICE_FUNC - const PaddingDimensions& padding() const { return m_padding_dims; } - EIGEN_DEVICE_FUNC - Scalar padding_value() const { return m_padding_value; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const PaddingDimensions m_padding_dims; - const Scalar m_padding_value; -}; - - -// Eval as rvalue -template<typename PaddingDimensions, typename ArgType, typename Device> -struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device> -{ - typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<PaddingDimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = true, - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()) - { - // Compute dimensions - m_dimensions = m_impl.dimensions(); - for (int i = 0; i < NumDims; ++i) { - m_dimensions[i] += m_padding[i].first + m_padding[i].second; - } - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = 
m_impl.dimensions(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_outputStrides[0] = 1; - if (NumDims > 0) { - m_inputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - } - m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1]; - } - } else { - m_outputStrides[NumDims] = 1; - if (NumDims > 0) { - m_inputStrides[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; - m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1]; - } - m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0]; - } - } - } - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - eigen_assert(index < dimensions().TotalSize()); - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_outputStrides[i]; - if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { - return m_paddingValue; - } - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - if (NumDims > 0) { - if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) { - return m_paddingValue; - } - inputIndex += (index - m_padding[0].first); - } - } else { - for (int i = 0; i < 
NumDims - 1; ++i) { - const Index idx = index / m_outputStrides[i+1]; - if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { - return m_paddingValue; - } - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - index -= idx * m_outputStrides[i+1]; - } - if (NumDims > 0) { - if (index < m_padding[NumDims-1].first || - index >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) { - return m_paddingValue; - } - inputIndex += (index - m_padding[NumDims-1].first); - } - } - return m_impl.coeff(inputIndex); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - return packetColMajor(index); - } - return packetRowMajor(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const - { - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - if (NumDims > 0) { - const Index idx = coords[0]; - if (idx < m_padding[0].first || idx >= m_dimensions[0] - m_padding[0].second) { - return m_paddingValue; - } - inputIndex = idx - m_padding[0].first; - } - for (int i = 1; i < NumDims; ++i) { - const Index idx = coords[i]; - if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { - return m_paddingValue; - } - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - } - } else { - if (NumDims > 0) { - const Index idx = coords[NumDims-1]; - if (idx < m_padding[NumDims-1].first || idx >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) { - return m_paddingValue; - } - inputIndex = idx - m_padding[NumDims-1].first; - } - for (int i = NumDims - 2; i >= 0; --i) { - const Index idx = coords[i]; - if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { - return m_paddingValue; - } - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - } - } - return 
m_impl.coeff(inputIndex); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - const Index initialIndex = index; - Index inputIndex = 0; - for (int i = NumDims - 1; i > 0; --i) { - const Index first = index; - const Index last = index + packetSize - 1; - const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i]; - const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i]; - const Index lastPaddedRight = m_outputStrides[i+1]; - - if (last < lastPaddedLeft) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if (first >= firstPaddedRight && last < lastPaddedRight) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if (first >= lastPaddedLeft && last < firstPaddedRight) { - // all the coefficient are between the 2 padding zones. - const Index idx = index / m_outputStrides[i]; - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - else { - // Every other case - return packetWithPossibleZero(initialIndex); - } - } - - const Index last = index + packetSize - 1; - const Index first = index; - - if (NumDims > 0) { - const Index lastPaddedLeft = m_padding[0].first; - const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second); - const Index lastPaddedRight = m_outputStrides[1]; - - if (last < lastPaddedLeft) { - // all the coefficient are in the padding zone. 
- return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if (first >= firstPaddedRight && last < lastPaddedRight) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if (first >= lastPaddedLeft && last < firstPaddedRight) { - // all the coefficient are between the 2 padding zones. - inputIndex += (index - m_padding[0].first); - return m_impl.template packet<Unaligned>(inputIndex); - } - } - - // Every other case - return packetWithPossibleZero(initialIndex); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - const Index initialIndex = index; - Index inputIndex = 0; - - for (int i = 0; i < NumDims - 1; ++i) { - const Index first = index; - const Index last = index + packetSize - 1; - const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1]; - const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1]; - const Index lastPaddedRight = m_outputStrides[i]; - - if (last < lastPaddedLeft) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if (first >= firstPaddedRight && last < lastPaddedRight) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if (first >= lastPaddedLeft && last < firstPaddedRight) { - // all the coefficient are between the 2 padding zones. 
- const Index idx = index / m_outputStrides[i+1]; - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - index -= idx * m_outputStrides[i+1]; - } - else { - // Every other case - return packetWithPossibleZero(initialIndex); - } - } - - const Index last = index + packetSize - 1; - const Index first = index; - - if (NumDims > 0) { - const Index lastPaddedLeft = m_padding[NumDims-1].first; - const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second); - const Index lastPaddedRight = m_outputStrides[NumDims-1]; - - if (last < lastPaddedLeft) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if (first >= firstPaddedRight && last < lastPaddedRight) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if (first >= lastPaddedLeft && last < firstPaddedRight) { - // all the coefficient are between the 2 padding zones. - inputIndex += (index - m_padding[NumDims-1].first); - return m_impl.template packet<Unaligned>(inputIndex); - } - } - - // Every other case - return packetWithPossibleZero(initialIndex); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - Dimensions m_dimensions; - array<Index, NumDims+1> m_outputStrides; - array<Index, NumDims> m_inputStrides; - TensorEvaluator<ArgType, Device> m_impl; - PaddingDimensions m_padding; - - Scalar m_paddingValue; -}; - - - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H diff --git 
a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h deleted file mode 100644 index c89022ab8e..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +++ /dev/null @@ -1,314 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_PATCH_H -#define EIGEN_CXX11_TENSOR_TENSOR_PATCH_H - -namespace Eigen { - -/** \class TensorPatch - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor patch class. - * - * - */ -namespace internal { -template<typename PatchDim, typename XprType> -struct traits<TensorPatchOp<PatchDim, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions + 1; - static const int Layout = XprTraits::Layout; -}; - -template<typename PatchDim, typename XprType> -struct eval<TensorPatchOp<PatchDim, XprType>, Eigen::Dense> -{ - typedef const TensorPatchOp<PatchDim, XprType>& type; -}; - -template<typename PatchDim, typename XprType> -struct nested<TensorPatchOp<PatchDim, XprType>, 1, typename eval<TensorPatchOp<PatchDim, XprType> >::type> -{ - typedef TensorPatchOp<PatchDim, XprType> type; -}; - -} // end namespace internal - - - -template<typename PatchDim, typename XprType> -class TensorPatchOp : public 
TensorBase<TensorPatchOp<PatchDim, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorPatchOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorPatchOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename Eigen::internal::nested<TensorPatchOp>::type Nested; - typedef typename Eigen::internal::traits<TensorPatchOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorPatchOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPatchOp(const XprType& expr, const PatchDim& patch_dims) - : m_xpr(expr), m_patch_dims(patch_dims) {} - - EIGEN_DEVICE_FUNC - const PatchDim& patch_dims() const { return m_patch_dims; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const PatchDim m_patch_dims; -}; - - -// Eval as rvalue -template<typename PatchDim, typename ArgType, typename Device> -struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device> -{ - typedef TensorPatchOp<PatchDim, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value + 1; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = true, - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - Index num_patches = 1; - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = 
m_impl.dimensions(); - const PatchDim& patch_dims = op.patch_dims(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < NumDims-1; ++i) { - m_dimensions[i] = patch_dims[i]; - num_patches *= (input_dims[i] - patch_dims[i] + 1); - } - m_dimensions[NumDims-1] = num_patches; - - m_inputStrides[0] = 1; - m_patchStrides[0] = 1; - for (int i = 1; i < NumDims-1; ++i) { - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - m_patchStrides[i] = m_patchStrides[i-1] * (input_dims[i-1] - patch_dims[i-1] + 1); - } - m_outputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - } - } else { - for (int i = 0; i < NumDims-1; ++i) { - m_dimensions[i+1] = patch_dims[i]; - num_patches *= (input_dims[i] - patch_dims[i] + 1); - } - m_dimensions[0] = num_patches; - - m_inputStrides[NumDims-2] = 1; - m_patchStrides[NumDims-2] = 1; - for (int i = NumDims-3; i >= 0; --i) { - m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; - m_patchStrides[i] = m_patchStrides[i+1] * (input_dims[i+1] - patch_dims[i+1] + 1); - } - m_outputStrides[NumDims-1] = 1; - for (int i = NumDims-2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; - } - } - } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0; - // Find the location of the first element of the patch. 
- Index patchIndex = index / m_outputStrides[output_stride_index]; - // Find the offset of the element wrt the location of the first element. - Index patchOffset = index - patchIndex * m_outputStrides[output_stride_index]; - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 2; i > 0; --i) { - const Index patchIdx = patchIndex / m_patchStrides[i]; - patchIndex -= patchIdx * m_patchStrides[i]; - const Index offsetIdx = patchOffset / m_outputStrides[i]; - patchOffset -= offsetIdx * m_outputStrides[i]; - inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; - } - } else { - for (int i = 0; i < NumDims - 2; ++i) { - const Index patchIdx = patchIndex / m_patchStrides[i]; - patchIndex -= patchIdx * m_patchStrides[i]; - const Index offsetIdx = patchOffset / m_outputStrides[i+1]; - patchOffset -= offsetIdx * m_outputStrides[i+1]; - inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; - } - } - inputIndex += (patchIndex + patchOffset); - return m_impl.coeff(inputIndex); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 
NumDims - 1 : 0; - Index indices[2] = {index, index + packetSize - 1}; - Index patchIndices[2] = {indices[0] / m_outputStrides[output_stride_index], - indices[1] / m_outputStrides[output_stride_index]}; - Index patchOffsets[2] = {indices[0] - patchIndices[0] * m_outputStrides[output_stride_index], - indices[1] - patchIndices[1] * m_outputStrides[output_stride_index]}; - - Index inputIndices[2] = {0, 0}; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 2; i > 0; --i) { - const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], - patchIndices[1] / m_patchStrides[i]}; - patchIndices[0] -= patchIdx[0] * m_patchStrides[i]; - patchIndices[1] -= patchIdx[1] * m_patchStrides[i]; - - const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i], - patchOffsets[1] / m_outputStrides[i]}; - patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i]; - patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i]; - - inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i]; - inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i]; - } - } else { - for (int i = 0; i < NumDims - 2; ++i) { - const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], - patchIndices[1] / m_patchStrides[i]}; - patchIndices[0] -= patchIdx[0] * m_patchStrides[i]; - patchIndices[1] -= patchIdx[1] * m_patchStrides[i]; - - const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i+1], - patchOffsets[1] / m_outputStrides[i+1]}; - patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i+1]; - patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i+1]; - - inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i]; - inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i]; - } - } - inputIndices[0] += (patchIndices[0] + patchOffsets[0]); - inputIndices[1] += (patchIndices[1] + patchOffsets[1]); - - if (inputIndices[1] - inputIndices[0] == packetSize - 1) { - PacketReturnType rslt = m_impl.template 
packet<Unaligned>(inputIndices[0]); - return rslt; - } - else { - EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize]; - values[0] = m_impl.coeff(inputIndices[0]); - values[packetSize-1] = m_impl.coeff(inputIndices[1]); - for (int i = 1; i < packetSize-1; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const - { - Index patch_coord_idx = Layout == ColMajor ? NumDims - 1 : 0; - // Location of the first element of the patch. - const Index patchIndex = coords[patch_coord_idx]; - - if (TensorEvaluator<ArgType, Device>::CoordAccess) { - array<Index, NumDims-1> inputCoords; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 2; i > 0; --i) { - const Index patchIdx = patchIndex / m_patchStrides[i]; - patchIndex -= patchIdx * m_patchStrides[i]; - const Index offsetIdx = coords[i]; - inputCoords[i] = coords[i] + patchIdx; - } - } else { - for (int i = 0; i < NumDims - 2; ++i) { - const Index patchIdx = patchIndex / m_patchStrides[i]; - patchIndex -= patchIdx * m_patchStrides[i]; - const Index offsetIdx = coords[i+1]; - inputCoords[i] = coords[i+1] + patchIdx; - } - } - Index coords_idx = Layout == ColMajor ? 
0 : NumDims - 1; - inputCoords[0] = (patchIndex + coords[coords_idx]); - return m_impl.coeff(inputCoords); - } - else { - Index inputIndex = 0; - if (Layout == ColMajor) { - for (int i = NumDims - 2; i > 0; --i) { - const Index patchIdx = patchIndex / m_patchStrides[i]; - patchIndex -= patchIdx * m_patchStrides[i]; - const Index offsetIdx = coords[i]; - inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; - } - } else { - for (int i = 0; i < NumDims - 2; ++i) { - const Index patchIdx = patchIndex / m_patchStrides[i]; - patchIndex -= patchIdx * m_patchStrides[i]; - const Index offsetIdx = coords[i+1]; - inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; - } - } - Index coords_idx = Layout == ColMajor ? 0 : NumDims - 1; - inputIndex += (patchIndex + coords[coords_idx]); - return m_impl.coeff(inputIndex); - } - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - Dimensions m_dimensions; - array<Index, NumDims> m_outputStrides; - array<Index, NumDims-1> m_inputStrides; - array<Index, NumDims-1> m_patchStrides; - - TensorEvaluator<ArgType, Device> m_impl; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_PATCH_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h deleted file mode 100644 index a70d5ae1f0..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ /dev/null @@ -1,1141 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H -#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H - -namespace Eigen { - -/** \class TensorReduction - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor reduction class. - * - */ - -namespace internal { -template<typename Op, typename Dims, typename XprType> -struct traits<TensorReductionOp<Op, Dims, XprType> > - : traits<XprType> -{ - typedef typename traits<XprType>::Scalar Scalar; - typedef typename traits<XprType>::StorageKind StorageKind; - typedef typename traits<XprType>::Index Index; - typedef typename XprType::Nested Nested; -}; - -template<typename Op, typename Dims, typename XprType> -struct eval<TensorReductionOp<Op, Dims, XprType>, Eigen::Dense> -{ - typedef const TensorReductionOp<Op, Dims, XprType>& type; -}; - -template<typename Op, typename Dims, typename XprType> -struct nested<TensorReductionOp<Op, Dims, XprType>, 1, typename eval<TensorReductionOp<Op, Dims, XprType> >::type> -{ - typedef TensorReductionOp<Op, Dims, XprType> type; -}; - - - -template <typename InputDims, typename OutputDims, typename ReducedDims> EIGEN_DEVICE_FUNC -static void partition_dims(const InputDims& input_dims, - const array<bool, internal::array_size<InputDims>::value>& reduced, - OutputDims* output_dims, ReducedDims* reduced_dims) { - const int NumInputDims = internal::array_size<InputDims>::value; - int outputIndex = 0; - int reduceIndex = 0; - for (int i = 0; i < NumInputDims; ++i) { - if (OutputDims::count == 0 || reduced[i]) { - (*reduced_dims)[reduceIndex] = input_dims[i]; - ++reduceIndex; - } else { - (*output_dims)[outputIndex] = input_dims[i]; - ++outputIndex; - } - } -} - - - -template <typename ReducedDims, int NumTensorDims, int Layout> -struct are_inner_most_dims { - static const bool value = false; -}; -template <typename ReducedDims, int NumTensorDims, int Layout> -struct preserve_inner_most_dims { - static const bool value = false; -}; - -#if defined(EIGEN_HAS_CONSTEXPR) && 
defined(EIGEN_HAS_VARIADIC_TEMPLATES) -// The use of the tmp1, tmp2, tmp3 intermediate variables is needed for nvcc 7 -// to compile the code below. NVidia is working on a fix. -template <typename ReducedDims, int NumTensorDims> -struct are_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{ - static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>()(); - static const bool tmp2 = index_statically_eq<ReducedDims>()(0, 0); - static const bool tmp3 = index_statically_eq<ReducedDims>()(array_size<ReducedDims>::value-1, array_size<ReducedDims>::value-1); - static const bool value = tmp1 & tmp2 & tmp3; -}; -template <typename ReducedDims, int NumTensorDims> -struct are_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{ - static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>()(); - static const bool tmp2 = index_statically_eq<ReducedDims>()(0, NumTensorDims - array_size<ReducedDims>::value); - static const bool tmp3 = index_statically_eq<ReducedDims>()(array_size<ReducedDims>::value - 1, NumTensorDims - 1); - static const bool value = tmp1 & tmp2 & tmp3; - -}; -template <typename ReducedDims, int NumTensorDims> -struct preserve_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{ - static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>()(); - static const bool tmp2 = index_statically_gt<ReducedDims>()(0, 0); - static const bool value = tmp1 & tmp2; - -}; -template <typename ReducedDims, int NumTensorDims> -struct preserve_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{ - static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>()(); - static const bool tmp2 = index_statically_lt<ReducedDims>()(array_size<ReducedDims>::value - 1, NumTensorDims - 1); - static const bool value = tmp1 & tmp2; -}; -#endif - - -template <int DimIndex, typename Self, typename Op> -struct GenericDimReducer { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index 
firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) { - EIGEN_STATIC_ASSERT(DimIndex >= 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) { - const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; - GenericDimReducer<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum); - } - } -}; -template <typename Self, typename Op> -struct GenericDimReducer<-1, Self, Op> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) { - reducer.reduce(self.m_impl.coeff(firstIndex), accum); - } -}; - -template <typename Self, typename Op, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)> -struct InnerMostDimReducer { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { - typename Self::CoeffReturnType accum = reducer.initialize(); - for (typename Self::Index j = 0; j < numValuesToReduce; ++j) { - reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); - } - return reducer.finalize(accum); - } -}; - -template <typename Self, typename Op> -struct InnerMostDimReducer<Self, Op, true> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { - const int packetSize = internal::unpacket_traits<typename Self::PacketReturnType>::size; - const typename Self::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize; - typename Self::PacketReturnType p = reducer.template initializePacket<typename Self::PacketReturnType>(); - for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) { - reducer.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex + j), &p); - } - typename 
Self::CoeffReturnType accum = reducer.initialize(); - for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; ++j) { - reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); - } - return reducer.finalizeBoth(accum, p); - } -}; - -template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess & Op::PacketAccess)> -struct InnerMostDimPreserver { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { - eigen_assert(false && "should never be called"); - } -}; - -template <int DimIndex, typename Self, typename Op> -struct InnerMostDimPreserver<DimIndex, Self, Op, true> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { - EIGEN_STATIC_ASSERT(DimIndex >= 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) { - const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; - InnerMostDimPreserver<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum); - } - } -}; - -template <typename Self, typename Op> -struct InnerMostDimPreserver<-1, Self, Op, true> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { - reducer.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex), accum); - } -}; - -// Default full reducer -template <typename Self, typename Op, typename Device, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)> -struct FullReducer { - static const bool HasOptimizedImplementation = false; - - static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::CoeffReturnType* output) { - const typename Self::Index num_coeffs = 
array_prod(self.m_impl.dimensions()); - *output = InnerMostDimReducer<Self, Op>::reduce(self, 0, num_coeffs, reducer); - } -}; - - -#ifdef EIGEN_USE_THREADS -// Multithreaded full reducers -template <typename Eval, typename Op, bool Vectorizable = (Eval::InputPacketAccess & Op::PacketAccess)> -struct FullReducerShard { - static void run(const Eval& eval, typename Eval::Index firstIndex, typename Eval::Index numValuesToReduce, Op& reducer, FullReducerShard* shard) { - - shard->saccum = reducer.initialize(); - for (typename Eval::Index j = 0; j < numValuesToReduce; ++j) { - reducer.reduce(eval.m_impl.coeff(firstIndex + j), &shard->saccum); - } - } - - typename Eval::CoeffReturnType saccum; -}; - -template <typename Eval, typename Op> -struct FullReducerShard<Eval, Op, true> { - static void run(const Eval& eval, typename Eval::Index firstIndex, typename Eval::Index numValuesToReduce, Op& reducer, FullReducerShard* shard) { - - const int packetSize = internal::unpacket_traits<typename Eval::PacketReturnType>::size; - const typename Eval::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize; - - shard->paccum = reducer.template initializePacket<typename Eval::PacketReturnType>(); - for (typename Eval::Index j = 0; j < VectorizedSize; j += packetSize) { - reducer.reducePacket(eval.m_impl.template packet<Unaligned>(firstIndex + j), &shard->paccum); - } - shard->saccum = reducer.initialize(); - for (typename Eval::Index j = VectorizedSize; j < numValuesToReduce; ++j) { - reducer.reduce(eval.m_impl.coeff(firstIndex + j), &shard->saccum); - } - } - - typename Eval::PacketReturnType paccum; - typename Eval::CoeffReturnType saccum; -}; - - -template <typename Self, typename Op> -struct FullReducer<Self, Op, ThreadPoolDevice, false> { - static const bool HasOptimizedImplementation = !Op::IsStateful; - - // launch one reducer per thread and accumulate the result. 
- static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, typename Self::CoeffReturnType* output) { - typedef typename Self::Index Index; - const Index num_coeffs = array_prod(self.m_impl.dimensions()); - const Index blocksize = std::floor<Index>(static_cast<float>(num_coeffs)/device.numThreads()); - const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0; - eigen_assert(num_coeffs >= numblocks * blocksize); - - FixedSizeVector<Notification*> results(numblocks); - FixedSizeVector<FullReducerShard<Self, Op, false> > shards(numblocks, FullReducerShard<Self, Op, false>()); - for (Index i = 0; i < numblocks; ++i) { - results.push_back(device.enqueue(&FullReducerShard<Self, Op, false>::run, self, i*blocksize, blocksize, reducer, &shards[i])); - } - - FullReducerShard<Self, Op, false> finalShard; - if (numblocks * blocksize < num_coeffs) { - FullReducerShard<Self, Op, false>::run(self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer, &finalShard); - } else { - finalShard.saccum = reducer.initialize(); - } - - for (Index i = 0; i < numblocks; ++i) { - wait_until_ready(results[i]); - delete results[i]; - } - - for (Index i = 0; i < numblocks; ++i) { - reducer.reduce(shards[i].saccum, &finalShard.saccum); - } - *output = reducer.finalize(finalShard.saccum); - } -}; - -template <typename Self, typename Op> -struct FullReducer<Self, Op, ThreadPoolDevice, true> { - static const bool HasOptimizedImplementation = !Op::IsStateful; - - // launch one reducer per thread and accumulate the result. - static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, typename Self::CoeffReturnType* output) { - typedef typename Self::Index Index; - const Index num_coeffs = array_prod(self.m_impl.dimensions()); - const Index blocksize = std::floor<Index>(static_cast<float>(num_coeffs)/device.numThreads()); - const Index numblocks = blocksize > 0 ? 
num_coeffs / blocksize : 0; - eigen_assert(num_coeffs >= numblocks * blocksize); - - FixedSizeVector<Notification*> results(numblocks); - FixedSizeVector<FullReducerShard<Self, Op, true> > shards(numblocks, FullReducerShard<Self, Op, true>()); - for (Index i = 0; i < numblocks; ++i) { - results.push_back(device.enqueue(&FullReducerShard<Self, Op, true>::run, self, i*blocksize, blocksize, reducer, &shards[i])); - } - - FullReducerShard<Self, Op, true> finalShard; - if (numblocks * blocksize < num_coeffs) { - FullReducerShard<Self, Op, true>::run(self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer, &finalShard); - } else { - finalShard.paccum = reducer.template initializePacket<typename Self::PacketReturnType>(); - finalShard.saccum = reducer.initialize(); - } - - for (Index i = 0; i < numblocks; ++i) { - wait_until_ready(results[i]); - delete results[i]; - } - - for (Index i = 0; i < numblocks; ++i) { - reducer.reducePacket(shards[i].paccum, &finalShard.paccum); - reducer.reduce(shards[i].saccum, &finalShard.saccum); - } - - *output = reducer.finalizeBoth(finalShard.saccum, finalShard.paccum); - } -}; -#endif - - -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) -// Full reducers for GPU, don't vectorize for now - -// Reducer function that enables multiple cuda thread to safely accumulate at the same -// output address. It basically reads the current value of the output variable, and -// attempts to update it with the new value. If in the meantime another cuda thread -// updated the content of the output address it will try again. 
-template <typename T, typename R> -__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) { -#if __CUDA_ARCH__ >= 300 - if (sizeof(T) == 4) - { - unsigned int oldval = *reinterpret_cast<unsigned int*>(output); - unsigned int newval = oldval; - reducer.reduce(accum, reinterpret_cast<T*>(&newval)); - if (newval == oldval) { - return; - } - unsigned int readback; - while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { - oldval = readback; - newval = oldval; - reducer.reduce(accum, reinterpret_cast<T*>(&newval)); - if (newval == oldval) { - return; - } - } - } - else if (sizeof(T) == 8) { - unsigned long long oldval = *reinterpret_cast<unsigned long long*>(output); - unsigned long long newval = oldval; - reducer.reduce(accum, reinterpret_cast<T*>(&newval)); - if (newval == oldval) { - return; - } - unsigned long long readback; - while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) { - oldval = readback; - newval = oldval; - reducer.reduce(accum, reinterpret_cast<T*>(&newval)); - if (newval == oldval) { - return; - } - } - } - else { - assert(0 && "Wordsize not supported"); - } -#else - assert(0 && "Shouldn't be called on unsupported device"); -#endif -} - -template <typename T> -__device__ inline void atomicReduce(T* output, T accum, SumReducer<T>&) { -#if __CUDA_ARCH__ >= 300 - atomicAdd(output, accum); -#else - assert(0 && "Shouldn't be called on unsupported device"); -#endif -} - -template <int BlockSize, int NumPerThread, typename Self, - typename Reducer, typename Index> -__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs, - typename Self::CoeffReturnType* output) { - const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x; - - if (first_index == 0) { - *output = reducer.initialize(); - } - - typename Self::CoeffReturnType accum = reducer.initialize(); - for (Index i = 0; i < NumPerThread; ++i) { - const Index index = 
first_index + i * BlockSize; - if (index >= num_coeffs) { - break; - } - typename Self::CoeffReturnType val = input.m_impl.coeff(index); - reducer.reduce(val, &accum); - } - - for (int offset = warpSize/2; offset > 0; offset /= 2) { - reducer.reduce(__shfl_down(accum, offset), &accum); - } - - if ((threadIdx.x & (warpSize - 1)) == 0) { - atomicReduce(output, accum, reducer); - } -} - - -template <typename Self, typename Op, bool Vectorizable> -struct FullReducer<Self, Op, GpuDevice, Vectorizable> { - // Unfortunately nvidia doesn't support well exotic types such as complex, - // so reduce the scope of the optimized version of the code to the simple case - // of floats. - static const bool HasOptimizedImplementation = !Op::IsStateful && - internal::is_same<typename Self::CoeffReturnType, float>::value; - - template <typename OutputType> - static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) { - assert(false && "Should only be called on floats"); - } - - static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) { - typedef typename Self::Index Index; - - const Index num_coeffs = array_prod(self.m_impl.dimensions()); - const int block_size = 256; - const int num_per_thread = 128; - const int num_blocks = std::ceil(static_cast<float>(num_coeffs) / (block_size * num_per_thread)); - LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread>), - num_blocks, block_size, 0, device, reducer, self, num_coeffs, output); - } -}; - -#endif - - -template <typename Self, typename Op, - bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)> -class BlockReducer { - public: - typedef typename Self::Index Index; - typedef typename Self::Scalar Scalar; - typedef typename Self::CoeffReturnType CoeffReturnType; - typedef typename Self::PacketReturnType PacketReturnType; - explicit BlockReducer(const Op& reducer) : op_(reducer) { - accum_ = op_.initialize(); - } - void Reduce(Index index, Index 
num_values_to_reduce, Scalar* data) { - for (Index i = 0; i < num_values_to_reduce; ++i) { - op_.reduce(data[index + i], &accum_); - } - } - CoeffReturnType Finalize() { - return op_.finalize(accum_); - } - PacketReturnType FinalizePacket() { - // TODO(andydavis) This function should not be called for Scalar - // reductions: clean this up or add an assert here. - return PacketReturnType(); - } - - private: - CoeffReturnType accum_; - Op op_; -}; - -template <typename Self, typename Op> -class BlockReducer<Self, Op, true> { - public: - typedef typename Self::Index Index; - typedef typename Self::Scalar Scalar; - typedef typename Self::CoeffReturnType CoeffReturnType; - typedef typename Self::PacketReturnType PacketReturnType; - explicit BlockReducer(const Op& reducer) : op_(reducer) { - vaccum_ = op_.template initializePacket<PacketReturnType>(); - accum_ = op_.initialize(); - } - void Reduce(Index index, Index num_values_to_reduce, Scalar* data) { - const int packet_size = internal::unpacket_traits<PacketReturnType>::size; - const Index vectorized_size = (num_values_to_reduce / packet_size) * - packet_size; - for (Index i = 0; i < vectorized_size; i += packet_size) { - op_.reducePacket(internal::ploadt<PacketReturnType, Unaligned>( - &data[index + i]), &vaccum_); - } - for (Index i = vectorized_size; i < num_values_to_reduce; ++i) { - op_.reduce(data[index + i], &accum_); - } - } - CoeffReturnType Finalize() { - return op_.finalizeBoth(accum_, vaccum_); - } - PacketReturnType FinalizePacket() { - return op_.finalizePacket(vaccum_); - } - - private: - PacketReturnType vaccum_; - CoeffReturnType accum_; - Op op_; -}; - -} // end namespace internal - - -template <typename Op, typename Dims, typename XprType> -class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType>, ReadOnlyAccessors> { - public: - typedef typename Eigen::internal::traits<TensorReductionOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef 
typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename Eigen::internal::nested<TensorReductionOp>::type Nested; - typedef typename Eigen::internal::traits<TensorReductionOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorReductionOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorReductionOp(const XprType& expr, const Dims& dims) : m_expr(expr), m_dims(dims) - { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorReductionOp(const XprType& expr, const Dims& dims, const Op& reducer) : m_expr(expr), m_dims(dims), m_reducer(reducer) - { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const XprType& expression() const { return m_expr; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Dims& dims() const { return m_dims; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Op& reducer() const { return m_reducer; } - - protected: - typename XprType::Nested m_expr; - const Dims m_dims; - const Op m_reducer; -}; - - -// Eval as rvalue -template<typename Op, typename Dims, typename ArgType, typename Device> -struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> -{ - typedef TensorReductionOp<Op, Dims, ArgType> XprType; - typedef typename XprType::Index Index; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions; - static const int NumInputDims = internal::array_size<InputDimensions>::value; - static const int NumReducedDims = internal::array_size<Dims>::value; - EIGEN_STATIC_ASSERT(NumInputDims >= NumReducedDims, YOU_MADE_A_PROGRAMMING_MISTAKE) - static const int NumOutputDims = NumInputDims - NumReducedDims; - typedef DSizes<Index, NumOutputDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename internal::remove_const<Scalar>::type ScalarNonConst; - typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> Self; - static const bool InputPacketAccess = TensorEvaluator<ArgType, 
Device>::PacketAccess; - - enum { - IsAligned = false, - PacketAccess = Self::InputPacketAccess && Op::PacketAccess, - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - typedef typename internal::TensorBlock<Index, ScalarNonConst, NumOutputDims, - Layout> OutputTensorBlock; - typedef typename internal::TensorBlock<Index, ScalarNonConst, NumInputDims, - Layout> InputTensorBlock; - - static const bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value; - static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value; - static const bool RunningFullReduction = (NumInputDims==NumReducedDims); - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device) - { - EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)), - YOU_MADE_A_PROGRAMMING_MISTAKE); - for (int i = 0; i < NumInputDims; ++i) { - m_reduced_dim[i] = false; - } - for (int i = 0; i < NumReducedDims; ++i) { - eigen_assert(op.dims()[i] >= 0); - eigen_assert(op.dims()[i] < NumInputDims); - m_reduced_dim[op.dims()[i]] = true; - } - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - internal::partition_dims(input_dims, m_reduced_dim, &m_dimensions, &m_reducedDims); - - // Precompute output strides. 
- if (NumOutputDims > 0) { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_outputStrides[0] = 1; - for (int i = 1; i < NumOutputDims; ++i) { - m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; - m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]); - } - } else { - m_outputStrides[NumOutputDims - 1] = 1; - for (int i = NumOutputDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; - m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]); - } - } - } - - // Precompute input strides. - if (NumInputDims > 0) { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputStrides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - } - } else { - m_inputStrides[NumInputDims - 1] = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1]; - } - } - } - - int outputIndex = 0; - int reduceIndex = 0; - for (int i = 0; i < NumInputDims; ++i) { - if (m_reduced_dim[i]) { - m_reducedStrides[reduceIndex] = m_inputStrides[i]; - ++reduceIndex; - } else { - m_preservedStrides[outputIndex] = m_inputStrides[i]; - m_output_to_input_dim_map[outputIndex] = i; - ++outputIndex; - } - } - - m_numValuesToReduce - = NumOutputDims == 0 ? internal::array_prod(input_dims) - : (static_cast<int>(Layout) == static_cast<int>(ColMajor)) - ? 
m_preservedStrides[0] : m_preservedStrides[NumOutputDims - 1]; - - m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1), - device.lastLevelCacheSize() / - sizeof(Scalar)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { - m_impl.evalSubExprsIfNeeded(NULL); - - // Use the FullReducer if possible. - if (RunningFullReduction && internal::FullReducer<Self, Op, Device>::HasOptimizedImplementation && - ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) || - (internal::array_prod(m_impl.dimensions()) > 1024 * 1024))) { - - bool need_assign = false; - if (!data) { - m_result = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType))); - data = m_result; - need_assign = true; - } - - Op reducer(m_reducer); - internal::FullReducer<Self, Op, Device>::run(*this, reducer, m_device, data); - return need_assign; - } - - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - - if (m_result) { - m_device.deallocate(m_result); - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - if (RunningFullReduction && m_result) { - return *m_result; - } - Op reducer(m_reducer); - if (ReducingInnerMostDims) { - return internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstInput(index), - m_numValuesToReduce, reducer); - } else { - typename Self::CoeffReturnType accum = reducer.initialize(); - internal::GenericDimReducer<NumReducedDims-1, Self, Op>::reduce(*this, firstInput(index), reducer, &accum); - return reducer.finalize(accum); - } - } - - // TODO(bsteiner): provide a more efficient implementation. 
- template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index + packetSize - 1 < dimensions().TotalSize()); - - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - if (ReducingInnerMostDims) { - const Index num_values_to_reduce = m_numValuesToReduce; - const Index firstIndex = firstInput(index); - for (Index i = 0; i < packetSize; ++i) { - Op reducer(m_reducer); - values[i] = internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstIndex + i * num_values_to_reduce, - num_values_to_reduce, reducer); - } - } else if (PreservingInnerMostDims) { - const Index firstIndex = firstInput(index); - const int innermost_dim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : NumOutputDims - 1; - // TBD: extend this the the n innermost dimensions that we preserve. 
- if (((firstIndex % m_dimensions[innermost_dim]) + packetSize - 1) < m_dimensions[innermost_dim]) { - Op reducer(m_reducer); - typename Self::PacketReturnType accum = reducer.template initializePacket<typename Self::PacketReturnType>(); - internal::InnerMostDimPreserver<NumReducedDims-1, Self, Op>::reduce(*this, firstIndex, reducer, &accum); - return reducer.finalizePacket(accum); - } else { - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index + i); - } - } - } else { - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index + i); - } - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector<internal::TensorOpResourceRequirements>* resources) const { - resources->push_back(internal::TensorOpResourceRequirements( - internal::kSkewedInnerDims, m_block_total_size_max)); - m_impl.getResourceRequirements(resources); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( - OutputTensorBlock* output_block) const { - // Special case full reductions to avoid input block copy below. - if (NumInputDims == NumReducedDims) { - eigen_assert(output_block->first_coeff_index() == 0); - eigen_assert(output_block->block_sizes().TotalSize() == 1); - Op reducer(m_reducer); - output_block->data()[0] = internal::InnerMostDimReducer<Self, Op>::reduce( - *this, 0, m_numValuesToReduce, reducer); - return; - } - - // Calculate input tensor 'slice' required to reduce output block coeffs. - DSizes<Index, NumInputDims> input_slice_sizes(m_impl.dimensions()); - for (int i = 0; i < NumOutputDims; ++i) { - // Clip preserved input dimensions by output block size. - input_slice_sizes[m_output_to_input_dim_map[i]] = - output_block->block_sizes()[i]; - } - - // Shard input tensor slice into blocks (because it could be large if we - // need to reduce along several dimensions to calculate required output - // coefficients). 
- const Index max_coeff_count = - numext::mini(((m_device.firstLevelCacheSize()) / sizeof(Scalar)), - input_slice_sizes.TotalSize()); - - // Calculate max output shard size needed to keep working set of reducers - // in L1, while leaving enough space for reducer overhead and 'packet_size' - // reductions. - DSizes<Index, NumInputDims> target_input_block_sizes; - CalculateTargetInputBlockShape(max_coeff_count, input_slice_sizes, - &target_input_block_sizes); - // Calculate indices for first preserved dimension. - const Index first_preserved_dim_output_index = - static_cast<int>(Layout) == static_cast<int>(ColMajor) ? - 0 : NumOutputDims - 1; - const Index first_preserved_dim_input_index = m_output_to_input_dim_map[ - first_preserved_dim_output_index]; - const bool inner_most_dim_preserved = first_preserved_dim_input_index == - (static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : - NumInputDims - 1) | PreservingInnerMostDims; - - // Calculate output block inner/outer dimension sizes. - const Index output_block_inner_dim_size = output_block->block_sizes()[ - first_preserved_dim_output_index]; - const Index output_block_outer_dim_size = - output_block->block_sizes().TotalSize() / output_block_inner_dim_size; - // Calculate shard size for first preserved dimension. - const Index output_shard_size = target_input_block_sizes[ - first_preserved_dim_input_index]; - const Index num_output_shards = - (output_block_inner_dim_size + output_shard_size - 1) / - output_shard_size; - - // Initialize 'tensor_slice_offsets' from input coords of output index. - DSizes<Index, NumInputDims> tensor_slice_offsets; - GetInputCoordsForOutputIndex(output_block->first_coeff_index(), - &tensor_slice_offsets); - - // Store tensor slice offset in first preserved dimension to be used - // to update tensor slice extents in loop below. 
- const Index first_preserved_dim_offset_start = tensor_slice_offsets[ - first_preserved_dim_input_index]; - - array<BlockIteratorState, NumOutputDims> block_iter_state; - - // Initialize state used to iterate through output coefficients - // and update 'tensor_slice_offsets' in outer preserved dims. - for (int i = 0; i < NumOutputDims - 1; ++i) { - const int dim = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? i + 1 : NumOutputDims - i - 2; - block_iter_state[i].input_dim = m_output_to_input_dim_map[dim]; - block_iter_state[i].output_size = output_block->block_sizes()[dim]; - block_iter_state[i].output_count = 0; - } - - // Allocate input block memory. - ScalarNonConst* input_block_data = static_cast<ScalarNonConst*>( - m_device.allocate(max_coeff_count * sizeof(Scalar))); - // Allocate reducer memory. - const bool packet_reductions_enabled = (Self::InputPacketAccess & - Op::PacketAccess); - const Index packet_size = internal::unpacket_traits<PacketReturnType>::size; - const Index num_reducers = - (inner_most_dim_preserved && packet_reductions_enabled) ? - (output_shard_size / packet_size + output_shard_size % packet_size + - packet_size) : output_shard_size; - typedef internal::BlockReducer<Self, Op> BlockReducer; - BlockReducer* reducers = static_cast<BlockReducer*>( - m_device.allocate(num_reducers * sizeof(BlockReducer))); - - InputDimensions input_tensor_dims(m_impl.dimensions()); - for (Index output_outer_index = 0; - output_outer_index < output_block_outer_dim_size; - ++output_outer_index) { - for (Index output_shard_index = 0; - output_shard_index < num_output_shards; - ++output_shard_index) { - // Initialize 'tensor_slice_extents' for this output shard. - DSizes<Index, NumInputDims> tensor_slice_extents(input_slice_sizes); - for (int i = 0; i < NumInputDims; ++i) { - if (i == first_preserved_dim_input_index) { - // Clip first preserved dim size to output shard size. 
- tensor_slice_extents[i] = numext::mini( - output_shard_size, - input_slice_sizes[i] - (tensor_slice_offsets[i] - - first_preserved_dim_offset_start)); - - } else if (!m_reduced_dim[i]) { - // Clip outer preserved dims to size 1, so that we reduce a - // contiguous set of output coefficients. - tensor_slice_extents[i] = 1; - } - } - - // Intialize output coefficient reducers. - for (int i = 0; i < num_reducers; ++i) { - new (&reducers[i]) BlockReducer(m_reducer); - } - - typedef internal::TensorSliceBlockMapper< - Index, ScalarNonConst, NumInputDims, Layout> TensorSliceBlockMapper; - - // TODO(andydavis) Consider removing 'input_block_stride_order' if we - // find that scattered reads are not worth supporting in - // TensorSliceBlockMapper. - TensorSliceBlockMapper block_mapper( - input_tensor_dims, tensor_slice_offsets, tensor_slice_extents, - target_input_block_sizes, DimensionList<Index, NumInputDims>()); - - const Index num_outputs_to_update = tensor_slice_extents[ - first_preserved_dim_input_index]; - const Index preserved_dim_vector_reducer_count = - (inner_most_dim_preserved && packet_reductions_enabled) ? - num_outputs_to_update / packet_size: 0; - const Index preserved_dim_vector_coeff_count = - inner_most_dim_preserved ? preserved_dim_vector_reducer_count * - packet_size : 0; - const Index preserved_dim_reducer_limit = - (inner_most_dim_preserved && packet_reductions_enabled) ? - (preserved_dim_vector_reducer_count + - num_outputs_to_update % packet_size) : num_outputs_to_update; - - const Index total_block_count = block_mapper.total_block_count(); - for (Index b = 0; b < total_block_count; ++b) { - InputTensorBlock input_block = block_mapper.GetBlockForIndex( - b, input_block_data); - // Read. - m_impl.block(&input_block); - - Index num_values_to_reduce = 1; - for (Index i = 0; i < NumInputDims; ++i) { - if (m_reduced_dim[i]) { - num_values_to_reduce *= input_block.block_sizes()[i]; - } - } - // Reduce. 
- if (inner_most_dim_preserved) { - const Index input_outer_dim_size = - input_block.block_sizes().TotalSize() / num_outputs_to_update; - for (Index input_outer_dim_index = 0; - input_outer_dim_index < input_outer_dim_size; - ++input_outer_dim_index) { - const Index input_outer_dim_base = input_outer_dim_index * - num_outputs_to_update; - for (Index i = 0; i < preserved_dim_vector_reducer_count; ++i) { - reducers[i].Reduce(input_outer_dim_base + i * packet_size, - packet_size, input_block.data()); - } - const Index scalar_reducer_base = input_outer_dim_base + - preserved_dim_vector_coeff_count; - for (Index i = preserved_dim_vector_reducer_count; - i < preserved_dim_reducer_limit; ++i) { - reducers[i].Reduce(scalar_reducer_base + i - - preserved_dim_vector_reducer_count, - 1, - input_block.data()); - } - } - } else { - for (Index i = 0; i < num_outputs_to_update; ++i) { - reducers[i].Reduce(i * num_values_to_reduce, - num_values_to_reduce, - input_block.data()); - } - } - } - - // Finalize all reducers for this output shard. 
- const Index output_base_index = - output_outer_index * output_block_inner_dim_size + - output_shard_index * output_shard_size; - if (inner_most_dim_preserved) { - EIGEN_ALIGN_DEFAULT CoeffReturnType values[packet_size]; - for (Index i = 0; i < preserved_dim_vector_reducer_count; ++i) { - const Index reducer_base = output_base_index + i * packet_size; - internal::pstore<CoeffReturnType, PacketReturnType>( - values, reducers[i].FinalizePacket()); - for (Index j = 0; j < packet_size; ++j) { - output_block->data()[reducer_base + j] = values[j]; - } - } - const Index scalar_reducer_base = output_base_index + - preserved_dim_vector_coeff_count; - - for (Index i = preserved_dim_vector_reducer_count; - i < preserved_dim_reducer_limit; ++i) { - output_block->data()[ - scalar_reducer_base + i - preserved_dim_vector_reducer_count] = - reducers[i].Finalize(); - } - } else { - for (int i = 0; i < num_outputs_to_update; ++i) { - output_block->data()[output_base_index + i] = - reducers[i].Finalize(); - } - } - - // Update 'tensor_slice_offsets' by num outputs for this output shard. - tensor_slice_offsets[first_preserved_dim_input_index] += - num_outputs_to_update; - } - // Update slice offset for inner preserved dim. - tensor_slice_offsets[first_preserved_dim_input_index] -= - output_block_inner_dim_size; - // Update slice offsets for remaining output dims. - for (int i = 0; i < NumOutputDims - 1; ++i) { - BlockIteratorState& b = block_iter_state[i]; - if (++b.output_count < b.output_size) { - ++tensor_slice_offsets[b.input_dim]; - break; - } - b.output_count = 0; - tensor_slice_offsets[b.input_dim] -= b.output_size - 1; - } - } - - // Free memory. 
- m_device.deallocate(input_block_data); - m_device.deallocate(reducers); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - private: - template <int, typename, typename> friend struct internal::GenericDimReducer; - template <typename, typename, bool> friend struct internal::InnerMostDimReducer; - template <int, typename, typename, bool> friend struct internal::InnerMostDimPreserver; - template <typename S, typename O, typename D, bool V> friend struct internal::FullReducer; -#ifdef EIGEN_USE_THREADS - template <typename S, typename O, bool V> friend struct internal::FullReducerShard; -#endif -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) - template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); -#endif - - struct BlockIteratorState { - Index input_dim; - Index output_size; - Index output_count; - }; - - // Returns the Index in the input tensor of the first value that needs to be - // used to compute the reduction at output index "index". - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { - if (ReducingInnerMostDims) { - return index * m_numValuesToReduce; - } - Index startInput = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumOutputDims - 1; i > 0; --i) { - // This is index_i in the output tensor. - const Index idx = index / m_fastOutputStrides[i]; - startInput += idx * m_preservedStrides[i]; - index -= idx * m_outputStrides[i]; - } - } else { - for (int i = 0; i < NumOutputDims - 1; ++i) { - // This is index_i in the output tensor. 
- const Index idx = index / m_fastOutputStrides[i]; - startInput += idx * m_preservedStrides[i]; - index -= idx * m_outputStrides[i]; - } - } - if (PreservingInnerMostDims) { - eigen_assert(m_numValuesToReduce == 1); - startInput += index; - } else { - startInput += index * m_numValuesToReduce; - } - return startInput; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void GetInputCoordsForOutputIndex( - Index index, - DSizes<Index, NumInputDims>* coords) const { - for (int i = 0; i < NumInputDims; ++i) { - (*coords)[i] = 0; - } - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumOutputDims - 1; i > 0; --i) { - const Index idx = index / m_fastOutputStrides[i]; - (*coords)[m_output_to_input_dim_map[i]] = idx; - index -= idx * m_outputStrides[i]; - } - (*coords)[m_output_to_input_dim_map[0]] = index; - } else { - for (int i = 0; i < NumOutputDims - 1; ++i) { - const Index idx = index / m_fastOutputStrides[i]; - (*coords)[m_output_to_input_dim_map[i]] = idx; - index -= idx * m_outputStrides[i]; - } - (*coords)[m_output_to_input_dim_map[NumOutputDims-1]] = index; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void CalculateTargetInputBlockShape( - const Index max_coeff_count, - const DSizes<Index, NumInputDims>& input_slice_sizes, - DSizes<Index, NumInputDims>* target_input_block_sizes) const { - typedef typename internal::packet_traits<Scalar>::type Packet; - const Index packet_size = internal::unpacket_traits<Packet>::size; - typedef internal::BlockReducer<Self, Op> BlockReducer; - // TODO(andydavis) Compute reducer overhead correctly for the case where - // we are preserving the inner most dimension, and a single reducer - // reduces a packet's worth of output coefficients. 
- const Index reducer_overhead = sizeof(BlockReducer) / sizeof(Scalar); - - Index coeff_to_allocate = max_coeff_count; - bool first_preserved_dim_allocated = false; - bool first_reduced_dim_allocated = false; - for (int i = 0; i < NumInputDims; ++i) { - const int dim = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? i : NumInputDims - i - 1; - (*target_input_block_sizes)[dim] = 1; - if (m_reduced_dim[dim]) { - // TODO(andydavis) Consider allocating to multiple reduced dimensions. - // Watch out for cases where reduced dimensions are not contiguous, - // which induces scattered reads. - if (!first_reduced_dim_allocated) { - (*target_input_block_sizes)[dim] = numext::mini(input_slice_sizes[dim], - coeff_to_allocate); - coeff_to_allocate /= (*target_input_block_sizes)[dim]; - first_reduced_dim_allocated = true; - } - } else if (!first_preserved_dim_allocated) { - // TODO(andydavis) Include output block size in this L1 working set - // calculation. - const Index allocated = max_coeff_count - coeff_to_allocate; - const Index alloc_size = numext::maxi(static_cast<Index>(1), - coeff_to_allocate / - reducer_overhead); - (*target_input_block_sizes)[dim] = numext::mini(input_slice_sizes[dim], - alloc_size); - coeff_to_allocate = numext::maxi( - static_cast<Index>(1), - coeff_to_allocate / ((*target_input_block_sizes)[dim] * - reducer_overhead)); - first_preserved_dim_allocated = true; - } - } - } - - // Bitmap indicating if an input dimension is reduced or not. - array<bool, NumInputDims> m_reduced_dim; - // Dimensions of the output of the operation. - Dimensions m_dimensions; - // Precomputed strides for the input tensor. - array<Index, NumInputDims> m_inputStrides; - // Precomputed strides for the output tensor. - array<Index, NumOutputDims> m_outputStrides; - array<internal::TensorIntDivisor<Index>, NumOutputDims> m_fastOutputStrides; - // Subset of strides of the input tensor for the non-reduced dimensions. - // Indexed by output dimensions. 
- array<Index, NumOutputDims> m_preservedStrides; - // Map from output to input dimension index. - array<Index, NumOutputDims> m_output_to_input_dim_map; - // How many values go into each reduction - Index m_numValuesToReduce; - - // Subset of strides of the input tensor for the reduced dimensions. - // Indexed by reduced dimensions. - array<Index, NumReducedDims> m_reducedStrides; - // Size of the input dimensions that are reduced. - // Indexed by reduced dimensions. - array<Index, NumReducedDims> m_reducedDims; - - // Evaluator for the input expression. - TensorEvaluator<ArgType, Device> m_impl; - - // Operation to apply for computing the reduction. - Op m_reducer; - - // For full reductions -#ifdef EIGEN_USE_GPU - static const bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value; -#else - static const bool RunningOnGPU = false; -#endif - CoeffReturnType* m_result; - std::size_t m_block_total_size_max; - - const Device& m_device; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h deleted file mode 100644 index d052dcdf69..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ /dev/null @@ -1,642 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Manjunath Kudlur <keveman@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H -#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H - -#if defined(EIGEN_USE_GPU) - -namespace Eigen { -namespace internal { - -template <typename OutExpr, typename InExpr, typename Op, typename Indices, - bool Tileable> -class TensorExecutor< - const TensorAssignOp< - OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>, - GpuDevice, false, Tileable> { - public: - typedef const TensorAssignOp< - OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const> - Expression; - static void run(const Expression& expr, const GpuDevice& device); -}; - -template <typename OutExpr, typename InExpr, typename Op, typename Indices, - bool Tileable> -class TensorExecutor< - const TensorAssignOp< - OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>, - GpuDevice, true, Tileable> { - public: - typedef const TensorAssignOp< - OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const> - Expression; - static void run(const Expression& expr, const GpuDevice& device); -}; - -template <typename InExpr, typename Op, typename Indices, bool Tileable> -class TensorExecutor<const TensorEvalToOp<const TensorReductionOp< - Op, const Indices, const InExpr> >, - GpuDevice, false, Tileable> { - public: - typedef const TensorEvalToOp< - const TensorReductionOp<Op, const Indices, const InExpr> > Expression; - static void run(const Expression& expr, const GpuDevice& device); -}; - -template <typename InExpr, typename Op, typename Indices, bool Tileable> -class TensorExecutor<const TensorEvalToOp<const TensorReductionOp< - Op, const Indices, const InExpr> >, - GpuDevice, true, Tileable> { - public: - typedef const TensorEvalToOp< - const TensorReductionOp<Op, const Indices, const InExpr> > Expression; - static void run(const Expression& expr, const GpuDevice& device); -}; - -} // end namespace internal -} // end namespace Eigen - -#if defined(__CUDACC__) - -namespace Eigen { - -namespace internal { - 
-namespace { - -#define DIVUP(x, y) (((x) + (y)-1) / (y)) - -// Initialize output[0..size-1] with val -template <typename Output> -__global__ void InitVector(const float val, int size, Output output) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = idx; i < size; i += gridDim.x * blockDim.x) { - output.coeffRef(i) = val; - } -} - -// ----------------------------------------------------------------------------- -// Column Reduction kernels -// ----------------------------------------------------------------------------- -template <int GRID_DIM, int BLOCK_DIM, int NUM_PER_THREAD, typename Input, - typename Output, typename Reducer> -__global__ void ColumnReduceKernel(Reducer reducer, const Input input, int rows, - int cols, Output output) { - assert(blockDim.x == BLOCK_DIM); - assert(blockDim.y == 1); - assert(blockDim.z == 1); - - assert(gridDim.x == GRID_DIM); - assert(gridDim.y == 1); - assert(gridDim.z == 1); - - typedef typename Input::Index Index; - - const Index num_input_points = DIVUP(rows, NUM_PER_THREAD) * cols; - const int bx = blockIdx.x; - const int tx = threadIdx.x; - - for (Index i = bx * BLOCK_DIM + tx; i < num_input_points; - i += BLOCK_DIM * GRID_DIM) { - const Index input_col = i % cols; - const Index input_row_begin = - ((i / cols) % DIVUP(rows, NUM_PER_THREAD)) * NUM_PER_THREAD; - float reduced_val = reducer.bottom_value(); - for (int j = 0; j < NUM_PER_THREAD; ++j) { - float val = ((input_col < cols) && (input_row_begin + j < rows)) - ? 
input.coeff((input_row_begin + j) * cols + input_col) - : reducer.bottom_value(); - reduced_val = reducer(reduced_val, val); - } -#if __CUDA_ARCH__ >= 300 - reducer.atomic_reduce(&output.coeffRef(input_col), reduced_val); -#endif - } -} - -// ----------------------------------------------------------------------------- -// Row Reduction kernels -// ----------------------------------------------------------------------------- -template <int GRID_DIM, int BLOCK_DIM, int NUM_PER_THREAD, typename Input, - typename Output, typename Reducer> -__global__ void RowReduceKernel(Reducer reducer, const Input input, int rows, - int cols, Output output) { - assert(BLOCK_DIM % 32 == 0); - assert(blockDim.x == BLOCK_DIM); - assert(blockDim.y == 1); - assert(blockDim.z == 1); - - assert(gridDim.x == GRID_DIM); - assert(gridDim.y == 1); - assert(gridDim.z == 1); - - const int unroll_times = 16; - assert(NUM_PER_THREAD % unroll_times == 0); - - typedef typename Input::Index Index; - - __shared__ float temp[BLOCK_DIM]; - - const Index input_col_blocks = DIVUP(cols, BLOCK_DIM * NUM_PER_THREAD); - const Index num_input_blocks = input_col_blocks * rows; - - const int bx = blockIdx.x; - const int tx = threadIdx.x; - - for (Index i = bx; i < num_input_blocks; i += GRID_DIM) { - const Index col_block = i % input_col_blocks; - const Index row_block = i / input_col_blocks; - const Index col_begin = col_block * BLOCK_DIM * NUM_PER_THREAD + tx; - const Index row = row_block; - float reduced_val = reducer.bottom_value(); - if (row < rows) { - for (Index j = 0; j < NUM_PER_THREAD; j += unroll_times) { - const Index last_col = col_begin + BLOCK_DIM * (j + unroll_times - 1); - if (last_col >= cols) { - // We can skip the last iteration of the loop since we know - // that col >= cols there. -#pragma unroll - for (int k = 0; k < unroll_times - 1; ++k) { - const Index col = col_begin + BLOCK_DIM * (j + k); - const float val = (col < cols ? 
input.coeff(row * cols + col) - : reducer.bottom_value()); - reduced_val = reducer(reduced_val, val); - } - break; // col < cols for all later iterations. - } else { - // Faster version of the loop with no branches after unrolling. -#pragma unroll - for (int k = 0; k < unroll_times; ++k) { - const Index col = col_begin + BLOCK_DIM * (j + k); - reduced_val = reducer(reduced_val, input.coeff(row * cols + col)); - } - } - } - } - temp[tx] = reduced_val; - - __syncthreads(); - const int warp_id = tx & 31; - if (warp_id < 16) temp[tx] = reducer(temp[tx], temp[tx + 16]); - if (warp_id < 8) temp[tx] = reducer(temp[tx], temp[tx + 8]); - if (warp_id < 4) temp[tx] = reducer(temp[tx], temp[tx + 4]); - if (warp_id < 2) temp[tx] = reducer(temp[tx], temp[tx + 2]); - if (warp_id < 1) temp[tx] = reducer(temp[tx], temp[tx + 1]); - - if (warp_id == 0) { - if (row < rows) { -#if __CUDA_ARCH__ >= 300 - reducer.atomic_reduce(&output.coeffRef(row), temp[tx]); -#endif - } - } - - __syncthreads(); - } -} - -template <typename Input, typename Output, typename Reducer> -void ColumnReduceCuda(Reducer reducer, const GpuDevice& device, - const Input input, int rows, int cols, Output output) { - const int block_size = 256; - const int grid_size = 128; - const int num_per_thread = 16; - LAUNCH_CUDA_KERNEL(InitVector, 32, 1024, 0, device, reducer.bottom_value(), - cols, output); - LAUNCH_CUDA_KERNEL( - (ColumnReduceKernel<grid_size, block_size, num_per_thread>), grid_size, - block_size, 0, device, reducer, input, rows, cols, output); -} - -template <typename Input, typename Output, typename Reducer> -void RowReduceCuda(Reducer reducer, const GpuDevice& device, const Input input, - int rows, int cols, Output output) { - const int block_size = 256; - const int grid_size = 32; - const int num_per_thread = 128; - LAUNCH_CUDA_KERNEL(InitVector, 32, 1024, 0, device, reducer.bottom_value(), - rows, output); - LAUNCH_CUDA_KERNEL((RowReduceKernel<grid_size, block_size, num_per_thread>), - grid_size, 
block_size, 0, device, reducer, input, rows, - cols, output); -} - -// Provides arbitrary sum reductions, applying a function across the -// right argument being reduced prior to summing -template <typename F> -struct FnSumReducer { - __host__ __device__ FnSumReducer(F f) : f_(f) {} - __host__ __device__ float bottom_value() { return 0.0f; } - __device__ float operator()(float x, float y) const { return x + f_(y); } - __device__ void atomic_reduce(float* x, float y) const { atomicAdd(x, y); } - - F f_; -}; - -// Identity is used for the basic SumReduction -struct Identity { - __device__ float operator()(float x) const { return x; } -}; - -struct CudaSumReducer : FnSumReducer<Identity> { - __host__ __device__ CudaSumReducer() : FnSumReducer(Identity()) {} -}; - -struct CudaMaxReducer { - // nvcc doesn't recognize numeric_limits<float>::lowest for some reason. - CudaMaxReducer() { - bottom_value_ = -3.40282347E+38F; // std::numeric_limits<float>::lowest(); - } - __host__ __device__ float bottom_value() { return bottom_value_; } - __device__ float operator()(float x, float y) const { return fmax(x, y); } - - // This is equivalent to atomicMax(x, y), but CUDA does not have atomicMax for - // float data type. Instead, this atomically compares-and-swaps the old value - // at x with y. If the old value returned by the CAS operation was already - // larger than y, or what was read before, it declares success and finishes, - // otherwise repeats the procedure. 
- __device__ void atomic_reduce(float* x, float y) { - unsigned int old_val = *reinterpret_cast<unsigned int*>(x); - while (*reinterpret_cast<float*>(&old_val) < y) { - unsigned int current_val = - atomicCAS(reinterpret_cast<unsigned int*>(x), old_val, - *reinterpret_cast<unsigned int*>(&y)); - if (old_val == current_val) { - break; - } - old_val = current_val; - } - } - float bottom_value_; -}; - -} // end namespace - -template <typename Op> -struct IsFloatSumReduction { - static const bool value = false; -}; - -template <> -struct IsFloatSumReduction<SumReducer<float> > { - static const bool value = true; -}; - -template <typename Op> -struct IsFloatMaxReduction { - static const bool value = false; -}; - -template <> -struct IsFloatMaxReduction<MaxReducer<float> > { - static const bool value = true; -}; - -template <typename Op> -struct SumOrMaxOfFloat { - static const bool value = - IsFloatSumReduction<Op>::value || IsFloatMaxReduction<Op>::value; -}; - -enum ReductionType { ROW_REDUCE, COL_REDUCE, UNOPTIMIZED }; - -template <typename Op, typename Expr, typename ReductionExpr> -ReductionType GetReductionType(const Expr& expr, - const ReductionExpr& reduction_expr, - const GpuDevice& device, std::size_t* rows, - std::size_t* cols) { - typedef TensorEvaluator<const Expr, GpuDevice> EvalExpr; - typedef TensorEvaluator<const ReductionExpr, GpuDevice> ReductionEvalExpr; - - if (device.majorDeviceVersion() < 3) { - return UNOPTIMIZED; - } - const EvalExpr eval_expr(expr, device); - - // We only have fast reductions for sum/max of float. - if (!SumOrMaxOfFloat<Op>::value) { - return UNOPTIMIZED; - } - - // For sum/max of float, if we are doing a full reduction, we can - // use the ROW_REDUCE optimization. 
- if (ReductionEvalExpr::NumReducedDims == ReductionEvalExpr::NumInputDims) { - *rows = 1; - *cols = array_prod(eval_expr.dimensions()); - return ROW_REDUCE; - } - - if (ReductionEvalExpr::NumReducedDims > 1) { - return UNOPTIMIZED; - } - - const int dim = reduction_expr.dims()[0]; - if (static_cast<int>(ReductionEvalExpr::Layout) == - static_cast<int>(RowMajor)) { - if (dim == ReductionEvalExpr::NumInputDims - 1) { - *rows = array_prod(eval_expr.dimensions()) / - eval_expr.dimensions()[ReductionEvalExpr::NumInputDims - 1]; - *cols = eval_expr.dimensions()[ReductionEvalExpr::NumInputDims - 1]; - if (*cols < 32) return UNOPTIMIZED; - return ROW_REDUCE; - } else if (dim == 0) { - *rows = eval_expr.dimensions()[0]; - *cols = array_prod(eval_expr.dimensions()) / eval_expr.dimensions()[0]; - if (*rows < 32) return UNOPTIMIZED; - return COL_REDUCE; - } - } else if (static_cast<int>(ReductionEvalExpr::Layout) == - static_cast<int>(ColMajor)) { - if (dim == ReductionEvalExpr::NumInputDims - 1) { - *rows = eval_expr.dimensions()[ReductionEvalExpr::NumInputDims - 1]; - *cols = array_prod(eval_expr.dimensions()) / - eval_expr.dimensions()[ReductionEvalExpr::NumInputDims - 1]; - if (*rows < 32) return UNOPTIMIZED; - return COL_REDUCE; - } else if (dim == 0) { - *rows = array_prod(eval_expr.dimensions()) / eval_expr.dimensions()[0]; - *cols = eval_expr.dimensions()[0]; - if (*cols < 32) return UNOPTIMIZED; - return ROW_REDUCE; - } - } - return UNOPTIMIZED; -} - -template <typename Expression, typename Index, bool Vectorizable> -struct LaunchKernel; - -template <typename Expression, typename Index> -struct LaunchKernel<Expression, Index, true> { - static void launch(int num_blocks, int block_size, const GpuDevice& device, - const TensorEvaluator<Expression, GpuDevice>& evaluator, - Index size) { - LAUNCH_CUDA_KERNEL( - (EigenMetaKernel_Vectorizable<TensorEvaluator<Expression, GpuDevice>, - Index>), - num_blocks, block_size, 0, device, evaluator, size); - } -}; - -template 
<typename Expression, typename Index> -struct LaunchKernel<Expression, Index, false> { - static void launch(int num_blocks, int block_size, const GpuDevice& device, - const TensorEvaluator<Expression, GpuDevice>& evaluator, - Index size) { - LAUNCH_CUDA_KERNEL( - (EigenMetaKernel_NonVectorizable<TensorEvaluator<Expression, GpuDevice>, - Index>), - num_blocks, block_size, 0, device, evaluator, size); - } -}; - -template <typename F, typename LHS, typename RHS, bool Compatible> -struct LaunchRowReduce; - -template <typename F, typename LHS, typename RHS> -struct LaunchRowReduce<F, LHS, RHS, true> { - static void launch(const GpuDevice& device, RHS input, std::size_t rows, - std::size_t cols, LHS output) { - RowReduceCuda(F(), device, input, rows, cols, output); - } -}; - -template <typename F, typename LHS, typename RHS> -struct LaunchRowReduce<F, LHS, RHS, false> { - static void launch(const GpuDevice& device, RHS input, std::size_t rows, - std::size_t cols, LHS output) {} -}; - -template <typename F, typename LHS, typename RHS, bool Compatible> -struct LaunchColReduce; - -template <typename F, typename LHS, typename RHS> -struct LaunchColReduce<F, LHS, RHS, true> { - static void launch(const GpuDevice& device, RHS input, std::size_t rows, - std::size_t cols, LHS output) { - ColumnReduceCuda(F(), device, input, rows, cols, output); - } -}; - -template <typename F, typename LHS, typename RHS> -struct LaunchColReduce<F, LHS, RHS, false> { - static void launch(const GpuDevice& device, RHS input, std::size_t rows, - std::size_t cols, LHS output) {} -}; - -template <typename Expression, typename Device, bool Vectorizable> -class TensorAssignExecutorHelper; - -template <typename OutExpr, typename InExpr, typename Op, typename Indices, - bool Vectorizable> -class TensorAssignExecutorHelper< - const TensorAssignOp< - OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>, - GpuDevice, Vectorizable> { - public: - typedef const TensorAssignOp< - OutExpr, 
TensorReductionOp<Op, Indices const, InExpr const> const> - Expression; - - typedef typename Expression::Index Index; - typedef TensorEvaluator<OutExpr, GpuDevice> LHSEval; - typedef TensorEvaluator<const InExpr, GpuDevice> RHSEval; - static inline void run(const Expression& expr, const GpuDevice& device) { - std::size_t rows, cols; - const ReductionType reduction_type = - GetReductionType<Op>(expr.rhsExpression().expression(), - expr.rhsExpression(), device, &rows, &cols); - if (reduction_type == UNOPTIMIZED) { - TensorEvaluator<Expression, GpuDevice> evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) { - const int num_blocks = device.getNumCudaMultiProcessors() * - device.maxCudaThreadsPerMultiProcessor() / - device.maxCudaThreadsPerBlock(); - const int block_size = device.maxCudaThreadsPerBlock(); - const Index size = array_prod(evaluator.dimensions()); - LaunchKernel<Expression, Index, Vectorizable>::launch( - num_blocks, block_size, device, evaluator, size); - } - evaluator.cleanup(); - } else { - LHSEval output(expr.lhsExpression(), device); - RHSEval input(expr.rhsExpression().expression(), device); - bool lhs_needs_assign = output.evalSubExprsIfNeeded(NULL); - bool rhs_needs_assign = input.evalSubExprsIfNeeded(NULL); - if (lhs_needs_assign && rhs_needs_assign) { - const bool Compatible = - IsFloatSumReduction<Op>::value || IsFloatMaxReduction<Op>::value; - if (reduction_type == ROW_REDUCE) { - if (IsFloatSumReduction<Op>::value) { - LaunchRowReduce<CudaSumReducer, LHSEval, RHSEval, - Compatible>::launch(device, input, rows, cols, - output); - } else if (IsFloatMaxReduction<Op>::value) { - LaunchRowReduce<CudaMaxReducer, LHSEval, RHSEval, - Compatible>::launch(device, input, rows, cols, - output); - } else { - // Unsupported reduction type - assert(false && "Unsupported reduction function for ROW_REDUCE"); - } - } else { - if (IsFloatSumReduction<Op>::value) { - LaunchColReduce<CudaSumReducer, 
LHSEval, RHSEval, - Compatible>::launch(device, input, rows, cols, - output); - } else if (IsFloatMaxReduction<Op>::value) { - LaunchColReduce<CudaMaxReducer, LHSEval, RHSEval, - Compatible>::launch(device, input, rows, cols, - output); - } else { - // Unsupported reduction type - assert(false && "Unsupported reduction function for COL_REDUCE"); - } - } - } - input.cleanup(); - output.cleanup(); - } - } -}; - -template <typename OutExpr, typename InExpr, typename Op, typename Indices, - bool Tileable> -inline void TensorExecutor< - const TensorAssignOp< - OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>, - GpuDevice, false, Tileable>::run(const Expression& expr, - const GpuDevice& device) { - TensorAssignExecutorHelper< - const TensorAssignOp< - OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>, - GpuDevice, false>::run(expr, device); -} - -template <typename OutExpr, typename InExpr, typename Op, typename Indices, - bool Tileable> -inline void TensorExecutor< - const TensorAssignOp< - OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>, - GpuDevice, true, Tileable>::run(const Expression& expr, - const GpuDevice& device) { - TensorAssignExecutorHelper< - const TensorAssignOp< - OutExpr, TensorReductionOp<Op, Indices const, InExpr const> const>, - GpuDevice, true>::run(expr, device); -} - -template <typename T, typename Index> -struct PtrWrapper { - EIGEN_DEVICE_FUNC PtrWrapper(T* ptr) : m_ptr(ptr) {} - EIGEN_DEVICE_FUNC T& coeffRef(Index i) { return *(m_ptr + i); } - T* m_ptr; -}; - -template <typename Expression, typename Device, bool Vectorizable> -class TensorEvalToExecutorHelper; - -template <typename InExpr, typename Op, typename Indices, bool Vectorizable> -class TensorEvalToExecutorHelper<const TensorEvalToOp<const TensorReductionOp< - Op, const Indices, const InExpr> >, - GpuDevice, Vectorizable> { - public: - typedef const TensorEvalToOp<const TensorReductionOp< - Op, const Indices, const InExpr> > 
Expression; - typedef typename Expression::Index Index; - typedef TensorEvaluator<const InExpr, GpuDevice> RHSEval; - - static inline void run(const Expression& expr, const GpuDevice& device) { - std::size_t rows, cols; - const ReductionType reduction_type = - GetReductionType<Op>(expr.expression().expression(), expr.expression(), - device, &rows, &cols); - if (reduction_type == UNOPTIMIZED) { - TensorEvaluator<Expression, GpuDevice> evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) { - const int num_blocks = device.getNumCudaMultiProcessors() * - device.maxCudaThreadsPerMultiProcessor() / - device.maxCudaThreadsPerBlock(); - const int block_size = device.maxCudaThreadsPerBlock(); - const Index size = array_prod(evaluator.dimensions()); - LaunchKernel<Expression, Index, Vectorizable>::launch( - num_blocks, block_size, device, evaluator, size); - } - evaluator.cleanup(); - } else { - typedef typename internal::remove_const<typename Expression::Scalar>::type Scalar; - PtrWrapper<Scalar, Index> output(expr.buffer()); - TensorEvaluator<const InExpr, GpuDevice> input( - expr.expression().expression(), device); - typedef PtrWrapper<Scalar, Index> LHSEval; - typedef TensorEvaluator<const InExpr, GpuDevice> RHSEval; - bool rhs_needs_assign = input.evalSubExprsIfNeeded(NULL); - if (rhs_needs_assign) { - const bool Compatible = - IsFloatSumReduction<Op>::value || IsFloatMaxReduction<Op>::value; - if (reduction_type == ROW_REDUCE) { - if (IsFloatSumReduction<Op>::value) { - LaunchRowReduce<CudaSumReducer, LHSEval, RHSEval, - Compatible>::launch(device, input, rows, cols, - output); - } else if (IsFloatMaxReduction<Op>::value) { - LaunchRowReduce<CudaMaxReducer, LHSEval, RHSEval, - Compatible>::launch(device, input, rows, cols, - output); - } - } else { - if (IsFloatSumReduction<Op>::value) { - LaunchColReduce<CudaSumReducer, LHSEval, RHSEval, - Compatible>::launch(device, input, rows, cols, - output); - } else if 
(IsFloatMaxReduction<Op>::value) { - LaunchColReduce<CudaMaxReducer, LHSEval, RHSEval, - Compatible>::launch(device, input, rows, cols, - output); - } - } - } - input.cleanup(); - } - } -}; - -template <typename InExpr, typename Op, typename Indices, bool Tileable> -inline void -TensorExecutor<const TensorEvalToOp< - const TensorReductionOp<Op, const Indices, const InExpr> >, - GpuDevice, false, Tileable>::run(const Expression& expr, - const GpuDevice& device) { - TensorEvalToExecutorHelper<const TensorEvalToOp<const TensorReductionOp< - Op, const Indices, const InExpr> >, - GpuDevice, false>::run(expr, device); -} - -template <typename InExpr, typename Op, typename Indices, bool Tileable> -inline void -TensorExecutor<const TensorEvalToOp< - const TensorReductionOp<Op, const Indices, const InExpr> >, - GpuDevice, true, Tileable>::run(const Expression& expr, - const GpuDevice& device) { - TensorEvalToExecutorHelper<const TensorEvalToOp<const TensorReductionOp< - Op, const Indices, const InExpr> >, - GpuDevice, true>::run(expr, device); -} - -} // end namespace internal - -} // end namespace Eigen - -#endif // __CUDACC__ -#endif // EIGEN_USE_GPU -#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h deleted file mode 100644 index fb8ba09dd3..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +++ /dev/null @@ -1,442 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_REF_H -#define EIGEN_CXX11_TENSOR_TENSOR_REF_H - -namespace Eigen { - -namespace internal { - -template <typename Dimensions, typename Scalar> -class TensorLazyBaseEvaluator { - public: - TensorLazyBaseEvaluator() : m_refcount(0) { } - virtual ~TensorLazyBaseEvaluator() { } - - EIGEN_DEVICE_FUNC virtual const Dimensions& dimensions() const = 0; - EIGEN_DEVICE_FUNC virtual const Scalar* data() const = 0; - - EIGEN_DEVICE_FUNC virtual const Scalar coeff(DenseIndex index) const = 0; - EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) = 0; - - void incrRefCount() { ++m_refcount; } - void decrRefCount() { --m_refcount; } - int refCount() const { return m_refcount; } - - private: - // No copy, no assigment; - TensorLazyBaseEvaluator(const TensorLazyBaseEvaluator& other); - TensorLazyBaseEvaluator& operator = (const TensorLazyBaseEvaluator& other); - - int m_refcount; -}; - - -template <typename Dimensions, typename Expr, typename Device> -class TensorLazyEvaluatorReadOnly : public TensorLazyBaseEvaluator<Dimensions, typename TensorEvaluator<Expr, Device>::Scalar> { - public: - // typedef typename TensorEvaluator<Expr, Device>::Dimensions Dimensions; - typedef typename TensorEvaluator<Expr, Device>::Scalar Scalar; - - TensorLazyEvaluatorReadOnly(const Expr& expr, const Device& device) : m_impl(expr, device), m_dummy(Scalar(0)) { - m_dims = m_impl.dimensions(); - m_impl.evalSubExprsIfNeeded(NULL); - } - virtual ~TensorLazyEvaluatorReadOnly() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC virtual const Dimensions& dimensions() const { - return m_dims; - } - EIGEN_DEVICE_FUNC virtual const Scalar* data() const { - return m_impl.data(); - } - - EIGEN_DEVICE_FUNC virtual const Scalar coeff(DenseIndex index) const { - return m_impl.coeff(index); - } - EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex /*index*/) { - eigen_assert(false && "can't reference the coefficient of a rvalue"); - return m_dummy; - }; - - protected: - 
TensorEvaluator<Expr, Device> m_impl; - Dimensions m_dims; - Scalar m_dummy; -}; - -template <typename Dimensions, typename Expr, typename Device> -class TensorLazyEvaluatorWritable : public TensorLazyEvaluatorReadOnly<Dimensions, Expr, Device> { - public: - typedef TensorLazyEvaluatorReadOnly<Dimensions, Expr, Device> Base; - typedef typename Base::Scalar Scalar; - - TensorLazyEvaluatorWritable(const Expr& expr, const Device& device) : Base(expr, device) { - } - virtual ~TensorLazyEvaluatorWritable() { - } - - EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) { - return this->m_impl.coeffRef(index); - } -}; - -template <typename Dimensions, typename Expr, typename Device> -class TensorLazyEvaluator : public internal::conditional<bool(internal::is_lvalue<Expr>::value), - TensorLazyEvaluatorWritable<Dimensions, Expr, Device>, - TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type { - public: - typedef typename internal::conditional<bool(internal::is_lvalue<Expr>::value), - TensorLazyEvaluatorWritable<Dimensions, Expr, Device>, - TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type Base; - typedef typename Base::Scalar Scalar; - - TensorLazyEvaluator(const Expr& expr, const Device& device) : Base(expr, device) { - } - virtual ~TensorLazyEvaluator() { - } -}; - -} // namespace internal - - -/** \class TensorRef - * \ingroup CXX11_Tensor_Module - * - * \brief A reference to a tensor expression - * The expression will be evaluated lazily (as much as possible). 
- * - */ -template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef<PlainObjectType> > -{ - public: - typedef TensorRef<PlainObjectType> Self; - typedef typename PlainObjectType::Base Base; - typedef typename Eigen::internal::nested<Self>::type Nested; - typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind; - typedef typename internal::traits<PlainObjectType>::Index Index; - typedef typename internal::traits<PlainObjectType>::Scalar Scalar; - typedef typename internal::packet_traits<Scalar>::type Packet; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef typename Base::CoeffReturnType CoeffReturnType; - typedef Scalar* PointerType; - typedef PointerType PointerArgType; - - static const Index NumIndices = PlainObjectType::NumIndices; - typedef typename PlainObjectType::Dimensions Dimensions; - - enum { - IsAligned = false, - PacketAccess = false, - BlockAccess = false, - Layout = PlainObjectType::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_STRONG_INLINE TensorRef() : m_evaluator(NULL) { - } - - template <typename Expression> - EIGEN_STRONG_INLINE TensorRef(Expression& expr) : m_evaluator(new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice())) { - m_evaluator->incrRefCount(); - } - - template <typename Expression> - EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : m_evaluator(new internal::TensorLazyEvaluator<Dimensions, const Expression, DefaultDevice>(expr, DefaultDevice())) { - m_evaluator->incrRefCount(); - } - - template <typename Expression> - EIGEN_STRONG_INLINE TensorRef& operator = (const Expression& expr) { - unrefEvaluator(); - m_evaluator = new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice()); - m_evaluator->incrRefCount(); - return *this; - } - - ~TensorRef() { - unrefEvaluator(); - } - - TensorRef(const TensorRef& other) : m_evaluator(other.m_evaluator) { - 
eigen_assert(m_evaluator->refCount() > 0); - m_evaluator->incrRefCount(); - } - - TensorRef(TensorRef& other) : m_evaluator(other.m_evaluator) { - eigen_assert(m_evaluator->refCount() > 0); - m_evaluator->incrRefCount(); - } - - TensorRef& operator = (const TensorRef& other) { - if (this != &other) { - unrefEvaluator(); - m_evaluator = other.m_evaluator; - eigen_assert(m_evaluator->refCount() > 0); - m_evaluator->incrRefCount(); - } - return *this; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index rank() const { return m_evaluator->dimensions().size(); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_evaluator->dimensions()[n]; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_evaluator->dimensions(); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index size() const { return m_evaluator->dimensions().TotalSize(); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar* data() const { return m_evaluator->data(); } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar operator()(Index index) const - { - return m_evaluator->coeff(index); - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar operator()(Index firstIndex, IndexTypes... otherIndices) const - { - const std::size_t NumIndices = (sizeof...(otherIndices) + 1); - const array<Index, NumIndices> indices{{firstIndex, otherIndices...}}; - return coeff(indices); - } - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... 
otherIndices) - { - const std::size_t NumIndices = (sizeof...(otherIndices) + 1); - const array<Index, NumIndices> indices{{firstIndex, otherIndices...}}; - return coeffRef(indices); - } -#else - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1) const - { - array<Index, 2> indices; - indices[0] = i0; - indices[1] = i1; - return coeff(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2) const - { - array<Index, 3> indices; - indices[0] = i0; - indices[1] = i1; - indices[2] = i2; - return coeff(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3) const - { - array<Index, 4> indices; - indices[0] = i0; - indices[1] = i1; - indices[2] = i2; - indices[3] = i3; - return coeff(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const - { - array<Index, 5> indices; - indices[0] = i0; - indices[1] = i1; - indices[2] = i2; - indices[3] = i3; - indices[4] = i4; - return coeff(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1) - { - array<Index, 2> indices; - indices[0] = i0; - indices[1] = i1; - return coeffRef(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2) - { - array<Index, 3> indices; - indices[0] = i0; - indices[1] = i1; - indices[2] = i2; - return coeffRef(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) - { - array<Index, 4> indices; - indices[0] = i0; - indices[1] = i1; - indices[2] = i2; - indices[3] = i3; - return coeffRef(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2, Index i3, Index i4) - { - array<Index, 5> indices; - indices[0] = i0; - indices[1] = i1; - indices[2] = i2; - indices[3] = i3; - indices[4] = i4; - 
return coeffRef(indices); - } -#endif - - template <std::size_t NumIndices> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(const array<Index, NumIndices>& indices) const - { - const Dimensions& dims = this->dimensions(); - Index index = 0; - if (PlainObjectType::Options & RowMajor) { - index += indices[0]; - for (int i = 1; i < NumIndices; ++i) { - index = index * dims[i] + indices[i]; - } - } else { - index += indices[NumIndices-1]; - for (int i = NumIndices-2; i >= 0; --i) { - index = index * dims[i] + indices[i]; - } - } - return m_evaluator->coeff(index); - } - template <std::size_t NumIndices> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices) - { - const Dimensions& dims = this->dimensions(); - Index index = 0; - if (PlainObjectType::Options & RowMajor) { - index += indices[0]; - for (int i = 1; i < NumIndices; ++i) { - index = index * dims[i] + indices[i]; - } - } else { - index += indices[NumIndices-1]; - for (int i = NumIndices-2; i >= 0; --i) { - index = index * dims[i] + indices[i]; - } - } - return m_evaluator->coeffRef(index); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - return m_evaluator->coeff(index); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - return m_evaluator->coeffRef(index); - } - - private: - EIGEN_STRONG_INLINE void unrefEvaluator() { - if (m_evaluator) { - m_evaluator->decrRefCount(); - if (m_evaluator->refCount() == 0) { - delete m_evaluator; - } - } - } - - internal::TensorLazyBaseEvaluator<Dimensions, Scalar>* m_evaluator; -}; - - -// evaluator for rvalues -template<typename Derived, typename Device> -struct TensorEvaluator<const TensorRef<Derived>, Device> -{ - typedef typename Derived::Index Index; - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Packet Packet; - typedef typename Derived::Scalar CoeffReturnType; - typedef typename Derived::Packet PacketReturnType; - 
typedef typename Derived::Dimensions Dimensions; - - enum { - IsAligned = false, - PacketAccess = false, - BlockAccess = false, - Layout = TensorRef<Derived>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const TensorRef<Derived>& m, const Device&) - : m_ref(m) - { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_ref.dimensions(); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_ref.coeff(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return m_ref.coeffRef(index); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return m_ref.data(); } - - protected: - TensorRef<Derived> m_ref; -}; - - -// evaluator for lvalues -template<typename Derived, typename Device> -struct TensorEvaluator<TensorRef<Derived>, Device> : public TensorEvaluator<const TensorRef<Derived>, Device> -{ - typedef typename Derived::Index Index; - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Packet Packet; - typedef typename Derived::Scalar CoeffReturnType; - typedef typename Derived::Packet PacketReturnType; - typedef typename Derived::Dimensions Dimensions; - - typedef TensorEvaluator<const TensorRef<Derived>, Device> Base; - - enum { - IsAligned = false, - PacketAccess = false, - BlockAccess = false, - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(TensorRef<Derived>& m, const Device& d) : Base(m, d) - { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return this->m_ref.coeffRef(index); - } -}; - - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_REF_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h 
b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h deleted file mode 100644 index 44e147de3e..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +++ /dev/null @@ -1,278 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com> -// Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H -#define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H -namespace Eigen { - -/** \class TensorReverse - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor reverse elements class. - * - */ -namespace internal { -template<typename ReverseDimensions, typename XprType> -struct traits<TensorReverseOp<ReverseDimensions, - XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename ReverseDimensions, typename XprType> -struct eval<TensorReverseOp<ReverseDimensions, XprType>, Eigen::Dense> -{ - typedef const TensorReverseOp<ReverseDimensions, XprType>& type; -}; - -template<typename ReverseDimensions, typename XprType> -struct nested<TensorReverseOp<ReverseDimensions, XprType>, 1, - typename eval<TensorReverseOp<ReverseDimensions, XprType> >::type> -{ - typedef TensorReverseOp<ReverseDimensions, XprType> type; -}; - -} // end namespace internal - 
-template<typename ReverseDimensions, typename XprType> -class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions, - XprType>, WriteAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorReverseOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename Eigen::internal::nested<TensorReverseOp>::type Nested; - typedef typename Eigen::internal::traits<TensorReverseOp>::StorageKind - StorageKind; - typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp( - const XprType& expr, const ReverseDimensions& reverse_dims) - : m_xpr(expr), m_reverse_dims(reverse_dims) {} - - EIGEN_DEVICE_FUNC - const ReverseDimensions& reverse() const { return m_reverse_dims; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorReverseOp& operator = (const TensorReverseOp& other) - { - typedef TensorAssignOp<TensorReverseOp, const TensorReverseOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorReverseOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorReverseOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; - const ReverseDimensions m_reverse_dims; -}; - -// Eval as rvalue -template<typename ReverseDimensions, typename ArgType, typename 
Device> -struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device> -{ - typedef TensorReverseOp<ReverseDimensions, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<ReverseDimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, - const Device& device) - : m_impl(op.expression(), device), m_reverse(op.reverse()) - { - // Compute strides - m_dimensions = m_impl.dimensions(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_strides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_strides[i] = m_strides[i-1] * m_dimensions[i-1]; - } - } else { - m_strides[NumDims-1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_strides[i] = m_strides[i+1] * m_dimensions[i+1]; - } - } - } - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex( - Index index) const { - eigen_assert(index < dimensions().TotalSize()); - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - Index idx = index / m_strides[i]; - index -= idx * m_strides[i]; - if (m_reverse[i]) { - idx = m_dimensions[i] - idx - 1; - } - inputIndex += idx * 
m_strides[i] ; - } - if (m_reverse[0]) { - inputIndex += (m_dimensions[0] - index - 1); - } else { - inputIndex += index; - } - } else { - for (int i = 0; i < NumDims - 1; ++i) { - Index idx = index / m_strides[i]; - index -= idx * m_strides[i]; - if (m_reverse[i]) { - idx = m_dimensions[i] - idx - 1; - } - inputIndex += idx * m_strides[i] ; - } - if (m_reverse[NumDims-1]) { - inputIndex += (m_dimensions[NumDims-1] - index - 1); - } else { - inputIndex += index; - } - } - return inputIndex; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff( - Index index) const { - return m_impl.coeff(reverseIndex(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - PacketReturnType packet(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - // TODO(ndjaitly): write a better packing routine that uses - // local structure. 
- EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type - values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - Dimensions m_dimensions; - array<Index, NumDims> m_strides; - TensorEvaluator<ArgType, Device> m_impl; - ReverseDimensions m_reverse; -}; - -// Eval as lvalue - -template <typename ReverseDimensions, typename ArgType, typename Device> -struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device> - : public TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, - Device> { - typedef TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, - Device> Base; - typedef TensorReverseOp<ReverseDimensions, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<ReverseDimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, - const Device& device) - : Base(op, device) {} - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Dimensions& dimensions() const { return this->m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return this->m_impl.coeffRef(Base::reverseIndex(index)); - } - - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) { - const int packetSize = 
internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - // This code is pilfered from TensorMorphing.h - EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize]; - internal::pstore<CoeffReturnType, PacketReturnType>(values, x); - for (int i = 0; i < packetSize; ++i) { - this->coeffRef(index+i) = values[i]; - } - } - -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h deleted file mode 100644 index efa2f358db..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ /dev/null @@ -1,415 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H -#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H - -namespace Eigen { - -/** \class TensorShuffling - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor shuffling class. 
- * - * - */ -namespace internal { -template<typename Shuffle, typename XprType> -struct traits<TensorShufflingOp<Shuffle, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename Shuffle, typename XprType> -struct eval<TensorShufflingOp<Shuffle, XprType>, Eigen::Dense> -{ - typedef const TensorShufflingOp<Shuffle, XprType>& type; -}; - -template<typename Shuffle, typename XprType> -struct nested<TensorShufflingOp<Shuffle, XprType>, 1, typename eval<TensorShufflingOp<Shuffle, XprType> >::type> -{ - typedef TensorShufflingOp<Shuffle, XprType> type; -}; - -} // end namespace internal - - - -template<typename Shuffle, typename XprType> -class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorShufflingOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorShufflingOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename Eigen::internal::nested<TensorShufflingOp>::type Nested; - typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shuffle) - : m_xpr(expr), m_shuffle(shuffle) {} - - EIGEN_DEVICE_FUNC - const Shuffle& shufflePermutation() const { return m_shuffle; 
} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorShufflingOp& operator = (const TensorShufflingOp& other) - { - typedef TensorAssignOp<TensorShufflingOp, const TensorShufflingOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorShufflingOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorShufflingOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; - const Shuffle m_shuffle; -}; - - -// Eval as rvalue -template<typename Shuffle, typename ArgType, typename Device> -struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> -{ - typedef TensorShufflingOp<Shuffle, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename internal::remove_const<Scalar>::type ScalarNonConst; - - enum { - IsAligned = false, - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - typedef typename internal::TensorBlock< - Index, typename internal::remove_const<Scalar>::type, NumDims, - TensorEvaluator<ArgType, Device>::Layout> TensorBlock; - typedef typename internal::TensorBlockReader< - Index, typename internal::remove_const<Scalar>::type, NumDims, - TensorEvaluator<ArgType, 
Device>::Layout, PacketAccess> TensorBlockReader; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_shuffle(op.shufflePermutation()), m_impl(op.expression(), device) - { - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - for (int i = 0; i < NumDims; ++i) { - m_dimensions[i] = input_dims[m_shuffle[i]]; - m_inverseShuffle[m_shuffle[i]] = i; - } - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_unshuffledInputStrides[0] = 1; - m_outputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_unshuffledInputStrides[i] = - m_unshuffledInputStrides[i - 1] * input_dims[i - 1]; - m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; - m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]); - } - } else { - m_unshuffledInputStrides[NumDims - 1] = 1; - m_outputStrides[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_unshuffledInputStrides[i] = - m_unshuffledInputStrides[i + 1] * input_dims[i + 1]; - m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; - m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]); - } - } - - for (int i = 0; i < NumDims; ++i) { - m_inputStrides[i] = m_unshuffledInputStrides[m_shuffle[i]]; - } - - m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1), - device.firstLevelCacheSize() / - sizeof(Scalar)); - } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index 
index) const - { - return m_impl.coeff(srcCoeff(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector<internal::TensorOpResourceRequirements>* resources) const { - resources->push_back(internal::TensorOpResourceRequirements( - internal::kUniformAllDims, m_block_total_size_max)); - m_impl.getResourceRequirements(resources); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( - TensorBlock* output_block) const { - if (m_impl.data() != NULL) { - // Fast path: we have direct access to the data, so shuffle as we read. - TensorBlockReader::Run(output_block, - srcCoeff(output_block->first_coeff_index()), - m_inverseShuffle, - m_unshuffledInputStrides, - m_impl.data()); - return; - } - - // Slow path: read unshuffled block from the input and shuffle in-place. - // Initialize input block sizes using input-to-output shuffle map. - DSizes<Index, NumDims> input_block_sizes; - for (Index i = 0; i < NumDims; ++i) { - input_block_sizes[i] = output_block->block_sizes()[m_inverseShuffle[i]]; - } - - // Calculate input block strides. 
- DSizes<Index, NumDims> input_block_strides; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - input_block_strides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - input_block_strides[i] = input_block_strides[i - 1] * - input_block_sizes[i - 1]; - } - } else { - input_block_strides[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - input_block_strides[i] = input_block_strides[i + 1] * - input_block_sizes[i + 1]; - } - } - - // Read input block. - TensorBlock input_block(srcCoeff(output_block->first_coeff_index()), - input_block_sizes, - input_block_strides, - m_unshuffledInputStrides, - output_block->data()); - - m_impl.block(&input_block); - - // Naive In-place shuffle: random IO but block size is O(L1 cache size). - // TODO(andydavis) Improve the performance of this in-place shuffle. - const Index total_size = input_block_sizes.TotalSize(); - std::vector<bool> bitmap(total_size, false); - ScalarNonConst* data = const_cast<ScalarNonConst*>(output_block->data()); - const DSizes<Index, NumDims>& output_block_strides = - output_block->block_strides(); - for (Index input_index = 0; input_index < total_size; ++input_index) { - if (bitmap[input_index]) { - // Coefficient at this index has already been shuffled. - continue; - } - - Index output_index = GetBlockOutputIndex(input_index, - input_block_strides, - output_block_strides); - if (output_index == input_index) { - // Coefficient already in place. - bitmap[output_index] = true; - continue; - } - - // The following loop starts at 'input_index', and shuffles - // coefficients into their shuffled location at 'output_index'. - // It skips through the array shuffling coefficients by following - // the shuffle cycle starting and ending a 'start_index'. 
- ScalarNonConst evicted_value; - ScalarNonConst shuffled_value = data[input_index]; - do { - evicted_value = data[output_index]; - data[output_index] = shuffled_value; - shuffled_value = evicted_value; - bitmap[output_index] = true; - output_index = GetBlockOutputIndex(output_index, - input_block_strides, - output_block_strides); - } while (output_index != input_index); - - data[output_index] = shuffled_value; - bitmap[output_index] = true; - } - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex( - Index input_index, - const DSizes<Index, NumDims>& input_block_strides, - const DSizes<Index, NumDims>& output_block_strides) const { - Index output_index = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = input_index / input_block_strides[i]; - output_index += idx * output_block_strides[m_inverseShuffle[i]]; - input_index -= idx * input_block_strides[i]; - } - return output_index + input_index * - output_block_strides[m_inverseShuffle[0]]; - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = input_index / input_block_strides[i]; - output_index += idx * output_block_strides[m_inverseShuffle[i]]; - input_index -= idx * input_block_strides[i]; - } - return output_index + input_index * - output_block_strides[m_inverseShuffle[NumDims - 1]]; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const { - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_fastOutputStrides[i]; - inputIndex += idx * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - return inputIndex + index * m_inputStrides[0]; - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_fastOutputStrides[i]; - inputIndex += idx * m_inputStrides[i]; - index 
-= idx * m_outputStrides[i]; - } - return inputIndex + index * m_inputStrides[NumDims - 1]; - } - } - - const Shuffle& m_shuffle; - Dimensions m_dimensions; - array<Index, NumDims> m_inverseShuffle; - array<Index, NumDims> m_outputStrides; - array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides; - array<Index, NumDims> m_inputStrides; - array<Index, NumDims> m_unshuffledInputStrides; - TensorEvaluator<ArgType, Device> m_impl; - std::size_t m_block_total_size_max; -}; - - -// Eval as lvalue -template<typename Shuffle, typename ArgType, typename Device> -struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device> - : public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> -{ - typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Base; - - typedef TensorShufflingOp<Shuffle, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - - enum { - IsAligned = false, - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - }; - - typedef typename internal::TensorBlock< - Index, typename internal::remove_const<Scalar>::type, NumDims, - TensorEvaluator<ArgType, Device>::Layout> TensorBlock; - typedef typename internal::TensorBlockWriter< - Index, typename internal::remove_const<Scalar>::type, NumDims, - TensorEvaluator<ArgType, Device>::Layout, PacketAccess> TensorBlockWriter; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : Base(op, device) - { } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& 
coeffRef(Index index) - { - return this->m_impl.coeffRef(this->srcCoeff(index)); - } - - template <int StoreMode> EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - static const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - internal::pstore<CoeffReturnType, PacketReturnType>(values, x); - for (int i = 0; i < packetSize; ++i) { - this->coeffRef(index+i) = values[i]; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock( - const TensorBlock& block) { - eigen_assert(this->m_impl.data() != NULL); - TensorBlockWriter::Run(block, this->srcCoeff(block.first_coeff_index()), - this->m_inverseShuffle, - this->m_unshuffledInputStrides, this->m_impl.data()); - } -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h deleted file mode 100644 index cfde4fdc72..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +++ /dev/null @@ -1,247 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSORSTORAGE_H -#define EIGEN_CXX11_TENSOR_TENSORSTORAGE_H - -#ifdef EIGEN_TENSOR_STORAGE_CTOR_PLUGIN - #define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN EIGEN_TENSOR_STORAGE_CTOR_PLUGIN; -#else - #define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN -#endif - -namespace Eigen { - -/** \internal - * - * \class TensorStorage - * \ingroup CXX11_Tensor_Module - * - * \brief Stores the data of a tensor - * - * This class stores the data of fixed-size, dynamic-size or mixed tensors - * in a way as compact as possible. - * - * \sa Tensor - */ -template<typename T, typename Dimensions, int Options_> class TensorStorage; - - -// Pure fixed-size storage -template<typename T, int Options_, typename FixedDimensions> -class TensorStorage<T, FixedDimensions, Options_> -{ - private: - static const std::size_t Size = FixedDimensions::total_size; - - EIGEN_ALIGN_DEFAULT T m_data[Size]; - FixedDimensions m_dimensions; - - public: - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorStorage() { - EIGEN_STATIC_ASSERT(Size == FixedDimensions::total_size, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE T *data() { return m_data; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const T *data() const { return m_data; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const FixedDimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE DenseIndex size() const { return m_dimensions.TotalSize(); } -}; - - -// pure dynamic -template<typename T, int Options_, typename IndexType, std::size_t NumIndices_> -class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_> -{ - public: - typedef IndexType Index; - typedef DSizes<IndexType, NumIndices_> Dimensions; - typedef TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_> Self; - - EIGEN_DEVICE_FUNC TensorStorage() - : m_data(NumIndices_ ? 
0 : internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(1)) - , m_dimensions() {} - - EIGEN_DEVICE_FUNC TensorStorage(internal::constructor_without_unaligned_array_assert) - : m_data(NumIndices_ ? 0 : internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(1)) - , m_dimensions(internal::template repeat<NumIndices_, Index>(0)) {} - - EIGEN_DEVICE_FUNC TensorStorage(Index size, const array<Index, NumIndices_>& dimensions) - : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size)), m_dimensions(dimensions) - { EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN } - - EIGEN_DEVICE_FUNC TensorStorage(const Self& other) - : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(other.m_dimensions))) - , m_dimensions(other.m_dimensions) - { - internal::smart_copy(other.m_data, other.m_data+internal::array_prod(other.m_dimensions), m_data); - } - EIGEN_DEVICE_FUNC Self& operator=(const Self& other) - { - if (this != &other) { - Self tmp(other); - this->swap(tmp); - } - return *this; - } - - EIGEN_DEVICE_FUNC ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); } - EIGEN_DEVICE_FUNC void swap(Self& other) - { numext::swap(m_data,other.m_data); numext::swap(m_dimensions,other.m_dimensions); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {return m_dimensions;} - - EIGEN_DEVICE_FUNC void resize(Index size, const array<Index, NumIndices_>& nbDimensions) - { - const Index currentSz = internal::array_prod(m_dimensions); - if(size != currentSz) - { - internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, currentSz); - if (size) - m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size); - else - m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN - } - m_dimensions = nbDimensions; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T *data() { return 
m_data; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T *data() const { return m_data; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); } - - private: - T *m_data; - Dimensions m_dimensions; -}; - - -// pure dynamic -template<typename T, int Options_> -class TensorStorage<T, VSizes<DenseIndex>, Options_> -{ - T* m_data; - VSizes<DenseIndex> m_dimensions; - typedef TensorStorage<T, VSizes<DenseIndex>, Options_> Self_; - - public: - EIGEN_DEVICE_FUNC TensorStorage() : m_data(0), m_dimensions() {} - - template <DenseIndex NumDims> - EIGEN_DEVICE_FUNC TensorStorage(const array<DenseIndex, NumDims>& dimensions) - { - m_dimensions.resize(NumDims); - for (int i = 0; i < NumDims; ++i) { - m_dimensions[i] = dimensions[i]; - } - const DenseIndex size = array_prod(dimensions); - m_data = internal::conditional_managed_new_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(size); - EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN - } - - EIGEN_DEVICE_FUNC TensorStorage(const std::vector<DenseIndex>& dimensions) - : m_dimensions(dimensions) - { - const DenseIndex size = internal::array_prod(dimensions); - m_data = internal::conditional_managed_new_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(size); - EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - TensorStorage(IndexTypes... 
dimensions) { - const int NumDims = sizeof...(dimensions); - m_dimensions.resize(NumDims); - const array<DenseIndex, NumDims> dim{{dimensions...}}; - DenseIndex size = 1; - for (int i = 0; i < NumDims; ++i) { - size *= dim[i]; - m_dimensions[i] = dim[i]; - } - m_data = internal::conditional_managed_new_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(size); - EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN - } -#endif - - EIGEN_DEVICE_FUNC TensorStorage(const Self_& other) - : m_data(internal::conditional_managed_new_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(internal::array_prod(other.m_dimensions))) - , m_dimensions(other.m_dimensions) - { - internal::smart_copy(other.m_data, other.m_data+internal::array_prod(other.m_dimensions), m_data); - } - - EIGEN_DEVICE_FUNC Self_& operator=(const Self_& other) - { - if (this != &other) { - Self_ tmp(other); - this->swap(tmp); - } - return *this; - } - - EIGEN_DEVICE_FUNC ~TensorStorage() - { - internal::conditional_managed_delete_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(m_data, internal::array_prod(m_dimensions)); - } - - EIGEN_DEVICE_FUNC void swap(Self_& other) - { std::swap(m_data,other.m_data); std::swap(m_dimensions,other.m_dimensions); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const VSizes<DenseIndex>& dimensions() const { return m_dimensions; } - - template <typename NewDimensions> EIGEN_DEVICE_FUNC - void resize(DenseIndex size, const NewDimensions& nbDimensions) - { - const DenseIndex currentSz = internal::array_prod(m_dimensions); - if(size != currentSz) - { - internal::conditional_managed_delete_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(m_data, currentSz); - if (size) - m_data = internal::conditional_managed_new_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(size); - else - m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN - } - m_dimensions.resize(internal::array_size<NewDimensions>::value); - for (int i = 0; i < internal::array_size<NewDimensions>::value; 
++i) { - m_dimensions[i] = nbDimensions[i]; - } - } - EIGEN_DEVICE_FUNC void resize(DenseIndex size, const std::vector<DenseIndex>& nbDimensions) - { - const DenseIndex currentSz = internal::array_prod(m_dimensions); - if(size != currentSz) - { - internal::conditional_managed_delete_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(m_data, currentSz); - if (size) - m_data = internal::conditional_managed_new_auto<T,(Options_&DontAlign)==0,(Options_&AllocateUVM)>(size); - else - m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN - } - m_dimensions = nbDimensions; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T *data() { return m_data; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T *data() const { return m_data; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex size() const { return m_dimensions.TotalSize(); } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSORSTORAGE_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h deleted file mode 100644 index 8abe5ea8e4..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +++ /dev/null @@ -1,329 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H -#define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H - -namespace Eigen { - -/** \class TensorStriding - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor striding class. 
- * - * - */ -namespace internal { -template<typename Strides, typename XprType> -struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename Strides, typename XprType> -struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense> -{ - typedef const TensorStridingOp<Strides, XprType>& type; -}; - -template<typename Strides, typename XprType> -struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type> -{ - typedef TensorStridingOp<Strides, XprType> type; -}; - -} // end namespace internal - - - -template<typename Strides, typename XprType> -class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorStridingOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested; - typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims) - : m_xpr(expr), m_dims(dims) {} - - EIGEN_DEVICE_FUNC - const Strides& strides() const { return m_dims; } - - EIGEN_DEVICE_FUNC - const 
typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorStridingOp& operator = (const TensorStridingOp& other) - { - typedef TensorAssignOp<TensorStridingOp, const TensorStridingOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorStridingOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorStridingOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; - const Strides m_dims; -}; - - -// Eval as rvalue -template<typename Strides, typename ArgType, typename Device> -struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> -{ - typedef TensorStridingOp<Strides, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - - enum { - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - m_dimensions = m_impl.dimensions(); - for (int i = 0; i < NumDims; ++i) { - m_dimensions[i] = ceilf(static_cast<float>(m_dimensions[i]) / op.strides()[i]); - } - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - if (static_cast<int>(Layout) == 
static_cast<int>(ColMajor)) { - m_outputStrides[0] = 1; - m_inputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - m_inputStrides[i-1] *= op.strides()[i-1]; - } - m_inputStrides[NumDims-1] *= op.strides()[NumDims-1]; - } else { // RowMajor - m_outputStrides[NumDims-1] = 1; - m_inputStrides[NumDims-1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; - m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; - m_inputStrides[i+1] *= op.strides()[i+1]; - } - m_inputStrides[0] *= op.strides()[0]; - } - } - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_impl.coeff(srcCoeff(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - Index inputIndices[] = {0, 0}; - Index indices[] = {index, index + packetSize - 1}; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx0 = indices[0] / m_outputStrides[i]; - const Index idx1 = indices[1] / m_outputStrides[i]; - inputIndices[0] += idx0 * 
m_inputStrides[i]; - inputIndices[1] += idx1 * m_inputStrides[i]; - indices[0] -= idx0 * m_outputStrides[i]; - indices[1] -= idx1 * m_outputStrides[i]; - } - inputIndices[0] += indices[0] * m_inputStrides[0]; - inputIndices[1] += indices[1] * m_inputStrides[0]; - } else { // RowMajor - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx0 = indices[0] / m_outputStrides[i]; - const Index idx1 = indices[1] / m_outputStrides[i]; - inputIndices[0] += idx0 * m_inputStrides[i]; - inputIndices[1] += idx1 * m_inputStrides[i]; - indices[0] -= idx0 * m_outputStrides[i]; - indices[1] -= idx1 * m_outputStrides[i]; - } - inputIndices[0] += indices[0] * m_inputStrides[NumDims-1]; - inputIndices[1] += indices[1] * m_inputStrides[NumDims-1]; - } - if (inputIndices[1] - inputIndices[0] == packetSize - 1) { - PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]); - return rslt; - } - else { - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - values[0] = m_impl.coeff(inputIndices[0]); - values[packetSize-1] = m_impl.coeff(inputIndices[1]); - for (int i = 1; i < packetSize-1; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const - { - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_outputStrides[i]; - inputIndex += idx * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - inputIndex += index * m_inputStrides[0]; - } else { // RowMajor - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_outputStrides[i]; - inputIndex += idx * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - inputIndex += index * m_inputStrides[NumDims-1]; - } - return 
inputIndex; - } - - Dimensions m_dimensions; - array<Index, NumDims> m_outputStrides; - array<Index, NumDims> m_inputStrides; - TensorEvaluator<ArgType, Device> m_impl; -}; - - -// Eval as lvalue -template<typename Strides, typename ArgType, typename Device> -struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device> - : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> -{ - typedef TensorStridingOp<Strides, ArgType> XprType; - typedef TensorEvaluator<const XprType, Device> Base; - // typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - // typedef DSizes<Index, NumDims> Dimensions; - - enum { - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : Base(op, device) { } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - return this->m_impl.coeffRef(this->srcCoeff(index)); - } - - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < this->dimensions().TotalSize()); - - Index inputIndices[] = {0, 0}; - Index indices[] = {index, index + packetSize - 1}; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx0 = indices[0] / this->m_outputStrides[i]; - 
const Index idx1 = indices[1] / this->m_outputStrides[i]; - inputIndices[0] += idx0 * this->m_inputStrides[i]; - inputIndices[1] += idx1 * this->m_inputStrides[i]; - indices[0] -= idx0 * this->m_outputStrides[i]; - indices[1] -= idx1 * this->m_outputStrides[i]; - } - inputIndices[0] += indices[0] * this->m_inputStrides[0]; - inputIndices[1] += indices[1] * this->m_inputStrides[0]; - } else { // RowMajor - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx0 = indices[0] / this->m_outputStrides[i]; - const Index idx1 = indices[1] / this->m_outputStrides[i]; - inputIndices[0] += idx0 * this->m_inputStrides[i]; - inputIndices[1] += idx1 * this->m_inputStrides[i]; - indices[0] -= idx0 * this->m_outputStrides[i]; - indices[1] -= idx1 * this->m_outputStrides[i]; - } - inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1]; - inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1]; - } - if (inputIndices[1] - inputIndices[0] == packetSize - 1) { - this->m_impl.template writePacket<Unaligned>(inputIndices[0], x); - } - else { - EIGEN_ALIGN_DEFAULT Scalar values[packetSize]; - internal::pstore<Scalar, PacketReturnType>(values, x); - this->m_impl.coeffRef(inputIndices[0]) = values[0]; - this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1]; - for (int i = 1; i < packetSize-1; ++i) { - this->coeffRef(index+i) = values[i]; - } - } - } -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h deleted file mode 100644 index b8c1eadfc3..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ /dev/null @@ -1,294 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H -#define EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H - -namespace Eigen { -namespace internal { - - -template<typename Scalar, int Options> -class compute_tensor_flags -{ - enum { - is_dynamic_size_storage = 1, - - aligned_bit = - ( - ((Options&DontAlign)==0) && ( -#if EIGEN_ALIGN_STATICALLY - (!is_dynamic_size_storage) -#else - 0 -#endif - || -#if EIGEN_ALIGN - is_dynamic_size_storage -#else - 0 -#endif - ) - ) ? AlignedBit : 0, - packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0 - }; - - public: - enum { ret = packet_access_bit | aligned_bit}; -}; - - -template<typename Scalar_, std::size_t NumIndices_, int Options_, typename IndexType_> -struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > -{ - typedef Scalar_ Scalar; - typedef Dense StorageKind; - typedef IndexType_ Index; - static const int NumDimensions = NumIndices_; - static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; - enum { - Options = Options_, - Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit), - }; -}; - - -template<typename Scalar_, typename Dimensions, int Options_, typename IndexType_> -struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> > -{ - typedef Scalar_ Scalar; - typedef Dense StorageKind; - typedef IndexType_ Index; - static const int NumDimensions = array_size<Dimensions>::value; - static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; - enum { - Options = Options_, - Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 
0: LvalueBit), - }; -}; - - -template<typename Scalar_, int Options_, typename IndexType_> -struct traits<TensorVarDim<Scalar_, Options_, IndexType_> > -{ - typedef Scalar_ Scalar; - typedef Dense StorageKind; - typedef IndexType_ Index; - static const int NumDimensions = -1; - static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; - enum { - Options = Options_, - Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit), - }; -}; - -template<typename PlainObjectType, int Options_> -struct traits<TensorMap<PlainObjectType, Options_> > - : public traits<PlainObjectType> -{ - typedef traits<PlainObjectType> BaseTraits; - typedef typename BaseTraits::Scalar Scalar; - typedef typename BaseTraits::StorageKind StorageKind; - typedef typename BaseTraits::Index Index; - static const int NumDimensions = BaseTraits::NumDimensions; - static const int Layout = BaseTraits::Layout; - enum { - Options = Options_, - Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0), - }; -}; - -template<typename PlainObjectType> -struct traits<TensorRef<PlainObjectType> > - : public traits<PlainObjectType> -{ - typedef traits<PlainObjectType> BaseTraits; - typedef typename BaseTraits::Scalar Scalar; - typedef typename BaseTraits::StorageKind StorageKind; - typedef typename BaseTraits::Index Index; - static const int NumDimensions = BaseTraits::NumDimensions; - static const int Layout = BaseTraits::Layout; - enum { - Options = BaseTraits::Options, - Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? 
AlignedBit : 0), - }; -}; - - -template<typename _Scalar, std::size_t NumIndices_, int Options, typename IndexType_> -struct eval<Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense> -{ - typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type; -}; - -template<typename _Scalar, std::size_t NumIndices_, int Options, typename IndexType_> -struct eval<const Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense> -{ - typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type; -}; - -template<typename Scalar_, typename Dimensions, int Options, typename IndexType_> -struct eval<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense> -{ - typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type; -}; - -template<typename Scalar_, typename Dimensions, int Options, typename IndexType_> -struct eval<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense> -{ - typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type; -}; - -template<typename Scalar_, int Options, typename IndexType_> -struct eval<TensorVarDim<Scalar_, Options, IndexType_>, Eigen::Dense> -{ - typedef const TensorVarDim<Scalar_, Options, IndexType_>& type; -}; - -template<typename Scalar_, int Options, typename IndexType_> -struct eval<const TensorVarDim<Scalar_, Options, IndexType_>, Eigen::Dense> -{ - typedef const TensorVarDim<Scalar_, Options, IndexType_>& type; -}; - -template<typename PlainObjectType, int Options> -struct eval<TensorMap<PlainObjectType, Options>, Eigen::Dense> -{ - typedef const TensorMap<PlainObjectType, Options>& type; -}; - -template<typename PlainObjectType, int Options> -struct eval<const TensorMap<PlainObjectType, Options>, Eigen::Dense> -{ - typedef const TensorMap<PlainObjectType, Options>& type; -}; - -template<typename PlainObjectType> -struct eval<TensorRef<PlainObjectType>, Eigen::Dense> -{ - typedef const TensorRef<PlainObjectType>& type; -}; - 
-template<typename PlainObjectType> -struct eval<const TensorRef<PlainObjectType>, Eigen::Dense> -{ - typedef const TensorRef<PlainObjectType>& type; -}; - - -template <typename Scalar_, std::size_t NumIndices_, int Options_, typename IndexType_> -struct nested<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, 1, typename eval<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >::type> -{ - typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type; -}; - -template <typename Scalar_, std::size_t NumIndices_, int Options_, typename IndexType_> -struct nested<const Tensor<Scalar_, NumIndices_, Options_, IndexType_>, 1, typename eval<const Tensor<Scalar_, NumIndices_, Options_, IndexType_> >::type> -{ - typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type; -}; - -template <typename Scalar_, typename Dimensions, int Options, typename IndexType_> -struct nested<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, 1, typename eval<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> >::type> -{ - typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type; -}; - -template <typename Scalar_, typename Dimensions, int Options, typename IndexType_> -struct nested<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, 1, typename eval<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> >::type> -{ - typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type; -}; - -template <typename Scalar_, int Options> -struct nested<TensorVarDim<Scalar_, Options>, 1, typename eval<TensorVarDim<Scalar_, Options> >::type> -{ - typedef const TensorVarDim<Scalar_, Options>& type; -}; - -template <typename Scalar_, int Options> -struct nested<const TensorVarDim<Scalar_, Options>, 1, typename eval<const TensorVarDim<Scalar_, Options> >::type> -{ - typedef const TensorVarDim<Scalar_, Options>& type; -}; - - -template <typename PlainObjectType, int Options> -struct 
nested<TensorMap<PlainObjectType, Options>, 1, typename eval<TensorMap<PlainObjectType, Options> >::type> -{ - typedef const TensorMap<PlainObjectType, Options>& type; -}; - -template <typename PlainObjectType, int Options> -struct nested<const TensorMap<PlainObjectType, Options>, 1, typename eval<TensorMap<PlainObjectType, Options> >::type> -{ - typedef const TensorMap<PlainObjectType, Options>& type; -}; - -template <typename PlainObjectType> -struct nested<TensorRef<PlainObjectType>, 1, typename eval<TensorRef<PlainObjectType> >::type> -{ - typedef const TensorRef<PlainObjectType>& type; -}; - -template <typename PlainObjectType> -struct nested<const TensorRef<PlainObjectType>, 1, typename eval<TensorRef<PlainObjectType> >::type> -{ - typedef const TensorRef<PlainObjectType>& type; -}; - -} // end namespace internal - -// Convolutional layers take in an input tensor of shape (D, R, C, B), or (D, C, -// R, B), and convolve it with a set of filters, which can also be presented as -// a tensor (D, K, K, M), where M is the number of filters, K is the filter -// size, and each 3-dimensional tensor of size (D, K, K) is a filter. For -// simplicity we assume that we always use square filters (which is usually the -// case in images), hence the two Ks in the tensor dimension. It also takes in -// a few additional parameters: -// Stride (S): The convolution stride is the offset between locations where we -// apply the filters. A larger stride means that the output will be -// spatially smaller. -// Padding (P): The padding we apply to the input tensor along the R and C -// dimensions. This is usually used to make sure that the spatial -// dimensions of the output matches our intention. -// -// Two types of padding are often used: -// SAME: The pad value is computed so that the output will have size -// R/S and C/S. -// VALID: no padding is carried out. -// When we do padding, the padded values at the padded locations are usually -// zero. 
-// -// The output dimensions for convolution, when given all the parameters above, -// are as follows: -// When Padding = SAME: the output size is (B, R', C', M), where -// R' = ceil(float(R) / float(S)) -// C' = ceil(float(C) / float(S)) -// where ceil is the ceiling function. The input tensor is padded with 0 as -// needed. The number of padded rows and columns are computed as: -// Pr = ((R' - 1) * S + K - R) / 2 -// Pc = ((C' - 1) * S + K - C) / 2 -// when the stride is 1, we have the simplified case R'=R, C'=C, Pr=Pc=(K-1)/2. -// This is where SAME comes from - the output has the same size as the input has. -// When Padding = VALID: the output size is computed as -// R' = ceil(float(R - K + 1) / float(S)) -// C' = ceil(float(C - K + 1) / float(S)) -// and the number of padded rows and columns are computed in the same way as in -// the SAME case. -// When the stride is 1, we have the simplified case R'=R-K+1, C'=C-K+1, Pr=0, -// Pc=0. -typedef enum { - PADDING_VALID = 1, - PADDING_SAME = 2, -} PaddingType; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrueIndices.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrueIndices.h deleted file mode 100644 index ec1d44e6a6..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrueIndices.h +++ /dev/null @@ -1,250 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Eugene Brevdo <ebrevdo@google.com> -// Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_TRUE_INDICES_H -#define EIGEN_CXX11_TENSOR_TENSOR_TRUE_INDICES_H -namespace Eigen { - -/** \class TensorTrueIndices - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor provide indices of true values class. - * - */ -namespace internal { -template<typename XprType> -struct traits<TensorTrueIndicesOp<XprType> > : public traits<XprType> -{ - typedef DenseIndex Scalar; - typedef DenseIndex CoeffReturnType; - typedef traits<XprType> XprTraits; - //typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = 2; // XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename XprType> -struct eval<TensorTrueIndicesOp<XprType>, Eigen::Dense> -{ - typedef const TensorTrueIndicesOp<XprType>& type; -}; - -template<typename XprType> -struct nested<TensorTrueIndicesOp<XprType>, 1, - typename eval<TensorTrueIndicesOp<XprType> >::type> -{ - typedef TensorTrueIndicesOp<XprType> type; -}; - -} // end namespace internal - -template<typename XprType> -class TensorTrueIndicesOp : public TensorBase<TensorTrueIndicesOp<XprType>, WriteAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorTrueIndicesOp>::Scalar Scalar; - //typedef typename Eigen::internal::traits<TensorTrueIndicesOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename Eigen::internal::traits<TensorTrueIndicesOp>::CoeffReturnType CoeffReturnType; - typedef typename internal::packet_traits<CoeffReturnType>::type PacketReturnType; - typedef typename Eigen::internal::nested<TensorTrueIndicesOp>::type Nested; - typedef typename Eigen::internal::traits<TensorTrueIndicesOp>::StorageKind - StorageKind; - typedef typename 
Eigen::internal::traits<TensorTrueIndicesOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTrueIndicesOp( - const XprType& expr, const CoeffReturnType& not_found = -1) - : m_xpr(expr), m_not_found(not_found) { - } - - EIGEN_DEVICE_FUNC - const CoeffReturnType& not_found() const { return m_not_found; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorTrueIndicesOp& operator = (const TensorTrueIndicesOp& other) - { - typedef TensorAssignOp<TensorTrueIndicesOp, const TensorTrueIndicesOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorTrueIndicesOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorTrueIndicesOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; - CoeffReturnType m_not_found; -}; - -// Eval as rvalue -template<typename ArgType, typename Device> -struct TensorEvaluator<const TensorTrueIndicesOp<ArgType>, Device> -{ - typedef TensorTrueIndicesOp<ArgType> XprType; - typedef typename XprType::Index InputIndex; - typedef typename XprType::Index Index; - static const int NumDims = 2; - typedef DSizes<Index, 2> Dimensions; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions; - static const int NumInputDims = internal::array_size<InputDimensions>::value; - - enum { - IsAligned = true, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
TensorEvaluator(const XprType& op, - const Device& device) - : m_impl(op.expression(), device), m_not_found(op.not_found()) - { - // Store original dimensions - m_orig_dimensions = m_impl.dimensions(); - - // Calculate output dimensions - m_dimensions[0] = m_orig_dimensions.TotalSize(); - m_dimensions[1] = NumInputDims; - - // Calculate strides of input expression - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_strides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - m_strides[i] = m_strides[i-1] * m_orig_dimensions[i-1]; - } - } else { - m_strides[NumInputDims-1] = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - m_strides[i] = m_strides[i+1] * m_orig_dimensions[i+1]; - } - } - } - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE InputIndex origIndices( - Index index) const { - eigen_assert(index < dimensions().TotalSize()); - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - inputIndex = index % m_dimensions[0]; - } else { - inputIndex = index / m_dimensions[1]; - } - return inputIndex; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int whichDim( - Index index) const { - eigen_assert(index < dimensions().TotalSize()); - int inputDim = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - inputDim = index / m_dimensions[0]; - } else { - inputDim = index % m_dimensions[1]; - } - return inputDim; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType origDim( - int dim, InputIndex index) const { - 
eigen_assert(index < m_orig_dimensions.TotalSize()); - eigen_assert(dim > -1 && dim < m_orig_dimensions.size()); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumInputDims - 1; i > 0; --i) { - Index idx = index / m_strides[i]; - if (i == dim) return idx; // Found our dimension - index -= idx * m_strides[i]; - } - return index; - } else { - for (int i = 0; i < NumInputDims - 1; ++i) { - Index idx = index / m_strides[i]; - if (i == dim) return idx; // Found our dimension - index -= idx * m_strides[i]; - } - return index; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff( - Index index) const { - InputIndex orig_index = origIndices(index); - if (m_impl.coeff(orig_index)) - return origDim(whichDim(index), orig_index); - else { - return m_not_found; - } - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - PacketReturnType packet(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - // TODO(ndjaitly): write a better packing routine that uses - // local structure. 
- EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type - values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - InputDimensions m_orig_dimensions; - Dimensions m_dimensions; - TensorEvaluator<ArgType, Device> m_impl; - array<Index, NumInputDims> m_strides; - CoeffReturnType m_not_found; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_TRUE_INDICES_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h deleted file mode 100644 index 44aff63702..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +++ /dev/null @@ -1,232 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H -#define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H - -namespace Eigen { -namespace internal { - -template <uint64_t n> -struct static_val { - static const uint64_t value = n; - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { } - template <typename T> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) { - eigen_assert(v == n); - } -}; - - -template <typename HIGH = uint64_t, typename LOW = uint64_t> -struct TensorUInt128 -{ - HIGH high; - LOW low; - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - TensorUInt128(int x) : high(0), low(x) { - eigen_assert(x >= 0); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - TensorUInt128(int64_t x) : high(0), low(x) { - eigen_assert(x >= 0); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - TensorUInt128(uint64_t x) : high(0), low(x) { } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - TensorUInt128(uint64_t y, uint64_t x) : high(y), low(x) { } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator LOW() const { - return low; - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LOW lower() const { - return low; - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HIGH upper() const { - return high; - } -}; - - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -static bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - return (lhs.high == rhs.high) & (lhs.low == rhs.low); -} - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -static bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - return (lhs.high != rhs.high) | (lhs.low != rhs.low); -} - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -static bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - if (lhs.high != 
rhs.high) { - return lhs.high > rhs.high; - } - return lhs.low >= rhs.low; -} - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -static bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - if (lhs.high != rhs.high) { - return lhs.high < rhs.high; - } - return lhs.low < rhs.low; -} - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -static TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - TensorUInt128<uint64_t, uint64_t> result(lhs.high + rhs.high, lhs.low + rhs.low); - if (result.low < rhs.low) { - result.high += 1; - } - return result; -} - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -static TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - TensorUInt128<uint64_t, uint64_t> result(lhs.high - rhs.high, lhs.low - rhs.low); - if (result.low > lhs.low) { - result.high -= 1; - } - return result; -} - - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -static TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - // Split each 128-bit integer into 4 32-bit integers, and then do the - // multiplications by hand as follow: - // lhs a b c d - // rhs e f g h - // ----------- - // ah bh ch dh - // bg cg dg - // cf df - // de - // The result is stored in 2 64bit integers, high and low. 
- - const uint64_t LOW = 0x00000000FFFFFFFFLL; - const uint64_t HIGH = 0xFFFFFFFF00000000LL; - - uint64_t d = lhs.low & LOW; - uint64_t c = (lhs.low & HIGH) >> 32LL; - uint64_t b = lhs.high & LOW; - uint64_t a = (lhs.high & HIGH) >> 32LL; - - uint64_t h = rhs.low & LOW; - uint64_t g = (rhs.low & HIGH) >> 32LL; - uint64_t f = rhs.high & LOW; - uint64_t e = (rhs.high & HIGH) >> 32LL; - - // Compute the low 32 bits of low - uint64_t acc = d * h; - uint64_t low = acc & LOW; - // Compute the high 32 bits of low. Add a carry every time we wrap around - acc >>= 32LL; - uint64_t carry = 0; - uint64_t acc2 = acc + c * h; - if (acc2 < acc) { - carry++; - } - acc = acc2 + d * g; - if (acc < acc2) { - carry++; - } - low |= (acc << 32LL); - - // Carry forward the high bits of acc to initiate the computation of the - // low 32 bits of high - acc2 = (acc >> 32LL) | (carry << 32LL); - carry = 0; - - acc = acc2 + b * h; - if (acc < acc2) { - carry++; - } - acc2 = acc + c * g; - if (acc2 < acc) { - carry++; - } - acc = acc2 + d * f; - if (acc < acc2) { - carry++; - } - uint64_t high = acc & LOW; - - // Start to compute the high 32 bits of high. 
- acc2 = (acc >> 32LL) | (carry << 32LL); - - acc = acc2 + a * h; - acc2 = acc + b * g; - acc = acc2 + c * f; - acc2 = acc + d * e; - high |= (acc2 << 32LL); - - return TensorUInt128<uint64_t, uint64_t>(high, low); -} - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -static TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - if (rhs == TensorUInt128<static_val<0>, static_val<1> >(1)) { - return TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low); - } else if (lhs < rhs) { - return TensorUInt128<uint64_t, uint64_t>(0); - } else { - // calculate the biggest power of 2 times rhs that's less than or equal to lhs - TensorUInt128<uint64_t, uint64_t> power2(1); - TensorUInt128<uint64_t, uint64_t> d(rhs); - TensorUInt128<uint64_t, uint64_t> tmp(lhs - d); - while (lhs >= d) { - tmp = tmp - d; - d = d + d; - power2 = power2 + power2; - } - - tmp = TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low); - TensorUInt128<uint64_t, uint64_t> result(0); - while (power2 != TensorUInt128<static_val<0>, static_val<0> >(0)) { - if (tmp >= d) { - tmp = tmp - d; - result = result + power2; - } - // Shift right - power2 = TensorUInt128<uint64_t, uint64_t>(power2.high >> 1, (power2.low >> 1) | (power2.high << 63)); - d = TensorUInt128<uint64_t, uint64_t>(d.high >> 1, (d.low >> 1) | (d.high << 63)); - } - - return result; - } -} - - -} // namespace internal -} // namespace Eigen - - -#endif // EIGEN_CXX11_TENSOR_TENSOR_UINT128_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVarDim.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVarDim.h deleted file mode 100644 index 49954b955e..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVarDim.h +++ /dev/null @@ -1,315 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_VAR_DIM_H -#define EIGEN_CXX11_TENSOR_TENSOR_VAR_DIM_H - -namespace Eigen { - -/** \class Tensor - * \ingroup CXX11_Tensor_Module - * - * \brief A version of the tensor class that supports a variable number of dimensions. - * - * The variable equivalent of - * Eigen::Tensor<float, 3> t(3, 5, 7); - * is - * Eigen::TensorVarDim<float> t(3, 5, 7); - */ - -template<typename Scalar_, int Options_, typename IndexType_> -class TensorVarDim : public TensorBase<TensorVarDim<Scalar_, Options_, IndexType_> > -{ - public: - typedef TensorVarDim<Scalar_, Options_, IndexType_> Self; - typedef TensorBase<TensorVarDim<Scalar_, Options_, IndexType_> > Base; - typedef typename Eigen::internal::nested<Self>::type Nested; - typedef typename internal::traits<Self>::StorageKind StorageKind; - typedef typename internal::traits<Self>::Index Index; - typedef Scalar_ Scalar; - typedef typename internal::packet_traits<Scalar>::type Packet; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef typename Base::CoeffReturnType CoeffReturnType; - typedef typename Base::PacketReturnType PacketReturnType; - - enum { - IsAligned = bool(EIGEN_ALIGN) & !(Options_ & DontAlign), - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - BlockAccess = false, - Layout = Options_ & RowMajor ? RowMajor : ColMajor, - // disabled for now as the number of coefficients is not known by the - // caller at compile time. 
- CoordAccess = false, - }; - - static const int Options = Options_; - - static const Index NumIndices = Dynamic; - - typedef VSizes<Index> Dimensions; - - protected: - TensorStorage<Scalar, VSizes<Index>, Options_> m_storage; - - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return m_storage.dimensions().size(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } - - // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - // work, because that uses base().coeffRef() - and we don't yet - // implement a similar class hierarchy - inline Self& base() { return *this; } - inline const Self& base() const { return *this; } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
- static const std::size_t NumIndices = sizeof...(otherIndices) + 2; - return coeff(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); - } -#endif - - template <std::size_t NumIndices> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const - { - eigen_internal_assert(checkIndexRange(indices)); - return m_storage.data()[linearizedIndex(indices)]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const - { - eigen_internal_assert(index >= 0 && index < size()); - return m_storage.data()[index]; - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - inline Scalar& coeffRef(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) - { - static const std::size_t NumIndices = sizeof...(otherIndices) + 2; - return coeffRef(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); - } -#endif - - template <std::size_t NumIndices> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices) - { - eigen_internal_assert(checkIndexRange(indices)); - return m_storage.data()[linearizedIndex(indices)]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - eigen_internal_assert(index >= 0 && index < size()); - return m_storage.data()[index]; - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - inline const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
- static const std::size_t NumIndices = sizeof...(otherIndices) + 2; - return this->operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); - } -#endif - - template <std::size_t NumIndices> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const - { - eigen_assert(checkIndexRange(indices)); - return coeff(indices); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const - { - eigen_internal_assert(index >= 0 && index < size()); - return coeff(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const - { - return coeff(index); - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - inline Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. - static const size_t NumIndices = sizeof...(otherIndices) + 1; - return operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}}); - } -#endif - - template <std::size_t NumIndices> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices) - { - eigen_assert(checkIndexRange(indices)); - return coeffRef(indices); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) - { - eigen_assert(index >= 0 && index < size()); - return coeffRef(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator[](Index index) - { - return coeffRef(index); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorVarDim() - : m_storage() - { - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorVarDim(const Self& other) - : m_storage(other.m_storage) - { - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - EIGEN_STRONG_INLINE TensorVarDim(Index firstDimension, IndexTypes... otherDimensions) - : m_storage(firstDimension, otherDimensions...) 
- { - } -#endif - - EIGEN_STRONG_INLINE explicit TensorVarDim(const std::vector<Index>& dimensions) - : m_storage(dimensions) - { - EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorVarDim(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) - { - typedef TensorAssignOp<TensorVarDim, const OtherDerived> Assign; - Assign assign(*this, other.derived()); - resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - } - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorVarDim(const TensorBase<OtherDerived, WriteAccessors>& other) - { - typedef TensorAssignOp<TensorVarDim, const OtherDerived> Assign; - Assign assign(*this, other.derived()); - resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorVarDim& operator=(const TensorVarDim& other) - { - typedef TensorAssignOp<TensorVarDim, const TensorVarDim> Assign; - Assign assign(*this, other); - resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorVarDim& operator=(const OtherDerived& other) - { - typedef TensorAssignOp<TensorVarDim, const OtherDerived> Assign; - Assign assign(*this, other); - resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... 
IndexTypes> - void resize(Index firstDimension, IndexTypes... otherDimensions) - { - // The number of dimensions used to resize a tensor must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - static const std::size_t NumIndices = sizeof...(otherDimensions) + 1; - resize(array<Index, NumIndices>{{firstDimension, otherDimensions...}}); - } -#endif - - template <size_t NumIndices> - void resize(const array<Index, NumIndices>& dimensions) - { - Index size = Index(1); - for (std::size_t i = 0; i < NumIndices; i++) { - internal::check_rows_cols_for_overflow<Dynamic>::run(size, dimensions[i]); - size *= dimensions[i]; - } - #ifdef EIGEN_INITIALIZE_COEFFS - bool size_changed = size != this->size(); - m_storage.resize(size, dimensions); - if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - #else - m_storage.resize(size, dimensions); - #endif - } - void resize(const std::vector<Index>& dimensions) - { - Index size = Index(1); - for (std::size_t i = 0; i < dimensions.size(); i++) { - internal::check_rows_cols_for_overflow<Dynamic>::run(size, dimensions[i]); - size *= dimensions[i]; - } - #ifdef EIGEN_INITIALIZE_COEFFS - bool size_changed = size != this->size(); - m_storage.resize(size, dimensions); - if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - #else - m_storage.resize(size, dimensions); - #endif - } - - protected: - template <std::size_t NumIndices> - bool checkIndexRange(const array<Index, NumIndices>& indices) const - { - /* using internal::array_apply_and_reduce; - using internal::array_zip_and_reduce; - using internal::greater_equal_zero_op; - using internal::logical_and_op; - using internal::lesser_op; - - return - // check whether the indices are all >= 0 - array_apply_and_reduce<logical_and_op, greater_equal_zero_op>(indices) && - // check whether the indices fit in the dimensions - array_zip_and_reduce<logical_and_op, lesser_op>(indices, 
m_storage.dimensions()); - */ - return true; - } - - template <std::size_t NumIndices> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index linearizedIndex(const array<Index, NumIndices>& indices) const - { - if (Options&RowMajor) { - return m_storage.dimensions().IndexOfRowMajor(indices); - } else { - return m_storage.dimensions().IndexOfColMajor(indices); - } - } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_VAR_DIM_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h deleted file mode 100644 index de86c57f11..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +++ /dev/null @@ -1,677 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H -#define EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H - -namespace Eigen { - -/** \class TensorVolumePatch - * \ingroup CXX11_Tensor_Module - * - * \brief Patch extraction specialized for processing of volumetric data. - * This assumes that the input has a least 4 dimensions ordered as follows: - * - channels - * - planes - * - rows - * - columns - * - (optional) additional dimensions such as time or batch size. - * Calling the volume patch code with patch_planes, patch_rows, and patch_cols - * is equivalent to calling the regular patch extraction code with parameters - * d, patch_planes, patch_rows, patch_cols, and 1 for all the additional - * dimensions. 
- */ -namespace internal { -template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> -struct traits<TensorVolumePatchOp<Planes, Rows, Cols, XprType> > : public traits<XprType> -{ - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef traits<XprType> XprTraits; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions + 1; - static const int Layout = XprTraits::Layout; -}; - -template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> -struct eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, Eigen::Dense> -{ - typedef const TensorVolumePatchOp<Planes, Rows, Cols, XprType>& type; -}; - -template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> -struct nested<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, 1, typename eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType> >::type> -{ - typedef TensorVolumePatchOp<Planes, Rows, Cols, XprType> type; -}; - -} // end namespace internal - -template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> -class TensorVolumePatchOp : public TensorBase<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Scalar Scalar; - typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Packet Packet; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename Eigen::internal::nested<TensorVolumePatchOp>::type Nested; - typedef typename Eigen::internal::traits<TensorVolumePatchOp>::StorageKind StorageKind; - typedef 
typename Eigen::internal::traits<TensorVolumePatchOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols, - DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides, - DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides, - DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, - PaddingType padding_type, Scalar padding_value) - : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols), - m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides), - m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), - m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), - m_padding_explicit(false), m_padding_top_z(0), m_padding_bottom_z(0), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0), - m_padding_type(padding_type), m_padding_value(padding_value) {} - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols, - DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides, - DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides, - DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, - DenseIndex padding_top_z, DenseIndex padding_bottom_z, - DenseIndex padding_top, DenseIndex padding_bottom, - DenseIndex padding_left, DenseIndex padding_right, - Scalar padding_value) - : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols), - m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides), - 
m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), - m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), - m_padding_explicit(true), m_padding_top_z(padding_top_z), m_padding_bottom_z(padding_bottom_z), m_padding_top(padding_top), m_padding_bottom(padding_bottom), - m_padding_left(padding_left), m_padding_right(padding_right), - m_padding_type(PADDING_VALID), m_padding_value(padding_value) {} - - EIGEN_DEVICE_FUNC - DenseIndex patch_planes() const { return m_patch_planes; } - EIGEN_DEVICE_FUNC - DenseIndex patch_rows() const { return m_patch_rows; } - EIGEN_DEVICE_FUNC - DenseIndex patch_cols() const { return m_patch_cols; } - EIGEN_DEVICE_FUNC - DenseIndex plane_strides() const { return m_plane_strides; } - EIGEN_DEVICE_FUNC - DenseIndex row_strides() const { return m_row_strides; } - EIGEN_DEVICE_FUNC - DenseIndex col_strides() const { return m_col_strides; } - EIGEN_DEVICE_FUNC - DenseIndex in_plane_strides() const { return m_in_plane_strides; } - EIGEN_DEVICE_FUNC - DenseIndex in_row_strides() const { return m_in_row_strides; } - EIGEN_DEVICE_FUNC - DenseIndex in_col_strides() const { return m_in_col_strides; } - EIGEN_DEVICE_FUNC - DenseIndex plane_inflate_strides() const { return m_plane_inflate_strides; } - EIGEN_DEVICE_FUNC - DenseIndex row_inflate_strides() const { return m_row_inflate_strides; } - EIGEN_DEVICE_FUNC - DenseIndex col_inflate_strides() const { return m_col_inflate_strides; } - EIGEN_DEVICE_FUNC - bool padding_explicit() const { return m_padding_explicit; } - EIGEN_DEVICE_FUNC - DenseIndex padding_top_z() const { return m_padding_top_z; } - EIGEN_DEVICE_FUNC - DenseIndex padding_bottom_z() const { return m_padding_bottom_z; } - EIGEN_DEVICE_FUNC - DenseIndex padding_top() const { return m_padding_top; } - EIGEN_DEVICE_FUNC - DenseIndex padding_bottom() const { return m_padding_bottom; } - EIGEN_DEVICE_FUNC 
- DenseIndex padding_left() const { return m_padding_left; } - EIGEN_DEVICE_FUNC - DenseIndex padding_right() const { return m_padding_right; } - EIGEN_DEVICE_FUNC - PaddingType padding_type() const { return m_padding_type; } - EIGEN_DEVICE_FUNC - Scalar padding_value() const { return m_padding_value; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const DenseIndex m_patch_planes; - const DenseIndex m_patch_rows; - const DenseIndex m_patch_cols; - const DenseIndex m_plane_strides; - const DenseIndex m_row_strides; - const DenseIndex m_col_strides; - const DenseIndex m_in_plane_strides; - const DenseIndex m_in_row_strides; - const DenseIndex m_in_col_strides; - const DenseIndex m_plane_inflate_strides; - const DenseIndex m_row_inflate_strides; - const DenseIndex m_col_inflate_strides; - const bool m_padding_explicit; - const DenseIndex m_padding_top_z; - const DenseIndex m_padding_bottom_z; - const DenseIndex m_padding_top; - const DenseIndex m_padding_bottom; - const DenseIndex m_padding_left; - const DenseIndex m_padding_right; - const PaddingType m_padding_type; - const Scalar m_padding_value; -}; - - -// Eval as rvalue -template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device> -struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, Device> -{ - typedef TensorVolumePatchOp<Planes, Rows, Cols, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - static const int NumDims = NumInputDims + 1; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = 
TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = NumDims == 6, - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - EIGEN_STATIC_ASSERT(NumDims >= 5, YOU_MADE_A_PROGRAMMING_MISTAKE); - - m_paddingValue = op.padding_value(); - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - - // Cache a few variables. - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputDepth = input_dims[0]; - m_inputPlanes = input_dims[1]; - m_inputRows = input_dims[2]; - m_inputCols = input_dims[3]; - } else { - m_inputDepth = input_dims[NumInputDims-1]; - m_inputPlanes = input_dims[NumInputDims-2]; - m_inputRows = input_dims[NumInputDims-3]; - m_inputCols = input_dims[NumInputDims-4]; - } - - m_plane_strides = op.plane_strides(); - m_row_strides = op.row_strides(); - m_col_strides = op.col_strides(); - - // Input strides and effective input/patch size - m_in_plane_strides = op.in_plane_strides(); - m_in_row_strides = op.in_row_strides(); - m_in_col_strides = op.in_col_strides(); - m_plane_inflate_strides = op.plane_inflate_strides(); - m_row_inflate_strides = op.row_inflate_strides(); - m_col_inflate_strides = op.col_inflate_strides(); - - // The "effective" spatial size after inflating data with zeros. 
- m_input_planes_eff = (m_inputPlanes - 1) * m_plane_inflate_strides + 1; - m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1; - m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1; - m_patch_planes_eff = op.patch_planes() + (op.patch_planes() - 1) * (m_in_plane_strides - 1); - m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1); - m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1); - - if (op.padding_explicit()) { - m_outputPlanes = ceil((m_input_planes_eff + op.padding_top_z() + op.padding_bottom_z() - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides)); - m_outputRows = ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides)); - m_outputCols = ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides)); - m_planePaddingTop = op.padding_top_z(); - m_rowPaddingTop = op.padding_top(); - m_colPaddingLeft = op.padding_left(); - } else { - // Computing padding from the type - switch (op.padding_type()) { - case PADDING_VALID: - m_outputPlanes = ceil((m_input_planes_eff - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides)); - m_outputRows = ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides)); - m_outputCols = ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides)); - m_planePaddingTop = 0; - m_rowPaddingTop = 0; - m_colPaddingLeft = 0; - break; - case PADDING_SAME: { - m_outputPlanes = ceil(m_input_planes_eff / static_cast<float>(m_plane_strides)); - m_outputRows = ceil(m_input_rows_eff / static_cast<float>(m_row_strides)); - m_outputCols = ceil(m_input_cols_eff / static_cast<float>(m_col_strides)); - const Index dz = m_outputPlanes * m_plane_strides + m_patch_planes_eff - 1 - m_input_planes_eff; - const Index dy = m_outputRows * m_row_strides + 
m_patch_rows_eff - 1 - m_input_rows_eff; - const Index dx = m_outputCols * m_col_strides + m_patch_cols_eff - 1 - m_input_cols_eff; - m_planePaddingTop = dz - dz / 2; - m_rowPaddingTop = dy - dy / 2; - m_colPaddingLeft = dx - dx / 2; - break; - } - default: - eigen_assert(false && "unexpected padding"); - } - } - eigen_assert(m_outputRows > 0); - eigen_assert(m_outputCols > 0); - eigen_assert(m_outputPlanes > 0); - - // Dimensions for result of extraction. - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - // ColMajor - // 0: depth - // 1: patch_planes - // 2: patch_rows - // 3: patch_cols - // 4: number of patches - // 5 and beyond: anything else (such as batch). - m_dimensions[0] = input_dims[0]; - m_dimensions[1] = op.patch_planes(); - m_dimensions[2] = op.patch_rows(); - m_dimensions[3] = op.patch_cols(); - m_dimensions[4] = m_outputPlanes * m_outputRows * m_outputCols; - for (int i = 5; i < NumDims; ++i) { - m_dimensions[i] = input_dims[i-1]; - } - } else { - // RowMajor - // NumDims-1: depth - // NumDims-2: patch_planes - // NumDims-3: patch_rows - // NumDims-4: patch_cols - // NumDims-5: number of patches - // NumDims-6 and beyond: anything else (such as batch). - m_dimensions[NumDims-1] = input_dims[NumInputDims-1]; - m_dimensions[NumDims-2] = op.patch_planes(); - m_dimensions[NumDims-3] = op.patch_rows(); - m_dimensions[NumDims-4] = op.patch_cols(); - m_dimensions[NumDims-5] = m_outputPlanes * m_outputRows * m_outputCols; - for (int i = NumDims-6; i >= 0; --i) { - m_dimensions[i] = input_dims[i]; - } - } - - // Strides for the output tensor. 
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_rowStride = m_dimensions[1]; - m_colStride = m_dimensions[2] * m_rowStride; - m_patchStride = m_colStride * m_dimensions[3] * m_dimensions[0]; - m_otherStride = m_patchStride * m_dimensions[4]; - } else { - m_rowStride = m_dimensions[NumDims-2]; - m_colStride = m_dimensions[NumDims-3] * m_rowStride; - m_patchStride = m_colStride * m_dimensions[NumDims-4] * m_dimensions[NumDims-1]; - m_otherStride = m_patchStride * m_dimensions[NumDims-5]; - } - - // Strides for navigating through the input tensor. - m_planeInputStride = m_inputDepth; - m_rowInputStride = m_inputDepth * m_inputPlanes; - m_colInputStride = m_inputDepth * m_inputRows * m_inputPlanes; - m_otherInputStride = m_inputDepth * m_inputRows * m_inputCols * m_inputPlanes; - - m_outputPlanesRows = m_outputPlanes * m_outputRows; - - // Fast representations of different variables. - m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride); - m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride); - m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride); - m_fastRowStride = internal::TensorIntDivisor<Index>(m_rowStride); - m_fastInputRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides); - m_fastInputColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides); - m_fastInputPlaneStride = internal::TensorIntDivisor<Index>(m_plane_inflate_strides); - m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff); - m_fastOutputPlanes = internal::TensorIntDivisor<Index>(m_outputPlanes); - m_fastOutputPlanesRows = internal::TensorIntDivisor<Index>(m_outputPlanesRows); - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]); - } else { - m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]); - } - } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef 
typename XprType::PacketReturnType PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - // Patch index corresponding to the passed in index. - const Index patchIndex = index / m_fastPatchStride; - - // Spatial offset within the patch. This has to be translated into 3D - // coordinates within the patch. - const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth; - - // Batch, etc. - const Index otherIndex = (NumDims == 5) ? 0 : index / m_fastOtherStride; - const Index patch3DIndex = (NumDims == 5) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride; - - // Calculate column index in the input original tensor. - const Index colIndex = patch3DIndex / m_fastOutputPlanesRows; - const Index colOffset = patchOffset / m_fastColStride; - const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft; - const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0); - if (inputCol < 0 || inputCol >= m_input_cols_eff || - ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) { - return Scalar(m_paddingValue); - } - - // Calculate row index in the original input tensor. - const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes; - const Index rowOffset = (patchOffset - colOffset * m_colStride) / m_fastRowStride; - const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop; - const Index origInputRow = (m_row_inflate_strides == 1) ? 
inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0); - if (inputRow < 0 || inputRow >= m_input_rows_eff || - ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { - return Scalar(m_paddingValue); - } - - // Calculate plane index in the original input tensor. - const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex)); - const Index planeOffset = patchOffset - colOffset * m_colStride - rowOffset * m_rowStride; - const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop; - const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? (inputPlane / m_fastInputPlaneStride) : 0); - if (inputPlane < 0 || inputPlane >= m_input_planes_eff || - ((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) { - return Scalar(m_paddingValue); - } - - const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 
0 : NumDims - 1; - const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; - - const Index inputIndex = depth + - origInputRow * m_rowInputStride + - origInputCol * m_colInputStride + - origInputPlane * m_planeInputStride + - otherIndex * m_otherInputStride; - - return m_impl.coeff(inputIndex); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const Index packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 || - m_in_plane_strides != 1 || m_plane_inflate_strides != 1) { - return packetWithPossibleZero(index); - } - - const Index indices[2] = {index, index + packetSize - 1}; - const Index patchIndex = indices[0] / m_fastPatchStride; - if (patchIndex != indices[1] / m_fastPatchStride) { - return packetWithPossibleZero(index); - } - const Index otherIndex = (NumDims == 5) ? 0 : indices[0] / m_fastOtherStride; - eigen_assert(otherIndex == indices[1] / m_fastOtherStride); - - // Find the offset of the element wrt the location of the first element. - const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth, - (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth}; - - const Index patch3DIndex = (NumDims == 5) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride; - eigen_assert(patch3DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride); - - const Index colIndex = patch3DIndex / m_fastOutputPlanesRows; - const Index colOffsets[2] = { - patchOffsets[0] / m_fastColStride, - patchOffsets[1] / m_fastColStride}; - - // Calculate col indices in the original input tensor. 
- const Index inputCols[2] = { - colIndex * m_col_strides + colOffsets[0] - m_colPaddingLeft, - colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft}; - if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) { - return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); - } - - if (inputCols[0] != inputCols[1]) { - return packetWithPossibleZero(index); - } - - const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes; - const Index rowOffsets[2] = { - (patchOffsets[0] - colOffsets[0] * m_colStride) / m_fastRowStride, - (patchOffsets[1] - colOffsets[1] * m_colStride) / m_fastRowStride}; - eigen_assert(rowOffsets[0] <= rowOffsets[1]); - // Calculate row indices in the original input tensor. - const Index inputRows[2] = { - rowIndex * m_row_strides + rowOffsets[0] - m_rowPaddingTop, - rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop}; - - if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) { - return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); - } - - if (inputRows[0] != inputRows[1]) { - return packetWithPossibleZero(index); - } - - const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex)); - const Index planeOffsets[2] = { - patchOffsets[0] - colOffsets[0] * m_colStride - rowOffsets[0] * m_rowStride, - patchOffsets[1] - colOffsets[1] * m_colStride - rowOffsets[1] * m_rowStride}; - eigen_assert(planeOffsets[0] <= planeOffsets[1]); - const Index inputPlanes[2] = { - planeIndex * m_plane_strides + planeOffsets[0] - m_planePaddingTop, - planeIndex * m_plane_strides + planeOffsets[1] - m_planePaddingTop}; - - if (inputPlanes[1] < 0 || inputPlanes[0] >= m_inputPlanes) { - return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); - } - - if (inputPlanes[0] >= 0 && inputPlanes[1] < m_inputPlanes) { - // no padding - const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 
0 : NumDims - 1; - const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; - const Index inputIndex = depth + - inputRows[0] * m_rowInputStride + - inputCols[0] * m_colInputStride + - m_planeInputStride * inputPlanes[0] + - otherIndex * m_otherInputStride; - return m_impl.template packet<Unaligned>(inputIndex); - } - - return packetWithPossibleZero(index); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } - - Index planePaddingTop() const { return m_planePaddingTop; } - Index rowPaddingTop() const { return m_rowPaddingTop; } - Index colPaddingLeft() const { return m_colPaddingLeft; } - Index outputPlanes() const { return m_outputPlanes; } - Index outputRows() const { return m_outputRows; } - Index outputCols() const { return m_outputCols; } - Index userPlaneStride() const { return m_plane_strides; } - Index userRowStride() const { return m_row_strides; } - Index userColStride() const { return m_col_strides; } - Index userInPlaneStride() const { return m_in_plane_strides; } - Index userInRowStride() const { return m_in_row_strides; } - Index userInColStride() const { return m_in_col_strides; } - Index planeInflateStride() const { return m_plane_inflate_strides; } - Index rowInflateStride() const { return m_row_inflate_strides; } - Index colInflateStride() const { return m_col_inflate_strides; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const - { - // ColMajor - // 0: depth, 1: patch_planes, 2: patch_rows, 3: patch_cols, 4: number of patches, 5: batches - // RowMajor - // 0: batches, 1: number of patches, 2: patch_cols , 3: patch_rows, 4: patch_planes, 5: depth - const Index patch3DIndex = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 4 : 1]; - const Index colOffset = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 
3 : 2]; - const Index rowOffset= coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 2 : 3]; - const Index planeOffset = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 4]; - - array<Index, NumDims-1> inputCoords; - - const Index colIndex = patch3DIndex / m_fastOutputPlanesRows; - const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft; - const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0); - if (inputCol < 0 || inputCol >= m_input_cols_eff || - ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) { - return Scalar(m_paddingValue); - } - - const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes; - const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop; - const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0); - if (inputRow < 0 || inputRow >= m_input_rows_eff || - ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { - return Scalar(m_paddingValue); - } - - const Index planeIndex = patch3DIndex - colIndex * m_outputPlanesRows - rowIndex * m_outputPlanes; // remainder: rowIndex was obtained by dividing by m_outputPlanes, matching coeff(Index) - const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop; - const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? 
(inputPlane / m_fastInputPlaneStride) : 0); - if (inputPlane < 0 || inputPlane >= m_input_planes_eff || - ((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) { - return Scalar(m_paddingValue); - } - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - inputCoords[0] = coords[0]; // depth - inputCoords[1] = origInputPlane; - inputCoords[2] = origInputRow; - inputCoords[3] = origInputCol; - inputCoords[4] = coords[5]; // batch - } else { - inputCoords[4] = coords[5]; // depth - inputCoords[3] = origInputPlane; - inputCoords[2] = origInputRow; - inputCoords[1] = origInputCol; - inputCoords[0] = coords[0]; // batch - } - if (TensorEvaluator<ArgType, Device>::CoordAccess) { - return m_impl.coeff(inputCoords); - } else { - Index inputIndex; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - inputIndex = - inputCoords[4] * m_otherInputStride + - inputCoords[3] * m_colInputStride + - inputCoords[2] * m_rowInputStride + - inputCoords[1] * m_planeInputStride + - inputCoords[0]; - } else { - inputIndex = - inputCoords[0] * m_otherInputStride + - inputCoords[1] * m_colInputStride + - inputCoords[2] * m_rowInputStride + - inputCoords[3] * m_planeInputStride + - inputCoords[4]; - } - return m_impl.coeff(inputIndex); - } - } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - Dimensions m_dimensions; - - // Parameters passed to the constructor. 
- Index m_plane_strides; - Index m_row_strides; - Index m_col_strides; - - Index m_outputPlanes; - Index m_outputRows; - Index m_outputCols; - - Index m_planePaddingTop; - Index m_rowPaddingTop; - Index m_colPaddingLeft; - - Index m_in_plane_strides; - Index m_in_row_strides; - Index m_in_col_strides; - - Index m_plane_inflate_strides; - Index m_row_inflate_strides; - Index m_col_inflate_strides; - - // Cached input size. - Index m_inputDepth; - Index m_inputPlanes; - Index m_inputRows; - Index m_inputCols; - - // Other cached variables. - Index m_outputPlanesRows; - - // Effective input/patch post-inflation size. - Index m_input_planes_eff; - Index m_input_rows_eff; - Index m_input_cols_eff; - Index m_patch_planes_eff; - Index m_patch_rows_eff; - Index m_patch_cols_eff; - - // Strides for the output tensor. - Index m_otherStride; - Index m_patchStride; - Index m_rowStride; - Index m_colStride; - - // Strides for the input tensor. - Index m_planeInputStride; - Index m_rowInputStride; - Index m_colInputStride; - Index m_otherInputStride; - - internal::TensorIntDivisor<Index> m_fastOtherStride; - internal::TensorIntDivisor<Index> m_fastPatchStride; - internal::TensorIntDivisor<Index> m_fastColStride; - internal::TensorIntDivisor<Index> m_fastRowStride; - internal::TensorIntDivisor<Index> m_fastInputPlaneStride; - internal::TensorIntDivisor<Index> m_fastInputRowStride; - internal::TensorIntDivisor<Index> m_fastInputColStride; - internal::TensorIntDivisor<Index> m_fastInputColsEff; - internal::TensorIntDivisor<Index> m_fastOutputPlanesRows; - internal::TensorIntDivisor<Index> m_fastOutputPlanes; - internal::TensorIntDivisor<Index> m_fastOutputDepth; - - Scalar m_paddingValue; - - TensorEvaluator<ArgType, Device> m_impl; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/g3doc/README.md b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/g3doc/README.md deleted 
file mode 100644 index 9bc1161976..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/g3doc/README.md +++ /dev/null @@ -1,1792 +0,0 @@ -# Eigen Tensors - -Tensors are multidimensional arrays of elements. Elements are typically scalars, -but more complex types such as strings are also supported. - -[TOC] - -## Tensor Classes - -You can manipulate a tensor with one of the following classes. They all are in -the namespace ```::Eigen.``` - - -### Class Tensor<data_type, rank> - -This is the class to use to create a tensor and allocate memory for it. The -class is templatized with the tensor datatype, such as float or int, and the -tensor rank. The rank is the number of dimensions, for example rank 2 is a -matrix. - -Tensors of this class are resizable. For example, if you assign a tensor of a -different size to a Tensor, that tensor is resized to match its new value. - -#### Constructor Tensor<data_type, rank>(size0, size1, ...) - -Constructor for a Tensor. The constructor must be passed ```rank``` integers -indicating the sizes of the instance along each of the ```rank``` -dimensions. - - // Create a tensor of rank 3 of sizes 2, 3, 4. This tensor owns - // memory to hold 24 floating point values (24 = 2 x 3 x 4). - Tensor<float, 3> t_3d(2, 3, 4); - - // Resize t_3d by assigning a tensor of different sizes, but same rank. - t_3d = Tensor<float, 3>(3, 4, 3); - -#### Constructor Tensor<data_type, rank>(size_array) - -Constructor where the sizes for the constructor are specified as an array of -values instead of an explicit list of parameters. The array type to use is -```Eigen::array<Eigen::Index>```. The array can be constructed automatically -from an initializer list. - - // Create a tensor of strings of rank 2 with sizes 5, 7. - Tensor<string, 2> t_2d({5, 7}); - - -### Class TensorFixedSize<data_type, Sizes<size0, size1, ...>> - -Class to use for tensors of fixed size, where the size is known at compile -time. 
Fixed sized tensors can provide very fast computations because all their -dimensions are known by the compiler. FixedSize tensors are not resizable. - -If the total number of elements in a fixed size tensor is small enough the -tensor data is held onto the stack and does not cause heap allocation and free. - - // Create a 4 x 3 tensor of floats. - TensorFixedSize<float, Sizes<4, 3>> t_4x3; - -### Class TensorMap<Tensor<data_type, rank>> - -This is the class to use to create a tensor on top of memory allocated and -owned by another part of your code. It allows to view any piece of allocated -memory as a Tensor. Instances of this class do not own the memory where the -data are stored. - -A TensorMap is not resizable because it does not own the memory where its data -are stored. - -#### Constructor TensorMap<Tensor<data_type, rank>>(data, size0, size1, ...) - -Constructor for a Tensor. The constructor must be passed a pointer to the -storage for the data, and "rank" size attributes. The storage has to be -large enough to hold all the data. - - // Map a tensor of ints on top of stack-allocated storage. - int storage[128]; // 2 x 4 x 2 x 8 = 128 - TensorMap<int, 4> t_4d(storage, 2, 4, 2, 8); - - // The same storage can be viewed as a different tensor. - // You can also pass the sizes as an array. - TensorMap<int, 2> t_2d(storage, 16, 8); - - // You can also map fixed-size tensors. Here we get a 1d view of - // the 2d fixed-size tensor. - TensorFixedSize<float, Sizes<4, 5>> t_4x3; - TensorMap<float, 1> t_12(t_4x3, 12); - - -#### Class TensorRef - -See Assigning to a TensorRef below. - -## Accessing Tensor Elements - -#### <data_type> tensor(index0, index1...) - -Return the element at position ```(index0, index1...)``` in tensor -```tensor```. You must pass as many parameters as the rank of ```tensor```. -The expression can be used as an l-value to set the value of the element at the -specified position. The value returned is of the datatype of the tensor. 
- - // Set the value of the element at position (0, 1, 0); - Tensor<float, 3> t_3d(2, 3, 4); - t_3d(0, 1, 0) = 12.0f; - - // Initialize all elements to random values. - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 3; ++j) { - for (int k = 0; k < 4; ++k) { - t_3d(i, j, k) = ...some random value...; - } - } - } - - // Print elements of a tensor. - for (int i = 0; i < 2; ++i) { - LOG(INFO) << t_3d(i, 0, 0); - } - - -## TensorLayout - -The tensor library supports 2 layouts: ```ColMajor``` (the default) and -```RowMajor```. Only the default column major layout is currently fully -supported, and it is therefore not recommended to attempt to use the row major -layout at the moment. - -The layout of a tensor is optionally specified as part of its type. If not -specified explicitly column major is assumed. - - Tensor<float, 3, ColMajor> col_major; // equivalent to Tensor<float, 3> - TensorMap<Tensor<float, 3, RowMajor> > row_major(data, ...); - -All the arguments to an expression must use the same layout. Attempting to mix -different layouts will result in a compilation error. - -It is possible to change the layout of a tensor or an expression using the -```swap_layout()``` method. Note that this will also reverse the order of the -dimensions. 
- - Tensor<float, 2, ColMajor> col_major(2, 4); - Tensor<float, 2, RowMajor> row_major(2, 4); - - Tensor<float, 2> col_major_result = col_major; // ok, layouts match - Tensor<float, 2> col_major_result = row_major; // will not compile - - // Simple layout swap - col_major_result = row_major.swap_layout(); - eigen_assert(col_major_result.dimension(0) == 4); - eigen_assert(col_major_result.dimension(1) == 2); - - // Swap the layout and preserve the order of the dimensions - array<int, 2> shuffle(1, 0); - col_major_result = row_major.swap_layout().shuffle(shuffle); - eigen_assert(col_major_result.dimension(0) == 2); - eigen_assert(col_major_result.dimension(1) == 4); - - -## Tensor Operations - -The Eigen Tensor library provides a vast library of operations on Tensors: -numerical operations such as addition and multiplication, geometry operations -such as slicing and shuffling, etc. These operations are available as methods -of the Tensor classes, and in some cases as operator overloads. For example -the following code computes the elementwise addition of two tensors: - - Tensor<float, 3> t1(2, 3, 4); - ...set some values in t1... - Tensor<float, 3> t2(2, 3, 4); - ...set some values in t2... - // Set t3 to the element wise sum of t1 and t2 - Tensor<float, 3> t3 = t1 + t2; - -While the code above looks easy enough, it is important to understand that the -expression ```t1 + t2``` is not actually adding the values of the tensors. The -expression instead constructs a "tensor operator" object of the class -TensorCwiseBinaryOp<scalar_sum>, which has references to the tensors -```t1``` and ```t2```. This is a small C++ object that knows how to add -```t1``` and ```t2```. It is only when the value of the expression is assigned -to the tensor ```t3``` that the addition is actually performed. Technically, -this happens through the overloading of ```operator=()``` in the Tensor class. 
- -This mechanism for computing tensor expressions allows for lazy evaluation and -optimizations which are what make the tensor library very fast. - -Of course, the tensor operators do nest, and the expression ```t1 + t2 * -0.3f``` is actually represented with the (approximate) tree of operators: - - TensorCwiseBinaryOp<scalar_sum>(t1, TensorCwiseUnaryOp<scalar_mul>(t2, 0.3f)) - - -### Tensor Operations and C++ "auto" - -Because Tensor operations create tensor operators, the C++ ```auto``` keyword -does not have its intuitive meaning. Consider these 2 lines of code: - - Tensor<float, 3> t3 = t1 + t2; - auto t4 = t1 + t2; - -In the first line we allocate the tensor ```t3``` and it will contain the -result of the addition of ```t1``` and ```t2```. In the second line, ```t4``` -is actually the tree of tensor operators that will compute the addition of -```t1``` and ```t2```. In fact, ```t4``` is *not* a tensor and you cannot get -the values of its elements: - - Tensor<float, 3> t3 = t1 + t2; - cout << t3(0, 0, 0); // OK prints the value of t1(0, 0, 0) + t2(0, 0, 0) - - auto t4 = t1 + t2; - cout << t4(0, 0, 0); // Compilation error! - -When you use ```auto``` you do not get a Tensor as a result but instead a -non-evaluated expression. So only use ```auto``` to delay evaluation. - -Unfortunately, there is no single underlying concrete type for holding -non-evaluated expressions, hence you have to use auto in the case when you do -want to hold non-evaluated expressions. - -When you need the results of a set of tensor computations you have to assign the -result to a Tensor that will be capable of holding them. This can be -either a normal Tensor, a fixed size Tensor, or a TensorMap on an existing -piece of memory. All the following will work: - - auto t4 = t1 + t2; - - Tensor<float, 3> result = t4; // Could also be: result(t4); - cout << result(0, 0, 0); - - TensorMap<float, 4> result(<a float* with enough space>, <size0>, ...) 
= t4; - cout << result(0, 0, 0); - - TensorFixedSize<float, Sizes<size0, ...>> result = t4; - cout << result(0, 0, 0); - -Until you need the results, you can keep the operation around, and even reuse -it for additional operations. As long as you keep the expression as an -operation, no computation is performed. - - // One way to compute exp((t1 + t2) * 0.2f); - auto t3 = t1 + t2; - auto t4 = t3 * 0.2f; - auto t5 = t4.exp(); - Tensor<float, 3> result = t5; - - // Another way, exactly as efficient as the previous one: - Tensor<float, 3> result = ((t1 + t2) * 0.2f).exp(); - -### Controlling When Expressions are Evaluated - -There are several ways to control when expressions are evaluated: - -* Assignment to a Tensor, TensorFixedSize, or TensorMap. -* Use of the eval() method. -* Assignment to a TensorRef. - -#### Assigning to a Tensor, TensorFixedSize, or TensorMap. - -The most common way to evaluate an expression is to assign it to a Tensor. In -the example below, the ```auto``` declarations make the intermediate values -"Operations", not Tensors, and do not cause the expressions to be evaluated. -The assignment to the Tensor ```result``` causes the evaluation of all the -operations. - - auto t3 = t1 + t2; // t3 is an Operation. - auto t4 = t3 * 0.2f; // t4 is an Operation. - auto t5 = t4.exp(); // t5 is an Operation. - Tensor<float, 3> result = t5; // The operations are evaluated. - -If you know the ranks and sizes of the Operation value you can assign the -Operation to a TensorFixedSize instead of a Tensor, which is a bit more -efficient. - - // We know that the result is a 4x4x2 tensor! - TensorFixedSize<float, Sizes<4, 4, 2>> result = t5; - -Similarly, assigning an expression to a TensorMap causes its evaluation. Like -tensors of type TensorFixedSize, TensorMaps cannot be resized so they have to -have the rank and sizes of the expression that are assigned to them. - -#### Calling eval(). 
- -When you compute large composite expressions, you sometimes want to tell Eigen -that an intermediate value in the expression tree is worth evaluating ahead of -time. This is done by inserting a call to the ```eval()``` method of the -expression Operation. - - // The previous example could have been written: - Tensor<float, 3> result = ((t1 + t2) * 0.2f).exp(); - - // If you want to compute (t1 + t2) once ahead of time you can write: - Tensor<float, 3> result = ((t1 + t2).eval() * 0.2f).exp(); - -Semantically, calling ```eval()``` is equivalent to materializing the value of -the expression in a temporary Tensor of the right size. The code above in -effect does: - - // .eval() knows the size! - TensorFixedSize<float, Sizes<4, 4, 2>> tmp = t1 + t2; - Tensor<float, 3> result = (tmp * 0.2f).exp(); - -Note that the return value of ```eval()``` is itself an Operation, so the -following code does not do what you may think: - - // Here t3 is an evaluation Operation. t3 has not been evaluated yet. - auto t3 = (t1 + t2).eval(); - - // You can use t3 in another expression. Still no evaluation. - auto t4 = (t3 * 0.2f).exp(); - - // The value is evaluated when you assign the Operation to a Tensor, using - // an intermediate tensor to represent t3.x - Tensor<float, 3> result = t4; - -While in the examples above calling ```eval()``` does not make a difference in -performance, in other cases it can make a huge difference. 
In the expression -below the ```broadcast()``` expression causes the ```X.maximum()``` expression -to be evaluated many times: - - Tensor<...> X ...; - Tensor<...> Y = ((X - X.maximum(depth_dim).reshape(dims2d).broadcast(bcast)) - * beta).exp(); - -Inserting a call to ```eval()``` between the ```maximum()``` and -```reshape()``` calls guarantees that maximum() is only computed once and -greatly speeds up execution: - - Tensor<...> Y = - ((X - X.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)) - * beta).exp(); - -In the other example below, the tensor ```Y``` is both used in the expression -and its assignment. This is an aliasing problem and if the evaluation is not -done in the right order Y will be updated incrementally during the evaluation -resulting in bogus results: - - Tensor<...> Y ...; - Y = Y / (Y.sum(depth_dim).reshape(dims2d).broadcast(bcast)); - -Inserting a call to ```eval()``` between the ```sum()``` and ```reshape()``` -expressions ensures that the sum is computed before any updates to ```Y``` are -done. - - Y = Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast)); - -Note that an eval around the full right hand side expression is not needed -because the generated code has to compute the i-th value of the right hand side -before assigning it to the left hand side. - -However, if you were assigning the expression value to a shuffle of ```Y``` -then you would need to force an eval for correctness by adding an ```eval()``` -call for the right hand side: - - Y.shuffle(...) = - (Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast))).eval(); - - -#### Assigning to a TensorRef. - -If you need to access only a few elements from the value of an expression you -can avoid materializing the value in a full tensor by using a TensorRef. - -A TensorRef is a small wrapper class for any Eigen Operation. It provides -overloads for the ```()``` operator that let you access individual values in -the expression. 
TensorRef is convenient, because the Operation themselves do -not provide a way to access individual elements. - - // Create a TensorRef for the expression. The expression is not - // evaluated yet. - TensorRef<Tensor<float, 3> > ref = ((t1 + t2) * 0.2f).exp(); - - // Use "ref" to access individual elements. The expression is evaluated - // on the fly. - float at_0 = ref(0, 0, 0); - cout << ref(0, 1, 0); - -Only use TensorRef when you need a subset of the values of the expression. -TensorRef only computes the values you access. However note that if you are -going to access all the values it will be much faster to materialize the -results in a Tensor first. - -In some cases, if the full Tensor result would be very large, you may save -memory by accessing it as a TensorRef. But not always. So don't count on it. - - -### Controlling How Expressions Are Evaluated - -The tensor library provides several implementations of the various operations -such as contractions and convolutions. The implementations are optimized for -different environments: single threaded on CPU, multi threaded on CPU, or on a -GPU using cuda. Additional implementations may be added later. - -You can choose which implementation to use with the ```device()``` call. If -you do not choose an implementation explicitly the default implementation that -uses a single thread on the CPU is used. - -The default implementation has been optimized for recent Intel CPUs, taking -advantage of SSE, AVX, and FMA instructions. Work is ongoing to tune the -library on ARM CPUs. Note that you need to pass compiler-dependent flags -to enable the use of SSE, AVX, and other instructions. - -For example, the following code adds two tensors using the default -single-threaded CPU implementation: - - Tensor<float, 2> a(30, 40); - Tensor<float, 2> b(30, 40); - Tensor<float, 2> c = a + b; - -To choose a different implementation you have to insert a ```device()``` call -before the assignment of the result. 
For technical C++ reasons this requires -that the Tensor for the result be declared on its own. This means that you -have to know the size of the result. - - Eigen::Tensor<float, 2> c(30, 40); - c.device(...) = a + b; - -The call to ```device()``` must be the last call on the left of the operator=. - -You must pass to the ```device()``` call an Eigen device object. There are -presently three devices you can use: DefaultDevice, ThreadPoolDevice and -GpuDevice. - - -#### Evaluating With the DefaultDevice - -This is exactly the same as not inserting a ```device()``` call. - - DefaultDevice my_device; - c.device(my_device) = a + b; - -#### Evaluating with a Thread Pool - - #include "thread/threadpool.h" - - // Create a threadpool and start the threads. This is the Google way, - // other environments use different mechanism to create a thread pool. - ThreadPool my_pool(4 /* number of threads in the pool */); - my_pool.StartWorkers(); - - // Create the Eigen ThreadPoolDevice. - // You typically use up to all the available threads in the pool. - Eigen::ThreadPoolDevice my_device(&my_pool, 4 /* number of threads to use */); - - // Now just use the device when evaluating expressions. - Eigen::Tensor<float, 2> c(30, 50); - c.device(my_device) = a.contract(b, dot_product_dims); - - -#### Evaluating On GPU - -This is presently a bit more complicated than just using a thread pool device. -You need to create a GPU device but you also need to explicitly allocate the -memory for tensors with cuda. - - -## API Reference - -### Datatypes - -In the documentation of the tensor methods and Operation we mention datatypes -that are tensor-type specific: - -#### <Tensor-Type>::Dimensions - -Acts like an array of ints. Has an ```int size``` attribute, and can be -indexed like an array to access individual values. Used to represent the -dimensions of a tensor. See ```dimensions()```. - -#### <Tensor-Type>::Index - -Acts like an ```int```. Used for indexing tensors along their dimensions. 
See -```operator()```, ```dimension()```, and ```size()```. - -#### <Tensor-Type>::Scalar - -Represents the datatype of individual tensor elements. For example, for a -```Tensor<float>```, ```Scalar``` is the type ```float```. See -```setConstant()```. - -#### <Operation> - -We use this pseudo type to indicate that a tensor Operation is returned by a -method. We indicate in the text the type and dimensions of the tensor that the -Operation returns after evaluation. - -The Operation will have to be evaluated, for example by assigning it to a -tensor, before you can access the values of the resulting tensor. You can also -access the values through a TensorRef. - - -## Built-in Tensor Methods - -These are usual C++ methods that act on tensors immediately. They are not -Operations which provide delayed evaluation of their results. Unless specified -otherwise, all the methods listed below are available on all tensor classes: -Tensor, TensorFixedSize, and TensorMap. - -## Metadata - -### int NumDimensions - -Constant value indicating the number of dimensions of a Tensor. This is also -known as the tensor "rank". - - Eigen::Tensor<float, 2> a(3, 4); - cout << "Dims " << a.NumDimensions; - => Dims 2 - -### Dimensions dimensions() - -Returns an array-like object representing the dimensions of the tensor. -The actual type of the dimensions() result is <Tensor-Type>::Dimensions. - - Eigen::Tensor<float, 2> a(3, 4); - const Eigen::Tensor<float, 2>::Dimensions& d = a.dimensions(); - cout << "Dim size: " << d.size << ", dim 0: " << d[0] - << ", dim 1: " << d[1]; - => Dim size: 2, dim 0: 3, dim 1: 4 - -If you use a C++11 compiler, you can use ```auto``` to simplify the code: - - const auto& d = a.dimensions(); - cout << "Dim size: " << d.size << ", dim 0: " << d[0] - << ", dim 1: " << d[1]; - => Dim size: 2, dim 0: 3, dim 1: 4 - -### Index dimension(Index n) - -Returns the n-th dimension of the tensor. 
The actual type of the -```dimension()``` result is ```<Tensor-Type>::Index```, but you can -always use it like an int. - - Eigen::Tensor<float, 2> a(3, 4); - int dim1 = a.dimension(1); - cout << "Dim 1: " << dim1; - => Dim 1: 4 - -### Index size() - -Returns the total number of elements in the tensor. This is the product of all -the tensor dimensions. The actual type of the ```size()``` result is -```<Tensor-Type>::Index```, but you can always use it like an int. - - Eigen::Tensor<float, 2> a(3, 4); - cout << "Size: " << a.size(); - => Size: 12 - - -### Getting Dimensions From An Operation - -A few operations provide ```dimensions()``` directly, -e.g. ```TensorReslicingOp```. Most operations defer calculating dimensions -until the operation is being evaluated. If you need access to the dimensions -of a deferred operation, you can wrap it in a TensorRef (see Assigning to a -TensorRef above), which provides ```dimensions()``` and ```dimension()``` as -above. - -TensorRef can also wrap the plain Tensor types, so this is a useful idiom in -templated contexts where the underlying object could be either a raw Tensor -or some deferred operation (e.g. a slice of a Tensor). In this case, the -template code can wrap the object in a TensorRef and reason about its -dimensionality while remaining agnostic to the underlying type. - - -## Constructors - -### Tensor - -Creates a tensor of the specified size. The number of arguments must be equal -to the rank of the tensor. The content of the tensor is not initialized. - - Eigen::Tensor<float, 2> a(3, 4); - cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; - => NumRows: 3 NumCols: 4 - -### TensorFixedSize - -Creates a tensor of the specified size. The number of arguments in the Size<> -template parameter determines the rank of the tensor. The content of the tensor -is not initialized. 
- - Eigen::TensorFixedSize<float, Sizes<3, 4>> a; - cout << "Rank: " << a.rank() << endl; - => Rank: 2 - cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; - => NumRows: 3 NumCols: 4 - -### TensorMap - -Creates a tensor mapping an existing array of data. The data must not be freed -until the TensorMap is discarded, and the size of the data must be large enough -to accommodate the coefficients of the tensor. - - float data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; - Eigen::TensorMap<float, 2> a(data, 3, 4); - cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; - => NumRows: 3 NumCols: 4 - cout << "a(1, 2): " << a(1, 2) << endl; - => a(1, 2): 9 - - -## Contents Initialization - -When a new Tensor or a new TensorFixedSize is created, memory is allocated to -hold all the tensor elements, but the memory is not initialized. Similarly, -when a new TensorMap is created on top of non-initialized memory, its -contents are not initialized. - -You can use one of the methods below to initialize the tensor memory. These -have an immediate effect on the tensor and return the tensor itself as a -result. These are not tensor Operations which delay evaluation. - -### <Tensor-Type> setConstant(const Scalar& val) - -Sets all elements of the tensor to the constant value ```val```. ```Scalar``` -is the type of data stored in the tensor. You can pass any value that is -convertible to that type. - -Returns the tensor itself in case you want to chain another call. 
- - a.setConstant(12.3f); - cout << "Constant: " << endl << a << endl << endl; - => - Constant: - 12.3 12.3 12.3 12.3 - 12.3 12.3 12.3 12.3 - 12.3 12.3 12.3 12.3 - -Note that ```setConstant()``` can be used on any tensor where the element type -has a copy constructor and an ```operator=()```: - - Eigen::Tensor<string, 2> a(2, 3); - a.setConstant("yolo"); - cout << "String tensor: " << endl << a << endl << endl; - => - String tensor: - yolo yolo yolo - yolo yolo yolo - - -### <Tensor-Type> setZero() - -Fills the tensor with zeros. Equivalent to ```setConstant(Scalar(0))```. -Returns the tensor itself in case you want to chain another call. - - a.setZero(); - cout << "Zeros: " << endl << a << endl << endl; - => - Zeros: - 0 0 0 0 - 0 0 0 0 - 0 0 0 0 - - -### <Tensor-Type> setValues({..initializer_list}) - -Fills the tensor with explicit values specified in a std::initializer_list. -The type of the initializer list depends on the type and rank of the tensor. - -If the tensor has rank N, the initializer list must be nested N times. The -most deeply nested lists must contain P scalars of the Tensor type where P is -the size of the last dimension of the Tensor. - -For example, for a ```TensorFixedSize<float, Sizes<2, 3>>``` the initializer list must -contain 2 lists of 3 floats each. - -```setValues()``` returns the tensor itself in case you want to chain another -call. - - Eigen::Tensor<float, 2> a(2, 3); - a.setValues({{0.0f, 1.0f, 2.0f}, {3.0f, 4.0f, 5.0f}}); - cout << "a" << endl << a << endl << endl; - => - a - 0 1 2 - 3 4 5 - -If a list is too short, the corresponding elements of the tensor will not be -changed. This is valid at each level of nesting. For example the following -code only sets the values of the first row of the tensor. 
- - Eigen::Tensor<int, 2> a(2, 3); - a.setConstant(1000); - a.setValues({{10, 20, 30}}); - cout << "a" << endl << a << endl << endl; - => - a - 10 20 30 - 1000 1000 1000 - -### <Tensor-Type> setRandom() - -Fills the tensor with random values. Returns the tensor itself in case you -want to chain another call. - - a.setRandom(); - cout << "Random: " << endl << a << endl << endl; - => - Random: - 0.680375 0.59688 -0.329554 0.10794 - -0.211234 0.823295 0.536459 -0.0452059 - 0.566198 -0.604897 -0.444451 0.257742 - -You can customize ```setRandom()``` by providing your own random number -generator as a template argument: - - a.setRandom<MyRandomGenerator>(); - -Here, ```MyRandomGenerator``` must be a struct with the following member -functions, where Scalar and Index are the same as ```<Tensor-Type>::Scalar``` -and ```<Tensor-Type>::Index```. - -See ```struct UniformRandomGenerator``` in TensorFunctors.h for an example. - - // Custom number generator for use with setRandom(). - struct MyRandomGenerator { - // Default and copy constructors. Both are needed - MyRandomGenerator() { } - MyRandomGenerator(const MyRandomGenerator& ) { } - - // Return a random value to be used. "element_location" is the - // location of the entry to set in the tensor, it can typically - // be ignored. - Scalar operator()(Eigen::DenseIndex element_location, - Eigen::DenseIndex /*unused*/ = 0) const { - return <randomly generated value of type T>; - } - - // Same as above but generates several numbers at a time. 
- typename internal::packet_traits<Scalar>::type packetOp( - Eigen::DenseIndex packet_location, Eigen::DenseIndex /*unused*/ = 0) const { - return <a packet of randomly generated values>; - } - }; - -You can also use one of the 2 random number generators that are part of the -tensor library: -* UniformRandomGenerator -* NormalRandomGenerator - - -## Data Access - -The Tensor, TensorFixedSize, and TensorRef classes provide the following -accessors to access the tensor coefficients: - - const Scalar& operator()(const array<Index, NumIndices>& indices) - const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) - Scalar& operator()(const array<Index, NumIndices>& indices) - Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) - -The number of indices must be equal to the rank of the tensor. Moreover, these -accessors are not available on tensor expressions. In order to access the -values of a tensor expression, the expression must either be evaluated or -wrapped in a TensorRef. - - -### Scalar* data() and const Scalar* data() const - -Returns a pointer to the storage for the tensor. The pointer is const if the -tensor was const. This allows direct access to the data. The layout of the -data depends on the tensor layout: RowMajor or ColMajor. - -This access is usually only needed for special cases, for example when mixing -Eigen Tensor code with other libraries. - -Scalar is the type of data stored in the tensor. - - Eigen::Tensor<float, 2> a(3, 4); - float* a_data = a.data(); - a_data[0] = 123.45f; - cout << "a(0, 0): " << a(0, 0); - => a(0, 0): 123.45 - - -## Tensor Operations - -All the methods documented below return non evaluated tensor ```Operations```. -These can be chained: you can apply another Tensor Operation to the value -returned by the method. - -The chain of Operation is evaluated lazily, typically when it is assigned to a -tensor. See "Controlling when Expressions are Evaluated" for more details about -their evaluation. 
- -### <Operation> constant(const Scalar& val) - -Returns a tensor of the same type and dimensions as the original tensor but -where all elements have the value ```val```. - -This is useful, for example, when you want to add or subtract a constant from a -tensor, or multiply every element of a tensor by a scalar. - - Eigen::Tensor<float, 2> a(2, 3); - a.setConstant(1.0f); - Eigen::Tensor<float, 2> b = a + a.constant(2.0f); - Eigen::Tensor<float, 2> c = b * b.constant(0.2f); - cout << "a" << endl << a << endl << endl; - cout << "b" << endl << b << endl << endl; - cout << "c" << endl << c << endl << endl; - => - a - 1 1 1 - 1 1 1 - - b - 3 3 3 - 3 3 3 - - c - 0.6 0.6 0.6 - 0.6 0.6 0.6 - -### <Operation> random() - -Returns a tensor of the same type and dimensions as the current tensor -but where all elements have random values. - -This is for example useful to add random values to an existing tensor. -The generation of random values can be customized in the same manner -as for ```setRandom()```. - - Eigen::Tensor<float, 2> a(2, 3); - a.setConstant(1.0f); - Eigen::Tensor<float, 2> b = a + a.random(); - cout << "a" << endl << a << endl << endl; - cout << "b" << endl << b << endl << endl; - => - a - 1 1 1 - 1 1 1 - - b - 1.68038 1.5662 1.82329 - 0.788766 1.59688 0.395103 - - -## Unary Element Wise Operations - -All these operations take a single input tensor as argument and return a tensor -of the same type and dimensions as the tensor to which they are applied. The -requested operations are applied to each element independently. - -### <Operation> operator-() - -Returns a tensor of the same type and dimensions as the original tensor -containing the opposite values of the original tensor. 
- - Eigen::Tensor<float, 2> a(2, 3); - a.setConstant(1.0f); - Eigen::Tensor<float, 2> b = -a; - cout << "a" << endl << a << endl << endl; - cout << "b" << endl << b << endl << endl; - => - a - 1 1 1 - 1 1 1 - - b - -1 -1 -1 - -1 -1 -1 - -### <Operation> sqrt() - -Returns a tensor of the same type and dimensions as the original tensor -containing the square roots of the original tensor. - -### <Operation> rsqrt() - -Returns a tensor of the same type and dimensions as the original tensor -containing the inverse square roots of the original tensor. - -### <Operation> square() - -Returns a tensor of the same type and dimensions as the original tensor -containing the squares of the original tensor values. - -### <Operation> inverse() - -Returns a tensor of the same type and dimensions as the original tensor -containing the inverse of the original tensor values. - -### <Operation> exp() - -Returns a tensor of the same type and dimensions as the original tensor -containing the exponential of the original tensor. - -### <Operation> log() - -Returns a tensor of the same type and dimensions as the original tensor -containing the natural logarithms of the original tensor. - -### <Operation> abs() - -Returns a tensor of the same type and dimensions as the original tensor -containing the absolute values of the original tensor. - -### <Operation> pow(Scalar exponent) - -Returns a tensor of the same type and dimensions as the original tensor -containing the coefficients of the original tensor to the power of the -exponent. - -The type of the exponent, Scalar, is always the same as the type of the -tensor coefficients. For example, only integer exponents can be used in -conjunction with tensors of integer values. - -You can use cast() to lift this restriction. 
For example this computes -cubic roots of an int Tensor: - - Eigen::Tensor<int, 2> a(2, 3); - a.setValues({{0, 1, 8}, {27, 64, 125}}); - Eigen::Tensor<double, 2> b = a.cast<double>().pow(1.0 / 3.0); - cout << "a" << endl << a << endl << endl; - cout << "b" << endl << b << endl << endl; - => - a - 0 1 8 - 27 64 125 - - b - 0 1 2 - 3 4 5 - -### <Operation> operator * (Scalar scale) - -Multiplies all the coefficients of the input tensor by the provided scale. - -### <Operation> cwiseMax(Scalar threshold) -TODO - -### <Operation> cwiseMin(Scalar threshold) -TODO - -### <Operation> unaryExpr(const CustomUnaryOp& func) -TODO - - -## Binary Element Wise Operations - -These operations take two input tensors as arguments. The 2 input tensors should -be of the same type and dimensions. The result is a tensor of the same -dimensions as the tensors to which they are applied, and unless otherwise -specified it is also of the same type. The requested operations are applied to -each pair of elements independently. - -### <Operation> operator+(const OtherDerived& other) - -Returns a tensor of the same type and dimensions as the input tensors -containing the coefficient wise sums of the inputs. - -### <Operation> operator-(const OtherDerived& other) - -Returns a tensor of the same type and dimensions as the input tensors -containing the coefficient wise differences of the inputs. - -### <Operation> operator*(const OtherDerived& other) - -Returns a tensor of the same type and dimensions as the input tensors -containing the coefficient wise products of the inputs. - -### <Operation> operator/(const OtherDerived& other) - -Returns a tensor of the same type and dimensions as the input tensors -containing the coefficient wise quotients of the inputs. - -This operator is not supported for integer types. - -### <Operation> cwiseMax(const OtherDerived& other) - -Returns a tensor of the same type and dimensions as the input tensors -containing the coefficient wise maximums of the inputs. 
- -### <Operation> cwiseMin(const OtherDerived& other) - -Returns a tensor of the same type and dimensions as the input tensors -containing the coefficient wise minimums of the inputs. - -### <Operation> Logical operators - -The following logical operators are supported as well: - -* operator&&(const OtherDerived& other) -* operator||(const OtherDerived& other) -* operator<(const OtherDerived& other) -* operator<=(const OtherDerived& other) -* operator>(const OtherDerived& other) -* operator>=(const OtherDerived& other) -* operator==(const OtherDerived& other) -* operator!=(const OtherDerived& other) - -They all return a tensor of boolean values. - - -## Selection (select(const ThenDerived& thenTensor, const ElseDerived& elseTensor)) - -Selection is a coefficient-wise ternary operator that is the tensor equivalent -to the if-then-else operation. - - Tensor<bool, 3> if = ...; - Tensor<float, 3> then = ...; - Tensor<float, 3> else = ...; - Tensor<float, 3> result = if.select(then, else); - -The 3 arguments must be of the same dimensions, which will also be the dimension -of the result. The 'if' tensor must be of type boolean, the 'then' and the -'else' tensor must be of the same type, which will also be the type of the -result. - -Each coefficient in the result is equal to the corresponding coefficient in the -'then' tensor if the corresponding value in the 'if' tensor is true. If not, the -resulting coefficient will come from the 'else' tensor. - - -## Contraction - -Tensor *contractions* are a generalization of the matrix product to the -multidimensional case. 
- - // Create 2 matrices using tensors of rank 2 - Eigen::Tensor<int, 2> a(2, 3); - a.setValues({{1, 2, 3}, {6, 5, 4}}); - Eigen::Tensor<int, 2> b(3, 2); - a.setValues({{1, 2}, {4, 5}, {5, 6}}); - - // Compute the traditional matrix product - array<IndexPair<int>, 1> product_dims = { IndexPair(1, 0) }; - Eigen::Tensor<int, 2> AB = a.contract(b, product_dims); - - // Compute the product of the transpose of the matrices - array<IndexPair<int>, 1> transpose_product_dims = { IndexPair(0, 1) }; - Eigen::Tensor<int, 2> AtBt = a.contract(b, transposed_product_dims); - - -## Reduction Operations - -A *Reduction* operation returns a tensor with fewer dimensions than the -original tensor. The values in the returned tensor are computed by applying a -*reduction operator* to slices of values from the original tensor. You specify -the dimensions along which the slices are made. - -The Eigen Tensor library provides a set of predefined reduction operators such -as ```maximum()``` and ```sum()``` and lets you define additional operators by -implementing a few methods from a reductor template. - -### Reduction Dimensions - -All reduction operations take a single parameter of type -```<TensorType>::Dimensions``` which can always be specified as an array of -ints. These are called the "reduction dimensions." The values are the indices -of the dimensions of the input tensor over which the reduction is done. The -parameter can have at most as many elements as the rank of the input tensor; -each element must be less than the tensor rank, as it indicates one of the -dimensions to reduce. - -Each dimension of the input tensor should occur at most once in the reduction -dimensions as the implementation does not remove duplicates. - -The order of the values in the reduction dimensions does not affect the -results, but the code may execute faster if you list the dimensions in -increasing order. - -Example: Reduction along one dimension. 
- - // Create a tensor of 2 dimensions - Eigen::Tensor<int, 2> a(2, 3); - a.setValues({{1, 2, 3}, {6, 5, 4}}); - // Reduce it along the second dimension (1)... - Eigen::array<int, 1> dims({1 /* dimension to reduce */}); - // ...using the "maximum" operator. - // The result is a tensor with one dimension. The size of - // that dimension is the same as the first (non-reduced) dimension of a. - Eigen::Tensor<int, 1> b = a.maximum(dims); - cout << "a" << endl << a << endl << endl; - cout << "b" << endl << b << endl << endl; - => - a - 1 2 3 - 6 5 4 - - b - 3 - 6 - -Example: Reduction along two dimensions. - - Eigen::Tensor<float, 3, Eigen::ColMajor> a(2, 3, 4); - a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f}, - {7.0f, 6.0f, 5.0f, 4.0f}, - {8.0f, 9.0f, 10.0f, 11.0f}}, - {{12.0f, 13.0f, 14.0f, 15.0f}, - {19.0f, 18.0f, 17.0f, 16.0f}, - {20.0f, 21.0f, 22.0f, 23.0f}}}); - // The tensor a has 3 dimensions. We reduce along the - // first 2, resulting in a tensor with a single dimension - // of size 4 (the last dimension of a.) - // Note that we pass the array of reduction dimensions - // directly to the maximum() call. - Eigen::Tensor<float, 1, Eigen::ColMajor> b = - a.maximum(Eigen::array<int, 2>({0, 1})); - cout << "b" << endl << b << endl << endl; - => - b - 20 - 21 - 22 - 23 - -#### Reduction along all dimensions - -As a special case, if you pass no parameter to a reduction operation the -original tensor is reduced along *all* its dimensions. The result is a -one-dimension tensor with a single value. - - Eigen::Tensor<float, 3> a(2, 3, 4); - a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f}, - {7.0f, 6.0f, 5.0f, 4.0f}, - {8.0f, 9.0f, 10.0f, 11.0f}}, - {{12.0f, 13.0f, 14.0f, 15.0f}, - {19.0f, 18.0f, 17.0f, 16.0f}, - {20.0f, 21.0f, 22.0f, 23.0f}}}); - // Reduce along all dimensions using the sum() operator. 
- Eigen::Tensor<float, 1> b = a.sum(); - cout << "b" << endl << b << endl << endl; - => - b - 276 - - -### <Operation> sum(const Dimensions& new_dims) -### <Operation> sum() - -Reduce a tensor using the sum() operator. The resulting values -are the sum of the reduced values. - -### <Operation> mean(const Dimensions& new_dims) -### <Operation> mean() - -Reduce a tensor using the mean() operator. The resulting values -are the mean of the reduced values. - -### <Operation> maximum(const Dimensions& new_dims) -### <Operation> maximum() - -Reduce a tensor using the maximum() operator. The resulting values are the -largest of the reduced values. - -### <Operation> minimum(const Dimensions& new_dims) -### <Operation> minimum() - -Reduce a tensor using the minimum() operator. The resulting values -are the smallest of the reduced values. - -### <Operation> prod(const Dimensions& new_dims) -### <Operation> prod() - -Reduce a tensor using the prod() operator. The resulting values -are the product of the reduced values. - -### <Operation> all(const Dimensions& new_dims) -### <Operation> all() -Reduce a tensor using the all() operator. Casts tensor to bool and then checks -whether all elements are true. Runs through all elements rather than -short-circuiting, so may be significantly inefficient. - -### <Operation> any(const Dimensions& new_dims) -### <Operation> any() -Reduce a tensor using the any() operator. Casts tensor to bool and then checks -whether any element is true. Runs through all elements rather than -short-circuiting, so may be significantly inefficient. - -### <Operation> reduce(const Dimensions& new_dims, const Reducer& reducer) - -Reduce a tensor using a user-defined reduction operator. See ```SumReducer``` -in TensorFunctors.h for information on how to implement a reduction operator. 
- - -## Convolutions - -### <Operation> convolve(const KernelDerived& kernel, const Dimensions& dims) - -Returns a tensor that is the output of the convolution of the input tensor with the kernel, -along the specified dimensions of the input tensor. The dimension size for dimensions of the output tensor -which were part of the convolution will be reduced by the formula: -output_dim_size = input_dim_size - kernel_dim_size + 1 (requires: input_dim_size >= kernel_dim_size). -The dimension sizes for dimensions that were not part of the convolution will remain the same. -Performance of the convolution can depend on the length of the stride(s) of the input tensor dimension(s) along which the -convolution is computed (the first dimension has the shortest stride for ColMajor, whereas RowMajor's shortest stride is -for the last dimension). - - // Compute convolution along the second and third dimension. - Tensor<float, 4, DataLayout> input(3, 3, 7, 11); - Tensor<float, 2, DataLayout> kernel(2, 2); - Tensor<float, 4, DataLayout> output(3, 2, 6, 11); - input.setRandom(); - kernel.setRandom(); - - Eigen::array<Eigen::DenseIndex, 2> dims({1, 2}); // Specify second and third dimension for convolution. - output = input.convolve(kernel, dims); - - for (int i = 0; i < 3; ++i) { - for (int j = 0; j < 2; ++j) { - for (int k = 0; k < 6; ++k) { - for (int l = 0; l < 11; ++l) { - const float result = output(i,j,k,l); - const float expected = input(i,j+0,k+0,l) * kernel(0,0) + - input(i,j+1,k+0,l) * kernel(1,0) + - input(i,j+0,k+1,l) * kernel(0,1) + - input(i,j+1,k+1,l) * kernel(1,1); - VERIFY_IS_APPROX(result, expected); - } - } - } - } - - - -## Geometrical Operations - -These operations return a Tensor with different dimensions than the original -Tensor. They can be used to access slices of tensors, see them with different -dimensions, or pad tensors with additional data. 
- -### <Operation> reshape(const Dimensions& new_dims) - -Returns a view of the input tensor that has been reshaped to the specified -new dimensions. The argument new_dims is an array of Index values. The -rank of the resulting tensor is equal to the number of elements in new_dims. - -The product of all the sizes in the new dimension array must be equal to -the number of elements in the input tensor. - - // Increase the rank of the input tensor by introducing a new dimension - // of size 1. - Tensor<float, 2> input(7, 11); - array<int, 3> three_dims{{7, 11, 1}}; - Tensor<float, 3> result = input.reshape(three_dims); - - // Decrease the rank of the input tensor by merging 2 dimensions; - array<int, 1> one_dim{{7 * 11}}; - Tensor<float, 1> result = input.reshape(one_dim); - -This operation does not move any data in the input tensor, so the resulting -contents of a reshaped Tensor depend on the data layout of the original Tensor. - -For example this is what happens when you ```reshape()``` a 2D ColMajor tensor -to one dimension: - - Eigen::Tensor<float, 2, Eigen::ColMajor> a(2, 3); - a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); - Eigen::array<Eigen::DenseIndex, 1> one_dim({3 * 2}); - Eigen::Tensor<float, 1, Eigen::ColMajor> b = a.reshape(one_dim); - cout << "b" << endl << b << endl; - => - b - 0 - 300 - 100 - 400 - 200 - 500 - -This is what happens when the 2D Tensor is RowMajor: - - Eigen::Tensor<float, 2, Eigen::RowMajor> a(2, 3); - a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); - Eigen::array<Eigen::DenseIndex, 1> one_dim({3 * 2}); - Eigen::Tensor<float, 1, Eigen::RowMajor> b = a.reshape(one_dim); - cout << "b" << endl << b << endl; - => - b - 0 - 100 - 200 - 300 - 400 - 500 - -The reshape operation is an lvalue. In other words, it can be used on the left -side of the assignment operator. 
- -The previous example can be rewritten as follows: - - Eigen::Tensor<float, 2, Eigen::ColMajor> a(2, 3); - a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); - Eigen::array<Eigen::DenseIndex, 2> two_dim({2, 3}); - Eigen::Tensor<float, 1, Eigen::ColMajor> b; - b.reshape(two_dim) = a; - cout << "b" << endl << b << endl; - => - b - 0 - 300 - 100 - 400 - 200 - 500 - -Note that "b" itself was not reshaped but that instead the assignment is done to -the reshape view of b. - - -### <Operation> shuffle(const Shuffle& shuffle) - -Returns a copy of the input tensor whose dimensions have been -reordered according to the specified permutation. The argument shuffle -is an array of Index values. Its size is the rank of the input -tensor. It must contain a permutation of 0, 1, ..., rank - 1. The i-th -dimension of the output tensor equals the size of the shuffle[i]-th -dimension of the input tensor. For example: - - // Shuffle all dimensions to the left by 1. - Tensor<float, 3> input(20, 30, 50); - // ... set some values in input. - Tensor<float, 3> output = input.shuffle({1, 2, 0}) - - eigen_assert(output.dimension(0) == 30); - eigen_assert(output.dimension(1) == 50); - eigen_assert(output.dimension(2) == 20); - -Indices into the output tensor are shuffled accordingly to formulate -indices into the input tensor. For example, one can assert in the above -code snippet that: - - eigen_assert(output(3, 7, 11) == input(11, 3, 7)); - -In general, one can assert that - - eigen_assert(output(..., indices[shuffle[i]], ...) == - input(..., indices[i], ...)) - -The shuffle operation results in an lvalue, which means that it can be assigned -to. In other words, it can be used on the left side of the assignment operator. - -Let's rewrite the previous example to take advantage of this feature: - - // Shuffle all dimensions to the left by 1. - Tensor<float, 3> input(20, 30, 50); - // ... set some values in input. 
- Tensor<float, 3> output(30, 50, 20); - output.shuffle({2, 0, 1}) = input; - - -### <Operation> stride(const Strides& strides) - -Returns a view of the input tensor that strides (skips stride-1 -elements) along each of the dimensions. The argument strides is an -array of Index values. The dimensions of the resulting tensor are -ceil(input_dimensions[i] / strides[i]). - -For example this is what happens when you ```stride()``` a 2D tensor: - - Eigen::Tensor<int, 2> a(4, 3); - a.setValues({{0, 100, 200}, {300, 400, 500}, {600, 700, 800}, {900, 1000, 1100}}); - Eigen::array<Eigen::DenseIndex, 2> strides({3, 2}); - Eigen::Tensor<int, 2> b = a.stride(strides); - cout << "b" << endl << b << endl; - => - b - 0 200 - 900 1100 - -It is possible to assign a tensor to a stride: - Tensor<float, 3> input(20, 30, 50); - // ... set some values in input. - Tensor<float, 3> output(40, 90, 200); - output.stride({2, 3, 4}) = input; - -### <Operation> inflate(const Strides& strides) - -Returns a view of an "inflated" tensor of the input tensor by inserting zeros -between the original elements in the input tensor. The argument strides is an -array of Index values, indicating how much "inflation" there is. The dimensions - of the resulting tensor are (input_dimensions[i] - 1) * strides[i] + 1. In -some sense it is the inverse of the ```stride()``` operation. - -For example this is what happens when you ```inflate()``` a 2D tensor: - - Eigen::Tensor<int, 2> a(2, 3); - a.setValues({{0, 100, 200}, {300, 400, 500}}); - Eigen::array<Eigen::DenseIndex, 2> strides({3, 2}); - Eigen::Tensor<int, 2> b = a.inflate(strides); - cout << "b" << endl << b << endl; - => - b - 0 0 0 100 0 0 200 - 0 0 0 0 0 0 0 - 300 0 0 400 0 0 500 - -The ```inflate()``` operation is an r-value only operation as it doesn't make -sense to assign a value to an inflated tensor in positions where the values are -hardwired to zero. 
- -### <Operation> slice(const StartIndices& offsets, const Sizes& extents) - -Returns a sub-tensor of the given tensor. For each dimension i, the slice is -made of the coefficients stored between offsets[i] and offsets[i] + extents[i] in -the input tensor. - - Eigen::Tensor<int, 2> a(4, 3); - a.setValues({{0, 100, 200}, {300, 400, 500}, - {600, 700, 800}, {900, 1000, 1100}}); - Eigen::array<int, 2> offsets = {1, 0}; - Eigen::array<int, 2> extents = {2, 2}; - Eigen::Tensor<int, 2> slice = a.slice(offsets, extents); - cout << "a" << endl << a << endl; - => - a - 0 100 200 - 300 400 500 - 600 700 800 - 900 1000 1100 - cout << "slice" << endl << slice << endl; - => - slice - 300 400 - 600 700 - - -### <Operation> chip(const Index offset, const Index dim) - -A chip is a special kind of slice. It is the subtensor at the given offset in -the dimension dim. The returned tensor has one fewer dimension than the input -tensor: the dimension dim is removed. - -For example, a matrix chip would be either a row or a column of the input -matrix. - - Eigen::Tensor<int, 2> a(4, 3); - a.setValues({{0, 100, 200}, {300, 400, 500}, - {600, 700, 800}, {900, 1000, 1100}}); - Eigen::Tensor<int, 1> row_3 = a.chip(2, 0); - Eigen::Tensor<int, 1> col_2 = a.chip(1, 1); - cout << "a" << endl << a << endl; - => - a - 0 100 200 - 300 400 500 - 600 700 800 - 900 1000 1100 - cout << "row_3" << endl << row_3 << endl; - => - row_3 - 600 700 800 - cout << "col_2" << endl << col_2 << endl; - => - col_2 - 100 400 700 1000 - -It is possible to assign values to a tensor chip since the chip operation is an -lvalue. 
For example: - - Eigen::Tensor<int, 1> a(3); - a.setValues({{100, 200, 300}}); - Eigen::Tensor<int, 2> b(2, 3); - b.setZero(); - b.chip(0, 0) = a; - cout << "a" << endl << a << endl; - => - a - 100 - 200 - 300 - cout << "b" << endl << b << endl; - => - b - 100 200 300 - 0 0 0 - - -### <Operation> reverse(const ReverseDimensions& reverse) - -Returns a view of the input tensor that reverses the order of the coefficients -along a subset of the dimensions. The argument reverse is an array of boolean -values that indicates whether or not the order of the coefficients should be -reversed along each of the dimensions. This operation preserves the dimensions -of the input tensor. - -For example this is what happens when you ```reverse()``` the first dimension -of a 2D tensor: - - Eigen::Tensor<int, 2> a(4, 3); - a.setValues({{0, 100, 200}, {300, 400, 500}, - {600, 700, 800}, {900, 1000, 1100}}); - Eigen::array<bool, 2> reverse({true, false}); - Eigen::Tensor<int, 2> b = a.reverse(reverse); - cout << "a" << endl << a << endl << "b" << endl << b << endl; - => - a - 0 100 200 - 300 400 500 - 600 700 800 - 900 1000 1100 - b - 900 1000 1100 - 600 700 800 - 300 400 500 - 0 100 200 - - -### <Operation> broadcast(const Broadcast& broadcast) - -Returns a view of the input tensor in which the input is replicated one to many -times. -The broadcast argument specifies how many copies of the input tensor need to be -made in each of the dimensions. 
- - Eigen::Tensor<int, 2> a(2, 3); - a.setValues({{0, 100, 200}, {300, 400, 500}}); - Eigen::array<int, 2> bcast({3, 2}); - Eigen::Tensor<int, 2> b = a.broadcast(bcast); - cout << "a" << endl << a << endl << "b" << endl << b << endl; - => - a - 0 100 200 - 300 400 500 - b - 0 100 200 0 100 200 - 300 400 500 300 400 500 - 0 100 200 0 100 200 - 300 400 500 300 400 500 - 0 100 200 0 100 200 - 300 400 500 300 400 500 - -### <Operation> concatenate(const OtherDerived& other, Axis axis) - -TODO - -### <Operation> pad(const PaddingDimensions& padding) - -Returns a view of the input tensor in which the input is padded with zeros. - - Eigen::Tensor<int, 2> a(2, 3); - a.setValues({{0, 100, 200}, {300, 400, 500}}); - Eigen::array<std::pair<int, int>, 2> paddings; - paddings[0] = make_pair(2, 3); - paddings[1] = make_pair(0, 1); - Eigen::Tensor<int, 2> b = a.pad(paddings); - cout << "a" << endl << a << endl << "b" << endl << b << endl; - => - a - 0 100 200 - 300 400 500 - b - 0 0 0 0 - 0 0 0 0 - 0 100 200 0 - 300 400 500 0 - 0 0 0 0 - 0 0 0 0 - 0 0 0 0 - - -### <Operation> extract_patches(const PatchDims& patch_dims) - -Returns a tensor of coefficient patches extracted from the input tensor, where -each patch is of dimension specified by 'patch_dims'. The returned tensor has -one greater dimension than the input tensor, which is used to index each patch. -The patch index in the output tensor depends on the data layout of the input -tensor: the patch index is the last dimension in ColMajor layout, and the first -dimension in RowMajor layout. 
- -For example, given the following input tensor: - - Eigen::Tensor<float, 2, DataLayout> tensor(3,4); - tensor.setValues({{0.0f, 1.0f, 2.0f, 3.0f}, - {4.0f, 5.0f, 6.0f, 7.0f}, - {8.0f, 9.0f, 10.0f, 11.0f}}); - - cout << "tensor: " << endl << tensor << endl; - => - tensor: - 0 1 2 3 - 4 5 6 7 - 8 9 10 11 - -Six 2x2 patches can be extracted and indexed using the following code: - - Eigen::Tensor<float, 3, DataLayout> patch; - Eigen::array<Eigen::DenseIndex, 2> patch_dims; - patch_dims[0] = 2; - patch_dims[1] = 2; - patch = tensor.extract_patches(patch_dims); - for (int k = 0; k < 6; ++k) { - cout << "patch index: " << k << endl; - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 2; ++j) { - if (DataLayout == ColMajor) { - cout << patch(i, j, k) << " "; - } else { - cout << patch(k, i, j) << " "; - } - } - cout << endl; - } - } - -This code results in the following output when the data layout is ColMajor: - - patch index: 0 - 0 1 - 4 5 - patch index: 1 - 4 5 - 8 9 - patch index: 2 - 1 2 - 5 6 - patch index: 3 - 5 6 - 9 10 - patch index: 4 - 2 3 - 6 7 - patch index: 5 - 6 7 - 10 11 - -This code results in the following output when the data layout is RowMajor: -(NOTE: the set of patches is the same as in ColMajor, but are indexed differently). 
- - patch index: 0 - 0 1 - 4 5 - patch index: 1 - 1 2 - 5 6 - patch index: 2 - 2 3 - 6 7 - patch index: 3 - 4 5 - 8 9 - patch index: 4 - 5 6 - 9 10 - patch index: 5 - 6 7 - 10 11 - -### <Operation> extract_image_patches(const Index patch_rows, const Index patch_cols, - const Index row_stride, const Index col_stride, - const Index in_row_stride, const Index in_col_stride, - const Index row_inflate_stride, const Index col_inflate_stride, - const PaddingType padding_type, const Scalar padding_value) - -Returns a tensor of coefficient image patches extracted from the input tensor, -which is expected to have dimensions ordered as follows (depending on the data -layout of the input tensor, and the number of additional dimensions 'N'): - -* ColMajor - * 1st dimension: channels (of size d) - * 2nd dimension: rows (of size r) - * 3rd dimension: columns (of size c) - * 4th-Nth dimension: time (for video) or batch (for bulk processing). - -* RowMajor (reverse order of ColMajor) - * 1st-Nth dimension: time (for video) or batch (for bulk processing). - * N+1'th dimension: columns (of size c) - * N+2'th dimension: rows (of size r) - * N+3'th dimension: channels (of size d) - -The returned tensor has one greater dimension than the input tensor, which is -used to index each patch. The patch index in the output tensor depends on the -data layout of the input tensor: the patch index is the 4'th dimension in -ColMajor layout, and the 4'th from the last dimension in RowMajor layout. 
- -For example, given the following input tensor with the following dimension -sizes: - -* depth: 2 -* rows: 3 -* columns: 5 -* batch: 7 - - Tensor<float, 4> tensor(2,3,5,7); - Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout(); - -2x2 image patches can be extracted and indexed using the following code: - -* 2D patch: ColMajor (patch indexed by second-to-last dimension) - - Tensor<float, 5> twod_patch; - twod_patch = tensor.extract_image_patches<2, 2>(); - // twod_patch.dimension(0) == 2 - // twod_patch.dimension(1) == 2 - // twod_patch.dimension(2) == 2 - // twod_patch.dimension(3) == 3*5 - // twod_patch.dimension(4) == 7 - -* 2D patch: RowMajor (patch indexed by the second dimension) - - Tensor<float, 5, RowMajor> twod_patch_row_major; - twod_patch_row_major = tensor_row_major.extract_image_patches<2, 2>(); - // twod_patch_row_major.dimension(0) == 7 - // twod_patch_row_major.dimension(1) == 3*5 - // twod_patch_row_major.dimension(2) == 2 - // twod_patch_row_major.dimension(3) == 2 - // twod_patch_row_major.dimension(4) == 2 - -Input parameters: - -* patch_rows, patch_cols: Spatial extent of the extracted patches. -* row_stride, col_stride: Image Displacement (in pixels) between the - upper-left coordinates of consecutive patches. -* in_row_stride, in_col_stride: Image displacement (in pixels) between - two consecutive patch samples. If larger than 1 (default), they allow - for sparsely sampling the input image. -* row_inflate_stride, col_inflate_stride: If larger than 1 (default), "inflates" - the inputs by inserting zeros between the original elements. This is useful - for backward convolution. -* padding_type: Boundary conditions. Either PADDING_SAME (default) - or PADDING_VALID. -* padding_value: the value used in padding, defaults to 0. - -## Special Operations - -### <Operation> cast<T>() - -Returns a tensor of type T with the same dimensions as the original tensor. 
-The returned tensor contains the values of the original tensor converted to -type T. - - Eigen::Tensor<float, 2> a(2, 3); - Eigen::Tensor<int, 2> b = a.cast<int>(); - -This can be useful for example if you need to do element-wise division of -Tensors of integers. This is not currently supported by the Tensor library -but you can easily cast the tensors to floats to do the division: - - Eigen::Tensor<int, 2> a(2, 3); - a.setValues({{0, 1, 2}, {3, 4, 5}}); - Eigen::Tensor<int, 2> b = - (a.cast<float>() / a.constant(2).cast<float>()).cast<int>(); - cout << "a" << endl << a << endl << endl; - cout << "b" << endl << b << endl << endl; - => - a - 0 1 2 - 3 4 5 - - b - 0 0 1 - 1 2 2 - - -### <Operation> eval() - -TODO - - -## Representation of scalar values - -Scalar values are often represented by tensors of size 1 and rank 1. It would be -more logical and user friendly to use tensors of rank 0 instead. For example -Tensor<T, N>::maximum() currently returns a Tensor<T, 1>. Similarly, the inner -product of 2 1d tensors (through contractions) returns a 1d tensor. In the -future these operations might be updated to return 0d tensors instead. - -## GPU Support - -NVIDIA GPU support can be enabled using: - - #define EIGEN_USE_GPU - -To speed up operations on GPU, it is also recommended to use 32 bit indices. This -prevents Eigen from using 64 bit loop indices, which have to be emulated in -software and make any operation extremely slow. - -This can be achieved globally by using the EIGEN_DEFAULT_DENSE_INDEX_TYPE define -as follows: - - #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int - -This can also be done individually for each tensor by using the Index32Bit -option as follows: - - Eigen::Tensor<DataType, Rank, Eigen::Index32Bit> t; - Eigen::TensorMap<Eigen::Tensor<DataType, Rank, Eigen::Index32Bit> > t_map; - - -## Limitations - -* The number of tensor dimensions is currently limited to 250 when using a - compiler that supports cxx11. It is limited to only 5 for older compilers. 
-* The IndexList class requires a cxx11 compliant compiler. You can use an - array of indices instead if you don't have access to a modern compiler. -* TensorVarDims are only partially supported -* On GPUs only floating point values are properly tested and optimized for. -* Complex and integer values are known to be broken on GPUs. If you try to use - them you'll most likely end up triggering a static assertion failure such as - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h deleted file mode 100644 index 13cb2157f2..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +++ /dev/null @@ -1,293 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H -#define EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H - -namespace Eigen { - -class DynamicSGroup -{ - public: - inline explicit DynamicSGroup() : m_numIndices(1), m_elements(), m_generators(), m_globalFlags(0) { m_elements.push_back(ge(Generator(0, 0, 0))); } - inline DynamicSGroup(const DynamicSGroup& o) : m_numIndices(o.m_numIndices), m_elements(o.m_elements), m_generators(o.m_generators), m_globalFlags(o.m_globalFlags) { } - inline DynamicSGroup(DynamicSGroup&& o) : m_numIndices(o.m_numIndices), m_elements(), m_generators(o.m_generators), m_globalFlags(o.m_globalFlags) { std::swap(m_elements, o.m_elements); } - inline DynamicSGroup& operator=(const DynamicSGroup& o) { m_numIndices = o.m_numIndices; m_elements = o.m_elements; m_generators = o.m_generators; m_globalFlags = o.m_globalFlags; return *this; } - inline DynamicSGroup& operator=(DynamicSGroup&& o) { m_numIndices = o.m_numIndices; std::swap(m_elements, o.m_elements); m_generators = o.m_generators; m_globalFlags = o.m_globalFlags; return *this; } - - void add(int one, int two, int flags = 0); - - template<typename Gen_> - inline void add(Gen_) { add(Gen_::One, Gen_::Two, Gen_::Flags); } - inline void addSymmetry(int one, int two) { add(one, two, 0); } - inline void addAntiSymmetry(int one, int two) { add(one, two, NegationFlag); } - inline void addHermiticity(int one, int two) { add(one, two, ConjugationFlag); } - inline void addAntiHermiticity(int one, int two) { add(one, two, NegationFlag | ConjugationFlag); } - - template<typename Op, typename RV, typename Index, std::size_t N, typename... Args> - inline RV apply(const std::array<Index, N>& idx, RV initial, Args&&... 
args) const - { - eigen_assert(N >= m_numIndices && "Can only apply symmetry group to objects that have at least the required number of indices."); - for (std::size_t i = 0; i < size(); i++) - initial = Op::run(h_permute(i, idx, typename internal::gen_numeric_list<int, N>::type()), m_elements[i].flags, initial, std::forward<Args>(args)...); - return initial; - } - - template<typename Op, typename RV, typename Index, typename... Args> - inline RV apply(const std::vector<Index>& idx, RV initial, Args&&... args) const - { - eigen_assert(idx.size() >= m_numIndices && "Can only apply symmetry group to objects that have at least the required number of indices."); - for (std::size_t i = 0; i < size(); i++) - initial = Op::run(h_permute(i, idx), m_elements[i].flags, initial, std::forward<Args>(args)...); - return initial; - } - - inline int globalFlags() const { return m_globalFlags; } - inline std::size_t size() const { return m_elements.size(); } - - template<typename Tensor_, typename... IndexTypes> - inline internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup> operator()(Tensor_& tensor, typename Tensor_::Index firstIndex, IndexTypes... 
otherIndices) const - { - static_assert(sizeof...(otherIndices) + 1 == Tensor_::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); - return operator()(tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices>{{firstIndex, otherIndices...}}); - } - - template<typename Tensor_> - inline internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup> operator()(Tensor_& tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices> const& indices) const - { - return internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup>(tensor, *this, indices); - } - private: - struct GroupElement { - std::vector<int> representation; - int flags; - bool isId() const - { - for (std::size_t i = 0; i < representation.size(); i++) - if (i != (size_t)representation[i]) - return false; - return true; - } - }; - struct Generator { - int one; - int two; - int flags; - constexpr inline Generator(int one_, int two_, int flags_) : one(one_), two(two_), flags(flags_) {} - }; - - std::size_t m_numIndices; - std::vector<GroupElement> m_elements; - std::vector<Generator> m_generators; - int m_globalFlags; - - template<typename Index, std::size_t N, int... n> - inline std::array<Index, N> h_permute(std::size_t which, const std::array<Index, N>& idx, internal::numeric_list<int, n...>) const - { - return std::array<Index, N>{{ idx[n >= m_numIndices ? n : m_elements[which].representation[n]]... 
}}; - } - - template<typename Index> - inline std::vector<Index> h_permute(std::size_t which, std::vector<Index> idx) const - { - std::vector<Index> result; - result.reserve(idx.size()); - for (auto k : m_elements[which].representation) - result.push_back(idx[k]); - for (std::size_t i = m_numIndices; i < idx.size(); i++) - result.push_back(idx[i]); - return result; - } - - inline GroupElement ge(Generator const& g) const - { - GroupElement result; - result.representation.reserve(m_numIndices); - result.flags = g.flags; - for (std::size_t k = 0; k < m_numIndices; k++) { - if (k == (std::size_t)g.one) - result.representation.push_back(g.two); - else if (k == (std::size_t)g.two) - result.representation.push_back(g.one); - else - result.representation.push_back(int(k)); - } - return result; - } - - GroupElement mul(GroupElement, GroupElement) const; - inline GroupElement mul(Generator g1, GroupElement g2) const - { - return mul(ge(g1), g2); - } - - inline GroupElement mul(GroupElement g1, Generator g2) const - { - return mul(g1, ge(g2)); - } - - inline GroupElement mul(Generator g1, Generator g2) const - { - return mul(ge(g1), ge(g2)); - } - - inline int findElement(GroupElement e) const - { - for (auto ee : m_elements) { - if (ee.representation == e.representation) - return ee.flags ^ e.flags; - } - return -1; - } - - void updateGlobalFlags(int flagDiffOfSameGenerator); -}; - -// dynamic symmetry group that auto-adds the template parameters in the constructor -template<typename... 
Gen> -class DynamicSGroupFromTemplateArgs : public DynamicSGroup -{ - public: - inline DynamicSGroupFromTemplateArgs() : DynamicSGroup() - { - add_all(internal::type_list<Gen...>()); - } - inline DynamicSGroupFromTemplateArgs(DynamicSGroupFromTemplateArgs const& other) : DynamicSGroup(other) { } - inline DynamicSGroupFromTemplateArgs(DynamicSGroupFromTemplateArgs&& other) : DynamicSGroup(other) { } - inline DynamicSGroupFromTemplateArgs<Gen...>& operator=(const DynamicSGroupFromTemplateArgs<Gen...>& o) { DynamicSGroup::operator=(o); return *this; } - inline DynamicSGroupFromTemplateArgs<Gen...>& operator=(DynamicSGroupFromTemplateArgs<Gen...>&& o) { DynamicSGroup::operator=(o); return *this; } - - private: - template<typename Gen1, typename... GenNext> - inline void add_all(internal::type_list<Gen1, GenNext...>) - { - add(Gen1()); - add_all(internal::type_list<GenNext...>()); - } - - inline void add_all(internal::type_list<>) - { - } -}; - -inline DynamicSGroup::GroupElement DynamicSGroup::mul(GroupElement g1, GroupElement g2) const -{ - eigen_internal_assert(g1.representation.size() == m_numIndices); - eigen_internal_assert(g2.representation.size() == m_numIndices); - - GroupElement result; - result.representation.reserve(m_numIndices); - for (std::size_t i = 0; i < m_numIndices; i++) { - int v = g2.representation[g1.representation[i]]; - eigen_assert(v >= 0); - result.representation.push_back(v); - } - result.flags = g1.flags ^ g2.flags; - return result; -} - -inline void DynamicSGroup::add(int one, int two, int flags) -{ - eigen_assert(one >= 0); - eigen_assert(two >= 0); - eigen_assert(one != two); - - if ((std::size_t)one >= m_numIndices || (std::size_t)two >= m_numIndices) { - std::size_t newNumIndices = (one > two) ? 
one : two + 1; - for (auto& gelem : m_elements) { - gelem.representation.reserve(newNumIndices); - for (std::size_t i = m_numIndices; i < newNumIndices; i++) - gelem.representation.push_back(i); - } - m_numIndices = newNumIndices; - } - - Generator g{one, two, flags}; - GroupElement e = ge(g); - - /* special case for first generator */ - if (m_elements.size() == 1) { - while (!e.isId()) { - m_elements.push_back(e); - e = mul(e, g); - } - - if (e.flags > 0) - updateGlobalFlags(e.flags); - - // only add in case we didn't have identity - if (m_elements.size() > 1) - m_generators.push_back(g); - return; - } - - int p = findElement(e); - if (p >= 0) { - updateGlobalFlags(p); - return; - } - - std::size_t coset_order = m_elements.size(); - m_elements.push_back(e); - for (std::size_t i = 1; i < coset_order; i++) - m_elements.push_back(mul(m_elements[i], e)); - m_generators.push_back(g); - - std::size_t coset_rep = coset_order; - do { - for (auto g : m_generators) { - e = mul(m_elements[coset_rep], g); - p = findElement(e); - if (p < 0) { - // element not yet in group - m_elements.push_back(e); - for (std::size_t i = 1; i < coset_order; i++) - m_elements.push_back(mul(m_elements[i], e)); - } else if (p > 0) { - updateGlobalFlags(p); - } - } - coset_rep += coset_order; - } while (coset_rep < m_elements.size()); -} - -inline void DynamicSGroup::updateGlobalFlags(int flagDiffOfSameGenerator) -{ - switch (flagDiffOfSameGenerator) { - case 0: - default: - // nothing happened - break; - case NegationFlag: - // every element is it's own negative => whole tensor is zero - m_globalFlags |= GlobalZeroFlag; - break; - case ConjugationFlag: - // every element is it's own conjugate => whole tensor is real - m_globalFlags |= GlobalRealFlag; - break; - case (NegationFlag | ConjugationFlag): - // every element is it's own negative conjugate => whole tensor is imaginary - m_globalFlags |= GlobalImagFlag; - break; - /* NOTE: - * since GlobalZeroFlag == GlobalRealFlag | GlobalImagFlag, if 
one generator - * causes the tensor to be real and the next one to be imaginary, this will - * trivially give the correct result - */ - } -} - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H - -/* - * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; - */ diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h deleted file mode 100644 index 942293bd71..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +++ /dev/null @@ -1,236 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H -#define EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H - -namespace Eigen { - -namespace internal { - -template<typename list> struct tensor_static_symgroup_permutate; - -template<int... 
nn> -struct tensor_static_symgroup_permutate<numeric_list<int, nn...>> -{ - constexpr static std::size_t N = sizeof...(nn); - - template<typename T> - constexpr static inline std::array<T, N> run(const std::array<T, N>& indices) - { - return {{indices[nn]...}}; - } -}; - -template<typename indices_, int flags_> -struct tensor_static_symgroup_element -{ - typedef indices_ indices; - constexpr static int flags = flags_; -}; - -template<typename Gen, int N> -struct tensor_static_symgroup_element_ctor -{ - typedef tensor_static_symgroup_element< - typename gen_numeric_list_swapped_pair<int, N, Gen::One, Gen::Two>::type, - Gen::Flags - > type; -}; - -template<int N> -struct tensor_static_symgroup_identity_ctor -{ - typedef tensor_static_symgroup_element< - typename gen_numeric_list<int, N>::type, - 0 - > type; -}; - -template<typename iib> -struct tensor_static_symgroup_multiply_helper -{ - template<int... iia> - constexpr static inline numeric_list<int, get<iia, iib>::value...> helper(numeric_list<int, iia...>) { - return numeric_list<int, get<iia, iib>::value...>(); - } -}; - -template<typename A, typename B> -struct tensor_static_symgroup_multiply -{ - private: - typedef typename A::indices iia; - typedef typename B::indices iib; - constexpr static int ffa = A::flags; - constexpr static int ffb = B::flags; - - public: - static_assert(iia::count == iib::count, "Cannot multiply symmetry elements with different number of indices."); - - typedef tensor_static_symgroup_element< - decltype(tensor_static_symgroup_multiply_helper<iib>::helper(iia())), - ffa ^ ffb - > type; -}; - -template<typename A, typename B> -struct tensor_static_symgroup_equality -{ - typedef typename A::indices iia; - typedef typename B::indices iib; - constexpr static int ffa = A::flags; - constexpr static int ffb = B::flags; - static_assert(iia::count == iib::count, "Cannot compare symmetry elements with different number of indices."); - - constexpr static bool value = is_same<iia, iib>::value; - - 
private: - /* this should be zero if they are identical, or else the tensor - * will be forced to be pure real, pure imaginary or even pure zero - */ - constexpr static int flags_cmp_ = ffa ^ ffb; - - /* either they are not equal, then we don't care whether the flags - * match, or they are equal, and then we have to check - */ - constexpr static bool is_zero = value && flags_cmp_ == NegationFlag; - constexpr static bool is_real = value && flags_cmp_ == ConjugationFlag; - constexpr static bool is_imag = value && flags_cmp_ == (NegationFlag | ConjugationFlag); - - public: - constexpr static int global_flags = - (is_real ? GlobalRealFlag : 0) | - (is_imag ? GlobalImagFlag : 0) | - (is_zero ? GlobalZeroFlag : 0); -}; - -template<std::size_t NumIndices, typename... Gen> -struct tensor_static_symgroup -{ - typedef StaticSGroup<Gen...> type; - constexpr static std::size_t size = type::static_size; -}; - -template<typename Index, std::size_t N, int... ii, int... jj> -constexpr static inline std::array<Index, N> tensor_static_symgroup_index_permute(std::array<Index, N> idx, internal::numeric_list<int, ii...>, internal::numeric_list<int, jj...>) -{ - return {{ idx[ii]..., idx[jj]... }}; -} - -template<typename Index, int... ii> -static inline std::vector<Index> tensor_static_symgroup_index_permute(std::vector<Index> idx, internal::numeric_list<int, ii...>) -{ - std::vector<Index> result{{ idx[ii]... }}; - std::size_t target_size = idx.size(); - for (std::size_t i = result.size(); i < target_size; i++) - result.push_back(idx[i]); - return result; -} - -template<typename T> struct tensor_static_symgroup_do_apply; - -template<typename first, typename... next> -struct tensor_static_symgroup_do_apply<internal::type_list<first, next...>> -{ - template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, std::size_t NumIndices, typename... Args> - static inline RV run(const std::array<Index, NumIndices>& idx, RV initial, Args&&... 
args) - { - static_assert(NumIndices >= SGNumIndices, "Can only apply symmetry group to objects that have at least the required amount of indices."); - typedef typename internal::gen_numeric_list<int, NumIndices - SGNumIndices, SGNumIndices>::type remaining_indices; - initial = Op::run(tensor_static_symgroup_index_permute(idx, typename first::indices(), remaining_indices()), first::flags, initial, std::forward<Args>(args)...); - return tensor_static_symgroup_do_apply<internal::type_list<next...>>::template run<Op, RV, SGNumIndices>(idx, initial, args...); - } - - template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, typename... Args> - static inline RV run(const std::vector<Index>& idx, RV initial, Args&&... args) - { - eigen_assert(idx.size() >= SGNumIndices && "Can only apply symmetry group to objects that have at least the required amount of indices."); - initial = Op::run(tensor_static_symgroup_index_permute(idx, typename first::indices()), first::flags, initial, std::forward<Args>(args)...); - return tensor_static_symgroup_do_apply<internal::type_list<next...>>::template run<Op, RV, SGNumIndices>(idx, initial, args...); - } -}; - -template<EIGEN_TPL_PP_SPEC_HACK_DEF(typename, empty)> -struct tensor_static_symgroup_do_apply<internal::type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>> -{ - template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, std::size_t NumIndices, typename... Args> - static inline RV run(const std::array<Index, NumIndices>&, RV initial, Args&&...) - { - // do nothing - return initial; - } - - template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, typename... Args> - static inline RV run(const std::vector<Index>&, RV initial, Args&&...) - { - // do nothing - return initial; - } -}; - -} // end namespace internal - -template<typename... 
Gen> -class StaticSGroup -{ - constexpr static std::size_t NumIndices = internal::tensor_symmetry_num_indices<Gen...>::value; - typedef internal::group_theory::enumerate_group_elements< - internal::tensor_static_symgroup_multiply, - internal::tensor_static_symgroup_equality, - typename internal::tensor_static_symgroup_identity_ctor<NumIndices>::type, - internal::type_list<typename internal::tensor_static_symgroup_element_ctor<Gen, NumIndices>::type...> - > group_elements; - typedef typename group_elements::type ge; - public: - constexpr inline StaticSGroup() {} - constexpr inline StaticSGroup(const StaticSGroup<Gen...>&) {} - constexpr inline StaticSGroup(StaticSGroup<Gen...>&&) {} - - template<typename Op, typename RV, typename Index, std::size_t N, typename... Args> - static inline RV apply(const std::array<Index, N>& idx, RV initial, Args&&... args) - { - return internal::tensor_static_symgroup_do_apply<ge>::template run<Op, RV, NumIndices>(idx, initial, args...); - } - - template<typename Op, typename RV, typename Index, typename... Args> - static inline RV apply(const std::vector<Index>& idx, RV initial, Args&&... args) - { - eigen_assert(idx.size() == NumIndices); - return internal::tensor_static_symgroup_do_apply<ge>::template run<Op, RV, NumIndices>(idx, initial, args...); - } - - constexpr static std::size_t static_size = ge::count; - - constexpr static inline std::size_t size() { - return ge::count; - } - constexpr static inline int globalFlags() { return group_elements::global_flags; } - - template<typename Tensor_, typename... IndexTypes> - inline internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>> operator()(Tensor_& tensor, typename Tensor_::Index firstIndex, IndexTypes... 
otherIndices) const - { - static_assert(sizeof...(otherIndices) + 1 == Tensor_::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); - return operator()(tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices>{{firstIndex, otherIndices...}}); - } - - template<typename Tensor_> - inline internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>> operator()(Tensor_& tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices> const& indices) const - { - return internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>>(tensor, *this, indices); - } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H - -/* - * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; - */ diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h deleted file mode 100644 index 879d6cd77b..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +++ /dev/null @@ -1,338 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H -#define EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H - -namespace Eigen { - -enum { - NegationFlag = 0x01, - ConjugationFlag = 0x02 -}; - -enum { - GlobalRealFlag = 0x01, - GlobalImagFlag = 0x02, - GlobalZeroFlag = 0x03 -}; - -namespace internal { - -template<std::size_t NumIndices, typename... Sym> struct tensor_symmetry_pre_analysis; -template<std::size_t NumIndices, typename... 
Sym> struct tensor_static_symgroup; -template<bool instantiate, std::size_t NumIndices, typename... Sym> struct tensor_static_symgroup_if; -template<typename Tensor_> struct tensor_symmetry_calculate_flags; -template<typename Tensor_> struct tensor_symmetry_assign_value; -template<typename... Sym> struct tensor_symmetry_num_indices; - -} // end namespace internal - -template<int One_, int Two_> -struct Symmetry -{ - static_assert(One_ != Two_, "Symmetries must cover distinct indices."); - constexpr static int One = One_; - constexpr static int Two = Two_; - constexpr static int Flags = 0; -}; - -template<int One_, int Two_> -struct AntiSymmetry -{ - static_assert(One_ != Two_, "Symmetries must cover distinct indices."); - constexpr static int One = One_; - constexpr static int Two = Two_; - constexpr static int Flags = NegationFlag; -}; - -template<int One_, int Two_> -struct Hermiticity -{ - static_assert(One_ != Two_, "Symmetries must cover distinct indices."); - constexpr static int One = One_; - constexpr static int Two = Two_; - constexpr static int Flags = ConjugationFlag; -}; - -template<int One_, int Two_> -struct AntiHermiticity -{ - static_assert(One_ != Two_, "Symmetries must cover distinct indices."); - constexpr static int One = One_; - constexpr static int Two = Two_; - constexpr static int Flags = ConjugationFlag | NegationFlag; -}; - -/** \class DynamicSGroup - * \ingroup TensorSymmetry_Module - * - * \brief Dynamic symmetry group - * - * The %DynamicSGroup class represents a symmetry group that need not be known at - * compile time. 
It is useful if one wants to support arbitrary run-time defineable - * symmetries for tensors, but it is also instantiated if a symmetry group is defined - * at compile time that would be either too large for the compiler to reasonably - * generate (using templates to calculate this at compile time is very inefficient) - * or that the compiler could generate the group but that it wouldn't make sense to - * unroll the loop for setting coefficients anymore. - */ -class DynamicSGroup; - -/** \internal - * - * \class DynamicSGroupFromTemplateArgs - * \ingroup TensorSymmetry_Module - * - * \brief Dynamic symmetry group, initialized from template arguments - * - * This class is a child class of DynamicSGroup. It uses the template arguments - * specified to initialize itself. - */ -template<typename... Gen> -class DynamicSGroupFromTemplateArgs; - -/** \class StaticSGroup - * \ingroup TensorSymmetry_Module - * - * \brief Static symmetry group - * - * This class represents a symmetry group that is known and resolved completely - * at compile time. Ideally, no run-time penalty is incurred compared to the - * manual unrolling of the symmetry. - * - * <b><i>CAUTION:</i></b> - * - * Do not use this class directly for large symmetry groups. The compiler - * may run into a limit, or segfault or in the very least will take a very, - * very, very long time to compile the code. Use the SGroup class instead - * if you want a static group. That class contains logic that will - * automatically select the DynamicSGroup class instead if the symmetry - * group becomes too large. (In that case, unrolling may not even be - * beneficial.) - */ -template<typename... Gen> -class StaticSGroup; - -/** \class SGroup - * \ingroup TensorSymmetry_Module - * - * \brief Symmetry group, initialized from template arguments - * - * This class represents a symmetry group whose generators are already - * known at compile time. 
It may or may not be resolved at compile time, - * depending on the estimated size of the group. - * - * \sa StaticSGroup - * \sa DynamicSGroup - */ -template<typename... Gen> -class SGroup : public internal::tensor_symmetry_pre_analysis<internal::tensor_symmetry_num_indices<Gen...>::value, Gen...>::root_type -{ - public: - constexpr static std::size_t NumIndices = internal::tensor_symmetry_num_indices<Gen...>::value; - typedef typename internal::tensor_symmetry_pre_analysis<NumIndices, Gen...>::root_type Base; - - // make standard constructors + assignment operators public - inline SGroup() : Base() { } - inline SGroup(const SGroup<Gen...>& other) : Base(other) { } - inline SGroup(SGroup<Gen...>&& other) : Base(other) { } - inline SGroup<Gen...>& operator=(const SGroup<Gen...>& other) { Base::operator=(other); return *this; } - inline SGroup<Gen...>& operator=(SGroup<Gen...>&& other) { Base::operator=(other); return *this; } - - // all else is defined in the base class -}; - -namespace internal { - -template<typename... Sym> struct tensor_symmetry_num_indices -{ - constexpr static std::size_t value = 1; -}; - -template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> -{ -private: - constexpr static std::size_t One = static_cast<std::size_t>(One_); - constexpr static std::size_t Two = static_cast<std::size_t>(Two_); - constexpr static std::size_t Three = tensor_symmetry_num_indices<Sym...>::value; - - // don't use std::max, since it's not constexpr until C++14... - constexpr static std::size_t maxOneTwoPlusOne = ((One > Two) ? One : Two) + 1; -public: - constexpr static std::size_t value = (maxOneTwoPlusOne > Three) ? maxOneTwoPlusOne : Three; -}; - -template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<AntiSymmetry<One_, Two_>, Sym...> - : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {}; -template<int One_, int Two_, typename... 
Sym> struct tensor_symmetry_num_indices<Hermiticity<One_, Two_>, Sym...> - : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {}; -template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<AntiHermiticity<One_, Two_>, Sym...> - : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {}; - -/** \internal - * - * \class tensor_symmetry_pre_analysis - * \ingroup TensorSymmetry_Module - * - * \brief Pre-select whether to use a static or dynamic symmetry group - * - * When a symmetry group could in principle be determined at compile time, - * this template implements the logic whether to actually do that or whether - * to rather defer that to runtime. - * - * The logic is as follows: - * <dl> - * <dt><b>No generators (trivial symmetry):</b></dt> - * <dd>Use a trivial static group. Ideally, this has no performance impact - * compared to not using symmetry at all. In practice, this might not - * be the case.</dd> - * <dt><b>More than 4 generators:</b></dt> - * <dd>Calculate the group at run time, it is likely far too large for the - * compiler to be able to properly generate it in a realistic time.</dd> - * <dt><b>Up to and including 4 generators:</b></dt> - * <dd>Actually enumerate all group elements, but then check how many there - * are. If there are more than 16, it is unlikely that unrolling the - * loop (as is done in the static compile-time case) is sensible, so - * use a dynamic group instead. If there are at most 16 elements, actually - * use that static group. 
Note that the largest group with 4 generators - * still compiles with reasonable resources.</dd> - * </dl> - * - * Note: Example compile time performance with g++-4.6 on an Intenl Core i5-3470 - * with 16 GiB RAM (all generators non-redundant and the subgroups don't - * factorize): - * - * # Generators -O0 -ggdb -O2 - * ------------------------------------------------------------------- - * 1 0.5 s / 250 MiB 0.45s / 230 MiB - * 2 0.5 s / 260 MiB 0.5 s / 250 MiB - * 3 0.65s / 310 MiB 0.62s / 310 MiB - * 4 2.2 s / 860 MiB 1.7 s / 770 MiB - * 5 130 s / 13000 MiB 120 s / 11000 MiB - * - * It is clear that everything is still very efficient up to 4 generators, then - * the memory and CPU requirements become unreasonable. Thus we only instantiate - * the template group theory logic if the number of generators supplied is 4 or - * lower, otherwise this will be forced to be done during runtime, where the - * algorithm is reasonably fast. - */ -template<std::size_t NumIndices> -struct tensor_symmetry_pre_analysis<NumIndices> -{ - typedef StaticSGroup<> root_type; -}; - -template<std::size_t NumIndices, typename Gen_, typename... Gens_> -struct tensor_symmetry_pre_analysis<NumIndices, Gen_, Gens_...> -{ - constexpr static std::size_t max_static_generators = 4; - constexpr static std::size_t max_static_elements = 16; - typedef tensor_static_symgroup_if<(sizeof...(Gens_) + 1 <= max_static_generators), NumIndices, Gen_, Gens_...> helper; - constexpr static std::size_t possible_size = helper::size; - - typedef typename conditional< - possible_size == 0 || possible_size >= max_static_elements, - DynamicSGroupFromTemplateArgs<Gen_, Gens_...>, - typename helper::type - >::type root_type; -}; - -template<bool instantiate, std::size_t NumIndices, typename... Gens> -struct tensor_static_symgroup_if -{ - constexpr static std::size_t size = 0; - typedef void type; -}; - -template<std::size_t NumIndices, typename... 
Gens> -struct tensor_static_symgroup_if<true, NumIndices, Gens...> : tensor_static_symgroup<NumIndices, Gens...> {}; - -template<typename Tensor_> -struct tensor_symmetry_assign_value -{ - typedef typename Tensor_::Index Index; - typedef typename Tensor_::Scalar Scalar; - constexpr static std::size_t NumIndices = Tensor_::NumIndices; - - static inline int run(const std::array<Index, NumIndices>& transformed_indices, int transformation_flags, int dummy, Tensor_& tensor, const Scalar& value_) - { - Scalar value(value_); - if (transformation_flags & ConjugationFlag) - value = numext::conj(value); - if (transformation_flags & NegationFlag) - value = -value; - tensor.coeffRef(transformed_indices) = value; - return dummy; - } -}; - -template<typename Tensor_> -struct tensor_symmetry_calculate_flags -{ - typedef typename Tensor_::Index Index; - constexpr static std::size_t NumIndices = Tensor_::NumIndices; - - static inline int run(const std::array<Index, NumIndices>& transformed_indices, int transform_flags, int current_flags, const std::array<Index, NumIndices>& orig_indices) - { - if (transformed_indices == orig_indices) { - if (transform_flags & (ConjugationFlag | NegationFlag)) - return current_flags | GlobalImagFlag; // anti-hermitian diagonal - else if (transform_flags & ConjugationFlag) - return current_flags | GlobalRealFlag; // hermitian diagonal - else if (transform_flags & NegationFlag) - return current_flags | GlobalZeroFlag; // anti-symmetric diagonal - } - return current_flags; - } -}; - -template<typename Tensor_, typename Symmetry_, int Flags = 0> -class tensor_symmetry_value_setter -{ - public: - typedef typename Tensor_::Index Index; - typedef typename Tensor_::Scalar Scalar; - constexpr static std::size_t NumIndices = Tensor_::NumIndices; - - inline tensor_symmetry_value_setter(Tensor_& tensor, Symmetry_ const& symmetry, std::array<Index, NumIndices> const& indices) - : m_tensor(tensor), m_symmetry(symmetry), m_indices(indices) { } - - inline 
tensor_symmetry_value_setter<Tensor_, Symmetry_, Flags>& operator=(Scalar const& value) - { - doAssign(value); - return *this; - } - private: - Tensor_& m_tensor; - Symmetry_ m_symmetry; - std::array<Index, NumIndices> m_indices; - - inline void doAssign(Scalar const& value) - { - #ifdef EIGEN_TENSOR_SYMMETRY_CHECK_VALUES - int value_flags = m_symmetry.template apply<internal::tensor_symmetry_calculate_flags<Tensor_>, int>(m_indices, m_symmetry.globalFlags(), m_indices); - if (value_flags & GlobalRealFlag) - eigen_assert(numext::imag(value) == 0); - if (value_flags & GlobalImagFlag) - eigen_assert(numext::real(value) == 0); - #endif - m_symmetry.template apply<internal::tensor_symmetry_assign_value<Tensor_>, int>(m_indices, 0, m_tensor, value); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H - -/* - * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; - */ diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h deleted file mode 100644 index 0fe0b7c46d..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +++ /dev/null @@ -1,666 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H -#define EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H - -namespace Eigen { - -namespace internal { - -namespace group_theory { - -/** \internal - * \file CXX11/Tensor/util/TemplateGroupTheory.h - * This file contains C++ templates that implement group theory algorithms. - * - * The algorithms allow for a compile-time analysis of finite groups. - * - * Currently only Dimino's algorithm is implemented, which returns a list - * of all elements in a group given a set of (possibly redundant) generators. - * (One could also do that with the so-called orbital algorithm, but that - * is much more expensive and usually has no advantages.) - */ - -/********************************************************************** - * "Ok kid, here is where it gets complicated." - * - Amelia Pond in the "Doctor Who" episode - * "The Big Bang" - * - * Dimino's algorithm - * ================== - * - * The following is Dimino's algorithm in sequential form: - * - * Input: identity element, list of generators, equality check, - * multiplication operation - * Output: list of group elements - * - * 1. add identity element - * 2. remove identities from list of generators - * 3. add all powers of first generator that aren't the - * identity element - * 4. go through all remaining generators: - * a. if generator is already in the list of elements - * -> do nothing - * b. otherwise - * i. remember current # of elements - * (i.e. the size of the current subgroup) - * ii. add all current elements (which includes - * the identity) each multiplied from right - * with the current generator to the group - * iii. add all remaining cosets that are generated - * by products of the new generator with itself - * and all other generators seen so far - * - * In functional form, this is implemented as a long set of recursive - * templates that have a complicated relationship. 
- * - * The main interface for Dimino's algorithm is the template - * enumerate_group_elements. All lists are implemented as variadic - * type_list<typename...> and numeric_list<typename = int, int...> - * templates. - * - * 'Calling' templates is usually done via typedefs. - * - * This algorithm is an extended version of the basic version. The - * extension consists in the fact that each group element has a set - * of flags associated with it. Multiplication of two group elements - * with each other results in a group element whose flags are the - * XOR of the flags of the previous elements. Each time the algorithm - * notices that a group element it just calculated is already in the - * list of current elements, the flags of both will be compared and - * added to the so-called 'global flags' of the group. - * - * The rationale behind this extension is that this allows not only - * for the description of symmetries between tensor indices, but - * also allows for the description of hermiticity, antisymmetry and - * antihermiticity. Negation and conjugation each are specific bit - * in the flags value and if two different ways to reach a group - * element lead to two different flags, this poses a constraint on - * the allowed values of the resulting tensor. For example, if a - * group element is reach both with and without the conjugation - * flags, it is clear that the resulting tensor has to be real. - * - * Note that this flag mechanism is quite generic and may have other - * uses beyond tensor properties. - * - * IMPORTANT: - * This algorithm assumes the group to be finite. If you try to - * run it with a group that's infinite, the algorithm will only - * terminate once you hit a compiler limit (max template depth). - * Also note that trying to use this implementation to create a - * very large group will probably either make you hit the same - * limit, cause the compiler to segfault or at the very least - * take a *really* long time (hours, days, weeks - sic!) 
to - * compile. It is not recommended to plug in more than 4 - * generators, unless they are independent of each other. - */ - -/** \internal - * - * \class strip_identities - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Cleanse a list of group elements of the identity element - * - * This template is used to make a first pass through all initial - * generators of Dimino's algorithm and remove the identity - * elements. - * - * \sa enumerate_group_elements - */ -template<template<typename, typename> class Equality, typename id, typename L> struct strip_identities; - -template< - template<typename, typename> class Equality, - typename id, - typename t, - typename... ts -> -struct strip_identities<Equality, id, type_list<t, ts...>> -{ - typedef typename conditional< - Equality<id, t>::value, - typename strip_identities<Equality, id, type_list<ts...>>::type, - typename concat<type_list<t>, typename strip_identities<Equality, id, type_list<ts...>>::type>::type - >::type type; - constexpr static int global_flags = Equality<id, t>::global_flags | strip_identities<Equality, id, type_list<ts...>>::global_flags; -}; - -template< - template<typename, typename> class Equality, - typename id - EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, ts) -> -struct strip_identities<Equality, id, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(ts)>> -{ - typedef type_list<> type; - constexpr static int global_flags = 0; -}; - -/** \internal - * - * \class dimino_first_step_elements_helper - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Recursive template that adds powers of the first generator to the list of group elements - * - * This template calls itself recursively to add powers of the first - * generator to the list of group elements. It stops if it reaches - * the identity element again. 
- * - * \sa enumerate_group_elements, dimino_first_step_elements - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename g, - typename current_element, - typename elements, - bool dont_add_current_element // = false -> -struct dimino_first_step_elements_helper : - public dimino_first_step_elements_helper< - Multiply, - Equality, - id, - g, - typename Multiply<current_element, g>::type, - typename concat<elements, type_list<current_element>>::type, - Equality<typename Multiply<current_element, g>::type, id>::value - > {}; - -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename g, - typename current_element, - typename elements -> -struct dimino_first_step_elements_helper<Multiply, Equality, id, g, current_element, elements, true> -{ - typedef elements type; - constexpr static int global_flags = Equality<current_element, id>::global_flags; -}; - -/** \internal - * - * \class dimino_first_step_elements - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Add all powers of the first generator to the list of group elements - * - * This template takes the first non-identity generator and generates the initial - * list of elements which consists of all powers of that generator. For a group - * with just one generated, it would be enumerated after this. 
- * - * \sa enumerate_group_elements - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename generators -> -struct dimino_first_step_elements -{ - typedef typename get<0, generators>::type first_generator; - typedef typename skip<1, generators>::type next_generators; - typedef type_list<first_generator> generators_done; - - typedef dimino_first_step_elements_helper< - Multiply, - Equality, - id, - first_generator, - first_generator, - type_list<id>, - false - > helper; - typedef typename helper::type type; - constexpr static int global_flags = helper::global_flags; -}; - -/** \internal - * - * \class dimino_get_coset_elements - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Generate all elements of a specific coset - * - * This template generates all the elements of a specific coset by - * multiplying all elements in the given subgroup with the new - * coset representative. Note that the first element of the - * subgroup is always the identity element, so the first element of - * ther result of this template is going to be the coset - * representative itself. - * - * Note that this template accepts an additional boolean parameter - * that specifies whether to actually generate the coset (true) or - * just return an empty list (false). 
- * - * \sa enumerate_group_elements, dimino_add_cosets_for_rep - */ -template< - template<typename, typename> class Multiply, - typename sub_group_elements, - typename new_coset_rep, - bool generate_coset // = true -> -struct dimino_get_coset_elements -{ - typedef typename apply_op_from_right<Multiply, new_coset_rep, sub_group_elements>::type type; -}; - -template< - template<typename, typename> class Multiply, - typename sub_group_elements, - typename new_coset_rep -> -struct dimino_get_coset_elements<Multiply, sub_group_elements, new_coset_rep, false> -{ - typedef type_list<> type; -}; - -/** \internal - * - * \class dimino_add_cosets_for_rep - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Recursive template for adding coset spaces - * - * This template multiplies the coset representative with a generator - * from the list of previous generators. If the new element is not in - * the group already, it adds the corresponding coset. Finally it - * proceeds to call itself with the next generator from the list. - * - * \sa enumerate_group_elements, dimino_add_all_coset_spaces - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename sub_group_elements, - typename elements, - typename generators, - typename rep_element, - int sub_group_size -> -struct dimino_add_cosets_for_rep; - -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename sub_group_elements, - typename elements, - typename g, - typename... 
gs, - typename rep_element, - int sub_group_size -> -struct dimino_add_cosets_for_rep<Multiply, Equality, id, sub_group_elements, elements, type_list<g, gs...>, rep_element, sub_group_size> -{ - typedef typename Multiply<rep_element, g>::type new_coset_rep; - typedef contained_in_list_gf<Equality, new_coset_rep, elements> _cil; - constexpr static bool add_coset = !_cil::value; - - typedef typename dimino_get_coset_elements< - Multiply, - sub_group_elements, - new_coset_rep, - add_coset - >::type coset_elements; - - typedef dimino_add_cosets_for_rep< - Multiply, - Equality, - id, - sub_group_elements, - typename concat<elements, coset_elements>::type, - type_list<gs...>, - rep_element, - sub_group_size - > _helper; - - typedef typename _helper::type type; - constexpr static int global_flags = _cil::global_flags | _helper::global_flags; - - /* Note that we don't have to update global flags here, since - * we will only add these elements if they are not part of - * the group already. But that only happens if the coset rep - * is not already in the group, so the check for the coset rep - * will catch this. 
- */ -}; - -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename sub_group_elements, - typename elements - EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty), - typename rep_element, - int sub_group_size -> -struct dimino_add_cosets_for_rep<Multiply, Equality, id, sub_group_elements, elements, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, rep_element, sub_group_size> -{ - typedef elements type; - constexpr static int global_flags = 0; -}; - -/** \internal - * - * \class dimino_add_all_coset_spaces - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Recursive template for adding all coset spaces for a new generator - * - * This template tries to go through the list of generators (with - * the help of the dimino_add_cosets_for_rep template) as long as - * it still finds elements that are not part of the group and add - * the corresponding cosets. - * - * \sa enumerate_group_elements, dimino_add_cosets_for_rep - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename sub_group_elements, - typename elements, - typename generators, - int sub_group_size, - int rep_pos, - bool stop_condition // = false -> -struct dimino_add_all_coset_spaces -{ - typedef typename get<rep_pos, elements>::type rep_element; - typedef dimino_add_cosets_for_rep< - Multiply, - Equality, - id, - sub_group_elements, - elements, - generators, - rep_element, - sub_group_elements::count - > _ac4r; - typedef typename _ac4r::type new_elements; - - constexpr static int new_rep_pos = rep_pos + sub_group_elements::count; - constexpr static bool new_stop_condition = new_rep_pos >= new_elements::count; - - typedef dimino_add_all_coset_spaces< - Multiply, - Equality, - id, - sub_group_elements, - new_elements, - generators, - sub_group_size, - new_rep_pos, - new_stop_condition - > _helper; - - typedef typename _helper::type type; - constexpr static int 
global_flags = _helper::global_flags | _ac4r::global_flags; -}; - -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename sub_group_elements, - typename elements, - typename generators, - int sub_group_size, - int rep_pos -> -struct dimino_add_all_coset_spaces<Multiply, Equality, id, sub_group_elements, elements, generators, sub_group_size, rep_pos, true> -{ - typedef elements type; - constexpr static int global_flags = 0; -}; - -/** \internal - * - * \class dimino_add_generator - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Enlarge the group by adding a new generator. - * - * It accepts a boolean parameter that determines if the generator is redundant, - * i.e. was already seen in the group. In that case, it reduces to a no-op. - * - * \sa enumerate_group_elements, dimino_add_all_coset_spaces - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename elements, - typename generators_done, - typename current_generator, - bool redundant // = false -> -struct dimino_add_generator -{ - /* this template is only called if the generator is not redundant - * => all elements of the group multiplied with the new generator - * are going to be new elements of the most trivial coset space - */ - typedef typename apply_op_from_right<Multiply, current_generator, elements>::type multiplied_elements; - typedef typename concat<elements, multiplied_elements>::type new_elements; - - constexpr static int rep_pos = elements::count; - - typedef dimino_add_all_coset_spaces< - Multiply, - Equality, - id, - elements, // elements of previous subgroup - new_elements, - typename concat<generators_done, type_list<current_generator>>::type, - elements::count, // size of previous subgroup - rep_pos, - false // don't stop (because rep_pos >= new_elements::count is always false at this point) - > _helper; - typedef typename _helper::type 
type; - constexpr static int global_flags = _helper::global_flags; -}; - -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename elements, - typename generators_done, - typename current_generator -> -struct dimino_add_generator<Multiply, Equality, id, elements, generators_done, current_generator, true> -{ - // redundant case - typedef elements type; - constexpr static int global_flags = 0; -}; - -/** \internal - * - * \class dimino_add_remaining_generators - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Recursive template that adds all remaining generators to a group - * - * Loop through the list of generators that remain and successively - * add them to the group. - * - * \sa enumerate_group_elements, dimino_add_generator - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename generators_done, - typename remaining_generators, - typename elements -> -struct dimino_add_remaining_generators -{ - typedef typename get<0, remaining_generators>::type first_generator; - typedef typename skip<1, remaining_generators>::type next_generators; - - typedef contained_in_list_gf<Equality, first_generator, elements> _cil; - - typedef dimino_add_generator< - Multiply, - Equality, - id, - elements, - generators_done, - first_generator, - _cil::value - > _helper; - - typedef typename _helper::type new_elements; - - typedef dimino_add_remaining_generators< - Multiply, - Equality, - id, - typename concat<generators_done, type_list<first_generator>>::type, - next_generators, - new_elements - > _next_iter; - - typedef typename _next_iter::type type; - constexpr static int global_flags = - _cil::global_flags | - _helper::global_flags | - _next_iter::global_flags; -}; - -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename generators_done, - typename 
elements -> -struct dimino_add_remaining_generators<Multiply, Equality, id, generators_done, type_list<>, elements> -{ - typedef elements type; - constexpr static int global_flags = 0; -}; - -/** \internal - * - * \class enumerate_group_elements_noid - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Helper template that implements group element enumeration - * - * This is a helper template that implements the actual enumeration - * of group elements. This has been split so that the list of - * generators can be cleansed of the identity element before - * performing the actual operation. - * - * \sa enumerate_group_elements - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename generators, - int initial_global_flags = 0 -> -struct enumerate_group_elements_noid -{ - typedef dimino_first_step_elements<Multiply, Equality, id, generators> first_step; - typedef typename first_step::type first_step_elements; - - typedef dimino_add_remaining_generators< - Multiply, - Equality, - id, - typename first_step::generators_done, - typename first_step::next_generators, // remaining_generators - typename first_step::type // first_step elements - > _helper; - - typedef typename _helper::type type; - constexpr static int global_flags = - initial_global_flags | - first_step::global_flags | - _helper::global_flags; -}; - -// in case when no generators are specified -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - int initial_global_flags -> -struct enumerate_group_elements_noid<Multiply, Equality, id, type_list<>, initial_global_flags> -{ - typedef type_list<id> type; - constexpr static int global_flags = initial_global_flags; -}; - -/** \internal - * - * \class enumerate_group_elements - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Enumerate all elements in a finite group - * - * This template enumerates all elements 
in a finite group. It accepts - * the following template parameters: - * - * \tparam Multiply The multiplication operation that multiplies two group elements - * with each other. - * \tparam Equality The equality check operation that checks if two group elements - * are equal to another. - * \tparam id The identity element - * \tparam _generators A list of (possibly redundant) generators of the group - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename _generators -> -struct enumerate_group_elements - : public enumerate_group_elements_noid< - Multiply, - Equality, - id, - typename strip_identities<Equality, id, _generators>::type, - strip_identities<Equality, id, _generators>::global_flags - > -{ -}; - -} // end namespace group_theory - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H - -/* - * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; - */ |