unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H

namespace Eigen {
namespace internal {

// Standard reduction functors
template <typename T> struct SumReducer
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SumReducer() : m_sum(0) { }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) {
    m_sum += t;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize() const {
    return m_sum;
  }

 private:
  typename internal::remove_all<T>::type m_sum;
};

// Reduction functor that tracks the largest value handed to reduce() and
// reports it from finalize().
template <typename T> struct MaxReducer
{
  // Seeds the running maximum with the lowest finite value representable by T.
  //
  // numeric_limits<T>::lowest() is used rather than -max(): for floating-point
  // types the two are identical, but for signed integers -max() is min()+1
  // (so a reduction over {min()} would be off by one), and for unsigned types
  // the negation wraps around to 1 (so a reduction over zeros would yield 1).
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MaxReducer() : m_max((std::numeric_limits<T>::lowest)()) { }

  // Replaces the running maximum with `t` when `t` is strictly greater.
  // Values that do not compare greater (including any value for which the
  // comparison is false, such as NaN) leave the state unchanged.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) {
    if (t > m_max) { m_max = t; }
  }

  // Returns the largest value seen so far, or the seed value if reduce() was
  // never called with a value greater than it.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize() const {
    return m_max;
  }

 private:
  // Storage type is T as transformed by internal::remove_all (presumably
  // stripping cv/reference qualifiers -- confirm against its definition).
  typename internal::remove_all<T>::type m_max;
};

template <typename T> struct MinReducer
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MinReducer() : m_min((std::numeric_limits<T>::max)()) { }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) {
    if (t < m_min) { m_min = t; }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize() const {
    return m_min;
  }

 private:
  typename internal::remove_all<T>::type m_min;
};


#if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)
// We're not compiling a cuda kernel
// Nullary functor producing random values of type T, one scalar or one packet
// at a time. The values come from random<T>(), which is defined elsewhere;
// its exact distribution is not visible from this file.
template <typename T>
struct UniformRandomGenerator {
  // Scalar path: both index arguments are accepted but ignored.
  template <typename Index>
  T operator()(Index, Index = 0) const {
    return random<T>();
  }

  // Packet path: fills an aligned scratch buffer with one random scalar per
  // packet lane and loads it as a packet. Index arguments are ignored.
  template <typename Index>
  typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
    typedef typename internal::packet_traits<T>::type Packet;
    const int lanes = internal::packet_traits<T>::size;

    EIGEN_ALIGN_DEFAULT T scratch[lanes];
    int filled = 0;
    while (filled < lanes) {
      scratch[filled] = random<T>();
      ++filled;
    }
    return internal::pload<Packet>(scratch);
  }
};

#else

// We're compiling a cuda kernel.
// Only the declaration of the primary template appears here; the definitions
// that follow in this branch are explicit specializations.
template <typename T> struct UniformRandomGenerator;

// CUDA specialization for float: draws values from a per-object cuRAND
// Philox state instead of the host-side random<T>().
template <>
struct UniformRandomGenerator<float> {
  // Initializes the generator state with seed 0, using the calling thread's
  // linear x-index as the second curand_init argument (the subsequence,
  // per the cuRAND device API) and 0 as the third (the offset).
  UniformRandomGenerator() {
    const int linear_thread = threadIdx.x + blockDim.x * blockIdx.x;
    curand_init(0, linear_thread, 0, &m_state);
  }

  // Scalar path: index arguments are ignored; advances the internal state.
  template <typename Index>
  float operator()(Index, Index = 0) const {
    return curand_uniform(&m_state);
  }

  // Packet path: returns four floats from a single cuRAND call.
  template <typename Index>
  float4 packetOp(Index, Index = 0) const {
    return curand_uniform4(&m_state);
  }

 private:
  // mutable so that the const call operators can advance the generator.
  mutable curandStatePhilox4_32_10_t m_state;
};

// CUDA specialization for double: draws values from a per-object cuRAND
// Philox state instead of the host-side random<T>().
template <>
struct UniformRandomGenerator<double> {
  // Initializes the generator state with seed 0, using the calling thread's
  // linear x-index as the second curand_init argument (the subsequence,
  // per the cuRAND device API) and 0 as the third (the offset).
  UniformRandomGenerator() {
    const int linear_thread = threadIdx.x + blockDim.x * blockIdx.x;
    curand_init(0, linear_thread, 0, &m_state);
  }

  // Scalar path: index arguments are ignored; advances the internal state.
  template <typename Index>
  double operator()(Index, Index = 0) const {
    return curand_uniform_double(&m_state);
  }

  // Packet path: returns two doubles from a single cuRAND call.
  template <typename Index>
  double2 packetOp(Index, Index = 0) const {
    return curand_uniform2_double(&m_state);
  }

 private:
  // mutable so that the const call operators can advance the generator.
  mutable curandStatePhilox4_32_10_t m_state;
};

#endif


} // end namespace internal
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H