Eigen/src/Core/arch/CUDA/TypeCasting.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_TYPE_CASTING_CUDA_H
#define EIGEN_TYPE_CASTING_CUDA_H

namespace Eigen {

namespace internal {

#if defined(EIGEN_HAS_CUDA_FP16)

template<>
struct scalar_cast_op<float, half> {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
  typedef half result_type;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const float& a) const {
    #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
      return __float2half(a);
    #else
      return half(a);
    #endif
  }
};

template<>
struct functor_traits<scalar_cast_op<float, half> >
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };


template<>
struct scalar_cast_op<int, half> {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
  typedef half result_type;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const int& a) const {
    #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
      return __float2half(static_cast<float>(a));
    #else
      return half(static_cast<float>(a));
    #endif
  }
};

template<>
struct functor_traits<scalar_cast_op<int, half> >
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };


template<>
struct scalar_cast_op<half, float> {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
  typedef float result_type;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const half& a) const {
    #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
      return __half2float(a);
    #else
      return static_cast<float>(a);
    #endif
  }
};

template<>
struct functor_traits<scalar_cast_op<half, float> >
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };


template <>
struct type_casting_traits<half, float> {
  enum {
    VectorizedCast = 1,
    SrcCoeffRatio = 2,
    TgtCoeffRatio = 1
  };
};

template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
  float2 r1 = __half22float2(a);
  float2 r2 = __half22float2(b);
  return make_float4(r1.x, r1.y, r2.x, r2.y);
#else
  half r1;
  r1.x = a.x & 0xFFFF;
  half r2;
  r2.x = (a.x & 0xFFFF0000) >> 16;
  half r3;
  r3.x = b.x & 0xFFFF;
  half r4;
  r4.x = (b.x & 0xFFFF0000) >> 16;
  return make_float4(static_cast<float>(r1), static_cast<float>(r2),
                     static_cast<float>(r3), static_cast<float>(r4));
#endif
}

template <>
struct type_casting_traits<float, half> {
  enum {
    VectorizedCast = 1,
    SrcCoeffRatio = 1,
    TgtCoeffRatio = 2
  };
};

template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
  // Simply discard the second half of the input
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
  return __float22half2_rn(make_float2(a.x, a.y));
#else
  half r1 = static_cast<half>(a.x);
  half r2 = static_cast<half>(a.y);
  half2 r;
  r.x = 0;
  r.x |= r1.x;
  r.x |= (static_cast<unsigned int>(r2.x) << 16);
  return r;
#endif
}

#endif

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_TYPE_CASTING_CUDA_H