#if GOOGLE_CUDA

#define EIGEN_USE_GPU

#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/kernels/training_ops.h"

namespace tensorflow {

typedef Eigen::GpuDevice GPUDevice;

namespace functor {
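
// Gradient descent: var -= alpha * delta, with the scalar step size alpha
// broadcast across the flattened variable.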
template <typename T>
struct ApplyGradientDescent<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat var,
                  typename TTypes<T>::ConstScalar alpha,
                  typename TTypes<T>::ConstFlat delta) {
    Eigen::array<typename TTypes<T>::Tensor::Index, 1> bcast;
    bcast[0] = delta.dimension(0);
    Eigen::Sizes<1> single;
    var.device(d) -= alpha.reshape(single).broadcast(bcast) * delta;
  }
};
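
// Adagrad: accum += grad^2; var -= lr * grad / sqrt(accum).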
template <typename T>
struct ApplyAdagrad<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat var,
                  typename TTypes<T>::Flat accum,
                  typename TTypes<T>::ConstScalar lr,
                  typename TTypes<T>::ConstFlat grad) {
    accum.device(d) += grad.square();
    Eigen::array<typename TTypes<T>::Tensor::Index, 1> bcast;
    bcast[0] = grad.dimension(0);
    Eigen::Sizes<1> single;
    var.device(d) -=
        lr.reshape(single).broadcast(bcast) * grad * accum.rsqrt();
  }
};
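
// Momentum: accum = momentum * accum + grad; var -= lr * accum.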
template <typename T>
struct ApplyMomentum<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat var,
                  typename TTypes<T>::Flat accum,
                  typename TTypes<T>::ConstScalar lr,
                  typename TTypes<T>::ConstFlat grad,
                  typename TTypes<T>::ConstScalar momentum) {
    Eigen::array<typename TTypes<T>::Tensor::Index, 1> bcast;
    bcast[0] = grad.dimension(0);
    Eigen::Sizes<1> single;
    accum.device(d) = accum * momentum.reshape(single).broadcast(bcast) + grad;
    var.device(d) -= lr.reshape(single).broadcast(bcast) * accum;
  }
};
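
// Adam:
//   m   <- m + (1 - beta1) * (grad - m)
//   v   <- v + (1 - beta2) * (grad^2 - v)
//   var -= lr * sqrt(1 - beta2_power) / (1 - beta1_power)
//          * m / (epsilon + sqrt(v))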
template <typename T>
struct ApplyAdam<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat var,
                  typename TTypes<T>::Flat m, typename TTypes<T>::Flat v,
                  typename TTypes<T>::ConstScalar beta1_power,
                  typename TTypes<T>::ConstScalar beta2_power,
                  typename TTypes<T>::ConstScalar lr,
                  typename TTypes<T>::ConstScalar beta1,
                  typename TTypes<T>::ConstScalar beta2,
                  typename TTypes<T>::ConstScalar epsilon,
                  typename TTypes<T>::ConstFlat grad) {
    Eigen::array<typename TTypes<T>::Tensor::Index, 1> bcast;
    bcast[0] = grad.dimension(0);
    Eigen::Sizes<1> single;
    const auto one = static_cast<T>(1.0);
    m.device(d) =
        m +
        (beta1.constant(one) - beta1).reshape(single).broadcast(bcast) *
            (grad - m);
    v.device(d) =
        v +
        (beta2.constant(one) - beta2).reshape(single).broadcast(bcast) *
            (grad.square() - v);
    var.device(d) -= (lr * (beta2_power.constant(one) - beta2_power).sqrt() /
                      (beta1_power.constant(one) - beta1_power))
                         .reshape(single)
                         .broadcast(bcast) *
                     m / (epsilon.reshape(single).broadcast(bcast) + v.sqrt());
  }
};
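
// RMSProp:
//   ms  <- ms + (1 - rho) * (grad^2 - ms)
//   mom <- momentum * mom + lr * grad / sqrt(epsilon + ms)
//   var -= mom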
template <typename T>
struct ApplyRMSProp<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat var,
                  typename TTypes<T>::Flat ms, typename TTypes<T>::Flat mom,
                  typename TTypes<T>::ConstScalar lr,
                  typename TTypes<T>::ConstScalar rho,
                  typename TTypes<T>::ConstScalar momentum,
                  typename TTypes<T>::ConstScalar epsilon,
                  typename TTypes<T>::ConstFlat grad) {
    Eigen::array<typename TTypes<T>::Tensor::Index, 1> bcast;
    bcast[0] = grad.dimension(0);
    Eigen::Sizes<1> single;
    const auto one = static_cast<T>(1.0);
    ms.device(d) = ms +
                   (rho.constant(one) - rho).reshape(single).broadcast(bcast) *
                       (grad.square() - ms);
    mom.device(d) =
        mom * momentum.reshape(single).broadcast(bcast) +
        lr.reshape(single).broadcast(bcast) * grad /
            ((epsilon.reshape(single).broadcast(bcast) + ms).sqrt());
    var.device(d) -= mom;
  }
};

}  // namespace functor
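
// Explicit instantiations for the supported floating-point types. Compiling
// these specializations here, in the CUDA translation unit, lets the op
// kernels defined elsewhere (training_ops.cc) link against them.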
template struct functor::ApplyGradientDescent<GPUDevice, float>;
template struct functor::ApplyGradientDescent<GPUDevice, double>;
template struct functor::ApplyAdagrad<GPUDevice, float>;
template struct functor::ApplyAdagrad<GPUDevice, double>;
template struct functor::ApplyMomentum<GPUDevice, float>;
template struct functor::ApplyMomentum<GPUDevice, double>;
template struct functor::ApplyAdam<GPUDevice, float>;
template struct functor::ApplyAdam<GPUDevice, double>;
template struct functor::ApplyRMSProp<GPUDevice, float>;
template struct functor::ApplyRMSProp<GPUDevice, double>;

}  // end namespace tensorflow

#endif  // GOOGLE_CUDA