aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h74
1 files changed, 74 insertions, 0 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
index 787cbd031..2be4f9cc5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+// Copyright (C) 2018 Mehdi Goli <eigen@codeplay.com> Codeplay Software Ltd.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -44,6 +45,7 @@ EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
uint64_t rnd = ::random() ^ mach_absolute_time();
return rnd;
+
#else
// Augment the current time with pseudo random number generation
// to ensure that we get different seeds if we try to generate seeds
@@ -147,14 +149,41 @@ template <typename T> class UniformRandomGenerator {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
uint64_t seed = 0) {
m_state = PCG_XSH_RS_state(seed);
+ #ifdef EIGEN_USE_SYCL
+ // In SYCL it is not possible to build PCG_XSH_RS_state in one step.
+ // Therefor, we need two step to initializate the m_state.
+ // IN SYCL, the constructor of the functor is s called on the CPU
+ // and we get the clock seed here from the CPU. However, This seed is
+ //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function.
+ // and only available on the Operator() function (which is called on the GPU).
+ // Thus for CUDA (((CLOCK + global_thread_id)* 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread
+ // but for SYCL ((CLOCK * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread and each thread adds
+ // the (global_thread_id* 6364136223846793005ULL) for itself only once, in order to complete the construction
+ // similar to CUDA Therefore, the thread Id injection is not available at this stage.
+ //However when the operator() is called the thread ID will be avilable. So inside the opeator,
+ // we add the thrreadID, BlockId,... (which is equivalent of i)
+ //to the seed and construct the unique m_state per thead similar to cuda.
+ m_exec_once =false;
+ #endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
const UniformRandomGenerator& other) {
m_state = other.m_state;
+ #ifdef EIGEN_USE_SYCL
+ m_exec_once =other.m_exec_once;
+ #endif
}
template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
T operator()(Index i) const {
+ #ifdef EIGEN_USE_SYCL
+ if(!m_exec_once) {
+ // This is the second stage of adding thread Id to the CPU clock seed and build unique seed per thread
+ // The (i * 6364136223846793005ULL) is the remaining part of the PCG_XSH_RS_state on the GPU side
+ m_state += (i * 6364136223846793005ULL);
+ m_exec_once =true;
+ }
+ #endif
T result = RandomToTypeUniform<T>(&m_state, i);
return result;
}
@@ -163,6 +192,14 @@ template <typename T> class UniformRandomGenerator {
Packet packetOp(Index i) const {
const int packetSize = internal::unpacket_traits<Packet>::size;
EIGEN_ALIGN_MAX T values[packetSize];
+ #ifdef EIGEN_USE_SYCL
+ if(!m_exec_once) {
+ // This is the second stage of adding thread Id to the CPU clock seed and build unique seed per thread
+ m_state += (i * 6364136223846793005ULL);
+ m_exec_once =true;
+ }
+ #endif
+ EIGEN_UNROLL_LOOP
for (int j = 0; j < packetSize; ++j) {
values[j] = RandomToTypeUniform<T>(&m_state, i);
}
@@ -171,6 +208,9 @@ template <typename T> class UniformRandomGenerator {
private:
mutable uint64_t m_state;
+ #ifdef EIGEN_USE_SYCL
+ mutable bool m_exec_once;
+ #endif
};
template <typename Scalar>
@@ -222,14 +262,37 @@ template <typename T> class NormalRandomGenerator {
// Uses the given "seed" if non-zero, otherwise uses a random seed.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) {
m_state = PCG_XSH_RS_state(seed);
+ #ifdef EIGEN_USE_SYCL
+ // In SYCL it is not possible to build PCG_XSH_RS_state in one step.
+ // Therefor, we need two steps to initializate the m_state.
+ // IN SYCL, the constructor of the functor is s called on the CPU
+ // and we get the clock seed here from the CPU. However, This seed is
+ //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function.
+ // and only available on the Operator() function (which is called on the GPU).
+ // Therefore, the thread Id injection is not available at this stage. However when the operator()
+ //is called the thread ID will be avilable. So inside the opeator,
+ // we add the thrreadID, BlockId,... (which is equivalent of i)
+ //to the seed and construct the unique m_state per thead similar to cuda.
+ m_exec_once =false;
+ #endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(
const NormalRandomGenerator& other) {
m_state = other.m_state;
+#ifdef EIGEN_USE_SYCL
+ m_exec_once=other.m_exec_once;
+#endif
}
template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
T operator()(Index i) const {
+ #ifdef EIGEN_USE_SYCL
+ if(!m_exec_once) {
+ // This is the second stage of adding thread Id to the CPU clock seed and build unique seed per thread
+ m_state += (i * 6364136223846793005ULL);
+ m_exec_once =true;
+ }
+ #endif
T result = RandomToTypeNormal<T>(&m_state, i);
return result;
}
@@ -238,6 +301,14 @@ template <typename T> class NormalRandomGenerator {
Packet packetOp(Index i) const {
const int packetSize = internal::unpacket_traits<Packet>::size;
EIGEN_ALIGN_MAX T values[packetSize];
+ #ifdef EIGEN_USE_SYCL
+ if(!m_exec_once) {
+ // This is the second stage of adding thread Id to the CPU clock seed and build unique seed per thread
+ m_state += (i * 6364136223846793005ULL);
+ m_exec_once =true;
+ }
+ #endif
+ EIGEN_UNROLL_LOOP
for (int j = 0; j < packetSize; ++j) {
values[j] = RandomToTypeNormal<T>(&m_state, i);
}
@@ -246,6 +317,9 @@ template <typename T> class NormalRandomGenerator {
private:
mutable uint64_t m_state;
+ #ifdef EIGEN_USE_SYCL
+ mutable bool m_exec_once;
+ #endif
};