// path: root/tensorflow/stream_executor/cuda/cuda_rng.h
// blob: 4e1b82969b6c1637fa2d1eb9ddeab52955c42c4a (plain)
#ifndef TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_RNG_H_
#define TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_RNG_H_

#include "tensorflow/stream_executor/platform/mutex.h"
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/platform/thread_annotations.h"
#include "tensorflow/stream_executor/plugin_registry.h"
#include "tensorflow/stream_executor/rng.h"

// Opaque handle type for a cuRAND generator, declared here as a pointer to an
// incomplete struct; this header does not include the cuRAND headers.
// NOTE(review): presumably mirrors the typedef in curand.h and must stay in
// sync with it - confirm.
typedef struct curandGenerator_st *curandGenerator_t;

namespace perftools {
namespace gputools {

// Forward declarations: this header refers to these types only through
// pointers, so their full definitions are not needed here.
class Stream;
template <typename ElemT>
class DeviceMemory;

namespace cuda {

// Opaque and unique identifier for the cuRAND plugin.
// Only declared here (extern, no initializer); the definition lives elsewhere.
extern const PluginId kCuRandPlugin;

// Forward declaration; this header only uses CUDAExecutor through a pointer.
class CUDAExecutor;

// CUDA-platform implementation of the random number generation support
// interface (rng::RngSupport).
//
// Holds a single cuRAND generator handle (rng_) whose accesses are guarded by
// the mutex mu_.
//
// Thread-safe post-initialization.
class CUDARng : public rng::RngSupport {
 public:
  // Creates the RNG support object on behalf of |parent|, the CUDAExecutor
  // that instantiates it.
  // NOTE(review): presumably the cuRAND handle is not acquired here but in
  // Init() - confirm against the implementation file.
  explicit CUDARng(CUDAExecutor *parent);

  // Retrieves a curand library generator handle. This is necessary for
  // enqueuing random number generation work onto the device.
  // NOTE(review): the bool result presumably signals success (true) or
  // failure (false) - confirm against the implementation file.
  // TODO(leary) provide a way for users to select the RNG algorithm.
  bool Init();

  // Releases a curand library generator handle, if one was acquired.
  ~CUDARng() override;

  // See rng::RngSupport for details on the following overrides.
  //
  // Uniform-distribution overloads, one per supported element type (float,
  // double, and their std::complex counterparts). |v| is the device memory
  // region to fill.
  bool DoPopulateRandUniform(Stream *stream, DeviceMemory<float> *v) override;
  bool DoPopulateRandUniform(Stream *stream, DeviceMemory<double> *v) override;
  bool DoPopulateRandUniform(Stream *stream,
                             DeviceMemory<std::complex<float>> *v) override;
  bool DoPopulateRandUniform(Stream *stream,
                             DeviceMemory<std::complex<double>> *v) override;
  // Gaussian-distribution overloads for float and double, parameterized by
  // |mean| and |stddev|.
  bool DoPopulateRandGaussian(Stream *stream, float mean, float stddev,
                              DeviceMemory<float> *v) override;
  bool DoPopulateRandGaussian(Stream *stream, double mean, double stddev,
                              DeviceMemory<double> *v) override;

  // Override of the rng::RngSupport seeding entry point. |seed| is passed
  // together with its length |seed_bytes|.
  // NOTE(review): valid seed lengths are not visible from this header - see
  // rng::RngSupport and the implementation file.
  bool SetSeed(Stream *stream, const uint8 *seed, uint64 seed_bytes) override;

 private:
  // Actually performs the work of generating random numbers - the public
  // methods are thin wrappers to this interface.
  template <typename T>
  bool DoPopulateRandUniformInternal(Stream *stream, DeviceMemory<T> *v);
  // NOTE(review): |func| is presumably the cuRAND generation routine matching
  // ElemT - confirm against the implementation file.
  template <typename ElemT, typename FuncT>
  bool DoPopulateRandGaussianInternal(Stream *stream, ElemT mean, ElemT stddev,
                                      DeviceMemory<ElemT> *v, FuncT func);

  // Sets the stream for the internal curand generator.
  //
  // This is a stateful operation, as the handle can only have one stream set at
  // a given time, so it is usually performed right before enqueuing work to do
  // with random number generation.
  //
  // The caller must already hold mu_ (see the lock annotation).
  bool SetStream(Stream *stream) EXCLUSIVE_LOCKS_REQUIRED(mu_);

  // mutex that guards the cuRAND handle for this device.
  mutex mu_;

  // CUDAExecutor which instantiated this CUDARng.
  // Immutable post-initialization.
  CUDAExecutor *parent_;

  // cuRAND library handle on the device. Access requires holding mu_.
  curandGenerator_t rng_ GUARDED_BY(mu_);

  // Instances are neither copyable nor assignable.
  SE_DISALLOW_COPY_AND_ASSIGN(CUDARng);
};

}  // namespace cuda
}  // namespace gputools
}  // namespace perftools

#endif  // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_RNG_H_