blob: 966d7343f745080bd8e237962cf5176d6067f4a6 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
#ifndef TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_PLATFORM_H_
#define TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_PLATFORM_H_
#include <memory>
#include "tensorflow/stream_executor/platform/port.h"
#include <vector>
#include "tensorflow/stream_executor/executor_cache.h"
#include "tensorflow/stream_executor/lib/statusor.h"
#include "tensorflow/stream_executor/multi_platform_manager.h"
#include "tensorflow/stream_executor/platform.h"
#include "tensorflow/stream_executor/platform/mutex.h"
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/platform/thread_annotations.h"
#include "tensorflow/stream_executor/stream_executor_internal.h"
#include "tensorflow/stream_executor/stream_executor_pimpl.h"
#include "tensorflow/stream_executor/trace_listener.h"
namespace perftools {
namespace gputools {
namespace cuda {
// Opaque and unique identifier for the CUDA platform plugin.
// This is needed so that plugins can refer to/identify this platform without
// instantiating a CudaPlatform object.
//
// This is only a declaration (`extern`); the definition is not in this header.
extern const Platform::Id kCudaPlatformId;
// Cuda-specific platform plugin, registered as a singleton value via module
// initializer.
//
// Derives from Platform and overrides its interface; additionally exposes
// bus / NUMA-node queries that are specific to this class. Copying and
// assignment are disabled via SE_DISALLOW_COPY_AND_ASSIGN at the end of the
// class.
class CudaPlatform : public Platform {
public:
CudaPlatform();
~CudaPlatform() override;
// CudaPlatform-specific functionality
// Returns the number of distinct buses / NUMA nodes on the machine.
int BusCount();
// Returns the bus/NUMA node for the specified device ordinal.
int DeviceToBus(int device_ordinal);
// Returns the lowest-ordinal-number StreamExecutor on the specified bus.
port::StatusOr<StreamExecutor*> FirstExecutorForBus(int bus_ordinal);
// Platform interface implementation:
// Returns the same value as kCudaPlatformId above.
Platform::Id id() const override;
// Returns -1 as a sentinel on internal failure (and logs the error).
int VisibleDeviceCount() const override;
// Returns the platform's name by const reference.
// NOTE(review): presumably a reference to name_ below -- confirm in the
// implementation file.
const string& Name() const override;
// The executor lookups below report failure through port::StatusOr rather
// than a null pointer. The ones returning StreamExecutor* hand back a raw,
// non-unique_ptr pointer.
// NOTE(review): presumably those executors stay owned by executor_cache_ --
// confirm in the implementation file.
port::StatusOr<StreamExecutor*> ExecutorForDevice(int ordinal) override;
port::StatusOr<StreamExecutor*> ExecutorForDeviceWithPluginConfig(
int ordinal, const PluginConfig& config) override;
port::StatusOr<StreamExecutor*> GetExecutor(
const StreamExecutorConfig& config) override;
// Unlike GetExecutor, the result is a std::unique_ptr, so ownership of the
// returned executor passes to the caller.
port::StatusOr<std::unique_ptr<StreamExecutor>> GetUncachedExecutor(
const StreamExecutorConfig& config) override;
// Trace listener hooks from the Platform interface. RegisterTraceListener
// takes `listener` by std::unique_ptr (ownership is passed in);
// UnregisterTraceListener takes a non-owning pointer.
// NOTE(review): what the CUDA platform actually does with listeners is
// defined in the implementation file, not here -- confirm before relying on
// it.
void RegisterTraceListener(std::unique_ptr<TraceListener> listener) override;
void UnregisterTraceListener(TraceListener* listener) override;
private:
// Determines the number of NUMA nodes and the assignment of executor to each.
void InspectNumaNodes();
// This platform's name.
string name_;
// mutex that guards internal state. Declared mutable, so it can also be
// used from const member functions.
// NOTE(review): no thread-safety annotations tie specific members to mu_ in
// this header, so exactly which fields it protects is not stated here.
mutable mutex mu_;
// Cache of created executors.
ExecutorCache executor_cache_;
// The smallest NUMA node value for any device managed by this machine
// manager. Used, along with limit_numa_node_, to convert NUMA nodes into bus
// ordinals. The NUMA node space occupied by GPUs is assumed to be dense.
int min_numa_node_;
// Larger than the NUMA node value for any device managed by this machine
// manager.
int limit_numa_node_;
SE_DISALLOW_COPY_AND_ASSIGN(CudaPlatform);
};
} // namespace cuda
} // namespace gputools
} // namespace perftools
#endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_PLATFORM_H_
|