#ifndef TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_PLATFORM_H_
#define TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_PLATFORM_H_

#include <memory>
#include <vector>

#include "tensorflow/stream_executor/executor_cache.h"
#include "tensorflow/stream_executor/lib/statusor.h"
#include "tensorflow/stream_executor/multi_platform_manager.h"
#include "tensorflow/stream_executor/platform.h"
#include "tensorflow/stream_executor/platform/mutex.h"
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/platform/thread_annotations.h"
#include "tensorflow/stream_executor/stream_executor_internal.h"
#include "tensorflow/stream_executor/stream_executor_pimpl.h"
#include "tensorflow/stream_executor/trace_listener.h"

namespace perftools {
namespace gputools {
namespace cuda {

// Opaque and unique identifier for the CUDA platform plugin.
// This is needed so that plugins can refer to/identify this platform without
// instantiating a CudaPlatform object.
extern const Platform::Id kCudaPlatformId;

// CUDA-specific platform plugin, registered as a singleton value via module
// initializer.
class CudaPlatform : public Platform {
 public:
  CudaPlatform();
  ~CudaPlatform() override;

  // CudaPlatform-specific functionality:

  // Returns the number of distinct buses / NUMA nodes on the machine.
  int BusCount();

  // Returns the bus/NUMA node for the specified device ordinal.
  int DeviceToBus(int device_ordinal);

  // Returns the lowest-ordinal-number StreamExecutor on the specified bus.
  port::StatusOr<StreamExecutor*> FirstExecutorForBus(int bus_ordinal);

  // Platform interface implementation:

  // Returns the same value as kCudaPlatformId above.
  Platform::Id id() const override;

  // Returns -1 as a sentinel on internal failure (and logs the error).
  int VisibleDeviceCount() const override;

  const string& Name() const override;

  port::StatusOr<StreamExecutor*> ExecutorForDevice(int ordinal) override;

  port::StatusOr<StreamExecutor*> ExecutorForDeviceWithPluginConfig(
      int ordinal, const PluginConfig& config) override;

  port::StatusOr<StreamExecutor*> GetExecutor(
      const StreamExecutorConfig& config) override;

  port::StatusOr<std::unique_ptr<StreamExecutor>> GetUncachedExecutor(
      const StreamExecutorConfig& config) override;

  void RegisterTraceListener(std::unique_ptr<TraceListener> listener) override;

  void UnregisterTraceListener(TraceListener* listener) override;

 private:
  // Determines the number of NUMA nodes and the assignment of executors to
  // each.
  void InspectNumaNodes();

  // This platform's name.
  string name_;

  // Mutex that guards internal state.
  mutable mutex mu_;

  // Cache of created executors.
  ExecutorCache executor_cache_;

  // The smallest NUMA node value for any device managed by this machine
  // manager. Used, along with limit_numa_node_, to convert NUMA nodes into
  // bus ordinals. The NUMA node space occupied by GPUs is assumed to be
  // dense.
  int min_numa_node_;

  // Larger than the NUMA node value for any device managed by this machine
  // manager.
  int limit_numa_node_;

  SE_DISALLOW_COPY_AND_ASSIGN(CudaPlatform);
};

}  // namespace cuda
}  // namespace gputools
}  // namespace perftools

#endif  // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_PLATFORM_H_
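
// A minimal usage sketch (not part of this header): it illustrates how a
// plugin that only holds kCudaPlatformId might resolve the registered CUDA
// platform through MultiPlatformManager rather than constructing a
// CudaPlatform directly. It assumes the platform has already been registered
// via its module initializer; error handling is abbreviated and the helper
// name ExampleLookup is purely illustrative.
//
//   #include "tensorflow/stream_executor/cuda/cuda_platform.h"
//   #include "tensorflow/stream_executor/multi_platform_manager.h"
//
//   namespace gpu = perftools::gputools;
//
//   void ExampleLookup() {
//     // Resolve the registered CUDA platform by its opaque id.
//     gpu::port::StatusOr<gpu::Platform*> status_or_platform =
//         gpu::MultiPlatformManager::PlatformWithId(
//             gpu::cuda::kCudaPlatformId);
//     if (!status_or_platform.ok()) {
//       return;  // CUDA platform not registered or unavailable.
//     }
//     gpu::Platform* platform = status_or_platform.ValueOrDie();
//
//     // Fetch (or lazily create) the cached executor for device ordinal 0.
//     gpu::StreamExecutor* executor =
//         platform->ExecutorForDevice(0).ValueOrDie();
//     (void)executor;
//   }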