-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_graph.cc           |   8
-rw-r--r--  tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc           |  13
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc        |  11
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h         |   6
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc   |  30
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc |   5
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h  |   2
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc      |  10
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h       |   4
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc |  59
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_device.cc               | 224
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_device.h                |  22
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_device_test.cc          |  19
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_id.h                    |  32
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_id_manager.cc           |  38
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_id_manager.h            |  12
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc      |  32
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_id_utils.h              |  37
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_process_state.cc        |  15
-rw-r--r--  tensorflow/core/grappler/clusters/single_machine.cc            |   6
-rw-r--r--  tensorflow/core/grappler/clusters/utils.cc                     |  13
-rw-r--r--  tensorflow/core/grappler/clusters/utils.h                      |   2
-rw-r--r--  tensorflow/core/grappler/clusters/utils_test.cc                |  22
-rw-r--r--  tensorflow/core/grappler/costs/utils.cc                        |   8
-rw-r--r--  tensorflow/core/protobuf/config.proto                          |   2
25 files changed, 333 insertions, 299 deletions
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index b019c99882..f29f4d6deb 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -780,12 +780,12 @@ std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
// If device is not set, use the first found GPU device for the conversion.
for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) {
TfGpuId tf_gpu_id(tf_gpu_id_value);
- CudaGpuId cuda_gpu_id;
- Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+ PlatformGpuId platform_gpu_id;
+ Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
if (s.ok()) {
VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device "
- << cuda_gpu_id.value();
- cuda_device_id = cuda_gpu_id.value();
+ << platform_gpu_id.value();
+ cuda_device_id = platform_gpu_id.value();
GPUOptions gpu_options;
// If the TF to Cuda gpu id mapping exist, the device and corresponding
// allocator must have been initialized already, so the
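For illustration, here is a minimal sketch (not part of the commit) of the post-rename lookup pattern that the hunk above arrives at, assuming the GpuIdManager API renamed later in this diff; the scan bound of 100 mirrors the loop shown above.

    // Sketch: find the first TF GPU id with a registered platform GPU id and
    // use its platform ordinal as the CUDA device for the conversion.
    int cuda_device_id = -1;
    for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) {
      TfGpuId tf_gpu_id(tf_gpu_id_value);
      PlatformGpuId platform_gpu_id;
      Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
      if (s.ok()) {
        // platform_gpu_id.value() is the CUDA-visible ordinal, usable with
        // CUDA runtime calls such as cudaSetDevice().
        cuda_device_id = platform_gpu_id.value();
        break;
      }
    }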
diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
index 2b42d81f47..88cf8d5980 100644
--- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
+++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
@@ -565,21 +565,22 @@ tensorflow::Status TRTEngineOp::AllocateCalibrationResources(
new TRTInt8Calibrator(device_buffers_, batch_size, name()));
const string label(name());
auto segment_graph = &segment_graph_;
- const int cuda_gpu_id = ctx->device()->tensorflow_gpu_device_info()->gpu_id;
- if (cuda_gpu_id < 0) {
+ const int platform_gpu_id =
+ ctx->device()->tensorflow_gpu_device_info()->gpu_id;
+ if (platform_gpu_id < 0) {
LOG(ERROR) << "Can't get gpu_device_info from context->device()";
return tensorflow::errors::InvalidArgument(
"Context->device doesn't contain device info!");
}
const int64 workspace_size_bytes = workspace_size_;
cres->thr_.reset(new std::thread([cres, label, segment_graph, shapes,
- cuda_gpu_id, workspace_size_bytes]() {
- VLOG(0) << "Starting calibration thread on device " << cuda_gpu_id
+ platform_gpu_id, workspace_size_bytes]() {
+ VLOG(0) << "Starting calibration thread on device " << platform_gpu_id
<< ", Calibration Resource @ " << cres;
- auto err = cudaSetDevice(cuda_gpu_id);
+ auto err = cudaSetDevice(platform_gpu_id);
if (err != cudaSuccess) {
// TODO(aaroey): should return error here.
- LOG(ERROR) << "Couldn't set cuda device to " << cuda_gpu_id
+ LOG(ERROR) << "Couldn't set cuda device to " << platform_gpu_id
<< " in calibration thread";
}
// ConvertGraphDefToEngine() will try to build the engine. This thread
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
index 2d4c8d0201..c8db384b64 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
@@ -22,16 +22,17 @@ limitations under the License.
namespace tensorflow {
-GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
- const string& name)
- : GPUBFCAllocator(cuda_gpu_id, total_memory, GPUOptions(), name) {}
+GPUBFCAllocator::GPUBFCAllocator(PlatformGpuId platform_gpu_id,
+ size_t total_memory, const string& name)
+ : GPUBFCAllocator(platform_gpu_id, total_memory, GPUOptions(), name) {}
-GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+GPUBFCAllocator::GPUBFCAllocator(PlatformGpuId platform_gpu_id,
+ size_t total_memory,
const GPUOptions& gpu_options,
const string& name)
: BFCAllocator(
new GPUMemAllocator(
- GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(),
+ GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(),
gpu_options.per_process_gpu_memory_fraction() > 1.0 ||
gpu_options.experimental().use_unified_memory()),
total_memory, gpu_options.allow_growth(), name) {}
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index f1cc2eace1..435ffb4959 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -35,11 +35,11 @@ namespace tensorflow {
// algorithm.
class GPUBFCAllocator : public BFCAllocator {
public:
- // 'cuda_gpu_id' refers to the ID of the GPU device within
+ // 'platform_gpu_id' refers to the ID of the GPU device within
// the process and must reference a valid ID in the process.
- GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+ GPUBFCAllocator(PlatformGpuId platform_gpu_id, size_t total_memory,
const string& name);
- GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+ GPUBFCAllocator(PlatformGpuId platform_gpu_id, size_t total_memory,
const GPUOptions& gpu_options, const string& name);
virtual ~GPUBFCAllocator() {}
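For illustration, a minimal sketch (not part of the commit) of constructing the renamed allocator, mirroring the updated tests later in this diff; ExampleAllocate is a hypothetical caller.

    #include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
    #include "tensorflow/core/common_runtime/gpu/gpu_id.h"

    namespace tensorflow {
    void ExampleAllocate() {
      // The first argument is now the strongly typed PlatformGpuId rather
      // than the old CudaGpuId; 1 << 30 caps the allocator at 1 GiB.
      GPUBFCAllocator allocator(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
      void* buf = allocator.AllocateRaw(/*alignment=*/256, /*num_bytes=*/4096);
      allocator.DeallocateRaw(buf);
    }
    }  // namespace tensorflow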
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
index 67caeb3495..518ccba580 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
@@ -46,7 +46,7 @@ static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use,
}
TEST(GPUBFCAllocatorTest, NoDups) {
- GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
CheckStats(&a, 0, 0, 0, 0);
// Allocate a lot of raw pointers
@@ -75,7 +75,7 @@ TEST(GPUBFCAllocatorTest, NoDups) {
}
TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
- GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
// Allocate 256 raw pointers of sizes between 100 bytes and about
// a meg
random::PhiloxRandom philox(123, 17);
@@ -133,7 +133,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
}
TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
- GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
CheckStats(&a, 0, 0, 0, 0);
float* first_ptr = a.Allocate<float>(1024);
@@ -168,18 +168,18 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
}
TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) {
- GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
float* ptr = a.Allocate<float>(0);
EXPECT_EQ(nullptr, ptr);
}
TEST(GPUBFCAllocatorTest, TracksSizes) {
- GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
EXPECT_EQ(true, a.TracksAllocationSizes());
}
TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
- GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
float* t1 = a.Allocate<float>(1);
EXPECT_EQ(4, a.RequestedSize(t1));
EXPECT_EQ(256, a.AllocatedSize(t1));
@@ -188,7 +188,7 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) {
// Configure a 1MiB byte limit
- GPUBFCAllocator a(CudaGpuId(0), 1 << 20, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1 << 20, "GPU_0_bfc");
float* first_ptr = a.Allocate<float>(1 << 6);
float* second_ptr = a.Allocate<float>(1 << 20);
@@ -203,7 +203,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
options.set_allow_growth(true);
// Max of 2GiB, but starts out small.
- GPUBFCAllocator a(CudaGpuId(0), 1LL << 31, options, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1LL << 31, options, "GPU_0_bfc");
// Allocate 10 raw pointers of sizes between 100 bytes and about
// 64 megs.
@@ -264,8 +264,8 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
}
TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
- GPUBFCAllocator a(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
- GPUBFCAllocator b(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1UL << 60, "GPU_0_bfc");
+ GPUBFCAllocator b(PlatformGpuId(0), 1UL << 60, "GPU_0_bfc");
void* amem = a.AllocateRaw(1, 1);
void* bmem = b.AllocateRaw(1, 1 << 30);
a.DeallocateRaw(amem);
@@ -273,7 +273,7 @@ TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
}
static void BM_Allocation(int iters) {
- GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1uLL << 33, "GPU_0_bfc");
// Exercise a few different allocation sizes
std::vector<size_t> sizes = {256, 4096, 16384, 524288,
512, 1048576, 10485760, 104857600,
@@ -289,7 +289,7 @@ static void BM_Allocation(int iters) {
BENCHMARK(BM_Allocation);
static void BM_AllocationThreaded(int iters, int num_threads) {
- GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1uLL << 33, "GPU_0_bfc");
thread::ThreadPool pool(Env::Default(), "test", num_threads);
std::atomic_int_fast32_t count(iters);
mutex done_lock;
@@ -325,7 +325,7 @@ BENCHMARK(BM_AllocationThreaded)->Arg(1)->Arg(4)->Arg(16);
// A more complex benchmark that defers deallocation of an object for
// "delay" allocations.
static void BM_AllocationDelayed(int iters, int delay) {
- GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
// Exercise a few different allocation sizes
std::vector<int> sizes = {256, 4096, 16384, 4096, 512, 1024, 1024};
int size_index = 0;
@@ -363,7 +363,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
// only methods inside this class can access private members of BFCAllocator.
void TestBinDebugInfo() {
- GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc");
std::vector<void*> initial_ptrs;
std::vector<size_t> initial_ptrs_allocated_sizes;
@@ -441,7 +441,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
}
void TestLog2FloorNonZeroSlow() {
- GPUBFCAllocator a(CudaGpuId(0), 1 /* total_memory */, "GPU_0_bfc");
+ GPUBFCAllocator a(PlatformGpuId(0), 1 /* total_memory */, "GPU_0_bfc");
EXPECT_EQ(-1, a.Log2FloorNonZeroSlow(0));
EXPECT_EQ(0, a.Log2FloorNonZeroSlow(1));
EXPECT_EQ(1, a.Log2FloorNonZeroSlow(2));
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
index 934a57a5fb..553a5628ad 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
@@ -28,9 +28,10 @@ limitations under the License.
namespace tensorflow {
GPUcudaMallocAllocator::GPUcudaMallocAllocator(VisitableAllocator* allocator,
- CudaGpuId cuda_gpu_id)
+ PlatformGpuId platform_gpu_id)
: base_allocator_(allocator) {
- stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+ stream_exec_ =
+ GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
}
GPUcudaMallocAllocator::~GPUcudaMallocAllocator() { delete base_allocator_; }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
index 856fdc34b4..8f38cc5a18 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
@@ -32,7 +32,7 @@ namespace tensorflow {
class GPUcudaMallocAllocator : public VisitableAllocator {
public:
explicit GPUcudaMallocAllocator(VisitableAllocator* allocator,
- CudaGpuId cuda_gpu_id);
+ PlatformGpuId platform_gpu_id);
~GPUcudaMallocAllocator() override;
string Name() override { return "gpu_debug"; }
void* AllocateRaw(size_t alignment, size_t num_bytes) override;
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
index e4c834b30d..badb021aa5 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
@@ -74,9 +74,10 @@ void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) {
// GPUDebugAllocator
// -----------------------------------------------------------------------------
GPUDebugAllocator::GPUDebugAllocator(VisitableAllocator* allocator,
- CudaGpuId cuda_gpu_id)
+ PlatformGpuId platform_gpu_id)
: base_allocator_(allocator) {
- stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+ stream_exec_ =
+ GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
}
GPUDebugAllocator::~GPUDebugAllocator() { delete base_allocator_; }
@@ -159,9 +160,10 @@ bool GPUDebugAllocator::CheckFooter(void* ptr) {
// GPUNanResetAllocator
// -----------------------------------------------------------------------------
GPUNanResetAllocator::GPUNanResetAllocator(VisitableAllocator* allocator,
- CudaGpuId cuda_gpu_id)
+ PlatformGpuId platform_gpu_id)
: base_allocator_(allocator) {
- stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+ stream_exec_ =
+ GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
}
GPUNanResetAllocator::~GPUNanResetAllocator() { delete base_allocator_; }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
index 0f9b72040c..9e007ed8c1 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
@@ -34,7 +34,7 @@ namespace tensorflow {
class GPUDebugAllocator : public VisitableAllocator {
public:
explicit GPUDebugAllocator(VisitableAllocator* allocator,
- CudaGpuId cuda_gpu_id);
+ PlatformGpuId platform_gpu_id);
~GPUDebugAllocator() override;
string Name() override { return "gpu_debug"; }
void* AllocateRaw(size_t alignment, size_t num_bytes) override;
@@ -66,7 +66,7 @@ class GPUDebugAllocator : public VisitableAllocator {
class GPUNanResetAllocator : public VisitableAllocator {
public:
explicit GPUNanResetAllocator(VisitableAllocator* allocator,
- CudaGpuId cuda_gpu_id);
+ PlatformGpuId platform_gpu_id);
~GPUNanResetAllocator() override;
string Name() override { return "gpu_nan_reset"; }
void* AllocateRaw(size_t alignment, size_t num_bytes) override;
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
index 236a0afa0b..bc3e3a8c35 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
@@ -34,10 +34,11 @@ namespace tensorflow {
namespace {
TEST(GPUDebugAllocatorTest, OverwriteDetection_None) {
- const CudaGpuId cuda_gpu_id(0);
- GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
- cuda_gpu_id);
- auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+ const PlatformGpuId platform_gpu_id(0);
+ GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+ platform_gpu_id);
+ auto stream_exec =
+ GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
for (int s : {8}) {
std::vector<int64> cpu_array(s);
@@ -58,11 +59,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) {
for (int s : {8, 211}) {
EXPECT_DEATH(
{
- const CudaGpuId cuda_gpu_id(0);
- GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
- cuda_gpu_id);
+ const PlatformGpuId platform_gpu_id(0);
+ GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+ platform_gpu_id);
auto stream_exec =
- GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+ GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
std::vector<int64> cpu_array(s);
memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
@@ -91,11 +92,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
for (int s : {8, 22}) {
EXPECT_DEATH(
{
- const CudaGpuId cuda_gpu_id(0);
- GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
- cuda_gpu_id);
+ const PlatformGpuId platform_gpu_id(0);
+ GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+ platform_gpu_id);
auto stream_exec =
- GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+ GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
std::vector<int64> cpu_array(s);
memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
@@ -121,10 +122,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
}
TEST(GPUDebugAllocatorTest, ResetToNan) {
- const CudaGpuId cuda_gpu_id(0);
- GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
- cuda_gpu_id);
- auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+ const PlatformGpuId platform_gpu_id(0);
+ GPUNanResetAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+ platform_gpu_id);
+ auto stream_exec =
+ GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
std::vector<float> cpu_array(1024);
std::vector<float> cpu_array_result(1024);
@@ -161,13 +163,14 @@ TEST(GPUDebugAllocatorTest, ResetToNan) {
}
TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
- const CudaGpuId cuda_gpu_id(0);
+ const PlatformGpuId platform_gpu_id(0);
// NaN reset must be the outer-most allocator.
GPUNanResetAllocator a(
- new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
- cuda_gpu_id),
- cuda_gpu_id);
- auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+ new GPUDebugAllocator(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+ platform_gpu_id),
+ platform_gpu_id);
+ auto stream_exec =
+ GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
std::vector<float> cpu_array(1024);
std::vector<float> cpu_array_result(1024);
@@ -204,18 +207,18 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
}
TEST(GPUDebugAllocatorTest, TracksSizes) {
- const CudaGpuId cuda_gpu_id(0);
- GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
- cuda_gpu_id);
+ const PlatformGpuId platform_gpu_id(0);
+ GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+ platform_gpu_id);
EXPECT_EQ(true, a.TracksAllocationSizes());
}
TEST(GPUDebugAllocatorTest, AllocatedVsRequested) {
- const CudaGpuId cuda_gpu_id(0);
+ const PlatformGpuId platform_gpu_id(0);
GPUNanResetAllocator a(
- new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
- cuda_gpu_id),
- cuda_gpu_id);
+ new GPUDebugAllocator(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""),
+ platform_gpu_id),
+ platform_gpu_id);
float* t1 = a.Allocate<float>(1);
EXPECT_EQ(4, a.RequestedSize(t1));
EXPECT_EQ(256, a.AllocatedSize(t1));
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 2763ac0d4a..4bf23bc017 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -105,9 +105,9 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
reinterpret_cast<unsigned int*>(scratch + Eigen::kCudaScratchSize);
stream_ = cuda_stream;
allocator_ = alloc;
- CudaGpuId cuda_gpu_id;
- TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
- device_prop_ = &Eigen::m_deviceProperties[cuda_gpu_id.value()];
+ PlatformGpuId platform_gpu_id;
+ TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
+ device_prop_ = &Eigen::m_deviceProperties[platform_gpu_id.value()];
}
const cudaStream_t& stream() const override { return *stream_; }
@@ -332,9 +332,10 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
gpu_device_info_->stream = streams_[0]->compute;
gpu_device_info_->default_context = device_contexts_[0];
gpu_device_info_->event_mgr = em_.get();
- CudaGpuId cuda_gpu_id;
- TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id_, &cuda_gpu_id));
- gpu_device_info_->gpu_id = cuda_gpu_id.value();
+ PlatformGpuId platform_gpu_id;
+ TF_RETURN_IF_ERROR(
+ GpuIdManager::TfToPlatformGpuId(tf_gpu_id_, &platform_gpu_id));
+ gpu_device_info_->gpu_id = platform_gpu_id.value();
set_tensorflow_gpu_device_info(gpu_device_info_);
// Whether and how the GPU device uses its own threadpool.
@@ -690,9 +691,9 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
Eigen::GpuDevice device_;
};
-// Parse 'visible_device_list' into a list of CUDA GPU ids.
+// Parse 'visible_device_list' into a list of platform GPU ids.
Status ParseVisibleDeviceList(const string& visible_device_list,
- std::vector<CudaGpuId>* visible_gpu_order) {
+ std::vector<PlatformGpuId>* visible_gpu_order) {
visible_gpu_order->clear();
se::Platform* gpu_manager = GPUMachineManager();
@@ -707,26 +708,28 @@ Status ParseVisibleDeviceList(const string& visible_device_list,
} else {
const std::vector<string> order_str =
str_util::Split(visible_device_list, ',');
- for (const string& cuda_gpu_id_str : order_str) {
- int32 cuda_gpu_id;
- if (!strings::safe_strto32(cuda_gpu_id_str, &cuda_gpu_id)) {
+ for (const string& platform_gpu_id_str : order_str) {
+ int32 platform_gpu_id;
+ if (!strings::safe_strto32(platform_gpu_id_str, &platform_gpu_id)) {
return errors::InvalidArgument(
"Could not parse entry in 'visible_device_list': '",
- cuda_gpu_id_str, "'. visible_device_list = ", visible_device_list);
+ platform_gpu_id_str, "'. visible_device_list = ",
+ visible_device_list);
}
- if (cuda_gpu_id < 0 || cuda_gpu_id >= gpu_manager->VisibleDeviceCount()) {
+ if (platform_gpu_id < 0 ||
+ platform_gpu_id >= gpu_manager->VisibleDeviceCount()) {
return errors::InvalidArgument(
- "'visible_device_list' listed an invalid GPU id '", cuda_gpu_id,
+ "'visible_device_list' listed an invalid GPU id '", platform_gpu_id,
"' but visible device count is ",
gpu_manager->VisibleDeviceCount());
}
- visible_gpu_order->push_back(CudaGpuId(cuda_gpu_id));
+ visible_gpu_order->push_back(PlatformGpuId(platform_gpu_id));
}
}
// Validate no repeats.
- std::set<CudaGpuId> visible_device_set(visible_gpu_order->begin(),
- visible_gpu_order->end());
+ std::set<PlatformGpuId> visible_device_set(visible_gpu_order->begin(),
+ visible_gpu_order->end());
if (visible_device_set.size() != visible_gpu_order->size()) {
return errors::InvalidArgument(
"visible_device_list contained a duplicate entry: ",
@@ -737,8 +740,8 @@ Status ParseVisibleDeviceList(const string& visible_device_list,
Status VerifyVirtualDeviceSettings(
const size_t num_gpus_to_use, const GPUOptions& gpu_options,
- const std::vector<CudaGpuId>& visible_gpu_order,
- const std::vector<CudaGpuId>& valid_cuda_gpu_ids) {
+ const std::vector<PlatformGpuId>& visible_gpu_order,
+ const std::vector<PlatformGpuId>& valid_platform_gpu_ids) {
const auto& virtual_devices = gpu_options.experimental().virtual_devices();
CHECK(!virtual_devices.empty());
if (gpu_options.per_process_gpu_memory_fraction() > 0) {
@@ -760,11 +763,11 @@ Status VerifyVirtualDeviceSettings(
" #GPUs in visible_device_list: ", visible_gpu_order.size(),
" virtual_devices.size(): ", virtual_devices.size());
}
- if (valid_cuda_gpu_ids.size() != virtual_devices.size()) {
+ if (valid_platform_gpu_ids.size() != virtual_devices.size()) {
return errors::Unknown(
"The number of valid GPUs doesn't match the number of elements in "
"the virtual_devices list.",
- " #valid GPUs: ", valid_cuda_gpu_ids.size(),
+ " #valid GPUs: ", valid_platform_gpu_ids.size(),
" virtual_devices.size(): ", virtual_devices.size());
}
return Status::OK();
@@ -806,18 +809,18 @@ int64 MinSystemMemory(int64 available_memory) {
}
// Get the memory limit for the virtual device being created on GPU with
-// 'cuda_gpu_id', when that virtual device is the only virtual device being
+// 'platform_gpu_id', when that virtual device is the only virtual device being
// created on that GPU.
Status SingleVirtualDeviceMemoryLimit(const GPUOptions& gpu_options,
- CudaGpuId cuda_gpu_id,
+ PlatformGpuId platform_gpu_id,
int64* memory_limit) {
int64 total_memory = 0;
int64 available_memory = 0;
se::StreamExecutor* se =
- GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+ GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
if (!se->DeviceMemoryUsage(&available_memory, &total_memory)) {
return errors::Unknown("Failed to query available memory for GPU ",
- cuda_gpu_id.value());
+ platform_gpu_id.value());
}
int64 allocated_memory = 0;
@@ -916,8 +919,8 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
num_gpus_to_use = iter->second;
}
const auto& gpu_options = options.config.gpu_options();
- std::vector<CudaGpuId> visible_gpu_order;
- std::vector<CudaGpuId> valid_cuda_gpu_ids;
+ std::vector<PlatformGpuId> visible_gpu_order;
+ std::vector<PlatformGpuId> valid_platform_gpu_ids;
// If we aren't going to use any GPUs, don't initialize them.
// We don't want to call ParseVisibleDeviceList if num_gpus_to_use is 0,
// because it treats an empty gpu_options.visible_device_list as 'all GPUs are
@@ -926,12 +929,12 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
TF_RETURN_IF_ERROR(ParseVisibleDeviceList(gpu_options.visible_device_list(),
&visible_gpu_order));
TF_RETURN_IF_ERROR(
- GetValidDeviceIds(visible_gpu_order, &valid_cuda_gpu_ids));
+ GetValidDeviceIds(visible_gpu_order, &valid_platform_gpu_ids));
}
- if (num_gpus_to_use > valid_cuda_gpu_ids.size()) {
- num_gpus_to_use = valid_cuda_gpu_ids.size();
+ if (num_gpus_to_use > valid_platform_gpu_ids.size()) {
+ num_gpus_to_use = valid_platform_gpu_ids.size();
}
- if (!valid_cuda_gpu_ids.empty()) {
+ if (!valid_platform_gpu_ids.empty()) {
// Save the original device.
int original_device = 0;
cudaError_t err = cudaGetDevice(&original_device);
@@ -941,17 +944,18 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
}
// Force to implicitly initialize CUDA runtime on each valid GPU before
// CreateGPUDevice().
- for (CudaGpuId cuda_gpu_id : valid_cuda_gpu_ids) {
- err = cudaSetDevice(cuda_gpu_id.value());
+ for (PlatformGpuId platform_gpu_id : valid_platform_gpu_ids) {
+ err = cudaSetDevice(platform_gpu_id.value());
if (err != cudaSuccess) {
- return errors::Internal("cudaSetDevice() on GPU:", cuda_gpu_id.value(),
- " failed. Status: ", cudaGetErrorString(err));
+ return errors::Internal("cudaSetDevice() on GPU:",
+ platform_gpu_id.value(), " failed. Status: ",
+ cudaGetErrorString(err));
}
err = cudaFree(nullptr);
if (err != cudaSuccess) {
- return errors::Internal(
- "CUDA runtime implicit initialization on GPU:", cuda_gpu_id.value(),
- " failed. Status: ", cudaGetErrorString(err));
+ return errors::Internal("CUDA runtime implicit initialization on GPU:",
+ platform_gpu_id.value(), " failed. Status: ",
+ cudaGetErrorString(err));
}
}
// Reset to the original device.
@@ -977,10 +981,10 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
LOG(INFO) << line_buf;
for (int i = 0; i < visible_gpu_order.size(); ++i) {
line_buf = strings::StrCat(visible_gpu_order[i].value(), ": ");
- CudaGpuId cuda_id_i = visible_gpu_order[i];
+ PlatformGpuId gpu_id_i = visible_gpu_order[i];
for (int j = 0; j < visible_gpu_order.size(); ++j) {
- CudaGpuId cuda_id_j = visible_gpu_order[j];
- if (im.directed_links.find({cuda_id_i, cuda_id_j}) !=
+ PlatformGpuId gpu_id_j = visible_gpu_order[j];
+ if (im.directed_links.find({gpu_id_i, gpu_id_j}) !=
im.directed_links.end()) {
line_buf.append("Y ");
} else {
@@ -993,22 +997,23 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
const auto& virtual_devices = gpu_options.experimental().virtual_devices();
if (!virtual_devices.empty()) {
- TF_RETURN_IF_ERROR(VerifyVirtualDeviceSettings(
- num_gpus_to_use, gpu_options, visible_gpu_order, valid_cuda_gpu_ids));
+ TF_RETURN_IF_ERROR(VerifyVirtualDeviceSettings(num_gpus_to_use, gpu_options,
+ visible_gpu_order,
+ valid_platform_gpu_ids));
// We've verified that num_gpus_to_use >= virtual_devices.size().
num_gpus_to_use = virtual_devices.size();
CHECK(gpu_options.visible_device_list().empty() ||
- valid_cuda_gpu_ids == visible_gpu_order);
+ valid_platform_gpu_ids == visible_gpu_order);
}
int next_tf_gpu_id = 0;
std::vector<int64> memory_limit_bytes;
for (int i = 0; i < num_gpus_to_use; ++i) {
- const CudaGpuId cuda_gpu_id = valid_cuda_gpu_ids[i];
+ const PlatformGpuId platform_gpu_id = valid_platform_gpu_ids[i];
if (virtual_devices.empty() ||
virtual_devices.Get(i).memory_limit_mb_size() == 0) {
int64 single_virtual_device_memory_limit = 0;
TF_RETURN_IF_ERROR(SingleVirtualDeviceMemoryLimit(
- gpu_options, cuda_gpu_id, &single_virtual_device_memory_limit));
+ gpu_options, platform_gpu_id, &single_virtual_device_memory_limit));
memory_limit_bytes.push_back(single_virtual_device_memory_limit);
} else {
const auto& memory_limit_mb = virtual_devices.Get(i).memory_limit_mb();
@@ -1021,7 +1026,7 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
TfGpuId tf_gpu_id(next_tf_gpu_id);
++next_tf_gpu_id;
TF_RETURN_IF_ERROR(
- GpuIdManager::InsertTfCudaGpuIdPair(tf_gpu_id, cuda_gpu_id));
+ GpuIdManager::InsertTfPlatformGpuIdPair(tf_gpu_id, platform_gpu_id));
}
}
const int num_tf_gpus = next_tf_gpu_id;
@@ -1046,7 +1051,7 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
return Status::OK();
}
-static string GetShortDeviceDescription(CudaGpuId cuda_gpu_id,
+static string GetShortDeviceDescription(PlatformGpuId platform_gpu_id,
const se::DeviceDescription& desc) {
int cc_major;
int cc_minor;
@@ -1055,9 +1060,8 @@ static string GetShortDeviceDescription(CudaGpuId cuda_gpu_id,
cc_minor = 0;
}
// LINT.IfChange
- return strings::StrCat("device: ", cuda_gpu_id.value(),
- ", name: ", desc.name(),
- ", pci bus id: ", desc.pci_bus_id(),
+ return strings::StrCat("device: ", platform_gpu_id.value(), ", name: ",
+ desc.name(), ", pci bus id: ", desc.pci_bus_id(),
", compute capability: ", cc_major, ".", cc_minor);
// LINT.ThenChange(//tensorflow/python/platform/test.py)
}
@@ -1072,12 +1076,13 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options,
const string device_name =
strings::StrCat(name_prefix, "/device:GPU:", tf_gpu_id.value());
GpuIdUtil::CheckValidTfGpuId(tf_gpu_id);
- CudaGpuId cuda_gpu_id;
- TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
+ PlatformGpuId platform_gpu_id;
+ TF_RETURN_IF_ERROR(
+ GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
int numa_node = dev_locality.numa_node();
se::StreamExecutor* se =
- GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+ GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
const se::DeviceDescription& desc = se->GetDeviceDescription();
GPUProcessState* process_state = GPUProcessState::singleton();
Allocator* gpu_allocator = process_state->GetGPUAllocator(
@@ -1098,11 +1103,11 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options,
// TODO(laigd): report error if memory_limit doesn't match stats.bytes_limit.
BaseGPUDevice* gpu_device = CreateGPUDevice(
options, device_name, static_cast<Bytes>(stats.bytes_limit), dev_locality,
- tf_gpu_id, GetShortDeviceDescription(cuda_gpu_id, desc), gpu_allocator,
- ProcessState::singleton()->GetCPUAllocator(numa_node));
+ tf_gpu_id, GetShortDeviceDescription(platform_gpu_id, desc),
+ gpu_allocator, ProcessState::singleton()->GetCPUAllocator(numa_node));
LOG(INFO) << "Created TensorFlow device (" << device_name << " with "
<< (stats.bytes_limit >> 20) << " MB memory) -> physical GPU ("
- << GetShortDeviceDescription(cuda_gpu_id, desc) << ")";
+ << GetShortDeviceDescription(platform_gpu_id, desc) << ")";
TF_RETURN_IF_ERROR(gpu_device->Init(options));
devices->push_back(gpu_device);
@@ -1110,18 +1115,21 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options,
}
namespace {
-std::unique_ptr<std::map<std::pair<CudaGpuId, CudaGpuId>, bool>>
+std::unique_ptr<std::map<std::pair<PlatformGpuId, PlatformGpuId>, bool>>
GetPeerAccessMap(se::Platform* platform,
- const std::vector<CudaGpuId>& visible_gpu_order) {
- std::unique_ptr<std::map<std::pair<CudaGpuId, CudaGpuId>, bool>> map(
- new std::map<std::pair<CudaGpuId, CudaGpuId>, bool>);
- for (CudaGpuId cuda_gpu_i : visible_gpu_order) {
- for (CudaGpuId cuda_gpu_j : visible_gpu_order) {
+ const std::vector<PlatformGpuId>& visible_gpu_order) {
+ std::unique_ptr<std::map<std::pair<PlatformGpuId, PlatformGpuId>, bool>> map(
+ new std::map<std::pair<PlatformGpuId, PlatformGpuId>, bool>);
+ for (PlatformGpuId platform_gpu_i : visible_gpu_order) {
+ for (PlatformGpuId platform_gpu_j : visible_gpu_order) {
se::StreamExecutor* from =
- GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_i).ValueOrDie();
+ GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_i)
+ .ValueOrDie();
se::StreamExecutor* to =
- GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_j).ValueOrDie();
- (*map)[{cuda_gpu_i, cuda_gpu_j}] = from->CanEnablePeerAccessTo(to);
+ GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_j)
+ .ValueOrDie();
+ (*map)[{platform_gpu_i, platform_gpu_j}] =
+ from->CanEnablePeerAccessTo(to);
}
}
@@ -1131,19 +1139,19 @@ GetPeerAccessMap(se::Platform* platform,
} // namespace
Status BaseGPUDeviceFactory::GetInterconnectMaps(
- const std::vector<CudaGpuId>& visible_gpu_order, se::Platform* gpu_manager,
- std::vector<InterconnectMap>* maps) {
+ const std::vector<PlatformGpuId>& visible_gpu_order,
+ se::Platform* gpu_manager, std::vector<InterconnectMap>* maps) {
// The default interconnect map is obtained from the StreamExecutor.
auto access_map = GetPeerAccessMap(gpu_manager, visible_gpu_order);
maps->resize(1);
InterconnectMap& imap = maps->at(0);
imap.name = "StreamExecutor";
imap.strength = InterconnectMap::kStreamExecutorStrength;
- for (CudaGpuId cuda_id_i : visible_gpu_order) {
- for (CudaGpuId cuda_id_j : visible_gpu_order) {
- if (cuda_id_i == cuda_id_j) continue;
- if ((*access_map)[{cuda_id_i, cuda_id_j}]) {
- imap.directed_links.insert({cuda_id_i, cuda_id_j});
+ for (PlatformGpuId gpu_id_i : visible_gpu_order) {
+ for (PlatformGpuId gpu_id_j : visible_gpu_order) {
+ if (gpu_id_i == gpu_id_j) continue;
+ if ((*access_map)[{gpu_id_i, gpu_id_j}]) {
+ imap.directed_links.insert({gpu_id_i, gpu_id_j});
}
}
}
@@ -1158,13 +1166,14 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
all_tf_gpu_ids.push_back(TfGpuId(i));
}
for (TfGpuId tf_gpu_id : all_tf_gpu_ids) {
- CudaGpuId cuda_gpu_id;
- TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
+ PlatformGpuId platform_gpu_id;
+ TF_RETURN_IF_ERROR(
+ GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
// Get GPU bus_id from its reported NUMA affinity. Because GPUs are
// virtualized in some environments, we can't just use the GPU id.
// NUMA locales are indexed from 0, buses are indexed from 1.
se::StreamExecutor* se =
- GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+ GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
const se::DeviceDescription& desc = se->GetDeviceDescription();
int numa_node = desc.numa_node();
if (numa_node < 0) {
@@ -1174,7 +1183,8 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
// may run into trouble later with data transfer operations. The
// trouble may manifest as slower than expected performance, or
// outright failures.
- LOG(INFO) << "Could not identify NUMA node of CUDA gpu id " << cuda_gpu_id
+ LOG(INFO) << "Could not identify NUMA node of platform GPU id "
+ << platform_gpu_id
<< ", defaulting to 0. Your kernel may not have been built "
<< "with NUMA support.";
numa_node = 0;
@@ -1187,10 +1197,10 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
LocalLinks* links = dev_locality.mutable_links();
for (const InterconnectMap& imap : interconnects) {
for (TfGpuId tf_gpu_dst : all_tf_gpu_ids) {
- CudaGpuId cuda_gpu_dst;
+ PlatformGpuId platform_gpu_dst;
TF_RETURN_IF_ERROR(
- GpuIdManager::TfToCudaGpuId(tf_gpu_dst, &cuda_gpu_dst));
- if (imap.directed_links.find({cuda_gpu_id, cuda_gpu_dst}) !=
+ GpuIdManager::TfToPlatformGpuId(tf_gpu_dst, &platform_gpu_dst));
+ if (imap.directed_links.find({platform_gpu_id, platform_gpu_dst}) !=
imap.directed_links.end()) {
InterconnectLink* ilink = links->add_link();
ilink->set_device_id(tf_gpu_dst.value());
@@ -1204,10 +1214,10 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
// add high strength links to the others.
for (TfGpuId tf_gpu_dst : all_tf_gpu_ids) {
if (tf_gpu_id == tf_gpu_dst) continue;
- CudaGpuId cuda_gpu_dst;
+ PlatformGpuId platform_gpu_dst;
TF_RETURN_IF_ERROR(
- GpuIdManager::TfToCudaGpuId(tf_gpu_dst, &cuda_gpu_dst));
- if (cuda_gpu_id == cuda_gpu_dst) {
+ GpuIdManager::TfToPlatformGpuId(tf_gpu_dst, &platform_gpu_dst));
+ if (platform_gpu_id == platform_gpu_dst) {
InterconnectLink* ilink = links->add_link();
ilink->set_device_id(tf_gpu_dst.value());
ilink->set_type("SAME_DEVICE");
@@ -1216,9 +1226,9 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
}
(*localities)[tf_gpu_id] = dev_locality;
- VLOG(1) << "GPUDevice CudaGpuId " << cuda_gpu_id << " TfGpuId " << tf_gpu_id
- << " on bus " << dev_locality.bus_id() << " numa: " << numa_node
- << " pci: " << desc.pci_bus_id()
+ VLOG(1) << "GPUDevice PlatformGpuId " << platform_gpu_id << " TfGpuId "
+ << tf_gpu_id << " on bus " << dev_locality.bus_id()
+ << " numa: " << numa_node << " pci: " << desc.pci_bus_id()
<< " DeviceLocality: " << dev_locality.DebugString();
}
return Status::OK();
@@ -1226,14 +1236,14 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
static int GetDefaultMinGPUMultiprocessorCount(
se::Platform* gpu_manager,
- const std::vector<CudaGpuId>& visible_gpu_order) {
+ const std::vector<PlatformGpuId>& visible_gpu_order) {
static const int kDefaultMinGPUMultiprocessorCount = 8;
// Find the highest multi-processor count across all visible GPUs.
int max_count = -1;
for (int i = 0; i < visible_gpu_order.size(); ++i) {
auto exec_status =
- GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, visible_gpu_order[i]);
+ GpuIdUtil::ExecutorForPlatformGpuId(gpu_manager, visible_gpu_order[i]);
if (!exec_status.ok()) {
continue;
}
@@ -1252,7 +1262,7 @@ static int GetDefaultMinGPUMultiprocessorCount(
static int GetMinGPUMultiprocessorCount(
se::Platform* gpu_manager,
- const std::vector<CudaGpuId>& visible_gpu_order) {
+ const std::vector<PlatformGpuId>& visible_gpu_order) {
const char* tf_min_gpu_core_count = getenv("TF_MIN_GPU_MULTIPROCESSOR_COUNT");
if (tf_min_gpu_core_count == nullptr ||
@@ -1330,18 +1340,20 @@ std::vector<CudaVersion> GetSupportedCudaComputeCapabilities() {
}
Status EnablePeerAccess(se::Platform* platform,
- const std::vector<CudaGpuId>& visible_gpu_order) {
+ const std::vector<PlatformGpuId>& visible_gpu_order) {
int possible_peer_count = 0;
int enabled_peer_count = 0;
for (int i = 0; i < visible_gpu_order.size(); ++i) {
- const CudaGpuId cuda_gpu_i = visible_gpu_order[i];
+ const PlatformGpuId platform_gpu_i = visible_gpu_order[i];
for (int j = 0; j < visible_gpu_order.size(); ++j) {
- const CudaGpuId cuda_gpu_j = visible_gpu_order[j];
+ const PlatformGpuId platform_gpu_j = visible_gpu_order[j];
// We have already validated that ExecutorForDevice() calls return OK.
se::StreamExecutor* from =
- GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_i).ValueOrDie();
+ GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_i)
+ .ValueOrDie();
se::StreamExecutor* to =
- GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_j).ValueOrDie();
+ GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_j)
+ .ValueOrDie();
if (from->CanEnablePeerAccessTo(to)) {
++possible_peer_count;
@@ -1349,7 +1361,8 @@ Status EnablePeerAccess(se::Platform* platform,
if (!status.ok()) {
LOG(WARNING)
<< "Unable to enable peer access between device ordinals "
- << cuda_gpu_i << " and " << cuda_gpu_j << ", status: " << status;
+ << platform_gpu_i << " and " << platform_gpu_j
+ << ", status: " << status;
} else {
++enabled_peer_count;
}
@@ -1372,22 +1385,23 @@ Status EnablePeerAccess(se::Platform* platform,
} // namespace
Status BaseGPUDeviceFactory::GetValidDeviceIds(
- const std::vector<CudaGpuId>& visible_gpu_order,
- std::vector<CudaGpuId>* ids) {
+ const std::vector<PlatformGpuId>& visible_gpu_order,
+ std::vector<PlatformGpuId>* ids) {
se::Platform* gpu_manager = GPUMachineManager();
bool new_gpu_found = false;
for (int i = 0; i < visible_gpu_order.size(); ++i) {
- const CudaGpuId cuda_gpu_id = visible_gpu_order[i];
+ const PlatformGpuId visible_gpu_id = visible_gpu_order[i];
- // Only perform this once per visible cuda gpu id.
- if (visible_gpu_initialized_[cuda_gpu_id.value()]) {
+ // Only perform this once per visible platform gpu id.
+ if (visible_gpu_initialized_[visible_gpu_id.value()]) {
continue;
}
- visible_gpu_initialized_[cuda_gpu_id.value()] = true;
+ visible_gpu_initialized_[visible_gpu_id.value()] = true;
new_gpu_found = true;
- auto executor = GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, cuda_gpu_id);
+ auto executor =
+ GpuIdUtil::ExecutorForPlatformGpuId(gpu_manager, visible_gpu_id);
if (!executor.ok()) {
return executor.status();
}
@@ -1435,9 +1449,9 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
// Filter out devices that don't have the right capability or power.
for (int i = 0; i < visible_gpu_order.size(); ++i) {
- const CudaGpuId visible_gpu_id = visible_gpu_order[i];
+ const PlatformGpuId visible_gpu_id = visible_gpu_order[i];
auto exec_status =
- GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, visible_gpu_id);
+ GpuIdUtil::ExecutorForPlatformGpuId(gpu_manager, visible_gpu_id);
if (!exec_status.ok()) {
LOG(INFO) << "Ignoring visible gpu device " << visible_gpu_id
<< " whose executor is in invalid state: "
@@ -1486,7 +1500,7 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
if (!ids->empty()) {
std::vector<int> raw_ids(ids->size());
std::transform(ids->begin(), ids->end(), raw_ids.begin(),
- [](CudaGpuId id) -> int { return id.value(); });
+ [](PlatformGpuId id) -> int { return id.value(); });
LOG(INFO) << "Adding visible gpu devices: "
<< str_util::Join(raw_ids, ", ");
}
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
index 56d03d7a8c..684cc0c1de 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -89,12 +89,12 @@ class BaseGPUDevice : public LocalDevice {
void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
DeviceContext* dc, Allocator* allocator) override;
- // Returns the CUDA GPU id of this device within the native driver system;
+ // Returns the platform GPU id of this device within the native driver system;
// e.g., for CUDA this is the ordinal of the GPU within the system.
int gpu_id() const {
- CudaGpuId cuda_gpu_id;
- TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id_, &cuda_gpu_id));
- return cuda_gpu_id.value();
+ PlatformGpuId platform_gpu_id;
+ TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id_, &platform_gpu_id));
+ return platform_gpu_id.value();
}
// The executor that provides control for the device; e.g., for CUDA this
@@ -168,14 +168,14 @@ class BaseGPUDeviceFactory : public DeviceFactory {
int32 strength;
static const int kSameDeviceStrength;
static const int kStreamExecutorStrength;
- std::set<std::pair<CudaGpuId, CudaGpuId>> directed_links;
+ std::set<std::pair<PlatformGpuId, PlatformGpuId>> directed_links;
};
protected:
// Populates *maps with interconnect maps for all local direct access
// pathways between GPUs.
virtual Status GetInterconnectMaps(
- const std::vector<CudaGpuId>& visible_gpu_order,
+ const std::vector<PlatformGpuId>& visible_gpu_order,
se::Platform* gpu_manager, std::vector<InterconnectMap>* maps);
struct TfGpuIdHash {
@@ -207,16 +207,16 @@ class BaseGPUDeviceFactory : public DeviceFactory {
Allocator* gpu_allocator,
Allocator* cpu_allocator) = 0;
- // Returns into 'ids' the list of valid CUDA GPU ids, in the order that
+ // Returns into 'ids' the list of valid platform GPU ids, in the order that
// they should map to TF GPU ids "/device:GPU:0", "/device:GPU:1", etc,
// based upon 'visible_gpu_order' which was generated by parsing
// GPUOptions::visible_device_list which is a comma-separated list of CUDA GPU
// ids.
- Status GetValidDeviceIds(const std::vector<CudaGpuId>& visible_gpu_order,
- std::vector<CudaGpuId>* ids);
+ Status GetValidDeviceIds(const std::vector<PlatformGpuId>& visible_gpu_order,
+ std::vector<PlatformGpuId>* ids);
- // visible_gpu_initialized_[cuda_gpu_id] is true if visible GPU cuda_gpu_id
- // has been initialized by the process.
+ // visible_gpu_initialized_[platform_gpu_id] is true if visible GPU
+ // platform_gpu_id has been initialized by the process.
std::unordered_map<int, bool> visible_gpu_initialized_;
};
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
index daf59f0560..36294094e9 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
@@ -30,18 +30,21 @@ namespace tensorflow {
namespace {
const char* kDeviceNamePrefix = "/job:localhost/replica:0/task:0";
-int64 GetTotalGPUMemory(CudaGpuId gpu_id) {
+int64 GetTotalGPUMemory(PlatformGpuId gpu_id) {
se::StreamExecutor* se =
- GpuIdUtil::ExecutorForCudaGpuId(GPUMachineManager(), gpu_id).ValueOrDie();
+ GpuIdUtil::ExecutorForPlatformGpuId(GPUMachineManager(), gpu_id)
+ .ValueOrDie();
int64 total_memory, available_memory;
CHECK(se->DeviceMemoryUsage(&available_memory, &total_memory));
return total_memory;
}
-Status GetComputeCapability(CudaGpuId gpu_id, int* cc_major, int* cc_minor) {
+Status GetComputeCapability(PlatformGpuId gpu_id, int* cc_major,
+ int* cc_minor) {
se::StreamExecutor* se =
- GpuIdUtil::ExecutorForCudaGpuId(GPUMachineManager(), gpu_id).ValueOrDie();
+ GpuIdUtil::ExecutorForPlatformGpuId(GPUMachineManager(), gpu_id)
+ .ValueOrDie();
if (!se->GetDeviceDescription().cuda_compute_capability(cc_major, cc_minor)) {
*cc_major = 0;
*cc_minor = 0;
@@ -223,7 +226,7 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevices) {
// error.
TEST_F(GPUDeviceTest, UnifiedMemoryUnavailableOnPrePascalGpus) {
int cc_major, cc_minor;
- TF_ASSERT_OK(GetComputeCapability(CudaGpuId(0), &cc_major, &cc_minor));
+ TF_ASSERT_OK(GetComputeCapability(PlatformGpuId(0), &cc_major, &cc_minor));
// Exit early while running on Pascal or later GPUs.
if (cc_major >= 6) {
return;
@@ -244,10 +247,10 @@ TEST_F(GPUDeviceTest, UnifiedMemoryUnavailableOnPrePascalGpus) {
// more memory than what is available on the device.
TEST_F(GPUDeviceTest, UnifiedMemoryAllocation) {
static constexpr double kGpuMemoryFraction = 1.2;
- static constexpr CudaGpuId kCudaGpuId(0);
+ static constexpr PlatformGpuId kPlatformGpuId(0);
int cc_major, cc_minor;
- TF_ASSERT_OK(GetComputeCapability(kCudaGpuId, &cc_major, &cc_minor));
+ TF_ASSERT_OK(GetComputeCapability(kPlatformGpuId, &cc_major, &cc_minor));
// Exit early if running on pre-Pascal GPUs.
if (cc_major < 6) {
LOG(INFO)
@@ -262,7 +265,7 @@ TEST_F(GPUDeviceTest, UnifiedMemoryAllocation) {
ASSERT_EQ(1, devices.size());
int64 memory_limit = devices[0]->attributes().memory_limit();
- ASSERT_EQ(memory_limit, static_cast<int64>(GetTotalGPUMemory(kCudaGpuId) *
+ ASSERT_EQ(memory_limit, static_cast<int64>(GetTotalGPUMemory(kPlatformGpuId) *
kGpuMemoryFraction));
AllocatorAttributes allocator_attributes = AllocatorAttributes();
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id.h b/tensorflow/core/common_runtime/gpu/gpu_id.h
index 2a6caea296..f0d9022821 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_id.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_id.h
@@ -25,10 +25,10 @@ namespace tensorflow {
// physical machine, it can be filtered by CUDA environment variable
// CUDA_VISIBLE_DEVICES. Note that this id is not visible to Tensorflow, but
// result after filtering by CUDA_VISIBLE_DEVICES is visible to TF and is
-// called CUDA GPU id as below. See
+// called platform GPU id as below. See
// http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
// for more details.
-// - CUDA GPU id (also called *visible* GPU id in
+// - *platform* GPU id (also called *visible* GPU id in
// third_party/tensorflow/core/protobuf/config.proto): this is the id that is
// visible to Tensorflow after filtering by CUDA_VISIBLE_DEVICES, and is
// generated by the CUDA GPU driver. It starts from 0 and is used for CUDA API
@@ -39,14 +39,14 @@ namespace tensorflow {
// field of the device name "/device:GPU:<id>", and is also the identifier of
// a BaseGPUDevice. Note that the configuration allows us to create multiple
// BaseGPUDevice per GPU hardware in order to use multi CUDA streams on the
-// hardware, so the mapping between TF GPU id and CUDA GPU id is not a 1:1
+// hardware, so the mapping between TF GPU id and platform GPU id is not a 1:1
// mapping, see the example below.
//
// For example, assuming that in the machine we have GPU device with index 0, 1,
// 2 and 3 (physical GPU id). Setting "CUDA_VISIBLE_DEVICES=1,2,3" will create
-// the following mapping between CUDA GPU id and physical GPU id:
+// the following mapping between platform GPU id and physical GPU id:
//
-// CUDA GPU id -> physical GPU id
+// platform GPU id -> physical GPU id
// 0 -> 1
// 1 -> 2
// 2 -> 3
@@ -56,32 +56,32 @@ namespace tensorflow {
//
// Assuming we configure the Session to create one BaseGPUDevice per GPU
// hardware, then setting GPUOptions::visible_device_list to "2,0" will create
-// the following mappting between TF GPU id and CUDA GPU id:
+// the following mappting between TF GPU id and platform GPU id:
//
-// TF GPU id -> CUDA GPU ID
+// TF GPU id -> platform GPU ID
// 0 (i.e. /device:GPU:0) -> 2
// 1 (i.e. /device:GPU:1) -> 0
//
-// Note that CUDA GPU id 1 is filtered out by GPUOptions::visible_device_list,
-// so it won't be used by the TF process.
+// Note that platform GPU id 1 is filtered out by
+// GPUOptions::visible_device_list, so it won't be used by the TF process.
//
// On the other hand, if we configure it to create 2 BaseGPUDevice per GPU
// hardware, then setting GPUOptions::visible_device_list to "2,0" will create
-// the following mappting between TF GPU id and CUDA GPU id:
+// the following mappting between TF GPU id and platform GPU id:
//
-// TF GPU id -> CUDA GPU ID
+// TF GPU id -> platform GPU ID
// 0 (i.e. /device:GPU:0) -> 2
// 1 (i.e. /device:GPU:1) -> 2
// 2 (i.e. /device:GPU:2) -> 0
// 3 (i.e. /device:GPU:3) -> 0
//
-// We create strong-typed integer classes for both TF GPU id and CUDA GPU id to
-// minimize programming errors and improve code readability. Except for the
+// We create strong-typed integer classes for both TF GPU id and platform GPU id
+// to minimize programming errors and improve code readability. Except for the
// StreamExecutor interface (as we don't change its API), whenever we need a
-// TF GPU id (or CUDA GPU id) we should use TfGpuId (or CudaGpuId) instead of a
-// raw integer.
+// TF GPU id (or platform GPU id) we should use TfGpuId (or PlatformGpuId)
+// instead of a raw integer.
TF_LIB_GTL_DEFINE_INT_TYPE(TfGpuId, int32);
-TF_LIB_GTL_DEFINE_INT_TYPE(CudaGpuId, int32);
+TF_LIB_GTL_DEFINE_INT_TYPE(PlatformGpuId, int32);
} // namespace tensorflow
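For illustration, a minimal sketch (not part of the commit) that renders the visible_device_list = "2,0" example from the comment above in code, using the GpuIdManager API renamed later in this diff; ExampleMapping and the id values are hypothetical.

    #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
    #include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
    #include "tensorflow/core/lib/core/errors.h"

    namespace tensorflow {
    Status ExampleMapping() {
      // With GPUOptions::visible_device_list = "2,0" and one BaseGPUDevice
      // per GPU, /device:GPU:0 maps to platform GPU 2 and /device:GPU:1 to 0.
      TF_RETURN_IF_ERROR(
          GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(0), PlatformGpuId(2)));
      TF_RETURN_IF_ERROR(
          GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(1), PlatformGpuId(0)));

      PlatformGpuId platform_gpu_id;
      TF_RETURN_IF_ERROR(
          GpuIdManager::TfToPlatformGpuId(TfGpuId(0), &platform_gpu_id));
      // platform_gpu_id.value() == 2; the strong types keep the two id
      // spaces from being mixed up silently.
      return Status::OK();
    }
    }  // namespace tensorflow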
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc
index b5099dc8ef..2b40730119 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc
@@ -26,26 +26,27 @@ limitations under the License.
namespace tensorflow {
namespace {
-// Manages the map between TfGpuId and CUDA GPU id.
-class TfToCudaGpuIdMap {
+// Manages the map between TfGpuId and platform GPU id.
+class TfToPlatformGpuIdMap {
public:
- static TfToCudaGpuIdMap* singleton() {
- static auto* id_map = new TfToCudaGpuIdMap;
+ static TfToPlatformGpuIdMap* singleton() {
+ static auto* id_map = new TfToPlatformGpuIdMap;
return id_map;
}
- Status Insert(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id) LOCKS_EXCLUDED(mu_) {
+ Status Insert(TfGpuId tf_gpu_id, PlatformGpuId platform_gpu_id)
+ LOCKS_EXCLUDED(mu_) {
std::pair<IdMapType::iterator, bool> result;
{
mutex_lock lock(mu_);
- result = id_map_.insert({tf_gpu_id.value(), cuda_gpu_id.value()});
+ result = id_map_.insert({tf_gpu_id.value(), platform_gpu_id.value()});
}
- if (!result.second && cuda_gpu_id.value() != result.first->second) {
+ if (!result.second && platform_gpu_id.value() != result.first->second) {
return errors::AlreadyExists(
"TensorFlow device (GPU:", tf_gpu_id.value(),
") is being mapped to "
"multiple CUDA devices (",
- cuda_gpu_id.value(), " now, and ", result.first->second,
+ platform_gpu_id.value(), " now, and ", result.first->second,
" previously), which is not supported. "
"This may be the result of providing different GPU configurations "
"(ConfigProto.gpu_options, for example different visible_device_list)"
@@ -56,17 +57,17 @@ class TfToCudaGpuIdMap {
return Status::OK();
}
- bool Find(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) const
+ bool Find(TfGpuId tf_gpu_id, PlatformGpuId* platform_gpu_id) const
LOCKS_EXCLUDED(mu_) {
mutex_lock lock(mu_);
auto result = id_map_.find(tf_gpu_id.value());
if (result == id_map_.end()) return false;
- *cuda_gpu_id = result->second;
+ *platform_gpu_id = result->second;
return true;
}
private:
- TfToCudaGpuIdMap() = default;
+ TfToPlatformGpuIdMap() = default;
void TestOnlyReset() LOCKS_EXCLUDED(mu_) {
mutex_lock lock(mu_);
@@ -78,17 +79,18 @@ class TfToCudaGpuIdMap {
IdMapType id_map_ GUARDED_BY(mu_);
friend class ::tensorflow::GpuIdManager;
- TF_DISALLOW_COPY_AND_ASSIGN(TfToCudaGpuIdMap);
+ TF_DISALLOW_COPY_AND_ASSIGN(TfToPlatformGpuIdMap);
};
} // namespace
-Status GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id,
- CudaGpuId cuda_gpu_id) {
- return TfToCudaGpuIdMap::singleton()->Insert(tf_gpu_id, cuda_gpu_id);
+Status GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId tf_gpu_id,
+ PlatformGpuId platform_gpu_id) {
+ return TfToPlatformGpuIdMap::singleton()->Insert(tf_gpu_id, platform_gpu_id);
}
-Status GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) {
- if (TfToCudaGpuIdMap::singleton()->Find(tf_gpu_id, cuda_gpu_id)) {
+Status GpuIdManager::TfToPlatformGpuId(TfGpuId tf_gpu_id,
+ PlatformGpuId* platform_gpu_id) {
+ if (TfToPlatformGpuIdMap::singleton()->Find(tf_gpu_id, platform_gpu_id)) {
return Status::OK();
}
return errors::NotFound("TensorFlow device GPU:", tf_gpu_id.value(),
@@ -96,7 +98,7 @@ Status GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) {
}
void GpuIdManager::TestOnlyReset() {
- TfToCudaGpuIdMap::singleton()->TestOnlyReset();
+ TfToPlatformGpuIdMap::singleton()->TestOnlyReset();
}
} // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h
index 491d92ccdd..62df4310c4 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h
@@ -21,15 +21,17 @@ limitations under the License.
namespace tensorflow {
-// Class that maintains a map from TfGpuId to CudaGpuId, and manages the
+// Class that maintains a map from TfGpuId to PlatformGpuId, and manages the
// translation between them.
class GpuIdManager {
public:
- // Adds a mapping from tf_gpu_id to cuda_gpu_id.
- static Status InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id);
+ // Adds a mapping from tf_gpu_id to platform_gpu_id.
+ static Status InsertTfPlatformGpuIdPair(TfGpuId tf_gpu_id,
+ PlatformGpuId platform_gpu_id);
- // Gets the cuda_gpu_id associated with tf_gpu_id. Returns OK if found.
- static Status TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id);
+ // Gets the platform_gpu_id associated with tf_gpu_id. Returns OK if found.
+ static Status TfToPlatformGpuId(TfGpuId tf_gpu_id,
+ PlatformGpuId* platform_gpu_id);
// Clears the map. Used in unit tests only.
static void TestOnlyReset();
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc
index a663ec7051..8bf3c6a308 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc
@@ -22,38 +22,38 @@ limitations under the License.
namespace tensorflow {
namespace {
-CudaGpuId TfToCudaGpuId(TfGpuId tf) {
- CudaGpuId cuda;
- TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf, &cuda));
- return cuda;
+PlatformGpuId TfToPlatformGpuId(TfGpuId tf) {
+ PlatformGpuId platform_gpu_id;
+ TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf, &platform_gpu_id));
+ return platform_gpu_id;
}
TEST(GpuIdManagerTest, Basics) {
TfGpuId key_0(0);
- CudaGpuId value_0(0);
- TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_0, value_0));
- EXPECT_EQ(value_0, TfToCudaGpuId(key_0));
+ PlatformGpuId value_0(0);
+ TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_0, value_0));
+ EXPECT_EQ(value_0, TfToPlatformGpuId(key_0));
  // Multiple calls mapping the same value are ok.
- TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_0, value_0));
- EXPECT_EQ(value_0, TfToCudaGpuId(key_0));
+ TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_0, value_0));
+ EXPECT_EQ(value_0, TfToPlatformGpuId(key_0));
// Map a different TfGpuId to a different value.
TfGpuId key_1(3);
- CudaGpuId value_1(2);
- TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_1, value_1));
- EXPECT_EQ(value_1, TfToCudaGpuId(key_1));
+ PlatformGpuId value_1(2);
+ TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_1, value_1));
+ EXPECT_EQ(value_1, TfToPlatformGpuId(key_1));
// Mapping a different TfGpuId to the same value is ok.
TfGpuId key_2(10);
- TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_2, value_1));
- EXPECT_EQ(value_1, TfToCudaGpuId(key_2));
+ TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_2, value_1));
+ EXPECT_EQ(value_1, TfToPlatformGpuId(key_2));
// Mapping the same TfGpuId to a different value.
- ASSERT_FALSE(GpuIdManager::InsertTfCudaGpuIdPair(key_2, value_0).ok());
+ ASSERT_FALSE(GpuIdManager::InsertTfPlatformGpuIdPair(key_2, value_0).ok());
// Getting a nonexistent mapping.
- ASSERT_FALSE(GpuIdManager::TfToCudaGpuId(TfGpuId(100), &value_0).ok());
+ ASSERT_FALSE(GpuIdManager::TfToPlatformGpuId(TfGpuId(100), &value_0).ok());
}
} // namespace
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_utils.h b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h
index b9c66b3328..b1f10fb1dc 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_id_utils.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h
@@ -24,34 +24,37 @@ limitations under the License.
namespace tensorflow {
-// Utility methods for translation between Tensorflow GPU ids and CUDA GPU ids.
+// Utility methods for translation between Tensorflow GPU ids and platform GPU
+// ids.
class GpuIdUtil {
public:
// Convenient methods for getting the associated executor given a TfGpuId or
- // CudaGpuId.
- static se::port::StatusOr<se::StreamExecutor*> ExecutorForCudaGpuId(
- se::Platform* gpu_manager, CudaGpuId cuda_gpu_id) {
- return gpu_manager->ExecutorForDevice(cuda_gpu_id.value());
+ // PlatformGpuId.
+ static se::port::StatusOr<se::StreamExecutor*> ExecutorForPlatformGpuId(
+ se::Platform* gpu_manager, PlatformGpuId platform_gpu_id) {
+ return gpu_manager->ExecutorForDevice(platform_gpu_id.value());
}
- static se::port::StatusOr<se::StreamExecutor*> ExecutorForCudaGpuId(
- CudaGpuId cuda_gpu_id) {
- return ExecutorForCudaGpuId(GPUMachineManager(), cuda_gpu_id);
+ static se::port::StatusOr<se::StreamExecutor*> ExecutorForPlatformGpuId(
+ PlatformGpuId platform_gpu_id) {
+ return ExecutorForPlatformGpuId(GPUMachineManager(), platform_gpu_id);
}
static se::port::StatusOr<se::StreamExecutor*> ExecutorForTfGpuId(
TfGpuId tf_gpu_id) {
- CudaGpuId cuda_gpu_id;
- TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
- return ExecutorForCudaGpuId(cuda_gpu_id);
+ PlatformGpuId platform_gpu_id;
+ TF_RETURN_IF_ERROR(
+ GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
+ return ExecutorForPlatformGpuId(platform_gpu_id);
}
- // Verify that the cuda_gpu_id associated with a TfGpuId is legitimate.
+ // Verify that the platform_gpu_id associated with a TfGpuId is legitimate.
static void CheckValidTfGpuId(TfGpuId tf_gpu_id) {
- CudaGpuId cuda_gpu_id;
- TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
+ PlatformGpuId platform_gpu_id;
+ TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
const int visible_device_count = GPUMachineManager()->VisibleDeviceCount();
- CHECK_LT(cuda_gpu_id.value(), visible_device_count)
- << "cuda_gpu_id is outside discovered device range."
- << " TF GPU id: " << tf_gpu_id << " CUDA GPU id: " << cuda_gpu_id
+ CHECK_LT(platform_gpu_id.value(), visible_device_count)
+ << "platform_gpu_id is outside discovered device range."
+ << " TF GPU id: " << tf_gpu_id
+ << " platform GPU id: " << platform_gpu_id
<< " visible device count: " << visible_device_count;
}
};
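The GpuIdUtil helpers follow the same renaming. A hedged sketch of fetching the StreamExecutor for a TF GPU id, assuming the include path above and the se::port::StatusOr accessors implied by these declarations:

#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"

namespace tensorflow {

// CHECK-fails if the TfGpuId has no registered platform GPU id or if the
// mapped platform id lies outside the discovered device range.
se::StreamExecutor* ExecutorForTfGpuIdOrDie(TfGpuId tf_gpu_id) {
  GpuIdUtil::CheckValidTfGpuId(tf_gpu_id);
  auto executor_or = GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id);
  TF_CHECK_OK(executor_or.status());
  return executor_or.ValueOrDie();
}

}  // namespace tensorflow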
diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
index b18688174d..a5b46382f1 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
@@ -106,22 +106,23 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
return nullptr;
}
- CudaGpuId cuda_gpu_id;
- TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
+ PlatformGpuId platform_gpu_id;
+ TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
gpu_allocator =
- new GPUBFCAllocator(cuda_gpu_id, total_bytes, options,
+ new GPUBFCAllocator(platform_gpu_id, total_bytes, options,
strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc"));
// If true, checks for memory overwrites by writing
// distinctive patterns on both ends of allocated memory.
if (useCudaMemoryGuardAllocator()) {
- gpu_allocator = new GPUDebugAllocator(gpu_allocator, cuda_gpu_id);
- gpu_allocator = new GPUNanResetAllocator(gpu_allocator, cuda_gpu_id);
+ gpu_allocator = new GPUDebugAllocator(gpu_allocator, platform_gpu_id);
+ gpu_allocator = new GPUNanResetAllocator(gpu_allocator, platform_gpu_id);
} else if (useCudaMallocAllocator()) {
// If true, passes all allocation requests through to cudaMalloc
// useful for doing memory debugging with tools like cuda-memcheck
// **WARNING** probably will not work in a multi-gpu scenario
- gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, cuda_gpu_id);
+ gpu_allocator =
+ new GPUcudaMallocAllocator(gpu_allocator, platform_gpu_id);
}
gpu_allocators_[tf_gpu_id.value()] = gpu_allocator;
@@ -138,7 +139,7 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
ProcessState::MemDesc md;
md.loc = ProcessState::MemDesc::GPU;
- md.dev_index = cuda_gpu_id.value();
+ md.dev_index = platform_gpu_id.value();
md.gpu_registered = false;
md.nic_registered = true;
if (static_cast<int64>(gpu_al_.size()) <= tf_gpu_id.value()) {
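The wrapping order in GetGPUAllocator can be read as a decorator chain keyed by the platform GPU id. A simplified sketch of that chain, not the actual GPUProcessState code; the allocator name and byte count are placeholders:

#include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
#include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h"

namespace tensorflow {

Allocator* MakeGuardedGpuAllocator(PlatformGpuId platform_gpu_id,
                                   size_t total_bytes,
                                   const GPUOptions& options) {
  // The base BFC allocator now takes the platform GPU id directly.
  Allocator* gpu_allocator = new GPUBFCAllocator(
      platform_gpu_id, total_bytes, options, "GPU_0_bfc");
  // Optional memory-guard wrappers, mirroring the branch above; both also
  // take the platform GPU id.
  gpu_allocator = new GPUDebugAllocator(gpu_allocator, platform_gpu_id);
  gpu_allocator = new GPUNanResetAllocator(gpu_allocator, platform_gpu_id);
  return gpu_allocator;
}

}  // namespace tensorflow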
diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc
index b97603c890..e4f6bf7c86 100644
--- a/tensorflow/core/grappler/clusters/single_machine.cc
+++ b/tensorflow/core/grappler/clusters/single_machine.cc
@@ -93,13 +93,13 @@ Status SingleMachine::Provision() {
strings::StrCat("Not able to parse GPU device name: ", dev.name()));
}
TfGpuId tf_gpu_id(parsed.id);
- CudaGpuId cuda_gpu_id;
- Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+ PlatformGpuId platform_gpu_id;
+ Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
if (!s.ok()) {
return errors::Unavailable("Unknown TF GPU device with id ",
tf_gpu_id.value(), ": ", s.ToString());
}
- attr = GetLocalGPUInfo(cuda_gpu_id);
+ attr = GetLocalGPUInfo(platform_gpu_id);
} else if (dev.device_type().find("XLA") == string::npos) {
// Filter out the fake XLA devices to avoid double counting the actual
// hardware resources that are available.
diff --git a/tensorflow/core/grappler/clusters/utils.cc b/tensorflow/core/grappler/clusters/utils.cc
index a7519725a5..567e7c075e 100644
--- a/tensorflow/core/grappler/clusters/utils.cc
+++ b/tensorflow/core/grappler/clusters/utils.cc
@@ -70,13 +70,14 @@ DeviceProperties GetLocalCPUInfo() {
return device;
}
-DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id) {
+DeviceProperties GetLocalGPUInfo(PlatformGpuId platform_gpu_id) {
DeviceProperties device;
device.set_type("GPU");
#if GOOGLE_CUDA
cudaDeviceProp properties;
- cudaError_t error = cudaGetDeviceProperties(&properties, cuda_gpu_id.value());
+ cudaError_t error =
+ cudaGetDeviceProperties(&properties, platform_gpu_id.value());
if (error != cudaSuccess) {
device.set_type("UNKNOWN");
LOG(ERROR) << "Failed to get device properties, error code: " << error;
@@ -122,15 +123,15 @@ DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) {
} else if (device.type == "GPU") {
if (device.has_id) {
TfGpuId tf_gpu_id(device.id);
- CudaGpuId cuda_gpu_id;
- Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+ PlatformGpuId platform_gpu_id;
+ Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
if (!s.ok()) {
LOG(ERROR) << s;
return unknown;
}
- return GetLocalGPUInfo(cuda_gpu_id);
+ return GetLocalGPUInfo(platform_gpu_id);
} else {
- return GetLocalGPUInfo(CudaGpuId(0));
+ return GetLocalGPUInfo(PlatformGpuId(0));
}
}
return unknown;
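Grappler callers keep passing the "virtual" TF GPU id; GetDeviceInfo resolves it to a platform GPU id through GpuIdManager before querying device properties. A hedged sketch of such a call, with the include paths assumed:

#include "tensorflow/core/grappler/clusters/utils.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/device_name_utils.h"

namespace tensorflow {
namespace grappler {

DeviceProperties DescribeFirstGpu() {
  DeviceNameUtils::ParsedName parsed;
  // The id in the device name is a TF GPU id, not a platform GPU id.
  CHECK(DeviceNameUtils::ParseFullName("/device:GPU:0", &parsed));
  return GetDeviceInfo(parsed);
}

}  // namespace grappler
}  // namespace tensorflow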
diff --git a/tensorflow/core/grappler/clusters/utils.h b/tensorflow/core/grappler/clusters/utils.h
index ca15c48006..f0a342b728 100644
--- a/tensorflow/core/grappler/clusters/utils.h
+++ b/tensorflow/core/grappler/clusters/utils.h
@@ -28,7 +28,7 @@ DeviceProperties GetLocalCPUInfo();
// Returns the DeviceProperties for the specified GPU attached to the server on
// which grappler is running.
-DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id);
+DeviceProperties GetLocalGPUInfo(PlatformGpuId platform_gpu_id);
// Returns the DeviceProperties of the specified device
DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device);
diff --git a/tensorflow/core/grappler/clusters/utils_test.cc b/tensorflow/core/grappler/clusters/utils_test.cc
index 74218adbac..3863d62980 100644
--- a/tensorflow/core/grappler/clusters/utils_test.cc
+++ b/tensorflow/core/grappler/clusters/utils_test.cc
@@ -31,22 +31,22 @@ TEST(UtilsTest, GetLocalGPUInfo) {
LOG(INFO) << "CUDA is enabled.";
DeviceProperties properties;
- // Invalid CUDA GPU ID.
- properties = GetLocalGPUInfo(CudaGpuId(100));
+ // Invalid platform GPU ID.
+ properties = GetLocalGPUInfo(PlatformGpuId(100));
EXPECT_EQ("UNKNOWN", properties.type());
- // Succeed when a valid CUDA GPU id was inserted.
- properties = GetLocalGPUInfo(CudaGpuId(0));
+ // Succeed when a valid platform GPU id was inserted.
+ properties = GetLocalGPUInfo(PlatformGpuId(0));
EXPECT_EQ("GPU", properties.type());
EXPECT_EQ("NVIDIA", properties.vendor());
#else
LOG(INFO) << "CUDA is not enabled.";
DeviceProperties properties;
- properties = GetLocalGPUInfo(CudaGpuId(0));
+ properties = GetLocalGPUInfo(PlatformGpuId(0));
EXPECT_EQ("GPU", properties.type());
- properties = GetLocalGPUInfo(CudaGpuId(100));
+ properties = GetLocalGPUInfo(PlatformGpuId(100));
EXPECT_EQ("GPU", properties.type());
#endif
}
@@ -74,20 +74,20 @@ TEST(UtilsTest, GetDeviceInfo) {
EXPECT_EQ("NVIDIA", properties.vendor());
#endif
- // TF to CUDA GPU id mapping entry doesn't exist.
+ // TF to platform GPU id mapping entry doesn't exist.
device.has_id = true;
device.id = 0;
properties = GetDeviceInfo(device);
EXPECT_EQ("UNKNOWN", properties.type());
#if GOOGLE_CUDA
- // Invalid CUDA GPU id.
- GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(0), CudaGpuId(100));
+ // Invalid platform GPU id.
+ GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(0), PlatformGpuId(100));
properties = GetDeviceInfo(device);
EXPECT_EQ("UNKNOWN", properties.type());
- // Valid CUDA GPU id.
- GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(1), CudaGpuId(0));
+ // Valid platform GPU id.
+ GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(1), PlatformGpuId(0));
device.id = 1;
properties = GetDeviceInfo(device);
EXPECT_EQ("GPU", properties.type());
diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc
index aad00ce039..7691f25327 100644
--- a/tensorflow/core/grappler/costs/utils.cc
+++ b/tensorflow/core/grappler/costs/utils.cc
@@ -209,13 +209,13 @@ DeviceProperties GetDeviceInfo(const string& device_str) {
if (DeviceNameUtils::ParseFullName(device_str, &parsed)) {
if (parsed.type == "GPU") {
TfGpuId tf_gpu_id(parsed.id);
- CudaGpuId cuda_gpu_id;
- Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+ PlatformGpuId platform_gpu_id;
+ Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
if (!s.ok()) {
// We are probably running simulation without linking cuda libraries.
- cuda_gpu_id = CudaGpuId(parsed.id);
+ platform_gpu_id = PlatformGpuId(parsed.id);
}
- return GetLocalGPUInfo(cuda_gpu_id);
+ return GetLocalGPUInfo(platform_gpu_id);
} else if (parsed.type == "CPU") {
return GetLocalCPUInfo();
}
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index da3a99565e..c68504a272 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -68,7 +68,7 @@ message GPUOptions {
// after the process starts. Users are required to use vendor
// specific mechanisms (e.g., CUDA_VISIBLE_DEVICES) to control the
// physical to visible device mapping prior to invoking TensorFlow.
- // 2. In the code, the ids in this list are also called "CUDA GPU id"s,
+ // 2. In the code, the ids in this list are also called "platform GPU id"s,
// and the 'virtual' ids of GPU devices (i.e. the ids in the device
// name "/device:GPU:<id>") are also called "TF GPU id"s. Please
// refer to third_party/tensorflow/core/common_runtime/gpu/gpu_id.h