author     TensorFlower Gardener <gardener@tensorflow.org>  2018-08-21 13:20:25 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org>  2018-08-21 13:20:25 -0700
commit     7cc6abab4e3569e4e44af7d577a71d15039f014b (patch)
tree       620113bcef5d4de3a0823313d497b064b2cacebd /tensorflow/contrib/tensorrt
parent     a7e961ac883d45df76a481a796db9ea86eba7c78 (diff)
parent     4684421d9aa3e63dc943074025ffdc89df1a1980 (diff)
Merge pull request #21508 from aaroey:fix_allocator_when_engine_device_not_specified
PiperOrigin-RevId: 209647689
Diffstat (limited to 'tensorflow/contrib/tensorrt')
-rw-r--r--  tensorflow/contrib/tensorrt/BUILD                          |  26
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_graph.cc       |  73
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_graph.h        |   6
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_graph_test.cc  | 140
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_nodes.cc       |   4
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_nodes.h        |   5
6 files changed, 228 insertions(+), 26 deletions(-)
diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD
index 26236a0435..a0fc3e43a9 100644
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@@ -280,6 +280,7 @@ tf_cuda_library(
"//tensorflow/core/grappler:grappler_item",
"//tensorflow/core/grappler:utils",
"//tensorflow/core:framework_lite",
+ "//tensorflow/core:gpu_runtime",
"//tensorflow/core:graph",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
@@ -293,6 +294,31 @@ tf_cuda_library(
]) + tf_custom_op_library_additional_deps(),
)
+tf_cuda_cc_test(
+ name = "convert_graph_test",
+ size = "medium",
+ srcs = ["convert/convert_graph_test.cc"],
+ tags = [
+ "no_cuda_on_cpu_tap",
+ "no_windows",
+ "nomac",
+ ],
+ deps = [
+ ":trt_conversion",
+ "//tensorflow/core/grappler:grappler_item",
+ "//tensorflow/core/grappler/clusters:cluster",
+ "//tensorflow/core:core_cpu",
+ "//tensorflow/core:core_cpu_base",
+ "//tensorflow/core:direct_session",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:test",
+ "//tensorflow/core:test_main",
+ ] + if_tensorrt([
+ "@local_config_tensorrt//:nv_infer",
+ ]),
+)
+
# Library for the segmenting portion of TensorRT operation creation
cc_library(
name = "segment",
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index 21ec8b0b30..b019c99882 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -31,6 +31,9 @@ limitations under the License.
#include "tensorflow/contrib/tensorrt/resources/trt_resources.h"
#include "tensorflow/contrib/tensorrt/segment/segment.h"
#include "tensorflow/contrib/tensorrt/test/utils.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
#include "tensorflow/core/framework/function.h"
#include "tensorflow/core/framework/graph_to_functiondef.h"
#include "tensorflow/core/framework/node_def_builder.h"
@@ -772,33 +775,55 @@ std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
const ConversionParams& params, const EngineInfo& engine) {
int cuda_device_id = -1;
tensorflow::Allocator* dev_allocator = nullptr;
- if (params.cluster) {
- std::vector<tensorflow::Device*> devices;
- if (!engine.device.empty() && params.cluster->GetDeviceSet()) {
- DeviceNameUtils::ParsedName parsed_name;
- if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
- parsed_name.has_id) {
- params.cluster->GetDeviceSet()->FindMatchingDevices(parsed_name,
- &devices);
+ if (params.cluster == nullptr || params.cluster->GetDeviceSet() == nullptr ||
+ engine.device.empty()) {
+ // If device is not set, use the first found GPU device for the conversion.
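+ // Note: 100 is just an upper bound on the number of TF GPU ids probed;
+ // the loop stops at the first id that has a valid CUDA device mapping.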
+ for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) {
+ TfGpuId tf_gpu_id(tf_gpu_id_value);
+ CudaGpuId cuda_gpu_id;
+ Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+ if (s.ok()) {
+ VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device "
+ << cuda_gpu_id.value();
+ cuda_device_id = cuda_gpu_id.value();
+ GPUOptions gpu_options;
+ // If the TF to Cuda gpu id mapping exists, the device and corresponding
+ // allocator must have been initialized already, so the
+ // GetGPUAllocator() call won't create a new allocator.
+ dev_allocator = GPUProcessState::singleton()->GetGPUAllocator(
+ gpu_options, tf_gpu_id, 1);
+ break;
}
+ LOG(ERROR) << "TF GPU with id " << tf_gpu_id_value << " does not exist "
+ << s;
}
- if (!devices.empty()) {
- if (devices.size() > 1) {
- string msg = "Found multiple matching devices using name '";
- StrAppend(&msg, engine.device, "': ");
- for (auto d : devices) StrAppend(&msg, d->name(), ", ");
- StrAppend(&msg, ". Will get the allocator from first one.");
- LOG(WARNING) << msg;
- }
- tensorflow::AllocatorAttributes alloc_attr;
- cuda_device_id = devices[0]->tensorflow_gpu_device_info()->gpu_id;
- dev_allocator = devices[0]->GetAllocator(alloc_attr);
- VLOG(1) << "Using allocator " << dev_allocator->Name()
- << " and cuda_device_id " << cuda_device_id;
- } else {
- LOG(WARNING) << "Cluster is set but device '" << engine.device
- << "' is not found in the cluster";
+ return std::make_pair(cuda_device_id, dev_allocator);
+ }
+
+ // Use the device requested by the engine.
+ auto device_set = params.cluster->GetDeviceSet();
+ std::vector<tensorflow::Device*> devices;
+ DeviceNameUtils::ParsedName parsed_name;
+ if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
+ parsed_name.has_id) {
+ device_set->FindMatchingDevices(parsed_name, &devices);
+ }
+ if (!devices.empty()) {
+ if (devices.size() > 1) {
+ string msg = "Found multiple matching devices using name '";
+ StrAppend(&msg, engine.device, "': ");
+ for (auto d : devices) StrAppend(&msg, d->name(), ", ");
+ StrAppend(&msg, ". Will get the allocator from first one.");
+ LOG(WARNING) << msg;
}
+ tensorflow::AllocatorAttributes alloc_attr;
+ cuda_device_id = devices[0]->tensorflow_gpu_device_info()->gpu_id;
+ dev_allocator = devices[0]->GetAllocator(alloc_attr);
+ VLOG(1) << "Using allocator " << dev_allocator->Name()
+ << " and cuda_device_id " << cuda_device_id;
+ } else {
+ LOG(WARNING) << "Cluster is set but device '" << engine.device
+ << "' is not found in the cluster";
}
return std::make_pair(cuda_device_id, dev_allocator);
}
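For reference, a minimal caller sketch of the reworked helper. This is a hypothetical illustration only (AllocatorSelectionDemo is not part of the patch); it assumes a GPU-enabled TensorFlow build and the GetDeviceAndAllocator declaration this change exposes in convert_graph.h below.

// Hypothetical sketch, not part of this patch: shows what callers can expect
// from GetDeviceAndAllocator() after the change.
#include <utility>

#include "tensorflow/contrib/tensorrt/convert/convert_graph.h"
#include "tensorflow/core/platform/logging.h"

namespace tensorflow {
namespace tensorrt {
namespace convert {

void AllocatorSelectionDemo(const ConversionParams& params,
                            const EngineInfo& engine) {
  // Before this patch, a missing cluster or an empty engine.device always
  // produced {-1, nullptr}. Now those cases fall back to the first TF GPU
  // id that has a valid CUDA device mapping.
  std::pair<int, Allocator*> result = GetDeviceAndAllocator(params, engine);
  if (result.first == -1) {
    // Still possible: no GPU exists, or the named device is not in the
    // cluster's DeviceSet.
    LOG(WARNING) << "No device/allocator found for engine device '"
                 << engine.device << "'";
  } else {
    VLOG(1) << "Using cuda_device_id " << result.first << " and allocator "
            << result.second->Name();
  }
}

}  // namespace convert
}  // namespace tensorrt
}  // namespace tensorflow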
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h
index 9d986e4890..3525202369 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h
@@ -17,6 +17,7 @@ limitations under the License.
#include <vector>
+#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/grappler/clusters/cluster.h"
#include "tensorflow/core/grappler/costs/graph_properties.h"
@@ -84,6 +85,11 @@ std::vector<int> GetLinkedTensorRTVersion();
// Return runtime TensorRT library version information.
std::vector<int> GetLoadedTensorRTVersion();
+
+// Helper method for the conversion, expose for testing.
+std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
+ const ConversionParams& params, const EngineInfo& engine);
+
} // namespace convert
} // namespace tensorrt
} // namespace tensorflow
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph_test.cc b/tensorflow/contrib/tensorrt/convert/convert_graph_test.cc
new file mode 100644
index 0000000000..8146bed4b0
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph_test.cc
@@ -0,0 +1,140 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tensorrt/convert/convert_graph.h"
+
+#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
+#include "tensorflow/core/common_runtime/device_mgr.h"
+#include "tensorflow/core/common_runtime/device_set.h"
+#include "tensorflow/core/grappler/clusters/cluster.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/config.pb.h" // NOLINT
+#include "tensorflow/core/public/session.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+
+namespace tensorflow {
+namespace tensorrt {
+namespace convert {
+
+class FakeCluster : public grappler::Cluster {
+ public:
+ FakeCluster() : Cluster(0) {}
+
+ void SetDeviceSet(const DeviceSet* device_set) { device_set_ = device_set; }
+
+ const DeviceSet* GetDeviceSet() const override { return device_set_; }
+
+ string type() const override { return ""; }
+ Status Provision() override { return Status::OK(); }
+ Status Initialize(const grappler::GrapplerItem& item) override {
+ return Status::OK();
+ }
+ Status Run(const GraphDef& graph_def,
+ const std::vector<std::pair<string, Tensor>>& feed,
+ const std::vector<string>& fetch,
+ RunMetadata* metadata) override {
+ return Status::OK();
+ }
+
+ private:
+ const DeviceSet* device_set_ = nullptr;
+};
+
+TEST(ConvertGraphTest, GetDeviceAndAllocator) {
+ ConversionParams params;
+ EngineInfo engine_info;
+ {
+ // params.cluster is not set, and no gpu device is available.
+ auto result = GetDeviceAndAllocator(params, engine_info);
+ EXPECT_EQ(-1, result.first);
+ EXPECT_EQ(nullptr, result.second);
+ }
+
+ // Create a session with two (virtual) gpu devices.
+ SessionOptions options;
+ ConfigProto* config = &options.config;
+ GPUOptions* gpu_options = config->mutable_gpu_options();
+ auto virtual_devices =
+ gpu_options->mutable_experimental()->add_virtual_devices();
+ virtual_devices->add_memory_limit_mb(200);
+ virtual_devices->add_memory_limit_mb(200);
+ std::unique_ptr<Session> session(NewSession(options));
+
+ {
+ // params.cluster is not set, so it should find and return the first gpu
+ // id and the corresponding allocator.
+ auto result = GetDeviceAndAllocator(params, engine_info);
+ EXPECT_EQ(0, result.first);
+ EXPECT_NE(nullptr, result.second);
+ EXPECT_EQ("GPU_0_bfc", result.second->Name());
+ }
+
+ FakeCluster cluster;
+ params.cluster = &cluster;
+ {
+ // params.cluster->GetDeviceSet() returns null, so it should find and
+ // return the first gpu id and the corresponding allocator.
+ auto result = GetDeviceAndAllocator(params, engine_info);
+ EXPECT_EQ(0, result.first);
+ EXPECT_NE(nullptr, result.second);
+ EXPECT_EQ("GPU_0_bfc", result.second->Name());
+ }
+
+ // Build the DeviceSet.
+ DeviceSet device_set;
+ const DeviceMgr* device_mgr = nullptr;
+ TF_ASSERT_OK(session->LocalDeviceManager(&device_mgr));
+ for (auto d : device_mgr->ListDevices()) {
+ device_set.AddDevice(d);
+ }
+ cluster.SetDeviceSet(&device_set);
+ {
+ // engine_info.device is not set, so it should find and return the first
+ // gpu id and the corresponding allocator.
+ auto result = GetDeviceAndAllocator(params, engine_info);
+ EXPECT_EQ(0, result.first);
+ EXPECT_NE(nullptr, result.second);
+ EXPECT_EQ("GPU_0_bfc", result.second->Name());
+ }
+
+ engine_info.device = "/GPU:1";
+ {
+ // Set to use second device.
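+ // Note: both virtual devices are backed by the same physical GPU, so the
+ // cuda device id stays 0; only the allocator (GPU_1_bfc) differs.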
+ auto result = GetDeviceAndAllocator(params, engine_info);
+ EXPECT_EQ(0, result.first);
+ EXPECT_NE(nullptr, result.second);
+ EXPECT_EQ("GPU_1_bfc", result.second->Name());
+ }
+
+ engine_info.device = "/GPU:3";
+ {
+ // Set to use nonexistent device.
+ auto result = GetDeviceAndAllocator(params, engine_info);
+ EXPECT_EQ(-1, result.first);
+ EXPECT_EQ(nullptr, result.second);
+ }
+}
+
+} // namespace convert
+} // namespace tensorrt
+} // namespace tensorflow
+
+#endif // GOOGLE_TENSORRT
+#endif // GOOGLE_CUDA
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 863074e773..0f5abe6898 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -77,6 +77,10 @@ limitations under the License.
namespace tensorflow {
namespace tensorrt {
+// TODO(aaroey): put these constants into some class.
+const char* const kInputPHName = "TensorRTInputPH_";
+const char* const kOutputPHName = "TensorRTOutputPH_";
+
namespace convert {
using ::tensorflow::str_util::Split;
using ::tensorflow::strings::StrAppend;
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
index a60253740f..9274027e63 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
@@ -36,8 +36,9 @@ limitations under the License.
namespace tensorflow {
namespace tensorrt {
-static const char* kInputPHName = "TensorRTInputPH_";
-static const char* kOutputPHName = "TensorRTOutputPH_";
+extern const char* const kInputPHName;
+extern const char* const kOutputPHName;
+
namespace convert {
struct EngineConnection {
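A note on the kInputPHName/kOutputPHName change above: a namespace-scope `static const char*` defined in a header gives each translation unit that includes convert_nodes.h its own copy of the pointer, whereas the new `extern const char* const` declaration refers to the single definition added to convert_nodes.cc. A standalone sketch of the two patterns, using hypothetical file names:

// names.h (hypothetical) -- the pattern the patch adopts: declare once here,
// define once in a single .cc file.
#ifndef NAMES_H_
#define NAMES_H_
extern const char* const kInputPHName;  // one shared object for all TUs
#endif  // NAMES_H_

// names.cc (hypothetical) -- the single definition.
// #include "names.h"
const char* const kInputPHName = "TensorRTInputPH_";

// The old pattern -- `static const char* kInputPHName = "TensorRTInputPH_";`
// in the header -- would instead create a distinct kInputPHName object in
// every translation unit that includes the header.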