author     TensorFlower Gardener <gardener@tensorflow.org>  2018-08-21 13:20:25 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org>  2018-08-21 13:20:25 -0700
commit     7cc6abab4e3569e4e44af7d577a71d15039f014b (patch)
tree       620113bcef5d4de3a0823313d497b064b2cacebd /tensorflow/contrib/tensorrt
parent     a7e961ac883d45df76a481a796db9ea86eba7c78 (diff)
parent     4684421d9aa3e63dc943074025ffdc89df1a1980 (diff)
Merge pull request #21508 from aaroey:fix_allocator_when_engine_device_not_specified
PiperOrigin-RevId: 209647689
Diffstat (limited to 'tensorflow/contrib/tensorrt')
-rw-r--r--  tensorflow/contrib/tensorrt/BUILD                          |  26
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_graph.cc       |  73
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_graph.h        |   6
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_graph_test.cc  | 140
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_nodes.cc       |   4
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_nodes.h        |   5
6 files changed, 228 insertions(+), 26 deletions(-)
diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD
index 26236a0435..a0fc3e43a9 100644
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@@ -280,6 +280,7 @@ tf_cuda_library(
"//tensorflow/core/grappler:grappler_item",
"//tensorflow/core/grappler:utils",
"//tensorflow/core:framework_lite",
+ "//tensorflow/core:gpu_runtime",
"//tensorflow/core:graph",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
@@ -293,6 +294,31 @@ tf_cuda_library(
]) + tf_custom_op_library_additional_deps(),
)
+tf_cuda_cc_test(
+ name = "convert_graph_test",
+ size = "medium",
+ srcs = ["convert/convert_graph_test.cc"],
+ tags = [
+ "no_cuda_on_cpu_tap",
+ "no_windows",
+ "nomac",
+ ],
+ deps = [
+ ":trt_conversion",
+ "//tensorflow/core/grappler:grappler_item",
+ "//tensorflow/core/grappler/clusters:cluster",
+ "//tensorflow/core:core_cpu",
+ "//tensorflow/core:core_cpu_base",
+ "//tensorflow/core:direct_session",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:test",
+ "//tensorflow/core:test_main",
+ ] + if_tensorrt([
+ "@local_config_tensorrt//:nv_infer",
+ ]),
+)
+
# Library for the segmenting portion of TensorRT operation creation
cc_library(
name = "segment",
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index 21ec8b0b30..b019c99882 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -31,6 +31,9 @@ limitations under the License.
#include "tensorflow/contrib/tensorrt/resources/trt_resources.h"
#include "tensorflow/contrib/tensorrt/segment/segment.h"
#include "tensorflow/contrib/tensorrt/test/utils.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
#include "tensorflow/core/framework/function.h"
#include "tensorflow/core/framework/graph_to_functiondef.h"
#include "tensorflow/core/framework/node_def_builder.h"
@@ -772,33 +775,55 @@ std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
const ConversionParams& params, const EngineInfo& engine) {
int cuda_device_id = -1;
tensorflow::Allocator* dev_allocator = nullptr;
- if (params.cluster) {
- std::vector<tensorflow::Device*> devices;
- if (!engine.device.empty() && params.cluster->GetDeviceSet()) {
- DeviceNameUtils::ParsedName parsed_name;
- if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
- parsed_name.has_id) {
- params.cluster->GetDeviceSet()->FindMatchingDevices(parsed_name,
- &devices);
+ if (params.cluster == nullptr || params.cluster->GetDeviceSet() == nullptr ||
+ engine.device.empty()) {
+ // If device is not set, use the first found GPU device for the conversion.
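+ // Note: 100 is just an upper bound on the number of TF GPU ids probed;
+ // the loop stops at the first id that has a valid CUDA device mapping.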
+ for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) {
+ TfGpuId tf_gpu_id(tf_gpu_id_value);
+ CudaGpuId cuda_gpu_id;
+ Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+ if (s.ok()) {
+ VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device "
+ << cuda_gpu_id.value();
+ cuda_device_id = cuda_gpu_id.value();
+ GPUOptions gpu_options;
+ // If the TF to Cuda gpu id mapping exists, the device and corresponding
+ // allocator must have been initialized already, so the
+ // GetGPUAllocator() call won't create a new allocator.
+ dev_allocator = GPUProcessState::singleton()->GetGPUAllocator(
+ gpu_options, tf_gpu_id, 1);
+ break;
}
+ LOG(ERROR) << "TF GPU with id " << tf_gpu_id_value << " does not exist "
+ << s;
}
- if (!devices.empty()) {
- if (devices.size() > 1) {
- string msg = "Found multiple matching devices using name '";
- StrAppend(&msg, engine.device, "': ");
- for (auto d : devices) StrAppend(&msg, d->name(), ", ");
- StrAppend(&msg, ". Will get the allocator from first one.");
- LOG(WARNING) << msg;
- }
- tensorflow::AllocatorAttributes alloc_attr;
- cuda_device_id = devices[0]->tensorflow_gpu_device_info()->gpu_id;
- dev_allocator = devices[0]->GetAllocator(alloc_attr);
- VLOG(1) << "Using allocator " << dev_allocator->Name()
- << " and cuda_device_id " << cuda_device_id;
- } else {
- LOG(WARNING) << "Cluster is set but device '" << engine.device
- << "' is not found in the cluster";
+ return std::make_pair(cuda_device_id, dev_allocator);
+ }
+
+ // Use the device requested by the engine.
+ auto device_set = params.cluster->GetDeviceSet();
+ std::vector<tensorflow::Device*> devices;
+ DeviceNameUtils::ParsedName parsed_name;
+ if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
+ parsed_name.has_id) {
+ device_set->FindMatchingDevices(parsed_name, &devices);
+ }
+ if (!devices.empty()) {
+ if (devices.size() > 1) {
+ string msg = "Found multiple matching devices using name '";
+ StrAppend(&msg, engine.device, "': ");
+ for (auto d : devices) StrAppend(&msg, d->name(), ", ");
+ StrAppend(&msg, ". Will get the allocator from first one.");
+ LOG(WARNING) << msg;
}
+ tensorflow::AllocatorAttributes alloc_attr;
+ cuda_device_id = devices[0]->tensorflow_gpu_device_info()->gpu_id;
+ dev_allocator = devices[0]->GetAllocator(alloc_attr);
+ VLOG(1) << "Using allocator " << dev_allocator->Name()
+ << " and cuda_device_id " << cuda_device_id;
+ } else {
+ LOG(WARNING) << "Cluster is set but device '" << engine.device
+ << "' is not found in the cluster";
}
return std::make_pair(cuda_device_id, dev_allocator);
}
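For reference, a minimal caller sketch of the reworked helper. This is a hypothetical illustration only (AllocatorSelectionDemo is not part of the patch); it assumes a GPU-enabled TensorFlow build and the GetDeviceAndAllocator declaration this change exposes in convert_graph.h below.

// Hypothetical sketch, not part of this patch: shows what callers can expect
// from GetDeviceAndAllocator() after the change.
#include <utility>

#include "tensorflow/contrib/tensorrt/convert/convert_graph.h"
#include "tensorflow/core/platform/logging.h"

namespace tensorflow {
namespace tensorrt {
namespace convert {

void AllocatorSelectionDemo(const ConversionParams& params,
                            const EngineInfo& engine) {
  // Before this patch, a missing cluster or an empty engine.device always
  // produced {-1, nullptr}. Now those cases fall back to the first TF GPU
  // id that has a valid CUDA device mapping.
  std::pair<int, Allocator*> result = GetDeviceAndAllocator(params, engine);
  if (result.first == -1) {
    // Still possible: no GPU exists, or the named device is not in the
    // cluster's DeviceSet.
    LOG(WARNING) << "No device/allocator found for engine device '"
                 << engine.device << "'";
  } else {
    VLOG(1) << "Using cuda_device_id " << result.first << " and allocator "
            << result.second->Name();
  }
}

}  // namespace convert
}  // namespace tensorrt
}  // namespace tensorflow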
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h
index 9d986e4890..3525202369 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h
@@ -17,6 +17,7 @@ limitations under the License.
#include <vector>
+#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/grappler/clusters/cluster.h"
#include "tensorflow/core/grappler/costs/graph_properties.h"
@@ -84,6 +85,11 @@ std::vector<int> GetLinkedTensorRTVersion();
// Return runtime TensorRT library version information.
std::vector<int> GetLoadedTensorRTVersion();
+
+// Helper method for the conversion, expose for testing.
+std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
+ const ConversionParams& params, const EngineInfo& engine);
+
} // namespace convert
} // namespace tensorrt
} // namespace tensorflow
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph_test.cc b/tensorflow/contrib/tensorrt/convert/convert_graph_test.cc
new file mode 100644
index 0000000000..8146bed4b0
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph_test.cc
@@ -0,0 +1,140 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tensorrt/convert/convert_graph.h"
+
+#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
+#include "tensorflow/core/common_runtime/device_mgr.h"
+#include "tensorflow/core/common_runtime/device_set.h"
+#include "tensorflow/core/grappler/clusters/cluster.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/config.pb.h" // NOLINT
+#include "tensorflow/core/public/session.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+
+namespace tensorflow {
+namespace tensorrt {
+namespace convert {
+
+class FakeCluster : public grappler::Cluster {
+ public:
+ FakeCluster() : Cluster(0) {}
+
+ void SetDeviceSet(const DeviceSet* device_set) { device_set_ = device_set; }
+
+ const DeviceSet* GetDeviceSet() const override { return device_set_; }
+
+ string type() const override { return ""; }
+ Status Provision() override { return Status::OK(); }
+ Status Initialize(const grappler::GrapplerItem& item) override {
+ return Status::OK();
+ }
+ Status Run(const GraphDef& graph_def,
+ const std::vector<std::pair<string, Tensor>>& feed,
+ const std::vector<string>& fetch,
+ RunMetadata* metadata) override {
+ return Status::OK();
+ }
+
+ private:
+ const DeviceSet* device_set_ = nullptr;
+};
+
+TEST(ConvertGraphTest, GetDeviceAndAllocator) {
+ ConversionParams params;
+ EngineInfo engine_info;
+ {
+ // params.cluster is not set, and no gpu device is available.
+ auto result = GetDeviceAndAllocator(params, engine_info);
+ EXPECT_EQ(-1, result.first);
+ EXPECT_EQ(nullptr, result.second);
+ }
+
+ // Create a session with two (virtual) gpu devices.
+ SessionOptions options;
+ ConfigProto* config = &options.config;
+ GPUOptions* gpu_options = config->mutable_gpu_options();
+ auto virtual_devices =
+ gpu_options->mutable_experimental()->add_virtual_devices();
+ virtual_devices->add_memory_limit_mb(200);
+ virtual_devices->add_memory_limit_mb(200);
+ std::unique_ptr<Session> session(NewSession(options));
+
+ {
+ // params.cluster is not set, so it should find and return the first gpu
+ // id and the corresponding allocator.
+ auto result = GetDeviceAndAllocator(params, engine_info);
+ EXPECT_EQ(0, result.first);
+ EXPECT_NE(nullptr, result.second);
+ EXPECT_EQ("GPU_0_bfc", result.second->Name());
+ }
+
+ FakeCluster cluster;
+ params.cluster = &cluster;
+ {
+ // params.cluster->GetDeviceSet() returns null, so it should find and
+ // return the first gpu id and the corresponding allocator.
+ auto result = GetDeviceAndAllocator(params, engine_info);
+ EXPECT_EQ(0, result.first);
+ EXPECT_NE(nullptr, result.second);
+ EXPECT_EQ("GPU_0_bfc", result.second->Name());
+ }
+
+ // Build the DeviceSet.
+ DeviceSet device_set;
+ const DeviceMgr* device_mgr = nullptr;
+ TF_ASSERT_OK(session->LocalDeviceManager(&device_mgr));
+ for (auto d : device_mgr->ListDevices()) {
+ device_set.AddDevice(d);
+ }
+ cluster.SetDeviceSet(&device_set);
+ {
+ // engine_info.device is not set, so it should find and return the first
+ // gpu id and the corresponding allocator.
+ auto result = GetDeviceAndAllocator(params, engine_info);
+ EXPECT_EQ(0, result.first);
+ EXPECT_NE(nullptr, result.second);
+ EXPECT_EQ("GPU_0_bfc", result.second->Name());
+ }
+
+ engine_info.device = "/GPU:1";
+ {
+ // Set to use second device.
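+ // Note: both virtual devices are backed by the same physical GPU, so the
+ // cuda device id stays 0; only the allocator (GPU_1_bfc) differs.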
+ auto result = GetDeviceAndAllocator(params, engine_info);
+ EXPECT_EQ(0, result.first);
+ EXPECT_NE(nullptr, result.second);
+ EXPECT_EQ("GPU_1_bfc", result.second->Name());
+ }
+
+ engine_info.device = "/GPU:3";
+ {
+ // Set to use nonexistent device.
+ auto result = GetDeviceAndAllocator(params, engine_info);
+ EXPECT_EQ(-1, result.first);
+ EXPECT_EQ(nullptr, result.second);
+ }
+}
+
+} // namespace convert
+} // namespace tensorrt
+} // namespace tensorflow
+
+#endif // GOOGLE_TENSORRT
+#endif // GOOGLE_CUDA
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 863074e773..0f5abe6898 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -77,6 +77,10 @@ limitations under the License.
namespace tensorflow {
namespace tensorrt {
+// TODO(aaroey): put these constants into some class.
+const char* const kInputPHName = "TensorRTInputPH_";
+const char* const kOutputPHName = "TensorRTOutputPH_";
+
namespace convert {
using ::tensorflow::str_util::Split;
using ::tensorflow::strings::StrAppend;
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
index a60253740f..9274027e63 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
@@ -36,8 +36,9 @@ limitations under the License.
namespace tensorflow {
namespace tensorrt {
-static const char* kInputPHName = "TensorRTInputPH_";
-static const char* kOutputPHName = "TensorRTOutputPH_";
+extern const char* const kInputPHName;
+extern const char* const kOutputPHName;
+
namespace convert {
struct EngineConnection {
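A note on the kInputPHName/kOutputPHName change above: a namespace-scope `static const char*` defined in a header gives each translation unit that includes convert_nodes.h its own copy of the pointer, whereas the new `extern const char* const` declaration refers to the single definition added to convert_nodes.cc. A standalone sketch of the two patterns, using hypothetical file names:

// names.h (hypothetical) -- the pattern the patch adopts: declare once here,
// define once in a single .cc file.
#ifndef NAMES_H_
#define NAMES_H_
extern const char* const kInputPHName;  // one shared object for all TUs
#endif  // NAMES_H_

// names.cc (hypothetical) -- the single definition.
// #include "names.h"
const char* const kInputPHName = "TensorRTInputPH_";

// The old pattern -- `static const char* kInputPHName = "TensorRTInputPH_";`
// in the header -- would instead create a distinct kInputPHName object in
// every translation unit that includes the header.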