diff options
-rw-r--r-- | configure.py | 2 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/BUILD | 1 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/convert/convert_graph.cc | 94 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 7 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/convert/convert_nodes.h | 9 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/convert/utils.h | 2 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc | 28 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/kernels/trt_engine_op.h | 10 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/python/trt_convert.py | 12 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc | 1 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/resources/trt_resources.h | 12 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/test/test_tftrt.py | 11 |
12 files changed, 101 insertions, 88 deletions
diff --git a/configure.py b/configure.py index a14d006a73..ad585fa52e 100644 --- a/configure.py +++ b/configure.py @@ -944,7 +944,7 @@ def set_tf_cudnn_version(environ_cp): def is_cuda_compatible(lib, cuda_ver, cudnn_ver): - """Check the compatibility between given library and cudnn/cudart libraries.""" + """Check compatibility between given library and cudnn/cudart libraries.""" ldd_bin = which('ldd') or '/usr/bin/ldd' ldd_out = run_shell([ldd_bin, lib], True) ldd_out = ldd_out.split(os.linesep) diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index e7b3fe38e5..adda0b758b 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -207,6 +207,7 @@ tf_cuda_library( ], deps = [ ":trt_logging", + ":utils", "//tensorflow/core:framework_headers_lib", "//tensorflow/core:framework_lite", "//tensorflow/core:lib_proto_parsing", diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index ba7d3b5f86..1c4fd4a0ce 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -49,13 +49,14 @@ limitations under the License. #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/protobuf/config.pb.h" +#include "tensorflow/core/protobuf/config.pb.h" // NOLINT #include "tensorflow/core/protobuf/device_properties.pb.h" // NOLINT +#include "tensorflow/core/protobuf/rewriter_config.pb.h" // NOLINT #include "tensorflow/core/util/device_name_utils.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include <cuda/include/cuda_runtime_api.h> +#include "cuda/include/cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" namespace tensorflow { namespace tensorrt { @@ -238,14 +239,14 @@ tensorflow::Status ConvertGraphDefToTensorRT( } // Function to get subsegment information structure. -EngineInfo GetEngineInfo( +tensorflow::Status GetEngineInfo( const tensorflow::Graph* g, const tensorflow::grappler::GraphProperties& graph_properties, const std::set<string>& segment_nodes, const std::unordered_map<string, tensorflow::Node*>& node_map, - const std::vector<tensorflow::Node*>& reverse_topo_order) { + const std::vector<tensorflow::Node*>& reverse_topo_order, + EngineInfo* info) { std::vector<int> subgraph_node_ids; - EngineInfo info; std::set<string> segment_devices; int input_port = 0; int output_port = 0; @@ -296,9 +297,9 @@ EngineInfo GetEngineInfo( created_edges.insert({s, port}); input_port++; } - info.connections.emplace_back(input_node->name(), input_node->id(), - edge->src_output(), node_name, node_id, - edge->dst_input(), true, port); + info->connections.emplace_back(input_node->name(), input_node->id(), + edge->src_output(), node_name, node_id, + edge->dst_input(), true, port); } } } @@ -316,28 +317,28 @@ EngineInfo GetEngineInfo( created_edges.insert({s, port}); output_port++; } - info.connections.emplace_back(output_node->name(), output_node->id(), - edge->dst_input(), node_name, node_id, - edge->src_output(), false, port); + info->connections.emplace_back(output_node->name(), output_node->id(), + edge->dst_input(), node_name, node_id, + edge->src_output(), false, port); } } } - ConvertSegmentToGraphDef(g, graph_properties, subgraph_node_ids, - &info.connections, &info.segment_graph_def, - &info.engine_name); + TF_RETURN_IF_ERROR(ConvertSegmentToGraphDef( + g, graph_properties, subgraph_node_ids, &info->connections, + &info->segment_graph_def, &info->engine_name)); // TODO(sami): This should not happen once segmenter is updated. if (segment_devices.size() == 1) { - info.device = *segment_devices.begin(); + info->device = *segment_devices.begin(); } else if (segment_devices.size() > 1) { LOG(WARNING) << "Detected multiple(" << segment_devices.size() << ") devices for the segment. Picking first one to continue " << "but this shouldn't have happened"; - info.device = *segment_devices.begin(); + info->device = *segment_devices.begin(); } else { VLOG(1) << "Segment devices size is 0"; } - return info; + return Status::OK(); } // Function to insert a TRT node into the graph. The graph is not modified if @@ -562,7 +563,9 @@ tensorflow::Status RegisterSegmentFunctionToFunctionLibrary( tensorflow::NodeDefBuilder node_builder( StrCat(name, "_Arg"), tensorflow::FunctionLibraryDefinition::kArgOp); VLOG(1) << "Adding " << StrCat(name, "_Arg"); - node_builder.Attr("T", node->output_type(0)).Attr("index", i).Finalize(&nd); + TF_RETURN_IF_ERROR(node_builder.Attr("T", node->output_type(0)) + .Attr("index", i) + .Finalize(&nd)); tensorflow::Status s; auto node_arg = sgraph.AddNode(nd, &s); if (!s.ok()) { @@ -593,7 +596,9 @@ tensorflow::Status RegisterSegmentFunctionToFunctionLibrary( VLOG(1) << " input " << nout.node << ":" << nout.index << " dtype=" << tensorflow::DataTypeString(nout.data_type); node_builder.Input({nout}); - node_builder.Attr("T", node->output_type(0)).Attr("index", i).Finalize(&nd); + TF_RETURN_IF_ERROR(node_builder.Attr("T", node->output_type(0)) + .Attr("index", i) + .Finalize(&nd)); if (VLOG_IS_ON(3)) { VLOG(3) << nd.DebugString(); } @@ -713,11 +718,12 @@ tensorflow::Status ConvertAfterShapes(ConversionParams& params) { segment_options.exclude_node_list.insert(node); } segment_options.minimum_segment_size = params.minimum_segment_size; - tensorflow::tensorrt::segment::SegmentNodesVector segments; + tensorflow::tensorrt::segment::SegmentNodesVector initial_segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( - &graph, IsTensorRTCandidate, segment_options, &segments)); - if (segments.size() > 1) { - VLOG(0) << "MULTIPLE tensorrt candidate conversion: " << segments.size(); + &graph, IsTensorRTCandidate, segment_options, &initial_segments)); + if (initial_segments.size() > 1) { + VLOG(0) << "MULTIPLE tensorrt candidate conversion: " + << initial_segments.size(); } // Get the EngineInfo for each segment. @@ -725,17 +731,24 @@ tensorflow::Status ConvertAfterShapes(ConversionParams& params) { TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); float total_num_nodes_in_segments = 0.; std::vector<EngineInfo> engine_segments; - engine_segments.reserve(segments.size()); + engine_segments.reserve(initial_segments.size()); std::vector<tensorflow::Node*> reverse_topo_order; tensorflow::GetPostOrder(graph, &reverse_topo_order); size_t total_engine_bytes_size = 0; std::vector<size_t> engine_bytes_size; - for (size_t t = 0; t < segments.size(); t++) { - auto& s = segments.at(t); - engine_segments.emplace_back(GetEngineInfo(&graph, *params.graph_properties, - s.first, node_map, - reverse_topo_order)); - auto& curr_engine = engine_segments.back(); + tensorflow::tensorrt::segment::SegmentNodesVector converted_segments; + converted_segments.reserve(initial_segments.size()); + for (size_t t = 0; t < initial_segments.size(); t++) { + auto& curr_segment = initial_segments.at(t); + EngineInfo curr_engine; + Status status = + GetEngineInfo(&graph, *params.graph_properties, curr_segment.first, + node_map, reverse_topo_order, &curr_engine); + if (!status.ok()) { + LOG(WARNING) << "Failed to get engine info for segment " << t << ": " + << status; + continue; + } curr_engine.precision_mode = params.precision_mode; curr_engine.engine_type = (params.is_dyn_op || params.precision_mode == INT8MODE @@ -744,12 +757,19 @@ tensorflow::Status ConvertAfterShapes(ConversionParams& params) { curr_engine.cached_engine_batches = params.cached_engine_batches; curr_engine.maximum_cached_engines = params.max_cached_engines; StrAppend(&curr_engine.engine_name, "my_trt_op_", t); - RegisterSegmentFunctionToFunctionLibrary( + status = RegisterSegmentFunctionToFunctionLibrary( &graph, curr_engine.segment_graph_def, curr_engine.engine_name); + if (!status.ok()) { + LOG(WARNING) << "Failed to register segment graphdef as a function " << t + << ": " << status; + continue; + } engine_bytes_size.push_back(curr_engine.segment_graph_def.ByteSizeLong()); total_engine_bytes_size += engine_bytes_size.back(); - total_num_nodes_in_segments += s.first.size(); + total_num_nodes_in_segments += curr_segment.first.size(); + engine_segments.push_back(std::move(curr_engine)); + converted_segments.push_back(std::move(curr_segment)); if (VLOG_IS_ON(8)) { string fname = curr_engine.engine_name; @@ -775,7 +795,7 @@ tensorflow::Status ConvertAfterShapes(ConversionParams& params) { engine.max_workspace_size_bytes = params.max_workspace_size_bytes * (engine_bytes_size.at(i) / total_engine_bytes_size + - segments.at(i).first.size() / total_num_nodes_in_segments) / + converted_segments.at(i).first.size() / total_num_nodes_in_segments) / 2.0; // The allocator is used to build the engine. The build and the built engine // will be destroyed after we get the serialized engine string, so it's fine @@ -793,17 +813,17 @@ tensorflow::Status ConvertAfterShapes(ConversionParams& params) { cudaSetDevice(cuda_device_id); auto status = CreateTRTNode(&graph, engine_segments, i, alloc.get(), params.max_batch_size); - // If status is ok, we successfuly added the node to the graph and can + // If status is ok, we successfully added the node to the graph and can // remove segment ops. Otherwise graph is not modified. if (status.ok()) { - for (auto node_name : segments.at(i).first) { + for (auto node_name : converted_segments.at(i).first) { graph.RemoveNode(node_map.at(node_name)); } } else { // Graph is not modified. LOG(WARNING) << "Engine creation for segment " << i << ", composed of " - << segments.at(i).first.size() << " nodes failed: " << status - << ". Skipping..."; + << converted_segments.at(i).first.size() << " nodes failed: " + << status << ". Skipping..."; } } cudaSetDevice(old_cuda_device); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index b5214b461a..146b9c7344 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -2130,13 +2130,10 @@ void Converter::register_op_converters() { } // namespace tensorflow::Status ConvertGraphDefToEngine( - const tensorflow::GraphDef& gdef, - int precision_mode, - int max_batch_size, + const tensorflow::GraphDef& gdef, int precision_mode, int max_batch_size, size_t max_workspace_size_bytes, const std::vector<tensorflow::PartialTensorShape>& input_shapes, - Logger* logger, - nvinfer1::IGpuAllocator* allocator, + Logger* logger, nvinfer1::IGpuAllocator* allocator, TRTInt8Calibrator* calibrator, TrtUniquePtrType<nvinfer1::ICudaEngine>* engine, bool* convert_successfully) { diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 2da4edf7f5..7684d8d4a2 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -78,7 +78,7 @@ struct EngineInfo { EngineInfo() : engine_type(EngineType::TRTStatic), max_workspace_size_bytes(0), - precision_mode(FP32MODE) {}; + precision_mode(FP32MODE) {} string engine_name; string device; @@ -120,13 +120,10 @@ tensorflow::Status ConvertSegmentToGraphDef( // is successful. This is different than successfully building the engine: // building can still fail afterwards. tensorflow::Status ConvertGraphDefToEngine( - const tensorflow::GraphDef& gdef, - int precision_mode, - int max_batch_size, + const tensorflow::GraphDef& gdef, int precision_mode, int max_batch_size, size_t max_workspace_size_bytes, const std::vector<tensorflow::PartialTensorShape>& input_shapes, - Logger* logger, - nvinfer1::IGpuAllocator* allocator, + Logger* logger, nvinfer1::IGpuAllocator* allocator, TRTInt8Calibrator* calibrator, TrtUniquePtrType<nvinfer1::ICudaEngine>* engine, bool* convert_successfully); diff --git a/tensorflow/contrib/tensorrt/convert/utils.h b/tensorflow/contrib/tensorrt/convert/utils.h index 021fdaf8c5..f601c06701 100644 --- a/tensorflow/contrib/tensorrt/convert/utils.h +++ b/tensorflow/contrib/tensorrt/convert/utils.h @@ -31,7 +31,7 @@ struct TrtDestroyer { template <typename T> using TrtUniquePtrType = std::unique_ptr<T, TrtDestroyer<T>>; -} // namespace convert } // namespace tensorrt +} // namespace tensorflow #endif // TENSORFLOW_CONTRIB_TENSORRT_CONVERT_UTILS_H_ diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index d12f738ac5..75e32559bb 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -15,8 +15,8 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/kernels/trt_engine_op.h" #include <algorithm> -#include "tensorflow/contrib/tensorrt/convert/utils.h" #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h" +#include "tensorflow/contrib/tensorrt/convert/utils.h" #include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" #include "tensorflow/contrib/tensorrt/resources/trt_resources.h" @@ -77,9 +77,8 @@ tensorflow::Status TRTEngineOp::ConstructFunctionHandle(OpKernelContext* ctx) { } auto fdef = lib->GetFunctionLibraryDefinition()->Find(funcdef_name_); if (fdef == nullptr) { - return tensorflow::errors::Internal( - "Native FunctionDef ", funcdef_name_, - " can't be found in function library"); + return tensorflow::errors::Internal("Native FunctionDef ", funcdef_name_, + " can't be found in function library"); } tensorflow::FunctionLibraryRuntime::InstantiateOptions inst_ops; inst_ops.overlay_lib = nullptr; @@ -128,8 +127,8 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) } else if (precision_string == "INT8") { precision_mode_ = convert::INT8MODE; } - calibration_mode_ = (precision_mode_ == convert::INT8MODE && - calibration_data.size() == 0); + calibration_mode_ = + (precision_mode_ == convert::INT8MODE && calibration_data.size() == 0); if (calibration_data.size()) { calibrator_.reset(new TRTInt8Calibrator(calibration_data)); calibration_data.resize(0); @@ -291,8 +290,8 @@ void TRTEngineOp::ComputeAsync(tensorflow::OpKernelContext* ctx, std::vector<void*> buffers(num_binding); for (int i = 0; i < ctx->num_inputs(); i++) { const string inp_name = StrCat(kInputPHName, i); - const size_t binding_index = trt_engine_ptr->getBindingIndex( - inp_name.c_str()); + const size_t binding_index = + trt_engine_ptr->getBindingIndex(inp_name.c_str()); const Tensor& input_tensor = ctx->input(i); const TensorShape& input_shape = input_tensor.shape(); @@ -320,7 +319,7 @@ void TRTEngineOp::ComputeAsync(tensorflow::OpKernelContext* ctx, default: LOG(ERROR) << "Unknown TRT data type: " << int(dtype); ctx->SetStatus(tensorflow::errors::InvalidArgument( - "Unknown ouput TRT data type! ", int(dtype))); + "Unknown ouput TRT data type! ", static_cast<int>(dtype))); return; } } @@ -343,8 +342,8 @@ void TRTEngineOp::ComputeAsync(tensorflow::OpKernelContext* ctx, &output_shape)); } else { LOG(ERROR) << "output node not found, at " << output_name; - ctx->SetStatus(tensorflow::errors::Internal( - "output ", output_name, " couldn't be found!")); + ctx->SetStatus(tensorflow::errors::Internal("output ", output_name, + " couldn't be found!")); return; } auto status = ctx->allocate_output(i, output_shape, &output_tensor); @@ -370,7 +369,7 @@ void TRTEngineOp::ComputeAsync(tensorflow::OpKernelContext* ctx, "INT8 outputs are not supported!")); return; default: - LOG(ERROR) << "Unknown TRT data type: " << int(dtype); + LOG(ERROR) << "Unknown TRT data type: " << static_cast<int>(dtype); ctx->SetStatus(tensorflow::errors::InvalidArgument( "Unsupported output data type! ", int(dtype))); return; @@ -442,7 +441,7 @@ TRTEngineOp::EngineCtxPair& TRTEngineOp::GetEngine(int batch_size, if (allocator == nullptr) { // GetAllocator already set the Status. return null_pair; - }; + } infer->setGpuAllocator(allocator); #endif TrtUniquePtrType<nvinfer1::ICudaEngine> static_engine( @@ -506,8 +505,7 @@ TRTEngineOp::EngineCtxPair& TRTEngineOp::GetEngine(int batch_size, } tensorflow::Status TRTEngineOp::AllocateCalibrationResources( - tensorflow::OpKernelContext* ctx, - TRTCalibrationResource** cr) { + tensorflow::OpKernelContext* ctx, TRTCalibrationResource** cr) { auto cres = new TRTCalibrationResource(); *cr = cres; // Get the allocator. diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h index 0d2f9e8a9d..6fe318be6a 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h @@ -52,19 +52,17 @@ class TRTEngineOp : public AsyncOpKernel { private: // Execute calibration - void ExecuteCalibration(OpKernelContext* ctx, - AsyncHelper* helper); + void ExecuteCalibration(OpKernelContext* ctx, AsyncHelper* helper); // Construct a function handle for executing native funcdef graph Status ConstructFunctionHandle(OpKernelContext* ctx); // Execute replaced native segment as function Op. - void ExecuteNativeSegment(OpKernelContext* ctx, - AsyncHelper* helper); + void ExecuteNativeSegment(OpKernelContext* ctx, AsyncHelper* helper); // Allocate necessary resources for calibration - Status AllocateCalibrationResources( - OpKernelContext* ctx, TRTCalibrationResource** cr); + Status AllocateCalibrationResources(OpKernelContext* ctx, + TRTCalibrationResource** cr); // TODO(samikama): context should go to a resource manager! typedef std::pair<TrtUniquePtrType<nvinfer1::ICudaEngine>, diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 490c74a701..79f512dbcf 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -21,9 +21,9 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long import six as _six from tensorflow.contrib.tensorrt.wrap_conversion import calib_convert -from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert -from tensorflow.contrib.tensorrt.wrap_conversion import get_loaded_tensorrt_version from tensorflow.contrib.tensorrt.wrap_conversion import get_linked_tensorrt_version +from tensorflow.contrib.tensorrt.wrap_conversion import get_loaded_tensorrt_version +from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert from tensorflow.core.framework import graph_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.framework import errors @@ -58,6 +58,10 @@ def create_inference_graph(input_graph_def, precision_mode: one of 'FP32', 'FP16' and 'INT8' minimum_segment_size: the minimum number of nodes required for a subgraph to be replaced by TRTEngineOp. + is_dynamic_op: whether to generate dynamic TRT ops which will build the TRT + network and engine at run time. + maximum_cached_engines: max number of cached TRT engines in dynamic TRT ops. + cached_engine_batches: batch sizes used to pre-create cached engines. Returns: New GraphDef with TRTEngineOps placed in graph replacing subgraphs. @@ -81,7 +85,7 @@ def create_inference_graph(input_graph_def, "TensorRT %s but library loaded from environment is TensorRT %s" % (".".join([str(x) for x in compiled_version]), ".".join([str(x) for x in loaded_version])) + - ". Please make sure that correct version of TensorRT "\ + ". Please make sure that correct version of TensorRT " + "is available in the system and added to ldconfig or LD_LIBRARY_PATH" ) raise RuntimeError("Incompatible TensorRT library version") @@ -178,7 +182,7 @@ def calib_graph_to_infer_graph(calibration_graph_def, is_dynamic_op=False): is_calib_graph = False for n in calibration_graph_def.node: if n.op == "TRTEngineOp": - is_calib_graph = is_calib_graph or len(n.attr["calibration_data"].s) == 0 + is_calib_graph = is_calib_graph or not n.attr["calibration_data"].s if not is_calib_graph: tf_logging.error( "Not a calib graph. Doesn't seem to contain any calibration nodes.") diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc index 59ae860bc0..32e81858b9 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -16,7 +16,6 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" #include <atomic> -#include <chrono> #include <unordered_map> #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/contrib/tensorrt/resources/trt_resources.h b/tensorflow/contrib/tensorrt/resources/trt_resources.h index 76863503bd..b7d5ffd674 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_resources.h +++ b/tensorflow/contrib/tensorrt/resources/trt_resources.h @@ -49,15 +49,15 @@ class TRTCalibrationResource : public tensorflow::ResourceBase { string DebugString() override { std::stringstream oss; - using std::hex; using std::dec; using std::endl; + using std::hex; oss << " Calibrator = " << hex << calibrator_.get() << dec << endl - << " Builder = " << hex << builder_.get() << dec << endl - << " Engine = " << hex << engine_.get() << dec << endl - << " Logger = " << hex << &logger_ << dec << endl - << " Allocator = " << hex << allocator_.get() << dec << endl - << " Thread = " << hex << thr_.get() << dec << endl; + << " Builder = " << hex << builder_.get() << dec << endl + << " Engine = " << hex << engine_.get() << dec << endl + << " Logger = " << hex << &logger_ << dec << endl + << " Allocator = " << hex << allocator_.get() << dec << endl + << " Thread = " << hex << thr_.get() << dec << endl; return oss.str(); } diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py index 5e74f9295d..090aa8bdb0 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -76,7 +76,7 @@ def get_multi_engine_graph_def(mode="FP32"): g = ops.Graph() with g.as_default(): x = aops.placeholder(shape=[None, 3, 7, 5], name="input", dtype=dtype) - with g.name_scope("Global_scope") as scope: + with g.name_scope("Global_scope"): with g.name_scope("first_scope"): e = cop.constant( np.random.randn(3, 2, 3, 4), name="weights", dtype=dtype) @@ -92,15 +92,14 @@ def get_multi_engine_graph_def(mode="FP32"): b = cop.constant(np.random.randn(1, 4, 1, 1), name="bias2", dtype=dtype) q = conv / b - c = cop.constant(np.random.randn(1, 4, 1, 1), name="bias3", dtype=dtype) edge = mops.sin(q) edge1 = mops.cos(conv) with g.name_scope("test_scope"): de = edge + edge1 - t = t - edge1 - q = q * edge - t = t + q - t = t - de + t -= edge1 + q *= edge + t += q + t -= de k = aops.squeeze(t, name="output") print(k.dtype) return g.as_graph_def() |