diff options
Diffstat (limited to 'tensorflow/contrib/tensorrt/convert/convert_graph.cc')
-rw-r--r-- | tensorflow/contrib/tensorrt/convert/convert_graph.cc | 256 |
1 files changed, 205 insertions, 51 deletions
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 970f810473..eea8c8efa2 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/convert/convert_graph.h" +#include <list> #include <map> #include <set> #include <unordered_map> @@ -48,13 +49,29 @@ namespace tensorrt { namespace convert { namespace { -static bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { +bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { // LINT.IfChange // TODO(jie): Segmentation shouldn't associated with op name. // Split it into a registration for each kernel. static const std::set<string> candidate_ops = { - "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", - "Add", "Mul", "Sub", "Rsqrt", "Pad" // "Placeholder" ,"Mean" + "Identity", + "Const", + "Conv2D", + "MaxPool", + "BiasAdd", + "Relu", + "Add", + "Mul", + "Sub", + "Rsqrt", + "Pad", + "Mean", + "AvgPool", + "ConcatV2", + "DepthwiseConv2dNative", + "FusedBatchNorm", + "FusedBatchNormV2", + // TODO(ben,jie): ... }; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) return candidate_ops.count(node_def.op()); @@ -69,6 +86,8 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, if (!subgraph_node_ids.count(edge->src()->id()) && !edge->src()->IsSource()) { incoming_edges->insert(edge); + } else { + VLOG(2) << edge->src()->name() << " N, "; } } } @@ -82,7 +101,10 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, for (const tensorflow::Edge* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && !edge->dst()->IsSink()) { + VLOG(2) << edge->dst()->name() << " Y, "; outgoing_edges->insert(edge); + } else { + VLOG(2) << edge->dst()->name() << " N, "; } } } @@ -109,74 +131,150 @@ std::unordered_map<string, std::vector<int>> BuildTensorNameMap( } return result; } - -tensorflow::Status ConvertSubGraphToTensorRT( - const std::vector<string>& output_names, - const std::set<int>& subgraph_node_ids, - size_t max_batch_size, // Max batch size that engine will be created for - // Max amount of memory that engine will be allowed to consume, in bytes - size_t max_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& graph_properties, - tensorflow::Graph* graph) { - tensorflow::EdgeSet subgraph_incoming_edges; - GetSubGraphIncomingEdges(*graph, subgraph_node_ids, &subgraph_incoming_edges); - +// TODO(sami): convert references to pointers +struct ConvertGraphParams { + ConvertGraphParams( + tensorflow::Graph& inp_graph, + const std::vector<string>& output_node_names, + const std::set<int>& subgraph_node_id_numbers, + size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& current_graph_properties, + std::unordered_map<string, std::pair<int, string>>* output_edges, + int engine_precision_mode) + : graph(inp_graph), + output_names(output_node_names), + subgraph_node_ids(subgraph_node_id_numbers), + max_batch_size(max_supported_batch_size), + max_workspace_size_bytes(max_consumed_workspace_size_bytes), + graph_properties(current_graph_properties), + output_edge_map(output_edges), + precision_mode(engine_precision_mode) {} + tensorflow::Graph& graph; + const std::vector<string>& output_names; + const std::set<int>& subgraph_node_ids; + size_t max_batch_size; + size_t max_workspace_size_bytes; + const tensorflow::grappler::GraphProperties& graph_properties; + std::unordered_map<string, std::pair<int, string>>* output_edge_map; + int precision_mode; std::vector<std::pair<int, int>> subgraph_inputs; + std::vector<std::pair<int, int>> subgraph_outputs; + tensorflow::EdgeSet subgraph_incoming_edges; + tensorflow::EdgeSet subgraph_outgoing_edges; +}; - // Collect inputs by looking for incoming edges - for (const tensorflow::Edge* edge : subgraph_incoming_edges) { - subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); +static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { + GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids, + &p->subgraph_incoming_edges); + for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) { + p->subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); } + auto output_name_to_index_map = BuildTensorNameMap(p->output_names); std::set<std::pair<int, int>> subgraph_outputs_set; // Collect outputs referenced from output_names - auto output_name_to_index_map = BuildTensorNameMap(output_names); - for (int node_id : subgraph_node_ids) { - tensorflow::Node* node = graph->FindNodeId(node_id); + for (int node_id : p->subgraph_node_ids) { + tensorflow::Node* node = p->graph.FindNodeId(node_id); if (output_name_to_index_map.count(node->name())) { for (int index : output_name_to_index_map.at(node->name())) { subgraph_outputs_set.insert({node_id, index}); } } } - // Collect outputs referenced from outgoing edges - tensorflow::EdgeSet subgraph_outgoing_edges; - GetSubGraphOutgoingEdges(*graph, subgraph_node_ids, &subgraph_outgoing_edges); - for (const tensorflow::Edge* edge : subgraph_outgoing_edges) { + GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids, + &p->subgraph_outgoing_edges); + for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) { subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); } - // Impose an ordering on the outputs - std::vector<std::pair<int, int>> subgraph_outputs( - subgraph_outputs_set.begin(), subgraph_outputs_set.end()); - // Build TensorRT node and add it to the graph + p->subgraph_outputs.reserve(subgraph_outputs_set.size()); + p->subgraph_outputs.insert(p->subgraph_outputs.begin(), + subgraph_outputs_set.begin(), + subgraph_outputs_set.end()); + return tensorflow::Status::OK(); +}; + +tensorflow::Status GetCalibNode(ConvertGraphParams* params) { + TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); tensorflow::NodeDef trt_node_def; - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( - *graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, - max_batch_size, max_workspace_size_bytes, graph_properties, - &trt_node_def)); + SubGraphParams s(params->graph, params->subgraph_node_ids, + params->subgraph_inputs, params->subgraph_outputs, + params->max_batch_size, params->max_workspace_size_bytes, + params->graph_properties, params->output_edge_map, + &trt_node_def, params->precision_mode); + TF_RETURN_IF_ERROR(InjectCalibrationNode(s)); tensorflow::Status status; - tensorflow::Node* trt_node = graph->AddNode(trt_node_def, &status); + tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); + + TF_RETURN_IF_ERROR(status); + + for (auto in_edge : + params->subgraph_incoming_edges) { // loop over incoming edges and + // attach them to calib node + // tensorflow::Node* src_node = in_edge->src(); + auto src_output = in_edge->src_output(); + auto dst_node = in_edge->dst(); + auto dst_input = in_edge->dst_input(); + VLOG(1) << " update edge " << trt_node->name() << ":" << src_output + << " -> " << dst_node->name() << ":" << dst_input; + TF_RETURN_IF_ERROR( + params->graph.UpdateEdge(trt_node, src_output, dst_node, dst_input)); + } + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { + TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); + tensorflow::NodeDef trt_node_def; + + SubGraphParams s(params->graph, params->subgraph_node_ids, + params->subgraph_inputs, params->subgraph_outputs, + params->max_batch_size, params->max_workspace_size_bytes, + params->graph_properties, params->output_edge_map, + &trt_node_def, params->precision_mode); + TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s)); + tensorflow::Status status; + tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); + + // AddNode does not wire edges. + // Re-map incoming edges to use the new TRT node instead of the orig subgraph + std::map<std::pair<int, int>, int> subgraph_edge_to_input_map; + for (size_t i = 0; i < params->subgraph_inputs.size(); ++i) { + subgraph_edge_to_input_map.insert({params->subgraph_inputs.at(i), i}); + } + for (const tensorflow::Edge* edge : params->subgraph_incoming_edges) { + std::pair<int, int> old_src = {edge->src()->id(), edge->src_output()}; + int new_src_output = subgraph_edge_to_input_map.at(old_src); + params->graph.AddEdge(edge->src(), edge->src_output(), trt_node, + new_src_output); + params->graph.RemoveEdge(edge); + } + + VLOG(2) << "new wiring edges: " << trt_node->in_edges().size(); + for (const tensorflow::Edge* edge : trt_node->in_edges()) { + VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); + } + TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph std::map<std::pair<int, int>, int> subgraph_edge_to_output_map; - for (size_t i = 0; i < subgraph_outputs.size(); ++i) { - subgraph_edge_to_output_map.insert({subgraph_outputs.at(i), i}); + for (size_t i = 0; i < params->subgraph_outputs.size(); ++i) { + subgraph_edge_to_output_map.insert({params->subgraph_outputs.at(i), i}); } TF_RETURN_IF_ERROR(status); - for (const tensorflow::Edge* edge : subgraph_outgoing_edges) { + for (const tensorflow::Edge* edge : params->subgraph_outgoing_edges) { std::pair<int, int> old_src = {edge->src()->id(), edge->src_output()}; int new_src_output = subgraph_edge_to_output_map.at(old_src); - TF_RETURN_IF_ERROR(graph->UpdateEdge(trt_node, new_src_output, edge->dst(), - edge->dst_input())); + TF_RETURN_IF_ERROR(params->graph.UpdateEdge( + trt_node, new_src_output, edge->dst(), edge->dst_input())); } // Remove the original subgraph - for (int node_id : subgraph_node_ids) { - tensorflow::Node* node = graph->FindNodeId(node_id); + for (int node_id : params->subgraph_node_ids) { + tensorflow::Node* node = params->graph.FindNodeId(node_id); // Don't remove the input placeholders if (node->type_string() == "Placeholder") { continue; } - graph->RemoveNode(node); + params->graph.RemoveNode(node); } return tensorflow::Status::OK(); } @@ -194,12 +292,39 @@ tensorflow::Status BuildNodeMap( } } // namespace +tensorflow::Status ConvertCalibGraphToInferGraph( + const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* infer_graph) { + VLOG(0) << "Starting Calib Conversion"; + tensorflow::Graph graph(tensorflow::OpRegistry::Global()); + TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( + tensorflow::GraphConstructorOptions(), graph_def, &graph)); + // get calib nodes + std::vector<tensorflow::Node*> calib_nodes; + for (auto node : graph.op_nodes()) { + if (node->type_string() == "TRTCalibOp") { + VLOG(1) << "Found Calib Node"; + calib_nodes.push_back(node); + } + } + VLOG(0) << "Num Calib nodes in graph= " << calib_nodes.size(); + if (calib_nodes.size() == 0) + return tensorflow::errors::FailedPrecondition( + "Graph doesn't contain any calibration nodes!." + " Please generate calibration graph and run calibration first"); + for (auto n : calib_nodes) { + TF_RETURN_IF_ERROR( + tensorrt::convert::ConvertCalibrationNodeToEngineNode(graph, n)); + } + graph.ToGraphDef(infer_graph); + return tensorflow::Status::OK(); +} tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector<string>& output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def) { - // Optimization pass + size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, + int precision_mode = FP32MODE, int minimum_segment_size = 3) { + // optimization pass tensorflow::grappler::GrapplerItem item; item.fetch = output_names; tensorflow::GraphDef gdef; @@ -209,16 +334,23 @@ tensorflow::Status ConvertGraphDefToTensorRT( tensorflow::grappler::LayoutOptimizer optimizer; tensorflow::grappler::Cluster* cluster; - // Virtual cluster + // virtual cluster tensorflow::DeviceProperties device_properties; + device_properties.set_type("GPU"); device_properties.mutable_environment()->insert({"architecture", "6"}); cluster = new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}}); + // single machine + int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores(); + int num_gpus = tensorflow::grappler::GetNumAvailableGPUs(); + VLOG(2) << "cpu_cores: " << num_cpu_cores; + VLOG(2) << "gpus: " << num_gpus; + TF_RETURN_IF_ERROR(optimizer.Optimize(cluster, item, &gdef)); - // Constant folding + // constant folding item.graph = gdef; tensorflow::grappler::ConstantFolding fold(nullptr); TF_RETURN_IF_ERROR(fold.Optimize(nullptr, item, &gdef)); @@ -226,7 +358,6 @@ tensorflow::Status ConvertGraphDefToTensorRT( // AJ refactoring shape inference through grappler/GraphProperties. tensorflow::grappler::GraphProperties static_graph_properties(item); TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(false)); - // Build full graph tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), gdef.library()); @@ -243,7 +374,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } // TODO(sami): this should be passed as a knob!!!! - segment_options.minimum_segment_size = 2; + segment_options.minimum_segment_size = minimum_segment_size; tensorflow::tensorrt::segment::SegmentNodesVector segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( gdef, IsTensorRTCandidate, segment_options, &segments)); @@ -252,14 +383,37 @@ tensorflow::Status ConvertGraphDefToTensorRT( } std::unordered_map<string, tensorflow::Node*> node_map; TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); + std::unordered_map<string, std::pair<int, string>> output_edge_map; + int count = 0; + float total_num_nodes_in_segments = 0.; + for (auto s : segments) { + total_num_nodes_in_segments += s.size(); + } for (const std::set<string>& subgraph_node_names : segments) { std::set<int> subgraph_node_ids; + size_t max_mem_per_engine = + max_workspace_size_bytes * + ((float)subgraph_node_names.size() / total_num_nodes_in_segments); + std::stringstream oss; for (const string& node_name : subgraph_node_names) { + oss << " " << node_name; subgraph_node_ids.insert(node_map.at(node_name)->id()); } - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( - output_names, subgraph_node_ids, max_batch_size, - max_workspace_size_bytes, static_graph_properties, &graph)); + VLOG(2) << "Subgraph nodes" << oss.str(); + ConvertGraphParams p(graph, output_names, subgraph_node_ids, max_batch_size, + max_mem_per_engine, static_graph_properties, + &output_edge_map, precision_mode); + if (precision_mode == INT8MODE) { + TF_RETURN_IF_ERROR(GetCalibNode(&p)); + } else { + tensorflow::Status status = ConvertSubGraphToTensorRT(&p); + if (status != tensorflow::Status::OK()) { + LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count + << " due to: \n" + << status.ToString() << " SKIPPING......"; + } + count++; + } } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); |