diff options
Diffstat (limited to 'tensorflow/core/graph/mkl_layout_pass.cc')
-rw-r--r-- | tensorflow/core/graph/mkl_layout_pass.cc | 548 |
1 file changed, 548 insertions, 0 deletions
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc new file mode 100644 index 0000000000..87850b3e9a --- /dev/null +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -0,0 +1,548 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifdef INTEL_MKL + +#include <vector> +#include <utility> +#include <string> +#include <memory> +#include <unordered_set> +#include <functional> + +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" + +#include "tensorflow/core/graph/mkl_layout_pass.h" +#include "tensorflow/core/util/mkl_util.h" + +namespace tensorflow { + +// This pass implements rewriting of graph for propagating Mkl +// layout as an additional output tensor (we will loosely call a +// tensor that carries Mkl layout as Mkl tensor henceforth.) +// from every Mkl supported NN layer. 
+// +// As a example, consider Relu layer. Current definition of Relu +// layer looks like: +// +// O = Relu(A) +// +// Relu has 1 input (A), and 1 output (O). +// +// This rewrite pass will generate a new graph node for Relu +// (new node is called MklRelu) as: +// +// O, O_m = MklRelu(A, A_m) +// +// MklRelu has 2 inputs (A and A_m) and 2 outputs (O and O_m). +// Here A input is same as A input of Relu; O output is same +// as O output of Relu. O_m is the additional output tensor +// that will be set by MklRelu, and it represents Mkl tensor +// corresponding to O -- in other words, O_m is some kind of +// metadata for O. A_m is additional input of Relu, and it +// represents metadata for A - as O_m is metadata for O, A_m +// is metadata for A. MklRelu receives this metadata from +// previous layer (in the graph). +// +// When previous layer in the graph is Mkl layer, A_m will +// represent a valid Mkl tensor. But when previous Mkl layer +// is not an Mkl layer, then A_m represents a dummy Mkl tensor. +// +// Rewriting rules: +// - Selection of an op for rewriting happens by registering +// an op with this pass. If an op is not registered, then +// it is not rewritten. +// - Number of inputs after rewriting: +// Since for every input Tensorflow tensor, the rewritten +// layer gets Mkl tensor, rewritten op gets 2*N inputs, +// where N is the number of inputs for original op. +// - Number of outputs after rewriting: +// Since for every output Tensorflow tensor, the rewritten +// layer generates Mkl tensor, rewritten op generates 2*N +// outputs, where N is the number of outputs of original op. +// - Ordering of Tensorflow tensors and Mkl tensors: +// Since every op generates twice the number of inputs and +// outputs, one could imagine different ordering among +// Tensorflow tensors and Mkl tensors. 
E.g., let's assume +// an op 'Conv2D' takes (A, B) as input, then new op +// 'MklConv2D' can take (A, A_m, B, B_m) as input or it +// can also take (A, B, A_m, B_m) as input. Among N inputs +// one can get N! permutations. +// +// So the question is: which one do we follow? Currently, +// we follow an intuitive order where Mkl tensor follows a +// corresponding Tensorflow tensor immediately. In the +// context of above example, it will be: (A, A_m, B, B_m). +// We follow same ordering rule for output tensors. +// +// NOTE: Current rewriting approach rewrites an op to Mkl op without +// any conditions. But in the future, it may be possible to +// consider conditions such as input shapes and sizes to rewrite +// an op. +// +// Graph rewrite algorithm: +// Algorithm: Graph Rewrite +// Input: Graph G, Names of nodes to rewrite and their new nodes +// Output: Modified Graph G' if nodes are modified, G otherwise. +// Start: +// N = Topological_Sort(G) // N is set of nodes in toposort order. +// foreach node n in N +// do +// if (Is_MKL_Layer(n)) // Can this layer accept Mkl layout as input. +// then +// E = set of <incoming edge and its src_output slot> of n +// E' = {} // new set of edges for rewritten node +// foreach <e,s> in E +// do +// E' U {<e,s>} // First copy edge which generates Tensorflow +// // tensor as it is +// m = Source node of edge e +// if Is_Rewritten(m) // Did we rewrite this node in this pass? +// then +// E' U {<m,s+1>} // If yes, then m will generate Mkl tensor +// // as output. +// else +// d = Generate_Dummy_Mkl_Tensor() // If not, generate dummy +// // Mkl tensor. +// E' U {<d,0>} // Dummy Mkl tensor has only 1 output slot. +// fi +// done +// n' = Build_New_Node(G,new_name,E') +// Mark_Rewritten(n') // Mark new node as being rewritten. +// fi +// done +// +// Explanation: +// For graph rewrite, we visit nodes of the graph in the topological +// sort order. With this ordering, we visit nodes in top-to-bottom +// fashion. 
We need this order because while visiting a node we want +// all of its input nodes (parents) visited (and rewritten if +// applicable). This is because if we need to rewrite a current node +// then all of its input nodes need to be fixed (in other words they +// cannot be removed later.) +// +// While visiting each node, we first check if it is Mkl layer. If +// it is, then we rewrite that node after constructing new inputs to +// the node. If it is not Mkl layer, then we do not rewrite the node. +// +class MklLayoutRewritePass : public GraphOptimizationPass { + public: + MklLayoutRewritePass() { + csinfo_.conv2d = "Conv2D"; + + ninfo_.push_back({csinfo_.conv2d, GetMklOpName(csinfo_.conv2d), + 2, CopyAttrsConv2D}); + } + + // Standard interface to run pass + Status Run(const GraphOptimizationPassOptions& options); + + // Helper function which does most of heavy lifting for rewriting + // Mkl nodes to propagate Mkl tensor as additional output + // + // Extracts common functionality between Run public interface and + // test interface. + // + // @return true, if and only if graph is mutated; false otherwise. + bool RunPass(std::unique_ptr<Graph>* g); + + private: + /// Structure to specify name of original op, its new name after rewrite, + /// the number of inputs to the original op, and the function to be used + /// to copy attributes for the op + typedef struct { + string name; // Original name of the op in the graph + string newname; // New name of op in the graph + int numins; // Number of inputs to the original op + std::function<void(Node*, NodeBuilder*)> copyattrs; // Function handler + // to copy attributes from old node to new node. + } NodesInfo; + + /// Structure to store all constant strings + struct { + string relu; + string relugrad; + string conv2d; + } csinfo_; + + /// Maintain info about nodes to rewrite + std::vector<NodesInfo> ninfo_; + + /// Hash table to maintain nodes visited in the graph. 
+ std::unordered_set<const Node*> visited_nodes_; + + private: + // Predicate to check if we rewrote node 'n' + // + // If we rewrote the node, then the rewritten node will produce + // Mkl tensor as output. If we did not rewrite the node, then + // we need to insert dummy Mkl node on the input side. + // + // Returns true if node is rewritten, false otherwise. + inline bool IsRewrittenNode(Node* n) const { + return visited_nodes_.find(n) != visited_nodes_.end(); + } + + // Mark the node as rewritten + inline void MarkRewrittenNode(Node* n) { + visited_nodes_.insert(n); + } + + // Get the name of Mkl op from original TensorFlow op + // We prefix 'Mkl' to the original op to get Mkl op. + // TODO(nhasabni) We should move this to mkl_util.h. + inline string GetMklOpName(const string& name) const { + // Prefix that we add to Tensorflow op name to construct Mkl op name. + const char* const kMklOpPrefix = "Mkl"; + return string(kMklOpPrefix) + name; + } + + // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb' + // in graph 'g'. Original node is input in 'orign'. + // + // For details, refer to 'Number of inputs after rewriting' section in the + // documentation above. + // + // Returns Status::OK() if setting up inputs is successful, otherwise + // returns appropriate status code. + Status SetUpInputs(std::unique_ptr<Graph>* g, + const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs, + NodeBuilder* nb, Node* orign); + + // Rewrite Node 'n' in graph 'g' with rewrite information specified in 'ni' + // Returns Status::OK() if node rewrite is successful, otherwise returns + // appropriate error status + Status RewriteNode(std::unique_ptr<Graph>* g, Node* n, const NodesInfo& ni); + + // Functions specific to operators to copy attributes + // We need operator-specific function to copy attributes because the framework + // does not provide any generic function for it. 
+ static void CopyAttrsConv2D(Node* orign, NodeBuilder* nb); + + // Generate a graph node in graph 'g' representing a dummy Mkl tensor node, + // using node for original node 'orign' and return it in '*out'. + // TODO(nhasabni) We should move this to mkl_util.h + void GetDummyMklTensorNode(std::unique_ptr<Graph>* g, Node** out, + Node* orign); +}; + + +// We register Mkl rewrite pass for phase 1 in pre-placement group. +// Do not change the ordering of the Mkl passes. +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 1, + MklLayoutRewritePass); + + +static void FillInputs(const Node* n, + gtl::InlinedVector<Node*, 4>* control_edges, + gtl::InlinedVector<std::pair<Node*, int>, 4>* in) { + DCHECK_EQ(in->size(), n->num_inputs()); + control_edges->clear(); + for (const Edge* e : n->in_edges()) { + if (e->IsControlEdge()) { + control_edges->push_back(e->src()); + } else { + (*in)[e->dst_input()] = std::make_pair(e->src(), e->src_output()); + } + } + std::sort(control_edges->begin(), control_edges->end()); + if (n->op_def().is_commutative()) { + // For commutative inputs, we sort the input by the input Node* + // to get a canonical ordering (so that add(a,b) and add(b, a) will + // hash to the same value if is_commutative is true for 'add'). + std::sort(in->begin(), in->end()); + } +} + +////////////////////////////////////////////////////////////////////////// + +// Macros to build new node with different number of inputs. +// We need this way because we need to specify all the inputs when +// building a node. Comment at core/graph/node_builder.h, line 85-86. 
+ +#define SETUP_INPUTS1(nb, op1) do { \ + nb->Input(op1.node, op1.index); \ +}while(0) + +#define SETUP_INPUTS2(nb, op1, op2) do { \ + nb->Input(op1.node, op1.index); \ + nb->Input(op2.node, op2.index); \ +}while(0) + +#define SETUP_INPUTS3(nb, op1, op2, op3) do { \ + nb->Input(op1.node, op1.index); \ + nb->Input(op2.node, op2.index); \ + nb->Input(op3.node, op3.index); \ +}while(0) + +#define SETUP_INPUTS4(nb, op1, op2, op3, op4) do { \ + nb->Input(op1.node, op1.index); \ + nb->Input(op2.node, op2.index); \ + nb->Input(op3.node, op3.index); \ + nb->Input(op4.node, op4.index); \ +}while(0) + +#define SETUP_INPUTS5(nb, op1, op2, op3, op4, op5) do {\ + nb->Input(op1.node, op1.index); \ + nb->Input(op2.node, op2.index); \ + nb->Input(op3.node, op3.index); \ + nb->Input(op4.node, op4.index); \ + nb->Input(op5.node, op5.index); \ +}while(0) + +// TODO(nhasabni) We should move this to mkl_util.h. +void MklLayoutRewritePass::GetDummyMklTensorNode( + std::unique_ptr<Graph>* g, Node** out, Node* orign) { + // We use a tensor of shape {8} and value 0,0,0,0,0,0,0,0 to represent + // dummy Mkl tensor. 8 = 2*size_t. + const DataType dt = DataTypeToEnum<uint8>::v(); + TensorProto proto; + proto.set_dtype(dt); + uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + proto.set_tensor_content(const_cast<const void*>( + static_cast<void*>(&zero)), 8); + TensorShape dummy_shape({8}); + dummy_shape.AsProto(proto.mutable_tensor_shape()); + TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const") + .Attr("value", proto) + .Attr("dtype", dt) + .Device(orign->def().device()) // We place this node on same + // device as device of original + // node. + .Finalize(&**g, out)); +} + +Status MklLayoutRewritePass::SetUpInputs(std::unique_ptr<Graph>* g, + const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs, + NodeBuilder* nb, Node* orign) { + std::vector<NodeBuilder::NodeOut> new_inputs; + + // 1. Let's setup inputs for the new node. 
+ for (int i = 0; i < inputs.size(); i++) { + Node* n = inputs[i].first; + // First let's copy original TF tensor input as it is. + new_inputs.push_back(NodeBuilder::NodeOut(n, inputs[i].second)); + + // Second, let's add edge to propagate Mkl tensors from input Mkl layers, + // or generate a dummy Mkl tensor representing not-mkl-tensor case. + if (IsRewrittenNode(n)) { + // If we have visited this node and rewritten it, then it will generate + // an edge that will receive Mkl tensor from a node. + // First, let's assert that this op is Mkl layer. + DataType T; + TF_CHECK_OK(GetNodeAttr(n->def(), "T", &T)); + // If this op has been rewritten, then its name must have been same as + // Mkl op. + CHECK_EQ(mkl_layer_registry::IsMklLayer(n->type_string()), true); + // src slot number for Mkl tensor would be the one next to TF tensor + // slot number. + new_inputs.push_back(NodeBuilder::NodeOut(n, inputs[i].second+1)); + } else { + // If we have not visited the node and rewritten it, then we need + // to create a dummy node that will feed a non-Mkl tensor to this node. + // DummyMklTensor node has no input and generates only 1 output + // (dummy Mkl tensor) as output slot number 0. + Node* dmt = nullptr; + GetDummyMklTensorNode(g, &dmt, orign); + CHECK_NOTNULL(dmt); + new_inputs.push_back(NodeBuilder::NodeOut(dmt, 0)); + } + } + + // The total number of inputs to new node _must_ be 2 times the number + // of inputs to the original node: N original Tensorflow tensors and + // N for Mkl tensors corresponding to each Tensorflow tensors. + CHECK_EQ(new_inputs.size(), inputs.size() * 2); + + // 2. Let's build the node with new inputs. + switch (new_inputs.size()) { + case 0: // We don't need to do anything for no input as we have + // already built node. 
+ break; + case 1: SETUP_INPUTS1(nb, new_inputs[0]); break; + case 2: SETUP_INPUTS2(nb, new_inputs[0], + new_inputs[1]); break; + case 3: SETUP_INPUTS3(nb, new_inputs[0], + new_inputs[1], + new_inputs[2]); break; + case 4: SETUP_INPUTS4(nb, new_inputs[0], + new_inputs[1], + new_inputs[2], + new_inputs[3]); break; + case 5: SETUP_INPUTS5(nb, new_inputs[0], + new_inputs[1], + new_inputs[2], + new_inputs[3], + new_inputs[4]); break; + default: { + return Status(error::Code::UNIMPLEMENTED, + "Could not create node with given number of inputs"); + } + } + + return Status::OK(); +} + +void MklLayoutRewritePass::CopyAttrsConv2D(Node* orign, NodeBuilder* nb) { + DataType T; + string data_format; + string padding; + std::vector<int32> strides; + bool use_cudnn_on_gpu; + + // Get all attributes from old node. + TF_CHECK_OK(GetNodeAttr(orign->def(), "T", &T)); + TF_CHECK_OK(GetNodeAttr(orign->def(), "strides", &strides)); + TF_CHECK_OK(GetNodeAttr(orign->def(), "padding", &padding)); + TF_CHECK_OK(GetNodeAttr(orign->def(), "data_format", &data_format)); + TF_CHECK_OK(GetNodeAttr(orign->def(), "use_cudnn_on_gpu", &use_cudnn_on_gpu)); + + // Add attributes to new node. + nb->Attr("T", T); + nb->Attr("strides", strides); + nb->Attr("padding", padding); + nb->Attr("data_format", data_format); + nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu); +} + +Status MklLayoutRewritePass::RewriteNode( + std::unique_ptr<Graph>* g, Node* orign, const NodesInfo& ni) { + VLOG(1) << "MKLLayoutRewritePass: Original node:" << orign->DebugString(); + + // Get all inputs. + const int num = orign->num_inputs(); + CHECK_EQ(num, ni.numins); + gtl::InlinedVector<Node*, 4> control_edges; + gtl::InlinedVector<std::pair<Node*, int>, 4> inputs(num); + FillInputs(orign, &control_edges, &inputs); + + // Build new node. We use same name as original node, but change the op name. + NodeBuilder nb(orign->name().c_str(), ni.newname.c_str()); + // Copy user-specified device assigned to original node to new node. 
+ nb.Device(orign->def().device()); + // Set up new inputs to the rewritten node. + Status s = SetUpInputs(g, inputs, &nb, orign); + if (s != Status::OK()) { + return s; + } + // Copy attributes from original node to new node. + ni.copyattrs(orign, &nb); + // Set the Mkl layer label for this op. + nb.Attr("_kernel", mkl_layer_registry::kMklLayerLabel); + Node* newn = nullptr; + + // Finalize graph and get new node. + TF_CHECK_OK(nb.Finalize(&**g, &newn)); + CHECK_NOTNULL(newn); + + // Incoming edges from 'orign' node to new 'newn' node are already copied + // in BuildNode. Copy outgoing edges from 'orign' node to new 'newn' node. + for (const Edge* e : orign->out_edges()) { + (*g)->AddEdge(newn, e->src_output(), e->dst(), e->dst_input()); + } + + // Copy the runtime device assigned from original code to new node. + newn->set_assigned_device_name(orign->assigned_device_name()); + + // Delete original node and mark new node as rewritten. + (*g)->RemoveNode(orign); + MarkRewrittenNode(newn); + + VLOG(1) << "MKLLayoutRewritePass: New node:" << newn->DebugString(); + return Status::OK(); +} + +bool MklLayoutRewritePass::RunPass( + std::unique_ptr<Graph>* g) { + bool result = false; + CHECK_NOTNULL(g); + + DumpGraph("Before running MklLayoutRewritePass", &**g); + + std::vector<Node*> order; + GetReversePostOrder(**g, &order); // This will give us topological sort. + + for (Node* n : order) { + if (!n->IsOp()) { + continue; + } + + for (const NodesInfo& ni : ninfo_) { + DataType dtype = DT_INVALID; + // An op needs to have data type (T) attribute and its corresponding + // Mkl op name must be supported. 
+ if (GetNodeAttr(n->def(), "T", &dtype) == Status::OK() && + mkl_layer_registry::IsMklLayer(GetMklOpName(n->type_string())) && + n->type_string().compare(ni.name) == 0) { + string node_name = n->name(); + string op_name = n->type_string(); + + VLOG(1) << "MKLLayoutRewritePass: Scheduled node " << node_name + << " with op " << op_name << " for rewrite using" + << " layout optimization."; + + if (RewriteNode(g, n, ni) == Status::OK()) { + VLOG(1) << "MKLLayoutRewritePass: Successfully rewrote node " + << node_name << " with op " << op_name + << " for Mkl layout optimization."; + result = true; + break; // We found matching nodesinfo so no need to search next. + } + } + } + } + + DumpGraph("After running MklLayoutRewritePass", &**g); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +// Run function for the pass +/////////////////////////////////////////////////////////////////////////////// + +bool RunMklLayoutRewritePass(std::unique_ptr<Graph>* g) { + return MklLayoutRewritePass().RunPass(g); +} + +Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) { + if (options.graph == nullptr) { + return Status::OK(); + } + + // Get the ownership of graph + std::unique_ptr<Graph>* g = std::move(options.graph); + + RunPass(g); + + // Return the ownership of graph back + options.graph->reset(g->release()); + + return Status::OK(); +} + +} // namespace tensorflow + +#endif |