aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/graph/mkl_layout_pass.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/graph/mkl_layout_pass.cc')
-rw-r--r--tensorflow/core/graph/mkl_layout_pass.cc548
1 files changed, 548 insertions, 0 deletions
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
new file mode 100644
index 0000000000..87850b3e9a
--- /dev/null
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -0,0 +1,548 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+
+#include <vector>
+#include <utility>
+#include <string>
+#include <memory>
+#include <unordered_set>
+#include <functional>
+
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/graph/algorithm.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
+#include "tensorflow/core/lib/hash/hash.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/common_runtime/optimization_registry.h"
+
+#include "tensorflow/core/graph/mkl_layout_pass.h"
+#include "tensorflow/core/util/mkl_util.h"
+
+namespace tensorflow {
+
+// This pass implements rewriting of graph for propagating Mkl
+// layout as an additional output tensor (we will loosely call a
+// tensor that carries Mkl layout as Mkl tensor henceforth.)
+// from every Mkl supported NN layer.
+//
+// As an example, consider Relu layer. Current definition of Relu
+// layer looks like:
+//
+// O = Relu(A)
+//
+// Relu has 1 input (A), and 1 output (O).
+//
+// This rewrite pass will generate a new graph node for Relu
+// (new node is called MklRelu) as:
+//
+// O, O_m = MklRelu(A, A_m)
+//
+// MklRelu has 2 inputs (A and A_m) and 2 outputs (O and O_m).
+// Here A input is same as A input of Relu; O output is same
+// as O output of Relu. O_m is the additional output tensor
+// that will be set by MklRelu, and it represents Mkl tensor
+// corresponding to O -- in other words, O_m is some kind of
+// metadata for O. A_m is additional input of MklRelu, and it
+// represents metadata for A - as O_m is metadata for O, A_m
+// is metadata for A. MklRelu receives this metadata from
+// previous layer (in the graph).
+//
+// When the previous layer in the graph is an Mkl layer, A_m will
+// represent a valid Mkl tensor. But when the previous layer
+// is not an Mkl layer, then A_m represents a dummy Mkl tensor.
+//
+// Rewriting rules:
+// - Selection of an op for rewriting happens by registering
+// an op with this pass. If an op is not registered, then
+// it is not rewritten.
+// - Number of inputs after rewriting:
+// Since for every input Tensorflow tensor, the rewritten
+// layer gets Mkl tensor, rewritten op gets 2*N inputs,
+// where N is the number of inputs for original op.
+// - Number of outputs after rewriting:
+// Since for every output Tensorflow tensor, the rewritten
+// layer generates Mkl tensor, rewritten op generates 2*N
+// outputs, where N is the number of outputs of original op.
+// - Ordering of Tensorflow tensors and Mkl tensors:
+// Since every op generates twice the number of inputs and
+// outputs, one could imagine different ordering among
+// Tensorflow tensors and Mkl tensors. E.g., let's assume
+// an op 'Conv2D' takes (A, B) as input, then new op
+// 'MklConv2D' can take (A, A_m, B, B_m) as input or it
+// can also take (A, B, A_m, B_m) as input. Among N inputs
+// one can get N! permutations.
+//
+// So the question is: which one do we follow? Currently,
+// we follow an intuitive order where Mkl tensor follows a
+// corresponding Tensorflow tensor immediately. In the
+// context of above example, it will be: (A, A_m, B, B_m).
+// We follow same ordering rule for output tensors.
+//
+// NOTE: Current rewriting approach rewrites an op to Mkl op without
+// any conditions. But in the future, it may be possible to
+// consider conditions such as input shapes and sizes to rewrite
+// an op.
+//
+// Graph rewrite algorithm:
+// Algorithm: Graph Rewrite
+// Input: Graph G, Names of nodes to rewrite and their new nodes
+// Output: Modified Graph G' if nodes are modified, G otherwise.
+// Start:
+// N = Topological_Sort(G) // N is set of nodes in toposort order.
+// foreach node n in N
+// do
+// if (Is_MKL_Layer(n)) // Can this layer accept Mkl layout as input.
+// then
+// E = set of <incoming edge and its src_output slot> of n
+// E' = {} // new set of edges for rewritten node
+// foreach <e,s> in E
+// do
+// E' U {<e,s>} // First copy edge which generates Tensorflow
+// // tensor as it is
+// m = Source node of edge e
+// if Is_Rewritten(m) // Did we rewrite this node in this pass?
+// then
+// E' U {<m,s+1>} // If yes, then m will generate Mkl tensor
+// // as output.
+// else
+// d = Generate_Dummy_Mkl_Tensor() // If not, generate dummy
+// // Mkl tensor.
+// E' U {<d,0>} // Dummy Mkl tensor has only 1 output slot.
+// fi
+// done
+// n' = Build_New_Node(G,new_name,E')
+// Mark_Rewritten(n') // Mark new node as being rewritten.
+// fi
+// done
+//
+// Explanation:
+// For graph rewrite, we visit nodes of the graph in the topological
+// sort order. With this ordering, we visit nodes in top-to-bottom
+// fashion. We need this order because while visiting a node we want
+// all of its input nodes (parents) visited (and rewritten if
+// applicable). This is because if we need to rewrite a current node
+// then all of its input nodes need to be fixed (in other words they
+// cannot be removed later.)
+//
+// While visiting each node, we first check if it is Mkl layer. If
+// it is, then we rewrite that node after constructing new inputs to
+// the node. If it is not Mkl layer, then we do not rewrite the node.
+//
+class MklLayoutRewritePass : public GraphOptimizationPass {
+ public:
+  // Registers the ops this pass rewrites. Currently only Conv2D is
+  // registered: it is rewritten to the op named GetMklOpName("Conv2D"),
+  // is expected to have 2 inputs, and has its attributes copied by
+  // CopyAttrsConv2D.
+  MklLayoutRewritePass() {
+    csinfo_.conv2d = "Conv2D";
+
+    ninfo_.push_back({csinfo_.conv2d, GetMklOpName(csinfo_.conv2d),
+                      2, CopyAttrsConv2D});
+  }
+
+  // Standard interface to run pass
+  Status Run(const GraphOptimizationPassOptions& options) override;
+
+  // Helper function which does most of heavy lifting for rewriting
+  // Mkl nodes to propagate Mkl tensor as additional output
+  //
+  // Extracts common functionality between Run public interface and
+  // test interface.
+  //
+  // @return true, if and only if graph is mutated; false otherwise.
+  bool RunPass(std::unique_ptr<Graph>* g);
+
+ private:
+  /// Structure to specify name of original op, its new name after rewrite,
+  /// the number of inputs to the original op, and the function to be used
+  /// to copy attributes for the op
+  struct NodesInfo {
+    string name;     // Original name of the op in the graph
+    string newname;  // New name of op in the graph
+    int numins;      // Number of inputs to the original op
+    // Function handler to copy attributes from old node to new node.
+    std::function<void(Node*, NodeBuilder*)> copyattrs;
+  };
+
+  /// Structure to store all constant strings
+  struct {
+    string relu;
+    string relugrad;
+    string conv2d;
+  } csinfo_;
+
+  /// Maintain info about nodes to rewrite
+  std::vector<NodesInfo> ninfo_;
+
+  /// Hash table to maintain nodes visited in the graph.
+  /// Only nodes created by RewriteNode are inserted here (see
+  /// MarkRewrittenNode), so membership means "rewritten by this pass".
+  std::unordered_set<const Node*> visited_nodes_;
+
+ private:
+  // Predicate to check if we rewrote node 'n'
+  //
+  // If we rewrote the node, then the rewritten node will produce
+  // Mkl tensor as output. If we did not rewrite the node, then
+  // we need to insert dummy Mkl node on the input side.
+  //
+  // Returns true if node is rewritten, false otherwise.
+  inline bool IsRewrittenNode(Node* n) const {
+    return visited_nodes_.find(n) != visited_nodes_.end();
+  }
+
+  // Mark the node as rewritten
+  inline void MarkRewrittenNode(Node* n) {
+    visited_nodes_.insert(n);
+  }
+
+  // Get the name of Mkl op from original TensorFlow op
+  // We prefix 'Mkl' to the original op to get Mkl op.
+  // TODO(nhasabni) We should move this to mkl_util.h.
+  inline string GetMklOpName(const string& name) const {
+    // Prefix that we add to Tensorflow op name to construct Mkl op name.
+    const char* const kMklOpPrefix = "Mkl";
+    return string(kMklOpPrefix) + name;
+  }
+
+  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
+  // in graph 'g'. Original node is input in 'orign'.
+  //
+  // For details, refer to 'Number of inputs after rewriting' section in the
+  // documentation above.
+  //
+  // Returns Status::OK() if setting up inputs is successful, otherwise
+  // returns appropriate status code.
+  Status SetUpInputs(std::unique_ptr<Graph>* g,
+                     const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+                     NodeBuilder* nb, Node* orign);
+
+  // Rewrite Node 'n' in graph 'g' with rewrite information specified in 'ni'
+  // Returns Status::OK() if node rewrite is successful, otherwise returns
+  // appropriate error status
+  Status RewriteNode(std::unique_ptr<Graph>* g, Node* n, const NodesInfo& ni);
+
+  // Functions specific to operators to copy attributes
+  // We need operator-specific function to copy attributes because the framework
+  // does not provide any generic function for it.
+  static void CopyAttrsConv2D(Node* orign, NodeBuilder* nb);
+
+  // Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
+  // using node for original node 'orign' and return it in '*out'.
+  // TODO(nhasabni) We should move this to mkl_util.h
+  void GetDummyMklTensorNode(std::unique_ptr<Graph>* g, Node** out,
+                             Node* orign);
+};
+
+
+// We register Mkl rewrite pass for phase 1 in pre-placement group
+// (OptimizationPassRegistry::PRE_PLACEMENT).
+// Do not change the ordering of the Mkl passes.
+REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 1,
+ MklLayoutRewritePass);
+
+
+// Splits the incoming edges of node 'n' into control and data inputs.
+//
+// 'control_edges' is cleared and then filled with the source node of every
+// incoming control edge, sorted by pointer value. 'in' must already be sized
+// to n->num_inputs(); slot i receives the <source node, source output slot>
+// pair of the data edge whose destination input is i. When the op is marked
+// commutative, 'in' is additionally sorted to give a canonical ordering.
+static void FillInputs(const Node* n,
+                       gtl::InlinedVector<Node*, 4>* control_edges,
+                       gtl::InlinedVector<std::pair<Node*, int>, 4>* in) {
+  DCHECK_EQ(in->size(), n->num_inputs());
+  control_edges->clear();
+
+  for (const Edge* edge : n->in_edges()) {
+    Node* const producer = edge->src();
+    if (edge->IsControlEdge()) {
+      control_edges->push_back(producer);
+      continue;
+    }
+    // Data edge: record it at the position of the input it feeds.
+    (*in)[edge->dst_input()] = std::make_pair(producer, edge->src_output());
+  }
+  std::sort(control_edges->begin(), control_edges->end());
+
+  const bool commutative = n->op_def().is_commutative();
+  if (commutative) {
+    // Canonicalize commutative inputs by sorting them, so that add(a,b)
+    // and add(b,a) end up with the same input sequence.
+    std::sort(in->begin(), in->end());
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Macros to build new node with different number of inputs.
+// We need this way because we need to specify all the inputs when
+// building a node. Comment at core/graph/node_builder.h, line 85-86.
+//
+// SETUP_INPUTSk(nb, op1, ..., opk) calls nb->Input(op.node, op.index) once
+// per operand, in argument order. 'nb' is a pointer to a builder and each
+// 'op' is an object with 'node' and 'index' members. Every macro parameter
+// is parenthesized at its point of use so that arbitrary expressions
+// (e.g. 'base + 0', '*ptr') expand correctly.
+
+#define SETUP_INPUTS1(nb, op1) do {                 \
+  (nb)->Input((op1).node, (op1).index);             \
+} while (0)
+
+#define SETUP_INPUTS2(nb, op1, op2) do {            \
+  (nb)->Input((op1).node, (op1).index);             \
+  (nb)->Input((op2).node, (op2).index);             \
+} while (0)
+
+#define SETUP_INPUTS3(nb, op1, op2, op3) do {       \
+  (nb)->Input((op1).node, (op1).index);             \
+  (nb)->Input((op2).node, (op2).index);             \
+  (nb)->Input((op3).node, (op3).index);             \
+} while (0)
+
+#define SETUP_INPUTS4(nb, op1, op2, op3, op4) do {  \
+  (nb)->Input((op1).node, (op1).index);             \
+  (nb)->Input((op2).node, (op2).index);             \
+  (nb)->Input((op3).node, (op3).index);             \
+  (nb)->Input((op4).node, (op4).index);             \
+} while (0)
+
+#define SETUP_INPUTS5(nb, op1, op2, op3, op4, op5) do { \
+  (nb)->Input((op1).node, (op1).index);             \
+  (nb)->Input((op2).node, (op2).index);             \
+  (nb)->Input((op3).node, (op3).index);             \
+  (nb)->Input((op4).node, (op4).index);             \
+  (nb)->Input((op5).node, (op5).index);             \
+} while (0)
+
+// TODO(nhasabni) We should move this to mkl_util.h.
+//
+// Adds a "Const" node to graph '*g' that stands in for an Mkl tensor when the
+// producer of an input was not rewritten, and returns it through '*out'.
+// The node is given a fresh name derived from "DMT" and is placed on the
+// device requested in the NodeDef of 'orign'. Node construction failure is
+// fatal (TF_CHECK_OK).
+void MklLayoutRewritePass::GetDummyMklTensorNode(
+    std::unique_ptr<Graph>* g, Node** out, Node* orign) {
+  // The dummy Mkl tensor is a uint8 tensor of shape {8} whose content is
+  // eight zero bytes. The original note gives the size as 8 = 2*size_t.
+  // NOTE(review): confirm that size on targets where size_t is 8 bytes.
+  const DataType dt = DataTypeToEnum<uint8>::v();
+  uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+
+  TensorProto proto;
+  proto.set_dtype(dt);
+  proto.set_tensor_content(static_cast<const void*>(zero), 8);
+  TensorShape dummy_shape({8});
+  dummy_shape.AsProto(proto.mutable_tensor_shape());
+
+  NodeBuilder dummy_builder((*g)->NewName("DMT"), "Const");
+  dummy_builder.Attr("value", proto);
+  dummy_builder.Attr("dtype", dt);
+  // We place this node on same device as device of original node.
+  dummy_builder.Device(orign->def().device());
+  TF_CHECK_OK(dummy_builder.Finalize(g->get(), out));
+}
+
+// Adds the inputs of the rewritten node to builder 'nb'.
+//
+// For every original data input <node, slot> in 'inputs' (in order), two
+// inputs are added to 'nb': first the original TensorFlow tensor, then the
+// matching Mkl tensor. If the producer was rewritten by this pass, the Mkl
+// tensor is read from the producer's output slot 'slot + 1'; otherwise a new
+// dummy Mkl tensor node is created in graph '*g' (on the device of 'orign')
+// and its output 0 is used.
+//
+// Any number of inputs is supported. Always returns Status::OK(); internal
+// inconsistencies are fatal CHECK failures.
+Status MklLayoutRewritePass::SetUpInputs(std::unique_ptr<Graph>* g,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+    NodeBuilder* nb, Node* orign) {
+  std::vector<NodeBuilder::NodeOut> new_inputs;
+  new_inputs.reserve(inputs.size() * 2);
+
+  // 1. Let's setup inputs for the new node.
+  for (const std::pair<Node*, int>& input : inputs) {
+    Node* n = input.first;
+    const int slot = input.second;
+    // First let's copy original TF tensor input as it is.
+    new_inputs.push_back(NodeBuilder::NodeOut(n, slot));
+
+    // Second, let's add edge to propagate Mkl tensors from input Mkl layers,
+    // or generate a dummy Mkl tensor representing not-mkl-tensor case.
+    if (IsRewrittenNode(n)) {
+      // A rewritten producer must carry a "T" attribute and must be
+      // registered as an Mkl layer; anything else is a bug in this pass.
+      DataType T;
+      TF_CHECK_OK(GetNodeAttr(n->def(), "T", &T));
+      CHECK_EQ(mkl_layer_registry::IsMklLayer(n->type_string()), true);
+      // src slot number for Mkl tensor would be the one next to TF tensor
+      // slot number.
+      new_inputs.push_back(NodeBuilder::NodeOut(n, slot + 1));
+    } else {
+      // The producer was not rewritten, so feed a dummy Mkl tensor instead.
+      // The dummy node has no input and a single output (slot 0).
+      Node* dmt = nullptr;
+      GetDummyMklTensorNode(g, &dmt, orign);
+      CHECK_NOTNULL(dmt);
+      new_inputs.push_back(NodeBuilder::NodeOut(dmt, 0));
+    }
+  }
+
+  // The total number of inputs to new node _must_ be 2 times the number
+  // of inputs to the original node: N original Tensorflow tensors and
+  // N for Mkl tensors corresponding to each Tensorflow tensors.
+  CHECK_EQ(new_inputs.size(), inputs.size() * 2);
+
+  // 2. Let's build the node with new inputs. Inputs are appended to the
+  // builder one by one in order, so no fixed upper bound on the number of
+  // inputs is needed.
+  for (const NodeBuilder::NodeOut& new_input : new_inputs) {
+    nb->Input(new_input.node, new_input.index);
+  }
+
+  return Status::OK();
+}
+
+// Copies the Conv2D attributes "T", "strides", "padding", "data_format" and
+// "use_cudnn_on_gpu" from the NodeDef of 'orign' onto builder 'nb'.
+// A missing or mistyped attribute on 'orign' is fatal (TF_CHECK_OK).
+void MklLayoutRewritePass::CopyAttrsConv2D(Node* orign, NodeBuilder* nb) {
+  const auto& src_def = orign->def();
+
+  // Read every attribute first, so that 'nb' is only touched once all of
+  // them have been fetched successfully.
+  DataType elem_type;
+  TF_CHECK_OK(GetNodeAttr(src_def, "T", &elem_type));
+  std::vector<int32> stride_values;
+  TF_CHECK_OK(GetNodeAttr(src_def, "strides", &stride_values));
+  string padding_kind;
+  TF_CHECK_OK(GetNodeAttr(src_def, "padding", &padding_kind));
+  string layout;
+  TF_CHECK_OK(GetNodeAttr(src_def, "data_format", &layout));
+  bool cudnn_on_gpu;
+  TF_CHECK_OK(GetNodeAttr(src_def, "use_cudnn_on_gpu", &cudnn_on_gpu));
+
+  // Attach the same attributes, under the same names, to the new node.
+  nb->Attr("T", elem_type)
+      .Attr("strides", stride_values)
+      .Attr("padding", padding_kind)
+      .Attr("data_format", layout)
+      .Attr("use_cudnn_on_gpu", cudnn_on_gpu);
+}
+
+// Replaces node 'orign' in graph '*g' by a new node whose op is 'ni.newname'.
+//
+// The new node keeps the name, requested device and assigned device of
+// 'orign', gets the inputs built by SetUpInputs, the attributes copied by
+// 'ni.copyattrs', and the "_kernel" attribute set to the Mkl layer label.
+// All incoming control edges and all outgoing edges of 'orign' are re-created
+// on the new node; 'orign' is then removed from the graph, so the caller must
+// not use it after a successful return.
+//
+// Returns the error from SetUpInputs if input setup fails (the original node
+// is left in place in that case); otherwise Status::OK(). A mismatch between
+// the number of inputs of 'orign' and 'ni.numins', or a failure to build the
+// new node, is fatal.
+Status MklLayoutRewritePass::RewriteNode(
+    std::unique_ptr<Graph>* g, Node* orign, const NodesInfo& ni) {
+  VLOG(1) << "MKLLayoutRewritePass: Original node:" << orign->DebugString();
+
+  // Get all inputs.
+  const int num = orign->num_inputs();
+  CHECK_EQ(num, ni.numins);
+  gtl::InlinedVector<Node*, 4> control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> inputs(num);
+  FillInputs(orign, &control_edges, &inputs);
+
+  // Build new node. We use same name as original node, but change the op name.
+  NodeBuilder nb(orign->name().c_str(), ni.newname.c_str());
+  // Copy user-specified device assigned to original node to new node.
+  nb.Device(orign->def().device());
+  // Set up new inputs to the rewritten node.
+  Status s = SetUpInputs(g, inputs, &nb, orign);
+  if (!s.ok()) {
+    return s;
+  }
+  // Copy attributes from original node to new node.
+  ni.copyattrs(orign, &nb);
+  // Set the Mkl layer label for this op.
+  nb.Attr("_kernel", mkl_layer_registry::kMklLayerLabel);
+  Node* newn = nullptr;
+
+  // Finalize graph and get new node.
+  TF_CHECK_OK(nb.Finalize(&**g, &newn));
+  CHECK_NOTNULL(newn);
+
+  // Incoming data edges were created while building the node, but incoming
+  // control edges were not: re-attach them so that control dependencies of
+  // the original node are not lost by the rewrite.
+  for (Node* control_src : control_edges) {
+    (*g)->AddControlEdge(control_src, newn);
+  }
+
+  // Copy outgoing edges from 'orign' node to new 'newn' node, keeping the
+  // source and destination slots.
+  // NOTE(review): slots are copied unchanged, which matches the documented
+  // (O, O_m, ...) output ordering only for output slot 0 -- confirm before
+  // registering ops with more than one output.
+  for (const Edge* e : orign->out_edges()) {
+    (*g)->AddEdge(newn, e->src_output(), e->dst(), e->dst_input());
+  }
+
+  // Copy the runtime device assigned from original code to new node.
+  newn->set_assigned_device_name(orign->assigned_device_name());
+
+  // Delete original node and mark new node as rewritten.
+  (*g)->RemoveNode(orign);
+  MarkRewrittenNode(newn);
+
+  VLOG(1) << "MKLLayoutRewritePass: New node:" << newn->DebugString();
+  return Status::OK();
+}
+
+// Rewrites every registered op in graph '*g' to its Mkl counterpart.
+//
+// Nodes are visited in reverse post order so that the producers of a node
+// are handled before the node itself. A node is rewritten when it has a "T"
+// attribute, the Mkl name of its op is a registered Mkl layer, and its op
+// matches an entry of 'ninfo_'.
+//
+// Returns true if at least one node was rewritten, false otherwise.
+bool MklLayoutRewritePass::RunPass(
+    std::unique_ptr<Graph>* g) {
+  bool result = false;
+  CHECK_NOTNULL(g);
+
+  // 'visited_nodes_' is a member, so without this it would still hold Node
+  // pointers recorded by earlier RunPass calls on this object; such stale
+  // pointers could alias nodes of the current graph.
+  visited_nodes_.clear();
+
+  DumpGraph("Before running MklLayoutRewritePass", &**g);
+
+  std::vector<Node*> order;
+  GetReversePostOrder(**g, &order);  // This will give us topological sort.
+
+  for (Node* n : order) {
+    if (!n->IsOp()) {
+      continue;
+    }
+
+    // An op needs to have data type (T) attribute and its corresponding
+    // Mkl op name must be supported. Neither check depends on the NodesInfo
+    // entry, so both are evaluated once per node.
+    const string& op_type = n->type_string();
+    DataType dtype = DT_INVALID;
+    if (!GetNodeAttr(n->def(), "T", &dtype).ok() ||
+        !mkl_layer_registry::IsMklLayer(GetMklOpName(op_type))) {
+      continue;
+    }
+
+    for (const NodesInfo& ni : ninfo_) {
+      if (op_type.compare(ni.name) != 0) {
+        continue;
+      }
+
+      // Copy the names: 'n' is removed from the graph by a successful
+      // rewrite and must not be touched afterwards.
+      string node_name = n->name();
+      string op_name = op_type;
+
+      VLOG(1) << "MKLLayoutRewritePass: Scheduled node " << node_name
+              << " with op " << op_name << " for rewrite using"
+              << " layout optimization.";
+
+      const Status s = RewriteNode(g, n, ni);
+      if (s.ok()) {
+        VLOG(1) << "MKLLayoutRewritePass: Successfully rewrote node "
+                << node_name << " with op " << op_name
+                << " for Mkl layout optimization.";
+        result = true;
+        break;  // We found matching nodesinfo so no need to search next.
+      }
+
+      // The node was left untouched; report why instead of dropping the
+      // status silently, then keep looking at the remaining entries.
+      LOG(WARNING) << "MKLLayoutRewritePass: Could not rewrite node "
+                   << node_name << " with op " << op_name << ": "
+                   << s.ToString();
+    }
+  }
+
+  DumpGraph("After running MklLayoutRewritePass", &**g);
+
+  return result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Run function for the pass
+///////////////////////////////////////////////////////////////////////////////
+
+// Runs the Mkl layout rewrite on graph '*g' using a freshly constructed pass
+// object. Returns what MklLayoutRewritePass::RunPass returns for that graph.
+bool RunMklLayoutRewritePass(std::unique_ptr<Graph>* g) {
+  MklLayoutRewritePass layout_pass;
+  const bool graph_changed = layout_pass.RunPass(g);
+  return graph_changed;
+}
+
+// Entry point used by the optimization pass framework.
+//
+// Does nothing when 'options.graph' is null. Otherwise rewrites the graph
+// owned by '*options.graph' in place via RunPass. Ownership of the graph
+// stays with '*options.graph' throughout; no transfer is needed because
+// RunPass works through the same unique_ptr. Always returns Status::OK();
+// whether the graph was changed is not reported.
+Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
+  if (options.graph == nullptr) {
+    return Status::OK();
+  }
+
+  RunPass(options.graph);
+
+  return Status::OK();
+}
+
+} // namespace tensorflow
+
+#endif