Rewriting training graph to simulate the precision loss for quantized inference.

This finds all the matmul and conv2d ops (with the most precision loss) and convert its inputs according to their types. This rewriting uses the quantize_and_dequantize op to convert tensors with the following types. 1. Const/Variable OP: This is quantized as signed tensors with no given range. 2. Activation OP: Set the range accordingly for different types of activations. Currently we handle {Relu, Relu6, Sigmoid, Tanh} 3. Identity OP: The quantization parameters depend on what its input is. 4. Pooling OPs: various pooling ops. Also depends on its input. 5. Reshape OP: Also depends on the first input to this op. 6. Not-Listed-Above OP: If there is only 1 such op, consider it as the model input. However, if there are >1 unknown ops, then return an error for now to avoid unexpected bahavior. Note: The list above might not be a complete list. Please let us know if you see the CHECK failure so we can include your use case. Change: 124190453
author: Jianmin Chen <goog.jmchen@gmail.com> 2016-06-06 15:18:35 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-06-06 16:33:17 -0700
commit: a8a25d85a5c57f8cbb4b22aa5bd7e9c86e5aedd8 (patch)
tree: c999c38e537a506a674d39f9e546f8dba0c508e5
parent: 17f56a8f11e9a98213ecc1d7d9059ef986b52d89 (diff)
5 files changed, 439 insertions, 0 deletions
diff --git a/tensorflow/core/graph/quantize_training.cc b/tensorflow/core/graph/quantize_training.cc
new file mode 100644
index 0000000000..23ce7daeff
--- /dev/null
+++ b/tensorflow/core/graph/quantize_training.cc
@@ -0,0 +1,229 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <algorithm>
+#include <atomic>
+#include <set>
+#include <unordered_map>
+#include <vector>
+
+#include "tensorflow/core/graph/quantize_training.h"
+
+#include "tensorflow/core/common_runtime/executor.h"
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/common_runtime/memory_types.h"
+#include "tensorflow/core/framework/log_memory.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/graph/algorithm.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/graph/subgraph.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/public/session_options.h"
+
+namespace tensorflow {
+namespace {
+// Node types to rewrite. Insert quantize_and_dequantize op for their inputs.
+const std::unordered_set<string, StringPiece::Hasher> nodes_to_rewrite{
+    "MatMul", "Conv2D"};
+
+// Contains necessary parameters to convert an edge.
+struct EdgeToConvert {
+  // Edge is not owned here.
+  const Edge* edge;
+  int32 num_bits;
+  bool signed_input;
+  bool range_given;
+  float input_min;
+  float input_max;
+
+  EdgeToConvert(const Edge* e, int32 bits, bool sign, bool range, float min,
+                float max) {
+    edge = e;
+    num_bits = bits;
+    signed_input = sign;
+    range_given = range;
+    input_min = min;
+    input_max = max;
+  }
+};
+
+// Decide if a node is in backward pass by checking if its name is led by
+// "gradients".
+// TODO(jmchen): Make this check more robust as it is not guaranteed that the
+// forward node will not be named with a leading "gradients".
+inline bool IsGradientNode(const Graph* graph, const Node* node) {
+  static const string tag = "gradients";
+  return (node->name().compare(0, tag.size(), tag) == 0);
+}
+
+// Find the type of the input to set the parameters for the
+// quantize_and_dequantize op.
+// Returns true if the root tensor op type is known, false otherwise.
+bool FindType(const Graph* graph, const Node* node, bool* signed_input,
+              bool* range_given, float* input_min, float* input_max) {
+  const string src_op = node->type_string();
+  if (src_op == "Const" || src_op == "Variable") {
+    *signed_input = true;
+    *range_given = false;
+  } else if (src_op == "Relu") {
+    // Range is not given for Relu.
+    *signed_input = false;
+    *range_given = false;
+  } else if (src_op == "Relu6") {
+    *signed_input = false;
+    *range_given = true;
+    *input_min = 0;
+    *input_max = 6;
+  } else if (src_op == "Sigmoid") {
+    *signed_input = false;
+    *range_given = true;
+    *input_min = 0;
+    *input_max = 1;
+  } else if (src_op == "Tanh") {
+    *signed_input = true;
+    *range_given = true;
+    *input_min = -1;
+    *input_max = 1;
+  } else if (src_op == "Reshape") {
+    // Reshape has 2 inputs and the first one is the tensor.
+    for (const Edge* edge : node->in_edges()) {
+      if (edge->src_output() != Graph::kControlSlot && edge->dst_input() == 0) {
+        FindType(graph, edge->src(), signed_input, range_given, input_min,
+                 input_max);
+      }
+    }
+  } else if (src_op == "Identity" || src_op == "MaxPool" ||
+             src_op == "AvgPool" || src_op == "MaxPool3D" ||
+             src_op == "AvgPool3D") {
+    // All these Ops only have 1 data input.
+    for (const Edge* edge : node->in_edges()) {
+      if (edge->src_output() != Graph::kControlSlot) {
+        FindType(graph, edge->src(), signed_input, range_given, input_min,
+                 input_max);
+      }
+    }
+  } else {
+    // Unknown type, could be the model input examples.
+    // TODO: Set the params for input with user's hint.
+    *signed_input = true;
+    *range_given = false;
+    return false;
+  }
+
+  return true;
+}
+
+// Insert conversion op, connect it to the graph and remove the old edge.
+Status ProcessTargetEdges(Graph* graph,
+                          const std::vector<EdgeToConvert>& target_edges) {
+  // Remember previous convert ops to avoid duplicated conversion on the same
+  // input.
+  std::unordered_map<string, Node*, StringPiece::Hasher> name_index;
+  for (const EdgeToConvert edge : target_edges) {
+    Node* convert_node;
+    string name =
+        strings::StrCat(edge.edge->src()->name(), "/_QuantizeAndDequantize");
+
+    auto iter = name_index.find(name);
+    if (iter == name_index.end()) {
+      TF_RETURN_IF_ERROR(NodeBuilder(name, "_QuantizeAndDequantize")
+                             .Input(edge.edge->src())
+                             .Attr("signed_input", edge.signed_input)
+                             .Attr("num_bits", edge.num_bits)
+                             .Attr("range_given", edge.range_given)
+                             .Attr("input_min", edge.input_min)
+                             .Attr("input_max", edge.input_max)
+                             .Finalize(graph, &convert_node));
+
+      name_index[name] = convert_node;
+    } else {
+      convert_node = iter->second;
+    }
+
+    graph->AddEdge(convert_node, 0, edge.edge->dst(), edge.edge->dst_input());
+    graph->RemoveEdge(edge.edge);
+  }
+
+  return Status::OK();
+}
+
+}  // namespace
+
+Status DoQuantizeTraining(int32 num_bits, Graph* graph) {
+  if (graph == nullptr) {
+    return errors::InvalidArgument("Cannot accept empty graph pointer.");
+  }
+
+  if (num_bits < 1 || num_bits > 63) {
+    return errors::OutOfRange("num_bits should be in range [1, 63] but is: ",
+                              num_bits);
+  }
+  int potential_input = 0;
+  std::vector<EdgeToConvert> target_edges;
+  for (Node* node : graph->nodes()) {
+    if (nodes_to_rewrite.find(node->type_string()) != nodes_to_rewrite.end() &&
+        !IsGradientNode(graph, node)) {
+      // Find out which types are the inputs and convert them accordingly.
+      // 1. Const/Variable OP: This is quantized as signed tensors with no given
+      // range.
+      // 2. Activation OP: Set the range accordingly for different types of
+      // activations. Currently we handle {Relu, Relu6, Sigmoid, Tanh}
+      // 3. Identity OP: The quantization parameters depend on its input.
+      // 4. Pooling OPs: various pooling ops. Also depends on its input.
+      // 5. Reshape OP: Also depends on the first input to this op.
+      // 6. Not-Listed-Above OP: If there is only 1 such op, consider it as the
+      // model input. However, if there are >1 unknown ops, then returns an
+      // error for now to avoid unexpected bahavior.
+      // Note: The list above might not be a complete list. Please let us
+      // know if you see the error so we can handle your case.
+      for (const Edge* edge : node->in_edges()) {
+        if (edge->src_output() == Graph::kControlSlot) {
+          // Skip the control dependency input.
+          continue;
+        } else {
+          bool signed_input = false;
+          bool range_given = false;
+          float input_min = 0;
+          float input_max = 0;
+          bool known_op = FindType(graph, edge->src(), &signed_input,
+                                   &range_given, &input_min, &input_max);
+          if (!known_op) {
+            // Unknown op is considered as input.
+            // Only support one input for now.
+            // TODO: Make this configurable if this is the desirable way to find
+            // input.
+            if (potential_input > 0) {
+              return errors::Unimplemented(
+                  "Find a second unknown op: ", edge->src()->name(),
+                  " with type: ", edge->src()->type_string(),
+                  "; Unknown ops are considered as model input for now and "
+                  "only 1 input is supported currently.");
+            }
+            potential_input++;
+          }
+
+          target_edges.emplace_back(EdgeToConvert(
+              edge, num_bits, signed_input, range_given, input_min, input_max));
+        }
+      }
+    }
+  }
+
+  TF_RETURN_IF_ERROR(ProcessTargetEdges(graph, target_edges));
+
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/graph/quantize_training.h b/tensorflow/core/graph/quantize_training.h
new file mode 100644
index 0000000000..694c491620
--- /dev/null
+++ b/tensorflow/core/graph/quantize_training.h
@@ -0,0 +1,37 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_GRAPH_QUANTIZE_TRAINING_H_
+#define TENSORFLOW_GRAPH_QUANTIZE_TRAINING_H_
+
+#include "tensorflow/core/graph/graph.h"
+
+namespace tensorflow {
+// Rewrites graph for quantized training.
+// Rewrites the forward pass to include the precision loss with quantization so
+// the model can learn to deal with such loss and achieve better accuracy when
+// it is quantized later for inference.
+// Note that the num_bits should be in [1, 63] and 'g' must be not null.
+//
+// On success, returns OK.
+//
+// On failure, returns the error status. Possible errors include:
+//    - num_bits out of range.
+//    - g is null.
+//    - More than 1 unknown ops encountered.
+Status DoQuantizeTraining(int32 num_bits, Graph* g);
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_GRAPH_QUANTIZE_TRAINING_H_
diff --git a/tensorflow/core/graph/quantize_training_test.cc b/tensorflow/core/graph/quantize_training_test.cc
new file mode 100644
index 0000000000..d6663e0a50
--- /dev/null
+++ b/tensorflow/core/graph/quantize_training_test.cc
@@ -0,0 +1,161 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <map>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "tensorflow/core/graph/quantize_training.h"
+
+#include "tensorflow/core/common_runtime/device_factory.h"
+#include "tensorflow/core/common_runtime/device_mgr.h"
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/graph/testlib.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/public/session_options.h"
+
+namespace tensorflow {
+namespace {
+
+class QuantizeTrainingTest : public ::testing::Test {
+ protected:
+  QuantizeTrainingTest() { Reset(); }
+  void Reset() { g_.reset(new Graph(OpRegistry::Global())); }
+
+  template <typename T>
+  Node* Constant(gtl::ArraySlice<T> values, TensorShape shape) {
+    return test::graph::Constant(g_.get(), test::AsTensor(values, shape));
+  }
+
+  std::unique_ptr<Graph> g_;
+};
+
+TEST_F(QuantizeTrainingTest, NormalGraph) {
+  // Construct the following graph
+  /*
+           m1      m2
+        /      \ /     \
+      Relu   Identity   c
+        |       |
+        a       b
+  */
+  Reset();
+  Graph* g = g_.get();
+  Node* a = Constant<float>({1.0, 2.0, 3.0, 4.0}, {2, 2});
+  Node* b = Constant<float>({1.0, 2.0, 3.0, 4.0}, {2, 2});
+  Node* c = Constant<float>({0.0, 1.0, 1.0, 0.0}, {2, 2});
+  g->AddControlEdge(g->source_node(), a);
+  g->AddControlEdge(g->source_node(), b);
+  g->AddControlEdge(g->source_node(), c);
+  Node* relu = test::graph::Relu(g, a);
+  Node* identity = test::graph::Identity(g, b);
+  Node* m1 = test::graph::Matmul(g, relu, identity, false, false);
+  Node* m2 = test::graph::Matmul(g, identity, c, false, false);
+  g->AddControlEdge(m1, g->sink_node());
+  g->AddControlEdge(m2, g->sink_node());
+
+  // The graph after the rewriting should be:
+  // "Q" is the quantize_and_dequantize op.
+  // Note the Q in the middle is shared by both m1 and m2.
+  /*
+         m1       m2
+      /      \ /     \
+      Q       Q       Q
+      |       |       |
+    Relu   Identity   c
+      |       |
+      a       b
+  */
+  int num_bits = 8;
+  // 4 edges to modify
+  TF_ASSERT_OK(DoQuantizeTraining(num_bits, g));
+
+  // There should be 12 nodes in total including the source and sink nodes.
+  EXPECT_EQ(12, g->num_nodes());
+  // Nodes m1 and m2's inputs should be the quantize_and_dequantize op.
+  std::vector<Node*> target_nodes{m1, m2};
+  for (Node* n : target_nodes) {
+    for (Node* in : n->in_nodes()) {
+      EXPECT_EQ("_QuantizeAndDequantize", in->type_string());
+    }
+  }
+
+  // relu, identity, c should now connect to the quantize_and_dequantize nodes.
+  std::vector<Node*> target_inputs{relu, identity, c};
+  for (Node* n : target_inputs) {
+    for (Node* out : n->out_nodes()) {
+      EXPECT_EQ("_QuantizeAndDequantize", out->type_string());
+    }
+  }
+
+  // Quantize_and_dequantize node for identity should have signed_input==true.
+  NodeDef identity_Q = identity->out_nodes().begin()->def();
+  ASSERT_EQ("true",
+            SummarizeAttrValue(identity_Q.attr().find("signed_input")->second));
+  // Quantize_and_dequantize node for relu should have signed_input==false.
+  NodeDef relu_Q = relu->out_nodes().begin()->def();
+  ASSERT_EQ("false",
+            SummarizeAttrValue(relu_Q.attr().find("signed_input")->second));
+}
+
+TEST_F(QuantizeTrainingTest, WithBackwardNodes) {
+  // Construct the same graph plus another backward Matmul.
+  Reset();
+  Graph* g = g_.get();
+  Node* a = Constant<float>({1.0, 2.0, 3.0, 4.0}, {2, 2});
+  Node* b = Constant<float>({1.0, 2.0, 3.0, 4.0}, {2, 2});
+  Node* c = Constant<float>({0.0, 1.0, 1.0, 0.0}, {2, 2});
+  g->AddControlEdge(g->source_node(), a);
+  g->AddControlEdge(g->source_node(), b);
+  g->AddControlEdge(g->source_node(), c);
+  Node* relu = test::graph::Relu(g, a);
+  Node* identity = test::graph::Identity(g, b);
+  Node* m1 = test::graph::Matmul(g, relu, identity, false, false);
+  Node* m2 = test::graph::Matmul(g, identity, c, false, false);
+  g->AddControlEdge(m1, g->sink_node());
+  g->AddControlEdge(m2, g->sink_node());
+
+  // Add a Matmul node with name starting with "gradients".
+  Node* backward_m;
+  TF_ASSERT_OK(NodeBuilder(g->NewName("gradients/n"), "MatMul")
+                   .Input(m1)
+                   .Input(m2)
+                   .Attr("transpose_a", true)
+                   .Attr("transpose_b", false)
+                   .Finalize(g, &backward_m));
+  g->AddControlEdge(backward_m, g->sink_node());
+
+  int num_bits = 8;
+  // Still 4 changes since the inputs of backward node will not be converted.
+  TF_ASSERT_OK(DoQuantizeTraining(num_bits, g));
+
+  // Nodes m1 and m2's inputs should now be the quantize_and_dequantize op.
+  EXPECT_EQ(13, g->num_nodes());
+  EXPECT_EQ(2, m2->num_inputs());
+}
+
+#undef SIMPLE_GRAPH
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/graph/testlib.cc b/tensorflow/core/graph/testlib.cc
index 0d0a84db79..ec878437dc 100644
--- a/tensorflow/core/graph/testlib.cc
+++ b/tensorflow/core/graph/testlib.cc
@@ -384,6 +384,15 @@ Node* GetSessionTensor(Graph* g, Node* in) {
   return ret;
 }
 
+Node* Relu(Graph* g, Node* in) {
+  Node* ret;
+  TF_CHECK_OK(NodeBuilder(g->NewName("n"), "Relu")
+                  .Input(in, 0)
+                  .Attr("T", DT_FLOAT)
+                  .Finalize(g, &ret));
+  return ret;
+}
+
 void ToGraphDef(Graph* g, GraphDef* gdef) { g->ToGraphDef(gdef); }
 
 }  // end namespace graph
diff --git a/tensorflow/core/graph/testlib.h b/tensorflow/core/graph/testlib.h
index 511f6b4310..bc4863563f 100644
--- a/tensorflow/core/graph/testlib.h
+++ b/tensorflow/core/graph/testlib.h
@@ -169,6 +169,9 @@ Node* GetSessionTensor(Graph* g, Node* in);
 // given in "tensors".
 Node* Concat(Graph* g, Node* concat_dim, gtl::ArraySlice<Node*> tensors);
 
+// Add a Relu node in "g".
+Node* Relu(Graph* g, Node* in);
+
 }  // end namespace graph
 }  // end namespace test
 }  // end namespace tensorflow
author	Jianmin Chen <goog.jmchen@gmail.com>	2016-06-06 15:18:35 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2016-06-06 16:33:17 -0700
commit	a8a25d85a5c57f8cbb4b22aa5bd7e9c86e5aedd8 (patch)
tree	c999c38e537a506a674d39f9e546f8dba0c508e5
parent	17f56a8f11e9a98213ecc1d7d9059ef986b52d89 (diff)