diff options
author | 2016-01-13 17:30:08 -0800 | |
---|---|---|
committer | 2016-01-13 17:30:08 -0800 | |
commit | 6dbfb95100b73ad26ebebb9be9c0429dc0cece8a (patch) | |
tree | 83a15ef5538529dd3a4fece1276388c20f9260bb /tensorflow | |
parent | cbdf278dbd998aab57898763605f6863e9b42b76 (diff) |
Avoid allocation of std::function<> object per node executed, as
well as a hash-table lookup per allocated output.
Instead, we now pre-compute the AllocatorAttributes for every output
tensor in the graph into an array (indexed by a base number per node +
output index), and changed OpKernelContext::Params to provide
a pointer to the base of the array for the node, rather than providing
a std::function<>.
Updated test code to avoid so much code duplication when setting up
the OpKernelContext::Params object in various places.
Used gtl::InlinedVector<...> instead of std::vector<...> in a few
places in tensorflow/core/kernels/reduction_ops_common.h
Didn't make a measurable change in overall performance, but allocations and
time spent in the std::function destructor code were significantly reduced.
Change: 112103260
Diffstat (limited to 'tensorflow')
-rw-r--r-- | tensorflow/core/common_runtime/executor.cc | 94 | ||||
-rw-r--r-- | tensorflow/core/framework/op_kernel.h | 15 | ||||
-rw-r--r-- | tensorflow/core/kernels/core_ops_test.cc | 49 | ||||
-rw-r--r-- | tensorflow/core/kernels/ops_testutil.h | 22 | ||||
-rw-r--r-- | tensorflow/core/kernels/reduction_ops_common.h | 8 | ||||
-rw-r--r-- | tensorflow/core/kernels/restore_op_test.cc | 16 | ||||
-rw-r--r-- | tensorflow/core/kernels/segment_reduction_ops_test.cc | 10 | ||||
-rw-r--r-- | tensorflow/core/kernels/sparse_to_dense_op_test.cc | 8 |
8 files changed, 88 insertions, 134 deletions
diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index e42d2e260f..5de368da87 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -175,18 +175,11 @@ struct NodeItem { // ExecutorImpl::tensors_[input_start] is the 1st positional input // for this node. int input_start = 0; -}; -// Map from std::pair<node_id, output_index> to attributes. -struct pairhash { - public: - template <typename T, typename U> - std::size_t operator()(const std::pair<T, U>& x) const { - return std::hash<T>()(x.first) ^ std::hash<U>()(x.second); - } + // ExecutorImpl::output_attrs_[output_attr_start] is the 1st + // positional attribute for the 0th output of this node. + int output_attr_start = 0; }; -typedef std::unordered_map<std::pair<int, int>, AllocatorAttributes, pairhash> - DevAttrMap; typedef gtl::InlinedVector<TensorValue, 4> TensorValueVec; typedef gtl::InlinedVector<DeviceContext*, 4> DeviceContextVec; @@ -231,14 +224,15 @@ class ExecutorImpl : public Executor { // Owned. LocalExecutorParams params_; const Graph* graph_; - std::vector<NodeItem> nodes_; // nodes_.size == graph_.num_node_ids(). - int total_tensors_ = 0; // total_tensors_ = sum(nodes_[*].num_inputs()) + std::vector<NodeItem> nodes_; // nodes_.size == graph_.num_node_ids(). + int total_input_tensors_ = 0; // == sum(nodes_[*].num_inputs()) + int total_output_tensors_ = 0; // == sum(nodes_[*].num_outputs()) // The number of inputs for each frame in this graph. This is static // information of the graph. 
std::unordered_map<string, int> frame_input_count_; - DevAttrMap alloc_attr_; + std::vector<AllocatorAttributes> output_attrs_; TF_DISALLOW_COPY_AND_ASSIGN(ExecutorImpl); }; @@ -248,7 +242,8 @@ Status ExecutorImpl::Initialize() { nodes_.resize(num_nodes); Status s; - total_tensors_ = 0; + total_input_tensors_ = 0; + total_output_tensors_ = 0; // Preprocess every node in the graph to create an instance of op // kernel for each node; @@ -256,8 +251,13 @@ Status ExecutorImpl::Initialize() { const int id = n->id(); NodeItem* item = &nodes_[id]; item->node = n; - item->input_start = total_tensors_; - total_tensors_ += n->num_inputs(); + + item->input_start = total_input_tensors_; + total_input_tensors_ += n->num_inputs(); + + item->output_attr_start = total_output_tensors_; + total_output_tensors_ += n->num_outputs(); + s = params_.create_kernel(n->def(), &item->kernel); if (!s.ok()) { s = AttachDef(s, n->def()); @@ -283,22 +283,32 @@ Status ExecutorImpl::SetAllocAttrs() { Device* device = params_.device; DeviceNameUtils::ParsedName local_dev_name = device->parsed_name(); + output_attrs_.resize(total_output_tensors_); for (const Node* n : graph_->nodes()) { + NodeItem* item = &nodes_[n->id()]; + const int base_index = item->output_attr_start; // Examine the out edges of each node looking for special use // cases that may affect memory allocation attributes. 
for (auto e : n->out_edges()) { + const int index = e->src_output(); AllocatorAttributes attr; s = InferAllocAttr(n, e->dst(), local_dev_name, &attr); if (!s.ok()) return s; if (attr.value != 0) { - VLOG(2) << "node " << n->name() << " gets attr " << attr.value - << " for output " << e->src_output(); - alloc_attr_[std::make_pair(n->id(), e->src_output())].Merge(attr); - } else { - VLOG(2) << "default output attr for node " << n->name() << " output " - << e->src_output(); + if (!e->IsControlEdge()) { + output_attrs_[base_index + index].Merge(attr); + } } } + + for (int out = 0; out < n->num_outputs(); out++) { + OpKernel* op_kernel = item->kernel; + DCHECK_LT(out, op_kernel->output_memory_types().size()); + bool on_host = op_kernel->output_memory_types()[out] == HOST_MEMORY; + AllocatorAttributes h; + h.set_on_host(on_host); + output_attrs_[base_index + out].Merge(h); + } } return s; } @@ -712,7 +722,8 @@ ExecutorState::ExecutorState(const Executor::Args& args, ExecutorImpl* impl) iter_state->outstanding_frame_count = 0; iter_state->pending_count = new std::vector<int>; iter_state->dead_count = new std::vector<int>(impl->graph_->num_node_ids()); - iter_state->input_tensors = new std::vector<Entry>(impl_->total_tensors_); + iter_state->input_tensors = + new std::vector<Entry>(impl_->total_input_tensors_); // Initialize the executor state. outstanding_frames_.insert({root_frame_->frame_name, root_frame_}); @@ -793,32 +804,6 @@ void ExecutorState::RunAsync(Executor::DoneCallback done) { namespace { -// This function is provided for use by OpKernelContext when allocating -// the index'th output of node. It provides access to the -// AllocatorAttributes computed during initialization to determine in -// which memory region the tensor should be allocated. 
-AllocatorAttributes OutputAttributes(const DevAttrMap* attr_map, - const Node* node, - const OpKernel* op_kernel, int index) { - DCHECK_GE(index, 0); - - AllocatorAttributes attr; - int nid = node->id(); - const auto& iter = attr_map->find(std::make_pair(nid, index)); - if (iter != attr_map->end()) { - attr = iter->second; - VLOG(2) << "nondefault attr " << attr.value << " for node " << node->name() - << " output " << index; - } else { - VLOG(2) << "default attr for node " << node->name() << " output " << index; - } - - DCHECK_LT(index, op_kernel->output_memory_types().size()); - bool on_host = op_kernel->output_memory_types()[index] == HOST_MEMORY; - attr.set_on_host(on_host); - return attr; -} - // Helpers to make a copy of 'p' and makes a copy of the input type // vector and the device context vector. // @@ -926,9 +911,8 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { params.op_kernel = op_kernel; params.frame_iter = FrameAndIter(input_frame->frame_id, input_iter); params.is_input_dead = is_input_dead; - params.output_alloc_attr = [this, node, op_kernel](int index) { - return OutputAttributes(&impl_->alloc_attr_, node, op_kernel, index); - }; + params.output_attr_array = + gtl::vector_as_array(&impl_->output_attrs_) + item.output_attr_start; async = op_kernel->AsAsync(); if (async) { @@ -1439,7 +1423,8 @@ void ExecutorState::FindOrCreateChildFrame(FrameState* frame, int64 iter, InitializePending(impl_->graph_, iter_state->pending_count); iter_state->dead_count = new std::vector<int>(impl_->graph_->num_node_ids()); - iter_state->input_tensors = new std::vector<Entry>(impl_->total_tensors_); + iter_state->input_tensors = + new std::vector<Entry>(impl_->total_input_tensors_); auto frame_pending = impl_->frame_input_count_.find(enter_name); DCHECK(frame_pending != impl_->frame_input_count_.end()); @@ -1470,7 +1455,8 @@ void ExecutorState::IncrementIteration(FrameState* frame, iter_state->pending_count = new std::vector<int>; 
InitializePending(impl_->graph_, iter_state->pending_count); iter_state->dead_count = new std::vector<int>(impl_->graph_->num_node_ids()); - iter_state->input_tensors = new std::vector<Entry>(impl_->total_tensors_); + iter_state->input_tensors = + new std::vector<Entry>(impl_->total_input_tensors_); // Activate the successors of the deferred roots in the new iteration. ActivateNexts(frame, next_iter, ready); diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index 3fa012b852..ec1d600fa1 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -436,7 +436,9 @@ class OpKernelContext { DeviceBase* device = nullptr; bool track_allocations = false; - std::function<AllocatorAttributes(int index)> output_alloc_attr = nullptr; + + // Array indexed by output number for this node + const AllocatorAttributes* output_attr_array = nullptr; // Shared resources accessible by this op kernel invocation. ResourceMgr* resource_manager = nullptr; @@ -642,7 +644,7 @@ class OpKernelContext { // Tensors allocated via allocate_temp. There may be a performance // penalty to using a Tensor that was not allocated using // allocate_output. This is because allocate_output uses the - // AllocatorAttributes stored in output_alloc_attr for the + // AllocatorAttributes stored in output_attr_array for the // designated output. In some cases, using the wrong attributes may // cause an extra copy of the Tensor's buffer. @@ -658,9 +660,9 @@ class OpKernelContext { Status allocate_output(const string& name, const TensorShape& shape, Tensor** tensor) TF_MUST_USE_RESULT; // The following methods use the supplied attributes instead of - // those in output_alloc_attr. The caller is responsible for + // those in output_attr_array. The caller is responsible for // ensuring that the attributes are "compatible" with the - // output_alloc_attr, e.g. the tensor is allocated on the correct + // output_attr_array, e.g. 
the tensor is allocated on the correct // device. See comment above. Status allocate_output(int index, const TensorShape& shape, Tensor** tensor, AllocatorAttributes attr) TF_MUST_USE_RESULT; @@ -767,7 +769,7 @@ class OpKernelContext { } AllocatorAttributes output_alloc_attr(int index) const { - return params_.output_alloc_attr(index); + return params_.output_attr_array[index]; } gtl::InlinedVector<WrappedAllocator, 4> wrapped_allocators() const { @@ -1121,8 +1123,7 @@ inline Status OpKernelContext::allocate_output(int index, Tensor** output) { DCHECK_GE(index, 0); DCHECK_LT(index, num_outputs()); - DCHECK(params_.output_alloc_attr); - AllocatorAttributes attr = params_.output_alloc_attr(index); + AllocatorAttributes attr = output_alloc_attr(index); return allocate_output(index, shape, output, attr); } diff --git a/tensorflow/core/kernels/core_ops_test.cc b/tensorflow/core/kernels/core_ops_test.cc index 0117a571fc..63078c4ddd 100644 --- a/tensorflow/core/kernels/core_ops_test.cc +++ b/tensorflow/core/kernels/core_ops_test.cc @@ -40,6 +40,7 @@ limitations under the License. 
#include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/kernels/ops_testutil.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/core/threadpool.h" @@ -443,12 +444,8 @@ static void BM_LRNFloat(int iters, int depth, int cols, int rows, params.frame_iter = FrameAndIter(0, 0); params.inputs = &inputs; params.op_kernel = op.get(); - params.output_alloc_attr = [&device, &op, ¶ms](int index) { - AllocatorAttributes attr; - const bool on_host = (op->output_memory_types()[index] == HOST_MEMORY); - attr.set_on_host(on_host); - return attr; - }; + std::vector<AllocatorAttributes> attrs; + test::SetOutputAttrs(¶ms, &attrs); std::unique_ptr<OpKernelContext> context(new OpKernelContext(params)); @@ -527,12 +524,8 @@ static void BM_AvgPool(int iters, int batch_size, int rows, int cols, int depth, params.frame_iter = FrameAndIter(0, 0); params.inputs = &inputs; params.op_kernel = op.get(); - params.output_alloc_attr = [&device, &op, ¶ms](int index) { - AllocatorAttributes attr; - const bool on_host = (op->output_memory_types()[index] == HOST_MEMORY); - attr.set_on_host(on_host); - return attr; - }; + std::vector<AllocatorAttributes> attrs; + test::SetOutputAttrs(¶ms, &attrs); std::unique_ptr<OpKernelContext> avgpool_context(new OpKernelContext(params)); @@ -635,12 +628,8 @@ static void BM_AvgPoolBk(int iters, int batch_size, int rows, int cols, params.frame_iter = FrameAndIter(0, 0); params.inputs = &inputs; params.op_kernel = op.get(); - params.output_alloc_attr = [&device, &op, ¶ms](int index) { - AllocatorAttributes attr; - const bool on_host = (op->output_memory_types()[index] == HOST_MEMORY); - attr.set_on_host(on_host); - return attr; - }; + std::vector<AllocatorAttributes> attrs; + test::SetOutputAttrs(¶ms, &attrs); std::unique_ptr<OpKernelContext> avgpool_context(new 
OpKernelContext(params)); @@ -725,12 +714,8 @@ static void BM_MaxPool(int iters, int batch_size, int rows, int cols, int depth, params.frame_iter = FrameAndIter(0, 0); params.inputs = &inputs; params.op_kernel = op.get(); - params.output_alloc_attr = [&device, &op, ¶ms](int index) { - AllocatorAttributes attr; - const bool on_host = (op->output_memory_types()[index] == HOST_MEMORY); - attr.set_on_host(on_host); - return attr; - }; + std::vector<AllocatorAttributes> attrs; + test::SetOutputAttrs(¶ms, &attrs); std::unique_ptr<OpKernelContext> maxpool_context(new OpKernelContext(params)); @@ -903,12 +888,8 @@ static void BM_ReluFloat(int iters, int batch_size, int rows, int cols, params.frame_iter = FrameAndIter(0, 0); params.inputs = &inputs; params.op_kernel = op.get(); - params.output_alloc_attr = [&device, &op, ¶ms](int index) { - AllocatorAttributes attr; - const bool on_host = (op->output_memory_types()[index] == HOST_MEMORY); - attr.set_on_host(on_host); - return attr; - }; + std::vector<AllocatorAttributes> attrs; + test::SetOutputAttrs(¶ms, &attrs); std::unique_ptr<OpKernelContext> relu_context(new OpKernelContext(params)); @@ -975,12 +956,8 @@ static void BM_ImageNetSoftmaxFwd(int iters, int batch_size, int node_depth, params.frame_iter = FrameAndIter(0, 0); params.inputs = &inputs; params.op_kernel = op.get(); - params.output_alloc_attr = [&device, &op, ¶ms](int index) { - AllocatorAttributes attr; - const bool on_host = (op->output_memory_types()[index] == HOST_MEMORY); - attr.set_on_host(on_host); - return attr; - }; + std::vector<AllocatorAttributes> attrs; + test::SetOutputAttrs(¶ms, &attrs); std::unique_ptr<OpKernelContext> softmax_context(new OpKernelContext(params)); diff --git a/tensorflow/core/kernels/ops_testutil.h b/tensorflow/core/kernels/ops_testutil.h index 152e5b2d03..bb470c8c3a 100644 --- a/tensorflow/core/kernels/ops_testutil.h +++ b/tensorflow/core/kernels/ops_testutil.h @@ -52,6 +52,19 @@ namespace test { NodeDef Node(const string& name, 
const string& op, const std::vector<string>& inputs); +inline void SetOutputAttrs(OpKernelContext::Params* params, + std::vector<AllocatorAttributes>* attrs) { + attrs->clear(); + for (int index = 0; index < params->op_kernel->num_outputs(); index++) { + AllocatorAttributes attr; + const bool on_host = + (params->op_kernel->output_memory_types()[index] == HOST_MEMORY); + attr.set_on_host(on_host); + attrs->push_back(attr); + } + params->output_attr_array = gtl::vector_as_array(attrs); +} + } // namespace test // Helpful functions to test operators. @@ -142,13 +155,8 @@ class OpsTestBase : public ::testing::Test { params.frame_iter = FrameAndIter(0, 0); params.inputs = &inputs_; params.op_kernel = kernel_.get(); - params.output_alloc_attr = [this, ¶ms](int index) { - AllocatorAttributes attr; - const bool on_host = - (kernel_->output_memory_types()[index] == HOST_MEMORY); - attr.set_on_host(on_host); - return attr; - }; + std::vector<AllocatorAttributes> attrs; + test::SetOutputAttrs(¶ms, &attrs); checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_wrapper; params.slice_reader_cache = &slice_reader_cache_wrapper; diff --git a/tensorflow/core/kernels/reduction_ops_common.h b/tensorflow/core/kernels/reduction_ops_common.h index 44911c9d36..1099dd7193 100644 --- a/tensorflow/core/kernels/reduction_ops_common.h +++ b/tensorflow/core/kernels/reduction_ops_common.h @@ -228,11 +228,9 @@ class ReductionHelper { private: bool reduce_first_axis_; // True if need to reduce the 0-th dimension. - gtl::InlinedVector<int64, 4> - data_reshape_; // Reshape the data before reduction. - gtl::InlinedVector<int64, 4> out_shape_; // The final output shape. - gtl::InlinedVector<int64, 4> - out_reshape_; // Reshape the output for reduction. + gtl::InlinedVector<int64, 4> data_reshape_; // Reshape data before reduction. + gtl::InlinedVector<int64, 4> out_shape_; // The final output shape. + gtl::InlinedVector<int64, 4> out_reshape_; // Reshape output for reduction. 
}; } // end namespace diff --git a/tensorflow/core/kernels/restore_op_test.cc b/tensorflow/core/kernels/restore_op_test.cc index 3ea3251430..aa8f626c0f 100644 --- a/tensorflow/core/kernels/restore_op_test.cc +++ b/tensorflow/core/kernels/restore_op_test.cc @@ -164,12 +164,8 @@ TEST_F(RestoreOpTest, RestoreSimple) { params.frame_iter = FrameAndIter(0, 0); params.inputs = &inputs; params.op_kernel = op.get(); - params.output_alloc_attr = [&device, &op, ¶ms](int index) { - AllocatorAttributes attr; - const bool on_host = (op->output_memory_types()[index] == HOST_MEMORY); - attr.set_on_host(on_host); - return attr; - }; + std::vector<AllocatorAttributes> attrs; + test::SetOutputAttrs(¶ms, &attrs); checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_wrapper; params.slice_reader_cache = &slice_reader_cache_wrapper; @@ -392,12 +388,8 @@ TEST_F(RestoreSliceOpTest, RestoreInt) { params.frame_iter = FrameAndIter(0, 0); params.inputs = &inputs; params.op_kernel = op.get(); - params.output_alloc_attr = [&device, &op, ¶ms](int index) { - AllocatorAttributes attr; - const bool on_host = (op->output_memory_types()[index] == HOST_MEMORY); - attr.set_on_host(on_host); - return attr; - }; + std::vector<AllocatorAttributes> attrs; + test::SetOutputAttrs(¶ms, &attrs); checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_wrapper; params.slice_reader_cache = &slice_reader_cache_wrapper; diff --git a/tensorflow/core/kernels/segment_reduction_ops_test.cc b/tensorflow/core/kernels/segment_reduction_ops_test.cc index 00d8e13338..4121ec232b 100644 --- a/tensorflow/core/kernels/segment_reduction_ops_test.cc +++ b/tensorflow/core/kernels/segment_reduction_ops_test.cc @@ -31,6 +31,7 @@ limitations under the License. 
#include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/testlib.h" +#include "tensorflow/core/kernels/ops_testutil.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/platform/test_benchmark.h" #include "tensorflow/core/public/tensor.h" @@ -72,13 +73,8 @@ static void BM_SegmentReduction(int iters, string reduction, Index num_rows, params.frame_iter = FrameAndIter(0, 0); params.inputs = &reduction_inputs; params.op_kernel = reduction_op.get(); - params.output_alloc_attr = [&device, &reduction_op, ¶ms](int index) { - AllocatorAttributes attr; - const bool on_host = - (reduction_op->output_memory_types()[index] == HOST_MEMORY); - attr.set_on_host(on_host); - return attr; - }; + std::vector<AllocatorAttributes> attrs; + test::SetOutputAttrs(¶ms, &attrs); std::unique_ptr<OpKernelContext> reduction_context( new OpKernelContext(params)); diff --git a/tensorflow/core/kernels/sparse_to_dense_op_test.cc b/tensorflow/core/kernels/sparse_to_dense_op_test.cc index b807ee30b4..84ba13ee17 100644 --- a/tensorflow/core/kernels/sparse_to_dense_op_test.cc +++ b/tensorflow/core/kernels/sparse_to_dense_op_test.cc @@ -255,12 +255,8 @@ static void BM_SparseToDense(int iters, const int bm_arg) { params.frame_iter = FrameAndIter(0, 0); params.inputs = &inputs; params.op_kernel = op.get(); - params.output_alloc_attr = [&device, &op, ¶ms](int index) { - AllocatorAttributes attr; - const bool on_host = (op->output_memory_types()[index] == HOST_MEMORY); - attr.set_on_host(on_host); - return attr; - }; + std::vector<AllocatorAttributes> attrs; + test::SetOutputAttrs(¶ms, &attrs); std::unique_ptr<OpKernelContext> sparse_context(new OpKernelContext(params)); op->Compute(sparse_context.get()); |