diff options
author | 2016-05-04 08:14:06 -0800 | |
---|---|---|
committer | 2016-05-04 09:21:36 -0700 | |
commit | f7a662e595f1631c12d15173344ee0f50d2cd9f8 (patch) | |
tree | ab87e6527dadcc2b7b9367ce70c62052bc343cd5 /tensorflow | |
parent | a1bc10f9e28cd3d33e91bfaedd8199e4590f2893 (diff) |
Reduce overhead of ExecutorState::ProcessOutputs:
- Cache result of LogMemory::IsEnabled.
- Init Tensor for logging only if logging is enabled.
- Add move constructor to Tensor and use it in ProcessOutputs.
Also added benchmark for copy vs move in tensor_test.cc
Change: 121487202
Diffstat (limited to 'tensorflow')
-rw-r--r-- | tensorflow/core/BUILD | 1 | ||||
-rw-r--r-- | tensorflow/core/common_runtime/executor.cc | 40 | ||||
-rw-r--r-- | tensorflow/core/framework/tensor.cc | 5 | ||||
-rw-r--r-- | tensorflow/core/framework/tensor.h | 26 | ||||
-rw-r--r-- | tensorflow/core/framework/tensor_shape.h | 28 | ||||
-rw-r--r-- | tensorflow/core/framework/tensor_shape_test.cc | 9 | ||||
-rw-r--r-- | tensorflow/core/framework/tensor_test.cc | 48 |
7 files changed, 135 insertions(+), 22 deletions(-)
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 963ea1fc4d..792142af62 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -786,6 +786,7 @@ cc_library( "lib/gtl/edit_distance.h", "lib/gtl/int_type.h", "lib/gtl/iterator_range.h", + "lib/gtl/manual_constructor.h", "lib/gtl/top_n.h", "lib/io/iterator.h", "lib/io/match.h", diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index 0c322cac76..b73f7c2aa9 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -44,6 +44,7 @@ limitations under the License. #include "tensorflow/core/lib/core/notification.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/lib/gtl/manual_constructor.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/strings/str_util.h" @@ -644,6 +645,9 @@ class ExecutorState { const bool vlog_; // true if VLOG_IS_ON(1). Used to check vlog cheaply. + // true if LogMemory::IsEnabled(). Used to check memory enabled cheaply. + const bool log_memory_; + int64 step_id_; // Not owned. Rendezvous* rendezvous_; @@ -796,6 +800,7 @@ class ExecutorState { ExecutorState::ExecutorState(const Executor::Args& args, ExecutorImpl* impl) : vlog_(VLOG_IS_ON(1)), + log_memory_(LogMemory::IsEnabled()), step_id_(args.step_id), rendezvous_(args.rendezvous), session_state_(args.session_state), @@ -1260,9 +1265,6 @@ Status ExecutorState::ProcessOutputs(const NodeItem& item, OpKernelContext* ctx, Entry* out = &((*outputs)[i]); out->has_value = true; - // This value is filled in below if LogMemory::IsEnabled. - Tensor value_to_log; - // Set the device context of the output entry. 
out->device_context = device_context; @@ -1273,23 +1275,31 @@ Status ExecutorState::ProcessOutputs(const NodeItem& item, OpKernelContext* ctx, DataType dtype = val->dtype(); if (val.is_ref()) dtype = MakeRefType(dtype); if (dtype == item.output_type(i)) { + if (stats_collector_ && val.tensor->IsInitialized()) { + nodestats::SetOutput(stats, i, val.tensor); + } if (val.is_ref()) { out->ref = val.tensor; out->ref_mu = val.mutex_if_ref; - if (LogMemory::IsEnabled()) { - // Dereference the tensor under the lock. - mutex_lock l(*out->ref_mu); - value_to_log = *out->ref; + if (log_memory_) { + Tensor to_log; + { + // Dereference the tensor under the lock. + mutex_lock l(*out->ref_mu); + to_log = *out->ref; + } + LogMemory::RecordTensorOutput(ctx->op_kernel().name(), + ctx->step_id(), i, to_log); } } else { - out->val = *val.tensor; - if (LogMemory::IsEnabled()) { - value_to_log = out->val; + // NOTE that std::move is used here, so val.tensor goes to + // uninitialized state (val.tensor->IsInitialized return false). 
+ out->val = std::move(*val.tensor); + if (log_memory_) { + LogMemory::RecordTensorOutput(ctx->op_kernel().name(), + ctx->step_id(), i, out->val); } } - if (stats_collector_ && val.tensor->IsInitialized()) { - nodestats::SetOutput(stats, i, val.tensor); - } } else { s.Update(errors::Internal("Output ", i, " of type ", DataTypeString(dtype), @@ -1297,10 +1307,6 @@ Status ExecutorState::ProcessOutputs(const NodeItem& item, OpKernelContext* ctx, DataTypeString(item.output_type(i)), " for node ", SummarizeNodeDef(node->def()))); } - if (LogMemory::IsEnabled()) { - LogMemory::RecordTensorOutput(ctx->op_kernel().name(), ctx->step_id(), - i, value_to_log); - } } if (!val.is_ref()) { // If OpKernelContext returns outputs via pass-by-value, we diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index 1291fae3ae..ea528b958b 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -389,11 +389,6 @@ Tensor::Tensor() : Tensor(DT_FLOAT) {} Tensor::Tensor(DataType type) : shape_({0}), buf_(nullptr) { set_dtype(type); } -Tensor::Tensor(const Tensor& other) : shape_(other.shape()), buf_(other.buf_) { - set_dtype(other.dtype()); - RefIfNonNull(buf_); -} - Tensor::Tensor(DataType type, const TensorShape& shape, TensorBuffer* buf) : shape_(shape), buf_(buf) { set_dtype(type); diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h index a8578e789d..f183a0aa67 100644 --- a/tensorflow/core/framework/tensor.h +++ b/tensorflow/core/framework/tensor.h @@ -84,6 +84,11 @@ class Tensor { Tensor(const Tensor& other); /// Copy constructor. + // Move constructor. After this call, <other> is safely destructible and can + // be assigned to, but other calls on it (e.g. shape manipulation) are not + // valid. + Tensor(Tensor&& other); + ~Tensor(); /// Returns the data type. @@ -137,6 +142,9 @@ class Tensor { return *this; } + /// Move operator. See move constructor for details. 
+ Tensor& operator=(Tensor&& other); + /// \brief Copy the other tensor into this tensor and reshape it. /// /// This tensor shares other's underlying storage. Returns `true` @@ -543,6 +551,24 @@ typename TTypes<T, NDIMS>::ConstTensor Tensor::flat_outer_dims() const { return shaped<T, NDIMS>(ComputeFlatOuterDims(NDIMS)); } +inline Tensor::Tensor(const Tensor& other) + : shape_(other.shape()), buf_(other.buf_) { + if (buf_) buf_->Ref(); +} + +inline Tensor::Tensor(Tensor&& other) + : shape_(std::move(other.shape())), buf_(other.buf_) { + other.buf_ = nullptr; +} + +inline Tensor& Tensor::operator=(Tensor&& other) { + shape_ = std::move(other.shape_); + if (buf_) buf_->Unref(); + buf_ = other.buf_; + other.buf_ = nullptr; + return *this; +} + } // namespace tensorflow #endif // TENSORFLOW_CORE_FRAMEWORK_TENSOR_H_ diff --git a/tensorflow/core/framework/tensor_shape.h b/tensorflow/core/framework/tensor_shape.h index 84947e308a..7cc8223016 100644 --- a/tensorflow/core/framework/tensor_shape.h +++ b/tensorflow/core/framework/tensor_shape.h @@ -54,6 +54,13 @@ class TensorShape { TensorShape(const TensorShape& b); void operator=(const TensorShape& b); + /// Move the specified shape. After moving, <b> is safe for destruction and + // can be reassigned into, but its dimensions and number of elements can be + // nonsensical (e.g., negative dimension sizes, or number of elements not + // properly recomputed). + TensorShape(TensorShape&& b); + void operator=(TensorShape&& b); + /// Returns `true` iff `proto` is a valid tensor shape. static bool IsValid(const TensorShapeProto& proto); @@ -308,6 +315,15 @@ inline TensorShape::TensorShape(const TensorShape& b) { } } +inline TensorShape::TensorShape(TensorShape&& b) { + num_elements_ = b.num_elements_; + memcpy(buf(), b.buf(), sizeof(u_.buf)); + // memcpy above Implicitly does: + // set_ndims_byte(b.ndims_byte()); + // set_tag(b.tag()); + b.set_tag(REP16); // other shape no longer owns out-of-line data, if any. 
+} + inline TensorShape::~TensorShape() { if (tag() == REP_OUT_OF_LINE) { DestructorOutOfLine(); @@ -326,6 +342,18 @@ inline void TensorShape::operator=(const TensorShape& b) { } } +inline void TensorShape::operator=(TensorShape&& b) { + if (tag() == REP_OUT_OF_LINE) { + DestructorOutOfLine(); + } + num_elements_ = b.num_elements_; + memcpy(buf(), b.buf(), sizeof(u_.buf)); + // memcpy above Implicitly does: + // set_ndims_byte(b.ndims_byte()); + // set_tag(b.tag()); + b.set_tag(REP16); // other shape no longer owns out-of-line data, if any. +} + } // namespace tensorflow #endif // TENSORFLOW_CORE_FRAMEWORK_TENSOR_SHAPE_H_ diff --git a/tensorflow/core/framework/tensor_shape_test.cc b/tensorflow/core/framework/tensor_shape_test.cc index 5eeaeb61da..f3e1a7cbcb 100644 --- a/tensorflow/core/framework/tensor_shape_test.cc +++ b/tensorflow/core/framework/tensor_shape_test.cc @@ -473,6 +473,15 @@ TEST(TensorShapeTest, Randomized) { fprintf(stderr, "ITERATION %d: %s\n", i, sp.DebugString().c_str()); } EXPECT_EQ(s.num_elements(), sold.num_elements()); + + // Test moves. 
+ TensorShape copy = s; + TensorShape moved(std::move(copy)); + EXPECT_EQ(s, moved); + copy = s; + moved = std::move(copy); + EXPECT_EQ(s, moved); + int64 ne = sold.num_elements(); int r = gen.Uniform(100); if (r < 10) { diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc index f13c85c188..eed7417d36 100644 --- a/tensorflow/core/framework/tensor_test.cc +++ b/tensorflow/core/framework/tensor_test.cc @@ -112,6 +112,22 @@ void TestCopies(const Tensor& t) { Tensor t2 = test::AsTensor(values, t.shape()); test::ExpectTensorEqual<T>(t, t2); } + { + LOG(INFO) << "Move constructor"; + Tensor t2 = t; + Tensor t3(std::move(t2)); + test::ExpectTensorEqual<T>(t, t3); + EXPECT_TRUE(t3.IsInitialized()); + EXPECT_FALSE(t2.IsInitialized()); + } + { + LOG(INFO) << "Move assignment"; + Tensor t2 = t; + Tensor t3 = std::move(t2); + test::ExpectTensorEqual<T>(t, t3); + EXPECT_TRUE(t3.IsInitialized()); + EXPECT_FALSE(t2.IsInitialized()); + } } TEST(Tensor_Float, Simple) { @@ -791,4 +807,36 @@ TEST(Tensor, EmptyTensorData) { EXPECT_EQ(empty.tensor_data().size(), 0); } +// Benchmark create and destroy a tensor, with an allocated buffer. +static void BM_CreateAndDestroyWithBuf(int iters) { + TensorShape shape({10, 20}); + Allocator* allocator = cpu_allocator(); + while (--iters) { + Tensor a(allocator, DT_FLOAT, shape); + } +} +BENCHMARK(BM_CreateAndDestroyWithBuf); + +// Benchmark create+copy a tensor, with an allocated buffer. +static void BM_CreateAndCopyCtrWithBuf(int iters) { + TensorShape shape({10, 20}); + Allocator* allocator = cpu_allocator(); + while (--iters) { + Tensor a(allocator, DT_FLOAT, shape); + Tensor b(a); + } +} +BENCHMARK(BM_CreateAndCopyCtrWithBuf); + +// Benchmark create+move a tensor, with an allocated buffer. 
+static void BM_CreateAndMoveCtrWithBuf(int iters) { + TensorShape shape({10, 20}); + Allocator* allocator = cpu_allocator(); + while (--iters) { + Tensor a(allocator, DT_FLOAT, shape); + Tensor b(std::move(a)); + } +} +BENCHMARK(BM_CreateAndMoveCtrWithBuf); + } // namespace tensorflow |