about · summary · refs · log · tree · commit · diff · homepage
path: root/tensorflow
diff options
context:
space:
mode:
author    A. Unique TensorFlower <nobody@tensorflow.org>    2016-05-04 08:14:06 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>   2016-05-04 09:21:36 -0700
commit    f7a662e595f1631c12d15173344ee0f50d2cd9f8 (patch)
tree      ab87e6527dadcc2b7b9367ce70c62052bc343cd5 /tensorflow
parent    a1bc10f9e28cd3d33e91bfaedd8199e4590f2893 (diff)
Reduce overhead of ExecutorState::ProcessOutputs:
- Cache result of LogMemory::IsEnabled.
- Init Tensor for logging only if logging is enabled.
- Add move constructor to Tensor and use it in ProcessOutputs.

Also added benchmark for copy vs move in tensor_test.cc.
Change: 121487202
Diffstat (limited to 'tensorflow')
-rw-r--r--tensorflow/core/BUILD1
-rw-r--r--tensorflow/core/common_runtime/executor.cc40
-rw-r--r--tensorflow/core/framework/tensor.cc5
-rw-r--r--tensorflow/core/framework/tensor.h26
-rw-r--r--tensorflow/core/framework/tensor_shape.h28
-rw-r--r--tensorflow/core/framework/tensor_shape_test.cc9
-rw-r--r--tensorflow/core/framework/tensor_test.cc48
7 files changed, 135 insertions, 22 deletions
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 963ea1fc4d..792142af62 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -786,6 +786,7 @@ cc_library(
"lib/gtl/edit_distance.h",
"lib/gtl/int_type.h",
"lib/gtl/iterator_range.h",
+ "lib/gtl/manual_constructor.h",
"lib/gtl/top_n.h",
"lib/io/iterator.h",
"lib/io/match.h",
diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index 0c322cac76..b73f7c2aa9 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -44,6 +44,7 @@ limitations under the License.
#include "tensorflow/core/lib/core/notification.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "tensorflow/core/lib/gtl/manual_constructor.h"
#include "tensorflow/core/lib/gtl/stl_util.h"
#include "tensorflow/core/lib/hash/hash.h"
#include "tensorflow/core/lib/strings/str_util.h"
@@ -644,6 +645,9 @@ class ExecutorState {
const bool vlog_; // true if VLOG_IS_ON(1). Used to check vlog cheaply.
+ // true if LogMemory::IsEnabled(). Used to check memory enabled cheaply.
+ const bool log_memory_;
+
int64 step_id_;
// Not owned.
Rendezvous* rendezvous_;
@@ -796,6 +800,7 @@ class ExecutorState {
ExecutorState::ExecutorState(const Executor::Args& args, ExecutorImpl* impl)
: vlog_(VLOG_IS_ON(1)),
+ log_memory_(LogMemory::IsEnabled()),
step_id_(args.step_id),
rendezvous_(args.rendezvous),
session_state_(args.session_state),
@@ -1260,9 +1265,6 @@ Status ExecutorState::ProcessOutputs(const NodeItem& item, OpKernelContext* ctx,
Entry* out = &((*outputs)[i]);
out->has_value = true;
- // This value is filled in below if LogMemory::IsEnabled.
- Tensor value_to_log;
-
// Set the device context of the output entry.
out->device_context = device_context;
@@ -1273,23 +1275,31 @@ Status ExecutorState::ProcessOutputs(const NodeItem& item, OpKernelContext* ctx,
DataType dtype = val->dtype();
if (val.is_ref()) dtype = MakeRefType(dtype);
if (dtype == item.output_type(i)) {
+ if (stats_collector_ && val.tensor->IsInitialized()) {
+ nodestats::SetOutput(stats, i, val.tensor);
+ }
if (val.is_ref()) {
out->ref = val.tensor;
out->ref_mu = val.mutex_if_ref;
- if (LogMemory::IsEnabled()) {
- // Dereference the tensor under the lock.
- mutex_lock l(*out->ref_mu);
- value_to_log = *out->ref;
+ if (log_memory_) {
+ Tensor to_log;
+ {
+ // Dereference the tensor under the lock.
+ mutex_lock l(*out->ref_mu);
+ to_log = *out->ref;
+ }
+ LogMemory::RecordTensorOutput(ctx->op_kernel().name(),
+ ctx->step_id(), i, to_log);
}
} else {
- out->val = *val.tensor;
- if (LogMemory::IsEnabled()) {
- value_to_log = out->val;
+ // NOTE that std::move is used here, so val.tensor goes to
+ // uninitialized state (val.tensor->IsInitialized return false).
+ out->val = std::move(*val.tensor);
+ if (log_memory_) {
+ LogMemory::RecordTensorOutput(ctx->op_kernel().name(),
+ ctx->step_id(), i, out->val);
}
}
- if (stats_collector_ && val.tensor->IsInitialized()) {
- nodestats::SetOutput(stats, i, val.tensor);
- }
} else {
s.Update(errors::Internal("Output ", i, " of type ",
DataTypeString(dtype),
@@ -1297,10 +1307,6 @@ Status ExecutorState::ProcessOutputs(const NodeItem& item, OpKernelContext* ctx,
DataTypeString(item.output_type(i)),
" for node ", SummarizeNodeDef(node->def())));
}
- if (LogMemory::IsEnabled()) {
- LogMemory::RecordTensorOutput(ctx->op_kernel().name(), ctx->step_id(),
- i, value_to_log);
- }
}
if (!val.is_ref()) {
// If OpKernelContext returns outputs via pass-by-value, we
diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc
index 1291fae3ae..ea528b958b 100644
--- a/tensorflow/core/framework/tensor.cc
+++ b/tensorflow/core/framework/tensor.cc
@@ -389,11 +389,6 @@ Tensor::Tensor() : Tensor(DT_FLOAT) {}
Tensor::Tensor(DataType type) : shape_({0}), buf_(nullptr) { set_dtype(type); }
-Tensor::Tensor(const Tensor& other) : shape_(other.shape()), buf_(other.buf_) {
- set_dtype(other.dtype());
- RefIfNonNull(buf_);
-}
-
Tensor::Tensor(DataType type, const TensorShape& shape, TensorBuffer* buf)
: shape_(shape), buf_(buf) {
set_dtype(type);
diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h
index a8578e789d..f183a0aa67 100644
--- a/tensorflow/core/framework/tensor.h
+++ b/tensorflow/core/framework/tensor.h
@@ -84,6 +84,11 @@ class Tensor {
Tensor(const Tensor& other); /// Copy constructor.
+ // Move constructor. After this call, <other> is safely destructible and can
+ // be assigned to, but other calls on it (e.g. shape manipulation) are not
+ // valid.
+ Tensor(Tensor&& other);
+
~Tensor();
/// Returns the data type.
@@ -137,6 +142,9 @@ class Tensor {
return *this;
}
+ /// Move operator. See move constructor for details.
+ Tensor& operator=(Tensor&& other);
+
/// \brief Copy the other tensor into this tensor and reshape it.
///
/// This tensor shares other's underlying storage. Returns `true`
@@ -543,6 +551,24 @@ typename TTypes<T, NDIMS>::ConstTensor Tensor::flat_outer_dims() const {
return shaped<T, NDIMS>(ComputeFlatOuterDims(NDIMS));
}
+inline Tensor::Tensor(const Tensor& other)
+ : shape_(other.shape()), buf_(other.buf_) {
+ if (buf_) buf_->Ref();
+}
+
+inline Tensor::Tensor(Tensor&& other)
+ : shape_(std::move(other.shape())), buf_(other.buf_) {
+ other.buf_ = nullptr;
+}
+
+inline Tensor& Tensor::operator=(Tensor&& other) {
+ shape_ = std::move(other.shape_);
+ if (buf_) buf_->Unref();
+ buf_ = other.buf_;
+ other.buf_ = nullptr;
+ return *this;
+}
+
} // namespace tensorflow
#endif // TENSORFLOW_CORE_FRAMEWORK_TENSOR_H_
diff --git a/tensorflow/core/framework/tensor_shape.h b/tensorflow/core/framework/tensor_shape.h
index 84947e308a..7cc8223016 100644
--- a/tensorflow/core/framework/tensor_shape.h
+++ b/tensorflow/core/framework/tensor_shape.h
@@ -54,6 +54,13 @@ class TensorShape {
TensorShape(const TensorShape& b);
void operator=(const TensorShape& b);
+ /// Move the specified shape. After moving, <b> is safe for destruction and
+ // can be reassigned into, but its dimensions and number of elements can be
+ // nonsensical (e.g., negative dimension sizes, or number of elements not
+ // properly recomputed).
+ TensorShape(TensorShape&& b);
+ void operator=(TensorShape&& b);
+
/// Returns `true` iff `proto` is a valid tensor shape.
static bool IsValid(const TensorShapeProto& proto);
@@ -308,6 +315,15 @@ inline TensorShape::TensorShape(const TensorShape& b) {
}
}
+inline TensorShape::TensorShape(TensorShape&& b) {
+ num_elements_ = b.num_elements_;
+ memcpy(buf(), b.buf(), sizeof(u_.buf));
+ // memcpy above Implicitly does:
+ // set_ndims_byte(b.ndims_byte());
+ // set_tag(b.tag());
+ b.set_tag(REP16); // other shape no longer owns out-of-line data, if any.
+}
+
inline TensorShape::~TensorShape() {
if (tag() == REP_OUT_OF_LINE) {
DestructorOutOfLine();
@@ -326,6 +342,18 @@ inline void TensorShape::operator=(const TensorShape& b) {
}
}
+inline void TensorShape::operator=(TensorShape&& b) {
+ if (tag() == REP_OUT_OF_LINE) {
+ DestructorOutOfLine();
+ }
+ num_elements_ = b.num_elements_;
+ memcpy(buf(), b.buf(), sizeof(u_.buf));
+ // memcpy above Implicitly does:
+ // set_ndims_byte(b.ndims_byte());
+ // set_tag(b.tag());
+ b.set_tag(REP16); // other shape no longer owns out-of-line data, if any.
+}
+
} // namespace tensorflow
#endif // TENSORFLOW_CORE_FRAMEWORK_TENSOR_SHAPE_H_
diff --git a/tensorflow/core/framework/tensor_shape_test.cc b/tensorflow/core/framework/tensor_shape_test.cc
index 5eeaeb61da..f3e1a7cbcb 100644
--- a/tensorflow/core/framework/tensor_shape_test.cc
+++ b/tensorflow/core/framework/tensor_shape_test.cc
@@ -473,6 +473,15 @@ TEST(TensorShapeTest, Randomized) {
fprintf(stderr, "ITERATION %d: %s\n", i, sp.DebugString().c_str());
}
EXPECT_EQ(s.num_elements(), sold.num_elements());
+
+ // Test moves.
+ TensorShape copy = s;
+ TensorShape moved(std::move(copy));
+ EXPECT_EQ(s, moved);
+ copy = s;
+ moved = std::move(copy);
+ EXPECT_EQ(s, moved);
+
int64 ne = sold.num_elements();
int r = gen.Uniform(100);
if (r < 10) {
diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc
index f13c85c188..eed7417d36 100644
--- a/tensorflow/core/framework/tensor_test.cc
+++ b/tensorflow/core/framework/tensor_test.cc
@@ -112,6 +112,22 @@ void TestCopies(const Tensor& t) {
Tensor t2 = test::AsTensor(values, t.shape());
test::ExpectTensorEqual<T>(t, t2);
}
+ {
+ LOG(INFO) << "Move constructor";
+ Tensor t2 = t;
+ Tensor t3(std::move(t2));
+ test::ExpectTensorEqual<T>(t, t3);
+ EXPECT_TRUE(t3.IsInitialized());
+ EXPECT_FALSE(t2.IsInitialized());
+ }
+ {
+ LOG(INFO) << "Move assignment";
+ Tensor t2 = t;
+ Tensor t3 = std::move(t2);
+ test::ExpectTensorEqual<T>(t, t3);
+ EXPECT_TRUE(t3.IsInitialized());
+ EXPECT_FALSE(t2.IsInitialized());
+ }
}
TEST(Tensor_Float, Simple) {
@@ -791,4 +807,36 @@ TEST(Tensor, EmptyTensorData) {
EXPECT_EQ(empty.tensor_data().size(), 0);
}
+// Benchmark create and destroy a tensor, with an allocated buffer.
+static void BM_CreateAndDestroyWithBuf(int iters) {
+ TensorShape shape({10, 20});
+ Allocator* allocator = cpu_allocator();
+ while (--iters) {
+ Tensor a(allocator, DT_FLOAT, shape);
+ }
+}
+BENCHMARK(BM_CreateAndDestroyWithBuf);
+
+// Benchmark create+copy a tensor, with an allocated buffer.
+static void BM_CreateAndCopyCtrWithBuf(int iters) {
+ TensorShape shape({10, 20});
+ Allocator* allocator = cpu_allocator();
+ while (--iters) {
+ Tensor a(allocator, DT_FLOAT, shape);
+ Tensor b(a);
+ }
+}
+BENCHMARK(BM_CreateAndCopyCtrWithBuf);
+
+// Benchmark create+move a tensor, with an allocated buffer.
+static void BM_CreateAndMoveCtrWithBuf(int iters) {
+ TensorShape shape({10, 20});
+ Allocator* allocator = cpu_allocator();
+ while (--iters) {
+ Tensor a(allocator, DT_FLOAT, shape);
+ Tensor b(std::move(a));
+ }
+}
+BENCHMARK(BM_CreateAndMoveCtrWithBuf);
+
} // namespace tensorflow