aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/kernels/segment_reduction_ops_test.cc
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <nobody@tensorflow.org>2016-01-13 17:30:08 -0800
committerGravatar Vijay Vasudevan <vrv@google.com>2016-01-13 17:30:08 -0800
commit6dbfb95100b73ad26ebebb9be9c0429dc0cece8a (patch)
tree83a15ef5538529dd3a4fece1276388c20f9260bb /tensorflow/core/kernels/segment_reduction_ops_test.cc
parentcbdf278dbd998aab57898763605f6863e9b42b76 (diff)
Avoid allocation of std::function<> object per node executed, as
well as a hash-table lookup per allocated output. Instead, we now pre-compute the AllocatorAttributes for every output tensor in the graph into an array (indexed by a base number per node + output index), and have changed OpKernelContext::Params to provide a pointer to the base of the array for the node, rather than providing a std::function<>. Updated test code to avoid so much code duplication when setting up the OpKernelContext::Params object in various places. Used gtl::InlinedVector<...> instead of std::vector<...> in a few places in tensorflow/core/kernels/reduction_ops_common.h. Didn't make a measurable change in overall performance, but allocations and time spent in the std::function destructor code were significantly reduced. Change: 112103260
Diffstat (limited to 'tensorflow/core/kernels/segment_reduction_ops_test.cc')
-rw-r--r--tensorflow/core/kernels/segment_reduction_ops_test.cc10
1 files changed, 3 insertions, 7 deletions
diff --git a/tensorflow/core/kernels/segment_reduction_ops_test.cc b/tensorflow/core/kernels/segment_reduction_ops_test.cc
index 00d8e13338..4121ec232b 100644
--- a/tensorflow/core/kernels/segment_reduction_ops_test.cc
+++ b/tensorflow/core/kernels/segment_reduction_ops_test.cc
@@ -31,6 +31,7 @@ limitations under the License.
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/graph/testlib.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/public/tensor.h"
@@ -72,13 +73,8 @@ static void BM_SegmentReduction(int iters, string reduction, Index num_rows,
params.frame_iter = FrameAndIter(0, 0);
params.inputs = &reduction_inputs;
params.op_kernel = reduction_op.get();
- params.output_alloc_attr = [&device, &reduction_op, &params](int index) {
- AllocatorAttributes attr;
- const bool on_host =
- (reduction_op->output_memory_types()[index] == HOST_MEMORY);
- attr.set_on_host(on_host);
- return attr;
- };
+ std::vector<AllocatorAttributes> attrs;
+ test::SetOutputAttrs(&params, &attrs);
std::unique_ptr<OpKernelContext> reduction_context(
new OpKernelContext(params));