diff options
author | 2016-01-13 17:30:08 -0800 | |
---|---|---|
committer | 2016-01-13 17:30:08 -0800 | |
commit | 6dbfb95100b73ad26ebebb9be9c0429dc0cece8a (patch) | |
tree | 83a15ef5538529dd3a4fece1276388c20f9260bb /tensorflow/core/kernels/segment_reduction_ops_test.cc | |
parent | cbdf278dbd998aab57898763605f6863e9b42b76 (diff) |
Avoid allocation of std::function<> object per node executed, as
well as a hash-table lookup per allocated output.
Instead, we now pre-compute the AllocatorAttributes for every output
tensor in the graph into an array (indexed by a base number per node +
output index), and OpKernelContext::Params now provides
a pointer to the base of the array for the node, rather than providing
a std::function<>.
Updated test code to avoid so much code duplication when setting up
the OpKernelContext::Params object in various places.
Used gtl::InlinedVector<...> instead of std::vector<...> in a few
places in tensorflow/core/kernels/reduction_ops_common.h.
Didn't make a measurable change in overall performance, but allocations and
time spent in the std::function destructor code were significantly reduced.
Change: 112103260
Diffstat (limited to 'tensorflow/core/kernels/segment_reduction_ops_test.cc')
-rw-r--r-- | tensorflow/core/kernels/segment_reduction_ops_test.cc | 10 |
1 files changed, 3 insertions, 7 deletions
diff --git a/tensorflow/core/kernels/segment_reduction_ops_test.cc b/tensorflow/core/kernels/segment_reduction_ops_test.cc index 00d8e13338..4121ec232b 100644 --- a/tensorflow/core/kernels/segment_reduction_ops_test.cc +++ b/tensorflow/core/kernels/segment_reduction_ops_test.cc @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/testlib.h" +#include "tensorflow/core/kernels/ops_testutil.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/platform/test_benchmark.h" #include "tensorflow/core/public/tensor.h" @@ -72,13 +73,8 @@ static void BM_SegmentReduction(int iters, string reduction, Index num_rows, params.frame_iter = FrameAndIter(0, 0); params.inputs = &reduction_inputs; params.op_kernel = reduction_op.get(); - params.output_alloc_attr = [&device, &reduction_op, &params](int index) { - AllocatorAttributes attr; - const bool on_host = - (reduction_op->output_memory_types()[index] == HOST_MEMORY); - attr.set_on_host(on_host); - return attr; - }; + std::vector<AllocatorAttributes> attrs; + test::SetOutputAttrs(&params, &attrs); std::unique_ptr<OpKernelContext> reduction_context( new OpKernelContext(params)); |