1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
// ScatterAddNdim implements a scatter_add that can operate on sparse
// updates without being limited to the first dimension for indices.
#include "tensorflow/contrib/tensor_forest/kernels/tree_utils.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/platform/logging.h"
namespace tensorflow {
using tensorforest::CheckTensorBounds;
class ScatterAddNdim : public OpKernel {
public:
explicit ScatterAddNdim(OpKernelConstruction* context) : OpKernel(context) {}
void Compute(OpKernelContext* context) override {
Tensor input_tensor = context->mutable_input(0, false);
const Tensor& indices_tensor = context->input(1);
const Tensor& deltas_tensor = context->input(2);
if (indices_tensor.shape().dim_size(0) > 0) {
OP_REQUIRES(context, indices_tensor.shape().dims() == 2,
errors::InvalidArgument("indices should be two-dimensional"));
const int32 delta_dims = deltas_tensor.shape().dims();
OP_REQUIRES(
context,
indices_tensor.shape().dim_size(1) + delta_dims ==
input_tensor.shape().dims() + 1,
errors::InvalidArgument(
"Number of indices dimensions should be the same as input "
"rank."));
OP_REQUIRES(
context,
indices_tensor.shape().dim_size(0) ==
deltas_tensor.shape().dim_size(0),
errors::InvalidArgument(
"Number of updates should be same as number of indices."));
} else {
return;
}
// Check tensor bounds.
if (!CheckTensorBounds(context, input_tensor)) return;
if (!CheckTensorBounds(context, indices_tensor)) return;
if (!CheckTensorBounds(context, deltas_tensor)) return;
auto input = input_tensor.flat<float>();
const auto indices = indices_tensor.tensor<int32, 2>();
const auto deltas = deltas_tensor.unaligned_flat<float>();
const int32 num_dims =
static_cast<int32>(indices_tensor.shape().dim_size(1));
// Figure out if indices don't specify a complete position in the
// input tensor.
int32 num_data_per_index = 1;
for (int32 i = 0; i < input_tensor.shape().dims() - num_dims; ++i) {
num_data_per_index *= input_tensor.shape().dim_size(num_dims + i);
}
// Calculate index multipliers.
std::vector<int32> multipliers;
OP_REQUIRES(context, input.size() < std::numeric_limits<int32>::max(),
errors::InvalidArgument(
"Input must contain less than 2^31 total elements"));
int32 last_size = static_cast<int32>(input.size());
for (int32 j = 0; j < num_dims; j++) {
const int32 m = last_size / input_tensor.shape().dim_size(j);
multipliers.push_back(m);
last_size = m;
}
// Perform updates.
for (int32 i = 0; i < indices_tensor.shape().dim_size(0); i++) {
int32 start_index = 0;
for (int32 j = 0; j < num_dims; j++) {
start_index += indices(i, j) * multipliers[j];
}
for (int32 offset = 0; offset < num_data_per_index; ++offset) {
const int32 input_index = start_index + offset;
const int32 delta_index = i * num_data_per_index + offset;
CHECK(input_index < input.size());
CHECK(delta_index < deltas.size());
input(input_index) += deltas(delta_index);
}
}
}
};
REGISTER_KERNEL_BUILDER(Name("ScatterAddNdim").Device(DEVICE_CPU),
ScatterAddNdim);
} // namespace tensorflow
|