/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/cc/framework/ops.h"
#include "tensorflow/cc/framework/scope_internal.h"
#include "tensorflow/cc/ops/array_ops.h"
#include "tensorflow/cc/ops/math_ops.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"

namespace tensorflow {
namespace grappler {
namespace {

const char* const kExpandDimsPrefix = "vectorized/expanddims/";

// Reshapes stacked inputs for broadcast. Stacked inputs have an extra leading
// dimension, which may cause automatic broadcasting rules to expand the input
// dimensions incorrectly when the unstacked shapes have different ranks. To
// avoid that, we reshape stacked inputs to the maximum rank they need to be
// broadcast to.
//
// For example, suppose we have inputs A and B, where A is a stacked tensor
// with shape [n, 5] (where n is the stack size) and B is an unstacked tensor
// with shape [12, 7, 5]. If we added them directly, TensorFlow broadcasting
// rules would expand the dimensions of A to [1, n, 5], then (incorrectly)
// check that the dimensions n and 7 are compatible, and if so, create an
// output of shape [12, 7, 5]. However, correct addition of these inputs would
// create an output with shape [n, 12, 7, 5]: we need to manually expand the
// dimensions of A *after* the leading dimension, i.e. expand A to the shape
// [n, 1, 1, 5] before broadcasting.
Status ExpandDimsForBroadcast(std::vector<WrappedTensor>* inputs, Graph* g) {
  Status status;
  Scope parent = NewInternalScope(g, &status, /*refiner=*/nullptr);
  Scope s = parent.NewSubScope(kExpandDimsPrefix);

  // TODO(rachelim): We can potentially get rid of all these ops if shapes are
  // known statically.

  Output const_0 = ops::Const(s, 0);
  Output const_1 = ops::Const(s, 1);

  std::vector<Output> ranks;
  ranks.reserve(inputs->size());

  // Get the stacked rank of each input.
  for (const auto& input : *inputs) {
    Output rank = ops::Rank(s, Output(input.node, input.output_index));

    if (!input.stacked) {
      // If the input is unstacked, add 1 to account for the missing leading
      // stack dimension, so all ranks are comparable as stacked ranks.
      rank = ops::Add(s, rank, const_1);
    }

    ranks.push_back(rank);
  }

  // Pack the ranks into one tensor to get the max.
  Output packed_ranks = ops::Stack(s, ranks);
  Output max_rank =
      ops::Max(s, packed_ranks, const_0, ops::Max::Attrs().KeepDims(true));

  std::vector<WrappedTensor> expanded_inputs;
  expanded_inputs.reserve(inputs->size());

  // For all inputs that are stacked, expand dimensions after dim 0.
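  //
  // A sketch of what the loop builds for each stacked input, in NumPy-style
  // pseudocode (not part of the graph; applied to the running example above,
  // where A has stacked shape [n, 5] and max_rank is 4):
  //
  //   rank_diff = max_rank - rank(A)              // 4 - 2 = 2
  //   new_shape = concat([shape(A)[:1],           // [n]
  //                       [1] * rank_diff,        // [1, 1]
  //                       shape(A)[1:]], axis=0)  // [5]  ->  [n, 1, 1, 5]
  //   A = reshape(A, new_shape)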
  for (size_t i = 0; i < inputs->size(); ++i) {
    if (!inputs->at(i).stacked) {
      expanded_inputs.push_back(inputs->at(i));
      continue;
    }

    Output input(inputs->at(i).node, inputs->at(i).output_index);

    // Number of dimensions to expand.
    Output rank_diff = ops::Sub(s, max_rank, ranks[i]);

    // [1] * rank_diff
    Output ones = ops::Tile(s, ops::Const(s, {1}), rank_diff);

    Output const_vec_1 = ops::Const(s, {1});

    Output shape = ops::Shape(s, input);

    // shape[:1]
    Output concat_pre =
        ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1,
                          ops::StridedSlice::Attrs().BeginMask(1));

    // shape[1:]
    Output concat_post =
        ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1,
                          ops::StridedSlice::Attrs().EndMask(1));

    // tf.concat([shape[:1], ones, shape[1:]], 0)
    Output new_shape = ops::Concat(s, {concat_pre, ones, concat_post}, const_0);

    Output result = ops::Reshape(s, input, new_shape);

    expanded_inputs.push_back({result.node(), 0, /*stacked=*/true});
  }

  inputs->swap(expanded_inputs);
  return status;
}

class AddVectorizer : public Vectorizer {
 public:
  Status Vectorize(const Node& node, Graph* outer_scope,
                   std::vector<WrappedTensor>&& inputs,
                   std::vector<WrappedTensor>* outputs) override {
    if (node.num_inputs() != 2) {
      return errors::Internal("Add op should only have two inputs.");
    }

    TF_RETURN_IF_ERROR(ExpandDimsForBroadcast(&inputs, outer_scope));

    // Add a new Add node with the same op and attrs as the original node.
    Node* new_add_node;
    TF_RETURN_IF_ERROR(NodeBuilder("Add", "Add")
                           .Input(inputs[0].node, inputs[0].output_index)
                           .Input(inputs[1].node, inputs[1].output_index)
                           .Finalize(outer_scope, &new_add_node));

    // Add output mappings.
    outputs->push_back({new_add_node, 0, /*stacked=*/true});

    return Status::OK();
  }
};

REGISTER_VECTORIZER("Add", AddVectorizer);

}  // namespace
}  // namespace grappler
}  // namespace tensorflow