diff options
Diffstat (limited to 'tensorflow/core/kernels/unique_op.cc')
-rw-r--r-- | tensorflow/core/kernels/unique_op.cc | 113 |
1 files changed, 97 insertions, 16 deletions
diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc index 701c5f6d2b..d087784c8a 100644 --- a/tensorflow/core/kernels/unique_op.cc +++ b/tensorflow/core/kernels/unique_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include <functional> #include <unordered_map> #include <utility> @@ -21,6 +22,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/hash/hash.h" namespace tensorflow { @@ -33,8 +35,6 @@ class UniqueOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); - OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), - errors::InvalidArgument("unique expects a 1D vector.")); // TODO(dga): Make unique polymorphic for returning int32 and int64 // vectors to support large tensors. OP_REQUIRES(context, @@ -42,31 +42,102 @@ class UniqueOp : public OpKernel { errors::InvalidArgument( "unique does not support input tensors larger than ", std::numeric_limits<int32>::max(), " elements")); - auto Tin = input.vec<T>(); - const int64 N = static_cast<int64>(Tin.size()); + + int64 axis = 0; + std::vector<int64> new_sizes{1, input.NumElements(), 1}; + if (context->num_inputs() == 1) { + OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), + errors::InvalidArgument("unique expects a 1D vector.")); + } else { + // In case of UniqueV2, the axis is a 1D vector. The purpose is + // to allow specifying either "no axis" or "axis". The `[]` means + // "no axis", while `[x]` means `axis = x`. + const Tensor& axis_tensor = context->input(1); + OP_REQUIRES(context, TensorShapeUtils::IsVector(axis_tensor.shape()), + errors::InvalidArgument("axis expects a 1D vector.")); + OP_REQUIRES( + context, axis_tensor.NumElements() <= 1, + errors::InvalidArgument( + "axis does not support input tensors larger than 1 elements")); + if (axis_tensor.NumElements() == 0) { + OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()), + errors::InvalidArgument("unique expects a 1D vector.")); + } else { + auto axis_vec = axis_tensor.vec<int64>(); + axis = axis_vec(0); + axis = axis < 0 ? axis + input.dims() : axis; + OP_REQUIRES(context, 0 <= axis && axis < input.dims(), + errors::InvalidArgument("axis has to be between [0, ", + input.dims(), ")")); + if (axis > 0) { + for (int64 i = 0; i < axis; i++) { + new_sizes[0] *= input.dim_size(i); + } + } + new_sizes[1] = input.dim_size(axis); + if (axis + 1 < input.dims()) { + for (int64 i = axis + 1; i < input.dims(); i++) { + new_sizes[2] *= input.dim_size(i); + } + } + } + } + + auto Tin = input.shaped<T, 3>(new_sizes); Tensor* idx = nullptr; - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {0}, 1, input.shape(), &idx)); + OP_REQUIRES_OK(context, context->allocate_output( + 1, TensorShape({Tin.dimension(1)}), &idx)); auto idx_vec = idx->template vec<TIndex>(); - std::unordered_map<T, TIndex> uniq; - uniq.reserve(2 * N); - for (int64 i = 0, j = 0; i < N; ++i) { - auto it = uniq.insert(std::make_pair(Tin(i), j)); + auto hash_fn = [&Tin](const int64& key) -> unsigned long { + size_t h = 0; + for (int64 i = 0; i < Tin.dimension(0); i++) { + for (int64 j = 0; j < Tin.dimension(2); j++) { + h = Hash64Combine(h, hash<T>{}(Tin(i, key, j))); + } + } + return h; + }; + + auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) { + for (int64 i = 0; i < Tin.dimension(0); i++) { + for (int64 j = 0; j < Tin.dimension(2); j++) { + if (Tin(i, lhs, j) != Tin(i, rhs, j)) { + return false; + } + } + } + return true; + }; + + std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)> + uniq(0, hash_fn, equal_to_fn); + + uniq.reserve(2 * Tin.dimension(1)); + + for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) { + auto it = uniq.insert(std::make_pair(i, j)); idx_vec(i) = it.first->second; if (it.second) { ++j; } } + int64 uniq_size = static_cast<int64>(uniq.size()); + new_sizes[1] = uniq_size; + TensorShape output_shape(input.shape()); + output_shape.set_dim(axis, uniq_size); Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output( - 0, TensorShape({uniq_size}), &output)); - auto output_vec = output->template vec<T>(); + OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); + auto Tout = output->shaped<T, 3>(new_sizes); for (auto it : uniq) { - output_vec(it.second) = it.first; + for (int64 i = 0; i < Tin.dimension(0); i++) { + for (int64 j = 0; j < Tin.dimension(2); j++) { + Tout(i, it.second, j) = Tin(i, it.first, j); + } + } } if (num_outputs() > 2) { @@ -74,7 +145,7 @@ class UniqueOp : public OpKernel { 2, TensorShape({uniq_size}), &output)); auto count_output_vec = output->template vec<TIndex>(); count_output_vec.setZero(); - for (int64 i = 0; i < N; ++i) { + for (int64 i = 0; i < Tin.dimension(1); ++i) { count_output_vec(idx_vec(i))++; } } @@ -92,6 +163,16 @@ class UniqueOp : public OpKernel { .TypeConstraint<type>("T") \ .TypeConstraint<int64>("out_idx"), \ UniqueOp<type, int64>); \ + REGISTER_KERNEL_BUILDER(Name("UniqueV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint<type>("T") \ + .TypeConstraint<int32>("out_idx"), \ + UniqueOp<type, int32>); \ + REGISTER_KERNEL_BUILDER(Name("UniqueV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint<type>("T") \ + .TypeConstraint<int64>("out_idx"), \ + UniqueOp<type, int64>); \ REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts") \ .Device(DEVICE_CPU) \ .TypeConstraint<type>("T") \ @@ -176,5 +257,5 @@ REGISTER_KERNEL_BUILDER(Name("Unique") .HostMemory("y") .HostMemory("idx"), UniqueOp<int64, int64>); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow |