author    A. Unique TensorFlower <gardener@tensorflow.org>  2018-01-05 11:13:07 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>  2018-01-05 11:16:58 -0800
commit    9878a2e64771e8feef527ec3d221163200bdaf30 (patch)
tree      91a78f91d04a1f1c6b126971c52bbd2f7d2142bc /tensorflow/core/kernels/unique_op.cc
parent    30f495c3c0cde337a4b9a06c9de91d3b69a84d98 (diff)
Reintroduces a faster specialization of the unique op for the case where uniqueness is computed over single elements. This restores up to 3x performance improvement in the microbenchmark; the specialization had been lost when support was added for UniqueV2.
PiperOrigin-RevId: 180951153
Diffstat (limited to 'tensorflow/core/kernels/unique_op.cc')
-rw-r--r--  tensorflow/core/kernels/unique_op.cc  |  109
1 file changed, 72 insertions(+), 37 deletions(-)
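
For orientation before reading the diff: the fast path added here is a single hash-map pass keyed on the element values themselves, rather than on indices into the tensor. The following is a minimal standalone sketch of that idea, assuming plain std:: containers and a hypothetical helper name (UniqueSingleElements); the real kernel operates on Eigen tensor maps and TensorFlow-allocated outputs.

#include <cstdint>
#include <unordered_map>
#include <vector>

// Sketch of the single-element fast path: one pass over the input, keying
// the map on the value itself. (*idx)[i] receives the output position of
// input element i; the return value holds the unique values in
// first-occurrence order.
template <typename T>
std::vector<T> UniqueSingleElements(const std::vector<T>& in,
                                    std::vector<int64_t>* idx) {
  std::unordered_map<T, int64_t> uniq;
  uniq.reserve(2 * in.size());  // mirrors the kernel's reserve heuristic
  idx->resize(in.size());
  for (int64_t i = 0, j = 0; i < static_cast<int64_t>(in.size()); ++i) {
    // insert() leaves the map untouched when the key already exists; either
    // way, it.first points at the entry holding the value's output slot.
    auto it = uniq.insert({in[i], j});
    (*idx)[i] = it.first->second;
    if (it.second) ++j;  // a new unique value claimed slot j
  }
  std::vector<T> out(uniq.size());
  for (const auto& kv : uniq) out[kv.second] = kv.first;
  return out;
}

The general path cannot key the map on values this way, because an "element" along the unique axis is an entire slice; it therefore keys on slice indices and supplies custom hash and equality functors, as the diff shows (a sketch of that variant follows the diff).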
diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc
index 782470210f..e64b27b572 100644
--- a/tensorflow/core/kernels/unique_op.cc
+++ b/tensorflow/core/kernels/unique_op.cc
@@ -83,65 +83,100 @@ class UniqueOp : public OpKernel {
       }
     }
 
-    auto Tin = input.shaped<T, 3>(new_sizes);
-
     Tensor* idx = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(
-                                1, TensorShape({Tin.dimension(1)}), &idx));
+                                1, TensorShape({new_sizes[1]}), &idx));
     auto idx_vec = idx->template vec<TIndex>();
 
-    auto hash_fn = [&Tin](const int64& key) -> unsigned long {
-      size_t h = 0;
-      for (int64 i = 0; i < Tin.dimension(0); i++) {
-        for (int64 j = 0; j < Tin.dimension(2); j++) {
-          h = Hash64Combine(h, hash<T>{}(Tin(i, key, j)));
+    int64 uniq_size;
+    if (new_sizes[0] == 1 && new_sizes[2] == 1) {
+      // Specialized and faster implementation when unique is run over single
+      // elements. Here we put T directly into the map rather than ints pointing
+      // to them as in the general case.
+      auto Tin = input.flat<T>();
+      const int64 N = static_cast<int64>(Tin.size());
+
+      std::unordered_map<T, TIndex> uniq;
+      uniq.reserve(2 * N);
+      for (int64 i = 0, j = 0; i < N; ++i) {
+        auto it = uniq.insert(std::make_pair(Tin(i), j));
+        idx_vec(i) = it.first->second;
+        if (it.second) {
+          ++j;
         }
       }
-      return h;
-    };
-    auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) {
-      for (int64 i = 0; i < Tin.dimension(0); i++) {
-        for (int64 j = 0; j < Tin.dimension(2); j++) {
-          if (Tin(i, lhs, j) != Tin(i, rhs, j)) {
-            return false;
+      uniq_size = static_cast<int64>(uniq.size());
+      TensorShape output_shape(input.shape());
+      output_shape.set_dim(axis, uniq_size);
+      Tensor* output = nullptr;
+      OP_REQUIRES_OK(context,
+                     context->allocate_output(0, output_shape, &output));
+      auto Tout = output->flat<T>();
+
+      for (auto it : uniq) {
+        Tout(it.second) = it.first;
+      }
+    } else {
+      // General implementation when unique is run over multiple elements.
+      auto Tin = input.shaped<T, 3>(new_sizes);
+
+      auto hash_fn = [&Tin](const int64& key) {
+        size_t h = 0;
+        for (int64 i = 0; i < Tin.dimension(0); i++) {
+          for (int64 j = 0; j < Tin.dimension(2); j++) {
+            h = Hash64Combine(h, hash<T>{}(Tin(i, key, j)));
           }
         }
-      }
-      return true;
-    };
+        return h;
+      };
 
-    std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
-        uniq(0, hash_fn, equal_to_fn);
+      auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) {
+        for (int64 i = 0; i < Tin.dimension(0); i++) {
+          for (int64 j = 0; j < Tin.dimension(2); j++) {
+            if (Tin(i, lhs, j) != Tin(i, rhs, j)) {
+              return false;
+            }
+          }
+        }
+        return true;
+      };
+
+      std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
+          uniq(0, hash_fn, equal_to_fn);
 
-    uniq.reserve(2 * Tin.dimension(1));
+      uniq.reserve(2 * Tin.dimension(1));
 
-    for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) {
-      auto it = uniq.insert(std::make_pair(i, j));
-      idx_vec(i) = it.first->second;
-      if (it.second) {
-        ++j;
+      for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) {
+        auto it = uniq.insert(std::make_pair(i, j));
+        idx_vec(i) = it.first->second;
+        if (it.second) {
+          ++j;
+        }
       }
-    }
 
-    int64 uniq_size = static_cast<int64>(uniq.size());
-    new_sizes[1] = uniq_size;
-    TensorShape output_shape(input.shape());
-    output_shape.set_dim(axis, uniq_size);
-    Tensor* output = nullptr;
-    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
-    auto Tout = output->shaped<T, 3>(new_sizes);
+      uniq_size = static_cast<int64>(uniq.size());
+      new_sizes[1] = uniq_size;
+      TensorShape output_shape(input.shape());
+      output_shape.set_dim(axis, uniq_size);
+      Tensor* output = nullptr;
+      OP_REQUIRES_OK(context,
+                     context->allocate_output(0, output_shape, &output));
+      auto Tout = output->shaped<T, 3>(new_sizes);
 
-    for (auto it : uniq) {
-      Tout.chip(it.second, 1) = Tin.chip(it.first, 1);
+      for (auto it : uniq) {
+        Tout.chip(it.second, 1) = Tin.chip(it.first, 1);
+      }
     }
 
     if (num_outputs() > 2) {
+      Tensor* output = nullptr;
      OP_REQUIRES_OK(context, context->allocate_output(
                                  2, TensorShape({uniq_size}), &output));
      auto count_output_vec = output->template vec<TIndex>();
      count_output_vec.setZero();
-      for (int64 i = 0; i < Tin.dimension(1); ++i) {
+      const int N = idx_vec.size();
+      for (int64 i = 0; i < N; ++i) {
        count_output_vec(idx_vec(i))++;
      }
    }
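
The general branch retained above keeps the pre-existing index-keyed approach. Here is a standalone sketch of that trick under simplifying assumptions: a row-major matrix whose columns play the role of the kernel's Tin.chip(key, 1) slices, with an FNV-style mix standing in for Hash64Combine (the name UniqueColumns is illustrative).

#include <cstddef>
#include <cstdint>
#include <functional>
#include <unordered_map>
#include <vector>

// De-duplicate the columns of a matrix without copying them: the map
// stores column indices, and its hash/equality functors read the matrix
// lazily whenever an entry must be hashed or compared.
template <typename T>
std::vector<int64_t> UniqueColumns(const std::vector<T>& data, int64_t rows,
                                   int64_t cols) {
  auto hash_fn = [&](int64_t key) {
    size_t h = 0;
    for (int64_t i = 0; i < rows; ++i) {
      // Combine per-element hashes down the column; a stand-in for the
      // kernel's Hash64Combine.
      h = (h * 1099511628211ULL) ^ std::hash<T>{}(data[i * cols + key]);
    }
    return h;
  };
  auto equal_fn = [&](int64_t lhs, int64_t rhs) {
    for (int64_t i = 0; i < rows; ++i) {
      if (data[i * cols + lhs] != data[i * cols + rhs]) return false;
    }
    return true;
  };
  // Same construction as the kernel: bucket count 0 plus the two functors.
  std::unordered_map<int64_t, int64_t, decltype(hash_fn), decltype(equal_fn)>
      uniq(0, hash_fn, equal_fn);
  std::vector<int64_t> idx(cols);
  for (int64_t i = 0, j = 0; i < cols; ++i) {
    auto it = uniq.insert({i, j});
    idx[i] = it.first->second;
    if (it.second) ++j;
  }
  return idx;  // idx[i] = output slot of the first column equal to column i
}

Because the map never holds slice data, memory stays proportional to the number of columns, at the cost of re-reading the tensor on every hash or comparison; the single-element branch avoids exactly that indirection, which is where its speedup comes from.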