From a9a49560208484f1f99efdde889da6147e8722fe Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Fri, 29 Jan 2021 12:09:30 -0800 Subject: Export of internal Abseil changes -- c68f1886f5e8fd90eb0c2d2e68feaf00a7cdacda by CJ Johnson : Introduce absl::Cleanup to the OSS repo PiperOrigin-RevId: 354583156 -- 17030cf388e10f7eb959e3e566326d1072ce392e by Abseil Team : Internal change only PiperOrigin-RevId: 354574953 -- e979d7236d4f3252e79ddda6739b67a9a326bf6d by CJ Johnson : Internal change PiperOrigin-RevId: 354545297 -- 7ea02b3783f7f49ef97d86a8f6580a19cc57df14 by Abseil Team : Pre-allocate memory for vectors where the size is known. PiperOrigin-RevId: 354344576 -- 9246c7cb11f1d6444f79ebe25acc69a8a9b870e0 by Matt Kulukundis : Add support for Elbrus 2000 (e2k) Import of https://github.com/abseil/abseil-cpp/pull/889 PiperOrigin-RevId: 354344013 -- 0fc93d359cc1fb307552e917b37b7b2e7eed822f by Abseil Team : Integrate CordRepRing logic into cord (but do not enable it) PiperOrigin-RevId: 354312238 -- eda05622f7da71466723acb33403f783529df24b by Abseil Team : Protect ignore diagnostic with "__has_warning". PiperOrigin-RevId: 354112334 -- 47716c5d8fb10efa4fdd801d28bac414c6f8ec32 by Abseil Team : Rearrange InlinedVector copy constructor and destructor to treat a few special cases inline and then tail-call a non-inlined routine for the rest. In particular, we optimize for empty vectors in both cases. Added a couple of benchmarks that copy either an InlVec or an InlVec>. Speed difference: ``` BM_CopyTrivial/0 0.92ns +- 0% 0.47ns +- 0% -48.91% (p=0.000 n=11+12) BM_CopyTrivial/1 0.92ns +- 0% 1.15ns +- 0% +25.00% (p=0.000 n=10+9) BM_CopyTrivial/8 8.57ns +- 0% 10.72ns +- 1% +25.16% (p=0.000 n=10+12) BM_CopyNonTrivial/0 3.21ns +- 0% 0.70ns +- 0% -78.23% (p=0.000 n=12+10) BM_CopyNonTrivial/1 5.88ns +- 1% 5.51ns +- 0% -6.28% (p=0.000 n=10+8) BM_CopyNonTrivial/8 21.5ns +- 1% 15.2ns +- 2% -29.23% (p=0.000 n=12+12) ``` Note: the slowdowns are a few cycles which is expected given the procedure call added in that case. We decided this is a good tradeoff given the code size reductions and the more significant speedups for empty vectors. Size difference (as measured by nm): ``` BM_CopyTrivial from 1048 bytes to 326 bytes. BM_CopyNonTrivial from 749 bytes to 470 bytes. ``` Code size for a large binary drops by ~500KB (from 349415719 to 348906015 348906191). All of the benchmarks that showed a significant difference: Ones that improve with this CL: ``` BM_CopyNonTrivial/0 3.21ns +- 0% 0.70ns +- 0% -78.23% (p=0.000 n=12+10) BM_InlinedVectorFillString/0 0.93ns +- 0% 0.24ns +- 0% -74.19% (p=0.000 n=12+10) BM_InlinedVectorAssignments/1 10.5ns +- 0% 4.1ns +- 0% -60.64% (p=0.000 n=11+10) BM_InlinedVectorAssignments/2 10.7ns +- 0% 4.4ns +- 0% -59.08% (p=0.000 n=11+11) BM_CopyTrivial/0 0.92ns +- 0% 0.47ns +- 0% -48.91% (p=0.000 n=11+12) BM_CopyNonTrivial/8 21.5ns +- 1% 15.2ns +- 2% -29.23% (p=0.000 n=12+12) BM_StdVectorEmpty 0.47ns +- 1% 0.35ns +- 0% -24.73% (p=0.000 n=12+12) BM_StdVectorSize 0.46ns +- 2% 0.35ns +- 0% -24.32% (p=0.000 n=12+12) BM_SwapElements/0 3.44ns +- 0% 2.76ns +- 1% -19.83% (p=0.000 n=11+11) BM_InlinedVectorFillRange/256 20.7ns +- 1% 17.8ns +- 0% -14.08% (p=0.000 n=12+9) BM_CopyNonTrivial/1 5.88ns +- 1% 5.51ns +- 0% -6.28% (p=0.000 n=10+8) BM_SwapElements/1 4.19ns +- 0% 3.95ns +- 1% -5.63% (p=0.000 n=11+12) BM_SwapElements/1 4.18ns +- 0% 3.99ns +- 0% -4.70% (p=0.000 n=9+11) BM_SwapElements/0 2.41ns +- 0% 2.31ns +- 0% -4.45% (p=0.000 n=12+12) BM_InlinedVectorFillRange/64 8.25ns +- 0% 8.04ns +- 0% -2.51% (p=0.000 n=12+11) BM_SwapElements/1 82.4ns +- 0% 81.5ns +- 0% -1.06% (p=0.000 n=12+12) ``` Ones that get worse with this CL: ``` BM_CopyTrivial/1 0.92ns +- 0% 1.15ns +- 0% +25.00% (p=0.000 n=10+9) BM_CopyTrivial/8 8.57ns +- 0% 10.72ns +- 1% +25.16% (p=0.000 n=10+12) BM_SwapElements/512 1.48ns +- 1% 1.66ns +- 1% +11.88% (p=0.000 n=12+12) BM_InlinedVectorFillString/1 11.5ns +- 0% 12.8ns +- 1% +11.62% (p=0.000 n=12+11) BM_SwapElements/64 1.48ns +- 2% 1.66ns +- 1% +11.66% (p=0.000 n=12+11) BM_SwapElements/1k 1.48ns +- 1% 1.65ns +- 2% +11.32% (p=0.000 n=12+12) BM_SwapElements/512 1.48ns +- 2% 1.58ns +- 4% +6.62% (p=0.000 n=11+12) BM_SwapElements/1k 1.49ns +- 2% 1.58ns +- 3% +6.05% (p=0.000 n=12+12) BM_SwapElements/64 1.48ns +- 2% 1.57ns +- 4% +6.04% (p=0.000 n=11+12) BM_InlinedVectorFillRange/1 4.81ns +- 0% 5.05ns +- 0% +4.83% (p=0.000 n=11+11) BM_InlinedVectorFillString/8 79.4ns +- 1% 83.1ns +- 1% +4.64% (p=0.000 n=10+12) BM_StdVectorFillString/1 16.3ns +- 0% 16.6ns +- 0% +2.13% (p=0.000 n=11+8) ``` PiperOrigin-RevId: 353906786 -- 8e26518b3cec9c598e5e9573c46c3bd1b03a67ef by Abseil Team : Internal change PiperOrigin-RevId: 353737330 -- f206ae0983e58c9904ed8b8f05f9caf564a446be by Matt Kulukundis : Import of CCTZ from GitHub. PiperOrigin-RevId: 353682256 GitOrigin-RevId: c68f1886f5e8fd90eb0c2d2e68feaf00a7cdacda Change-Id: I5790c1036c4f543c701d1039848fabf7ae881ad8 --- absl/container/inlined_vector.h | 8 ++-- absl/container/inlined_vector_benchmark.cc | 22 ++++++++++ absl/container/internal/inlined_vector.h | 66 ++++++++++++++++++++++++++++-- 3 files changed, 90 insertions(+), 6 deletions(-) (limited to 'absl/container') diff --git a/absl/container/inlined_vector.h b/absl/container/inlined_vector.h index 90bb96e8..7c182342 100644 --- a/absl/container/inlined_vector.h +++ b/absl/container/inlined_vector.h @@ -167,11 +167,13 @@ class InlinedVector { // Creates an inlined vector by copying the contents of `other` using `alloc`. InlinedVector(const InlinedVector& other, const allocator_type& alloc) : storage_(alloc) { - if (IsMemcpyOk::value && !other.storage_.GetIsAllocated()) { + if (other.empty()) { + // Empty; nothing to do. + } else if (IsMemcpyOk::value && !other.storage_.GetIsAllocated()) { + // Memcpy-able and do not need allocation. storage_.MemcpyFrom(other.storage_); } else { - storage_.Initialize(IteratorValueAdapter(other.data()), - other.size()); + storage_.InitFrom(other.storage_); } } diff --git a/absl/container/inlined_vector_benchmark.cc b/absl/container/inlined_vector_benchmark.cc index b8dafe93..e256fad6 100644 --- a/absl/container/inlined_vector_benchmark.cc +++ b/absl/container/inlined_vector_benchmark.cc @@ -534,6 +534,28 @@ void BM_ConstructFromMove(benchmark::State& state) { ABSL_INTERNAL_BENCHMARK_ONE_SIZE(BM_ConstructFromMove, TrivialType); ABSL_INTERNAL_BENCHMARK_ONE_SIZE(BM_ConstructFromMove, NontrivialType); +// Measure cost of copy-constructor+destructor. +void BM_CopyTrivial(benchmark::State& state) { + const int n = state.range(0); + InlVec src(n); + for (auto s : state) { + InlVec copy(src); + benchmark::DoNotOptimize(copy); + } +} +BENCHMARK(BM_CopyTrivial)->Arg(0)->Arg(1)->Arg(kLargeSize); + +// Measure cost of copy-constructor+destructor. +void BM_CopyNonTrivial(benchmark::State& state) { + const int n = state.range(0); + InlVec> src(n); + for (auto s : state) { + InlVec> copy(src); + benchmark::DoNotOptimize(copy); + } +} +BENCHMARK(BM_CopyNonTrivial)->Arg(0)->Arg(1)->Arg(kLargeSize); + template void BM_AssignSizeRef(benchmark::State& state) { auto size = ToSize; diff --git a/absl/container/internal/inlined_vector.h b/absl/container/internal/inlined_vector.h index 120849cd..b8aec45b 100644 --- a/absl/container/internal/inlined_vector.h +++ b/absl/container/internal/inlined_vector.h @@ -81,6 +81,23 @@ void DestroyElements(AllocatorType* alloc_ptr, Pointer destroy_first, } } +// If kUseMemcpy is true, memcpy(dst, src, n); else do nothing. +// Useful to avoid compiler warnings when memcpy() is used for T values +// that are not trivially copyable in non-reachable code. +template +inline void MemcpyIfAllowed(void* dst, const void* src, size_t n); + +// memcpy when allowed. +template <> +inline void MemcpyIfAllowed(void* dst, const void* src, size_t n) { + memcpy(dst, src, n); +} + +// Do nothing for types that are not memcpy-able. This function is only +// called from non-reachable branches. +template <> +inline void MemcpyIfAllowed(void*, const void*, size_t) {} + template void ConstructElements(AllocatorType* alloc_ptr, Pointer construct_first, @@ -310,9 +327,14 @@ class Storage { : metadata_(alloc, /* size and is_allocated */ 0) {} ~Storage() { - pointer data = GetIsAllocated() ? GetAllocatedData() : GetInlinedData(); - inlined_vector_internal::DestroyElements(GetAllocPtr(), data, GetSize()); - DeallocateIfAllocated(); + if (GetSizeAndIsAllocated() == 0) { + // Empty and not allocated; nothing to do. + } else if (IsMemcpyOk::value) { + // No destructors need to be run; just deallocate if necessary. + DeallocateIfAllocated(); + } else { + DestroyContents(); + } } // --------------------------------------------------------------------------- @@ -370,6 +392,8 @@ class Storage { // Storage Member Mutators // --------------------------------------------------------------------------- + ABSL_ATTRIBUTE_NOINLINE void InitFrom(const Storage& other); + template void Initialize(ValueAdapter values, size_type new_size); @@ -452,6 +476,8 @@ class Storage { } private: + ABSL_ATTRIBUTE_NOINLINE void DestroyContents(); + using Metadata = container_internal::CompressedTuple; @@ -476,6 +502,40 @@ class Storage { Data data_; }; +template +void Storage::DestroyContents() { + pointer data = GetIsAllocated() ? GetAllocatedData() : GetInlinedData(); + inlined_vector_internal::DestroyElements(GetAllocPtr(), data, GetSize()); + DeallocateIfAllocated(); +} + +template +void Storage::InitFrom(const Storage& other) { + const auto n = other.GetSize(); + assert(n > 0); // Empty sources handled handled in caller. + const_pointer src; + pointer dst; + if (!other.GetIsAllocated()) { + dst = GetInlinedData(); + src = other.GetInlinedData(); + } else { + // Because this is only called from the `InlinedVector` constructors, it's + // safe to take on the allocation with size `0`. If `ConstructElements(...)` + // throws, deallocation will be automatically handled by `~Storage()`. + size_type new_capacity = ComputeCapacity(GetInlinedCapacity(), n); + dst = AllocatorTraits::allocate(*GetAllocPtr(), new_capacity); + SetAllocatedData(dst, new_capacity); + src = other.GetAllocatedData(); + } + if (IsMemcpyOk::value) { + MemcpyIfAllowed(dst, src, sizeof(dst[0]) * n); + } else { + auto values = IteratorValueAdapter(src); + inlined_vector_internal::ConstructElements(GetAllocPtr(), dst, &values, n); + } + GetSizeAndIsAllocated() = other.GetSizeAndIsAllocated(); +} + template template auto Storage::Initialize(ValueAdapter values, size_type new_size) -- cgit v1.2.3