summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Abseil Team <absl-team@google.com>2023-08-01 07:58:20 -0700
committerGravatar Copybara-Service <copybara-worker@google.com>2023-08-01 07:59:13 -0700
commit22091f4c0d6626b3ef40446ce3d4ccab19425ca3 (patch)
tree1fd4c904411a2c4aa2be7958b1095e5ffd71cb1e
parentd7aae58cb693aaf8a3351be2e5fce2eefeff30c9 (diff)
Speed up StrAppend by up to 4x.
PiperOrigin-RevId: 552802740 Change-Id: I662da1b03bfffb7939b44ea3850566d3c209c6cc
-rw-r--r--absl/strings/str_cat.cc178
-rw-r--r--absl/strings/str_cat.h152
2 files changed, 134 insertions, 196 deletions
diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc
index 2e49c31b..5ec02995 100644
--- a/absl/strings/str_cat.cc
+++ b/absl/strings/str_cat.cc
@@ -14,17 +14,9 @@
#include "absl/strings/str_cat.h"
-#include <assert.h>
-
-#include <algorithm>
-#include <cstddef>
#include <cstdint>
-#include <cstring>
#include <string>
-#include "absl/strings/ascii.h"
-#include "absl/strings/internal/resize_uninitialized.h"
-#include "absl/strings/numbers.h"
#include "absl/strings/string_view.h"
namespace absl {
@@ -37,170 +29,26 @@ ABSL_NAMESPACE_BEGIN
// of a mix of raw C strings, string_views, strings, and integer values.
// ----------------------------------------------------------------------
-// Append is merely a version of memcpy that returns the address of the byte
-// after the area just overwritten.
-static char* Append(char* out, const AlphaNum& x) {
- // memcpy is allowed to overwrite arbitrary memory, so doing this after the
- // call would force an extra fetch of x.size().
- char* after = out + x.size();
- if (x.size() != 0) {
- memcpy(out, x.data(), x.size());
- }
- return after;
-}
-
-std::string StrCat(const AlphaNum& a, const AlphaNum& b) {
- std::string result;
- absl::strings_internal::STLStringResizeUninitialized(&result,
- a.size() + b.size());
- char* const begin = &result[0];
- char* out = begin;
- out = Append(out, a);
- out = Append(out, b);
- assert(out == begin + result.size());
- return result;
-}
-
-std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) {
- std::string result;
- strings_internal::STLStringResizeUninitialized(
- &result, a.size() + b.size() + c.size());
- char* const begin = &result[0];
- char* out = begin;
- out = Append(out, a);
- out = Append(out, b);
- out = Append(out, c);
- assert(out == begin + result.size());
- return result;
-}
-
-std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c,
- const AlphaNum& d) {
- std::string result;
- strings_internal::STLStringResizeUninitialized(
- &result, a.size() + b.size() + c.size() + d.size());
- char* const begin = &result[0];
- char* out = begin;
- out = Append(out, a);
- out = Append(out, b);
- out = Append(out, c);
- out = Append(out, d);
- assert(out == begin + result.size());
- return result;
-}
-
namespace strings_internal {
-// Do not call directly - these are not part of the public API.
-std::string CatPieces(std::initializer_list<absl::string_view> pieces) {
- std::string result;
- size_t total_size = 0;
- for (absl::string_view piece : pieces) total_size += piece.size();
- strings_internal::STLStringResizeUninitialized(&result, total_size);
-
- char* const begin = &result[0];
- char* out = begin;
- for (absl::string_view piece : pieces) {
- const size_t this_size = piece.size();
- if (this_size != 0) {
- memcpy(out, piece.data(), this_size);
- out += this_size;
- }
- }
- assert(out == begin + result.size());
- return result;
+bool HaveOverlap(const std::string& x, absl::string_view y) {
+ if (y.empty()) return false;
+ // TODO(b/290623057): Re-evaluate the check below: it detects when buffers
+ // overlap (which is good) but it also introduces undefined behaviour when
+ // buffers don't overlap (substracting pointers that do not belong to the same
+ // array is UB [expr.add]). In other words, if compiler assumes that a program
+ // never has UB, then it can replace `assert(HaveOverlap(x, y))` with
+ // `assert(false)`.
+ return (uintptr_t(y.data() - x.data()) <= uintptr_t(x.size()));
}
-// It's possible to call StrAppend with an absl::string_view that is itself a
-// fragment of the string we're appending to. However the results of this are
-// random. Therefore, check for this in debug mode. Use unsigned math so we
-// only have to do one comparison. Note, there's an exception case: appending an
-// empty string is always allowed.
-#define ASSERT_NO_OVERLAP(dest, src) \
- assert(((src).size() == 0) || \
- (uintptr_t((src).data() - (dest).data()) > uintptr_t((dest).size())))
-
-void AppendPieces(std::string* dest,
- std::initializer_list<absl::string_view> pieces) {
- size_t old_size = dest->size();
- size_t total_size = old_size;
- for (absl::string_view piece : pieces) {
- ASSERT_NO_OVERLAP(*dest, piece);
- total_size += piece.size();
- }
- strings_internal::STLStringResizeUninitializedAmortized(dest, total_size);
-
- char* const begin = &(*dest)[0];
- char* out = begin + old_size;
- for (absl::string_view piece : pieces) {
- const size_t this_size = piece.size();
- if (this_size != 0) {
- memcpy(out, piece.data(), this_size);
- out += this_size;
- }
- }
- assert(out == begin + dest->size());
+#if defined(__GNUC__) && !defined(__clang__)
+char* AppendAlphaNum(char* dst, const AlphaNum& a) {
+ return std::copy_n(a.data(), a.size(), dst);
}
+#endif
} // namespace strings_internal
-void StrAppend(std::string* dest, const AlphaNum& a) {
- ASSERT_NO_OVERLAP(*dest, a);
- std::string::size_type old_size = dest->size();
- strings_internal::STLStringResizeUninitializedAmortized(dest,
- old_size + a.size());
- char* const begin = &(*dest)[0];
- char* out = begin + old_size;
- out = Append(out, a);
- assert(out == begin + dest->size());
-}
-
-void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) {
- ASSERT_NO_OVERLAP(*dest, a);
- ASSERT_NO_OVERLAP(*dest, b);
- std::string::size_type old_size = dest->size();
- strings_internal::STLStringResizeUninitializedAmortized(
- dest, old_size + a.size() + b.size());
- char* const begin = &(*dest)[0];
- char* out = begin + old_size;
- out = Append(out, a);
- out = Append(out, b);
- assert(out == begin + dest->size());
-}
-
-void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
- const AlphaNum& c) {
- ASSERT_NO_OVERLAP(*dest, a);
- ASSERT_NO_OVERLAP(*dest, b);
- ASSERT_NO_OVERLAP(*dest, c);
- std::string::size_type old_size = dest->size();
- strings_internal::STLStringResizeUninitializedAmortized(
- dest, old_size + a.size() + b.size() + c.size());
- char* const begin = &(*dest)[0];
- char* out = begin + old_size;
- out = Append(out, a);
- out = Append(out, b);
- out = Append(out, c);
- assert(out == begin + dest->size());
-}
-
-void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
- const AlphaNum& c, const AlphaNum& d) {
- ASSERT_NO_OVERLAP(*dest, a);
- ASSERT_NO_OVERLAP(*dest, b);
- ASSERT_NO_OVERLAP(*dest, c);
- ASSERT_NO_OVERLAP(*dest, d);
- std::string::size_type old_size = dest->size();
- strings_internal::STLStringResizeUninitializedAmortized(
- dest, old_size + a.size() + b.size() + c.size() + d.size());
- char* const begin = &(*dest)[0];
- char* out = begin + old_size;
- out = Append(out, a);
- out = Append(out, b);
- out = Append(out, c);
- out = Append(out, d);
- assert(out == begin + dest->size());
-}
-
ABSL_NAMESPACE_END
} // namespace absl
diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h
index d5f71ff0..f5f2c016 100644
--- a/absl/strings/str_cat.h
+++ b/absl/strings/str_cat.h
@@ -91,7 +91,9 @@
#include <array>
#include <cstdint>
#include <cstring>
+#include <functional>
#include <string>
+#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
@@ -99,6 +101,7 @@
#include "absl/base/attributes.h"
#include "absl/base/port.h"
#include "absl/strings/internal/has_absl_stringify.h"
+#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/internal/stringify_sink.h"
#include "absl/strings/numbers.h"
#include "absl/strings/string_view.h"
@@ -409,6 +412,93 @@ class AlphaNum {
char digits_[numbers_internal::kFastToBufferSize];
};
+namespace strings_internal {
+
+// Inlining this function improves performance. However, on GCC this triggers
+// [-Werror=stringop-overflow=], and in that case we split declaration and
+// definition as a workaround.
+#if defined(__GNUC__) && !defined(__clang__)
+char* AppendAlphaNum(char* dst, const AlphaNum& a);
+#else
+inline char* AppendAlphaNum(char* dst, const AlphaNum& a) {
+ return std::copy_n(a.data(), a.size(), dst);
+}
+#endif
+
+// It's possible to call StrAppend with an absl::string_view that is itself a
+// fragment of the string we're appending to. However the results of this are
+// UB. Therefore, check for this in debug mode. Use unsigned math so we
+// only have to do one comparison. Note, there's an exception case: appending an
+// empty string is always allowed.
+bool HaveOverlap(const std::string& x, absl::string_view y);
+
+// C++14 compatible alternative to left
+// https://en.cppreference.com/w/cpp/language/fold expression.
+// TODO(b/290784225): Remove `FoldLeft` once Abseil drops C++14 support.
+template <typename F, typename Acc>
+auto FoldLeft(const F&, Acc&& acc) {
+ return std::forward<Acc>(acc);
+}
+template <typename F, typename Acc, typename First, typename... Rest>
+auto FoldLeft(const F& f, Acc&& acc, First&& first, Rest&&... rest) {
+ return FoldLeft(f, f(std::forward<Acc>(acc), std::forward<First>(first)),
+ std::forward<Rest>(rest)...);
+}
+
+template <std::size_t... Indices, typename Tuple>
+inline void StrAppendTemplate(std::index_sequence<Indices...>,
+ std::string* dest, Tuple args) {
+ // Note on `Is` and `Tuple`:
+ //
+ // The goal is to pass N arguments to `StrAppend` and retain `N` as a
+ // compile-time constant. Thus, we cannot use dynamically sized collections
+ // such as `absl::Span` or `std::initialized_list`: `std::tuple` comes to
+ // the rescue.
+ // `Tuple` is a fixed-sized collection of N elements of `const AlphaNum&`.
+ // `Indices` is deduced from `std::make_index_sequence<N>` and is used as a
+ // helper to writing Fold expressions on `Tuple`.
+ assert(FoldLeft(std::logical_and<>{},
+ !HaveOverlap(*dest, std::get<Indices>(args).Piece())...));
+ size_t old_size = dest->size();
+ size_t new_size =
+ FoldLeft(std::plus<>{}, old_size, std::get<Indices>(args).size()...);
+ strings_internal::STLStringResizeUninitializedAmortized(dest, new_size);
+ FoldLeft(AppendAlphaNum, &(*dest)[old_size], std::get<Indices>(args)...);
+}
+
+template <std::size_t... Indices, typename Tuple>
+inline std::string StrCatTemplate(std::index_sequence<Indices...>, Tuple args) {
+ // See implementation comments in `StrAppendTemplate`.
+ std::string dest;
+ size_t size = FoldLeft(std::plus<size_t>{}, size_t{0},
+ std::get<Indices>(args).size()...);
+ strings_internal::STLStringResizeUninitializedAmortized(&dest, size);
+ FoldLeft(AppendAlphaNum, &dest[0], std::get<Indices>(args)...);
+ return dest;
+}
+
+// Template functions `StrAppendImpl` and `StrCatImpl` are not exported directly
+// due to historical reasons: there are many existing dependencies that expect
+// 0-4 argument overloads to be non-template functions. Hence implementations
+// are wrapped to keep the compatibility (0-4 arguments only, 5+ arguments are
+// templates).
+
+template <typename... AV>
+inline void StrAppendImpl(std::string* dest, const AV&... args) {
+ strings_internal::StrAppendTemplate(
+ std::make_index_sequence<sizeof...(AV)>{}, dest,
+ std::forward_as_tuple(static_cast<const AlphaNum&>(args)...));
+}
+
+template <typename... AV>
+inline std::string StrCatImpl(const AV&... args) {
+ return strings_internal::StrCatTemplate(
+ std::make_index_sequence<sizeof...(AV)>{},
+ std::forward_as_tuple(static_cast<const AlphaNum&>(args)...));
+}
+
+} // namespace strings_internal
+
// -----------------------------------------------------------------------------
// StrCat()
// -----------------------------------------------------------------------------
@@ -436,36 +526,32 @@ class AlphaNum {
// quadratic time operation with O(n) dynamic allocations.
//
// See `StrAppend()` below for more information.
-
-namespace strings_internal {
-
-// Do not call directly - this is not part of the public API.
-std::string CatPieces(std::initializer_list<absl::string_view> pieces);
-void AppendPieces(std::string* dest,
- std::initializer_list<absl::string_view> pieces);
-
-} // namespace strings_internal
-
ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); }
-
ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a) {
return std::string(a.data(), a.size());
}
-
-ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b);
-ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b,
- const AlphaNum& c);
-ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b,
- const AlphaNum& c, const AlphaNum& d);
+ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a,
+ const AlphaNum& b) {
+ return strings_internal::StrCatImpl(a, b);
+}
+ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a,
+ const AlphaNum& b,
+ const AlphaNum& c) {
+ return strings_internal::StrCatImpl(a, b, c);
+}
+ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a,
+ const AlphaNum& b,
+ const AlphaNum& c,
+ const AlphaNum& d) {
+ return strings_internal::StrCatImpl(a, b, c, d);
+}
// Support 5 or more arguments
template <typename... AV>
ABSL_MUST_USE_RESULT inline std::string StrCat(
const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d,
const AlphaNum& e, const AV&... args) {
- return strings_internal::CatPieces(
- {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
- static_cast<const AlphaNum&>(args).Piece()...});
+ return strings_internal::StrCatImpl(a, b, c, d, e, args...);
}
// -----------------------------------------------------------------------------
@@ -494,23 +580,27 @@ ABSL_MUST_USE_RESULT inline std::string StrCat(
// std::string s = "foobar";
// absl::string_view p = s;
// StrAppend(&s, p);
-
inline void StrAppend(std::string*) {}
-void StrAppend(std::string* dest, const AlphaNum& a);
-void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b);
-void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
- const AlphaNum& c);
-void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
- const AlphaNum& c, const AlphaNum& d);
-
+inline void StrAppend(std::string* dest, const AlphaNum& a) {
+ strings_internal::StrAppendImpl(dest, a);
+}
+inline void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) {
+ strings_internal::StrAppendImpl(dest, a, b);
+}
+inline void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+ const AlphaNum& c) {
+ strings_internal::StrAppendImpl(dest, a, b, c);
+}
+inline void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+ const AlphaNum& c, const AlphaNum& d) {
+ strings_internal::StrAppendImpl(dest, a, b, c, d);
+}
// Support 5 or more arguments
template <typename... AV>
inline void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
const AlphaNum& c, const AlphaNum& d, const AlphaNum& e,
const AV&... args) {
- strings_internal::AppendPieces(
- dest, {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
- static_cast<const AlphaNum&>(args).Piece()...});
+ strings_internal::StrAppendImpl(dest, a, b, c, d, e, args...);
}
// Helper function for the future StrCat default floating-point format, %.6g