22 files changed, 618 insertions, 94 deletions
diff --git a/absl/base/internal/low_level_scheduling.h b/absl/base/internal/low_level_scheduling.h
index 31261298..ed0b4bfa 100644
--- a/absl/base/internal/low_level_scheduling.h
+++ b/absl/base/internal/low_level_scheduling.h
@@ -29,9 +29,6 @@ extern "C" void __google_enable_rescheduling(bool disable_result);
 
 namespace absl {
 ABSL_NAMESPACE_BEGIN
-class CondVar;
-class Mutex;
-
 namespace base_internal {
 
 class SchedulingHelper;  // To allow use of SchedulingGuard.
@@ -80,8 +77,6 @@ class SchedulingGuard {
   };
 
   // Access to SchedulingGuard is explicitly permitted.
-  friend class absl::CondVar;
-  friend class absl::Mutex;
   friend class SchedulingHelper;
   friend class SpinLock;
 
diff --git a/absl/base/internal/spinlock.h b/absl/base/internal/spinlock.h
index 2222398b..e6ac9e64 100644
--- a/absl/base/internal/spinlock.h
+++ b/absl/base/internal/spinlock.h
@@ -64,7 +64,14 @@ class ABSL_LOCKABLE SpinLock {
   constexpr SpinLock(absl::ConstInitType, base_internal::SchedulingMode mode)
       : lockword_(IsCooperative(mode) ? kSpinLockCooperative : 0) {}
 
+  // For global SpinLock instances prefer trivial destructor when possible.
+  // Default but non-trivial destructor in some build configurations causes an
+  // extra static initializer.
+#ifdef ABSL_INTERNAL_HAVE_TSAN_INTERFACE
   ~SpinLock() { ABSL_TSAN_MUTEX_DESTROY(this, __tsan_mutex_not_static); }
+#else
+  ~SpinLock() = default;
+#endif
 
   // Acquire this SpinLock.
   inline void Lock() ABSL_EXCLUSIVE_LOCK_FUNCTION() {
diff --git a/absl/base/spinlock_test_common.cc b/absl/base/spinlock_test_common.cc
index b68c51a1..b33c54ba 100644
--- a/absl/base/spinlock_test_common.cc
+++ b/absl/base/spinlock_test_common.cc
@@ -20,6 +20,7 @@
 #include <limits>
 #include <random>
 #include <thread>  // NOLINT(build/c++11)
+#include <type_traits>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -103,6 +104,10 @@ static void ThreadedTest(SpinLock* spinlock) {
   }
 }
 
+#ifndef THREAD_SANITIZER
+static_assert(std::is_trivially_destructible<SpinLock>(), "");
+#endif
+
 TEST(SpinLock, StackNonCooperativeDisablesScheduling) {
   SpinLock spinlock(base_internal::SCHEDULE_KERNEL_ONLY);
   spinlock.Lock();
diff --git a/absl/container/internal/hash_function_defaults_test.cc b/absl/container/internal/hash_function_defaults_test.cc
index 2d05a0b7..59576b8e 100644
--- a/absl/container/internal/hash_function_defaults_test.cc
+++ b/absl/container/internal/hash_function_defaults_test.cc
@@ -337,11 +337,11 @@ ABSL_NAMESPACE_END
 }  // namespace absl
 
 enum Hash : size_t {
-  kStd = 0x2,       // std::hash
+  kStd = 0x1,       // std::hash
 #ifdef _MSC_VER
   kExtension = kStd,  // In MSVC, std::hash == ::hash
 #else                 // _MSC_VER
-  kExtension = 0x4,  // ::hash (GCC extension)
+  kExtension = 0x2,  // ::hash (GCC extension)
 #endif                // _MSC_VER
 };
 
diff --git a/absl/debugging/symbolize_elf.inc b/absl/debugging/symbolize_elf.inc
index ed77159e..328869f1 100644
--- a/absl/debugging/symbolize_elf.inc
+++ b/absl/debugging/symbolize_elf.inc
@@ -1376,7 +1376,7 @@ int InstallSymbolDecorator(SymbolDecorator decorator, void *arg) {
 
   if (!g_decorators_mu.TryLock()) {
     // Someone else is using decorators. Get out.
-    return false;
+    return -2;
   }
   int ret = ticket;
   if (g_num_decorators >= kMaxDecorators) {
diff --git a/absl/flags/BUILD.bazel b/absl/flags/BUILD.bazel
index 006911fd..524e7027 100644
--- a/absl/flags/BUILD.bazel
+++ b/absl/flags/BUILD.bazel
@@ -106,6 +106,9 @@ cc_library(
 
 cc_library(
     name = "commandlineflag_internal",
+    srcs = [
+        "internal/commandlineflag.cc",
+    ],
     hdrs = [
         "internal/commandlineflag.h",
     ],
diff --git a/absl/flags/CMakeLists.txt b/absl/flags/CMakeLists.txt
index ef75db8e..343774de 100644
--- a/absl/flags/CMakeLists.txt
+++ b/absl/flags/CMakeLists.txt
@@ -95,6 +95,8 @@ absl_cc_library(
 absl_cc_library(
   NAME
     flags_commandlineflag_internal
+  SRCS
+    "internal/commandlineflag.cc"
   HDRS
     "internal/commandlineflag.h"
   COPTS
diff --git a/absl/flags/commandlineflag.cc b/absl/flags/commandlineflag.cc
index 217b2d87..9f3b4a5a 100644
--- a/absl/flags/commandlineflag.cc
+++ b/absl/flags/commandlineflag.cc
@@ -30,9 +30,5 @@ bool CommandLineFlag::ParseFrom(absl::string_view value, std::string* error) {
                    flags_internal::kProgrammaticChange, *error);
 }
 
-namespace flags_internal {
-FlagStateInterface::~FlagStateInterface() {}
-}  // namespace flags_internal
 ABSL_NAMESPACE_END
 }  // namespace absl
-
diff --git a/absl/flags/commandlineflag.h b/absl/flags/commandlineflag.h
index 7e21d05d..f2fa0897 100644
--- a/absl/flags/commandlineflag.h
+++ b/absl/flags/commandlineflag.h
@@ -108,6 +108,10 @@ class CommandLineFlag {
     U u;
 
     Read(&u.value);
+    // allow retired flags to be "read", so we can report invalid access.
+    if (IsRetired()) {
+      return absl::nullopt;
+    }
     return std::move(u.value);
   }
 
diff --git a/absl/flags/internal/commandlineflag.cc b/absl/flags/internal/commandlineflag.cc
new file mode 100644
index 00000000..4482955c
--- /dev/null
+++ b/absl/flags/internal/commandlineflag.cc
@@ -0,0 +1,26 @@
+//
+// Copyright 2020 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/flags/internal/commandlineflag.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace flags_internal {
+
+FlagStateInterface::~FlagStateInterface() {}
+
+}  // namespace flags_internal
+ABSL_NAMESPACE_END
+}  // namespace absl
diff --git a/absl/flags/internal/flag.h b/absl/flags/internal/flag.h
index 2cc44e00..89e43ad7 100644
--- a/absl/flags/internal/flag.h
+++ b/absl/flags/internal/flag.h
@@ -482,7 +482,8 @@ class FlagImpl final : public CommandLineFlag {
   friend class FlagState;
 
   // Ensures that `data_guard_` is initialized and returns it.
-  absl::Mutex* DataGuard() const ABSL_LOCK_RETURNED((absl::Mutex*)&data_guard_);
+  absl::Mutex* DataGuard() const
+      ABSL_LOCK_RETURNED(reinterpret_cast<absl::Mutex*>(data_guard_));
   // Returns heap allocated value of type T initialized with default value.
   std::unique_ptr<void, DynValueDeleter> MakeInitValue() const
       ABSL_EXCLUSIVE_LOCKS_REQUIRED(*DataGuard());
diff --git a/absl/flags/internal/usage.cc b/absl/flags/internal/usage.cc
index 35b6427b..0805df31 100644
--- a/absl/flags/internal/usage.cc
+++ b/absl/flags/internal/usage.cc
@@ -250,14 +250,14 @@ void FlagsHelpImpl(std::ostream& out, flags_internal::FlagKindFilter filter_cb,
       matching_flags;
 
   flags_internal::ForEachFlag([&](absl::CommandLineFlag& flag) {
-    std::string flag_filename = flag.Filename();
-
     // Ignore retired flags.
     if (flag.IsRetired()) return;
 
     // If the flag has been stripped, pretend that it doesn't exist.
     if (flag.Help() == flags_internal::kStrippedFlagHelp) return;
 
+    std::string flag_filename = flag.Filename();
+
     // Make sure flag satisfies the filter
     if (!filter_cb || !filter_cb(flag_filename)) return;
 
diff --git a/absl/flags/parse.cc b/absl/flags/parse.cc
index e2c88ff8..4f4bb3d5 100644
--- a/absl/flags/parse.cc
+++ b/absl/flags/parse.cc
@@ -729,12 +729,13 @@ std::vector<char*> ParseCommandLineImpl(int argc, char* argv[],
     }
 
     // 100. Set the located flag to a new new value, unless it is retired.
-    // Setting retired flag fails, but we ignoring it here.
-    if (flag->IsRetired()) continue;
-
+    // Setting retired flag fails, but we ignoring it here while also reporting
+    // access to retired flag.
     std::string error;
     if (!flags_internal::PrivateHandleAccessor::ParseFrom(
             *flag, value, SET_FLAGS_VALUE, kCommandLine, error)) {
+      if (flag->IsRetired()) continue;
+
       flags_internal::ReportUsageError(error, true);
       success = false;
     } else {
diff --git a/absl/flags/reflection.cc b/absl/flags/reflection.cc
index 5fc945f2..02b7c06a 100644
--- a/absl/flags/reflection.cc
+++ b/absl/flags/reflection.cc
@@ -88,12 +88,6 @@ CommandLineFlag* FlagRegistry::FindFlagLocked(absl::string_view name) {
   if (i == flags_.end()) {
     return nullptr;
   }
-
-  if (i->second->IsRetired()) {
-    flags_internal::ReportUsageError(
-        absl::StrCat("Accessing retired flag '", name, "'"), false);
-  }
-
   return i->second;
 }
 
@@ -155,7 +149,7 @@ void FlagRegistry::RegisterFlag(CommandLineFlag& flag) {
     } else {
       flags_internal::ReportUsageError(
           absl::StrCat(
-              "Something wrong with flag '", flag.Name(), "' in file '",
+              "Something is wrong with flag '", flag.Name(), "' in file '",
               flag.Filename(), "'. One possibility: file '", flag.Filename(),
               "' is being linked both statically and dynamically into this "
               "executable. e.g. some files listed as srcs to a test and also "
@@ -206,16 +200,34 @@ class RetiredFlagObj final : public CommandLineFlag {
 
  private:
   absl::string_view Name() const override { return name_; }
-  std::string Filename() const override { return "RETIRED"; }
+  std::string Filename() const override {
+    OnAccess();
+    return "RETIRED";
+  }
   FlagFastTypeId TypeId() const override { return type_id_; }
-  std::string Help() const override { return ""; }
+  std::string Help() const override {
+    OnAccess();
+    return "";
+  }
   bool IsRetired() const override { return true; }
-  bool IsSpecifiedOnCommandLine() const override { return false; }
-  std::string DefaultValue() const override { return ""; }
-  std::string CurrentValue() const override { return ""; }
+  bool IsSpecifiedOnCommandLine() const override {
+    OnAccess();
+    return false;
+  }
+  std::string DefaultValue() const override {
+    OnAccess();
+    return "";
+  }
+  std::string CurrentValue() const override {
+    OnAccess();
+    return "";
+  }
 
   // Any input is valid
-  bool ValidateInputValue(absl::string_view) const override { return true; }
+  bool ValidateInputValue(absl::string_view) const override {
+    OnAccess();
+    return true;
+  }
 
   std::unique_ptr<flags_internal::FlagStateInterface> SaveState() override {
     return nullptr;
@@ -223,12 +235,18 @@ class RetiredFlagObj final : public CommandLineFlag {
 
   bool ParseFrom(absl::string_view, flags_internal::FlagSettingMode,
                  flags_internal::ValueSource, std::string&) override {
+    OnAccess();
     return false;
   }
 
-  void CheckDefaultValueParsingRoundtrip() const override {}
+  void CheckDefaultValueParsingRoundtrip() const override { OnAccess(); }
 
-  void Read(void*) const override {}
+  void Read(void*) const override { OnAccess(); }
+
+  void OnAccess() const {
+    flags_internal::ReportUsageError(
+        absl::StrCat("Accessing retired flag '", name_, "'"), false);
+  }
 
   // Data members
   const char* const name_;
diff --git a/absl/random/BUILD.bazel b/absl/random/BUILD.bazel
index 694331c2..81e150e6 100644
--- a/absl/random/BUILD.bazel
+++ b/absl/random/BUILD.bazel
@@ -199,6 +199,7 @@ cc_test(
 cc_test(
     name = "distributions_test",
     size = "small",
+    timeout = "moderate",
     srcs = [
         "distributions_test.cc",
     ],
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index 8220896d..ef412639 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -720,6 +720,7 @@ cc_test(
     visibility = ["//visibility:private"],
     deps = [
         ":str_format_internal",
+        ":strings",
         "//absl/base:raw_logging_internal",
         "//absl/types:optional",
         "@com_google_googletest//:gtest_main",
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index c0ea0c8e..d7237231 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -475,6 +475,7 @@ absl_cc_test(
   COPTS
     ${ABSL_TEST_COPTS}
   DEPS
+    absl::strings
     absl::str_format_internal
     absl::raw_logging_internal
     absl::int128
diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc
index 68f53987..8ecffc4b 100644
--- a/absl/strings/cord.cc
+++ b/absl/strings/cord.cc
@@ -495,7 +495,7 @@ static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) {
 
 // This will trigger LNK2005 in MSVC.
 #ifndef COMPILER_MSVC
-const unsigned char Cord::InlineRep::kMaxInline;
+constexpr unsigned char Cord::InlineRep::kMaxInline;
 #endif  // COMPILER_MSVC
 
 inline void Cord::InlineRep::set_data(const char* data, size_t n,
diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc
index e37d0546..488d4cd4 100644
--- a/absl/strings/internal/str_format/convert_test.cc
+++ b/absl/strings/internal/str_format/convert_test.cc
@@ -12,6 +12,7 @@
 #include "gtest/gtest.h"
 #include "absl/base/internal/raw_logging.h"
 #include "absl/strings/internal/str_format/bind.h"
+#include "absl/strings/match.h"
 #include "absl/types/optional.h"
 
 namespace absl {
@@ -19,6 +20,13 @@ ABSL_NAMESPACE_BEGIN
 namespace str_format_internal {
 namespace {
 
+struct NativePrintfTraits {
+  bool hex_float_has_glibc_rounding;
+  bool hex_float_prefers_denormal_repr;
+  bool hex_float_uses_minimal_precision_when_not_specified;
+  bool hex_float_optimizes_leading_digit_bit_count;
+};
+
 template <typename T, size_t N>
 size_t ArraySize(T (&)[N]) {
   return N;
@@ -118,6 +126,63 @@ std::string StrPrint(const char *format, ...) {
   return result;
 }
 
+NativePrintfTraits VerifyNativeImplementationImpl() {
+  NativePrintfTraits result;
+
+  // >>> hex_float_has_glibc_rounding. To have glibc's rounding behavior we need
+  // to meet three requirements:
+  //
+  //   - The threshold for rounding up is 8 (for e.g. MSVC uses 9).
+  //   - If the digits lower than than the 8 are non-zero then we round up.
+  //   - If the digits lower than the 8 are all zero then we round toward even.
+  //
+  // The numbers below represent all the cases covering {below,at,above} the
+  // threshold (8) with both {zero,non-zero} lower bits and both {even,odd}
+  // preceding digits.
+  const double d0079 = 65657.0;  // 0x1.0079p+16
+  const double d0179 = 65913.0;  // 0x1.0179p+16
+  const double d0080 = 65664.0;  // 0x1.0080p+16
+  const double d0180 = 65920.0;  // 0x1.0180p+16
+  const double d0081 = 65665.0;  // 0x1.0081p+16
+  const double d0181 = 65921.0;  // 0x1.0181p+16
+  result.hex_float_has_glibc_rounding =
+      StartsWith(StrPrint("%.2a", d0079), "0x1.00") &&
+      StartsWith(StrPrint("%.2a", d0179), "0x1.01") &&
+      StartsWith(StrPrint("%.2a", d0080), "0x1.00") &&
+      StartsWith(StrPrint("%.2a", d0180), "0x1.02") &&
+      StartsWith(StrPrint("%.2a", d0081), "0x1.01") &&
+      StartsWith(StrPrint("%.2a", d0181), "0x1.02");
+
+  // >>> hex_float_prefers_denormal_repr. Formatting `denormal` on glibc yields
+  // "0x0.0000000000001p-1022", whereas on std libs that don't use denormal
+  // representation it would either be 0x1p-1074 or 0x1.0000000000000-1074.
+  const double denormal = std::numeric_limits<double>::denorm_min();
+  result.hex_float_prefers_denormal_repr =
+      StartsWith(StrPrint("%a", denormal), "0x0.0000000000001");
+
+  // >>> hex_float_uses_minimal_precision_when_not_specified. Some (non-glibc)
+  // libs will format the following as "0x1.0079000000000p+16".
+  result.hex_float_uses_minimal_precision_when_not_specified =
+      (StrPrint("%a", d0079) == "0x1.0079p+16");
+
+  // >>> hex_float_optimizes_leading_digit_bit_count. The number 1.5, when
+  // formatted by glibc should yield "0x1.8p+0" for `double` and "0xcp-3" for
+  // `long double`, i.e., number of bits in the leading digit is adapted to the
+  // number of bits in the mantissa.
+  const double d_15 = 1.5;
+  const long double ld_15 = 1.5;
+  result.hex_float_optimizes_leading_digit_bit_count =
+      StartsWith(StrPrint("%a", d_15), "0x1.8") &&
+      StartsWith(StrPrint("%La", ld_15), "0xc");
+
+  return result;
+}
+
+const NativePrintfTraits &VerifyNativeImplementation() {
+  static NativePrintfTraits native_traits = VerifyNativeImplementationImpl();
+  return native_traits;
+}
+
 class FormatConvertTest : public ::testing::Test { };
 
 template <typename T>
@@ -476,6 +541,7 @@ TEST_F(FormatConvertTest, Uint128) {
 
 template <typename Floating>
 void TestWithMultipleFormatsHelper(const std::vector<Floating> &floats) {
+  const NativePrintfTraits &native_traits = VerifyNativeImplementation();
   // Reserve the space to ensure we don't allocate memory in the output itself.
   std::string str_format_result;
   str_format_result.reserve(1 << 20);
@@ -493,13 +559,23 @@ void TestWithMultipleFormatsHelper(const std::vector<Floating> &floats) {
                    'e', 'E'}) {
       std::string fmt_str = std::string(fmt) + f;
 
-      if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F') {
+      if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F' &&
+          f != 'a' && f != 'A') {
         // This particular test takes way too long with snprintf.
         // Disable for the case we are not implementing natively.
         continue;
       }
 
+      if ((f == 'a' || f == 'A') &&
+          !native_traits.hex_float_has_glibc_rounding) {
+        continue;
+      }
+
       for (Floating d : floats) {
+        if (!native_traits.hex_float_prefers_denormal_repr &&
+            (f == 'a' || f == 'A') && std::fpclassify(d) == FP_SUBNORMAL) {
+          continue;
+        }
         int i = -10;
         FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)};
         UntypedFormatSpecImpl format(fmt_str);
@@ -766,6 +842,111 @@ TEST_F(FormatConvertTest, DoubleRound) {
             "1837869002408041296803276054561138153076171875");
 }
 
+TEST_F(FormatConvertTest, DoubleRoundA) {
+  const NativePrintfTraits &native_traits = VerifyNativeImplementation();
+  std::string s;
+  const auto format = [&](const char *fmt, double d) -> std::string & {
+    s.clear();
+    FormatArgImpl args[1] = {FormatArgImpl(d)};
+    AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args));
+    if (native_traits.hex_float_has_glibc_rounding) {
+      EXPECT_EQ(StrPrint(fmt, d), s);
+    }
+    return s;
+  };
+
+  // 0x1.00018000p+100
+  const double on_boundary_odd = 1267679614447900152596896153600.0;
+  EXPECT_EQ(format("%.0a", on_boundary_odd), "0x1p+100");
+  EXPECT_EQ(format("%.1a", on_boundary_odd), "0x1.0p+100");
+  EXPECT_EQ(format("%.2a", on_boundary_odd), "0x1.00p+100");
+  EXPECT_EQ(format("%.3a", on_boundary_odd), "0x1.000p+100");
+  EXPECT_EQ(format("%.4a", on_boundary_odd), "0x1.0002p+100");  // round
+  EXPECT_EQ(format("%.5a", on_boundary_odd), "0x1.00018p+100");
+  EXPECT_EQ(format("%.6a", on_boundary_odd), "0x1.000180p+100");
+
+  // 0x1.00028000p-2
+  const double on_boundary_even = 0.250009536743164062500;
+  EXPECT_EQ(format("%.0a", on_boundary_even), "0x1p-2");
+  EXPECT_EQ(format("%.1a", on_boundary_even), "0x1.0p-2");
+  EXPECT_EQ(format("%.2a", on_boundary_even), "0x1.00p-2");
+  EXPECT_EQ(format("%.3a", on_boundary_even), "0x1.000p-2");
+  EXPECT_EQ(format("%.4a", on_boundary_even), "0x1.0002p-2");  // no round
+  EXPECT_EQ(format("%.5a", on_boundary_even), "0x1.00028p-2");
+  EXPECT_EQ(format("%.6a", on_boundary_even), "0x1.000280p-2");
+
+  // 0x1.00018001p+1
+  const double slightly_over = 2.00004577683284878730773925781250;
+  EXPECT_EQ(format("%.0a", slightly_over), "0x1p+1");
+  EXPECT_EQ(format("%.1a", slightly_over), "0x1.0p+1");
+  EXPECT_EQ(format("%.2a", slightly_over), "0x1.00p+1");
+  EXPECT_EQ(format("%.3a", slightly_over), "0x1.000p+1");
+  EXPECT_EQ(format("%.4a", slightly_over), "0x1.0002p+1");
+  EXPECT_EQ(format("%.5a", slightly_over), "0x1.00018p+1");
+  EXPECT_EQ(format("%.6a", slightly_over), "0x1.000180p+1");
+
+  // 0x1.00017fffp+0
+  const double slightly_under = 1.000022887950763106346130371093750;
+  EXPECT_EQ(format("%.0a", slightly_under), "0x1p+0");
+  EXPECT_EQ(format("%.1a", slightly_under), "0x1.0p+0");
+  EXPECT_EQ(format("%.2a", slightly_under), "0x1.00p+0");
+  EXPECT_EQ(format("%.3a", slightly_under), "0x1.000p+0");
+  EXPECT_EQ(format("%.4a", slightly_under), "0x1.0001p+0");
+  EXPECT_EQ(format("%.5a", slightly_under), "0x1.00018p+0");
+  EXPECT_EQ(format("%.6a", slightly_under), "0x1.000180p+0");
+  EXPECT_EQ(format("%.7a", slightly_under), "0x1.0001800p+0");
+
+  // 0x1.1b3829ac28058p+3
+  const double hex_value = 8.85060580848964661981881363317370414733886718750;
+  EXPECT_EQ(format("%.0a", hex_value), "0x1p+3");
+  EXPECT_EQ(format("%.1a", hex_value), "0x1.2p+3");
+  EXPECT_EQ(format("%.2a", hex_value), "0x1.1bp+3");
+  EXPECT_EQ(format("%.3a", hex_value), "0x1.1b4p+3");
+  EXPECT_EQ(format("%.4a", hex_value), "0x1.1b38p+3");
+  EXPECT_EQ(format("%.5a", hex_value), "0x1.1b383p+3");
+  EXPECT_EQ(format("%.6a", hex_value), "0x1.1b382ap+3");
+  EXPECT_EQ(format("%.7a", hex_value), "0x1.1b3829bp+3");
+  EXPECT_EQ(format("%.8a", hex_value), "0x1.1b3829acp+3");
+  EXPECT_EQ(format("%.9a", hex_value), "0x1.1b3829ac3p+3");
+  EXPECT_EQ(format("%.10a", hex_value), "0x1.1b3829ac28p+3");
+  EXPECT_EQ(format("%.11a", hex_value), "0x1.1b3829ac280p+3");
+  EXPECT_EQ(format("%.12a", hex_value), "0x1.1b3829ac2806p+3");
+  EXPECT_EQ(format("%.13a", hex_value), "0x1.1b3829ac28058p+3");
+  EXPECT_EQ(format("%.14a", hex_value), "0x1.1b3829ac280580p+3");
+  EXPECT_EQ(format("%.15a", hex_value), "0x1.1b3829ac2805800p+3");
+  EXPECT_EQ(format("%.16a", hex_value), "0x1.1b3829ac28058000p+3");
+  EXPECT_EQ(format("%.17a", hex_value), "0x1.1b3829ac280580000p+3");
+  EXPECT_EQ(format("%.18a", hex_value), "0x1.1b3829ac2805800000p+3");
+  EXPECT_EQ(format("%.19a", hex_value), "0x1.1b3829ac28058000000p+3");
+  EXPECT_EQ(format("%.20a", hex_value), "0x1.1b3829ac280580000000p+3");
+  EXPECT_EQ(format("%.21a", hex_value), "0x1.1b3829ac2805800000000p+3");
+
+  // 0x1.0818283848586p+3
+  const double hex_value2 = 8.2529488658208371987257123691961169242858886718750;
+  EXPECT_EQ(format("%.0a", hex_value2), "0x1p+3");
+  EXPECT_EQ(format("%.1a", hex_value2), "0x1.1p+3");
+  EXPECT_EQ(format("%.2a", hex_value2), "0x1.08p+3");
+  EXPECT_EQ(format("%.3a", hex_value2), "0x1.082p+3");
+  EXPECT_EQ(format("%.4a", hex_value2), "0x1.0818p+3");
+  EXPECT_EQ(format("%.5a", hex_value2), "0x1.08183p+3");
+  EXPECT_EQ(format("%.6a", hex_value2), "0x1.081828p+3");
+  EXPECT_EQ(format("%.7a", hex_value2), "0x1.0818284p+3");
+  EXPECT_EQ(format("%.8a", hex_value2), "0x1.08182838p+3");
+  EXPECT_EQ(format("%.9a", hex_value2), "0x1.081828385p+3");
+  EXPECT_EQ(format("%.10a", hex_value2), "0x1.0818283848p+3");
+  EXPECT_EQ(format("%.11a", hex_value2), "0x1.08182838486p+3");
+  EXPECT_EQ(format("%.12a", hex_value2), "0x1.081828384858p+3");
+  EXPECT_EQ(format("%.13a", hex_value2), "0x1.0818283848586p+3");
+  EXPECT_EQ(format("%.14a", hex_value2), "0x1.08182838485860p+3");
+  EXPECT_EQ(format("%.15a", hex_value2), "0x1.081828384858600p+3");
+  EXPECT_EQ(format("%.16a", hex_value2), "0x1.0818283848586000p+3");
+  EXPECT_EQ(format("%.17a", hex_value2), "0x1.08182838485860000p+3");
+  EXPECT_EQ(format("%.18a", hex_value2), "0x1.081828384858600000p+3");
+  EXPECT_EQ(format("%.19a", hex_value2), "0x1.0818283848586000000p+3");
+  EXPECT_EQ(format("%.20a", hex_value2), "0x1.08182838485860000000p+3");
+  EXPECT_EQ(format("%.21a", hex_value2), "0x1.081828384858600000000p+3");
+}
+
 // We don't actually store the results. This is just to exercise the rest of the
 // machinery.
 struct NullSink {
@@ -797,6 +978,7 @@ TEST_F(FormatConvertTest, LongDouble) {
   // implementation against the native one there.
   return;
 #endif  // _MSC_VER
+  const NativePrintfTraits &native_traits = VerifyNativeImplementation();
   const char *const kFormats[] = {"%",    "%.3", "%8.5", "%9",  "%.5000",
                                   "%.60", "%+",  "% ",   "%-10"};
 
@@ -839,12 +1021,20 @@ TEST_F(FormatConvertTest, LongDouble) {
                    'e', 'E'}) {
       std::string fmt_str = std::string(fmt) + 'L' + f;
 
-      if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F') {
+      if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F' &&
+          f != 'a' && f != 'A') {
         // This particular test takes way too long with snprintf.
         // Disable for the case we are not implementing natively.
         continue;
       }
 
+      if (f == 'a' || f == 'A') {
+        if (!native_traits.hex_float_has_glibc_rounding ||
+            !native_traits.hex_float_optimizes_leading_digit_bit_count) {
+          continue;
+        }
+      }
+
       for (auto d : doubles) {
         FormatArgImpl arg(d);
         UntypedFormatSpecImpl format(fmt_str);
@@ -860,6 +1050,7 @@ TEST_F(FormatConvertTest, LongDouble) {
 }
 
 TEST_F(FormatConvertTest, IntAsDouble) {
+  const NativePrintfTraits &native_traits = VerifyNativeImplementation();
   const int kMin = std::numeric_limits<int>::min();
   const int kMax = std::numeric_limits<int>::max();
   const int ia[] = {
@@ -875,14 +1066,16 @@ TEST_F(FormatConvertTest, IntAsDouble) {
       const char *fmt;
     };
     const double dx = static_cast<double>(fx);
-    const Expectation kExpect[] = {
-      { __LINE__, StrPrint("%f", dx), "%f" },
-      { __LINE__, StrPrint("%12f", dx), "%12f" },
-      { __LINE__, StrPrint("%.12f", dx), "%.12f" },
-      { __LINE__, StrPrint("%12a", dx), "%12a" },
-      { __LINE__, StrPrint("%.12a", dx), "%.12a" },
+    std::vector<Expectation> expect = {
+        {__LINE__, StrPrint("%f", dx), "%f"},
+        {__LINE__, StrPrint("%12f", dx), "%12f"},
+        {__LINE__, StrPrint("%.12f", dx), "%.12f"},
+        {__LINE__, StrPrint("%.12a", dx), "%.12a"},
     };
-    for (const Expectation &e : kExpect) {
+    if (native_traits.hex_float_uses_minimal_precision_when_not_specified) {
+      expect.push_back({__LINE__, StrPrint("%12a", dx), "%12a"});
+    }
+    for (const Expectation &e : expect) {
       SCOPED_TRACE(e.line);
       SCOPED_TRACE(e.fmt);
       UntypedFormatSpecImpl format(e.fmt);
@@ -927,6 +1120,25 @@ TEST_F(FormatConvertTest, ExpectedFailures) {
   EXPECT_TRUE(FormatFails("%*d", ""));
 }
 
+// Sanity check to make sure that we are testing what we think we're testing on
+// e.g. the x86_64+glibc platform.
+TEST_F(FormatConvertTest, GlibcHasCorrectTraits) {
+#if !defined(__GLIBC__) || !defined(__x86_64__)
+  return;
+#endif
+  const NativePrintfTraits &native_traits = VerifyNativeImplementation();
+  // If one of the following tests break then it is either because the above PP
+  // macro guards failed to exclude a new platform (likely) or because something
+  // has changed in the implemention of glibc sprintf float formatting behavior.
+  // If the latter, then the code that computes these flags needs to be
+  // revisited and/or possibly the StrFormat implementation.
+  EXPECT_TRUE(native_traits.hex_float_has_glibc_rounding);
+  EXPECT_TRUE(native_traits.hex_float_prefers_denormal_repr);
+  EXPECT_TRUE(
+      native_traits.hex_float_uses_minimal_precision_when_not_specified);
+  EXPECT_TRUE(native_traits.hex_float_optimizes_leading_digit_bit_count);
+}
+
 }  // namespace
 }  // namespace str_format_internal
 ABSL_NAMESPACE_END
diff --git a/absl/strings/internal/str_format/float_conversion.cc b/absl/strings/internal/str_format/float_conversion.cc
index 39fc5f60..6eb7b9fc 100644
--- a/absl/strings/internal/str_format/float_conversion.cc
+++ b/absl/strings/internal/str_format/float_conversion.cc
@@ -15,6 +15,7 @@
 #include "absl/functional/function_ref.h"
 #include "absl/meta/type_traits.h"
 #include "absl/numeric/int128.h"
+#include "absl/strings/numbers.h"
 #include "absl/types/optional.h"
 #include "absl/types/span.h"
 
@@ -453,26 +454,31 @@ Padding ExtraWidthToPadding(size_t total_size, const FormatState &state) {
   }
 }
 
-void FinalPrint(absl::string_view data, int trailing_zeros,
-                const FormatState &state) {
+void FinalPrint(const FormatState &state, absl::string_view data,
+                int padding_offset, int trailing_zeros,
+                absl::string_view data_postfix) {
   if (state.conv.width() < 0) {
     // No width specified. Fast-path.
     if (state.sign_char != '\0') state.sink->Append(1, state.sign_char);
     state.sink->Append(data);
     state.sink->Append(trailing_zeros, '0');
+    state.sink->Append(data_postfix);
     return;
   }
 
-  auto padding =
-      ExtraWidthToPadding((state.sign_char != '\0' ? 1 : 0) + data.size() +
-                              static_cast<size_t>(trailing_zeros),
-                          state);
+  auto padding = ExtraWidthToPadding((state.sign_char != '\0' ? 1 : 0) +
+                                         data.size() + data_postfix.size() +
+                                         static_cast<size_t>(trailing_zeros),
+                                     state);
 
   state.sink->Append(padding.left_spaces, ' ');
   if (state.sign_char != '\0') state.sink->Append(1, state.sign_char);
+  // Padding in general needs to be inserted somewhere in the middle of `data`.
+  state.sink->Append(data.substr(0, padding_offset));
   state.sink->Append(padding.zeros, '0');
-  state.sink->Append(data);
+  state.sink->Append(data.substr(padding_offset));
   state.sink->Append(trailing_zeros, '0');
+  state.sink->Append(data_postfix);
   state.sink->Append(padding.right_spaces, ' ');
 }
 
@@ -525,10 +531,11 @@ void FormatFFast(Int v, int exp, const FormatState &state) {
   // In `alt` mode (flag #) we keep the `.` even if there are no fractional
   // digits. In non-alt mode, we strip it.
   if (!state.ShouldPrintDot()) --size;
-  FinalPrint(absl::string_view(integral_digits_start, size),
+  FinalPrint(state, absl::string_view(integral_digits_start, size),
+             /*padding_offset=*/0,
              static_cast<int>(state.precision - (fractional_digits_end -
                                                  fractional_digits_start)),
-             state);
+             /*data_postfix=*/"");
 }
 
 // Slow %f formatter for when the shifted value does not fit in a uint128, and
@@ -655,6 +662,257 @@ void FormatF(Int mantissa, int exp, const FormatState &state) {
   return FormatFFast(mantissa, exp, state);
 }
 
+// Grab the group of four bits (nibble) from `n`. E.g., nibble 1 corresponds to
+// bits 4-7.
+template <typename Int>
+uint8_t GetNibble(Int n, int nibble_index) {
+  constexpr Int mask_low_nibble = Int{0xf};
+  int shift = nibble_index * 4;
+  n &= mask_low_nibble << shift;
+  return static_cast<uint8_t>((n >> shift) & 0xf);
+}
+
+// Add one to the given nibble, applying carry to higher nibbles. Returns true
+// if overflow, false otherwise.
+template <typename Int>
+bool IncrementNibble(int nibble_index, Int *n) {
+  constexpr int kShift = sizeof(Int) * 8 - 1;
+  constexpr int kNumNibbles = sizeof(Int) * 8 / 4;
+  Int before = *n >> kShift;
+  // Here we essentially want to take the number 1 and move it into the requsted
+  // nibble, then add it to *n to effectively increment the nibble. However,
+  // ASan will complain if we try to shift the 1 beyond the limits of the Int,
+  // i.e., if the nibble_index is out of range. So therefore we check for this
+  // and if we are out of range we just add 0 which leaves *n unchanged, which
+  // seems like the reasonable thing to do in that case.
+  *n +=
+      ((nibble_index * 4 >= sizeof(Int) * 8) ? 0
+                                             : (Int{1} << (nibble_index * 4)));
+  Int after = *n >> kShift;
+  return (before && !after) || (nibble_index >= kNumNibbles);
+}
+
+// Return a mask with 1's in the given nibble and all lower nibbles.
+template <typename Int>
+Int MaskUpToNibbleInclusive(int nibble_index) {
+  constexpr int kNumNibbles = sizeof(Int) * 8 / 4;
+  static const Int ones = ~Int{0};
+  return ones >> std::max(0, 4 * (kNumNibbles - nibble_index - 1));
+}
+
+// Return a mask with 1's below the given nibble.
+template <typename Int>
+Int MaskUpToNibbleExclusive(int nibble_index) {
+  return nibble_index <= 0 ? 0 : MaskUpToNibbleInclusive<Int>(nibble_index - 1);
+}
+
+template <typename Int>
+Int MoveToNibble(uint8_t nibble, int nibble_index) {
+  return Int{nibble} << (4 * nibble_index);
+}
+
+// Given mantissa size, find optimal # of mantissa bits to put in initial digit.
+//
+// In the hex representation we keep a single hex digit to the left of the dot.
+// However, the question as to how many bits of the mantissa should be put into
+// that hex digit in theory is arbitrary, but in practice it is optimal to
+// choose based on the size of the mantissa. E.g., for a `double`, there are 53
+// mantissa bits, so that means that we should put 1 bit to the left of the dot,
+// thereby leaving 52 bits to the right, which is evenly divisible by four and
+// thus all fractional digits represent actual precision. For a `long double`,
+// on the other hand, there are 64 bits of mantissa, thus we can use all four
+// bits for the initial hex digit and still have a number left over (60) that is
+// a multiple of four. Once again, the goal is to have all fractional digits
+// represent real precision.
+template <typename Float>
+constexpr int HexFloatLeadingDigitSizeInBits() {
+  return std::numeric_limits<Float>::digits % 4 > 0
+             ? std::numeric_limits<Float>::digits % 4
+             : 4;
+}
+
+// This function captures the rounding behavior of glibc for hex float
+// representations. E.g. when rounding 0x1.ab800000 to a precision of .2
+// ("%.2a") glibc will round up because it rounds toward the even number (since
+// 0xb is an odd number, it will round up to 0xc). However, when rounding at a
+// point that is not followed by 800000..., it disregards the parity and rounds
+// up if > 8 and rounds down if < 8.
+template <typename Int>
+bool HexFloatNeedsRoundUp(Int mantissa, int final_nibble_displayed) {
+  // If the last nibble (hex digit) to be displayed is the lowest on in the
+  // mantissa then that means that we don't have any further nibbles to inform
+  // rounding, so don't round.
+  if (final_nibble_displayed <= 0) {
+    return false;
+  }
+  int rounding_nibble_idx = final_nibble_displayed - 1;
+  constexpr int kTotalNibbles = sizeof(Int) * 8 / 4;
+  assert(final_nibble_displayed <= kTotalNibbles);
+  Int mantissa_up_to_rounding_nibble_inclusive =
+      mantissa & MaskUpToNibbleInclusive<Int>(rounding_nibble_idx);
+  Int eight = MoveToNibble<Int>(8, rounding_nibble_idx);
+  if (mantissa_up_to_rounding_nibble_inclusive != eight) {
+    return mantissa_up_to_rounding_nibble_inclusive > eight;
+  }
+  // Nibble in question == 8.
+  uint8_t should_round_at_8 =
+      (final_nibble_displayed >= kTotalNibbles)
+          ? true
+          : (GetNibble(mantissa, final_nibble_displayed) % 2 == 1);
+  return should_round_at_8;
+}
+
+// Stores values associated with a Float type needed by the FormatA
+// implementation in order to avoid templatizing that function by the Float
+// type.
+struct HexFloatTypeParams {
+  template <typename Float>
+  explicit HexFloatTypeParams(Float)
+      : min_exponent(std::numeric_limits<Float>::min_exponent - 1),
+        leading_digit_size_bits(HexFloatLeadingDigitSizeInBits<Float>()) {
+    assert(leading_digit_size_bits >= 1 && leading_digit_size_bits <= 4);
+  }
+
+  int min_exponent;
+  int leading_digit_size_bits;
+};
+
+// Hex Float Rounding. First check if we need to round; if so, then we do that
+// by manipulating (incrementing) the mantissa, that way we can later print the
+// mantissa digits by iterating through them in the same way regardless of
+// whether a rounding happened.
+template <typename Int>
+void FormatARound(bool precision_specified, const FormatState &state,
+                  uint8_t *leading, Int *mantissa, int *exp) {
+  constexpr int kTotalNibbles = sizeof(Int) * 8 / 4;
+  // Index of the last nibble that we could display given precision.
+  int final_nibble_displayed =
+      precision_specified ? std::max(0, (kTotalNibbles - state.precision)) : 0;
+  if (HexFloatNeedsRoundUp(*mantissa, final_nibble_displayed)) {
+    // Need to round up.
+    bool overflow = IncrementNibble(final_nibble_displayed, mantissa);
+    *leading += (overflow ? 1 : 0);
+    if (ABSL_PREDICT_FALSE(*leading > 15)) {
+      // We have overflowed the leading digit. This would mean that we would
+      // need two hex digits to the left of the dot, which is not allowed. So
+      // adjust the mantissa and exponent so that the result is always 1.0eXXX.
+      *leading = 1;
+      *mantissa = 0;
+      *exp += 4;
+    }
+  }
+  // Now that we have handled a possible round-up we can go ahead and zero out
+  // all the nibbles of the mantissa that we won't need.
+  if (precision_specified) {
+    *mantissa &= ~MaskUpToNibbleExclusive<Int>(final_nibble_displayed);
+  }
+}
+
+template <typename Int>
+void FormatANormalize(const HexFloatTypeParams float_traits, uint8_t *leading,
+                      Int *mantissa, int *exp) {
+  constexpr int kIntBits = sizeof(Int) * 8;
+  static const Int kHighIntBit = Int{1} << (kIntBits - 1);
+  const int kLeadDigitBitsCount = float_traits.leading_digit_size_bits;
+  // Normalize mantissa so that highest bit set is in MSB position, unless we
+  // get interrupted by the exponent threshold.
+  while (*mantissa && !(*mantissa & kHighIntBit)) {
+    if (ABSL_PREDICT_FALSE(*exp - 1 < float_traits.min_exponent)) {
+      *mantissa >>= (float_traits.min_exponent - *exp);
+      *exp = float_traits.min_exponent;
+      return;
+    }
+    *mantissa <<= 1;
+    --*exp;
+  }
+  // Extract bits for leading digit then shift them away leaving the
+  // fractional part.
+  *leading =
+      static_cast<uint8_t>(*mantissa >> (kIntBits - kLeadDigitBitsCount));
+  *exp -= (*mantissa != 0) ? kLeadDigitBitsCount : *exp;
+  *mantissa <<= kLeadDigitBitsCount;
+}
+
+template <typename Int>
+void FormatA(const HexFloatTypeParams float_traits, Int mantissa, int exp,
+             bool uppercase, const FormatState &state) {
+  // Int properties.
+  constexpr int kIntBits = sizeof(Int) * 8;
+  constexpr int kTotalNibbles = sizeof(Int) * 8 / 4;
+  // Did the user specify a precision explicitly?
+  const bool precision_specified = state.conv.precision() >= 0;
+
+  // ========== Normalize/Denormalize ==========
+  exp += kIntBits;  // make all digits fractional digits.
+  // This holds the (up to four) bits of leading digit, i.e., the '1' in the
+  // number 0x1.e6fp+2. It's always > 0 unless number is zero or denormal.
+  uint8_t leading = 0;
+  FormatANormalize(float_traits, &leading, &mantissa, &exp);
+
+  // =============== Rounding ==================
+  // Check if we need to round; if so, then we do that by manipulating
+  // (incrementing) the mantissa before beginning to print characters.
+  FormatARound(precision_specified, state, &leading, &mantissa, &exp);
+
+  // ============= Format Result ===============
+  // This buffer holds the "0x1.ab1de3" portion of "0x1.ab1de3pe+2". Compute the
+  // size with long double which is the largest of the floats.
+  constexpr size_t kBufSizeForHexFloatRepr =
+      2                                               // 0x
+      + std::numeric_limits<long double>::digits / 4  // number of hex digits
+      + 1                                             // round up
+      + 1;                                            // "." (dot)
+  char digits_buffer[kBufSizeForHexFloatRepr];
+  char *digits_iter = digits_buffer;
+  const char *const digits =
+      static_cast<const char *>("0123456789ABCDEF0123456789abcdef") +
+      (uppercase ? 0 : 16);
+
+  // =============== Hex Prefix ================
+  *digits_iter++ = '0';
+  *digits_iter++ = uppercase ? 'X' : 'x';
+
+  // ========== Non-Fractional Digit ===========
+  *digits_iter++ = digits[leading];
+
+  // ================== Dot ====================
+  // There are three reasons we might need a dot. Keep in mind that, at this
+  // point, the mantissa holds only the fractional part.
+  if ((precision_specified && state.precision > 0) ||
+      (!precision_specified && mantissa > 0) || state.conv.has_alt_flag()) {
+    *digits_iter++ = '.';
+  }
+
+  // ============ Fractional Digits ============
+  int digits_emitted = 0;
+  while (mantissa > 0) {
+    *digits_iter++ = digits[GetNibble(mantissa, kTotalNibbles - 1)];
+    mantissa <<= 4;
+    ++digits_emitted;
+  }
+  int trailing_zeros =
+      precision_specified ? state.precision - digits_emitted : 0;
+  assert(trailing_zeros >= 0);
+  auto digits_result = string_view(digits_buffer, digits_iter - digits_buffer);
+
+  // =============== Exponent ==================
+  constexpr size_t kBufSizeForExpDecRepr =
+      numbers_internal::kFastToBufferSize  // requred for FastIntToBuffer
+      + 1                                  // 'p' or 'P'
+      + 1;                                 // '+' or '-'
+  char exp_buffer[kBufSizeForExpDecRepr];
+  exp_buffer[0] = uppercase ? 'P' : 'p';
+  exp_buffer[1] = exp >= 0 ? '+' : '-';
+  numbers_internal::FastIntToBuffer(exp < 0 ? -exp : exp, exp_buffer + 2);
+
+  // ============ Assemble Result ==============
+  FinalPrint(state,           //
+             digits_result,   // 0xN.NNN...
+             2,               // offset in `data` to start padding if needed.
+             trailing_zeros,  // num remaining mantissa padding zeros
+             exp_buffer);     // exponent
+}
+
 char *CopyStringTo(absl::string_view v, char *out) {
   std::memcpy(out, v.data(), v.size());
   return out + v.size();
@@ -1103,7 +1361,10 @@ bool FloatToSink(const Float v, const FormatConversionSpecImpl &conv,
     }
   } else if (c == FormatConversionCharInternal::a ||
              c == FormatConversionCharInternal::A) {
-    return FallbackToSnprintf(v, conv, sink);
+    bool uppercase = (c == FormatConversionCharInternal::A);
+    FormatA(HexFloatTypeParams(Float{}), decomposed.mantissa,
+            decomposed.exponent, uppercase, {sign_char, precision, conv, sink});
+    return true;
   } else {
     return false;
   }
diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc
index d9afe2f3..dd5d25b0 100644
--- a/absl/strings/str_cat.cc
+++ b/absl/strings/str_cat.cc
@@ -141,12 +141,12 @@ namespace strings_internal {
 std::string CatPieces(std::initializer_list<absl::string_view> pieces) {
   std::string result;
   size_t total_size = 0;
-  for (const absl::string_view piece : pieces) total_size += piece.size();
+  for (const absl::string_view& piece : pieces) total_size += piece.size();
   strings_internal::STLStringResizeUninitialized(&result, total_size);
 
   char* const begin = &result[0];
   char* out = begin;
-  for (const absl::string_view piece : pieces) {
+  for (const absl::string_view& piece : pieces) {
     const size_t this_size = piece.size();
     if (this_size != 0) {
       memcpy(out, piece.data(), this_size);
@@ -170,7 +170,7 @@ void AppendPieces(std::string* dest,
                   std::initializer_list<absl::string_view> pieces) {
   size_t old_size = dest->size();
   size_t total_size = old_size;
-  for (const absl::string_view piece : pieces) {
+  for (const absl::string_view& piece : pieces) {
     ASSERT_NO_OVERLAP(*dest, piece);
     total_size += piece.size();
   }
@@ -178,7 +178,7 @@ void AppendPieces(std::string* dest,
 
   char* const begin = &(*dest)[0];
   char* out = begin + old_size;
-  for (const absl::string_view piece : pieces) {
+  for (const absl::string_view& piece : pieces) {
     const size_t this_size = piece.size();
     if (this_size != 0) {
       memcpy(out, piece.data(), this_size);
diff --git a/absl/synchronization/mutex.cc b/absl/synchronization/mutex.cc
index 05f5c041..c7968f06 100644
--- a/absl/synchronization/mutex.cc
+++ b/absl/synchronization/mutex.cc
@@ -39,6 +39,7 @@
 #include <thread>  // NOLINT(build/c++11)
 
 #include "absl/base/attributes.h"
+#include "absl/base/call_once.h"
 #include "absl/base/config.h"
 #include "absl/base/dynamic_annotations.h"
 #include "absl/base/internal/atomic_hook.h"
@@ -58,7 +59,6 @@
 
 using absl::base_internal::CurrentThreadIdentityIfPresent;
 using absl::base_internal::PerThreadSynch;
-using absl::base_internal::SchedulingGuard;
 using absl::base_internal::ThreadIdentity;
 using absl::synchronization_internal::GetOrCreateCurrentThreadIdentity;
 using absl::synchronization_internal::GraphCycles;
@@ -86,28 +86,6 @@ ABSL_CONST_INIT std::atomic<OnDeadlockCycle> synch_deadlock_detection(
     kDeadlockDetectionDefault);
 ABSL_CONST_INIT std::atomic<bool> synch_check_invariants(false);
 
-// ------------------------------------------ spinlock support
-
-// Make sure read-only globals used in the Mutex code are contained on the
-// same cacheline and cacheline aligned to eliminate any false sharing with
-// other globals from this and other modules.
-static struct MutexGlobals {
-  MutexGlobals() {
-    // Find machine-specific data needed for Delay() and
-    // TryAcquireWithSpinning(). This runs in the global constructor
-    // sequence, and before that zeros are safe values.
-    num_cpus = absl::base_internal::NumCPUs();
-    spinloop_iterations = num_cpus > 1 ? 1500 : 0;
-  }
-  int num_cpus;
-  int spinloop_iterations;
-  // Pad this struct to a full cacheline to prevent false sharing.
-  char padding[ABSL_CACHELINE_SIZE - 2 * sizeof(int)];
-} ABSL_CACHELINE_ALIGNED mutex_globals;
-static_assert(
-    sizeof(MutexGlobals) == ABSL_CACHELINE_SIZE,
-    "MutexGlobals must occupy an entire cacheline to prevent false sharing");
-
 ABSL_INTERNAL_ATOMIC_HOOK_ATTRIBUTES
     absl::base_internal::AtomicHook<void (*)(int64_t wait_cycles)>
         submit_profile_data;
@@ -144,7 +122,22 @@ void RegisterSymbolizer(bool (*fn)(const void *pc, char *out, int out_size)) {
   symbolizer.Store(fn);
 }
 
-// spinlock delay on iteration c.  Returns new c.
+struct ABSL_CACHELINE_ALIGNED MutexGlobals {
+  absl::once_flag once;
+  int num_cpus = 0;
+  int spinloop_iterations = 0;
+};
+
+static const MutexGlobals& GetMutexGlobals() {
+  ABSL_CONST_INIT static MutexGlobals data;
+  absl::base_internal::LowLevelCallOnce(&data.once, [&]() {
+    data.num_cpus = absl::base_internal::NumCPUs();
+    data.spinloop_iterations = data.num_cpus > 1 ? 1500 : 0;
+  });
+  return data;
+}
+
+// Spinlock delay on iteration c.  Returns new c.
 namespace {
   enum DelayMode { AGGRESSIVE, GENTLE };
 };
@@ -154,22 +147,25 @@ static int Delay(int32_t c, DelayMode mode) {
   // gentle then spin only a few times before yielding.  Aggressive spinning is
   // used to ensure that an Unlock() call, which  must get the spin lock for
   // any thread to make progress gets it without undue delay.
-  int32_t limit = (mutex_globals.num_cpus > 1) ?
-      ((mode == AGGRESSIVE) ? 5000 : 250) : 0;
+  const int32_t limit =
+      GetMutexGlobals().num_cpus > 1 ? (mode == AGGRESSIVE ? 5000 : 250) : 0;
   if (c < limit) {
-    c++;               // spin
+    // Spin.
+    c++;
   } else {
     ABSL_TSAN_MUTEX_PRE_DIVERT(nullptr, 0);
-    if (c == limit) {  // yield once
+    if (c == limit) {
+      // Yield once.
       AbslInternalMutexYield();
       c++;
-    } else {           // then wait
+    } else {
+      // Then wait.
       absl::SleepFor(absl::Microseconds(10));
       c = 0;
     }
     ABSL_TSAN_MUTEX_POST_DIVERT(nullptr, 0);
   }
-  return (c);
+  return c;
 }
 
 // --------------------------Generic atomic ops
@@ -1055,7 +1051,6 @@ static PerThreadSynch *DequeueAllWakeable(PerThreadSynch *head,
 // Try to remove thread s from the list of waiters on this mutex.
 // Does nothing if s is not on the waiter list.
 void Mutex::TryRemove(PerThreadSynch *s) {
-  SchedulingGuard::ScopedDisable disable_rescheduling;
   intptr_t v = mu_.load(std::memory_order_relaxed);
   // acquire spinlock & lock
   if ((v & (kMuWait | kMuSpin | kMuWriter | kMuReader)) == kMuWait &&
@@ -1439,7 +1434,7 @@ void Mutex::AssertNotHeld() const {
 // Attempt to acquire *mu, and return whether successful.  The implementation
 // may spin for a short while if the lock cannot be acquired immediately.
 static bool TryAcquireWithSpinning(std::atomic<intptr_t>* mu) {
-  int c = mutex_globals.spinloop_iterations;
+  int c = GetMutexGlobals().spinloop_iterations;
   do {  // do/while somewhat faster on AMD
     intptr_t v = mu->load(std::memory_order_relaxed);
     if ((v & (kMuReader|kMuEvent)) != 0) {
@@ -1899,7 +1894,6 @@ static void CheckForMutexCorruption(intptr_t v, const char* label) {
 }
 
 void Mutex::LockSlowLoop(SynchWaitParams *waitp, int flags) {
-  SchedulingGuard::ScopedDisable disable_rescheduling;
   int c = 0;
   intptr_t v = mu_.load(std::memory_order_relaxed);
   if ((v & kMuEvent) != 0) {
@@ -2019,7 +2013,6 @@ void Mutex::LockSlowLoop(SynchWaitParams *waitp, int flags) {
 // or it is in the process of blocking on a condition variable; it must requeue
 // itself on the mutex/condvar to wait for its condition to become true.
 ABSL_ATTRIBUTE_NOINLINE void Mutex::UnlockSlow(SynchWaitParams *waitp) {
-  SchedulingGuard::ScopedDisable disable_rescheduling;
   intptr_t v = mu_.load(std::memory_order_relaxed);
   this->AssertReaderHeld();
   CheckForMutexCorruption(v, "Unlock");
@@ -2335,7 +2328,6 @@ void Mutex::Trans(MuHow how) {
 // It will later acquire the mutex with high probability.  Otherwise, we
 // enqueue thread w on this mutex.
 void Mutex::Fer(PerThreadSynch *w) {
-  SchedulingGuard::ScopedDisable disable_rescheduling;
   int c = 0;
   ABSL_RAW_CHECK(w->waitp->cond == nullptr,
                  "Mutex::Fer while waiting on Condition");
@@ -2434,7 +2426,6 @@ CondVar::~CondVar() {
 
 // Remove thread s from the list of waiters on this condition variable.
 void CondVar::Remove(PerThreadSynch *s) {
-  SchedulingGuard::ScopedDisable disable_rescheduling;
   intptr_t v;
   int c = 0;
   for (v = cv_.load(std::memory_order_relaxed);;
@@ -2595,7 +2586,6 @@ void CondVar::Wakeup(PerThreadSynch *w) {
 }
 
 void CondVar::Signal() {
-  SchedulingGuard::ScopedDisable disable_rescheduling;
   ABSL_TSAN_MUTEX_PRE_SIGNAL(nullptr, 0);
   intptr_t v;
   int c = 0;