1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
|
// Copyright 2023 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdint>
#include "absl/base/internal/raw_logging.h"
#include "absl/base/no_destructor.h"
#include "benchmark/benchmark.h"
namespace {
// Number of static-NoDestructor-in-a-function to exercise.
// This must be low enough not to hit template instantiation limits
// (happens around 1000).
constexpr int kNumObjects = 1; // set to 512 when doing benchmarks
// 1 is faster to compile: just one templated
// function instantiation
// Size of individual objects to benchmark static-NoDestructor-in-a-function
// usage with.
constexpr int kObjSize = sizeof(void*)*1;
// Simple object of kObjSize bytes (rounded to int).
// We benchmark complete reading of its state via Verify().
class BM_Blob {
public:
BM_Blob(int val) { for (auto& d : data_) d = val; }
BM_Blob() : BM_Blob(-1) {}
void Verify(int val) const { // val must be the c-tor argument
for (auto& d : data_) ABSL_INTERNAL_CHECK(d == val, "");
}
private:
int data_[kObjSize / sizeof(int) > 0 ? kObjSize / sizeof(int) : 1];
};
// static-NoDestructor-in-a-function pattern instances.
// We'll instantiate kNumObjects of them.
template<int i>
const BM_Blob& NoDestrBlobFunc() {
static absl::NoDestructor<BM_Blob> x(i);
return *x;
}
// static-heap-ptr-in-a-function pattern instances
// We'll instantiate kNumObjects of them.
template<int i>
const BM_Blob& OnHeapBlobFunc() {
static BM_Blob* x = new BM_Blob(i);
return *x;
}
// Type for NoDestrBlobFunc or OnHeapBlobFunc.
typedef const BM_Blob& (*FuncType)();
// ========================================================================= //
// Simple benchmarks that read a single BM_Blob over and over, hence
// all they touch fits into L1 CPU cache:
// Direct non-POD global variable (style guide violation) as a baseline.
static BM_Blob direct_blob(0);
void BM_Direct(benchmark::State& state) {
for (auto s : state) {
direct_blob.Verify(0);
}
}
BENCHMARK(BM_Direct);
void BM_NoDestr(benchmark::State& state) {
for (auto s : state) {
NoDestrBlobFunc<0>().Verify(0);
}
}
BENCHMARK(BM_NoDestr);
void BM_OnHeap(benchmark::State& state) {
for (auto s : state) {
OnHeapBlobFunc<0>().Verify(0);
}
}
BENCHMARK(BM_OnHeap);
// ========================================================================= //
// Benchmarks that read kNumObjects of BM_Blob over and over, hence with
// appropriate values of sizeof(BM_Blob) and kNumObjects their working set
// can exceed a given layer of CPU cache.
// Type of benchmark to select between NoDestrBlobFunc and OnHeapBlobFunc.
enum BM_Type { kNoDestr, kOnHeap, kDirect };
// BlobFunc<n>(t, i) returns the i-th function of type t.
// n must be larger than i (we'll use kNumObjects for n).
template<int n>
FuncType BlobFunc(BM_Type t, int i) {
if (i == n) {
switch (t) {
case kNoDestr: return &NoDestrBlobFunc<n>;
case kOnHeap: return &OnHeapBlobFunc<n>;
case kDirect: return nullptr;
}
}
return BlobFunc<n-1>(t, i);
}
template<>
FuncType BlobFunc<0>(BM_Type t, int i) {
ABSL_INTERNAL_CHECK(i == 0, "");
switch (t) {
case kNoDestr: return &NoDestrBlobFunc<0>;
case kOnHeap: return &OnHeapBlobFunc<0>;
case kDirect: return nullptr;
}
return nullptr;
}
// Direct non-POD global variables (style guide violation) as a baseline.
static BM_Blob direct_blobs[kNumObjects];
// Helper that cheaply maps benchmark iteration to randomish index in
// [0, kNumObjects).
int RandIdx(int i) {
// int64 is to avoid overflow and generating negative return values:
return (static_cast<int64_t>(i) * 13) % kNumObjects;
}
// Generic benchmark working with kNumObjects for any of the possible BM_Type.
template <BM_Type t>
void BM_Many(benchmark::State& state) {
FuncType funcs[kNumObjects];
for (int i = 0; i < kNumObjects; ++i) {
funcs[i] = BlobFunc<kNumObjects-1>(t, i);
}
if (t == kDirect) {
for (auto s : state) {
int idx = RandIdx(state.iterations());
direct_blobs[idx].Verify(-1);
}
} else {
for (auto s : state) {
int idx = RandIdx(state.iterations());
funcs[idx]().Verify(idx);
}
}
}
void BM_DirectMany(benchmark::State& state) { BM_Many<kDirect>(state); }
void BM_NoDestrMany(benchmark::State& state) { BM_Many<kNoDestr>(state); }
void BM_OnHeapMany(benchmark::State& state) { BM_Many<kOnHeap>(state); }
BENCHMARK(BM_DirectMany);
BENCHMARK(BM_NoDestrMany);
BENCHMARK(BM_OnHeapMany);
} // namespace
|