aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Craig Tiller <ctiller@google.com>2017-02-21 10:02:31 -0800
committerGravatar Craig Tiller <ctiller@google.com>2017-02-21 10:02:31 -0800
commitf3dec9c995e5c2b85460107a9ad9d937facd9a49 (patch)
tree4c3b5dec3aea261bc7bf47914db9a967c36938a4
parent04bd54ac912f92f189a0c884c8ec01255390a17c (diff)
Add counters for rmw atomic operations to microbenchmarks
-rw-r--r--Makefile2
-rw-r--r--build.yaml2
-rw-r--r--include/grpc/impl/codegen/atm_gcc_atomic.h30
-rw-r--r--src/core/lib/support/sync_posix.c9
-rw-r--r--test/cpp/microbenchmarks/bm_closure.cc51
-rw-r--r--test/cpp/microbenchmarks/bm_fullstack.cc20
6 files changed, 92 insertions, 22 deletions
diff --git a/Makefile b/Makefile
index 8f92816672..d47d967e47 100644
--- a/Makefile
+++ b/Makefile
@@ -217,7 +217,7 @@ CC_counters = $(DEFAULT_CC)
CXX_counters = $(DEFAULT_CXX)
LD_counters = $(DEFAULT_CC)
LDXX_counters = $(DEFAULT_CXX)
-CPPFLAGS_counters = -O2 -DGPR_MU_COUNTERS
+CPPFLAGS_counters = -O2 -DGPR_LOW_LEVEL_COUNTERS
DEFINES_counters = NDEBUG
diff --git a/build.yaml b/build.yaml
index 141526cb59..120c7a85b1 100644
--- a/build.yaml
+++ b/build.yaml
@@ -3919,7 +3919,7 @@ configs:
CPPFLAGS: -O2 -DGRPC_BASIC_PROFILER -DGRPC_TIMERS_RDTSC
DEFINES: NDEBUG
counters:
- CPPFLAGS: -O2 -DGPR_MU_COUNTERS
+ CPPFLAGS: -O2 -DGPR_LOW_LEVEL_COUNTERS
DEFINES: NDEBUG
dbg:
CPPFLAGS: -O0
diff --git a/include/grpc/impl/codegen/atm_gcc_atomic.h b/include/grpc/impl/codegen/atm_gcc_atomic.h
index 7d4ae98cf7..e5a623f723 100644
--- a/include/grpc/impl/codegen/atm_gcc_atomic.h
+++ b/include/grpc/impl/codegen/atm_gcc_atomic.h
@@ -40,6 +40,15 @@
typedef intptr_t gpr_atm;
+#ifdef GPR_LOW_LEVEL_COUNTERS
+extern gpr_atm gpr_counter_rmw;
+#define GPR_ATM_INC_COUNTER(counter) \
+ __atomic_fetch_add(&counter, 1, __ATOMIC_RELAXED)
+#define GPR_ATM_INC_RMW_THEN(blah) (GPR_ATM_INC_COUNTER(gpr_counter_rmw), blah)
+#else
+#define GPR_ATM_INC_RMW_THEN(blah) blah
+#endif
+
#define gpr_atm_full_barrier() (__atomic_thread_fence(__ATOMIC_SEQ_CST))
#define gpr_atm_acq_load(p) (__atomic_load_n((p), __ATOMIC_ACQUIRE))
@@ -50,25 +59,28 @@ typedef intptr_t gpr_atm;
(__atomic_store_n((p), (intptr_t)(value), __ATOMIC_RELAXED))
#define gpr_atm_no_barrier_fetch_add(p, delta) \
- (__atomic_fetch_add((p), (intptr_t)(delta), __ATOMIC_RELAXED))
+ GPR_ATM_INC_RMW_THEN( \
+ __atomic_fetch_add((p), (intptr_t)(delta), __ATOMIC_RELAXED))
#define gpr_atm_full_fetch_add(p, delta) \
- (__atomic_fetch_add((p), (intptr_t)(delta), __ATOMIC_ACQ_REL))
+ GPR_ATM_INC_RMW_THEN( \
+ __atomic_fetch_add((p), (intptr_t)(delta), __ATOMIC_ACQ_REL))
static __inline int gpr_atm_no_barrier_cas(gpr_atm *p, gpr_atm o, gpr_atm n) {
- return __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_RELAXED,
- __ATOMIC_RELAXED);
+ return GPR_ATM_INC_RMW_THEN(__atomic_compare_exchange_n(
+ p, &o, n, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}
static __inline int gpr_atm_acq_cas(gpr_atm *p, gpr_atm o, gpr_atm n) {
- return __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_ACQUIRE,
- __ATOMIC_RELAXED);
+ return GPR_ATM_INC_RMW_THEN(__atomic_compare_exchange_n(
+ p, &o, n, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
}
static __inline int gpr_atm_rel_cas(gpr_atm *p, gpr_atm o, gpr_atm n) {
- return __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_RELEASE,
- __ATOMIC_RELAXED);
+ return GPR_ATM_INC_RMW_THEN(__atomic_compare_exchange_n(
+ p, &o, n, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED));
}
-#define gpr_atm_full_xchg(p, n) __atomic_exchange_n((p), (n), __ATOMIC_ACQ_REL)
+#define gpr_atm_full_xchg(p, n) \
+ GPR_ATM_INC_RMW_THEN(__atomic_exchange_n((p), (n), __ATOMIC_ACQ_REL))
#endif /* GRPC_IMPL_CODEGEN_ATM_GCC_ATOMIC_H */
diff --git a/src/core/lib/support/sync_posix.c b/src/core/lib/support/sync_posix.c
index de0f0484b5..3b7d780608 100644
--- a/src/core/lib/support/sync_posix.c
+++ b/src/core/lib/support/sync_posix.c
@@ -42,8 +42,9 @@
#include <time.h>
#include "src/core/lib/profiling/timers.h"
-#ifdef GPR_MU_COUNTERS
-gpr_atm grpc_mu_locks = 0;
+#ifdef GPR_LOW_LEVEL_COUNTERS
+gpr_atm gpr_mu_locks = 0;
+gpr_atm gpr_counter_rmw = 0;
#endif
void gpr_mu_init(gpr_mu* mu) { GPR_ASSERT(pthread_mutex_init(mu, NULL) == 0); }
@@ -51,8 +52,8 @@ void gpr_mu_init(gpr_mu* mu) { GPR_ASSERT(pthread_mutex_init(mu, NULL) == 0); }
void gpr_mu_destroy(gpr_mu* mu) { GPR_ASSERT(pthread_mutex_destroy(mu) == 0); }
void gpr_mu_lock(gpr_mu* mu) {
-#ifdef GPR_MU_COUNTERS
- gpr_atm_no_barrier_fetch_add(&grpc_mu_locks, 1);
+#ifdef GPR_LOW_LEVEL_COUNTERS
+ GPR_ATM_INC_COUNTER(gpr_mu_locks);
#endif
GPR_TIMER_BEGIN("gpr_mu_lock", 0);
GPR_ASSERT(pthread_mutex_lock(mu) == 0);
diff --git a/test/cpp/microbenchmarks/bm_closure.cc b/test/cpp/microbenchmarks/bm_closure.cc
index 80d6610e13..16d05781bb 100644
--- a/test/cpp/microbenchmarks/bm_closure.cc
+++ b/test/cpp/microbenchmarks/bm_closure.cc
@@ -43,13 +43,46 @@ extern "C" {
#include "third_party/benchmark/include/benchmark/benchmark.h"
+#include <sstream>
+
+#ifdef GPR_LOW_LEVEL_COUNTERS
+extern "C" gpr_atm gpr_mu_locks;
+#endif
+
static class InitializeStuff {
public:
InitializeStuff() { grpc_init(); }
~InitializeStuff() { grpc_shutdown(); }
} initialize_stuff;
+class TrackCounters {
+ public:
+ TrackCounters(benchmark::State& state) : state_(state) {}
+
+ ~TrackCounters() {
+ std::ostringstream out;
+#ifdef GPR_LOW_LEVEL_COUNTERS
+ out << " locks/iter:" << ((double)(gpr_atm_no_barrier_load(&gpr_mu_locks) -
+ mu_locks_at_start_) /
+ (double)state_.iterations())
+ << " atm_rmw/iter:"
+ << ((double)(gpr_atm_no_barrier_load(&gpr_counter_rmw) -
+ rmw_at_start_) /
+ (double)state_.iterations());
+#endif
+ state_.SetLabel(out.str());
+ }
+
+ private:
+ benchmark::State& state_;
+#ifdef GPR_LOW_LEVEL_COUNTERS
+ const size_t mu_locks_at_start_ = gpr_atm_no_barrier_load(&gpr_mu_locks);
+ const size_t rmw_at_start_ = gpr_atm_no_barrier_load(&gpr_counter_rmw);
+#endif
+};
+
static void BM_NoOpExecCtx(benchmark::State& state) {
+ TrackCounters track_counters(state);
while (state.KeepRunning()) {
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
grpc_exec_ctx_finish(&exec_ctx);
@@ -58,6 +91,7 @@ static void BM_NoOpExecCtx(benchmark::State& state) {
BENCHMARK(BM_NoOpExecCtx);
static void BM_WellFlushed(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
while (state.KeepRunning()) {
grpc_exec_ctx_flush(&exec_ctx);
@@ -69,6 +103,7 @@ BENCHMARK(BM_WellFlushed);
static void DoNothing(grpc_exec_ctx* exec_ctx, void* arg, grpc_error* error) {}
static void BM_ClosureInitAgainstExecCtx(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_closure c;
while (state.KeepRunning()) {
benchmark::DoNotOptimize(
@@ -78,6 +113,7 @@ static void BM_ClosureInitAgainstExecCtx(benchmark::State& state) {
BENCHMARK(BM_ClosureInitAgainstExecCtx);
static void BM_ClosureInitAgainstCombiner(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_combiner* combiner = grpc_combiner_create(NULL);
grpc_closure c;
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
@@ -91,6 +127,7 @@ static void BM_ClosureInitAgainstCombiner(benchmark::State& state) {
BENCHMARK(BM_ClosureInitAgainstCombiner);
static void BM_ClosureRunOnExecCtx(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_closure c;
grpc_closure_init(&c, DoNothing, NULL, grpc_schedule_on_exec_ctx);
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
@@ -103,6 +140,7 @@ static void BM_ClosureRunOnExecCtx(benchmark::State& state) {
BENCHMARK(BM_ClosureRunOnExecCtx);
static void BM_ClosureCreateAndRun(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
while (state.KeepRunning()) {
grpc_closure_run(&exec_ctx, grpc_closure_create(DoNothing, NULL,
@@ -114,6 +152,7 @@ static void BM_ClosureCreateAndRun(benchmark::State& state) {
BENCHMARK(BM_ClosureCreateAndRun);
static void BM_ClosureInitAndRun(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
grpc_closure c;
while (state.KeepRunning()) {
@@ -126,6 +165,7 @@ static void BM_ClosureInitAndRun(benchmark::State& state) {
BENCHMARK(BM_ClosureInitAndRun);
static void BM_ClosureSchedOnExecCtx(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_closure c;
grpc_closure_init(&c, DoNothing, NULL, grpc_schedule_on_exec_ctx);
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
@@ -138,6 +178,7 @@ static void BM_ClosureSchedOnExecCtx(benchmark::State& state) {
BENCHMARK(BM_ClosureSchedOnExecCtx);
static void BM_ClosureSched2OnExecCtx(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_closure c1;
grpc_closure c2;
grpc_closure_init(&c1, DoNothing, NULL, grpc_schedule_on_exec_ctx);
@@ -153,6 +194,7 @@ static void BM_ClosureSched2OnExecCtx(benchmark::State& state) {
BENCHMARK(BM_ClosureSched2OnExecCtx);
static void BM_ClosureSched3OnExecCtx(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_closure c1;
grpc_closure c2;
grpc_closure c3;
@@ -171,6 +213,7 @@ static void BM_ClosureSched3OnExecCtx(benchmark::State& state) {
BENCHMARK(BM_ClosureSched3OnExecCtx);
static void BM_AcquireMutex(benchmark::State& state) {
+ TrackCounters track_counters(state);
// for comparison with the combiner stuff below
gpr_mu mu;
gpr_mu_init(&mu);
@@ -185,6 +228,7 @@ static void BM_AcquireMutex(benchmark::State& state) {
BENCHMARK(BM_AcquireMutex);
static void BM_ClosureSchedOnCombiner(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_combiner* combiner = grpc_combiner_create(NULL);
grpc_closure c;
grpc_closure_init(&c, DoNothing, NULL,
@@ -200,6 +244,7 @@ static void BM_ClosureSchedOnCombiner(benchmark::State& state) {
BENCHMARK(BM_ClosureSchedOnCombiner);
static void BM_ClosureSched2OnCombiner(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_combiner* combiner = grpc_combiner_create(NULL);
grpc_closure c1;
grpc_closure c2;
@@ -219,6 +264,7 @@ static void BM_ClosureSched2OnCombiner(benchmark::State& state) {
BENCHMARK(BM_ClosureSched2OnCombiner);
static void BM_ClosureSched3OnCombiner(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_combiner* combiner = grpc_combiner_create(NULL);
grpc_closure c1;
grpc_closure c2;
@@ -242,6 +288,7 @@ static void BM_ClosureSched3OnCombiner(benchmark::State& state) {
BENCHMARK(BM_ClosureSched3OnCombiner);
static void BM_ClosureSched2OnTwoCombiners(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_combiner* combiner1 = grpc_combiner_create(NULL);
grpc_combiner* combiner2 = grpc_combiner_create(NULL);
grpc_closure c1;
@@ -263,6 +310,7 @@ static void BM_ClosureSched2OnTwoCombiners(benchmark::State& state) {
BENCHMARK(BM_ClosureSched2OnTwoCombiners);
static void BM_ClosureSched4OnTwoCombiners(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_combiner* combiner1 = grpc_combiner_create(NULL);
grpc_combiner* combiner2 = grpc_combiner_create(NULL);
grpc_closure c1;
@@ -323,6 +371,7 @@ class Rescheduler {
};
static void BM_ClosureReschedOnExecCtx(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
Rescheduler r(state, grpc_schedule_on_exec_ctx);
r.ScheduleFirst(&exec_ctx);
@@ -331,6 +380,7 @@ static void BM_ClosureReschedOnExecCtx(benchmark::State& state) {
BENCHMARK(BM_ClosureReschedOnExecCtx);
static void BM_ClosureReschedOnCombiner(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
grpc_combiner* combiner = grpc_combiner_create(NULL);
Rescheduler r(state, grpc_combiner_scheduler(combiner, false));
@@ -342,6 +392,7 @@ static void BM_ClosureReschedOnCombiner(benchmark::State& state) {
BENCHMARK(BM_ClosureReschedOnCombiner);
static void BM_ClosureReschedOnCombinerFinally(benchmark::State& state) {
+ TrackCounters track_counters(state);
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
grpc_combiner* combiner = grpc_combiner_create(NULL);
Rescheduler r(state, grpc_combiner_finally_scheduler(combiner, false));
diff --git a/test/cpp/microbenchmarks/bm_fullstack.cc b/test/cpp/microbenchmarks/bm_fullstack.cc
index c63de0ce0a..5bb456ab46 100644
--- a/test/cpp/microbenchmarks/bm_fullstack.cc
+++ b/test/cpp/microbenchmarks/bm_fullstack.cc
@@ -99,8 +99,9 @@ static void ApplyCommonChannelArguments(ChannelArguments* c) {
c->SetInt(GRPC_ARG_MAX_SEND_MESSAGE_LENGTH, INT_MAX);
}
-#ifdef GPR_MU_COUNTERS
-extern "C" gpr_atm grpc_mu_locks;
+#ifdef GPR_LOW_LEVEL_COUNTERS
+extern "C" gpr_atm gpr_mu_locks;
+extern "C" gpr_atm gpr_counter_rmw;
#endif
class BaseFixture {
@@ -108,10 +109,14 @@ class BaseFixture {
void Finish(benchmark::State& s) {
std::ostringstream out;
this->AddToLabel(out, s);
-#ifdef GPR_MU_COUNTERS
- out << " locks/iter:" << ((double)(gpr_atm_no_barrier_load(&grpc_mu_locks) -
+#ifdef GPR_LOW_LEVEL_COUNTERS
+ out << " locks/iter:" << ((double)(gpr_atm_no_barrier_load(&gpr_mu_locks) -
mu_locks_at_start_) /
- (double)s.iterations());
+ (double)s.iterations())
+ << " atm_rmw/iter:"
+ << ((double)(gpr_atm_no_barrier_load(&gpr_counter_rmw) -
+ rmw_at_start_) /
+ (double)s.iterations());
#endif
grpc_memory_counters counters_at_end = grpc_memory_counters_snapshot();
out << " allocs/iter:"
@@ -128,8 +133,9 @@ class BaseFixture {
virtual void AddToLabel(std::ostream& out, benchmark::State& s) = 0;
private:
-#ifdef GPR_MU_COUNTERS
- const size_t mu_locks_at_start_ = gpr_atm_no_barrier_load(&grpc_mu_locks);
+#ifdef GPR_LOW_LEVEL_COUNTERS
+ const size_t mu_locks_at_start_ = gpr_atm_no_barrier_load(&gpr_mu_locks);
+ const size_t rmw_at_start_ = gpr_atm_no_barrier_load(&gpr_counter_rmw);
#endif
grpc_memory_counters counters_at_start_ = grpc_memory_counters_snapshot();
};