author     mtklein <mtklein@chromium.org>          2014-08-27 06:34:16 -0700
committer  Commit bot <commit-bot@chromium.org>    2014-08-27 06:34:16 -0700
commit     4473be874f70d4a8833180a7aebcc9d8a0b1de9e (patch)
tree       4d7ae9af1d20de8b3eaaff06048890b51742ea62
parent     5e8dbd31de0dff44aff83295bdfe518477a5a088 (diff)
Clean up some benches that answer questions we're no longer asking.
NOTREECHECKS=true
BUG=skia:
R=reed@google.com, mtklein@google.com

Author: mtklein@chromium.org

Review URL: https://codereview.chromium.org/512503002
-rw-r--r--  bench/MemcpyBench.cpp    83
-rw-r--r--  bench/MemoryBench.cpp   107
-rw-r--r--  bench/StackBench.cpp    179
-rw-r--r--  gyp/bench.gypi            1
4 files changed, 0 insertions, 370 deletions
diff --git a/bench/MemcpyBench.cpp b/bench/MemcpyBench.cpp
index f550192751..567320be7c 100644
--- a/bench/MemcpyBench.cpp
+++ b/bench/MemcpyBench.cpp
@@ -67,87 +67,6 @@ BENCH(memcpy32_memcpy, 1000)
BENCH(memcpy32_memcpy, 10000)
BENCH(memcpy32_memcpy, 100000)
-// Let the compiler's autovectorizer do what it thinks is best.
-static void memcpy32_autovectorize(uint32_t* dst, const uint32_t* src, int count) {
- while (count --> 0) {
- *dst++ = *src++;
- }
-}
-BENCH(memcpy32_autovectorize, 10)
-BENCH(memcpy32_autovectorize, 100)
-BENCH(memcpy32_autovectorize, 1000)
-BENCH(memcpy32_autovectorize, 10000)
-BENCH(memcpy32_autovectorize, 100000)
-
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
-
-// Align dst to 16 bytes, then use aligned stores. src isn't aligned, so use unaligned loads.
-static void memcpy32_sse2_align(uint32_t* dst, const uint32_t* src, int count) {
- if (count >= 16) {
- while (uintptr_t(dst) & 0xF) {
- *dst++ = *src++;
- count--;
- }
-
- __m128i* dst128 = reinterpret_cast<__m128i*>(dst);
- const __m128i* src128 = reinterpret_cast<const __m128i*>(src);
- dst += 16 * (count / 16);
- src += 16 * (count / 16);
- while (count >= 16) {
- __m128i a = _mm_loadu_si128(src128++);
- __m128i b = _mm_loadu_si128(src128++);
- __m128i c = _mm_loadu_si128(src128++);
- __m128i d = _mm_loadu_si128(src128++);
-
- _mm_store_si128(dst128++, a);
- _mm_store_si128(dst128++, b);
- _mm_store_si128(dst128++, c);
- _mm_store_si128(dst128++, d);
-
- count -= 16;
- }
- }
-
- while (count --> 0) {
- *dst++ = *src++;
- }
-}
-BENCH(memcpy32_sse2_align, 10)
-BENCH(memcpy32_sse2_align, 100)
-BENCH(memcpy32_sse2_align, 1000)
-BENCH(memcpy32_sse2_align, 10000)
-BENCH(memcpy32_sse2_align, 100000)
-
-// Leave both dst and src unaligned, and so use unaligned stores for dst and unaligned loads for src.
-static void memcpy32_sse2_unalign(uint32_t* dst, const uint32_t* src, int count) {
- __m128i* dst128 = reinterpret_cast<__m128i*>(dst);
- const __m128i* src128 = reinterpret_cast<const __m128i*>(src);
- dst += 16 * (count / 16);
- src += 16 * (count / 16);
- while (count >= 16) {
- __m128i a = _mm_loadu_si128(src128++);
- __m128i b = _mm_loadu_si128(src128++);
- __m128i c = _mm_loadu_si128(src128++);
- __m128i d = _mm_loadu_si128(src128++);
-
- _mm_storeu_si128(dst128++, a);
- _mm_storeu_si128(dst128++, b);
- _mm_storeu_si128(dst128++, c);
- _mm_storeu_si128(dst128++, d);
-
- count -= 16;
- }
-
- while (count --> 0) {
- *dst++ = *src++;
- }
-}
-BENCH(memcpy32_sse2_unalign, 10)
-BENCH(memcpy32_sse2_unalign, 100)
-BENCH(memcpy32_sse2_unalign, 1000)
-BENCH(memcpy32_sse2_unalign, 10000)
-BENCH(memcpy32_sse2_unalign, 100000)
-
// Test our chosen best, from SkUtils.h
BENCH(sk_memcpy32, 10)
BENCH(sk_memcpy32, 100)
@@ -155,6 +74,4 @@ BENCH(sk_memcpy32, 1000)
BENCH(sk_memcpy32, 10000)
BENCH(sk_memcpy32, 100000)
-#endif // SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
-
#undef BENCH
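
The removed candidates above were compared against plain ::memcpy and against the retained sk_memcpy32 from SkUtils.h. For context, this is the shape of that comparison outside Skia's bench harness: a minimal, self-contained sketch using std::chrono, where the loop counts, buffer sizes, and fill value are arbitrary choices for illustration, not Skia's numbers.

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Scalar per-element copy, the baseline the removed benches let the compiler autovectorize.
static void copy32_scalar(uint32_t* dst, const uint32_t* src, int count) {
    while (count --> 0) {
        *dst++ = *src++;
    }
}

int main() {
    const int counts[] = {10, 100, 1000, 10000, 100000};
    for (int count : counts) {
        std::vector<uint32_t> src(count, 0x42424242u), dst(count);
        const int loops = 1000000 / count + 1;   // keep total work roughly constant per size

        auto t0 = std::chrono::steady_clock::now();
        for (int i = 0; i < loops; i++) {
            std::memcpy(dst.data(), src.data(), count * sizeof(uint32_t));
        }
        auto t1 = std::chrono::steady_clock::now();
        for (int i = 0; i < loops; i++) {
            copy32_scalar(dst.data(), src.data(), count);
        }
        auto t2 = std::chrono::steady_clock::now();

        volatile uint32_t keep = dst[0];         // read the result so the copies aren't optimized away
        (void)keep;
        std::chrono::duration<double> tm = t1 - t0, ts = t2 - t1;
        std::printf("count=%6d  memcpy %.4fs  scalar %.4fs\n", count, tm.count(), ts.count());
    }
    return 0;
}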
diff --git a/bench/MemoryBench.cpp b/bench/MemoryBench.cpp
index f8af1683b0..05ec83b3e0 100644
--- a/bench/MemoryBench.cpp
+++ b/bench/MemoryBench.cpp
@@ -56,110 +56,3 @@ private:
DEF_BENCH( return new ChunkAllocBench(64); )
DEF_BENCH( return new ChunkAllocBench(8*1024); )
-static int* calloc(size_t num) {
- return (int*)sk_calloc_throw(num*sizeof(int));
-}
-
-static int* malloc_bzero(size_t num) {
- const size_t bytes = num*sizeof(int);
- int* ints = (int*)sk_malloc_throw(bytes);
- sk_bzero(ints, bytes);
- return ints;
-}
-
-class ZerosBench : public Benchmark {
- size_t fNum;
- bool fRead;
- bool fWrite;
- bool fUseCalloc;
- SkString fName;
-public:
- ZerosBench(size_t num, bool read, bool write, bool useCalloc)
- : fNum(num)
- , fRead(read)
- , fWrite(write)
- , fUseCalloc(useCalloc) {
- fName.printf("memory_%s", useCalloc ? "calloc" : "malloc_bzero");
- if (read && write) {
- fName.appendf("_rw");
- } else if (read) {
- fName.appendf("_r");
- } else if (write) {
- fName.appendf("_w");
- }
- fName.appendf("_" SK_SIZE_T_SPECIFIER, num);
- }
-
- virtual bool isSuitableFor(Backend backend) SK_OVERRIDE {
- return backend == kNonRendering_Backend;
- }
-
-protected:
- virtual const char* onGetName() SK_OVERRIDE {
- return fName.c_str();
- }
-
- virtual void onDraw(const int loops, SkCanvas*) SK_OVERRIDE {
- for (int i = 0; i < loops; i++) {
- int* zeros = fUseCalloc ? calloc(fNum) : malloc_bzero(fNum);
- if (fRead) {
- volatile int x = 15;
- for (size_t j = 0; j < fNum; j++) {
- x ^= zeros[j];
- }
- }
- if (fWrite) {
- for (size_t j = 0; j < fNum; j++) {
- zeros[j] = 15;
- }
- }
- sk_free(zeros);
- }
- }
-};
-
-// zero count r w useCalloc?
-DEF_BENCH(return new ZerosBench(1024*1024, 0, 0, 0))
-DEF_BENCH(return new ZerosBench(1024*1024, 0, 0, 1))
-DEF_BENCH(return new ZerosBench(1024*1024, 0, 1, 0))
-DEF_BENCH(return new ZerosBench(1024*1024, 0, 1, 1))
-DEF_BENCH(return new ZerosBench(1024*1024, 1, 0, 0))
-DEF_BENCH(return new ZerosBench(1024*1024, 1, 0, 1))
-DEF_BENCH(return new ZerosBench(1024*1024, 1, 1, 0))
-DEF_BENCH(return new ZerosBench(1024*1024, 1, 1, 1))
-
-DEF_BENCH(return new ZerosBench(256*1024, 0, 0, 0))
-DEF_BENCH(return new ZerosBench(256*1024, 0, 0, 1))
-DEF_BENCH(return new ZerosBench(256*1024, 0, 1, 0))
-DEF_BENCH(return new ZerosBench(256*1024, 0, 1, 1))
-DEF_BENCH(return new ZerosBench(256*1024, 1, 0, 0))
-DEF_BENCH(return new ZerosBench(256*1024, 1, 0, 1))
-DEF_BENCH(return new ZerosBench(256*1024, 1, 1, 0))
-DEF_BENCH(return new ZerosBench(256*1024, 1, 1, 1))
-
-DEF_BENCH(return new ZerosBench(4*1024, 0, 0, 0))
-DEF_BENCH(return new ZerosBench(4*1024, 0, 0, 1))
-DEF_BENCH(return new ZerosBench(4*1024, 0, 1, 0))
-DEF_BENCH(return new ZerosBench(4*1024, 0, 1, 1))
-DEF_BENCH(return new ZerosBench(4*1024, 1, 0, 0))
-DEF_BENCH(return new ZerosBench(4*1024, 1, 0, 1))
-DEF_BENCH(return new ZerosBench(4*1024, 1, 1, 0))
-DEF_BENCH(return new ZerosBench(4*1024, 1, 1, 1))
-
-DEF_BENCH(return new ZerosBench(300, 0, 0, 0))
-DEF_BENCH(return new ZerosBench(300, 0, 0, 1))
-DEF_BENCH(return new ZerosBench(300, 0, 1, 0))
-DEF_BENCH(return new ZerosBench(300, 0, 1, 1))
-DEF_BENCH(return new ZerosBench(300, 1, 0, 0))
-DEF_BENCH(return new ZerosBench(300, 1, 0, 1))
-DEF_BENCH(return new ZerosBench(300, 1, 1, 0))
-DEF_BENCH(return new ZerosBench(300, 1, 1, 1))
-
-DEF_BENCH(return new ZerosBench(4, 0, 0, 0))
-DEF_BENCH(return new ZerosBench(4, 0, 0, 1))
-DEF_BENCH(return new ZerosBench(4, 0, 1, 0))
-DEF_BENCH(return new ZerosBench(4, 0, 1, 1))
-DEF_BENCH(return new ZerosBench(4, 1, 0, 0))
-DEF_BENCH(return new ZerosBench(4, 1, 0, 1))
-DEF_BENCH(return new ZerosBench(4, 1, 1, 0))
-DEF_BENCH(return new ZerosBench(4, 1, 1, 1))
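
ZerosBench above asked whether calloc beats malloc followed by an explicit zeroing pass. Below is a minimal standalone sketch of the same question outside Skia's harness; the allocation size matches the largest ZerosBench case, while the loop count and the single-element touch are arbitrary choices for illustration.

#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>

int main() {
    const size_t num = 1024 * 1024;                    // ints per allocation
    const size_t bytes = num * sizeof(int);
    const int loops = 1000;

    auto t0 = std::chrono::steady_clock::now();
    for (int i = 0; i < loops; i++) {
        int* p = (int*)std::calloc(num, sizeof(int));  // allocator-provided zeroing
        p[0] = 1;                                      // touch so the allocation isn't optimized away
        std::free(p);
    }
    auto t1 = std::chrono::steady_clock::now();
    for (int i = 0; i < loops; i++) {
        int* p = (int*)std::malloc(bytes);             // allocate, then zero by hand
        std::memset(p, 0, bytes);
        p[0] = 1;
        std::free(p);
    }
    auto t2 = std::chrono::steady_clock::now();

    std::chrono::duration<double> tc = t1 - t0, tm = t2 - t1;
    std::printf("calloc %.3fs   malloc+memset %.3fs\n", tc.count(), tm.count());
    return 0;
}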
diff --git a/bench/StackBench.cpp b/bench/StackBench.cpp
deleted file mode 100644
index 3b41cb65ce..0000000000
--- a/bench/StackBench.cpp
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright 2014 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "Benchmark.h"
-#include "SkRandom.h"
-
-#include "SkChunkAlloc.h"
-#include "SkDeque.h"
-#include "SkTArray.h"
-#include "SkTDArray.h"
-
-// This file has several benchmarks using various data structures to do stack-like things:
-// - push
-// - push, immediately pop
-// - push many, pop all of them
-// - serial access
-// - random access
-// When a data structure doesn't support an operation efficiently, we leave that combination out.
-// Where possible we hint to the data structure to allocate in 4K pages.
-//
-// These benchmarks may help you decide which data structure to use for a dynamically allocated
-// ordered list of allocations that grows on one end.
-//
-// Current overall winner (01/2014): SkTDArray.
-// It wins every benchmark on every machine I tried (Desktop, Nexus S, Laptop).
-
-template <typename Impl>
-struct StackBench : public Benchmark {
- virtual bool isSuitableFor(Backend b) SK_OVERRIDE { return b == kNonRendering_Backend; }
- virtual const char* onGetName() SK_OVERRIDE { return Impl::kName; }
- virtual void onDraw(const int loops, SkCanvas*) SK_OVERRIDE { Impl::bench(loops); }
-};
-
-#define BENCH(name) \
- struct name { static const char* const kName; static void bench(int); }; \
- const char* const name::kName = #name; \
- DEF_BENCH(return new StackBench<name>();) \
- void name::bench(int loops)
-
-static const int K = 2049;
-
-// Add K items, then iterate through them serially many times.
-
-BENCH(Deque_Serial) {
- SkDeque s(sizeof(int), 1024);
- for (int i = 0; i < K; i++) *(int*)s.push_back() = i;
-
- volatile int junk = 0;
- for (int j = 0; j < loops; j++) {
- SkDeque::Iter it(s, SkDeque::Iter::kFront_IterStart);
- while(void* p = it.next()) {
- junk += *(int*)p;
- }
- }
-}
-
-BENCH(TArray_Serial) {
- SkTArray<int, true> s;
- for (int i = 0; i < K; i++) s.push_back(i);
-
- volatile int junk = 0;
- for (int j = 0; j < loops; j++) {
- for (int i = 0; i < s.count(); i++) junk += s[i];
- }
-}
-
-BENCH(TDArray_Serial) {
- SkTDArray<int> s;
- for (int i = 0; i < K; i++) s.push(i);
-
- volatile int junk = 0;
- for (int j = 0; j < loops; j++) {
- for (int i = 0; i < s.count(); i++) junk += s[i];
- }
-}
-
-// Add K items, then randomly access them many times.
-
-BENCH(TArray_RandomAccess) {
- SkTArray<int, true> s;
- for (int i = 0; i < K; i++) s.push_back(i);
-
- SkRandom rand;
- volatile int junk = 0;
- for (int i = 0; i < K*loops; i++) {
- junk += s[rand.nextULessThan(K)];
- }
-}
-
-BENCH(TDArray_RandomAccess) {
- SkTDArray<int> s;
- for (int i = 0; i < K; i++) s.push(i);
-
- SkRandom rand;
- volatile int junk = 0;
- for (int i = 0; i < K*loops; i++) {
- junk += s[rand.nextULessThan(K)];
- }
-}
-
-// Push many times.
-
-BENCH(ChunkAlloc_Push) {
- SkChunkAlloc s(4096);
- for (int i = 0; i < K*loops; i++) s.allocThrow(sizeof(int));
-}
-
-BENCH(Deque_Push) {
- SkDeque s(sizeof(int), 1024);
- for (int i = 0; i < K*loops; i++) *(int*)s.push_back() = i;
-}
-
-BENCH(TArray_Push) {
- SkTArray<int, true> s;
- for (int i = 0; i < K*loops; i++) s.push_back(i);
-}
-
-BENCH(TDArray_Push) {
- SkTDArray<int> s;
- for (int i = 0; i < K*loops; i++) s.push(i);
-}
-
-// Push then immediately pop many times.
-
-BENCH(ChunkAlloc_PushPop) {
- SkChunkAlloc s(4096);
- for (int i = 0; i < K*loops; i++) {
- void* p = s.allocThrow(sizeof(int));
- s.unalloc(p);
- }
-}
-
-BENCH(Deque_PushPop) {
- SkDeque s(sizeof(int), 1024);
- for (int i = 0; i < K*loops; i++) {
- *(int*)s.push_back() = i;
- s.pop_back();
- }
-}
-
-BENCH(TArray_PushPop) {
- SkTArray<int, true> s;
- for (int i = 0; i < K*loops; i++) {
- s.push_back(i);
- s.pop_back();
- }
-}
-
-BENCH(TDArray_PushPop) {
- SkTDArray<int> s;
- for (int i = 0; i < K*loops; i++) {
- s.push(i);
- s.pop();
- }
-}
-
-// Push many items, then pop them all.
-
-BENCH(Deque_PushAllPopAll) {
- SkDeque s(sizeof(int), 1024);
- for (int i = 0; i < K*loops; i++) *(int*)s.push_back() = i;
- for (int i = 0; i < K*loops; i++) s.pop_back();
-}
-
-BENCH(TArray_PushAllPopAll) {
- SkTArray<int, true> s;
- for (int i = 0; i < K*loops; i++) s.push_back(i);
- for (int i = 0; i < K*loops; i++) s.pop_back();
-}
-
-BENCH(TDArray_PushAllPopAll) {
- SkTDArray<int> s;
- for (int i = 0; i < K*loops; i++) s.push(i);
- for (int i = 0; i < K*loops; i++) s.pop();
-}
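
The only non-obvious machinery in the deleted file is the BENCH macro defined above. For reference, BENCH(TDArray_Push) { ... } expands to roughly the following, with every name taken directly from the deleted code.

// Approximate expansion of BENCH(TDArray_Push) from the deleted StackBench.cpp:
struct TDArray_Push { static const char* const kName; static void bench(int); };
const char* const TDArray_Push::kName = "TDArray_Push";
DEF_BENCH(return new StackBench<TDArray_Push>();)   // registers the bench with Skia's harness
void TDArray_Push::bench(int loops)
{   // the block written after BENCH(TDArray_Push) becomes this function body
    SkTDArray<int> s;
    for (int i = 0; i < K*loops; i++) s.push(i);
}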
diff --git a/gyp/bench.gypi b/gyp/bench.gypi
index 72ed89a477..085b167d77 100644
--- a/gyp/bench.gypi
+++ b/gyp/bench.gypi
@@ -92,7 +92,6 @@
'../bench/ShaderMaskBench.cpp',
'../bench/SkipZeroesBench.cpp',
'../bench/SortBench.cpp',
- '../bench/StackBench.cpp',
'../bench/StrokeBench.cpp',
'../bench/TableBench.cpp',
'../bench/TextBench.cpp',