diff options
author | 2014-08-27 06:34:16 -0700 | |
---|---|---|
committer | 2014-08-27 06:34:16 -0700 | |
commit | 4473be874f70d4a8833180a7aebcc9d8a0b1de9e (patch) | |
tree | 4d7ae9af1d20de8b3eaaff06048890b51742ea62 | |
parent | 5e8dbd31de0dff44aff83295bdfe518477a5a088 (diff) |
Clean up some benches that answer questions we're no longer asking.
NOTREECHECKS=true
BUG=skia:
R=reed@google.com, mtklein@google.com
Author: mtklein@chromium.org
Review URL: https://codereview.chromium.org/512503002
-rw-r--r-- | bench/MemcpyBench.cpp | 83 | ||||
-rw-r--r-- | bench/MemoryBench.cpp | 107 | ||||
-rw-r--r-- | bench/StackBench.cpp | 179 | ||||
-rw-r--r-- | gyp/bench.gypi | 1 |
4 files changed, 0 insertions, 370 deletions
diff --git a/bench/MemcpyBench.cpp b/bench/MemcpyBench.cpp index f550192751..567320be7c 100644 --- a/bench/MemcpyBench.cpp +++ b/bench/MemcpyBench.cpp @@ -67,87 +67,6 @@ BENCH(memcpy32_memcpy, 1000) BENCH(memcpy32_memcpy, 10000) BENCH(memcpy32_memcpy, 100000) -// Let the compiler's autovectorizer do what it thinks is best. -static void memcpy32_autovectorize(uint32_t* dst, const uint32_t* src, int count) { - while (count --> 0) { - *dst++ = *src++; - } -} -BENCH(memcpy32_autovectorize, 10) -BENCH(memcpy32_autovectorize, 100) -BENCH(memcpy32_autovectorize, 1000) -BENCH(memcpy32_autovectorize, 10000) -BENCH(memcpy32_autovectorize, 100000) - -#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 - -// Align dst to 16 bytes, then use aligned stores. src isn't algined, so use unaligned loads. -static void memcpy32_sse2_align(uint32_t* dst, const uint32_t* src, int count) { - if (count >= 16) { - while (uintptr_t(dst) & 0xF) { - *dst++ = *src++; - count--; - } - - __m128i* dst128 = reinterpret_cast<__m128i*>(dst); - const __m128i* src128 = reinterpret_cast<const __m128i*>(src); - dst += 16 * (count / 16); - src += 16 * (count / 16); - while (count >= 16) { - __m128i a = _mm_loadu_si128(src128++); - __m128i b = _mm_loadu_si128(src128++); - __m128i c = _mm_loadu_si128(src128++); - __m128i d = _mm_loadu_si128(src128++); - - _mm_store_si128(dst128++, a); - _mm_store_si128(dst128++, b); - _mm_store_si128(dst128++, c); - _mm_store_si128(dst128++, d); - - count -= 16; - } - } - - while (count --> 0) { - *dst++ = *src++; - } -} -BENCH(memcpy32_sse2_align, 10) -BENCH(memcpy32_sse2_align, 100) -BENCH(memcpy32_sse2_align, 1000) -BENCH(memcpy32_sse2_align, 10000) -BENCH(memcpy32_sse2_align, 100000) - -// Leave both dst and src unaliged, and so use unaligned stores for dst and unaligned loads for src. -static void memcpy32_sse2_unalign(uint32_t* dst, const uint32_t* src, int count) { - __m128i* dst128 = reinterpret_cast<__m128i*>(dst); - const __m128i* src128 = reinterpret_cast<const __m128i*>(src); - dst += 16 * (count / 16); - src += 16 * (count / 16); - while (count >= 16) { - __m128i a = _mm_loadu_si128(src128++); - __m128i b = _mm_loadu_si128(src128++); - __m128i c = _mm_loadu_si128(src128++); - __m128i d = _mm_loadu_si128(src128++); - - _mm_storeu_si128(dst128++, a); - _mm_storeu_si128(dst128++, b); - _mm_storeu_si128(dst128++, c); - _mm_storeu_si128(dst128++, d); - - count -= 16; - } - - while (count --> 0) { - *dst++ = *src++; - } -} -BENCH(memcpy32_sse2_unalign, 10) -BENCH(memcpy32_sse2_unalign, 100) -BENCH(memcpy32_sse2_unalign, 1000) -BENCH(memcpy32_sse2_unalign, 10000) -BENCH(memcpy32_sse2_unalign, 100000) - // Test our chosen best, from SkUtils.h BENCH(sk_memcpy32, 10) BENCH(sk_memcpy32, 100) @@ -155,6 +74,4 @@ BENCH(sk_memcpy32, 1000) BENCH(sk_memcpy32, 10000) BENCH(sk_memcpy32, 100000) -#endif // SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 - #undef BENCH diff --git a/bench/MemoryBench.cpp b/bench/MemoryBench.cpp index f8af1683b0..05ec83b3e0 100644 --- a/bench/MemoryBench.cpp +++ b/bench/MemoryBench.cpp @@ -56,110 +56,3 @@ private: DEF_BENCH( return new ChunkAllocBench(64); ) DEF_BENCH( return new ChunkAllocBench(8*1024); ) -static int* calloc(size_t num) { - return (int*)sk_calloc_throw(num*sizeof(int)); -} - -static int* malloc_bzero(size_t num) { - const size_t bytes = num*sizeof(int); - int* ints = (int*)sk_malloc_throw(bytes); - sk_bzero(ints, bytes); - return ints; -} - -class ZerosBench : public Benchmark { - size_t fNum; - bool fRead; - bool fWrite; - bool fUseCalloc; - SkString fName; -public: - ZerosBench(size_t num, bool read, bool write, bool useCalloc) - : fNum(num) - , fRead(read) - , fWrite(write) - , fUseCalloc(useCalloc) { - fName.printf("memory_%s", useCalloc ? "calloc" : "malloc_bzero"); - if (read && write) { - fName.appendf("_rw"); - } else if (read) { - fName.appendf("_r"); - } else if (write) { - fName.appendf("_w"); - } - fName.appendf("_" SK_SIZE_T_SPECIFIER, num); - } - - virtual bool isSuitableFor(Backend backend) SK_OVERRIDE { - return backend == kNonRendering_Backend; - } - -protected: - virtual const char* onGetName() SK_OVERRIDE { - return fName.c_str(); - } - - virtual void onDraw(const int loops, SkCanvas*) SK_OVERRIDE { - for (int i = 0; i < loops; i++) { - int* zeros = fUseCalloc ? calloc(fNum) : malloc_bzero(fNum); - if (fRead) { - volatile int x = 15; - for (size_t j = 0; j < fNum; j++) { - x ^= zeros[j]; - } - } - if (fWrite) { - for (size_t j = 0; j < fNum; j++) { - zeros[j] = 15; - } - } - sk_free(zeros); - } - } -}; - -// zero count r w useCalloc? -DEF_BENCH(return new ZerosBench(1024*1024, 0, 0, 0)) -DEF_BENCH(return new ZerosBench(1024*1024, 0, 0, 1)) -DEF_BENCH(return new ZerosBench(1024*1024, 0, 1, 0)) -DEF_BENCH(return new ZerosBench(1024*1024, 0, 1, 1)) -DEF_BENCH(return new ZerosBench(1024*1024, 1, 0, 0)) -DEF_BENCH(return new ZerosBench(1024*1024, 1, 0, 1)) -DEF_BENCH(return new ZerosBench(1024*1024, 1, 1, 0)) -DEF_BENCH(return new ZerosBench(1024*1024, 1, 1, 1)) - -DEF_BENCH(return new ZerosBench(256*1024, 0, 0, 0)) -DEF_BENCH(return new ZerosBench(256*1024, 0, 0, 1)) -DEF_BENCH(return new ZerosBench(256*1024, 0, 1, 0)) -DEF_BENCH(return new ZerosBench(256*1024, 0, 1, 1)) -DEF_BENCH(return new ZerosBench(256*1024, 1, 0, 0)) -DEF_BENCH(return new ZerosBench(256*1024, 1, 0, 1)) -DEF_BENCH(return new ZerosBench(256*1024, 1, 1, 0)) -DEF_BENCH(return new ZerosBench(256*1024, 1, 1, 1)) - -DEF_BENCH(return new ZerosBench(4*1024, 0, 0, 0)) -DEF_BENCH(return new ZerosBench(4*1024, 0, 0, 1)) -DEF_BENCH(return new ZerosBench(4*1024, 0, 1, 0)) -DEF_BENCH(return new ZerosBench(4*1024, 0, 1, 1)) -DEF_BENCH(return new ZerosBench(4*1024, 1, 0, 0)) -DEF_BENCH(return new ZerosBench(4*1024, 1, 0, 1)) -DEF_BENCH(return new ZerosBench(4*1024, 1, 1, 0)) -DEF_BENCH(return new ZerosBench(4*1024, 1, 1, 1)) - -DEF_BENCH(return new ZerosBench(300, 0, 0, 0)) -DEF_BENCH(return new ZerosBench(300, 0, 0, 1)) -DEF_BENCH(return new ZerosBench(300, 0, 1, 0)) -DEF_BENCH(return new ZerosBench(300, 0, 1, 1)) -DEF_BENCH(return new ZerosBench(300, 1, 0, 0)) -DEF_BENCH(return new ZerosBench(300, 1, 0, 1)) -DEF_BENCH(return new ZerosBench(300, 1, 1, 0)) -DEF_BENCH(return new ZerosBench(300, 1, 1, 1)) - -DEF_BENCH(return new ZerosBench(4, 0, 0, 0)) -DEF_BENCH(return new ZerosBench(4, 0, 0, 1)) -DEF_BENCH(return new ZerosBench(4, 0, 1, 0)) -DEF_BENCH(return new ZerosBench(4, 0, 1, 1)) -DEF_BENCH(return new ZerosBench(4, 1, 0, 0)) -DEF_BENCH(return new ZerosBench(4, 1, 0, 1)) -DEF_BENCH(return new ZerosBench(4, 1, 1, 0)) -DEF_BENCH(return new ZerosBench(4, 1, 1, 1)) diff --git a/bench/StackBench.cpp b/bench/StackBench.cpp deleted file mode 100644 index 3b41cb65ce..0000000000 --- a/bench/StackBench.cpp +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright 2014 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "Benchmark.h" -#include "SkRandom.h" - -#include "SkChunkAlloc.h" -#include "SkDeque.h" -#include "SkTArray.h" -#include "SkTDArray.h" - -// This file has several benchmarks using various data structures to do stack-like things: -// - push -// - push, immediately pop -// - push many, pop all of them -// - serial access -// - random access -// When a data structure doesn't suppport an operation efficiently, we leave that combination out. -// Where possible we hint to the data structure to allocate in 4K pages. -// -// These benchmarks may help you decide which data structure to use for a dynamically allocated -// ordered list of allocations that grows on one end. -// -// Current overall winner (01/2014): SkTDArray. -// It wins every benchmark on every machine I tried (Desktop, Nexus S, Laptop). - -template <typename Impl> -struct StackBench : public Benchmark { - virtual bool isSuitableFor(Backend b) SK_OVERRIDE { return b == kNonRendering_Backend; } - virtual const char* onGetName() SK_OVERRIDE { return Impl::kName; } - virtual void onDraw(const int loops, SkCanvas*) SK_OVERRIDE { Impl::bench(loops); } -}; - -#define BENCH(name) \ - struct name { static const char* const kName; static void bench(int); }; \ - const char* const name::kName = #name; \ - DEF_BENCH(return new StackBench<name>();) \ - void name::bench(int loops) - -static const int K = 2049; - -// Add K items, then iterate through them serially many times. - -BENCH(Deque_Serial) { - SkDeque s(sizeof(int), 1024); - for (int i = 0; i < K; i++) *(int*)s.push_back() = i; - - volatile int junk = 0; - for (int j = 0; j < loops; j++) { - SkDeque::Iter it(s, SkDeque::Iter::kFront_IterStart); - while(void* p = it.next()) { - junk += *(int*)p; - } - } -} - -BENCH(TArray_Serial) { - SkTArray<int, true> s; - for (int i = 0; i < K; i++) s.push_back(i); - - volatile int junk = 0; - for (int j = 0; j < loops; j++) { - for (int i = 0; i < s.count(); i++) junk += s[i]; - } -} - -BENCH(TDArray_Serial) { - SkTDArray<int> s; - for (int i = 0; i < K; i++) s.push(i); - - volatile int junk = 0; - for (int j = 0; j < loops; j++) { - for (int i = 0; i < s.count(); i++) junk += s[i]; - } -} - -// Add K items, then randomly access them many times. - -BENCH(TArray_RandomAccess) { - SkTArray<int, true> s; - for (int i = 0; i < K; i++) s.push_back(i); - - SkRandom rand; - volatile int junk = 0; - for (int i = 0; i < K*loops; i++) { - junk += s[rand.nextULessThan(K)]; - } -} - -BENCH(TDArray_RandomAccess) { - SkTDArray<int> s; - for (int i = 0; i < K; i++) s.push(i); - - SkRandom rand; - volatile int junk = 0; - for (int i = 0; i < K*loops; i++) { - junk += s[rand.nextULessThan(K)]; - } -} - -// Push many times. - -BENCH(ChunkAlloc_Push) { - SkChunkAlloc s(4096); - for (int i = 0; i < K*loops; i++) s.allocThrow(sizeof(int)); -} - -BENCH(Deque_Push) { - SkDeque s(sizeof(int), 1024); - for (int i = 0; i < K*loops; i++) *(int*)s.push_back() = i; -} - -BENCH(TArray_Push) { - SkTArray<int, true> s; - for (int i = 0; i < K*loops; i++) s.push_back(i); -} - -BENCH(TDArray_Push) { - SkTDArray<int> s; - for (int i = 0; i < K*loops; i++) s.push(i); -} - -// Push then immediately pop many times. - -BENCH(ChunkAlloc_PushPop) { - SkChunkAlloc s(4096); - for (int i = 0; i < K*loops; i++) { - void* p = s.allocThrow(sizeof(int)); - s.unalloc(p); - } -} - -BENCH(Deque_PushPop) { - SkDeque s(sizeof(int), 1024); - for (int i = 0; i < K*loops; i++) { - *(int*)s.push_back() = i; - s.pop_back(); - } -} - -BENCH(TArray_PushPop) { - SkTArray<int, true> s; - for (int i = 0; i < K*loops; i++) { - s.push_back(i); - s.pop_back(); - } -} - -BENCH(TDArray_PushPop) { - SkTDArray<int> s; - for (int i = 0; i < K*loops; i++) { - s.push(i); - s.pop(); - } -} - -// Push many items, then pop them all. - -BENCH(Deque_PushAllPopAll) { - SkDeque s(sizeof(int), 1024); - for (int i = 0; i < K*loops; i++) *(int*)s.push_back() = i; - for (int i = 0; i < K*loops; i++) s.pop_back(); -} - -BENCH(TArray_PushAllPopAll) { - SkTArray<int, true> s; - for (int i = 0; i < K*loops; i++) s.push_back(i); - for (int i = 0; i < K*loops; i++) s.pop_back(); -} - -BENCH(TDArray_PushAllPopAll) { - SkTDArray<int> s; - for (int i = 0; i < K*loops; i++) s.push(i); - for (int i = 0; i < K*loops; i++) s.pop(); -} diff --git a/gyp/bench.gypi b/gyp/bench.gypi index 72ed89a477..085b167d77 100644 --- a/gyp/bench.gypi +++ b/gyp/bench.gypi @@ -92,7 +92,6 @@ '../bench/ShaderMaskBench.cpp', '../bench/SkipZeroesBench.cpp', '../bench/SortBench.cpp', - '../bench/StackBench.cpp', '../bench/StrokeBench.cpp', '../bench/TableBench.cpp', '../bench/TextBench.cpp', |