aboutsummaryrefslogtreecommitdiffhomepage
path: root/bench/MemsetBench.cpp
diff options
context:
space:
mode:
authorGravatar mtklein <mtklein@chromium.org>2015-04-09 14:05:17 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2015-04-09 14:05:17 -0700
commit 9ff378b01be0b0a3fc35677a2155ba4ade286cc2 (patch)
tree 2d9221ef31e3632dde74bda9a1023bc1ce71b27c /bench/MemsetBench.cpp
parent a1e41c6d9a2029eb836c6120bb154ccd25e1588d (diff)
Rewrite memset benches, then use results to add a small-N optimization.
The benches for N <= 10 get around 2x faster on my N7 and N9. I believe this is because of the reduced function-call-then-function-pointer-call overhead on the N7, and additionally because autovectorization seems to beat our NEON code for small N on the N9.

My desktop is unchanged, though that's probably because N=10 lies well within a region where memset's performance is essentially constant: N=1 and N=10 perform nearly identically, and N=100 takes only about 2x as long as either.

BUG=skia:
Review URL: https://codereview.chromium.org/1073863002
Diffstat (limited to 'bench/MemsetBench.cpp')
-rw-r--r-- bench/MemsetBench.cpp 151
1 files changed, 60 insertions, 91 deletions
diff --git a/bench/MemsetBench.cpp b/bench/MemsetBench.cpp
index e0390bba38..d1682bdd70 100644
--- a/bench/MemsetBench.cpp
+++ b/bench/MemsetBench.cpp
@@ -6,110 +6,79 @@
*/
#include "Benchmark.h"
-#include "SkCanvas.h"
-#include "SkString.h"
+#include "SkTemplates.h"
#include "SkUtils.h"
+template <typename T, bool kInline>
class MemsetBench : public Benchmark {
- SkString fName;
-
-protected:
- int fMinSize;
- int fMaxSize;
- enum {
- kBufferSize = 10000,
- VALUE32 = 0x12345678,
- VALUE16 = 0x1234
- };
-
- enum MemsetType {
- MEMSET16 = 16,
- MEMSET32 = 32
- };
-
public:
- MemsetBench(MemsetType type, int minSize, int maxSize) {
- SkASSERT((minSize < maxSize) && (maxSize <= kBufferSize));
- fMinSize = minSize;
- fMaxSize = maxSize;
- fName.printf("memset%d_%d_%d", type, minSize, maxSize);
- }
+ explicit MemsetBench(int n)
+ : fN(n)
+ , fBuffer(n)
+ , fName(SkStringPrintf("memset%d_%d%s", sizeof(T)*8, n, kInline ? "_inline" : "")) {}
- bool isSuitableFor(Backend backend) override {
- return backend == kNonRendering_Backend;
- }
+ bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
+ const char* onGetName() override { return fName.c_str(); }
- virtual void performTest() = 0;
-
-protected:
- const char* onGetName() override {
- return fName.c_str();
- }
-
- void onDraw(const int loops, SkCanvas* canvas) override {
- for (int i = 0; i < loops; ++i) {
- this->performTest();
- }
- }
+ void onDraw(const int loops, SkCanvas*) override;
private:
- typedef Benchmark INHERITED;
+ int fN;
+ SkAutoTMalloc<T> fBuffer;
+ SkString fName;
};
-class Memset32Bench : public MemsetBench {
- uint32_t kBuffer[kBufferSize + 3];
-public:
- Memset32Bench(int minSize, int maxSize)
- : INHERITED(MEMSET32, minSize, maxSize) {}
+template <> void MemsetBench<uint32_t, false>::onDraw(const int loops, SkCanvas*) {
+ for (int i = 0; i < 1000*loops; i++) {
+ sk_memset32(fBuffer.get(), 0xFACEB004, fN);
+ }
+}
-protected:
- void performTest() override {
- for(int j = fMinSize; j < fMaxSize; ++j){
- sk_memset32(kBuffer, VALUE32, j);
- sk_memset32(kBuffer + 1, VALUE32, j);
- sk_memset32(kBuffer + 2, VALUE32, j);
- sk_memset32(kBuffer + 3, VALUE32, j);
- }
+template <> void MemsetBench<uint16_t, false>::onDraw(const int loops, SkCanvas*) {
+ for (int i = 0; i < 1000*loops; i++) {
+ sk_memset16(fBuffer.get(), 0x4973, fN);
}
-private:
- typedef MemsetBench INHERITED;
-};
+}
-class Memset16Bench : public MemsetBench {
- uint16_t kBuffer[kBufferSize + 7];
-public:
- Memset16Bench(int minSize, int maxSize)
- : INHERITED(MEMSET16, minSize, maxSize) {}
+template <typename T>
+static void memsetT(T* dst, T val, int n) {
+ for (int i = 0; i < n; i++) { dst[i] = val; }
+}
-protected:
- void performTest() override {
- for(int j = fMinSize; j < fMaxSize; ++j){
- sk_memset16(kBuffer, VALUE16, j);
- sk_memset16(kBuffer + 1, VALUE16, j);
- sk_memset16(kBuffer + 2, VALUE16, j);
- sk_memset16(kBuffer + 3, VALUE16, j);
- sk_memset16(kBuffer + 4, VALUE16, j);
- sk_memset16(kBuffer + 5, VALUE16, j);
- sk_memset16(kBuffer + 6, VALUE16, j);
- sk_memset16(kBuffer + 7, VALUE16, j);
- }
+template <> void MemsetBench<uint32_t, true>::onDraw(const int loops, SkCanvas*) {
+ for (int i = 0; i < 1000*loops; i++) {
+ memsetT<uint32_t>(fBuffer.get(), 0xFACEB004, fN);
}
-private:
- typedef MemsetBench INHERITED;
-};
+}
-DEF_BENCH(return new Memset32Bench(1, 600);)
-DEF_BENCH(return new Memset32Bench(600, 800);)
-DEF_BENCH(return new Memset32Bench(800, 1000);)
-DEF_BENCH(return new Memset32Bench(1000, 2000);)
-DEF_BENCH(return new Memset32Bench(2000, 3000);)
-DEF_BENCH(return new Memset32Bench(3000, 4000);)
-DEF_BENCH(return new Memset32Bench(4000, 5000);)
-
-DEF_BENCH(return new Memset16Bench(1, 600);)
-DEF_BENCH(return new Memset16Bench(600, 800);)
-DEF_BENCH(return new Memset16Bench(800, 1000);)
-DEF_BENCH(return new Memset16Bench(1000, 2000);)
-DEF_BENCH(return new Memset16Bench(2000, 3000);)
-DEF_BENCH(return new Memset16Bench(3000, 4000);)
-DEF_BENCH(return new Memset16Bench(4000, 5000);)
+template <> void MemsetBench<uint16_t, true>::onDraw(const int loops, SkCanvas*) {
+ for (int i = 0; i < 1000*loops; i++) {
+ memsetT<uint16_t>(fBuffer.get(), 0x4973, fN);
+ }
+}
+
+DEF_BENCH(return (new MemsetBench<uint32_t, true>(1)));
+DEF_BENCH(return (new MemsetBench<uint32_t, false>(1)));
+DEF_BENCH(return (new MemsetBench<uint32_t, true>(10)));
+DEF_BENCH(return (new MemsetBench<uint32_t, false>(10)));
+DEF_BENCH(return (new MemsetBench<uint32_t, true>(100)));
+DEF_BENCH(return (new MemsetBench<uint32_t, false>(100)));
+DEF_BENCH(return (new MemsetBench<uint32_t, true>(1000)));
+DEF_BENCH(return (new MemsetBench<uint32_t, false>(1000)));
+DEF_BENCH(return (new MemsetBench<uint32_t, true>(10000)));
+DEF_BENCH(return (new MemsetBench<uint32_t, false>(10000)));
+DEF_BENCH(return (new MemsetBench<uint32_t, true>(100000)));
+DEF_BENCH(return (new MemsetBench<uint32_t, false>(100000)));
+
+DEF_BENCH(return (new MemsetBench<uint16_t, true>(1)));
+DEF_BENCH(return (new MemsetBench<uint16_t, false>(1)));
+DEF_BENCH(return (new MemsetBench<uint16_t, true>(10)));
+DEF_BENCH(return (new MemsetBench<uint16_t, false>(10)));
+DEF_BENCH(return (new MemsetBench<uint16_t, true>(100)));
+DEF_BENCH(return (new MemsetBench<uint16_t, false>(100)));
+DEF_BENCH(return (new MemsetBench<uint16_t, true>(1000)));
+DEF_BENCH(return (new MemsetBench<uint16_t, false>(1000)));
+DEF_BENCH(return (new MemsetBench<uint16_t, true>(10000)));
+DEF_BENCH(return (new MemsetBench<uint16_t, false>(10000)));
+DEF_BENCH(return (new MemsetBench<uint16_t, true>(100000)));
+DEF_BENCH(return (new MemsetBench<uint16_t, false>(100000)));