aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/jumper/SkJumper_stages.cpp
diff options
context:
space:
mode:
author Mike Klein <mtklein@chromium.org> 2017-03-02 11:16:22 -0500
committer Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-03-02 17:20:58 +0000
commit 4e7fc0c5da88e0e4ccc1dff23f4f2ff134130acd (patch)
tree 72f58353cc8b48792da8f354a5e36f608036b4f6 /src/jumper/SkJumper_stages.cpp
parent b56dedf70bdbf1a5e1e04dfbc83a374bffe6b00f (diff)
SkJumper: be more precise by rejecting data sections.
This allows %rip addressing as long as it's not going into a data section. This lets us use switch tables, avoiding loops and stack. On HSW: SkRasterPipeline_f16: 90 -> 63; SkRasterPipeline_srgb: 170 -> 97. Change-Id: I3ca2e4ff819b70beea78be75579f9d80c06979e8. Reviewed-on: https://skia-review.googlesource.com/9146. Reviewed-by: Herb Derby <herb@google.com>. Commit-Queue: Mike Klein <mtklein@chromium.org>.
Diffstat (limited to 'src/jumper/SkJumper_stages.cpp')
-rw-r--r-- src/jumper/SkJumper_stages.cpp 24
1 files changed, 18 insertions, 6 deletions
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index ca7469aa0d..88a1201809 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -240,11 +240,17 @@ static const size_t kStride = sizeof(F) / sizeof(float);
template <typename V, typename T>
static inline V load(const T* src, size_t tail) {
#if defined(JUMPER)
+ __builtin_assume(tail < kStride);
if (__builtin_expect(tail, 0)) {
V v{}; // Any inactive lanes are zeroed.
- #pragma nounroll
- for (size_t i = 0; i < tail; i++) {
- v[i] = src[i];
+ switch (tail-1) {
+ case 6: v[6] = src[6];
+ case 5: v[5] = src[5];
+ case 4: v[4] = src[4];
+ case 3: v[3] = src[3];
+ case 2: v[2] = src[2];
+ case 1: v[1] = src[1];
+ case 0: v[0] = src[0];
}
return v;
}
@@ -272,10 +278,16 @@ static inline V load(const T* src, size_t tail) {
template <typename V, typename T>
static inline void store(T* dst, V v, size_t tail) {
#if defined(JUMPER)
+ __builtin_assume(tail < kStride);
if (__builtin_expect(tail, 0)) {
- #pragma nounroll
- for (size_t i = 0; i < tail; i++) {
- dst[i] = v[i];
+ switch (tail-1) {
+ case 6: dst[6] = v[6];
+ case 5: dst[5] = v[5];
+ case 4: dst[4] = v[4];
+ case 3: dst[3] = v[3];
+ case 2: dst[2] = v[2];
+ case 1: dst[1] = v[1];
+ case 0: dst[0] = v[0];
}
return;
}