// path: root/src/core/Sk4px.h
// blob: af078ca92b3151b6f7578d20161d5b7cd7d5dc0b (plain)
/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef Sk4px_DEFINED
#define Sk4px_DEFINED

#include "SkNx.h"
#include "SkColor.h"

// 1, 2 or 4 SkPMColors, generally vectorized.
class Sk4px : public Sk16b {
public:
    Sk4px(SkAlpha a) : INHERITED(a) {} // Duplicate 16x.
    Sk4px(SkPMColor);                  // Duplicate 4x.
    Sk4px(const Sk16b& v) : INHERITED(v) {}

    // ARGB argb XYZW xyzw -> AAAA aaaa XXXX xxxx
    Sk4px alphas() const;

    // When loading or storing fewer than 4 SkPMColors, we use the low lanes.
    static Sk4px Load4(const SkPMColor[4]);
    static Sk4px Load2(const SkPMColor[2]);
    static Sk4px Load1(const SkPMColor[1]);

    // Ditto for Alphas... Load2Alphas fills the low two lanes of Sk4px.
    static Sk4px Load4Alphas(const SkAlpha[4]);  // AaXx -> AAAA aaaa XXXX xxxx
    static Sk4px Load2Alphas(const SkAlpha[2]);  // Aa   -> AAAA aaaa 0000 0000

    void store4(SkPMColor[4]) const;
    void store2(SkPMColor[2]) const;
    void store1(SkPMColor[1]) const;

    // 1, 2, or 4 SkPMColors with 16-bit components.
    // This is most useful as the result of a multiply, e.g. from mulWiden().
    class Wide : public Sk16h {
    public:
        Wide(const Sk16h& v) : Sk16h(v) {}

        // Pack the top byte of each component back down into 4 SkPMColors.
        Sk4px addNarrowHi(const Sk16h&) const;

        Sk4px div255TruncNarrow() const { return this->addNarrowHi(*this >> 8); }
        Sk4px div255RoundNarrow() const {
            return Sk4px::Wide(*this + Sk16h(128)).div255TruncNarrow();
        }

    private:
        typedef Sk16h INHERITED;
    };

    Wide widenLo() const;               // ARGB -> 0A 0R 0G 0B
    Wide widenHi() const;               // ARGB -> A0 R0 G0 B0
    Wide mulWiden(const Sk16b&) const;  // 8-bit x 8-bit -> 16-bit components.

    // A generic driver that maps fn over a src array into a dst array.
    // fn should take an Sk4px (4 src pixels) and return an Sk4px (4 dst pixels).
    template <typename Fn>
    static void MapSrc(int count, SkPMColor* dst, const SkPMColor* src, Fn fn) {
        // This looks a bit odd, but it helps loop-invariant hoisting across different calls to fn.
        // Basically, we need to make sure we keep things inside a single loop.
        while (count > 0) {
            if (count >= 8) {
                Sk4px dst0 = fn(Load4(src+0)),
                      dst4 = fn(Load4(src+4));
                dst0.store4(dst+0);
                dst4.store4(dst+4);
                dst += 8; src += 8; count -= 8;
                continue;  // Keep our stride at 8 pixels as long as possible.
            }
            SkASSERT(count <= 7);
            if (count >= 4) {
                fn(Load4(src)).store4(dst);
                dst += 4; src += 4; count -= 4;
            }
            if (count >= 2) {
                fn(Load2(src)).store2(dst);
                dst += 2; src += 2; count -= 2;
            }
            if (count >= 1) {
                fn(Load1(src)).store1(dst);
            }
            break;
        }
    }

    // As above, but with dst4' = fn(dst4, src4).
    template <typename Fn>
    static void MapDstSrc(int count, SkPMColor* dst, const SkPMColor* src, Fn fn) {
        while (count > 0) {
            if (count >= 8) {
                Sk4px dst0 = fn(Load4(dst+0), Load4(src+0)),
                      dst4 = fn(Load4(dst+4), Load4(src+4));
                dst0.store4(dst+0);
                dst4.store4(dst+4);
                dst += 8; src += 8; count -= 8;
                continue;  // Keep our stride at 8 pixels as long as possible.
            }
            SkASSERT(count <= 7);
            if (count >= 4) {
                fn(Load4(dst), Load4(src)).store4(dst);
                dst += 4; src += 4; count -= 4;
            }
            if (count >= 2) {
                fn(Load2(dst), Load2(src)).store2(dst);
                dst += 2; src += 2; count -= 2;
            }
            if (count >= 1) {
                fn(Load1(dst), Load1(src)).store1(dst);
            }
            break;
        }
    }

    // As above, but with dst4' = fn(dst4, src4, alpha4).
    template <typename Fn>
    static void MapDstSrcAlpha(
            int count, SkPMColor* dst, const SkPMColor* src, const SkAlpha* a, Fn fn) {
        while (count > 0) {
            if (count >= 8) {
                Sk4px alpha0 = Load4Alphas(a+0),
                      alpha4 = Load4Alphas(a+4);
                Sk4px dst0 = fn(Load4(dst+0), Load4(src+0), alpha0),
                      dst4 = fn(Load4(dst+4), Load4(src+4), alpha4);
                dst0.store4(dst+0);
                dst4.store4(dst+4);
                dst += 8; src += 8; a += 8; count -= 8;
                continue;  // Keep our stride at 8 pixels as long as possible.
            }
            SkASSERT(count <= 7);
            if (count >= 4) {
                Sk4px alpha = Load4Alphas(a);
                fn(Load4(dst), Load4(src), alpha).store4(dst);
                dst += 4; src += 4; a += 4; count -= 4;
            }
            if (count >= 2) {
                Sk4px alpha = Load2Alphas(a);
                fn(Load2(dst), Load2(src), alpha).store2(dst);
                dst += 2; src += 2; a += 2; count -= 2;
            }
            if (count >= 1) {
                Sk4px alpha(*a);
                fn(Load1(dst), Load1(src), alpha).store1(dst);
            }
            break;
        }
    }

private:
    typedef Sk16b INHERITED;
};

#ifdef SKNX_NO_SIMD
    #include "../opts/Sk4px_none.h"
#else
    #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
        #include "../opts/Sk4px_SSE2.h"
    #elif defined(SK_ARM_HAS_NEON)
        #include "../opts/Sk4px_NEON.h"
    #else
        #include "../opts/Sk4px_none.h"
    #endif
#endif

#endif//Sk4px_DEFINED