aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkBlend_opts.h
blob: 86ae50288a9bd4eac9a01af397cbe32fc865c409 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

/*
 * Build, test, and benchmark this file (the "; and" chaining is fish-shell syntax):
 *
ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench  --samples 300 --nompd --match LinearSrcOver -q
 */

#ifndef SkBlend_opts_DEFINED
#define SkBlend_opts_DEFINED

#include "SkNx.h"
#include "SkPM4fPriv.h"

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
    #include <immintrin.h>
#endif

namespace SK_OPTS_NS {

static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
    if (src >= 0xFF000000) {
        *dst = src;
        return;
    }
    auto d = Sk4f_fromS32(*dst),
         s = Sk4f_fromS32( src);
    *dst = Sk4f_toS32(s + d * (1.0f - s[3]));
}

// Blend four consecutive source pixels over four consecutive destination
// pixels, one at a time and in increasing address order.
//
// dst: first of four writable pixels.
// src: first of four readable pixels.
static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
    for (int lane = 0; lane < 4; ++lane) {
        srcover_srgb_srgb_1(dst + lane, src[lane]);
    }
}

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41

    // Read four 32-bit pixels starting at p into one 128-bit register.
    // Uses the unaligned load, so p needs no particular alignment.
    static inline __m128i load(const uint32_t* p) {
        const __m128i* asVector = reinterpret_cast<const __m128i*>(p);
        return _mm_loadu_si128(asVector);
    }

    // Write the four 32-bit lanes of v to p[0..3].
    // Uses the unaligned store, so p needs no particular alignment.
    static inline void store(uint32_t* p, __m128i v) {
        __m128i* asVector = reinterpret_cast<__m128i*>(p);
        _mm_storeu_si128(asVector, v);
    }

    // Blend a run of source pixels over ndst destination pixels (SSE4.1 path).
    //
    // dst:      first destination pixel; ndst pixels are updated in place.
    // srcStart: first source pixel; nsrc pixels are readable.
    // ndst:     number of destination pixels to process.
    // nsrc:     number of source pixels. When ndst > nsrc the source run is
    //           restarted from srcStart for each further chunk of dst.
    //
    // Does nothing when nsrc <= 0: without that guard the outer loop would make
    // no progress (nsrc == 0) or index before/after the source (nsrc < 0).
    static void srcover_srgb_srgb(
            uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) {
        if (nsrc <= 0) {
            return;
        }

        const __m128i alphaMask = _mm_set1_epi32(0xFF000000);
        while (ndst > 0) {
            int count = SkTMin(ndst, nsrc);
            ndst -= count;
            const uint32_t* src = srcStart;
            // Largest multiple of four pixels that fits in this chunk.
            const uint32_t* end = dst + (count & ~3);

            while (dst < end) {
                __m128i pixels = load(src);

                if (_mm_testc_si128(pixels, alphaMask)) {
                    // Every alpha byte is 0xFF: copy all four pixels as-is.
                    store(dst, pixels);
                } else if (!_mm_testz_si128(pixels, alphaMask)) {
                    // At least one alpha bit is set: blend pixel by pixel.
                    srcover_srgb_srgb_4(dst, src);
                }
                // Otherwise every alpha byte is zero and dst is left untouched.

                dst += 4;
                src += 4;
            }

            // Up to three leftover pixels in this chunk.
            count = count & 3;
            while (count-- > 0) {
                srcover_srgb_srgb_1(dst++, *src++);
            }
        }
    }

#else

    // Blend a run of source pixels over ndst destination pixels (portable path).
    //
    // dst:  first destination pixel; ndst pixels are updated in place.
    // src:  first source pixel; nsrc pixels are readable.
    // ndst: number of destination pixels to process.
    // nsrc: number of source pixels. When ndst > nsrc the source run is
    //       reused from src[0] for each further chunk of dst.
    //
    // Does nothing when nsrc <= 0: without that guard the outer loop would
    // never reduce ndst and so would never terminate.
    static void srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
        if (nsrc <= 0) {
            return;
        }

        while (ndst > 0) {
            int n = SkTMin(ndst, nsrc);

            for (int i = 0; i < n; i++) {
                srcover_srgb_srgb_1(dst++, src[i]);
            }
            ndst -= n;
        }
    }

#endif

}  // namespace SK_OPTS_NS

#endif//SkBlend_opts_DEFINED