aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/core/SkOpts.cpp
blob: ee88b231690b0e8d7f0cd895a9609f9bf680235b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkOnce.h"
#include "SkOpts.h"

#define SK_OPTS_NS sk_default
#include "SkBlitMask_opts.h"
#include "SkBlitRow_opts.h"
#include "SkBlurImageFilter_opts.h"
#include "SkColorCubeFilter_opts.h"
#include "SkFloatingPoint_opts.h"
#include "SkMatrix_opts.h"
#include "SkMorphologyImageFilter_opts.h"
#include "SkTextureCompressor_opts.h"
#include "SkUtils_opts.h"
#include "SkXfermode_opts.h"

// Platform-specific helpers for runtime CPU feature detection, used by SkOpts::init().
//   cpuid(abcd)  - runs CPUID leaf 1 and stores the four result registers into abcd[0..3].
//   cpuid7(abcd) - runs CPUID leaf 7, sub-leaf 0, and stores the results the same way.
//   xgetbv(xcr)  - reads extended control register `xcr` and returns it as a 64-bit value.
#if defined(SK_CPU_X86) && !defined(SK_BUILD_FOR_IOS)
    #if defined(SK_BUILD_FOR_WIN32)
        // Windows builds: use the compiler intrinsics from <intrin.h>.
        #include <intrin.h>
        static void cpuid (uint32_t abcd[4]) { __cpuid  ((int*)abcd, 1);    }
        static void cpuid7(uint32_t abcd[4]) { __cpuidex((int*)abcd, 7, 0); }
        static uint64_t xgetbv(uint32_t xcr) { return _xgetbv(xcr); }
    #else
        // Everything else on x86: use <cpuid.h> plus inline assembly.
        #include <cpuid.h>
        #if !defined(__cpuid_count)  // Old Mac Clang doesn't have this defined.
            // Fallback definition: issue CPUID with eax/ecx as inputs and hand back
            // eax, ebx, ecx, edx in a, b, c, d.
            #define  __cpuid_count(eax, ecx, a, b, c, d) \
                __asm__("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eax), "2"(ecx))
        #endif
        // The return value of __get_cpuid() is ignored here; init() zero-fills abcd
        // before calling.
        static void cpuid (uint32_t abcd[4]) { __get_cpuid(1, abcd+0, abcd+1, abcd+2, abcd+3); }
        static void cpuid7(uint32_t abcd[4]) {
            __cpuid_count(7, 0, abcd[0], abcd[1], abcd[2], abcd[3]);
        }
        static uint64_t xgetbv(uint32_t xcr) {
            uint32_t eax, edx;
            // xgetbv takes the register index in ecx and returns the value split
            // across edx (high 32 bits) and eax (low 32 bits).
            __asm__ __volatile__ ( "xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
            return (uint64_t)(edx) << 32 | eax;
        }
    #endif
#elif !defined(SK_ARM_HAS_NEON)      && \
       defined(SK_CPU_ARM32)         && \
       defined(SK_BUILD_FOR_ANDROID) && \
      !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
    // 32-bit ARM Android builds compiled without NEON: init() asks
    // android_getCpuFeatures() at runtime, which this header provides.
    #include <cpu-features.h>
#endif

namespace sk_default {

// The variable names in the functions below pretend the input is BGRA,
// but the functions work equally well on RGBA and BGRA pixels.

// Multiplies the three low bytes of every pixel by that pixel's alpha (its top byte),
// dividing by 255 with rounding.  The alpha byte itself passes through untouched.
// Each src[i] is read before dst[i] is written.
static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) {
    // channel * alpha / 255, rounded.  Both inputs are in [0,255], so is the result.
    auto scale = [](uint32_t channel, uint32_t alpha) -> uint32_t {
        return (channel*alpha + 127) / 255;
    };

    for (int i = 0; i < count; i++) {
        const uint32_t px    = src[i];
        const uint32_t alpha = px >> 24;

        const uint32_t hi  = scale((px >> 16) & 0xFF, alpha);
        const uint32_t mid = scale((px >>  8) & 0xFF, alpha);
        const uint32_t lo  = scale((px >>  0) & 0xFF, alpha);

        dst[i] = (alpha << 24) | (hi << 16) | (mid << 8) | lo;
    }
}

// Exchanges byte 0 and byte 2 of every pixel (the two colour channels on either side
// of green), leaving byte 1 and the top alpha byte where they are.
static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
    for (int i = 0; i < count; i++) {
        const uint32_t px = src[i];

        const uint32_t stay     =  px & 0xFF00FF00;         // alpha and green keep their place
        const uint32_t hiToLow  = (px & 0x00FF0000) >> 16;  // byte 2 moves down to byte 0
        const uint32_t lowToHi  = (px & 0x000000FF) << 16;  // byte 0 moves up to byte 2

        dst[i] = stay | lowToHi | hiToLow;
    }
}

// Does both jobs in one pass: scales the three low bytes of every pixel by the pixel's
// alpha (top byte, divide by 255 with rounding) and writes them back with byte 0 and
// byte 2 exchanged.  The alpha byte is copied as-is.
static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
    // channel * alpha / 255, rounded.  Both inputs are in [0,255], so is the result.
    auto scale = [](uint32_t channel, uint32_t alpha) -> uint32_t {
        return (channel*alpha + 127) / 255;
    };

    const uint32_t* in  = src;
    uint32_t*       out = dst;
    for (int left = count; left > 0; left--) {
        const uint32_t px    = *in++;
        const uint32_t alpha = px >> 24;

        const uint32_t fromByte2 = scale((px >> 16) & 0xFF, alpha);
        const uint32_t fromByte1 = scale((px >>  8) & 0xFF, alpha);
        const uint32_t fromByte0 = scale( px        & 0xFF, alpha);

        // Byte 0 and byte 2 trade places on the way out.
        *out++ = (alpha << 24) | (fromByte0 << 16) | (fromByte1 << 8) | fromByte2;
    }
}

}  // namespace sk_default

namespace SkOpts {
    // Define default function pointer values here...
    // If our global compile options are set high enough, these defaults might even be
    // CPU-specialized, e.g. a typical x86-64 machine might start with SSE2 defaults.
    // They'll still get a chance to be replaced with even better ones, e.g. using SSE4.1.
    //
    // Every definition uses decltype(name), so its type comes from an earlier declaration
    // of the same name (presumably in SkOpts.h), and starts out pointing at the
    // implementation of that name in namespace sk_default.

    // Math, memory fill, xfermode and color-cube routines.
    decltype(rsqrt)                     rsqrt = sk_default::rsqrt;
    decltype(memset16)               memset16 = sk_default::memset16;
    decltype(memset32)               memset32 = sk_default::memset32;
    decltype(create_xfermode) create_xfermode = sk_default::create_xfermode;
    decltype(color_cube_filter_span) color_cube_filter_span = sk_default::color_cube_filter_span;

    // Box blur passes.
    decltype(box_blur_xx) box_blur_xx = sk_default::box_blur_xx;
    decltype(box_blur_xy) box_blur_xy = sk_default::box_blur_xy;
    decltype(box_blur_yx) box_blur_yx = sk_default::box_blur_yx;

    // Morphology (dilate / erode) along each axis.
    decltype(dilate_x) dilate_x = sk_default::dilate_x;
    decltype(dilate_y) dilate_y = sk_default::dilate_y;
    decltype( erode_x)  erode_x = sk_default::erode_x;
    decltype( erode_y)  erode_y = sk_default::erode_y;

    // Texture compression.
    decltype(texture_compressor)       texture_compressor = sk_default::texture_compressor;
    decltype(fill_block_dimensions) fill_block_dimensions = sk_default::fill_block_dimensions;

    // Mask and row blitters.
    decltype(blit_mask_d32_a8) blit_mask_d32_a8 = sk_default::blit_mask_d32_a8;

    decltype(blit_row_color32) blit_row_color32 = sk_default::blit_row_color32;

    // Matrix point-mapping routines.
    decltype(matrix_translate)       matrix_translate       = sk_default::matrix_translate;
    decltype(matrix_scale_translate) matrix_scale_translate = sk_default::matrix_scale_translate;
    decltype(matrix_affine)          matrix_affine          = sk_default::matrix_affine;

    // 8888 pixel conversions; the sk_default versions of these three are defined
    // earlier in this file.
    decltype(       premul_xxxa)        premul_xxxa = sk_default::       premul_xxxa;
    decltype(       swaprb_xxxa)        swaprb_xxxa = sk_default::       swaprb_xxxa;
    decltype(premul_swaprb_xxxa) premul_swaprb_xxxa = sk_default::premul_swaprb_xxxa;

    // Each Init_foo() that is only declared here is defined in src/opts/SkOpts_foo.cpp.
    // Init_sse42() and Init_avx2() are the exceptions: they are defined right here with
    // empty bodies, so calling them does nothing.
    // init() calls whichever of these the running CPU is detected to support.
    void Init_ssse3();
    void Init_sse41();
    void Init_sse42() {}
    void Init_avx();
    void Init_avx2() {}
    void Init_neon();

    // Detects the instruction sets available on the running CPU and calls the matching
    // Init_foo() hooks.  On x86 the hooks are tried in the order SSSE3, SSE4.1, SSE4.2,
    // AVX, AVX2; on NEON-less 32-bit ARM Android builds only Init_neon() is considered.
    // On any other configuration this function does nothing.
    static void init() {
        // TODO: Chrome's not linking _sse* opts on iOS simulator builds.  Bug or feature?
    #if defined(SK_CPU_X86) && !defined(SK_BUILD_FOR_IOS)
        // Feature masks for ecx as returned by cpuid leaf 1.
        const uint32_t kSSSE3   = 1u <<  9,
                       kSSE41   = 1u << 19,
                       kSSE42   = 1u << 20,
                       kAVXBits = 7u << 26;   // bits 26-28, all three are required
        // Mask applied to xgetbv(0): both bit 1 and bit 2 must be set.
        const uint64_t kXCR0Needed = 6;
        // Feature mask for ebx as returned by cpuid leaf 7.
        const uint32_t kAVX2 = 1u << 5;

        uint32_t leaf1[] = {0,0,0,0};
        cpuid(leaf1);
        const uint32_t ecx = leaf1[2];

        if (ecx & kSSSE3) { Init_ssse3(); }
        if (ecx & kSSE41) { Init_sse41(); }
        if (ecx & kSSE42) { Init_sse42(); }

        // AVX detection's kind of a pain.  This is cribbed from Chromium.
        // The cpuid bits are tested first; xgetbv is only executed once they pass.
        if ((ecx & kAVXBits) != kAVXBits) {
            return;
        }
        if ((xgetbv(0) & kXCR0Needed) != kXCR0Needed) {
            return;   // the OS-side XSAVE check failed
        }
        Init_avx();

        // AVX2 is only looked for once AVX has been confirmed.
        uint32_t leaf7[] = {0,0,0,0};
        cpuid7(leaf7);
        if (leaf7[1] & kAVX2) { Init_avx2(); }

    #elif !defined(SK_ARM_HAS_NEON)      && \
           defined(SK_CPU_ARM32)         && \
           defined(SK_BUILD_FOR_ANDROID) && \
          !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
        const bool hasNeon = (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
        if (hasNeon) { Init_neon(); }
    #endif
    }

    // Public entry point.  Hands init() to SkOnce together with the static gInitOnce
    // state -- presumably so the detection work runs only once no matter how often
    // Init() is called (confirm against SkOnce.h).
    SK_DECLARE_STATIC_ONCE(gInitOnce);
    void Init() { SkOnce(&gInitOnce, init); }

#if SK_ALLOW_STATIC_GLOBAL_INITIALIZERS
    // When static global initializers are allowed, constructing this file-local object
    // calls Init() during static initialization, so callers get the CPU-specific
    // routines without having to call SkOpts::Init() themselves.
    static struct AutoInit {
        AutoInit() { Init(); }
    } gAutoInit;
#endif
}