diff options
-rw-r--r-- | gyp/common_conditions.gypi | 6 | ||||
-rw-r--r-- | gyp/common_variables.gypi | 2 | ||||
-rw-r--r-- | gyp/opts.gyp | 17 | ||||
-rw-r--r-- | include/core/SkOnce.h | 8 | ||||
-rw-r--r-- | include/core/SkPreConfig.h | 4 | ||||
-rwxr-xr-x | platform_tools/barelinux/bin/arm64_make | 9 | ||||
-rw-r--r-- | src/core/SkUtilsArm.h | 2 | ||||
-rw-r--r-- | src/opts/SkBitmapProcState_opts_arm.cpp | 8 | ||||
-rw-r--r-- | src/opts/SkXfermode_opts_arm_neon.cpp | 76 |
9 files changed, 22 insertions, 110 deletions
diff --git a/gyp/common_conditions.gypi b/gyp/common_conditions.gypi index 777473b9cf..82f63f375b 100644 --- a/gyp/common_conditions.gypi +++ b/gyp/common_conditions.gypi @@ -8,12 +8,6 @@ 'SK_FORCE_DISTANCEFIELD_FONTS=<(skia_force_distancefield_fonts)', ], 'conditions' : [ - [ 'skia_arch_type == "arm64"', { - 'cflags': [ - '-ffp-contract=off', - ], - }], - [ 'skia_os == "win"', { 'defines': [ diff --git a/gyp/common_variables.gypi b/gyp/common_variables.gypi index 0d749f4004..6145d47e01 100644 --- a/gyp/common_variables.gypi +++ b/gyp/common_variables.gypi @@ -72,7 +72,7 @@ }, { 'skia_poppler_enabled%': 0, }], - [ 'skia_os in ["linux", "freebsd", "openbsd", "solaris", "mac"] or skia_arch_type == "arm64"', { + [ 'skia_os in ["linux", "freebsd", "openbsd", "solaris", "mac"]', { 'skia_arch_width%': 64, }, { 'skia_arch_width%': 32, diff --git a/gyp/opts.gyp b/gyp/opts.gyp index 783ea4eb7a..2fd0e95809 100644 --- a/gyp/opts.gyp +++ b/gyp/opts.gyp @@ -122,23 +122,6 @@ '-mno-apcs-frame', ] }], - [ 'skia_arch_type == "arm64"', { - 'sources': [ - '../src/opts/SkBitmapProcState_arm_neon.cpp', - '../src/opts/SkBitmapProcState_matrixProcs_neon.cpp', - '../src/opts/SkBitmapProcState_opts_arm.cpp', - '../src/opts/SkBlitMask_opts_arm.cpp', - '../src/opts/SkBlitMask_opts_arm_neon.cpp', - '../src/opts/SkBlitRow_opts_none.cpp', - '../src/opts/SkBlurImage_opts_arm.cpp', - '../src/opts/SkBlurImage_opts_neon.cpp', - '../src/opts/SkMorphology_opts_arm.cpp', - '../src/opts/SkMorphology_opts_neon.cpp', - '../src/opts/SkUtils_opts_none.cpp', - '../src/opts/SkXfermode_opts_arm.cpp', - '../src/opts/SkXfermode_opts_arm_neon.cpp', - ], - }], ], }, # For the same lame reasons as what is done for skia_opts, we have to diff --git a/include/core/SkOnce.h b/include/core/SkOnce.h index d5330b9408..daeb819d42 100644 --- a/include/core/SkOnce.h +++ b/include/core/SkOnce.h @@ -85,10 +85,12 @@ inline static void compiler_barrier() { #endif inline static void full_barrier_on_arm() { -#if (defined(SK_CPU_ARM) && SK_ARM_ARCH >= 7) || defined(SK_CPU_ARM64) - asm volatile("dmb ish" : : : "memory"); -#elif defined(SK_CPU_ARM) +#ifdef SK_CPU_ARM +# if SK_ARM_ARCH >= 7 + asm volatile("dmb" : : : "memory"); +# else asm volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory"); +# endif #endif } diff --git a/include/core/SkPreConfig.h b/include/core/SkPreConfig.h index 73b54886bf..406a159d89 100644 --- a/include/core/SkPreConfig.h +++ b/include/core/SkPreConfig.h @@ -174,10 +174,6 @@ #endif #endif -#if defined(__aarch64__) - #define SK_CPU_ARM64 -#endif - ////////////////////////////////////////////////////////////////////// #if !defined(SKIA_IMPLEMENTATION) diff --git a/platform_tools/barelinux/bin/arm64_make b/platform_tools/barelinux/bin/arm64_make index 96f16557e6..587181e927 100755 --- a/platform_tools/barelinux/bin/arm64_make +++ b/platform_tools/barelinux/bin/arm64_make @@ -7,7 +7,7 @@ usage() { cat >&2 <<EOF -arm64_make - this script builds a AArch64 version of skia that +arm64_make - this script builds a ARMv7 Aarch64 version of skia that does not depend on external libraries, perfect for putting in an embedded system running Linux. @@ -45,7 +45,12 @@ done export GYP_DEFINES="${GYP_DEFINES} \ skia_gpu=0 \ - skia_arch_type=arm64 \ + skia_arch_type=arm \ + skia_arch_width=64 \ + armv7=1 \ + armv8=1 \ + arm_neon=0 \ + arm_thumb=0 \ " "$(dirname "$0")/barelinux_make" -t "$BUILD_TYPE" diff --git a/src/core/SkUtilsArm.h b/src/core/SkUtilsArm.h index 93f626c6e7..b9a26143ee 100644 --- a/src/core/SkUtilsArm.h +++ b/src/core/SkUtilsArm.h @@ -23,7 +23,7 @@ #if defined(SK_CPU_ARM) && defined(__ARM_HAVE_OPTIONAL_NEON_SUPPORT) # define SK_ARM_NEON_MODE SK_ARM_NEON_MODE_DYNAMIC -#elif defined(SK_CPU_ARM) && defined(__ARM_HAVE_NEON) || defined(SK_CPU_ARM64) +#elif defined(SK_CPU_ARM) && defined(__ARM_HAVE_NEON) # define SK_ARM_NEON_MODE SK_ARM_NEON_MODE_ALWAYS #else # define SK_ARM_NEON_MODE SK_ARM_NEON_MODE_NONE diff --git a/src/opts/SkBitmapProcState_opts_arm.cpp b/src/opts/SkBitmapProcState_opts_arm.cpp index c2376d42a7..96fbebd4e1 100644 --- a/src/opts/SkBitmapProcState_opts_arm.cpp +++ b/src/opts/SkBitmapProcState_opts_arm.cpp @@ -15,7 +15,7 @@ #include "SkConvolver.h" -#if !defined(SK_CPU_ARM64) && SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) +#if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) void SI8_D16_nofilter_DX_arm( const SkBitmapProcState& s, const uint32_t* SK_RESTRICT xy, @@ -186,7 +186,7 @@ void SI8_opaque_D32_nofilter_DX_arm(const SkBitmapProcState& s, s.fBitmap->getColorTable()->unlockColors(); } -#endif // !defined(SK_CPU_ARM64) && SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) +#endif // SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) /////////////////////////////////////////////////////////////////////////////// @@ -194,7 +194,6 @@ void SI8_opaque_D32_nofilter_DX_arm(const SkBitmapProcState& s, otherwise the shader won't even look at the matrix/sampler */ void SkBitmapProcState::platformProcs() { -#if !defined(SK_CPU_ARM64) && SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) bool isOpaque = 256 == fAlphaScale; bool justDx = false; @@ -204,6 +203,7 @@ void SkBitmapProcState::platformProcs() { switch (fBitmap->config()) { case SkBitmap::kIndex8_Config: +#if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) if (justDx && SkPaint::kNone_FilterLevel == fFilterLevel) { #if 0 /* crashing on android device */ fSampleProc16 = SI8_D16_nofilter_DX_arm; @@ -215,11 +215,11 @@ void SkBitmapProcState::platformProcs() { fShaderProc32 = NULL; } } +#endif break; default: break; } -#endif } /////////////////////////////////////////////////////////////////////////////// diff --git a/src/opts/SkXfermode_opts_arm_neon.cpp b/src/opts/SkXfermode_opts_arm_neon.cpp index 70e92af66b..17a82f554b 100644 --- a/src/opts/SkXfermode_opts_arm_neon.cpp +++ b/src/opts/SkXfermode_opts_arm_neon.cpp @@ -41,13 +41,8 @@ static inline uint16x8_t SkAlphaMulAlpha_neon8_16(uint8x8_t color, uint8x8_t alp static inline uint8x8_t SkDiv255Round_neon8_32_8(int32x4_t p1, int32x4_t p2) { uint16x8_t tmp; -#ifdef SK_CPU_ARM64 - tmp = vmovn_high_u32(vmovn_u32(vreinterpretq_u32_s32(p1)), - vreinterpretq_u32_s32(p2)); -#else tmp = vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(p1)), vmovn_u32(vreinterpretq_u32_s32(p2))); -#endif tmp += vdupq_n_u16(128); tmp += vshrq_n_u16(tmp, 8); @@ -71,11 +66,7 @@ static inline uint8x8_t clamp_div255round_simd8_32(int32x4_t val1, int32x4_t val // Test if <= 0 cmp1 = vcleq_s32(val1, vdupq_n_s32(0)); cmp2 = vcleq_s32(val2, vdupq_n_s32(0)); -#ifdef SK_CPU_ARM64 - cmp16 = vmovn_high_u32(vmovn_u32(cmp1), cmp2); -#else cmp16 = vcombine_u16(vmovn_u32(cmp1), vmovn_u32(cmp2)); -#endif cmp8_1 = vmovn_u16(cmp16); // Init to zero @@ -84,11 +75,7 @@ static inline uint8x8_t clamp_div255round_simd8_32(int32x4_t val1, int32x4_t val // Test if >= 255*255 cmp1 = vcgeq_s32(val1, vdupq_n_s32(255*255)); cmp2 = vcgeq_s32(val2, vdupq_n_s32(255*255)); -#ifdef SK_CPU_ARM64 - cmp16 = vmovn_high_u32(vmovn_u32(cmp1), cmp2); -#else cmp16 = vcombine_u16(vmovn_u32(cmp1), vmovn_u32(cmp2)); -#endif cmp8 = vmovn_u16(cmp16); // Insert 255 where true @@ -422,19 +409,11 @@ static inline uint8x8_t overlay_hardlight_color(uint8x8_t sc, uint8x8_t dc, if (overlay) { dc2 = vshll_n_u8(dc, 1); scdc2_1 = vmull_u16(vget_low_u16(dc2), vget_low_u16(vmovl_u8(sc))); -#ifdef SK_CPU_ARM64 - scdc2_2 = vmull_high_u16(dc2, vmovl_u8(sc)); -#else scdc2_2 = vmull_u16(vget_high_u16(dc2), vget_high_u16(vmovl_u8(sc))); -#endif } else { sc2 = vshll_n_u8(sc, 1); scdc2_1 = vmull_u16(vget_low_u16(sc2), vget_low_u16(vmovl_u8(dc))); -#ifdef SK_CPU_ARM64 - scdc2_2 = vmull_high_u16(sc2, vmovl_u8(dc)); -#else scdc2_2 = vmull_u16(vget_high_u16(sc2), vget_high_u16(vmovl_u8(dc))); -#endif } // Calc COM @@ -442,20 +421,12 @@ static inline uint8x8_t overlay_hardlight_color(uint8x8_t sc, uint8x8_t dc, com1 = vreinterpretq_s32_u32( vmull_u16(vget_low_u16(const255), vget_low_u16(sc_plus_dc))); com2 = vreinterpretq_s32_u32( -#ifdef SK_CPU_ARM64 - vmull_high_u16(const255, sc_plus_dc)); -#else vmull_u16(vget_high_u16(const255), vget_high_u16(sc_plus_dc))); -#endif // Calc SUB int32x4_t sub1, sub2; sub1 = vreinterpretq_s32_u32(vaddl_u16(vget_low_u16(scda), vget_low_u16(dcsa))); -#ifdef SK_CPU_ARM64 - sub2 = vreinterpretq_s32_u32(vaddl_high_u16(scda, dcsa)); -#else sub2 = vreinterpretq_s32_u32(vaddl_u16(vget_high_u16(scda), vget_high_u16(dcsa))); -#endif sub1 = vsubq_s32(sub1, vreinterpretq_s32_u32(scdc2_1)); sub2 = vsubq_s32(sub2, vreinterpretq_s32_u32(scdc2_2)); @@ -473,14 +444,10 @@ static inline uint8x8_t overlay_hardlight_color(uint8x8_t sc, uint8x8_t dc, int32x4_t val2_1, val2_2; uint32x4_t cmp1, cmp2; - // Doing a signed lengthening allows to save a few instructions - // thanks to sign extension. - cmp1 = vreinterpretq_u32_s32(vmovl_s16(vreinterpret_s16_u16(vget_low_u16(cmp)))); -#ifdef SK_CPU_ARM64 - cmp2 = vreinterpretq_u32_s32(vmovl_high_s16(vreinterpretq_s16_u16(cmp))); -#else - cmp2 = vreinterpretq_u32_s32(vmovl_s16(vreinterpret_s16_u16(vget_high_u16(cmp)))); -#endif + cmp1 = vmovl_u16(vget_low_u16(cmp)); + cmp1 |= vshlq_n_u32(cmp1, 16); + cmp2 = vmovl_u16(vget_high_u16(cmp)); + cmp2 |= vshlq_n_u32(cmp2, 16); // Calc COM - SUB val1_1 = com1 - sub1; @@ -491,11 +458,7 @@ static inline uint8x8_t overlay_hardlight_color(uint8x8_t sc, uint8x8_t dc, val2_2 = com2 + sub2; val2_1 = vsubq_s32(val2_1, vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(sada)))); -#ifdef SK_CPU_ARM64 - val2_2 = vsubq_s32(val2_2, vreinterpretq_s32_u32(vmovl_high_u16(sada))); -#else val2_2 = vsubq_s32(val2_2, vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(sada)))); -#endif // Insert where needed val1_1 = vbslq_s32(cmp1, val1_1, val2_1); @@ -665,19 +628,11 @@ static inline uint8x8_t exclusion_color(uint8x8_t sc, uint8x8_t dc, term1_1 = vreinterpretq_s32_u32( vmull_u16(vget_low_u16(const255), vget_low_u16(sc_plus_dc))); term1_2 = vreinterpretq_s32_u32( -#ifdef SK_CPU_ARM64 - vmull_high_u16(const255, sc_plus_dc)); -#else vmull_u16(vget_high_u16(const255), vget_high_u16(sc_plus_dc))); -#endif /* Calc the second term */ term2_1 = vreinterpretq_s32_u32(vshll_n_u16(vget_low_u16(scdc), 1)); -#ifdef SK_CPU_ARM64 - term2_2 = vreinterpretq_s32_u32(vshll_high_n_u16(scdc, 1)); -#else term2_2 = vreinterpretq_s32_u32(vshll_n_u16(vget_high_u16(scdc), 1)); -#endif return clamp_div255round_simd8_32(term1_1 - term2_1, term1_2 - term2_2); } @@ -706,18 +661,10 @@ static inline uint8x8_t blendfunc_multiply_color(uint8x8_t sc, uint8x8_t dc, scdc = vmull_u8(sc, dc); val1 = vaddl_u16(vget_low_u16(t1), vget_low_u16(t2)); -#ifdef SK_CPU_ARM64 - val2 = vaddl_high_u16(t1, t2); -#else val2 = vaddl_u16(vget_high_u16(t1), vget_high_u16(t2)); -#endif val1 = vaddw_u16(val1, vget_low_u16(scdc)); -#ifdef SK_CPU_ARM64 - val2 = vaddw_high_u16(val2, scdc); -#else val2 = vaddw_u16(val2, vget_high_u16(scdc)); -#endif return clamp_div255round_simd8_32( vreinterpretq_s32_u32(val1), vreinterpretq_s32_u32(val2)); @@ -761,10 +708,6 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[], while (count >= 8) { uint8x8x4_t vsrc, vdst, vres; -#ifdef SK_CPU_ARM64 - vsrc = vld4_u8((uint8_t*)src); - vdst = vld4_u8((uint8_t*)dst); -#else #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) asm volatile ( "vld4.u8 %h[vsrc], [%[src]]! \t\n" @@ -797,7 +740,6 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[], vsrc.val[2] = d2; vdst.val[2] = d6; vsrc.val[3] = d3; vdst.val[3] = d7; #endif -#endif // #ifdef SK_CPU_ARM64 vres = procSIMD(vsrc, vdst); @@ -805,9 +747,6 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[], count -= 8; dst += 8; -#ifdef SK_CPU_ARM64 - src += 8; -#endif } // Leftovers for (int i = 0; i < count; i++) { @@ -844,9 +783,6 @@ void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst, vdst = vld1q_u16(dst); -#ifdef SK_CPU_ARM64 - vsrc = vld4_u8((uint8_t*)src); -#else #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) asm volatile ( "vld4.u8 %h[vsrc], [%[src]]! \t\n" @@ -870,7 +806,6 @@ void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst, vsrc.val[2] = d2; vsrc.val[3] = d3; #endif -#endif // #ifdef SK_CPU_ARM64 vdst32 = SkPixel16ToPixel32_neon8(vdst); vres = procSIMD(vsrc, vdst32); @@ -880,9 +815,6 @@ void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst, count -= 8; dst += 8; -#ifdef SK_CPU_ARM64 - src += 8; -#endif } for (int i = 0; i < count; i++) { SkPMColor dstC = SkPixel16ToPixel32(dst[i]); |