aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar digit@google.com <digit@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>2012-08-01 14:25:07 +0000
committerGravatar digit@google.com <digit@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>2012-08-01 14:25:07 +0000
commitfce02aca62525c3041226501574f740f7ea3714b (patch)
tree9053a19f5850210917e47ef54c778ccd5cae686d
parent47de6787a5aa677157fd468c5798eeb01c6c9139 (diff)
arm: dynamic NEON support for SkBitmapProcState matrix operations.
This patch implements dynamic ARM NEON support for the functions implemented by src/core/SkBitmapProcState_matrixProcs.cpp. - Because the SkBitmapProcState_matrix_{clamp,repeat}.h headers are NEON-specific, they are renamed with a _neon.h suffix, and moved to src/opts/ (from src/core/) - Add a new file src/opts/SkBitmapProcState_matrixProcs_neon.cpp which implements the NEON code paths for all builds, and add it to the 'opts_neon' static library. - Modify SkBitmapProcState_matrixProcs.cpp to select the right code-path depending on our build configuration. Note that in the case where 'arm_neon == 1', we do not embed regular ARM code paths in the final binary. Only 'arm_neon_optional == 1' builds will contain both regular and NEON code paths at the same time. Note that there doesn't seem to be a simple way to put the NEON-specific selection from that currently is in SkBitmapProcState_matrixProcs.cpp into src/opts/. Doing so would require much more drastic restructuring. This is also true of the other SkBitmapProcState source files that will be touched in a future patch. Review URL: https://codereview.appspot.com/6453065 git-svn-id: http://skia.googlecode.com/svn/trunk@4888 2bbb7eff-a529-9590-31e7-b0007b416f81
-rw-r--r--gyp/opts.gyp6
-rw-r--r--src/core/SkBitmapProcState_matrixProcs.cpp128
-rw-r--r--src/core/SkUtilsArm.cpp9
-rw-r--r--src/opts/SkBitmapProcState_matrixProcs_neon.cpp145
-rw-r--r--src/opts/SkBitmapProcState_matrix_clamp_neon.h (renamed from src/core/SkBitmapProcState_matrix_clamp.h)10
-rw-r--r--src/opts/SkBitmapProcState_matrix_repeat_neon.h (renamed from src/core/SkBitmapProcState_matrix_repeat.h)45
6 files changed, 197 insertions, 146 deletions
diff --git a/gyp/opts.gyp b/gyp/opts.gyp
index ca2b1c1028..8c85b9ab43 100644
--- a/gyp/opts.gyp
+++ b/gyp/opts.gyp
@@ -126,6 +126,7 @@
'../include/config',
'../include/core',
'../src/core',
+ '../src/opts',
],
'cflags!': [
'-fno-omit-frame-pointer',
@@ -134,12 +135,15 @@
'-mfpu=vfpv3-d16',
],
'cflags': [
- '-fomit-frame-pointer',
'-mfpu=neon',
+ '-fomit-frame-pointer',
],
'sources': [
'../src/opts/memset16_neon.S',
'../src/opts/memset32_neon.S',
+ '../src/opts/SkBitmapProcState_matrixProcs_neon.cpp',
+ '../src/opts/SkBitmapProcState_matrix_clamp_neon.h',
+ '../src/opts/SkBitmapProcState_matrix_repeat_neon.h',
],
},
],
diff --git a/src/core/SkBitmapProcState_matrixProcs.cpp b/src/core/SkBitmapProcState_matrixProcs.cpp
index 1e12f9a921..77c6200d03 100644
--- a/src/core/SkBitmapProcState_matrixProcs.cpp
+++ b/src/core/SkBitmapProcState_matrixProcs.cpp
@@ -8,6 +8,7 @@
#include "SkPerspIter.h"
#include "SkShader.h"
#include "SkUtils.h"
+#include "SkUtilsArm.h"
// Helper to ensure that when we shift down, we do it w/o sign-extension
// so the caller doesn't have to manually mask off the top 16 bits
@@ -67,27 +68,31 @@ static inline bool can_truncate_to_fixed_for_decal(SkFractionalInt frX,
void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
+// Compile neon code paths if needed
+#if !SK_ARM_NEON_IS_NONE
+
+// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
+extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
+extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
+
+#endif // !SK_ARM_NEON_IS_NONE
+
+// Compile non-neon code path if needed
+#if !SK_ARM_NEON_IS_ALWAYS
#define MAKENAME(suffix) ClampX_ClampY ## suffix
#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
#define CHECK_FOR_DECAL
-#if defined(__ARM_HAVE_NEON)
- #include "SkBitmapProcState_matrix_clamp.h"
-#else
- #include "SkBitmapProcState_matrix.h"
-#endif
+#include "SkBitmapProcState_matrix.h"
#define MAKENAME(suffix) RepeatX_RepeatY ## suffix
#define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
#define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
-#if defined(__ARM_HAVE_NEON)
- #include "SkBitmapProcState_matrix_repeat.h"
-#else
- #include "SkBitmapProcState_matrix.h"
+#include "SkBitmapProcState_matrix.h"
#endif
#define MAKENAME(suffix) GeneralXY ## suffix
@@ -228,52 +233,6 @@ void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
{
int i;
-#if defined(__ARM_HAVE_NEON)
- if (count >= 8) {
- /* SkFixed is 16.16 fixed point */
- SkFixed dx2 = dx+dx;
- SkFixed dx4 = dx2+dx2;
- SkFixed dx8 = dx4+dx4;
-
- /* now build fx/fx+dx/fx+2dx/fx+3dx */
- SkFixed fx1, fx2, fx3;
- int32x2_t lower, upper;
- int32x4_t lbase, hbase;
- uint16_t *dst16 = (uint16_t *)dst;
-
- fx1 = fx+dx;
- fx2 = fx1+dx;
- fx3 = fx2+dx;
-
- /* avoid an 'lbase unitialized' warning */
- lbase = vdupq_n_s32(fx);
- lbase = vsetq_lane_s32(fx1, lbase, 1);
- lbase = vsetq_lane_s32(fx2, lbase, 2);
- lbase = vsetq_lane_s32(fx3, lbase, 3);
- hbase = vaddq_s32(lbase, vdupq_n_s32(dx4));
-
- /* take upper 16 of each, store, and bump everything */
- do {
- int32x4_t lout, hout;
- uint16x8_t hi16;
-
- lout = lbase;
- hout = hbase;
- /* gets hi's of all louts then hi's of all houts */
- asm ("vuzpq.16 %q0, %q1" : "+w" (lout), "+w" (hout));
- hi16 = vreinterpretq_u16_s32(hout);
- vst1q_u16(dst16, hi16);
-
- /* on to the next */
- lbase = vaddq_s32 (lbase, vdupq_n_s32(dx8));
- hbase = vaddq_s32 (hbase, vdupq_n_s32(dx8));
- dst16 += 8;
- count -= 8;
- fx += dx8;
- } while (count >= 8);
- dst = (uint32_t *) dst16;
- }
-#else
for (i = (count >> 2); i > 0; --i)
{
*dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
@@ -282,7 +241,6 @@ void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
fx += dx+dx;
}
count &= 3;
-#endif
uint16_t* xx = (uint16_t*)dst;
for (i = count; i > 0; --i) {
@@ -293,42 +251,6 @@ void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
{
-#if defined(__ARM_HAVE_NEON)
- if (count >= 8) {
- int32x4_t wide_fx;
- int32x4_t wide_fx2;
- int32x4_t wide_dx8 = vdupq_n_s32(dx*8);
-
- wide_fx = vdupq_n_s32(fx);
- wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1);
- wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2);
- wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3);
-
- wide_fx2 = vaddq_s32(wide_fx, vdupq_n_s32(dx+dx+dx+dx));
-
- while (count >= 8) {
- int32x4_t wide_out;
- int32x4_t wide_out2;
-
- wide_out = vshlq_n_s32(vshrq_n_s32(wide_fx, 12), 14);
- wide_out = vorrq_s32(wide_out,
- vaddq_s32(vshrq_n_s32(wide_fx,16), vdupq_n_s32(1)));
-
- wide_out2 = vshlq_n_s32(vshrq_n_s32(wide_fx2, 12), 14);
- wide_out2 = vorrq_s32(wide_out2,
- vaddq_s32(vshrq_n_s32(wide_fx2,16), vdupq_n_s32(1)));
-
- vst1q_u32(dst, vreinterpretq_u32_s32(wide_out));
- vst1q_u32(dst+4, vreinterpretq_u32_s32(wide_out2));
-
- dst += 8;
- fx += dx*8;
- wide_fx = vaddq_s32(wide_fx, wide_dx8);
- wide_fx2 = vaddq_s32(wide_fx2, wide_dx8);
- count -= 8;
- }
- }
-#endif
if (count & 1)
{
@@ -574,7 +496,17 @@ SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
// clamp gets special version of filterOne
fFilterOneX = SK_Fixed1;
fFilterOneY = SK_Fixed1;
+#if SK_ARM_NEON_IS_NONE
return ClampX_ClampY_Procs[index];
+#elif SK_ARM_NEON_IS_ALWAYS
+ return ClampX_ClampY_Procs_neon[index];
+#else // SK_ARM_NEON_IS_DYNAMIC
+ if (sk_cpu_arm_has_neon()) {
+ return ClampX_ClampY_Procs_neon[index];
+ } else {
+ return ClampX_ClampY_Procs[index];
+ }
+#endif
}
// all remaining procs use this form for filterOne
@@ -584,9 +516,19 @@ SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
if (SkShader::kRepeat_TileMode == fTileModeX &&
SkShader::kRepeat_TileMode == fTileModeY)
{
+#if SK_ARM_NEON_IS_NONE
return RepeatX_RepeatY_Procs[index];
+#elif SK_ARM_NEON_IS_ALWAYS
+ return RepeatX_RepeatY_Procs_neon[index];
+#else // SK_ARM_NEON_IS_DYNAMIC
+ if (sk_cpu_arm_has_neon()) {
+ return RepeatX_RepeatY_Procs_neon[index];
+ } else {
+ return RepeatX_RepeatY_Procs[index];
+ }
+#endif
}
-
+
fTileProcX = choose_tile_proc(fTileModeX);
fTileProcY = choose_tile_proc(fTileModeY);
fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
diff --git a/src/core/SkUtilsArm.cpp b/src/core/SkUtilsArm.cpp
index 4f6ef3a76a..e48457642d 100644
--- a/src/core/SkUtilsArm.cpp
+++ b/src/core/SkUtilsArm.cpp
@@ -33,8 +33,7 @@
// A function used to determine at runtime if the target CPU supports
// the ARM NEON instruction set. This implementation is Linux-specific.
-static bool sk_cpu_arm_check_neon(void)
-{
+static bool sk_cpu_arm_check_neon(void) {
bool result = false;
#if NEON_DEBUG
@@ -164,13 +163,11 @@ static pthread_once_t sOnce;
static bool sHasArmNeon;
// called through pthread_once()
-void sk_cpu_arm_probe_features(void)
-{
+void sk_cpu_arm_probe_features(void) {
sHasArmNeon = sk_cpu_arm_check_neon();
}
-bool sk_cpu_arm_has_neon(void)
-{
+bool sk_cpu_arm_has_neon(void) {
pthread_once(&sOnce, sk_cpu_arm_probe_features);
return sHasArmNeon;
}
diff --git a/src/opts/SkBitmapProcState_matrixProcs_neon.cpp b/src/opts/SkBitmapProcState_matrixProcs_neon.cpp
new file mode 100644
index 0000000000..7ebdddc5a4
--- /dev/null
+++ b/src/opts/SkBitmapProcState_matrixProcs_neon.cpp
@@ -0,0 +1,145 @@
+/* NEON optimized code (C) COPYRIGHT 2009 Motorola
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkBitmapProcState.h"
+#include "SkPerspIter.h"
+#include "SkShader.h"
+#include "SkUtilsArm.h"
+
+extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
+extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
+
+static void decal_nofilter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
+static void decal_filter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
+
+static unsigned SK_USHIFT16(unsigned x) {
+ return x >> 16;
+}
+
+#define MAKENAME(suffix) ClampX_ClampY ## suffix ## _neon
+#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
+#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
+#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
+#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
+#define CHECK_FOR_DECAL
+#include "SkBitmapProcState_matrix_clamp_neon.h"
+
+#define MAKENAME(suffix) RepeatX_RepeatY ## suffix ## _neon
+#define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
+#define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
+#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
+#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
+#include "SkBitmapProcState_matrix_repeat_neon.h"
+
+
+void decal_nofilter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
+{
+ int i;
+
+ if (count >= 8) {
+ /* SkFixed is 16.16 fixed point */
+ SkFixed dx2 = dx+dx;
+ SkFixed dx4 = dx2+dx2;
+ SkFixed dx8 = dx4+dx4;
+
+ /* now build fx/fx+dx/fx+2dx/fx+3dx */
+ SkFixed fx1, fx2, fx3;
+ int32x2_t lower, upper;
+ int32x4_t lbase, hbase;
+ uint16_t *dst16 = (uint16_t *)dst;
+
+ fx1 = fx+dx;
+ fx2 = fx1+dx;
+ fx3 = fx2+dx;
+
+ /* avoid an 'lbase unitialized' warning */
+ lbase = vdupq_n_s32(fx);
+ lbase = vsetq_lane_s32(fx1, lbase, 1);
+ lbase = vsetq_lane_s32(fx2, lbase, 2);
+ lbase = vsetq_lane_s32(fx3, lbase, 3);
+ hbase = vaddq_s32(lbase, vdupq_n_s32(dx4));
+
+ /* take upper 16 of each, store, and bump everything */
+ do {
+ int32x4_t lout, hout;
+ uint16x8_t hi16;
+
+ lout = lbase;
+ hout = hbase;
+ /* gets hi's of all louts then hi's of all houts */
+ asm ("vuzpq.16 %q0, %q1" : "+w" (lout), "+w" (hout));
+ hi16 = vreinterpretq_u16_s32(hout);
+ vst1q_u16(dst16, hi16);
+
+ /* on to the next */
+ lbase = vaddq_s32 (lbase, vdupq_n_s32(dx8));
+ hbase = vaddq_s32 (hbase, vdupq_n_s32(dx8));
+ dst16 += 8;
+ count -= 8;
+ fx += dx8;
+ } while (count >= 8);
+ dst = (uint32_t *) dst16;
+ }
+
+ uint16_t* xx = (uint16_t*)dst;
+ for (i = count; i > 0; --i) {
+ *xx++ = SkToU16(fx >> 16); fx += dx;
+ }
+}
+
+void decal_filter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
+{
+ if (count >= 8) {
+ int32x4_t wide_fx;
+ int32x4_t wide_fx2;
+ int32x4_t wide_dx8 = vdupq_n_s32(dx*8);
+
+ wide_fx = vdupq_n_s32(fx);
+ wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1);
+ wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2);
+ wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3);
+
+ wide_fx2 = vaddq_s32(wide_fx, vdupq_n_s32(dx+dx+dx+dx));
+
+ while (count >= 8) {
+ int32x4_t wide_out;
+ int32x4_t wide_out2;
+
+ wide_out = vshlq_n_s32(vshrq_n_s32(wide_fx, 12), 14);
+ wide_out = vorrq_s32(wide_out,
+ vaddq_s32(vshrq_n_s32(wide_fx,16), vdupq_n_s32(1)));
+
+ wide_out2 = vshlq_n_s32(vshrq_n_s32(wide_fx2, 12), 14);
+ wide_out2 = vorrq_s32(wide_out2,
+ vaddq_s32(vshrq_n_s32(wide_fx2,16), vdupq_n_s32(1)));
+
+ vst1q_u32(dst, vreinterpretq_u32_s32(wide_out));
+ vst1q_u32(dst+4, vreinterpretq_u32_s32(wide_out2));
+
+ dst += 8;
+ fx += dx*8;
+ wide_fx = vaddq_s32(wide_fx, wide_dx8);
+ wide_fx2 = vaddq_s32(wide_fx2, wide_dx8);
+ count -= 8;
+ }
+ }
+
+ if (count & 1)
+ {
+ SkASSERT((fx >> (16 + 14)) == 0);
+ *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
+ fx += dx;
+ }
+ while ((count -= 2) >= 0)
+ {
+ SkASSERT((fx >> (16 + 14)) == 0);
+ *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
+ fx += dx;
+
+ *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
+ fx += dx;
+ }
+}
diff --git a/src/core/SkBitmapProcState_matrix_clamp.h b/src/opts/SkBitmapProcState_matrix_clamp_neon.h
index 06bc0faf45..2e4816104c 100644
--- a/src/core/SkBitmapProcState_matrix_clamp.h
+++ b/src/opts/SkBitmapProcState_matrix_clamp_neon.h
@@ -21,10 +21,6 @@
*/
-#if !defined(__ARM_HAVE_NEON)
-#error this file can be used only when the NEON unit is enabled
-#endif
-
#include <arm_neon.h>
/*
@@ -91,7 +87,7 @@ static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s,
// test if we don't need to apply the tile proc
if ((unsigned)(fx >> 16) <= maxX &&
(unsigned)((fx + dx * (count - 1)) >> 16) <= maxX) {
- decal_nofilter_scale(xy, fx, dx, count);
+ decal_nofilter_scale_neon(xy, fx, dx, count);
return;
}
#endif
@@ -516,7 +512,7 @@ static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
if (dx > 0 &&
(unsigned)(fx >> 16) <= maxX &&
(unsigned)((fx + dx * (count - 1)) >> 16) < maxX) {
- decal_filter_scale(xy, fx, dx, count);
+ decal_filter_scale_neon(xy, fx, dx, count);
} else
#endif
@@ -891,7 +887,7 @@ static void PERSP_FILTER_NAME(const SkBitmapProcState& s,
}
}
-static SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = {
+const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = {
SCALE_NOFILTER_NAME,
SCALE_FILTER_NAME,
AFFINE_NOFILTER_NAME,
diff --git a/src/core/SkBitmapProcState_matrix_repeat.h b/src/opts/SkBitmapProcState_matrix_repeat_neon.h
index 8f327955e2..d05beab38c 100644
--- a/src/core/SkBitmapProcState_matrix_repeat.h
+++ b/src/opts/SkBitmapProcState_matrix_repeat_neon.h
@@ -20,10 +20,6 @@
*/
-#if !defined(__ARM_HAVE_NEON)
-#error this file can be used only when the NEON unit is enabled
-#endif
-
#include <arm_neon.h>
/*
@@ -39,11 +35,11 @@
/* SkClampMax(val,max) -- bound to 0..max */
-#define SCALE_NOFILTER_NAME MAKENAME(_nofilter_scale_neon)
+#define SCALE_NOFILTER_NAME MAKENAME(_nofilter_scale)
#define SCALE_FILTER_NAME MAKENAME(_filter_scale)
-#define AFFINE_NOFILTER_NAME MAKENAME(_nofilter_affine_neon)
+#define AFFINE_NOFILTER_NAME MAKENAME(_nofilter_affine)
#define AFFINE_FILTER_NAME MAKENAME(_filter_affine)
-#define PERSP_NOFILTER_NAME MAKENAME(_nofilter_persp_neon)
+#define PERSP_NOFILTER_NAME MAKENAME(_nofilter_persp)
#define PERSP_FILTER_NAME MAKENAME(_filter_persp)
#define PACK_FILTER_X_NAME MAKENAME(_pack_filter_x)
@@ -89,13 +85,12 @@ static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s,
// test if we don't need to apply the tile proc
if ((unsigned)(fx >> 16) <= maxX &&
(unsigned)((fx + dx * (count - 1)) >> 16) <= maxX) {
- decal_nofilter_scale(xy, fx, dx, count);
+ decal_nofilter_scale_neon(xy, fx, dx, count);
} else
#endif
{
int i;
-#if defined(__ARM_HAVE_NEON)
/* RBE: very much like done in decal_nofilter ,
* but some processing of the 'fx' information
* TILEX_PROCF(fx, max) (((fx) & 0xFFFF) * ((max) + 1) >> 16)
@@ -152,30 +147,6 @@ static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s,
} while (count >= 8);
xy = (uint32_t *) dst16;
}
-#else
- /* simple, portable way of looking at 4 at a crack;
- * so gets some loop unrolling, but not full SIMD speed
- */
- for (i = (count >> 2); i > 0; --i) {
- unsigned a, b;
- a = TILEX_PROCF(fx, maxX); fx += dx;
- b = TILEX_PROCF(fx, maxX); fx += dx;
-#ifdef SK_CPU_BENDIAN
- *xy++ = (a << 16) | b;
-#else
- *xy++ = (b << 16) | a;
-#endif
- a = TILEX_PROCF(fx, maxX); fx += dx;
- b = TILEX_PROCF(fx, maxX); fx += dx;
-#ifdef SK_CPU_BENDIAN
- *xy++ = (a << 16) | b;
-#else
- *xy++ = (b << 16) | a;
-#endif
- }
- /* loop doesn't adjust count */
- count %= 4;
-#endif
uint16_t* xx = (uint16_t*)xy;
for (i = count; i > 0; --i) {
*xx++ = TILEX_PROCF(fx, maxX); fx += dx;
@@ -214,7 +185,6 @@ static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s,
SkFixed bfx = fx, bfy=fy, bdx=dx, bdy=dy;
#endif
-#if defined(__ARM_HAVE_NEON)
if (0) { extern void rbe(void); rbe(); }
@@ -298,7 +268,6 @@ static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s,
SkDebugf("maxX %08x maxY %08x\n", maxX, maxY);
}
#endif
-#endif
for (int i = count; i > 0; --i) {
/* fx, fy, dx, dy are all 32 bit 16.16 fixed point */
@@ -324,7 +293,6 @@ static void PERSP_NOFILTER_NAME(const SkBitmapProcState& s,
while ((count = iter.next()) != 0) {
const SkFixed* SK_RESTRICT srcXY = iter.getXY();
-#if defined(__ARM_HAVE_NEON)
/* RBE: */
/* TILEX_PROCF(fx, max) (((fx) & 0xFFFF) * ((max) + 1) >> 16) */
/* it's a little more complicated than what I did for the
@@ -417,7 +385,6 @@ static void PERSP_NOFILTER_NAME(const SkBitmapProcState& s,
srcXY = (const SkFixed *) mysrc;
xy = (uint32_t *) mydst;
}
-#endif
while (--count >= 0) {
*xy++ = (TILEY_PROCF(srcXY[1], maxY) << 16) |
TILEX_PROCF(srcXY[0], maxX);
@@ -472,7 +439,7 @@ static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
if (dx > 0 &&
(unsigned)(fx >> 16) <= maxX &&
(unsigned)((fx + dx * (count - 1)) >> 16) < maxX) {
- decal_filter_scale(xy, fx, dx, count);
+ decal_filter_scale_neon(xy, fx, dx, count);
} else
#endif
{
@@ -544,7 +511,7 @@ static void PERSP_FILTER_NAME(const SkBitmapProcState& s,
}
}
-static SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = {
+const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = {
SCALE_NOFILTER_NAME,
SCALE_FILTER_NAME,
AFFINE_NOFILTER_NAME,