about summary refs log tree commit diff homepage
path: root/src/jumper/SkJumper_vectors.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/jumper/SkJumper_vectors.h')
-rw-r--r-- src/jumper/SkJumper_vectors.h 44
1 files changed, 38 insertions, 6 deletions
diff --git a/src/jumper/SkJumper_vectors.h b/src/jumper/SkJumper_vectors.h
index 4deec83a28..a7898398a6 100644
--- a/src/jumper/SkJumper_vectors.h
+++ b/src/jumper/SkJumper_vectors.h
@@ -108,31 +108,63 @@
}
SI void load3(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b) {
- uint16x4x3_t rgb = vld3_u16(ptr);
+ uint16x4x3_t rgb;
+ if (__builtin_expect(tail,0)) {
+ if ( true ) { rgb = vld3_lane_u16(ptr + 0, rgb, 0); }
+ if (tail > 1) { rgb = vld3_lane_u16(ptr + 3, rgb, 1); }
+ if (tail > 2) { rgb = vld3_lane_u16(ptr + 6, rgb, 2); }
+ } else {
+ rgb = vld3_u16(ptr);
+ }
*r = rgb.val[0];
*g = rgb.val[1];
*b = rgb.val[2];
}
SI void load4(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b, U16* a) {
- uint16x4x4_t rgba = vld4_u16(ptr);
+ uint16x4x4_t rgba;
+ if (__builtin_expect(tail,0)) {
+ if ( true ) { rgba = vld4_lane_u16(ptr + 0, rgba, 0); }
+ if (tail > 1) { rgba = vld4_lane_u16(ptr + 4, rgba, 1); }
+ if (tail > 2) { rgba = vld4_lane_u16(ptr + 8, rgba, 2); }
+ } else {
+ rgba = vld4_u16(ptr);
+ }
*r = rgba.val[0];
*g = rgba.val[1];
*b = rgba.val[2];
*a = rgba.val[3];
}
SI void store4(uint16_t* ptr, size_t tail, U16 r, U16 g, U16 b, U16 a) {
- vst4_u16(ptr, (uint16x4x4_t{{r,g,b,a}}));
+ if (__builtin_expect(tail,0)) {
+ if ( true ) { vst4_lane_u16(ptr + 0, (uint16x4x4_t{{r,g,b,a}}), 0); }
+ if (tail > 1) { vst4_lane_u16(ptr + 4, (uint16x4x4_t{{r,g,b,a}}), 1); }
+ if (tail > 2) { vst4_lane_u16(ptr + 8, (uint16x4x4_t{{r,g,b,a}}), 2); }
+ } else {
+ vst4_u16(ptr, (uint16x4x4_t{{r,g,b,a}}));
+ }
}
-
SI void load4(const float* ptr, size_t tail, F* r, F* g, F* b, F* a) {
- float32x4x4_t rgba = vld4q_f32(ptr);
+ float32x4x4_t rgba;
+ if (__builtin_expect(tail,0)) {
+ if ( true ) { rgba = vld4q_lane_f32(ptr + 0, rgba, 0); }
+ if (tail > 1) { rgba = vld4q_lane_f32(ptr + 4, rgba, 1); }
+ if (tail > 2) { rgba = vld4q_lane_f32(ptr + 8, rgba, 2); }
+ } else {
+ rgba = vld4q_f32(ptr);
+ }
*r = rgba.val[0];
*g = rgba.val[1];
*b = rgba.val[2];
*a = rgba.val[3];
}
SI void store4(float* ptr, size_t tail, F r, F g, F b, F a) {
- vst4q_f32(ptr, (float32x4x4_t{{r,g,b,a}}));
+ if (__builtin_expect(tail,0)) {
+ if ( true ) { vst4q_lane_f32(ptr + 0, (float32x4x4_t{{r,g,b,a}}), 0); }
+ if (tail > 1) { vst4q_lane_f32(ptr + 4, (float32x4x4_t{{r,g,b,a}}), 1); }
+ if (tail > 2) { vst4q_lane_f32(ptr + 8, (float32x4x4_t{{r,g,b,a}}), 2); }
+ } else {
+ vst4q_f32(ptr, (float32x4x4_t{{r,g,b,a}}));
+ }
}
#elif defined(__arm__)