diff options
Diffstat (limited to 'src/jumper/SkJumper_vectors.h')
-rw-r--r-- | src/jumper/SkJumper_vectors.h | 44 |
1 files changed, 38 insertions, 6 deletions
diff --git a/src/jumper/SkJumper_vectors.h b/src/jumper/SkJumper_vectors.h index 4deec83a28..a7898398a6 100644 --- a/src/jumper/SkJumper_vectors.h +++ b/src/jumper/SkJumper_vectors.h @@ -108,31 +108,63 @@ } SI void load3(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b) { - uint16x4x3_t rgb = vld3_u16(ptr); + uint16x4x3_t rgb; + if (__builtin_expect(tail,0)) { + if ( true ) { rgb = vld3_lane_u16(ptr + 0, rgb, 0); } + if (tail > 1) { rgb = vld3_lane_u16(ptr + 3, rgb, 1); } + if (tail > 2) { rgb = vld3_lane_u16(ptr + 6, rgb, 2); } + } else { + rgb = vld3_u16(ptr); + } *r = rgb.val[0]; *g = rgb.val[1]; *b = rgb.val[2]; } SI void load4(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b, U16* a) { - uint16x4x4_t rgba = vld4_u16(ptr); + uint16x4x4_t rgba; + if (__builtin_expect(tail,0)) { + if ( true ) { rgba = vld4_lane_u16(ptr + 0, rgba, 0); } + if (tail > 1) { rgba = vld4_lane_u16(ptr + 4, rgba, 1); } + if (tail > 2) { rgba = vld4_lane_u16(ptr + 8, rgba, 2); } + } else { + rgba = vld4_u16(ptr); + } *r = rgba.val[0]; *g = rgba.val[1]; *b = rgba.val[2]; *a = rgba.val[3]; } SI void store4(uint16_t* ptr, size_t tail, U16 r, U16 g, U16 b, U16 a) { - vst4_u16(ptr, (uint16x4x4_t{{r,g,b,a}})); + if (__builtin_expect(tail,0)) { + if ( true ) { vst4_lane_u16(ptr + 0, (uint16x4x4_t{{r,g,b,a}}), 0); } + if (tail > 1) { vst4_lane_u16(ptr + 4, (uint16x4x4_t{{r,g,b,a}}), 1); } + if (tail > 2) { vst4_lane_u16(ptr + 8, (uint16x4x4_t{{r,g,b,a}}), 2); } + } else { + vst4_u16(ptr, (uint16x4x4_t{{r,g,b,a}})); + } } - SI void load4(const float* ptr, size_t tail, F* r, F* g, F* b, F* a) { - float32x4x4_t rgba = vld4q_f32(ptr); + float32x4x4_t rgba; + if (__builtin_expect(tail,0)) { + if ( true ) { rgba = vld4q_lane_f32(ptr + 0, rgba, 0); } + if (tail > 1) { rgba = vld4q_lane_f32(ptr + 4, rgba, 1); } + if (tail > 2) { rgba = vld4q_lane_f32(ptr + 8, rgba, 2); } + } else { + rgba = vld4q_f32(ptr); + } *r = rgba.val[0]; *g = rgba.val[1]; *b = rgba.val[2]; *a = rgba.val[3]; } SI void store4(float* ptr, size_t tail, F r, F g, F b, F a) { - vst4q_f32(ptr, (float32x4x4_t{{r,g,b,a}})); + if (__builtin_expect(tail,0)) { + if ( true ) { vst4q_lane_f32(ptr + 0, (float32x4x4_t{{r,g,b,a}}), 0); } + if (tail > 1) { vst4q_lane_f32(ptr + 4, (float32x4x4_t{{r,g,b,a}}), 1); } + if (tail > 2) { vst4q_lane_f32(ptr + 8, (float32x4x4_t{{r,g,b,a}}), 2); } + } else { + vst4q_f32(ptr, (float32x4x4_t{{r,g,b,a}})); + } } #elif defined(__arm__) |