aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Mike Klein <mtklein@chromium.org>2016-12-20 17:34:29 -0500
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2016-12-21 03:55:00 +0000
commit40a82bd0f48d04680fbe7af063a4f2e3eae122e6 (patch)
tree9f8fb95c44dc19c24c48d0f5fed4e8173f771db9
parent6d2f73c364d0d823f14d1ddebc88e0bcbc8f0634 (diff)
remove MIPS DSP optimizations
There are only a couple of these, primarily focused on index8 srcs and 565 dsts. The burden's starting to outweigh the benefit. No one on the team knows MIPS assembly. If we're going to try this again, I'd rather we try some sort of SkNx / compiler-intrinsic based approach, probably targeting MIPS SIMD (MSA), not this older instruction set. We already ignore these files for 64-bit MIPS. This just closes the loop on 32-bit MIPS. CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD,Build-Ubuntu-Clang-mips64el-Debug-Android,Build-Ubuntu-Clang-mips64el-Release-Android,Build-Ubuntu-Clang-mipsel-Debug-Android,Build-Ubuntu-Clang-mipsel-Release-Android BUG=skia:6065 Change-Id: Iecac15b56f59625b2e743ea36e7791b90bb0b422 Reviewed-on: https://skia-review.googlesource.com/6353 Reviewed-by: Leon Scroggins <scroggo@google.com> Commit-Queue: Mike Klein <mtklein@chromium.org>
-rw-r--r--BUILD.gn7
-rw-r--r--gn/gn_to_bp.py7
-rw-r--r--gn/opts.gni6
-rw-r--r--gn/shared_sources.gni2
-rw-r--r--src/core/SkBlitter_RGB16.cpp10
-rw-r--r--src/opts/SkBitmapProcState_opts_mips_dsp.cpp259
-rw-r--r--src/opts/SkBlitRow_opts_mips_dsp.cpp958
7 files changed, 4 insertions, 1245 deletions
diff --git a/BUILD.gn b/BUILD.gn
index 7d503e6078..102b7853e5 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -274,12 +274,6 @@ opts("hsw") {
}
}
-opts("dsp") {
- enabled = current_cpu == "mipsel"
- sources = skia_opts.mips_dsp_sources
- cflags = []
-}
-
# Any feature of Skia that requires third-party code should be optional and use this template.
template("optional") {
if (invoker.enabled) {
@@ -528,7 +522,6 @@ component("skia") {
":armv7",
":avx",
":crc32",
- ":dsp",
":fontmgr_android",
":fontmgr_custom",
":fontmgr_fontconfig",
diff --git a/gn/gn_to_bp.py b/gn/gn_to_bp.py
index 4ba5db8b18..754db3653d 100644
--- a/gn/gn_to_bp.py
+++ b/gn/gn_to_bp.py
@@ -61,13 +61,13 @@ cc_library {
mips: {
srcs: [
- $mips_srcs
+ $none_srcs
],
},
mips64: {
srcs: [
- $mips64_srcs
+ $none_srcs
],
},
@@ -199,8 +199,7 @@ with open('Android.bp', 'w') as f:
'arm_neon_srcs': bpfmt(20, scrub(defs['neon'])),
'arm64_srcs': bpfmt(16, scrub(defs['arm64'] +
defs['crc32'])),
- 'mips_srcs': bpfmt(16, scrub(defs['mips_dsp'])),
- 'mips64_srcs': bpfmt(16, scrub(defs['none'])),
+ 'none_srcs': bpfmt(16, scrub(defs['none'])),
'x86_srcs': bpfmt(16, scrub(defs['sse2'] +
defs['ssse3'] +
defs['sse41'] +
diff --git a/gn/opts.gni b/gn/opts.gni
index 02a95cbad5..34481db3b6 100644
--- a/gn/opts.gni
+++ b/gn/opts.gni
@@ -37,12 +37,6 @@ arm64 = [
crc32 = [ "$_src/opts/SkOpts_crc32.cpp" ]
-mips_dsp = [
- "$_src/opts/SkBitmapProcState_opts_mips_dsp.cpp",
- "$_src/opts/SkBlitMask_opts_none.cpp",
- "$_src/opts/SkBlitRow_opts_mips_dsp.cpp",
-]
-
sse2 = [
"$_src/opts/SkBitmapProcState_opts_SSE2.cpp",
"$_src/opts/SkBlitRow_opts_SSE2.cpp",
diff --git a/gn/shared_sources.gni b/gn/shared_sources.gni
index 36a7dfcd9c..6df999921a 100644
--- a/gn/shared_sources.gni
+++ b/gn/shared_sources.gni
@@ -18,7 +18,7 @@ skia_opts = {
neon_sources = neon
arm64_sources = arm64
crc32_sources = crc32
- mips_dsp_sources = mips_dsp
+ mips_dsp_sources = none # Chrome's src/skia/BUILD.gn uses this.
sse2_sources = sse2
ssse3_sources = ssse3
sse41_sources = sse41
diff --git a/src/core/SkBlitter_RGB16.cpp b/src/core/SkBlitter_RGB16.cpp
index 015112a107..afa8cac184 100644
--- a/src/core/SkBlitter_RGB16.cpp
+++ b/src/core/SkBlitter_RGB16.cpp
@@ -14,12 +14,6 @@
#include "SkUtilsArm.h"
#include "SkXfermodePriv.h"
-#if defined(__mips_dsp)
-extern void blitmask_d565_opaque_mips(int width, int height, uint16_t* device,
- unsigned deviceRB, const uint8_t* alpha,
- uint32_t expanded32, unsigned maskRB);
-#endif
-
#if defined(SK_ARM_HAS_NEON) && defined(SK_CPU_LENDIAN)
#include <arm_neon.h>
extern void SkRGB16BlitterBlitV_neon(uint16_t* device,
@@ -360,11 +354,9 @@ void SkRGB16_Opaque_Blitter::blitAntiH(int x, int y,
#define SK_BLITBWMASK_DEVTYPE uint16_t
#include "SkBlitBWMaskTemplate.h"
-#if !defined(__mips_dsp)
static U16CPU blend_compact(uint32_t src32, uint32_t dst32, unsigned scale5) {
return SkCompact_rgb_16(dst32 + ((src32 - dst32) * scale5 >> 5));
}
-#endif
void SkRGB16_Opaque_Blitter::blitMask(const SkMask& mask,
const SkIRect& clip) {
@@ -451,8 +443,6 @@ void SkRGB16_Opaque_Blitter::blitMask(const SkMask& mask,
alpha += maskRB;
} while (--height != 0);
#undef UNROLL
-#elif defined(__mips_dsp)
- blitmask_d565_opaque_mips(width, height, device, deviceRB, alpha, expanded32, maskRB);
#else // non-neon code
do {
int w = width;
diff --git a/src/opts/SkBitmapProcState_opts_mips_dsp.cpp b/src/opts/SkBitmapProcState_opts_mips_dsp.cpp
deleted file mode 100644
index 449fa61007..0000000000
--- a/src/opts/SkBitmapProcState_opts_mips_dsp.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Copyright 2014 The Android Open Source Project
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-
-#include "SkBitmapProcState.h"
-#include "SkBitmapScaler.h"
-#include "SkColorPriv.h"
-#include "SkPaint.h"
-#include "SkUtils.h"
-
-static void SI8_opaque_D32_nofilter_DX_mips_dsp(const SkBitmapProcState& s,
- const uint32_t* SK_RESTRICT xy,
- int count, SkPMColor* SK_RESTRICT colors) {
- SkASSERT(count > 0 && colors != nullptr);
- SkASSERT(s.fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask));
- SkASSERT(kNone_SkFilterQuality == s.fFilterQuality);
- const SkPMColor* SK_RESTRICT table = s.fPixmap.ctable()->readColors();
- const uint8_t* SK_RESTRICT srcAddr = (const uint8_t*)s.fPixmap.addr();
- srcAddr = (const uint8_t*)((const char*)srcAddr + xy[0] * s.fPixmap.rowBytes());
-
- if (1 == s.fPixmap.width()) {
- uint8_t src = srcAddr[0];
- SkPMColor dstValue = table[src];
- sk_memset32(colors, dstValue, count);
- } else {
- const uint16_t* xx = (const uint16_t*)(xy + 1);
- int s0, s1, s2, s3, s4, s5, s6, s7;
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- ".set noat \n\t"
- "srl $t8, %[count], 4 \n\t"
- "beqz $t8, 3f \n\t"
- " nop \n\t"
- "1: \n\t"
- "addiu $t8, $t8, -1 \n\t"
- "beqz $t8, 2f \n\t"
- " addiu %[count], %[count], -16 \n\t"
- "pref 0, 32(%[xx]) \n\t"
- "lhu $t0, 0(%[xx]) \n\t"
- "lhu $t1, 2(%[xx]) \n\t"
- "lhu $t2, 4(%[xx]) \n\t"
- "lhu $t3, 6(%[xx]) \n\t"
- "lhu $t4, 8(%[xx]) \n\t"
- "lhu $t5, 10(%[xx]) \n\t"
- "lhu $t6, 12(%[xx]) \n\t"
- "lhu $t7, 14(%[xx]) \n\t"
- "lhu %[s0], 16(%[xx]) \n\t"
- "lhu %[s1], 18(%[xx]) \n\t"
- "lhu %[s2], 20(%[xx]) \n\t"
- "lhu %[s3], 22(%[xx]) \n\t"
- "lhu %[s4], 24(%[xx]) \n\t"
- "lhu %[s5], 26(%[xx]) \n\t"
- "lhu %[s6], 28(%[xx]) \n\t"
- "lhu %[s7], 30(%[xx]) \n\t"
- "lbux $t0, $t0(%[srcAddr]) \n\t"
- "lbux $t1, $t1(%[srcAddr]) \n\t"
- "lbux $t2, $t2(%[srcAddr]) \n\t"
- "lbux $t3, $t3(%[srcAddr]) \n\t"
- "lbux $t4, $t4(%[srcAddr]) \n\t"
- "lbux $t5, $t5(%[srcAddr]) \n\t"
- "lbux $t6, $t6(%[srcAddr]) \n\t"
- "lbux $t7, $t7(%[srcAddr]) \n\t"
- "lbux %[s0], %[s0](%[srcAddr]) \n\t"
- "lbux %[s1], %[s1](%[srcAddr]) \n\t"
- "lbux %[s2], %[s2](%[srcAddr]) \n\t"
- "lbux %[s3], %[s3](%[srcAddr]) \n\t"
- "lbux %[s4], %[s4](%[srcAddr]) \n\t"
- "lbux %[s5], %[s5](%[srcAddr]) \n\t"
- "lbux %[s6], %[s6](%[srcAddr]) \n\t"
- "lbux %[s7], %[s7](%[srcAddr]) \n\t"
- "sll $t0, $t0, 2 \n\t"
- "sll $t1, $t1, 2 \n\t"
- "sll $t2, $t2, 2 \n\t"
- "sll $t3, $t3, 2 \n\t"
- "sll $t4, $t4, 2 \n\t"
- "sll $t5, $t5, 2 \n\t"
- "sll $t6, $t6, 2 \n\t"
- "sll $t7, $t7, 2 \n\t"
- "sll %[s0], %[s0], 2 \n\t"
- "sll %[s1], %[s1], 2 \n\t"
- "sll %[s2], %[s2], 2 \n\t"
- "sll %[s3], %[s3], 2 \n\t"
- "sll %[s4], %[s4], 2 \n\t"
- "sll %[s5], %[s5], 2 \n\t"
- "sll %[s6], %[s6], 2 \n\t"
- "sll %[s7], %[s7], 2 \n\t"
- "pref 0, 64(%[table]) \n\t"
- "lwx $t0, $t0(%[table]) \n\t"
- "lwx $t1, $t1(%[table]) \n\t"
- "lwx $t2, $t2(%[table]) \n\t"
- "lwx $t3, $t3(%[table]) \n\t"
- "lwx $t4, $t4(%[table]) \n\t"
- "lwx $t5, $t5(%[table]) \n\t"
- "lwx $t6, $t6(%[table]) \n\t"
- "lwx $t7, $t7(%[table]) \n\t"
- "lwx %[s0], %[s0](%[table]) \n\t"
- "lwx %[s1], %[s1](%[table]) \n\t"
- "lwx %[s2], %[s2](%[table]) \n\t"
- "lwx %[s3], %[s3](%[table]) \n\t"
- "lwx %[s4], %[s4](%[table]) \n\t"
- "lwx %[s5], %[s5](%[table]) \n\t"
- "lwx %[s6], %[s6](%[table]) \n\t"
- "lwx %[s7], %[s7](%[table]) \n\t"
- "pref 30, 64(%[colors]) \n\t"
- "sw $t0, 0(%[colors]) \n\t"
- "sw $t1, 4(%[colors]) \n\t"
- "sw $t2, 8(%[colors]) \n\t"
- "sw $t3, 12(%[colors]) \n\t"
- "sw $t4, 16(%[colors]) \n\t"
- "sw $t5, 20(%[colors]) \n\t"
- "sw $t6, 24(%[colors]) \n\t"
- "sw $t7, 28(%[colors]) \n\t"
- "sw %[s0], 32(%[colors]) \n\t"
- "sw %[s1], 36(%[colors]) \n\t"
- "sw %[s2], 40(%[colors]) \n\t"
- "sw %[s3], 44(%[colors]) \n\t"
- "sw %[s4], 48(%[colors]) \n\t"
- "sw %[s5], 52(%[colors]) \n\t"
- "sw %[s6], 56(%[colors]) \n\t"
- "sw %[s7], 60(%[colors]) \n\t"
- "addiu %[xx], %[xx], 32 \n\t"
- "b 1b \n\t"
- " addiu %[colors], %[colors], 64 \n\t"
- "2: \n\t"
- "lhu $t0, 0(%[xx]) \n\t"
- "lhu $t1, 2(%[xx]) \n\t"
- "lhu $t2, 4(%[xx]) \n\t"
- "lhu $t3, 6(%[xx]) \n\t"
- "lhu $t4, 8(%[xx]) \n\t"
- "lhu $t5, 10(%[xx]) \n\t"
- "lhu $t6, 12(%[xx]) \n\t"
- "lhu $t7, 14(%[xx]) \n\t"
- "lhu %[s0], 16(%[xx]) \n\t"
- "lhu %[s1], 18(%[xx]) \n\t"
- "lhu %[s2], 20(%[xx]) \n\t"
- "lhu %[s3], 22(%[xx]) \n\t"
- "lhu %[s4], 24(%[xx]) \n\t"
- "lhu %[s5], 26(%[xx]) \n\t"
- "lhu %[s6], 28(%[xx]) \n\t"
- "lhu %[s7], 30(%[xx]) \n\t"
- "lbux $t0, $t0(%[srcAddr]) \n\t"
- "lbux $t1, $t1(%[srcAddr]) \n\t"
- "lbux $t2, $t2(%[srcAddr]) \n\t"
- "lbux $t3, $t3(%[srcAddr]) \n\t"
- "lbux $t4, $t4(%[srcAddr]) \n\t"
- "lbux $t5, $t5(%[srcAddr]) \n\t"
- "lbux $t6, $t6(%[srcAddr]) \n\t"
- "lbux $t7, $t7(%[srcAddr]) \n\t"
- "lbux %[s0], %[s0](%[srcAddr]) \n\t"
- "lbux %[s1], %[s1](%[srcAddr]) \n\t"
- "lbux %[s2], %[s2](%[srcAddr]) \n\t"
- "lbux %[s3], %[s3](%[srcAddr]) \n\t"
- "lbux %[s4], %[s4](%[srcAddr]) \n\t"
- "lbux %[s5], %[s5](%[srcAddr]) \n\t"
- "lbux %[s6], %[s6](%[srcAddr]) \n\t"
- "lbux %[s7], %[s7](%[srcAddr]) \n\t"
- "sll $t0, $t0, 2 \n\t"
- "sll $t1, $t1, 2 \n\t"
- "sll $t2, $t2, 2 \n\t"
- "sll $t3, $t3, 2 \n\t"
- "sll $t4, $t4, 2 \n\t"
- "sll $t5, $t5, 2 \n\t"
- "sll $t6, $t6, 2 \n\t"
- "sll $t7, $t7, 2 \n\t"
- "sll %[s0], %[s0], 2 \n\t"
- "sll %[s1], %[s1], 2 \n\t"
- "sll %[s2], %[s2], 2 \n\t"
- "sll %[s3], %[s3], 2 \n\t"
- "sll %[s4], %[s4], 2 \n\t"
- "sll %[s5], %[s5], 2 \n\t"
- "sll %[s6], %[s6], 2 \n\t"
- "sll %[s7], %[s7], 2 \n\t"
- "lwx $t0, $t0(%[table]) \n\t"
- "lwx $t1, $t1(%[table]) \n\t"
- "lwx $t2, $t2(%[table]) \n\t"
- "lwx $t3, $t3(%[table]) \n\t"
- "lwx $t4, $t4(%[table]) \n\t"
- "lwx $t5, $t5(%[table]) \n\t"
- "lwx $t6, $t6(%[table]) \n\t"
- "lwx $t7, $t7(%[table]) \n\t"
- "lwx %[s0], %[s0](%[table]) \n\t"
- "lwx %[s1], %[s1](%[table]) \n\t"
- "lwx %[s2], %[s2](%[table]) \n\t"
- "lwx %[s3], %[s3](%[table]) \n\t"
- "lwx %[s4], %[s4](%[table]) \n\t"
- "lwx %[s5], %[s5](%[table]) \n\t"
- "lwx %[s6], %[s6](%[table]) \n\t"
- "lwx %[s7], %[s7](%[table]) \n\t"
- "sw $t0, 0(%[colors]) \n\t"
- "sw $t1, 4(%[colors]) \n\t"
- "sw $t2, 8(%[colors]) \n\t"
- "sw $t3, 12(%[colors]) \n\t"
- "sw $t4, 16(%[colors]) \n\t"
- "sw $t5, 20(%[colors]) \n\t"
- "sw $t6, 24(%[colors]) \n\t"
- "sw $t7, 28(%[colors]) \n\t"
- "sw %[s0], 32(%[colors]) \n\t"
- "sw %[s1], 36(%[colors]) \n\t"
- "sw %[s2], 40(%[colors]) \n\t"
- "sw %[s3], 44(%[colors]) \n\t"
- "sw %[s4], 48(%[colors]) \n\t"
- "sw %[s5], 52(%[colors]) \n\t"
- "sw %[s6], 56(%[colors]) \n\t"
- "sw %[s7], 60(%[colors]) \n\t"
- "addiu %[xx], %[xx], 32 \n\t"
- "beqz %[count], 4f \n\t"
- " addiu %[colors], %[colors], 64 \n\t"
- "3: \n\t"
- "addiu %[count], %[count], -1 \n\t"
- "lhu $t0, 0(%[xx]) \n\t"
- "lbux $t1, $t0(%[srcAddr]) \n\t"
- "sll $t1, $t1, 2 \n\t"
- "lwx $t2, $t1(%[table]) \n\t"
- "sw $t2, 0(%[colors]) \n\t"
- "addiu %[xx], %[xx], 2 \n\t"
- "bnez %[count], 3b \n\t"
- " addiu %[colors], %[colors], 4 \n\t"
- "4: \n\t"
- ".set pop \n\t"
- : [xx]"+r"(xx), [count]"+r"(count), [colors]"+r"(colors),
- [s0]"=&r"(s0), [s1]"=&r"(s1), [s2]"=&r"(s2), [s3]"=&r"(s3),
- [s4]"=&r"(s4), [s5]"=&r"(s5), [s6]"=&r"(s6), [s7]"=&r"(s7)
- : [table]"r"(table), [srcAddr]"r"(srcAddr)
- : "memory", "t0", "t1", "t2", "t3",
- "t4", "t5", "t6", "t7", "t8"
- );
- }
-}
-
-/* If we replace a sampleproc, then we null-out the associated shaderproc,
- otherwise the shader won't even look at the matrix/sampler
- */
-
-void SkBitmapProcState::platformProcs() {
- bool isOpaque = 256 == fAlphaScale;
- bool justDx = false;
-
- if (fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)) {
- justDx = true;
- }
-
- switch (fPixmap.colorType()) {
- case kIndex_8_SkColorType:
- if (justDx && kNone_SkFilterQuality == fFilterQuality) {
- if (isOpaque) {
- fSampleProc32 = SI8_opaque_D32_nofilter_DX_mips_dsp;
- fShaderProc32 = nullptr;
- }
- }
- break;
- default:
- break;
- }
-}
diff --git a/src/opts/SkBlitRow_opts_mips_dsp.cpp b/src/opts/SkBlitRow_opts_mips_dsp.cpp
deleted file mode 100644
index 38922b2ead..0000000000
--- a/src/opts/SkBlitRow_opts_mips_dsp.cpp
+++ /dev/null
@@ -1,958 +0,0 @@
-/*
- * Copyright 2014 The Android Open Source Project
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "SkBlitRow.h"
-#include "SkBlitMask.h"
-#include "SkColorPriv.h"
-#include "SkDither.h"
-#include "SkMathPriv.h"
-
-static void S32_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst,
- const SkPMColor* SK_RESTRICT src, int count,
- U8CPU alpha, int /*x*/, int /*y*/) {
- uint32_t t0, t1, t2, t3, t4, t5, t6;
- uint32_t s0, s1, s2, s4, s5, s6;
-
- alpha += 1;
- if (count >= 2) {
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "sll %[s4], %[alpha], 8 \n\t"
- "or %[s4], %[s4], %[alpha] \n\t"
- "repl.ph %[s5], 0x1f \n\t"
- "repl.ph %[s6], 0x3f \n\t"
- "1: \n\t"
- "lw %[s2], 0(%[src]) \n\t"
- "lw %[s1], 4(%[src]) \n\t"
- "lwr %[s0], 0(%[dst]) \n\t"
- "lwl %[s0], 3(%[dst]) \n\t"
- "and %[t1], %[s0], %[s5] \n\t"
- "shra.ph %[t0], %[s0], 5 \n\t"
- "and %[t2], %[t0], %[s6] \n\t"
-#ifdef __mips_dspr2
- "shrl.ph %[t3], %[s0], 11 \n\t"
-#else
- "shra.ph %[t0], %[s0], 11 \n\t"
- "and %[t3], %[t0], %[s5] \n\t"
-#endif
- "precrq.ph.w %[t0], %[s1], %[s2] \n\t"
- "shrl.qb %[t5], %[t0], 3 \n\t"
- "and %[t4], %[t5], %[s5] \n\t"
- "ins %[s2], %[s1], 16, 16 \n\t"
- "preceu.ph.qbra %[t0], %[s2] \n\t"
- "shrl.qb %[t6], %[t0], 3 \n\t"
-#ifdef __mips_dspr2
- "shrl.ph %[t5], %[s2], 10 \n\t"
-#else
- "shra.ph %[t0], %[s2], 10 \n\t"
- "and %[t5], %[t0], %[s6] \n\t"
-#endif
- "subu.qb %[t4], %[t4], %[t1] \n\t"
- "subu.qb %[t5], %[t5], %[t2] \n\t"
- "subu.qb %[t6], %[t6], %[t3] \n\t"
- "muleu_s.ph.qbr %[t4], %[s4], %[t4] \n\t"
- "muleu_s.ph.qbr %[t5], %[s4], %[t5] \n\t"
- "muleu_s.ph.qbr %[t6], %[s4], %[t6] \n\t"
- "addiu %[count], %[count], -2 \n\t"
- "addiu %[src], %[src], 8 \n\t"
- "shra.ph %[t4], %[t4], 8 \n\t"
- "shra.ph %[t5], %[t5], 8 \n\t"
- "shra.ph %[t6], %[t6], 8 \n\t"
- "addu.qb %[t4], %[t4], %[t1] \n\t"
- "addu.qb %[t5], %[t5], %[t2] \n\t"
- "addu.qb %[t6], %[t6], %[t3] \n\t"
- "andi %[s0], %[t4], 0xffff \n\t"
- "andi %[t0], %[t5], 0xffff \n\t"
- "sll %[t0], %[t0], 0x5 \n\t"
- "or %[s0], %[s0], %[t0] \n\t"
- "sll %[t0], %[t6], 0xb \n\t"
- "or %[t0], %[t0], %[s0] \n\t"
- "sh %[t0], 0(%[dst]) \n\t"
- "srl %[s1], %[t4], 16 \n\t"
- "srl %[t0], %[t5], 16 \n\t"
- "sll %[t5], %[t0], 5 \n\t"
- "or %[t0], %[t5], %[s1] \n\t"
- "srl %[s0], %[t6], 16 \n\t"
- "sll %[s2], %[s0], 0xb \n\t"
- "or %[s1], %[s2], %[t0] \n\t"
- "sh %[s1], 2(%[dst]) \n\t"
- "bge %[count], 2, 1b \n\t"
- " addiu %[dst], %[dst], 4 \n\t"
- ".set pop \n\t"
- : [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
- [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0),
- [s1]"=&r"(s1), [s2]"=&r"(s2), [s4]"=&r"(s4), [s5]"=&r"(s5),
- [s6]"=&r"(s6), [count]"+r"(count), [dst]"+r"(dst),
- [src]"+r"(src)
- : [alpha]"r"(alpha)
- : "memory", "hi", "lo"
- );
- }
-
- if (count == 1) {
- SkPMColor c = *src++;
- SkPMColorAssert(c);
- SkASSERT(SkGetPackedA32(c) == 255);
- uint16_t d = *dst;
- *dst++ = SkPackRGB16(SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), alpha),
- SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), alpha),
- SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), alpha));
- }
-}
-
-static void S32A_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst,
- const SkPMColor* __restrict__ src,
- int count, U8CPU alpha, int x, int y) {
- __asm__ volatile (
- "pref 0, 0(%[src]) \n\t"
- "pref 1, 0(%[dst]) \n\t"
- "pref 0, 32(%[src]) \n\t"
- "pref 1, 32(%[dst]) \n\t"
- :
- : [src]"r"(src), [dst]"r"(dst)
- : "memory"
- );
-
- int32_t t0, t1, t2, t3, t4, t5, t6;
- int32_t t7, t8, t9, s0, s1, s2, s3;
- const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
-
- if (count >= 2) {
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "li %[s1], 0x01010101 \n\t"
- "li %[s2], -2017 \n\t"
- "1: \n\t"
- "bnez %[s3], 4f \n\t"
- " li %[s3], 2 \n\t"
- "pref 0, 64(%[src]) \n\t"
- "pref 1, 64(%[dst]) \n\t"
- "4: \n\t"
- "addiu %[s3], %[s3], -1 \n\t"
- "lw %[t1], 0(%[src]) \n\t"
- "andi %[t3], %[x], 0x3 \n\t"
- "addiu %[x], %[x], 1 \n\t"
- "sll %[t4], %[t3], 2 \n\t"
- "srav %[t5], %[dither_scan], %[t4] \n\t"
- "andi %[t3], %[t5], 0xf \n\t"
- "lw %[t2], 4(%[src]) \n\t"
- "andi %[t4], %[x], 0x3 \n\t"
- "sll %[t5], %[t4], 2 \n\t"
- "srav %[t6], %[dither_scan], %[t5] \n\t"
- "addiu %[x], %[x], 1 \n\t"
- "ins %[t3], %[t6], 8, 4 \n\t"
- "srl %[t4], %[t1], 24 \n\t"
- "addiu %[t0], %[t4], 1 \n\t"
- "srl %[t4], %[t2], 24 \n\t"
- "addiu %[t5], %[t4], 1 \n\t"
- "ins %[t0], %[t5], 16, 16 \n\t"
- "muleu_s.ph.qbr %[t4], %[t3], %[t0] \n\t"
- "preceu.ph.qbla %[t3], %[t4] \n\t"
- "andi %[t4], %[t1], 0xff \n\t"
- "ins %[t4], %[t2], 16, 8 \n\t"
- "shrl.qb %[t5], %[t4], 5 \n\t"
- "subu.qb %[t6], %[t3], %[t5] \n\t"
- "addq.ph %[t5], %[t6], %[t4] \n\t"
- "ext %[t4], %[t1], 8, 8 \n\t"
- "srl %[t6], %[t2], 8 \n\t"
- "ins %[t4], %[t6], 16, 8 \n\t"
- "shrl.qb %[t6], %[t4], 6 \n\t"
- "shrl.qb %[t7], %[t3], 1 \n\t"
- "subu.qb %[t8], %[t7], %[t6] \n\t"
- "addq.ph %[t6], %[t8], %[t4] \n\t"
- "ext %[t4], %[t1], 16, 8 \n\t"
- "srl %[t7], %[t2], 16 \n\t"
- "ins %[t4], %[t7], 16, 8 \n\t"
- "shrl.qb %[t7], %[t4], 5 \n\t"
- "subu.qb %[t8], %[t3], %[t7] \n\t"
- "addq.ph %[t7], %[t8], %[t4] \n\t"
- "shll.ph %[t4], %[t7], 2 \n\t"
- "andi %[t9], %[t4], 0xffff \n\t"
- "srl %[s0], %[t4], 16 \n\t"
- "andi %[t3], %[t6], 0xffff \n\t"
- "srl %[t4], %[t6], 16 \n\t"
- "andi %[t6], %[t5], 0xffff \n\t"
- "srl %[t7], %[t5], 16 \n\t"
- "subq.ph %[t5], %[s1], %[t0] \n\t"
- "srl %[t0], %[t5], 3 \n\t"
- "beqz %[t1], 3f \n\t"
- " lhu %[t5], 0(%[dst]) \n\t"
- "sll %[t1], %[t6], 13 \n\t"
- "or %[t8], %[t9], %[t1] \n\t"
- "sll %[t1], %[t3], 24 \n\t"
- "or %[t9], %[t1], %[t8] \n\t"
- "andi %[t3], %[t5], 0x7e0 \n\t"
- "sll %[t6], %[t3], 0x10 \n\t"
- "and %[t8], %[s2], %[t5] \n\t"
- "or %[t5], %[t6], %[t8] \n\t"
- "andi %[t6], %[t0], 0xff \n\t"
- "mul %[t1], %[t6], %[t5] \n\t"
- "addu %[t5], %[t1], %[t9] \n\t"
- "srl %[t6], %[t5], 5 \n\t"
- "and %[t5], %[s2], %[t6] \n\t"
- "srl %[t8], %[t6], 16 \n\t"
- "andi %[t6], %[t8], 0x7e0 \n\t"
- "or %[t1], %[t5], %[t6] \n\t"
- "sh %[t1], 0(%[dst]) \n\t"
- "3: \n\t"
- "beqz %[t2], 2f \n\t"
- " lhu %[t5], 2(%[dst]) \n\t"
- "sll %[t1], %[t7], 13 \n\t"
- "or %[t8], %[s0], %[t1] \n\t"
- "sll %[t1], %[t4], 24 \n\t"
- "or %[t9], %[t1], %[t8] \n\t"
- "andi %[t3], %[t5], 0x7e0 \n\t"
- "sll %[t6], %[t3], 0x10 \n\t"
- "and %[t8], %[s2], %[t5] \n\t"
- "or %[t5], %[t6], %[t8] \n\t"
- "srl %[t6], %[t0], 16 \n\t"
- "mul %[t1], %[t6], %[t5] \n\t"
- "addu %[t5], %[t1], %[t9] \n\t"
- "srl %[t6], %[t5], 5 \n\t"
- "and %[t5], %[s2], %[t6] \n\t"
- "srl %[t8], %[t6], 16 \n\t"
- "andi %[t6], %[t8], 0x7e0 \n\t"
- "or %[t1], %[t5], %[t6] \n\t"
- "sh %[t1], 2(%[dst]) \n\t"
- "2: \n\t"
- "addiu %[count], %[count], -2 \n\t"
- "addiu %[src], %[src], 8 \n\t"
- "addiu %[t1], %[count], -1 \n\t"
- "bgtz %[t1], 1b \n\t"
- " addiu %[dst], %[dst], 4 \n\t"
- ".set pop \n\t"
- : [src]"+r"(src), [count]"+r"(count), [dst]"+r"(dst), [x]"+r"(x),
- [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
- [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7),
- [t8]"=&r"(t8), [t9]"=&r"(t9), [s0]"=&r"(s0), [s1]"=&r"(s1),
- [s2]"=&r"(s2), [s3]"=&r"(s3)
- : [dither_scan]"r"(dither_scan)
- : "memory", "hi", "lo"
- );
- }
-
- if (count == 1) {
- SkPMColor c = *src++;
- SkPMColorAssert(c);
- if (c) {
- unsigned a = SkGetPackedA32(c);
- int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a));
-
- unsigned sr = SkGetPackedR32(c);
- unsigned sg = SkGetPackedG32(c);
- unsigned sb = SkGetPackedB32(c);
- sr = SkDITHER_R32_FOR_565(sr, d);
- sg = SkDITHER_G32_FOR_565(sg, d);
- sb = SkDITHER_B32_FOR_565(sb, d);
-
- uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2);
- uint32_t dst_expanded = SkExpand_rgb_16(*dst);
- dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
- // now src and dst expanded are in g:11 r:10 x:1 b:10
- *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
- }
- dst += 1;
- DITHER_INC_X(x);
- }
-}
-
-static void S32_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst,
- const SkPMColor* __restrict__ src,
- int count, U8CPU alpha, int x, int y) {
- uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
- uint32_t t0, t1, t2, t3, t4, t5;
- uint32_t t6, t7, t8, t9, s0;
- int dither[4];
- int i;
-
- for (i = 0; i < 4; i++, x++) {
- dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF;
- }
-
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "li %[s0], 1 \n\t"
- "2: \n\t"
- "beqz %[count], 1f \n\t"
- " nop \n\t"
- "addiu %[t0], %[count], -1 \n\t"
- "beqz %[t0], 1f \n\t"
- " nop \n\t"
- "beqz %[s0], 3f \n\t"
- " nop \n\t"
- "lw %[t0], 0(%[dither]) \n\t"
- "lw %[t1], 4(%[dither]) \n\t"
- "li %[s0], 0 \n\t"
- "b 4f \n\t"
- " nop \n\t"
- "3: \n\t"
- "lw %[t0], 8(%[dither]) \n\t"
- "lw %[t1], 12(%[dither]) \n\t"
- "li %[s0], 1 \n\t"
- "4: \n\t"
- "sll %[t2], %[t0], 16 \n\t"
- "or %[t1], %[t2], %[t1] \n\t"
- "lw %[t0], 0(%[src]) \n\t"
- "lw %[t2], 4(%[src]) \n\t"
- "precrq.ph.w %[t3], %[t0], %[t2] \n\t"
- "preceu.ph.qbra %[t9], %[t3] \n\t"
-#ifdef __mips_dspr2
- "append %[t0], %[t2], 16 \n\t"
- "preceu.ph.qbra %[t4], %[t0] \n\t"
- "preceu.ph.qbla %[t5], %[t0] \n\t"
-#else
- "sll %[t6], %[t0], 16 \n\t"
- "sll %[t7], %[t2], 16 \n\t"
- "precrq.ph.w %[t8], %[t6], %[t7] \n\t"
- "preceu.ph.qbra %[t4], %[t8] \n\t"
- "preceu.ph.qbla %[t5], %[t8] \n\t"
-#endif
- "addu.qb %[t0], %[t4], %[t1] \n\t"
- "shra.ph %[t2], %[t4], 5 \n\t"
- "subu.qb %[t3], %[t0], %[t2] \n\t"
- "shra.ph %[t6], %[t3], 3 \n\t"
- "addu.qb %[t0], %[t9], %[t1] \n\t"
- "shra.ph %[t2], %[t9], 5 \n\t"
- "subu.qb %[t3], %[t0], %[t2] \n\t"
- "shra.ph %[t7], %[t3], 3 \n\t"
- "shra.ph %[t0], %[t1], 1 \n\t"
- "shra.ph %[t2], %[t5], 6 \n\t"
- "addu.qb %[t3], %[t5], %[t0] \n\t"
- "subu.qb %[t4], %[t3], %[t2] \n\t"
- "shra.ph %[t8], %[t4], 2 \n\t"
- "precrq.ph.w %[t0], %[t6], %[t7] \n\t"
-#ifdef __mips_dspr2
- "append %[t6], %[t7], 16 \n\t"
-#else
- "sll %[t6], %[t6], 16 \n\t"
- "sll %[t2], %[t7], 16 \n\t"
- "precrq.ph.w %[t6], %[t6], %[t2] \n\t"
-#endif
- "sra %[t4], %[t8], 16 \n\t"
- "andi %[t5], %[t8], 0xFF \n\t"
- "sll %[t7], %[t4], 5 \n\t"
- "sra %[t8], %[t0], 5 \n\t"
- "or %[t9], %[t7], %[t8] \n\t"
- "or %[t3], %[t9], %[t0] \n\t"
- "andi %[t4], %[t3], 0xFFFF \n\t"
- "sll %[t7], %[t5], 5 \n\t"
- "sra %[t8], %[t6], 5 \n\t"
- "or %[t9], %[t7], %[t8] \n\t"
- "or %[t3], %[t9], %[t6] \n\t"
- "and %[t7], %[t3], 0xFFFF \n\t"
- "sh %[t4], 0(%[dst]) \n\t"
- "sh %[t7], 2(%[dst]) \n\t"
- "addiu %[count], %[count], -2 \n\t"
- "addiu %[src], %[src], 8 \n\t"
- "b 2b \n\t"
- " addiu %[dst], %[dst], 4 \n\t"
- "1: \n\t"
- ".set pop \n\t"
- : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count),
- [x]"+r"(x), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2),
- [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6),
- [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9), [s0]"=&r"(s0)
- : [dither] "r" (dither)
- : "memory"
- );
-
- if (count == 1) {
- SkPMColor c = *src++;
- SkPMColorAssert(c); // only if DEBUG is turned on
- SkASSERT(SkGetPackedA32(c) == 255);
- unsigned dither = DITHER_VALUE(x);
- *dst++ = SkDitherRGB32To565(c, dither);
- }
-}
-
-static void S32_D565_Blend_Dither_mips_dsp(uint16_t* dst,
- const SkPMColor* src,
- int count, U8CPU alpha, int x, int y) {
- int32_t t0, t1, t2, t3, t4, t5, t6;
- int32_t s0, s1, s2, s3;
- int x1 = 0;
- uint32_t sc_mul;
- uint32_t sc_add;
-#ifdef ENABLE_DITHER_MATRIX_4X4
- const uint8_t* dither_scan = gDitherMatrix_3Bit_4X4[(y) & 3];
-#else // ENABLE_DITHER_MATRIX_4X4
- const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
-#endif // ENABLE_DITHER_MATRIX_4X4
- int dither[4];
-
- for (int i = 0; i < 4; i++) {
- dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF;
- x += 1;
- }
- alpha += 1;
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "li %[t0], 0x100 \n\t"
- "subu %[t0], %[t0], %[alpha] \n\t"
- "replv.ph %[sc_mul], %[alpha] \n\t"
- "beqz %[alpha], 1f \n\t"
- " nop \n\t"
- "replv.qb %[sc_add], %[t0] \n\t"
- "b 2f \n\t"
- " nop \n\t"
- "1: \n\t"
- "replv.qb %[sc_add], %[alpha] \n\t"
- "2: \n\t"
- "addiu %[t2], %[count], -1 \n\t"
- "blez %[t2], 3f \n\t"
- " nop \n\t"
- "lw %[s0], 0(%[src]) \n\t"
- "lw %[s1], 4(%[src]) \n\t"
- "bnez %[x1], 4f \n\t"
- " nop \n\t"
- "lw %[t0], 0(%[dither]) \n\t"
- "lw %[t1], 4(%[dither]) \n\t"
- "li %[x1], 1 \n\t"
- "b 5f \n\t"
- " nop \n\t"
- "4: \n\t"
- "lw %[t0], 8(%[dither]) \n\t"
- "lw %[t1], 12(%[dither]) \n\t"
- "li %[x1], 0 \n\t"
- "5: \n\t"
- "sll %[t3], %[t0], 7 \n\t"
- "sll %[t4], %[t1], 7 \n\t"
-#ifdef __mips_dspr2
- "append %[t0], %[t1], 16 \n\t"
-#else
- "sll %[t0], %[t0], 8 \n\t"
- "sll %[t2], %[t1], 8 \n\t"
- "precrq.qb.ph %[t0], %[t0], %[t2] \n\t"
-#endif
- "precrq.qb.ph %[t1], %[t3], %[t4] \n\t"
- "sll %[t5], %[s0], 8 \n\t"
- "sll %[t6], %[s1], 8 \n\t"
- "precrq.qb.ph %[t4], %[t5], %[t6] \n\t"
- "precrq.qb.ph %[t6], %[s0], %[s1] \n\t"
- "preceu.ph.qbla %[t5], %[t4] \n\t"
- "preceu.ph.qbra %[t4], %[t4] \n\t"
- "preceu.ph.qbra %[t6], %[t6] \n\t"
- "lh %[t2], 0(%[dst]) \n\t"
- "lh %[s1], 2(%[dst]) \n\t"
-#ifdef __mips_dspr2
- "append %[t2], %[s1], 16 \n\t"
-#else
- "sll %[s1], %[s1], 16 \n\t"
- "packrl.ph %[t2], %[t2], %[s1] \n\t"
-#endif
- "shra.ph %[s1], %[t2], 11 \n\t"
- "and %[s1], %[s1], 0x1F001F \n\t"
- "shra.ph %[s2], %[t2], 5 \n\t"
- "and %[s2], %[s2], 0x3F003F \n\t"
- "and %[s3], %[t2], 0x1F001F \n\t"
- "shrl.qb %[t3], %[t4], 5 \n\t"
- "addu.qb %[t4], %[t4], %[t0] \n\t"
- "subu.qb %[t4], %[t4], %[t3] \n\t"
- "shrl.qb %[t4], %[t4], 3 \n\t"
- "shrl.qb %[t3], %[t5], 5 \n\t"
- "addu.qb %[t5], %[t5], %[t0] \n\t"
- "subu.qb %[t5], %[t5], %[t3] \n\t"
- "shrl.qb %[t5], %[t5], 3 \n\t"
- "shrl.qb %[t3], %[t6], 6 \n\t"
- "addu.qb %[t6], %[t6], %[t1] \n\t"
- "subu.qb %[t6], %[t6], %[t3] \n\t"
- "shrl.qb %[t6], %[t6], 2 \n\t"
- "cmpu.lt.qb %[t4], %[s1] \n\t"
- "pick.qb %[s0], %[sc_add], $0 \n\t"
- "addu.qb %[s0], %[s0], %[s1] \n\t"
- "subu.qb %[t4], %[t4], %[s1] \n\t"
- "muleu_s.ph.qbl %[t0], %[t4], %[sc_mul] \n\t"
- "muleu_s.ph.qbr %[t1], %[t4], %[sc_mul] \n\t"
- "precrq.qb.ph %[t4], %[t0], %[t1] \n\t"
- "addu.qb %[t4], %[t4], %[s0] \n\t"
- "cmpu.lt.qb %[t5], %[s3] \n\t"
- "pick.qb %[s0], %[sc_add], $0 \n\t"
- "addu.qb %[s0], %[s0], %[s3] \n\t"
- "subu.qb %[t5], %[t5], %[s3] \n\t"
- "muleu_s.ph.qbl %[t0], %[t5], %[sc_mul] \n\t"
- "muleu_s.ph.qbr %[t1], %[t5], %[sc_mul] \n\t"
- "precrq.qb.ph %[t5], %[t0], %[t1] \n\t"
- "addu.qb %[t5], %[t5], %[s0] \n\t"
- "cmpu.lt.qb %[t6], %[s2] \n\t"
- "pick.qb %[s0], %[sc_add], $0 \n\t"
- "addu.qb %[s0], %[s0], %[s2] \n\t"
- "subu.qb %[t6], %[t6], %[s2] \n\t"
- "muleu_s.ph.qbl %[t0], %[t6], %[sc_mul] \n\t"
- "muleu_s.ph.qbr %[t1], %[t6], %[sc_mul] \n\t"
- "precrq.qb.ph %[t6], %[t0], %[t1] \n\t"
- "addu.qb %[t6], %[t6], %[s0] \n\t"
- "shll.ph %[s1], %[t4], 11 \n\t"
- "shll.ph %[t0], %[t6], 5 \n\t"
- "or %[s0], %[s1], %[t0] \n\t"
- "or %[s1], %[s0], %[t5] \n\t"
- "srl %[t2], %[s1], 16 \n\t"
- "and %[t3], %[s1], 0xFFFF \n\t"
- "sh %[t2], 0(%[dst]) \n\t"
- "sh %[t3], 2(%[dst]) \n\t"
- "addiu %[src], %[src], 8 \n\t"
- "addi %[count], %[count], -2 \n\t"
- "b 2b \n\t"
- " addu %[dst], %[dst], 4 \n\t"
- "3: \n\t"
- ".set pop \n\t"
- : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
- [x1]"+r"(x1), [sc_mul]"=&r"(sc_mul), [sc_add]"=&r"(sc_add),
- [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
- [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0),
- [s1]"=&r"(s1), [s2]"=&r"(s2), [s3]"=&r"(s3)
- : [dither]"r"(dither), [alpha]"r"(alpha)
- : "memory", "hi", "lo"
- );
-
- if(count == 1) {
- SkPMColor c = *src++;
- SkPMColorAssert(c);
- SkASSERT(SkGetPackedA32(c) == 255);
- DITHER_565_SCAN(y);
- int dither = DITHER_VALUE(x);
- int sr = SkGetPackedR32(c);
- int sg = SkGetPackedG32(c);
- int sb = SkGetPackedB32(c);
- sr = SkDITHER_R32To565(sr, dither);
- sg = SkDITHER_G32To565(sg, dither);
- sb = SkDITHER_B32To565(sb, dither);
-
- uint16_t d = *dst;
- *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), alpha),
- SkAlphaBlend(sg, SkGetPackedG16(d), alpha),
- SkAlphaBlend(sb, SkGetPackedB16(d), alpha));
- DITHER_INC_X(x);
- }
-}
-
-static void S32A_D565_Opaque_mips_dsp(uint16_t* __restrict__ dst,
- const SkPMColor* __restrict__ src,
- int count, U8CPU alpha, int x, int y) {
-
- __asm__ volatile (
- "pref 0, 0(%[src]) \n\t"
- "pref 1, 0(%[dst]) \n\t"
- "pref 0, 32(%[src]) \n\t"
- "pref 1, 32(%[dst]) \n\t"
- :
- : [src]"r"(src), [dst]"r"(dst)
- : "memory"
- );
-
- uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8;
- uint32_t t16;
- uint32_t add_x10 = 0x100010;
- uint32_t add_x20 = 0x200020;
- uint32_t sa = 0xff00ff;
-
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "blez %[count], 1f \n\t"
- " nop \n\t"
- "2: \n\t"
- "beqz %[count], 1f \n\t"
- " nop \n\t"
- "addiu %[t0], %[count], -1 \n\t"
- "beqz %[t0], 1f \n\t"
- " nop \n\t"
- "bnez %[t16], 3f \n\t"
- " nop \n\t"
- "li %[t16], 2 \n\t"
- "pref 0, 64(%[src]) \n\t"
- "pref 1, 64(%[dst]) \n\t"
- "3: \n\t"
- "addiu %[t16], %[t16], -1 \n\t"
- "lw %[t0], 0(%[src]) \n\t"
- "lw %[t1], 4(%[src]) \n\t"
- "precrq.ph.w %[t2], %[t0], %[t1] \n\t"
- "preceu.ph.qbra %[t8], %[t2] \n\t"
-#ifdef __mips_dspr2
- "append %[t0], %[t1], 16 \n\t"
-#else
- "sll %[t0], %[t0], 16 \n\t"
- "sll %[t6], %[t1], 16 \n\t"
- "precrq.ph.w %[t0], %[t0], %[t6] \n\t"
-#endif
- "preceu.ph.qbra %[t3], %[t0] \n\t"
- "preceu.ph.qbla %[t4], %[t0] \n\t"
- "preceu.ph.qbla %[t0], %[t2] \n\t"
- "subq.ph %[t1], %[sa], %[t0] \n\t"
- "sra %[t2], %[t1], 8 \n\t"
- "or %[t5], %[t2], %[t1] \n\t"
- "replv.ph %[t2], %[t5] \n\t"
- "lh %[t0], 0(%[dst]) \n\t"
- "lh %[t1], 2(%[dst]) \n\t"
- "and %[t1], %[t1], 0xffff \n\t"
-#ifdef __mips_dspr2
- "append %[t0], %[t1], 16 \n\t"
-#else
- "sll %[t5], %[t0], 16 \n\t"
- "or %[t0], %[t5], %[t1] \n\t"
-#endif
- "and %[t1], %[t0], 0x1f001f \n\t"
- "shra.ph %[t6], %[t0], 11 \n\t"
- "and %[t6], %[t6], 0x1f001f \n\t"
- "and %[t7], %[t0], 0x7e007e0 \n\t"
- "shra.ph %[t5], %[t7], 5 \n\t"
- "muleu_s.ph.qbl %[t0], %[t2], %[t6] \n\t"
- "addq.ph %[t7], %[t0], %[add_x10] \n\t"
- "shra.ph %[t6], %[t7], 5 \n\t"
- "addq.ph %[t6], %[t7], %[t6] \n\t"
- "shra.ph %[t0], %[t6], 5 \n\t"
- "addq.ph %[t7], %[t0], %[t3] \n\t"
- "shra.ph %[t6], %[t7], 3 \n\t"
- "muleu_s.ph.qbl %[t0], %[t2], %[t1] \n\t"
- "addq.ph %[t7], %[t0], %[add_x10] \n\t"
- "shra.ph %[t0], %[t7], 5 \n\t"
- "addq.ph %[t7], %[t7], %[t0] \n\t"
- "shra.ph %[t0], %[t7], 5 \n\t"
- "addq.ph %[t7], %[t0], %[t8] \n\t"
- "shra.ph %[t3], %[t7], 3 \n\t"
- "muleu_s.ph.qbl %[t0], %[t2], %[t5] \n\t"
- "addq.ph %[t7], %[t0], %[add_x20] \n\t"
- "shra.ph %[t0], %[t7], 6 \n\t"
- "addq.ph %[t8], %[t7], %[t0] \n\t"
- "shra.ph %[t0], %[t8], 6 \n\t"
- "addq.ph %[t7], %[t0], %[t4] \n\t"
- "shra.ph %[t8], %[t7], 2 \n\t"
- "shll.ph %[t0], %[t8], 5 \n\t"
- "shll.ph %[t1], %[t6], 11 \n\t"
- "or %[t2], %[t0], %[t1] \n\t"
- "or %[t3], %[t2], %[t3] \n\t"
- "sra %[t4], %[t3], 16 \n\t"
- "sh %[t4], 0(%[dst]) \n\t"
- "sh %[t3], 2(%[dst]) \n\t"
- "addiu %[count], %[count], -2 \n\t"
- "addiu %[src], %[src], 8 \n\t"
- "b 2b \n\t"
- " addiu %[dst], %[dst], 4 \n\t"
- "1: \n\t"
- ".set pop \n\t"
- : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count),
- [t16]"=&r"(t16), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2),
- [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6),
- [t7]"=&r"(t7), [t8]"=&r"(t8)
- : [add_x10]"r"(add_x10), [add_x20]"r"(add_x20), [sa]"r"(sa)
- : "memory", "hi", "lo"
- );
-
- if (count == 1) {
- SkPMColor c = *src++;
- SkPMColorAssert(c);
- if (c) {
- *dst = SkSrcOver32To16(c, *dst);
- }
- dst += 1;
- }
-}
-
-static void S32A_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst,
- const SkPMColor* SK_RESTRICT src, int count,
- U8CPU alpha, int /*x*/, int /*y*/) {
- uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
- uint32_t s0, s1, s2, s3;
- unsigned dst_scale = 0;
-
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "replv.qb %[t0], %[alpha] \n\t"
- "repl.ph %[t6], 0x80 \n\t"
- "repl.ph %[t7], 0xFF \n\t"
- "1: \n\t"
- "addiu %[t8], %[count], -1 \n\t"
- "blez %[t8], 2f \n\t"
- " nop \n\t"
- "lw %[t8], 0(%[src]) \n\t"
- "lw %[t9], 4(%[src]) \n\t"
- "lh %[t4], 0(%[dst]) \n\t"
- "lh %[t5], 2(%[dst]) \n\t"
- "sll %[t5], %[t5], 16 \n\t"
- "sll %[t2], %[t8], 8 \n\t"
- "sll %[t3], %[t9], 8 \n\t"
- "precrq.qb.ph %[t1], %[t2], %[t3] \n\t"
- "precrq.qb.ph %[t3], %[t8], %[t9] \n\t"
- "preceu.ph.qbla %[t8], %[t3] \n\t"
- "muleu_s.ph.qbr %[s3], %[t0], %[t8] \n\t"
- "preceu.ph.qbla %[t2], %[t1] \n\t"
- "preceu.ph.qbra %[t1], %[t1] \n\t"
- "preceu.ph.qbra %[t3], %[t3] \n\t"
- "packrl.ph %[t9], %[t4], %[t5] \n\t"
- "shra.ph %[s0], %[t9], 11 \n\t"
- "and %[s0], %[s0], 0x1F001F \n\t"
- "shra.ph %[s1], %[t9], 5 \n\t"
- "and %[s1], %[s1], 0x3F003F \n\t"
- "and %[s2], %[t9], 0x1F001F \n\t"
- "addq.ph %[s3], %[s3], %[t6] \n\t"
- "shra.ph %[t5], %[s3], 8 \n\t"
- "and %[t5], %[t5], 0xFF00FF \n\t"
- "addq.ph %[dst_scale], %[s3], %[t5] \n\t"
- "shra.ph %[dst_scale], %[dst_scale], 8 \n\t"
- "subq_s.ph %[dst_scale], %[t7], %[dst_scale] \n\t"
- "sll %[dst_scale], %[dst_scale], 8 \n\t"
- "precrq.qb.ph %[dst_scale], %[dst_scale], %[dst_scale] \n\t"
- "shrl.qb %[t1], %[t1], 3 \n\t"
- "shrl.qb %[t2], %[t2], 3 \n\t"
- "shrl.qb %[t3], %[t3], 2 \n\t"
- "muleu_s.ph.qbl %[t1], %[t0], %[t1] \n\t"
- "muleu_s.ph.qbl %[t2], %[t0], %[t2] \n\t"
- "muleu_s.ph.qbl %[t3], %[t0], %[t3] \n\t"
- "muleu_s.ph.qbl %[t8], %[dst_scale], %[s0] \n\t"
- "muleu_s.ph.qbl %[t9], %[dst_scale], %[s2] \n\t"
- "muleu_s.ph.qbl %[t4], %[dst_scale], %[s1] \n\t"
- "addq.ph %[t1], %[t1], %[t8] \n\t"
- "addq.ph %[t2], %[t2], %[t9] \n\t"
- "addq.ph %[t3], %[t3], %[t4] \n\t"
- "addq.ph %[t8], %[t1], %[t6] \n\t"
- "addq.ph %[t9], %[t2], %[t6] \n\t"
- "addq.ph %[t4], %[t3], %[t6] \n\t"
- "shra.ph %[t1], %[t8], 8 \n\t"
- "addq.ph %[t1], %[t1], %[t8] \n\t"
- "preceu.ph.qbla %[t1], %[t1] \n\t"
- "shra.ph %[t2], %[t9], 8 \n\t"
- "addq.ph %[t2], %[t2], %[t9] \n\t"
- "preceu.ph.qbla %[t2], %[t2] \n\t"
- "shra.ph %[t3], %[t4], 8 \n\t"
- "addq.ph %[t3], %[t3], %[t4] \n\t"
- "preceu.ph.qbla %[t3], %[t3] \n\t"
- "shll.ph %[t8], %[t1], 11 \n\t"
- "shll.ph %[t9], %[t3], 5 \n\t"
- "or %[t8], %[t8], %[t9] \n\t"
- "or %[s0], %[t8], %[t2] \n\t"
- "srl %[t8], %[s0], 16 \n\t"
- "and %[t9], %[s0], 0xFFFF \n\t"
- "sh %[t8], 0(%[dst]) \n\t"
- "sh %[t9], 2(%[dst]) \n\t"
- "addiu %[src], %[src], 8 \n\t"
- "addiu %[count], %[count], -2 \n\t"
- "b 1b \n\t"
- " addiu %[dst], %[dst], 4 \n\t"
- "2: \n\t"
- ".set pop \n\t"
- : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
- [dst_scale]"+r"(dst_scale), [s0]"=&r"(s0), [s1]"=&r"(s1),
- [s2]"=&r"(s2), [s3]"=&r"(s3), [t0]"=&r"(t0), [t1]"=&r"(t1),
- [t2]"=&r"(t2), [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5),
- [t6]"=&r"(t6), [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9)
- : [alpha]"r"(alpha)
- : "memory", "hi", "lo"
- );
-
- if (count == 1) {
- SkPMColor sc = *src++;
- SkPMColorAssert(sc);
- if (sc) {
- uint16_t dc = *dst;
- unsigned dst_scale = 255 - SkMulDiv255Round(SkGetPackedA32(sc), alpha);
- unsigned dr = (SkPacked32ToR16(sc) * alpha) + (SkGetPackedR16(dc) * dst_scale);
- unsigned dg = (SkPacked32ToG16(sc) * alpha) + (SkGetPackedG16(dc) * dst_scale);
- unsigned db = (SkPacked32ToB16(sc) * alpha) + (SkGetPackedB16(dc) * dst_scale);
- *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Round(db));
- }
- dst += 1;
- }
-}
-
-static void S32_Blend_BlitRow32_mips_dsp(SkPMColor* SK_RESTRICT dst,
- const SkPMColor* SK_RESTRICT src,
- int count, U8CPU alpha) {
- int32_t t0, t1, t2, t3, t4, t5, t6, t7;
-
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "li %[t2], 0x100 \n\t"
- "addiu %[t0], %[alpha], 1 \n\t"
- "subu %[t1], %[t2], %[t0] \n\t"
- "replv.qb %[t7], %[t0] \n\t"
- "replv.qb %[t6], %[t1] \n\t"
- "1: \n\t"
- "blez %[count], 2f \n\t"
- "lw %[t0], 0(%[src]) \n\t"
- "lw %[t1], 0(%[dst]) \n\t"
- "preceu.ph.qbr %[t2], %[t0] \n\t"
- "preceu.ph.qbl %[t3], %[t0] \n\t"
- "preceu.ph.qbr %[t4], %[t1] \n\t"
- "preceu.ph.qbl %[t5], %[t1] \n\t"
- "muleu_s.ph.qbr %[t2], %[t7], %[t2] \n\t"
- "muleu_s.ph.qbr %[t3], %[t7], %[t3] \n\t"
- "muleu_s.ph.qbr %[t4], %[t6], %[t4] \n\t"
- "muleu_s.ph.qbr %[t5], %[t6], %[t5] \n\t"
- "addiu %[src], %[src], 4 \n\t"
- "addiu %[count], %[count], -1 \n\t"
-#ifdef SK_SUPPORT_LEGACY_BROKEN_LERP
- "precrq.qb.ph %[t0], %[t3], %[t2] \n\t"
- "precrq.qb.ph %[t2], %[t5], %[t4] \n\t"
- "addu %[t1], %[t0], %[t2] \n\t"
-#else
- "addu %[t0], %[t3], %[t5] \n\t"
- "addu %[t2], %[t2], %[t4] \n\t"
- "precrq.qb.ph %[t1], %[t0], %[t2] \n\t"
-#endif
- "sw %[t1], 0(%[dst]) \n\t"
- "b 1b \n\t"
- " addi %[dst], %[dst], 4 \n\t"
- "2: \n\t"
- ".set pop \n\t"
- : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
- [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
- [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
- : [alpha]"r"(alpha)
- : "memory", "hi", "lo"
- );
-}
-
-void blitmask_d565_opaque_mips(int width, int height, uint16_t* device,
- unsigned deviceRB, const uint8_t* alpha,
- uint32_t expanded32, unsigned maskRB) {
- uint32_t s0, s1, s2, s3;
-
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- ".set noat \n\t"
- "li $t9, 0x7E0F81F \n\t"
- "1: \n\t"
- "move $t8, %[width] \n\t"
- "addiu %[height], %[height], -1 \n\t"
- "2: \n\t"
- "beqz $t8, 4f \n\t"
- " addiu $t0, $t8, -4 \n\t"
- "bltz $t0, 3f \n\t"
- " nop \n\t"
- "addiu $t8, $t8, -4 \n\t"
- "lhu $t0, 0(%[device]) \n\t"
- "lhu $t1, 2(%[device]) \n\t"
- "lhu $t2, 4(%[device]) \n\t"
- "lhu $t3, 6(%[device]) \n\t"
- "lbu $t4, 0(%[alpha]) \n\t"
- "lbu $t5, 1(%[alpha]) \n\t"
- "lbu $t6, 2(%[alpha]) \n\t"
- "lbu $t7, 3(%[alpha]) \n\t"
- "replv.ph $t0, $t0 \n\t"
- "replv.ph $t1, $t1 \n\t"
- "replv.ph $t2, $t2 \n\t"
- "replv.ph $t3, $t3 \n\t"
- "addiu %[s0], $t4, 1 \n\t"
- "addiu %[s1], $t5, 1 \n\t"
- "addiu %[s2], $t6, 1 \n\t"
- "addiu %[s3], $t7, 1 \n\t"
- "srl %[s0], %[s0], 3 \n\t"
- "srl %[s1], %[s1], 3 \n\t"
- "srl %[s2], %[s2], 3 \n\t"
- "srl %[s3], %[s3], 3 \n\t"
- "and $t0, $t0, $t9 \n\t"
- "and $t1, $t1, $t9 \n\t"
- "and $t2, $t2, $t9 \n\t"
- "and $t3, $t3, $t9 \n\t"
- "subu $t4, %[expanded32], $t0 \n\t"
- "subu $t5, %[expanded32], $t1 \n\t"
- "subu $t6, %[expanded32], $t2 \n\t"
- "subu $t7, %[expanded32], $t3 \n\t"
- "mul $t4, $t4, %[s0] \n\t"
- "mul $t5, $t5, %[s1] \n\t"
- "mul $t6, $t6, %[s2] \n\t"
- "mul $t7, $t7, %[s3] \n\t"
- "addiu %[alpha], %[alpha], 4 \n\t"
- "srl $t4, $t4, 5 \n\t"
- "srl $t5, $t5, 5 \n\t"
- "srl $t6, $t6, 5 \n\t"
- "srl $t7, $t7, 5 \n\t"
- "addu $t4, $t0, $t4 \n\t"
- "addu $t5, $t1, $t5 \n\t"
- "addu $t6, $t2, $t6 \n\t"
- "addu $t7, $t3, $t7 \n\t"
- "and $t4, $t4, $t9 \n\t"
- "and $t5, $t5, $t9 \n\t"
- "and $t6, $t6, $t9 \n\t"
- "and $t7, $t7, $t9 \n\t"
- "srl $t0, $t4, 16 \n\t"
- "srl $t1, $t5, 16 \n\t"
- "srl $t2, $t6, 16 \n\t"
- "srl $t3, $t7, 16 \n\t"
- "or %[s0], $t0, $t4 \n\t"
- "or %[s1], $t1, $t5 \n\t"
- "or %[s2], $t2, $t6 \n\t"
- "or %[s3], $t3, $t7 \n\t"
- "sh %[s0], 0(%[device]) \n\t"
- "sh %[s1], 2(%[device]) \n\t"
- "sh %[s2], 4(%[device]) \n\t"
- "sh %[s3], 6(%[device]) \n\t"
- "b 2b \n\t"
- " addiu %[device], %[device], 8 \n\t"
- "3: \n\t"
- "lhu $t0, 0(%[device]) \n\t"
- "lbu $t1, 0(%[alpha]) \n\t"
- "addiu $t8, $t8, -1 \n\t"
- "replv.ph $t2, $t0 \n\t"
- "and $t2, $t2, $t9 \n\t"
- "addiu $t0, $t1, 1 \n\t"
- "srl $t0, $t0, 3 \n\t"
- "subu $t3, %[expanded32], $t2 \n\t"
- "mul $t3, $t3, $t0 \n\t"
- "addiu %[alpha], %[alpha], 1 \n\t"
- "srl $t3, $t3, 5 \n\t"
- "addu $t3, $t2, $t3 \n\t"
- "and $t3, $t3, $t9 \n\t"
- "srl $t4, $t3, 16 \n\t"
- "or %[s0], $t4, $t3 \n\t"
- "sh %[s0], 0(%[device]) \n\t"
- "bnez $t8, 3b \n\t"
- "addiu %[device], %[device], 2 \n\t"
- "4: \n\t"
- "addu %[device], %[device], %[deviceRB] \n\t"
- "bgtz %[height], 1b \n\t"
- " addu %[alpha], %[alpha], %[maskRB] \n\t"
- ".set pop \n\t"
- : [height]"+r"(height), [alpha]"+r"(alpha), [device]"+r"(device),
- [deviceRB]"+r"(deviceRB), [maskRB]"+r"(maskRB), [s0]"=&r"(s0),
- [s1]"=&r"(s1), [s2]"=&r"(s2), [s3]"=&r"(s3)
- : [expanded32] "r" (expanded32), [width] "r" (width)
- : "memory", "hi", "lo", "t0", "t1", "t2", "t3",
- "t4", "t5", "t6", "t7", "t8", "t9"
- );
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-const SkBlitRow::Proc16 platform_565_procs_mips_dsp[] = {
- // no dither
- nullptr,
- S32_D565_Blend_mips_dsp,
- S32A_D565_Opaque_mips_dsp,
- S32A_D565_Blend_mips_dsp,
-
- // dither
- S32_D565_Opaque_Dither_mips_dsp,
- S32_D565_Blend_Dither_mips_dsp,
- S32A_D565_Opaque_Dither_mips_dsp,
- nullptr,
-};
-
-static const SkBlitRow::Proc32 platform_32_procs_mips_dsp[] = {
- nullptr, // S32_Opaque,
- S32_Blend_BlitRow32_mips_dsp, // S32_Blend,
- nullptr, // S32A_Opaque,
- nullptr, // S32A_Blend,
-};
-
-SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) {
- return platform_565_procs_mips_dsp[flags];
-}
-
-SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) {
- return nullptr;
-}
-
-SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
- return platform_32_procs_mips_dsp[flags];
-}