diff options
author | Mike Reed <reed@google.com> | 2017-07-19 17:20:37 -0400 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-07-20 00:43:37 +0000 |
commit | e32500f0642df381fd79731df2f7a4a4a71a46e2 (patch) | |
tree | 05747f712923791d6df14077714cede88d9d51ff | |
parent | 3e583cba8af153952e31925e0d4bfbc71cfa43b8 (diff) |
Assume HQ is handled by pipeline, delete legacy code-path
CQ_INCLUDE_TRYBOTS=skia.primary:Test-Debian9-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD
Bug: skia:
Change-Id: If6f0d0a57463bf99a66d674e65a62ce3931d0116
Reviewed-on: https://skia-review.googlesource.com/24644
Commit-Queue: Mike Reed <reed@google.com>
Reviewed-by: Mike Klein <mtklein@chromium.org>
-rw-r--r-- | BUILD.gn | 17 | ||||
-rw-r--r-- | bench/BitmapScaleBench.cpp | 161 | ||||
-rw-r--r-- | gm/filterindiabox.cpp | 3 | ||||
-rw-r--r-- | gm/showmiplevels.cpp | 24 | ||||
-rw-r--r-- | gn/bench.gni | 1 | ||||
-rw-r--r-- | gn/core.gni | 5 | ||||
-rw-r--r-- | gn/gn_to_bp.py | 3 | ||||
-rw-r--r-- | gn/opts.gni | 1 | ||||
-rw-r--r-- | gn/shared_sources.gni | 2 | ||||
-rw-r--r-- | src/core/SkBitmapController.cpp | 140 | ||||
-rw-r--r-- | src/core/SkBitmapController.h | 5 | ||||
-rw-r--r-- | src/core/SkBitmapFilter.h | 209 | ||||
-rw-r--r-- | src/core/SkBitmapProcState.cpp | 4 | ||||
-rw-r--r-- | src/core/SkBitmapProcState.h | 2 | ||||
-rw-r--r-- | src/core/SkBitmapScaler.cpp | 254 | ||||
-rw-r--r-- | src/core/SkBitmapScaler.h | 46 | ||||
-rw-r--r-- | src/core/SkBlitter.cpp | 5 | ||||
-rw-r--r-- | src/core/SkConvolver.cpp | 272 | ||||
-rw-r--r-- | src/core/SkConvolver.h | 173 | ||||
-rw-r--r-- | src/core/SkOpts.cpp | 7 | ||||
-rw-r--r-- | src/core/SkOpts.h | 10 | ||||
-rw-r--r-- | src/opts/SkBitmapFilter_opts.h | 940 | ||||
-rw-r--r-- | src/opts/SkBitmapProcState_opts_none.cpp | 1 | ||||
-rw-r--r-- | src/opts/SkOpts_hsw.cpp | 118 | ||||
-rw-r--r-- | src/opts/opts_check_x86.cpp | 1 | ||||
-rw-r--r-- | src/shaders/SkImageShader.cpp | 2 |
26 files changed, 25 insertions, 2381 deletions
@@ -297,22 +297,6 @@ opts("avx") { } } -opts("hsw") { - enabled = is_x86 - sources = skia_opts.hsw_sources - if (is_win) { - cflags = [ "/arch:AVX2" ] - } else { - cflags = [ - "-mavx2", - "-mbmi", - "-mbmi2", - "-mf16c", - "-mfma", - ] - } -} - # Any feature of Skia that requires third-party code should be optional and use this template. template("optional") { if (invoker.enabled) { @@ -668,7 +652,6 @@ component("skia") { ":fontmgr_fontconfig", ":fontmgr_fuchsia", ":gpu", - ":hsw", ":jpeg", ":none", ":pdf", diff --git a/bench/BitmapScaleBench.cpp b/bench/BitmapScaleBench.cpp deleted file mode 100644 index e309d5162c..0000000000 --- a/bench/BitmapScaleBench.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright 2013 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "Benchmark.h" -#include "SkBlurMask.h" -#include "SkCanvas.h" -#include "SkPaint.h" -#include "SkRandom.h" -#include "SkShader.h" -#include "SkString.h" - -class BitmapScaleBench: public Benchmark { - int fLoopCount; - int fInputSize; - int fOutputSize; - SkString fName; - -public: - BitmapScaleBench( int is, int os) { - fInputSize = is; - fOutputSize = os; - - fLoopCount = 20; - } - -protected: - - SkBitmap fInputBitmap, fOutputBitmap; - SkMatrix fMatrix; - - const char* onGetName() override { - return fName.c_str(); - } - - int inputSize() const { - return fInputSize; - } - - int outputSize() const { - return fOutputSize; - } - - float scale() const { - return float(outputSize())/inputSize(); - } - - SkIPoint onGetSize() override { - return SkIPoint::Make( fOutputSize, fOutputSize ); - } - - void setName(const char * name) { - fName.printf( "bitmap_scale_%s_%d_%d", name, fInputSize, fOutputSize ); - } - - void onDelayedSetup() override { - fInputBitmap.allocN32Pixels(fInputSize, fInputSize, true); - fInputBitmap.eraseColor(SK_ColorWHITE); - - fOutputBitmap.allocN32Pixels(fOutputSize, fOutputSize, true); - - fMatrix.setScale( scale(), scale() ); - } - - void onDraw(int loops, SkCanvas*) override { - SkPaint paint; - this->setupPaint(&paint); - - preBenchSetup(); - - for (int i = 0; i < loops; i++) { - doScaleImage(); - } - } - - virtual void doScaleImage() = 0; - virtual void preBenchSetup() {} -private: - typedef Benchmark INHERITED; -}; - -class BitmapFilterScaleBench: public BitmapScaleBench { - public: - BitmapFilterScaleBench( int is, int os) : INHERITED(is, os) { - setName( "filter" ); - } -protected: - void doScaleImage() override { - SkCanvas canvas( fOutputBitmap ); - SkPaint paint; - - paint.setFilterQuality(kHigh_SkFilterQuality); - fInputBitmap.notifyPixelsChanged(); - canvas.concat(fMatrix); - canvas.drawBitmap(fInputBitmap, 0, 0, &paint ); - } -private: - typedef BitmapScaleBench INHERITED; -}; - -DEF_BENCH(return new BitmapFilterScaleBench(10, 90);) -DEF_BENCH(return new BitmapFilterScaleBench(30, 90);) -DEF_BENCH(return new BitmapFilterScaleBench(80, 90);) -DEF_BENCH(return new BitmapFilterScaleBench(90, 90);) -DEF_BENCH(return new BitmapFilterScaleBench(90, 80);) -DEF_BENCH(return new BitmapFilterScaleBench(90, 30);) -DEF_BENCH(return new BitmapFilterScaleBench(90, 10);) -DEF_BENCH(return new BitmapFilterScaleBench(256, 64);) -DEF_BENCH(return new BitmapFilterScaleBench(64, 256);) - -/////////////////////////////////////////////////////////////////////////////////////////////// - -#include "SkBitmapScaler.h" - -class PixmapScalerBench: public Benchmark { - SkBitmapScaler::ResizeMethod fMethod; - SkString fName; - SkBitmap fSrc, fDst; - -public: - PixmapScalerBench(SkBitmapScaler::ResizeMethod method, const char suffix[]) : fMethod(method) { - fName.printf("pixmapscaler_%s", suffix); - } - -protected: - const char* onGetName() override { - return fName.c_str(); - } - - SkIPoint onGetSize() override { return{ 100, 100 }; } - - bool isSuitableFor(Backend backend) override { - return backend == kNonRendering_Backend; - } - - void onDelayedSetup() override { - fSrc.allocN32Pixels(640, 480); - fSrc.eraseColor(SK_ColorWHITE); - fDst.allocN32Pixels(300, 250); - } - - void onDraw(int loops, SkCanvas*) override { - SkPixmap src, dst; - fSrc.peekPixels(&src); - fDst.peekPixels(&dst); - for (int i = 0; i < loops * 16; i++) { - SkBitmapScaler::Resize(dst, src, fMethod); - } - } - -private: - typedef Benchmark INHERITED; -}; -DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_LANCZOS3, "lanczos"); ) -DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_MITCHELL, "mitchell"); ) -DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_HAMMING, "hamming"); ) -DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_TRIANGLE, "triangle"); ) -DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_BOX, "box"); ) diff --git a/gm/filterindiabox.cpp b/gm/filterindiabox.cpp index eef0b4a0d5..84f512dfe1 100644 --- a/gm/filterindiabox.cpp +++ b/gm/filterindiabox.cpp @@ -10,7 +10,6 @@ #include "Resources.h" #include "SkBitmapProcState.h" -#include "SkBitmapScaler.h" #include "SkGradientShader.h" #include "SkImageEncoder.h" #include "SkStream.h" @@ -75,7 +74,7 @@ protected: } SkISize onISize() override { - return SkISize::Make(1024, 768); + return SkISize::Make(680, 130); } void onDraw(SkCanvas* canvas) override { diff --git a/gm/showmiplevels.cpp b/gm/showmiplevels.cpp index 57df948536..4f27365788 100644 --- a/gm/showmiplevels.cpp +++ b/gm/showmiplevels.cpp @@ -9,7 +9,6 @@ #include "sk_tool_utils.h" #include "Resources.h" -#include "SkBitmapScaler.h" #include "SkGradientShader.h" #include "SkTypeface.h" #include "SkStream.h" @@ -114,9 +113,7 @@ protected: return str; } - SkISize onISize() override { - return { 824, 862 }; - } + SkISize onISize() override { return { 150, 862 }; } static void DrawAndFrame(SkCanvas* canvas, const SkBitmap& orig, SkScalar x, SkScalar y) { SkBitmap bm; @@ -169,25 +166,6 @@ protected: bm.installPixels(curr); return bm; }); - - const SkBitmapScaler::ResizeMethod methods[] = { - SkBitmapScaler::RESIZE_BOX, - SkBitmapScaler::RESIZE_TRIANGLE, - SkBitmapScaler::RESIZE_LANCZOS3, - SkBitmapScaler::RESIZE_HAMMING, - SkBitmapScaler::RESIZE_MITCHELL, - }; - - SkPixmap basePM; - orig.peekPixels(&basePM); - for (auto method : methods) { - canvas->translate(orig.width()/2 + 8.0f, 0); - drawLevels(canvas, orig, [method](const SkPixmap& prev, const SkPixmap& curr) { - SkBitmap bm; - SkBitmapScaler::Resize(&bm, prev, method, curr.width(), curr.height()); - return bm; - }); - } } void onOnceBeforeDraw() override { diff --git a/gn/bench.gni b/gn/bench.gni index 7560613051..a5ab7810eb 100644 --- a/gn/bench.gni +++ b/gn/bench.gni @@ -17,7 +17,6 @@ bench_sources = [ "$_bench/BitmapBench.cpp", "$_bench/BitmapRectBench.cpp", "$_bench/BitmapRegionDecoderBench.cpp", - "$_bench/BitmapScaleBench.cpp", "$_bench/BlendmodeBench.cpp", "$_bench/BlurBench.cpp", "$_bench/BlurImageFilterBench.cpp", diff --git a/gn/core.gni b/gn/core.gni index 39a80fef85..43b972f45c 100644 --- a/gn/core.gni +++ b/gn/core.gni @@ -31,7 +31,6 @@ skia_core_sources = [ "$_src/core/SkBitmapController.cpp", "$_src/core/SkBitmapDevice.cpp", "$_src/core/SkBitmapDevice.h", - "$_src/core/SkBitmapFilter.h", "$_src/core/SkBitmapProcState.cpp", "$_src/core/SkBitmapProcState.h", "$_src/core/SkBitmapProcState_filter.h", @@ -44,8 +43,6 @@ skia_core_sources = [ "$_src/core/SkBitmapProcState_utils.h", "$_src/core/SkBitmapProvider.cpp", "$_src/core/SkBitmapProvider.h", - "$_src/core/SkBitmapScaler.h", - "$_src/core/SkBitmapScaler.cpp", "$_src/core/SkBlendMode.cpp", "$_src/core/SkBlitBWMaskTemplate.h", "$_src/core/SkBlitMask.h", @@ -89,8 +86,6 @@ skia_core_sources = [ "$_src/core/SkColorTable.cpp", "$_src/core/SkConvertPixels.cpp", "$_src/core/SkConvertPixels.h", - "$_src/core/SkConvolver.cpp", - "$_src/core/SkConvolver.h", "$_src/core/SkCoreBlitters.h", "$_src/core/SkCpu.cpp", "$_src/core/SkCpu.h", diff --git a/gn/gn_to_bp.py b/gn/gn_to_bp.py index 3fc30a03b5..2056c4274f 100644 --- a/gn/gn_to_bp.py +++ b/gn/gn_to_bp.py @@ -294,8 +294,7 @@ with open('Android.bp', 'w') as f: defs['ssse3'] + defs['sse41'] + defs['sse42'] + - defs['avx' ] + - defs['hsw' ])), + defs['avx' ])), 'tool_cflags' : bpfmt(8, tool_cflags), 'tool_shared_libs' : bpfmt(8, tool_shared_libs), diff --git a/gn/opts.gni b/gn/opts.gni index 34481db3b6..1cc6027af2 100644 --- a/gn/opts.gni +++ b/gn/opts.gni @@ -51,4 +51,3 @@ ssse3 = [ sse41 = [ "$_src/opts/SkOpts_sse41.cpp" ] sse42 = [ "$_src/opts/SkOpts_sse42.cpp" ] avx = [ "$_src/opts/SkOpts_avx.cpp" ] -hsw = [ "$_src/opts/SkOpts_hsw.cpp" ] diff --git a/gn/shared_sources.gni b/gn/shared_sources.gni index 6df999921a..29cac671a4 100644 --- a/gn/shared_sources.gni +++ b/gn/shared_sources.gni @@ -24,7 +24,7 @@ skia_opts = { sse41_sources = sse41 sse42_sources = sse42 avx_sources = avx - hsw_sources = hsw + hsw_sources = [] # remove after we update Chrome } # Skia Chromium defines. These flags will be defined in chromium If these diff --git a/src/core/SkBitmapController.cpp b/src/core/SkBitmapController.cpp index d3e47aeae9..586210d4b7 100644 --- a/src/core/SkBitmapController.cpp +++ b/src/core/SkBitmapController.cpp @@ -6,15 +6,13 @@ */ #include "SkBitmap.h" +#include "SkBitmapCache.h" #include "SkBitmapController.h" #include "SkBitmapProvider.h" #include "SkMatrix.h" -#include "SkPixelRef.h" +#include "SkMipMap.h" #include "SkTemplates.h" -// RESIZE_LANCZOS3 is another good option, but chrome prefers mitchell at the moment -#define kHQ_RESIZE_METHOD SkBitmapScaler::RESIZE_MITCHELL - /////////////////////////////////////////////////////////////////////////////////////////////////// SkBitmapController::State* SkBitmapController::requestBitmap(const SkBitmapProvider& provider, @@ -33,70 +31,24 @@ SkBitmapController::State* SkBitmapController::requestBitmap(const SkBitmapProvi /////////////////////////////////////////////////////////////////////////////////////////////////// -#include "SkBitmapCache.h" -#include "SkBitmapScaler.h" -#include "SkMipMap.h" -#include "SkResourceCache.h" - class SkDefaultBitmapControllerState : public SkBitmapController::State { public: - SkDefaultBitmapControllerState(const SkBitmapProvider&, - const SkMatrix& inv, - SkFilterQuality, - bool canShadeHQ); + SkDefaultBitmapControllerState(const SkBitmapProvider&, const SkMatrix& inv, SkFilterQuality); private: - SkBitmap fResultBitmap; - sk_sp<const SkMipMap> fCurrMip; - bool fCanShadeHQ; + SkBitmap fResultBitmap; + sk_sp<const SkMipMap> fCurrMip; - bool processHQRequest(const SkBitmapProvider&); + bool processHighRequest(const SkBitmapProvider&); bool processMediumRequest(const SkBitmapProvider&); }; -// Check to see that the size of the bitmap that would be produced by -// scaling by the given inverted matrix is less than the maximum allowed. -static inline bool cache_size_okay(const SkBitmapProvider& provider, const SkMatrix& invMat) { - size_t maximumAllocation = SkResourceCache::GetEffectiveSingleAllocationByteLimit(); - if (0 == maximumAllocation) { - return true; - } - // float matrixScaleFactor = 1.0 / (invMat.scaleX * invMat.scaleY); - // return ((origBitmapSize * matrixScaleFactor) < maximumAllocationSize); - // Skip the division step: - const size_t size = provider.info().getSafeSize(provider.info().minRowBytes()); - SkScalar invScaleSqr = invMat.getScaleX() * invMat.getScaleY(); - return size < (maximumAllocation * SkScalarAbs(invScaleSqr)); -} - -/* - * High quality is implemented by performing up-right scale-only filtering and then - * using bilerp for any remaining transformations. - */ -bool SkDefaultBitmapControllerState::processHQRequest(const SkBitmapProvider& provider) { +bool SkDefaultBitmapControllerState::processHighRequest(const SkBitmapProvider& provider) { if (fQuality != kHigh_SkFilterQuality) { return false; } - // Our default return state is to downgrade the request to Medium, w/ or w/o setting fBitmap - // to a valid bitmap. If we succeed, we will set this to Low instead. fQuality = kMedium_SkFilterQuality; -#ifdef SK_USE_MIP_FOR_DOWNSCALE_HQ - return false; -#endif - - bool supported = false; - switch (provider.info().colorType()) { - case kRGBA_8888_SkColorType: - case kBGRA_8888_SkColorType: - supported = true; - break; - default: - break; - } - if (!supported || !cache_size_okay(provider, fInvMatrix) || fInvMatrix.hasPerspective()) { - return false; // can't handle the reqeust - } SkScalar invScaleX = fInvMatrix.getScaleX(); SkScalar invScaleY = fInvMatrix.getScaleY(); @@ -111,68 +63,14 @@ bool SkDefaultBitmapControllerState::processHQRequest(const SkBitmapProvider& pr invScaleX = SkScalarAbs(invScaleX); invScaleY = SkScalarAbs(invScaleY); - if (SkScalarNearlyEqual(invScaleX, 1) && SkScalarNearlyEqual(invScaleY, 1)) { - return false; // no need for HQ - } - - if (invScaleX > 1 || invScaleY > 1) { - return false; // only use HQ when upsampling - } - - // If the shader can natively handle HQ filtering, let it do it. - if (fCanShadeHQ) { - fQuality = kHigh_SkFilterQuality; - SkAssertResult(provider.asBitmap(&fResultBitmap)); - return true; - } - - const int dstW = SkScalarRoundToScalar(provider.width() / invScaleX); - const int dstH = SkScalarRoundToScalar(provider.height() / invScaleY); - const SkBitmapCacheDesc desc = provider.makeCacheDesc(dstW, dstH); - - if (!SkBitmapCache::Find(desc, &fResultBitmap)) { - SkBitmap orig; - if (!provider.asBitmap(&orig)) { - return false; - } - SkPixmap src; - if (!orig.peekPixels(&src)) { - return false; - } - - SkPixmap dst; - SkBitmapCache::RecPtr rec; - const SkImageInfo info = SkImageInfo::Make(desc.fScaledWidth, desc.fScaledHeight, - src.colorType(), src.alphaType()); - if (provider.isVolatile()) { - if (!fResultBitmap.tryAllocPixels(info)) { - return false; - } - SkASSERT(fResultBitmap.getPixels()); - fResultBitmap.peekPixels(&dst); - fResultBitmap.setImmutable(); // a little cheat, as we haven't resized yet, but ok - } else { - rec = SkBitmapCache::Alloc(desc, info, &dst); - if (!rec) { - return false; - } - } - if (!SkBitmapScaler::Resize(dst, src, kHQ_RESIZE_METHOD)) { - return false; // we failed to create fScaledBitmap - } - if (rec) { - SkBitmapCache::Add(std::move(rec), &fResultBitmap); - SkASSERT(fResultBitmap.getPixels()); - provider.notifyAddedToCache(); - } + if (invScaleX >= 1 - SK_ScalarNearlyZero || invScaleY >= 1 - SK_ScalarNearlyZero) { + // we're down-scaling so abort HQ + return false; } - SkASSERT(fResultBitmap.getPixels()); - SkASSERT(fResultBitmap.isImmutable()); - - fInvMatrix.postScale(SkIntToScalar(dstW) / provider.width(), - SkIntToScalar(dstH) / provider.height()); - fQuality = kLow_SkFilterQuality; + // Confirmed that we can use HQ (w/ rasterpipeline) + fQuality = kHigh_SkFilterQuality; + (void)provider.asBitmap(&fResultBitmap); return true; } @@ -235,20 +133,15 @@ bool SkDefaultBitmapControllerState::processMediumRequest(const SkBitmapProvider SkDefaultBitmapControllerState::SkDefaultBitmapControllerState(const SkBitmapProvider& provider, const SkMatrix& inv, - SkFilterQuality qual, - bool canShadeHQ) { + SkFilterQuality qual) { fInvMatrix = inv; fQuality = qual; - fCanShadeHQ = canShadeHQ; - - bool processed = this->processHQRequest(provider) || this->processMediumRequest(provider); - if (processed) { + if (this->processHighRequest(provider) || this->processMediumRequest(provider)) { SkASSERT(fResultBitmap.getPixels()); } else { (void)provider.asBitmap(&fResultBitmap); } - SkASSERT(fCanShadeHQ || fQuality <= kLow_SkFilterQuality); // fResultBitmap.getPixels() may be null, but our caller knows to check fPixmap.addr() // and will destroy us if it is nullptr. @@ -259,6 +152,5 @@ SkBitmapController::State* SkDefaultBitmapController::onRequestBitmap(const SkBi const SkMatrix& inverse, SkFilterQuality quality, void* storage, size_t size) { - return SkInPlaceNewCheck<SkDefaultBitmapControllerState>(storage, size, - bm, inverse, quality, fCanShadeHQ); + return SkInPlaceNewCheck<SkDefaultBitmapControllerState>(storage, size, bm, inverse, quality); } diff --git a/src/core/SkBitmapController.h b/src/core/SkBitmapController.h index 72fc721c53..9eff2d28ff 100644 --- a/src/core/SkBitmapController.h +++ b/src/core/SkBitmapController.h @@ -57,14 +57,11 @@ protected: class SkDefaultBitmapController : public SkBitmapController { public: - enum class CanShadeHQ { kNo, kYes }; - SkDefaultBitmapController(CanShadeHQ canShadeHQ) - : fCanShadeHQ(canShadeHQ == CanShadeHQ::kYes) {} + SkDefaultBitmapController() {} protected: State* onRequestBitmap(const SkBitmapProvider&, const SkMatrix& inverse, SkFilterQuality, void* storage, size_t storageSize) override; - bool fCanShadeHQ; }; #endif diff --git a/src/core/SkBitmapFilter.h b/src/core/SkBitmapFilter.h deleted file mode 100644 index ca3e0930f2..0000000000 --- a/src/core/SkBitmapFilter.h +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright 2013 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SkBitmapFilter_DEFINED -#define SkBitmapFilter_DEFINED - -#include "SkFixed.h" -#include "SkMath.h" -#include "SkScalar.h" - -#include "SkNx.h" - -// size of the precomputed bitmap filter tables for high quality filtering. -// Used to precompute the shape of the filter kernel. -// Table size chosen from experiments to see where I could start to see a difference. - -#define SKBITMAP_FILTER_TABLE_SIZE 128 - -class SkBitmapFilter { -public: - SkBitmapFilter(float width) : fWidth(width), fInvWidth(1.f/width) { - fPrecomputed = false; - fLookupMultiplier = this->invWidth() * (SKBITMAP_FILTER_TABLE_SIZE-1); - } - virtual ~SkBitmapFilter() {} - - SkScalar lookupScalar(float x) const { - if (!fPrecomputed) { - precomputeTable(); - } - int filter_idx = int(sk_float_abs(x * fLookupMultiplier)); - SkASSERT(filter_idx < SKBITMAP_FILTER_TABLE_SIZE); - return fFilterTableScalar[filter_idx]; - } - - float width() const { return fWidth; } - float invWidth() const { return fInvWidth; } - virtual float evaluate(float x) const = 0; - - virtual float evaluate_n(float val, float diff, int count, float* output) const { - float sum = 0; - for (int index = 0; index < count; index++) { - float filterValue = evaluate(val); - *output++ = filterValue; - sum += filterValue; - val += diff; - } - return sum; - } - -protected: - float fWidth; - float fInvWidth; - float fLookupMultiplier; - - mutable bool fPrecomputed; - mutable SkScalar fFilterTableScalar[SKBITMAP_FILTER_TABLE_SIZE]; - -private: - void precomputeTable() const { - fPrecomputed = true; - SkScalar *ftpScalar = fFilterTableScalar; - for (int x = 0; x < SKBITMAP_FILTER_TABLE_SIZE; ++x) { - float fx = ((float)x + .5f) * this->width() / SKBITMAP_FILTER_TABLE_SIZE; - float filter_value = evaluate(fx); - *ftpScalar++ = filter_value; - } - } -}; - -class SkMitchellFilter final : public SkBitmapFilter { -public: - SkMitchellFilter() - : INHERITED(2) - , fB(1.f / 3.f) - , fC(1.f / 3.f) - , fA1(-fB - 6*fC) - , fB1(6*fB + 30*fC) - , fC1(-12*fB - 48*fC) - , fD1(8*fB + 24*fC) - , fA2(12 - 9*fB - 6*fC) - , fB2(-18 + 12*fB + 6*fC) - , fD2(6 - 2*fB) - {} - - float evaluate(float x) const override { - x = fabsf(x); - if (x > 2.f) { - return 0; - } else if (x > 1.f) { - return (((fA1 * x + fB1) * x + fC1) * x + fD1) * (1.f/6.f); - } else { - return ((fA2 * x + fB2) * x*x + fD2) * (1.f/6.f); - } - } - - Sk4f evalcore_n(const Sk4f& val) const { - Sk4f x = val.abs(); - Sk4f over2 = x > Sk4f(2); - Sk4f over1 = x > Sk4f(1); - Sk4f poly1 = (((Sk4f(fA1) * x + Sk4f(fB1)) * x + Sk4f(fC1)) * x + Sk4f(fD1)) - * Sk4f(1.f/6.f); - Sk4f poly0 = ((Sk4f(fA2) * x + Sk4f(fB2)) * x*x + Sk4f(fD2)) * Sk4f(1.f/6.f); - return over2.thenElse(Sk4f(0), over1.thenElse(poly1, poly0)); - } - - float evaluate_n(float val, float diff, int count, float* output) const override { - Sk4f sum(0); - while (count >= 4) { - float v0 = val; - float v1 = val += diff; - float v2 = val += diff; - float v3 = val += diff; - val += diff; - Sk4f filterValue = evalcore_n(Sk4f(v0, v1, v2, v3)); - filterValue.store(output); - output += 4; - sum = sum + filterValue; - count -= 4; - } - float sums[4]; - sum.store(sums); - float result = sums[0] + sums[1] + sums[2] + sums[3]; - result += INHERITED::evaluate_n(val, diff, count, output); - return result; - } - - protected: - float fB, fC; - float fA1, fB1, fC1, fD1; - float fA2, fB2, fD2; -private: - typedef SkBitmapFilter INHERITED; -}; - -class SkGaussianFilter final : public SkBitmapFilter { - float fAlpha, fExpWidth; - -public: - SkGaussianFilter(float a, float width = 2) - : SkBitmapFilter(width) - , fAlpha(a) - , fExpWidth(expf(-a * width * width)) - {} - - float evaluate(float x) const override { - return SkTMax(0.f, float(expf(-fAlpha*x*x) - fExpWidth)); - } -}; - -class SkTriangleFilter final : public SkBitmapFilter { -public: - SkTriangleFilter(float width = 1) : SkBitmapFilter(width) {} - - float evaluate(float x) const override { - return SkTMax(0.f, fWidth - fabsf(x)); - } -}; - -class SkBoxFilter final : public SkBitmapFilter { -public: - SkBoxFilter(float width = 0.5f) : SkBitmapFilter(width) {} - - float evaluate(float x) const override { - return (x >= -fWidth && x < fWidth) ? 1.0f : 0.0f; - } -}; - -class SkHammingFilter final : public SkBitmapFilter { -public: - SkHammingFilter(float width = 1) : SkBitmapFilter(width) {} - - float evaluate(float x) const override { - if (x <= -fWidth || x >= fWidth) { - return 0.0f; // Outside of the window. - } - if (x > -FLT_EPSILON && x < FLT_EPSILON) { - return 1.0f; // Special case the sinc discontinuity at the origin. - } - const float xpi = x * static_cast<float>(SK_ScalarPI); - - return ((sk_float_sin(xpi) / xpi) * // sinc(x) - (0.54f + 0.46f * sk_float_cos(xpi / fWidth))); // hamming(x) - } -}; - -class SkLanczosFilter final : public SkBitmapFilter { -public: - SkLanczosFilter(float width = 3.f) : SkBitmapFilter(width) {} - - float evaluate(float x) const override { - if (x <= -fWidth || x >= fWidth) { - return 0.0f; // Outside of the window. - } - if (x > -FLT_EPSILON && x < FLT_EPSILON) { - return 1.0f; // Special case the discontinuity at the origin. - } - float xpi = x * static_cast<float>(SK_ScalarPI); - return (sk_float_sin(xpi) / xpi) * // sinc(x) - sk_float_sin(xpi / fWidth) / (xpi / fWidth); // sinc(x/fWidth) - } -}; - - -#endif diff --git a/src/core/SkBitmapProcState.cpp b/src/core/SkBitmapProcState.cpp index 302bd054d3..9bc90609b2 100644 --- a/src/core/SkBitmapProcState.cpp +++ b/src/core/SkBitmapProcState.cpp @@ -12,7 +12,6 @@ #include "SkPaint.h" #include "SkShader.h" // for tilemodes #include "SkUtilsArm.h" -#include "SkBitmapScaler.h" #include "SkMipMap.h" #include "SkPixelRef.h" #include "SkImageEncoder.h" @@ -90,7 +89,7 @@ bool SkBitmapProcInfo::init(const SkMatrix& inv, const SkPaint& paint) { fInvMatrix = inv; fFilterQuality = paint.getFilterQuality(); - SkDefaultBitmapController controller(SkDefaultBitmapController::CanShadeHQ::kNo); + SkDefaultBitmapController controller; fBMState = controller.requestBitmap(fProvider, inv, paint.getFilterQuality(), fBMStateStorage.get(), fBMStateStorage.size()); // Note : we allow the controller to return an empty (zero-dimension) result. Should we? @@ -102,6 +101,7 @@ bool SkBitmapProcInfo::init(const SkMatrix& inv, const SkPaint& paint) { fRealInvMatrix = fBMState->invMatrix(); fPaintColor = paint.getColor(); fFilterQuality = fBMState->quality(); + SkASSERT(fFilterQuality <= kLow_SkFilterQuality); SkASSERT(fPixmap.addr()); // Most of the scanline procs deal with "unit" texture coordinates, as this diff --git a/src/core/SkBitmapProcState.h b/src/core/SkBitmapProcState.h index 73eaf4fb7c..c9376c60ae 100644 --- a/src/core/SkBitmapProcState.h +++ b/src/core/SkBitmapProcState.h @@ -10,8 +10,8 @@ #include "SkBitmap.h" #include "SkBitmapController.h" -#include "SkBitmapFilter.h" #include "SkBitmapProvider.h" +#include "SkFixed.h" #include "SkFloatBits.h" #include "SkMatrix.h" #include "SkMipMap.h" diff --git a/src/core/SkBitmapScaler.cpp b/src/core/SkBitmapScaler.cpp deleted file mode 100644 index 9e82b92404..0000000000 --- a/src/core/SkBitmapScaler.cpp +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright 2015 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "SkBitmapScaler.h" -#include "SkBitmapFilter.h" -#include "SkConvolver.h" -#include "SkImageInfo.h" -#include "SkPixmap.h" -#include "SkRect.h" -#include "SkTArray.h" - -// SkResizeFilter ---------------------------------------------------------------- - -// Encapsulates computation and storage of the filters required for one complete -// resize operation. -class SkResizeFilter { -public: - SkResizeFilter(SkBitmapScaler::ResizeMethod method, - int srcFullWidth, int srcFullHeight, - float destWidth, float destHeight, - const SkRect& destSubset); - ~SkResizeFilter() { delete fBitmapFilter; } - - // Returns the filled filter values. - const SkConvolutionFilter1D& xFilter() { return fXFilter; } - const SkConvolutionFilter1D& yFilter() { return fYFilter; } - -private: - - SkBitmapFilter* fBitmapFilter; - - // Computes one set of filters either horizontally or vertically. The caller - // will specify the "min" and "max" rather than the bottom/top and - // right/bottom so that the same code can be re-used in each dimension. - // - // |srcDependLo| and |srcDependSize| gives the range for the source - // depend rectangle (horizontally or vertically at the caller's discretion - // -- see above for what this means). - // - // Likewise, the range of destination values to compute and the scale factor - // for the transform is also specified. - - void computeFilters(int srcSize, - float destSubsetLo, float destSubsetSize, - float scale, - SkConvolutionFilter1D* output); - - SkConvolutionFilter1D fXFilter; - SkConvolutionFilter1D fYFilter; -}; - -SkResizeFilter::SkResizeFilter(SkBitmapScaler::ResizeMethod method, - int srcFullWidth, int srcFullHeight, - float destWidth, float destHeight, - const SkRect& destSubset) { - - SkASSERT(method >= SkBitmapScaler::RESIZE_FirstMethod && - method <= SkBitmapScaler::RESIZE_LastMethod); - - fBitmapFilter = nullptr; - switch(method) { - case SkBitmapScaler::RESIZE_BOX: - fBitmapFilter = new SkBoxFilter; - break; - case SkBitmapScaler::RESIZE_TRIANGLE: - fBitmapFilter = new SkTriangleFilter; - break; - case SkBitmapScaler::RESIZE_MITCHELL: - fBitmapFilter = new SkMitchellFilter; - break; - case SkBitmapScaler::RESIZE_HAMMING: - fBitmapFilter = new SkHammingFilter; - break; - case SkBitmapScaler::RESIZE_LANCZOS3: - fBitmapFilter = new SkLanczosFilter; - break; - } - - - float scaleX = destWidth / srcFullWidth; - float scaleY = destHeight / srcFullHeight; - - this->computeFilters(srcFullWidth, destSubset.fLeft, destSubset.width(), - scaleX, &fXFilter); - if (srcFullWidth == srcFullHeight && - destSubset.fLeft == destSubset.fTop && - destSubset.width() == destSubset.height()&& - scaleX == scaleY) { - fYFilter = fXFilter; - } else { - this->computeFilters(srcFullHeight, destSubset.fTop, destSubset.height(), - scaleY, &fYFilter); - } -} - -// TODO(egouriou): Take advantage of periods in the convolution. -// Practical resizing filters are periodic outside of the border area. -// For Lanczos, a scaling by a (reduced) factor of p/q (q pixels in the -// source become p pixels in the destination) will have a period of p. -// A nice consequence is a period of 1 when downscaling by an integral -// factor. Downscaling from typical display resolutions is also bound -// to produce interesting periods as those are chosen to have multiple -// small factors. -// Small periods reduce computational load and improve cache usage if -// the coefficients can be shared. For periods of 1 we can consider -// loading the factors only once outside the borders. -void SkResizeFilter::computeFilters(int srcSize, - float destSubsetLo, float destSubsetSize, - float scale, - SkConvolutionFilter1D* output) { - float destSubsetHi = destSubsetLo + destSubsetSize; // [lo, hi) - - // When we're doing a magnification, the scale will be larger than one. This - // means the destination pixels are much smaller than the source pixels, and - // that the range covered by the filter won't necessarily cover any source - // pixel boundaries. Therefore, we use these clamped values (max of 1) for - // some computations. - float clampedScale = SkTMin(1.0f, scale); - - // This is how many source pixels from the center we need to count - // to support the filtering function. - float srcSupport = fBitmapFilter->width() / clampedScale; - - float invScale = 1.0f / scale; - - SkSTArray<64, float, true> filterValuesArray; - SkSTArray<64, SkConvolutionFilter1D::ConvolutionFixed, true> fixedFilterValuesArray; - - // Loop over all pixels in the output range. We will generate one set of - // filter values for each one. Those values will tell us how to blend the - // source pixels to compute the destination pixel. - - // This is the pixel in the source directly under the pixel in the dest. - // Note that we base computations on the "center" of the pixels. To see - // why, observe that the destination pixel at coordinates (0, 0) in a 5.0x - // downscale should "cover" the pixels around the pixel with *its center* - // at coordinates (2.5, 2.5) in the source, not those around (0, 0). - // Hence we need to scale coordinates (0.5, 0.5), not (0, 0). - destSubsetLo = SkScalarFloorToScalar(destSubsetLo); - destSubsetHi = SkScalarCeilToScalar(destSubsetHi); - float srcPixel = (destSubsetLo + 0.5f) * invScale; - int destLimit = SkScalarTruncToInt(destSubsetHi - destSubsetLo); - output->reserveAdditional(destLimit, SkScalarCeilToInt(destLimit * srcSupport * 2)); - for (int destI = 0; destI < destLimit; srcPixel += invScale, destI++) { - // Compute the (inclusive) range of source pixels the filter covers. - float srcBegin = SkTMax(0.f, SkScalarFloorToScalar(srcPixel - srcSupport)); - float srcEnd = SkTMin(srcSize - 1.f, SkScalarCeilToScalar(srcPixel + srcSupport)); - - // Compute the unnormalized filter value at each location of the source - // it covers. - - // Sum of the filter values for normalizing. - // Distance from the center of the filter, this is the filter coordinate - // in source space. We also need to consider the center of the pixel - // when comparing distance against 'srcPixel'. In the 5x downscale - // example used above the distance from the center of the filter to - // the pixel with coordinates (2, 2) should be 0, because its center - // is at (2.5, 2.5). - float destFilterDist = (srcBegin + 0.5f - srcPixel) * clampedScale; - int filterCount = SkScalarTruncToInt(srcEnd - srcBegin) + 1; - if (filterCount <= 0) { - // true when srcSize is equal to srcPixel - srcSupport; this may be a bug - return; - } - filterValuesArray.reset(filterCount); - float filterSum = fBitmapFilter->evaluate_n(destFilterDist, clampedScale, filterCount, - filterValuesArray.begin()); - - // The filter must be normalized so that we don't affect the brightness of - // the image. Convert to normalized fixed point. - int fixedSum = 0; - fixedFilterValuesArray.reset(filterCount); - const float* filterValues = filterValuesArray.begin(); - SkConvolutionFilter1D::ConvolutionFixed* fixedFilterValues = fixedFilterValuesArray.begin(); - float invFilterSum = 1 / filterSum; - for (int fixedI = 0; fixedI < filterCount; fixedI++) { - int curFixed = SkConvolutionFilter1D::FloatToFixed(filterValues[fixedI] * invFilterSum); - fixedSum += curFixed; - fixedFilterValues[fixedI] = SkToS16(curFixed); - } - SkASSERT(fixedSum <= 0x7FFF); - - // The conversion to fixed point will leave some rounding errors, which - // we add back in to avoid affecting the brightness of the image. We - // arbitrarily add this to the center of the filter array (this won't always - // be the center of the filter function since it could get clipped on the - // edges, but it doesn't matter enough to worry about that case). - int leftovers = SkConvolutionFilter1D::FloatToFixed(1) - fixedSum; - fixedFilterValues[filterCount / 2] += leftovers; - - // Now it's ready to go. - output->AddFilter(SkScalarFloorToInt(srcBegin), fixedFilterValues, filterCount); - } -} - -/////////////////////////////////////////////////////////////////////////////////////////////////// - -static bool valid_for_resize(const SkPixmap& source, int dstW, int dstH) { - // TODO: Seems like we shouldn't care about the swizzle of source, just that it's 8888 - return source.addr() && source.colorType() == kN32_SkColorType && - source.width() >= 1 && source.height() >= 1 && dstW >= 1 && dstH >= 1; -} - -bool SkBitmapScaler::Resize(const SkPixmap& result, const SkPixmap& source, ResizeMethod method) { - if (!valid_for_resize(source, result.width(), result.height())) { - return false; - } - if (!result.addr() || result.colorType() != source.colorType()) { - return false; - } - - SkRect destSubset = SkRect::MakeIWH(result.width(), result.height()); - - SkResizeFilter filter(method, source.width(), source.height(), - result.width(), result.height(), destSubset); - - // Get a subset encompassing this touched area. We construct the - // offsets and row strides such that it looks like a new bitmap, while - // referring to the old data. - const uint8_t* sourceSubset = reinterpret_cast<const uint8_t*>(source.addr()); - - return BGRAConvolve2D(sourceSubset, static_cast<int>(source.rowBytes()), - !source.isOpaque(), filter.xFilter(), filter.yFilter(), - static_cast<int>(result.rowBytes()), - static_cast<unsigned char*>(result.writable_addr())); -} - -bool SkBitmapScaler::Resize(SkBitmap* resultPtr, const SkPixmap& source, ResizeMethod method, - int destWidth, int destHeight, SkBitmap::Allocator* allocator) { - // Preflight some of the checks, to avoid allocating the result if we don't need it. - if (!valid_for_resize(source, destWidth, destHeight)) { - return false; - } - - SkBitmap result; - // Note: pass along the profile information even thought this is no the right answer because - // this could be scaling in sRGB. - result.setInfo(SkImageInfo::MakeN32(destWidth, destHeight, source.alphaType(), - sk_ref_sp(source.info().colorSpace()))); - result.allocPixels(allocator); - - SkPixmap resultPM; - if (!result.peekPixels(&resultPM) || !Resize(resultPM, source, method)) { - return false; - } - - *resultPtr = result; - SkASSERT(resultPtr->getPixels()); - return true; -} diff --git a/src/core/SkBitmapScaler.h b/src/core/SkBitmapScaler.h deleted file mode 100644 index c96be0dbf8..0000000000 --- a/src/core/SkBitmapScaler.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2013 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SkBitmapScaler_DEFINED -#define SkBitmapScaler_DEFINED - -#include "SkBitmap.h" -#include "SkConvolver.h" - -/** \class SkBitmapScaler - - Provides the interface for high quality image resampling. - */ - -class SK_API SkBitmapScaler { -public: - enum ResizeMethod { - RESIZE_BOX, - RESIZE_TRIANGLE, - RESIZE_LANCZOS3, - RESIZE_HAMMING, - RESIZE_MITCHELL, - - RESIZE_FirstMethod = RESIZE_BOX, - RESIZE_LastMethod = RESIZE_MITCHELL, - }; - - /** - * Given already-allocated src and dst pixmaps, this will scale the src pixels using the - * specified resize-method and write the results into the pixels pointed to by dst. - */ - static bool Resize(const SkPixmap& dst, const SkPixmap& src, ResizeMethod method); - - /** - * Helper function that manages allocating a bitmap to hold the dst pixels, and then calls - * the pixmap version of Resize. - */ - static bool Resize(SkBitmap* result, const SkPixmap& src, ResizeMethod method, - int dest_width, int dest_height, SkBitmap::Allocator* = nullptr); -}; - -#endif diff --git a/src/core/SkBlitter.cpp b/src/core/SkBlitter.cpp index 45a569835b..5abb08553a 100644 --- a/src/core/SkBlitter.cpp +++ b/src/core/SkBlitter.cpp @@ -798,21 +798,16 @@ bool SkBlitter::UseRasterPipelineBlitter(const SkPixmap& device, const SkPaint& if (paint.getColorFilter()) { return true; } -#ifndef SK_SUPPORT_LEGACY_HQ_SCALER if (paint.getFilterQuality() == kHigh_SkFilterQuality) { return true; } -#endif // ... unless the blend mode is complicated enough. if (paint.getBlendMode() > SkBlendMode::kLastSeparableMode) { return true; } - - // ... or unless we have to deal with perspective. if (matrix.hasPerspective()) { return true; } - // ... or unless the shader is raster pipeline-only. if (paint.getShader() && as_SB(paint.getShader())->isRasterPipelineOnly()) { return true; diff --git a/src/core/SkConvolver.cpp b/src/core/SkConvolver.cpp deleted file mode 100644 index 9f0cfea821..0000000000 --- a/src/core/SkConvolver.cpp +++ /dev/null @@ -1,272 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "SkConvolver.h" -#include "SkOpts.h" -#include "SkTArray.h" - -namespace { - // Stores a list of rows in a circular buffer. The usage is you write into it - // by calling AdvanceRow. It will keep track of which row in the buffer it - // should use next, and the total number of rows added. - class CircularRowBuffer { - public: - // The number of pixels in each row is given in |sourceRowPixelWidth|. - // The maximum number of rows needed in the buffer is |maxYFilterSize| - // (we only need to store enough rows for the biggest filter). - // - // We use the |firstInputRow| to compute the coordinates of all of the - // following rows returned by Advance(). - CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize, - int firstInputRow) - : fRowByteWidth(destRowPixelWidth * 4), - fNumRows(maxYFilterSize), - fNextRow(0), - fNextRowCoordinate(firstInputRow) { - fBuffer.reset(fRowByteWidth * maxYFilterSize); - fRowAddresses.reset(fNumRows); - } - - // Moves to the next row in the buffer, returning a pointer to the beginning - // of it. - unsigned char* advanceRow() { - unsigned char* row = &fBuffer[fNextRow * fRowByteWidth]; - fNextRowCoordinate++; - - // Set the pointer to the next row to use, wrapping around if necessary. - fNextRow++; - if (fNextRow == fNumRows) { - fNextRow = 0; - } - return row; - } - - // Returns a pointer to an "unrolled" array of rows. These rows will start - // at the y coordinate placed into |*firstRowIndex| and will continue in - // order for the maximum number of rows in this circular buffer. - // - // The |firstRowIndex_| may be negative. This means the circular buffer - // starts before the top of the image (it hasn't been filled yet). - unsigned char* const* GetRowAddresses(int* firstRowIndex) { - // Example for a 4-element circular buffer holding coords 6-9. - // Row 0 Coord 8 - // Row 1 Coord 9 - // Row 2 Coord 6 <- fNextRow = 2, fNextRowCoordinate = 10. - // Row 3 Coord 7 - // - // The "next" row is also the first (lowest) coordinate. This computation - // may yield a negative value, but that's OK, the math will work out - // since the user of this buffer will compute the offset relative - // to the firstRowIndex and the negative rows will never be used. - *firstRowIndex = fNextRowCoordinate - fNumRows; - - int curRow = fNextRow; - for (int i = 0; i < fNumRows; i++) { - fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth]; - - // Advance to the next row, wrapping if necessary. - curRow++; - if (curRow == fNumRows) { - curRow = 0; - } - } - return &fRowAddresses[0]; - } - - private: - // The buffer storing the rows. They are packed, each one fRowByteWidth. - SkTArray<unsigned char> fBuffer; - - // Number of bytes per row in the |buffer|. - int fRowByteWidth; - - // The number of rows available in the buffer. - int fNumRows; - - // The next row index we should write into. This wraps around as the - // circular buffer is used. - int fNextRow; - - // The y coordinate of the |fNextRow|. This is incremented each time a - // new row is appended and does not wrap. - int fNextRowCoordinate; - - // Buffer used by GetRowAddresses(). - SkTArray<unsigned char*> fRowAddresses; - }; - -} // namespace - -// SkConvolutionFilter1D --------------------------------------------------------- - -SkConvolutionFilter1D::SkConvolutionFilter1D() -: fMaxFilter(0) { -} - -SkConvolutionFilter1D::~SkConvolutionFilter1D() { -} - -void SkConvolutionFilter1D::AddFilter(int filterOffset, - const ConvolutionFixed* filterValues, - int filterLength) { - // It is common for leading/trailing filter values to be zeros. In such - // cases it is beneficial to only store the central factors. - // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on - // a 1080p image this optimization gives a ~10% speed improvement. - int filterSize = filterLength; - int firstNonZero = 0; - while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) { - firstNonZero++; - } - - if (firstNonZero < filterLength) { - // Here we have at least one non-zero factor. - int lastNonZero = filterLength - 1; - while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) { - lastNonZero--; - } - - filterOffset += firstNonZero; - filterLength = lastNonZero + 1 - firstNonZero; - SkASSERT(filterLength > 0); - - fFilterValues.append(filterLength, &filterValues[firstNonZero]); - } else { - // Here all the factors were zeroes. - filterLength = 0; - } - - FilterInstance instance; - - // We pushed filterLength elements onto fFilterValues - instance.fDataLocation = (static_cast<int>(fFilterValues.count()) - - filterLength); - instance.fOffset = filterOffset; - instance.fTrimmedLength = filterLength; - instance.fLength = filterSize; - fFilters.push(instance); - - fMaxFilter = SkTMax(fMaxFilter, filterLength); -} - -const SkConvolutionFilter1D::ConvolutionFixed* SkConvolutionFilter1D::GetSingleFilter( - int* specifiedFilterlength, - int* filterOffset, - int* filterLength) const { - const FilterInstance& filter = fFilters[0]; - *filterOffset = filter.fOffset; - *filterLength = filter.fTrimmedLength; - *specifiedFilterlength = filter.fLength; - if (filter.fTrimmedLength == 0) { - return nullptr; - } - - return &fFilterValues[filter.fDataLocation]; -} - -bool BGRAConvolve2D(const unsigned char* sourceData, - int sourceByteRowStride, - bool sourceHasAlpha, - const SkConvolutionFilter1D& filterX, - const SkConvolutionFilter1D& filterY, - int outputByteRowStride, - unsigned char* output) { - - int maxYFilterSize = filterY.maxFilter(); - - // The next row in the input that we will generate a horizontally - // convolved row for. If the filter doesn't start at the beginning of the - // image (this is the case when we are only resizing a subset), then we - // don't want to generate any output rows before that. Compute the starting - // row for convolution as the first pixel for the first vertical filter. - int filterOffset, filterLength; - const SkConvolutionFilter1D::ConvolutionFixed* filterValues = - filterY.FilterForValue(0, &filterOffset, &filterLength); - int nextXRow = filterOffset; - - // We loop over each row in the input doing a horizontal convolution. This - // will result in a horizontally convolved image. We write the results into - // a circular buffer of convolved rows and do vertical convolution as rows - // are available. This prevents us from having to store the entire - // intermediate image and helps cache coherency. - // We will need four extra rows to allow horizontal convolution could be done - // simultaneously. We also pad each row in row buffer to be aligned-up to - // 32 bytes. - // TODO(jiesun): We do not use aligned load from row buffer in vertical - // convolution pass yet. Somehow Windows does not like it. - int rowBufferWidth = (filterX.numValues() + 31) & ~0x1F; - int rowBufferHeight = maxYFilterSize + - (SkOpts::convolve_4_rows_horizontally != nullptr ? 4 : 0); - - // check for too-big allocation requests : crbug.com/528628 - { - int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight); - // need some limit, to avoid over-committing success from malloc, but then - // crashing when we try to actually use the memory. - // 100meg seems big enough to allow "normal" zoom factors and image sizes through - // while avoiding the crash seen by the bug (crbug.com/528628) - if (size > 100 * 1024 * 1024) { -// SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size); - return false; - } - } - - CircularRowBuffer rowBuffer(rowBufferWidth, - rowBufferHeight, - filterOffset); - - // Loop over every possible output row, processing just enough horizontal - // convolutions to run each subsequent vertical convolution. - SkASSERT(outputByteRowStride >= filterX.numValues() * 4); - int numOutputRows = filterY.numValues(); - - // We need to check which is the last line to convolve before we advance 4 - // lines in one iteration. - int lastFilterOffset, lastFilterLength; - filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, - &lastFilterLength); - - for (int outY = 0; outY < numOutputRows; outY++) { - filterValues = filterY.FilterForValue(outY, - &filterOffset, &filterLength); - - // Generate output rows until we have enough to run the current filter. - while (nextXRow < filterOffset + filterLength) { - if (SkOpts::convolve_4_rows_horizontally != nullptr && - nextXRow + 3 < lastFilterOffset + lastFilterLength) { - const unsigned char* src[4]; - unsigned char* outRow[4]; - for (int i = 0; i < 4; ++i) { - src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride]; - outRow[i] = rowBuffer.advanceRow(); - } - SkOpts::convolve_4_rows_horizontally(src, filterX, outRow, 4*rowBufferWidth); - nextXRow += 4; - } else { - SkOpts::convolve_horizontally( - &sourceData[(uint64_t)nextXRow * sourceByteRowStride], - filterX, rowBuffer.advanceRow(), sourceHasAlpha); - nextXRow++; - } - } - - // Compute where in the output image this row of final data will go. - unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride]; - - // Get the list of rows that the circular buffer has, in order. - int firstRowInCircularBuffer; - unsigned char* const* rowsToConvolve = - rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); - - // Now compute the start of the subset of those rows that the filter needs. - unsigned char* const* firstRowForFilter = - &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; - - SkOpts::convolve_vertically(filterValues, filterLength, - firstRowForFilter, - filterX.numValues(), curOutputRow, - sourceHasAlpha); - } - return true; -} diff --git a/src/core/SkConvolver.h b/src/core/SkConvolver.h deleted file mode 100644 index 4c4b1fd711..0000000000 --- a/src/core/SkConvolver.h +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef SK_CONVOLVER_H -#define SK_CONVOLVER_H - -#include "SkSize.h" -#include "SkTDArray.h" - -// avoid confusion with Mac OS X's math library (Carbon) -#if defined(__APPLE__) -#undef FloatToConvolutionFixed -#undef ConvolutionFixedToFloat -#undef FloatToFixed -#undef FixedToFloat -#endif - -// Represents a filter in one dimension. Each output pixel has one entry in this -// object for the filter values contributing to it. You build up the filter -// list by calling AddFilter for each output pixel (in order). -// -// We do 2-dimensional convolution by first convolving each row by one -// SkConvolutionFilter1D, then convolving each column by another one. -// -// Entries are stored in ConvolutionFixed point, shifted left by kShiftBits. -class SkConvolutionFilter1D { -public: - typedef short ConvolutionFixed; - - // The number of bits that ConvolutionFixed point values are shifted by. - enum { kShiftBits = 14 }; - - SK_API SkConvolutionFilter1D(); - SK_API ~SkConvolutionFilter1D(); - - // Convert between floating point and our ConvolutionFixed point representation. - static ConvolutionFixed FloatToFixed(float f) { - return static_cast<ConvolutionFixed>(f * (1 << kShiftBits)); - } - static unsigned char FixedToChar(ConvolutionFixed x) { - return static_cast<unsigned char>(x >> kShiftBits); - } - static float FixedToFloat(ConvolutionFixed x) { - // The cast relies on ConvolutionFixed being a short, implying that on - // the platforms we care about all (16) bits will fit into - // the mantissa of a (32-bit) float. - static_assert(sizeof(ConvolutionFixed) == 2, "ConvolutionFixed_type_should_fit_in_float_mantissa"); - float raw = static_cast<float>(x); - return ldexpf(raw, -kShiftBits); - } - - // Returns the maximum pixel span of a filter. - int maxFilter() const { return fMaxFilter; } - - // Returns the number of filters in this filter. This is the dimension of the - // output image. - int numValues() const { return static_cast<int>(fFilters.count()); } - - void reserveAdditional(int filterCount, int filterValueCount) { - fFilters.setReserve(fFilters.count() + filterCount); - fFilterValues.setReserve(fFilterValues.count() + filterValueCount); - } - - // Appends the given list of scaling values for generating a given output - // pixel. |filterOffset| is the distance from the edge of the image to where - // the scaling factors start. The scaling factors apply to the source pixels - // starting from this position, and going for the next |filterLength| pixels. - // - // You will probably want to make sure your input is normalized (that is, - // all entries in |filterValuesg| sub to one) to prevent affecting the overall - // brighness of the image. - // - // The filterLength must be > 0. - void AddFilter(int filterOffset, - const ConvolutionFixed* filterValues, - int filterLength); - - // Retrieves a filter for the given |valueOffset|, a position in the output - // image in the direction we're convolving. The offset and length of the - // filter values are put into the corresponding out arguments (see AddFilter - // above for what these mean), and a pointer to the first scaling factor is - // returned. There will be |filterLength| values in this array. - inline const ConvolutionFixed* FilterForValue(int valueOffset, - int* filterOffset, - int* filterLength) const { - const FilterInstance& filter = fFilters[valueOffset]; - *filterOffset = filter.fOffset; - *filterLength = filter.fTrimmedLength; - if (filter.fTrimmedLength == 0) { - return nullptr; - } - return &fFilterValues[filter.fDataLocation]; - } - - // Retrieves the filter for the offset 0, presumed to be the one and only. - // The offset and length of the filter values are put into the corresponding - // out arguments (see AddFilter). Note that |filterLegth| and - // |specifiedFilterLength| may be different if leading/trailing zeros of the - // original floating point form were clipped. - // There will be |filterLength| values in the return array. - // Returns nullptr if the filter is 0-length (for instance when all floating - // point values passed to AddFilter were clipped to 0). - SK_API const ConvolutionFixed* GetSingleFilter(int* specifiedFilterLength, - int* filterOffset, - int* filterLength) const; - - // Add another value to the fFilterValues array -- useful for - // SIMD padding which happens outside of this class. - - void addFilterValue( ConvolutionFixed val ) { - fFilterValues.push( val ); - } -private: - struct FilterInstance { - // Offset within filterValues for this instance of the filter. - int fDataLocation; - - // Distance from the left of the filter to the center. IN PIXELS - int fOffset; - - // Number of values in this filter instance. - int fTrimmedLength; - - // Filter length as specified. Note that this may be different from - // 'trimmed_length' if leading/trailing zeros of the original floating - // point form were clipped differently on each tail. - int fLength; - }; - - // Stores the information for each filter added to this class. - SkTDArray<FilterInstance> fFilters; - - // We store all the filter values in this flat list, indexed by - // |FilterInstance.data_location| to avoid the mallocs required for storing - // each one separately. - SkTDArray<ConvolutionFixed> fFilterValues; - - // The maximum size of any filter we've added. - int fMaxFilter; -}; - -// Does a two-dimensional convolution on the given source image. -// -// It is assumed the source pixel offsets referenced in the input filters -// reference only valid pixels, so the source image size is not required. Each -// row of the source image starts |sourceByteRowStride| after the previous -// one (this allows you to have rows with some padding at the end). -// -// The result will be put into the given output buffer. The destination image -// size will be xfilter.numValues() * yfilter.numValues() pixels. It will be -// in rows of exactly xfilter.numValues() * 4 bytes. -// -// |sourceHasAlpha| is a hint that allows us to avoid doing computations on -// the alpha channel if the image is opaque. If you don't know, set this to -// true and it will work properly, but setting this to false will be a few -// percent faster if you know the image is opaque. -// -// The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order -// (this is ARGB when loaded into 32-bit words on a little-endian machine). -/** - * Returns false if it was unable to perform the convolution/rescale. in which case the output - * buffer is assumed to be undefined. - */ -SK_API bool BGRAConvolve2D(const unsigned char* sourceData, - int sourceByteRowStride, - bool sourceHasAlpha, - const SkConvolutionFilter1D& xfilter, - const SkConvolutionFilter1D& yfilter, - int outputByteRowStride, - unsigned char* output); - -#endif // SK_CONVOLVER_H diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 33c3690f4e..e5e304cef9 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -36,7 +36,6 @@ #define SK_OPTS_NS portable #endif -#include "SkBitmapFilter_opts.h" #include "SkBlend_opts.h" #include "SkBlitMask_opts.h" #include "SkBlitRow_opts.h" @@ -88,10 +87,6 @@ namespace SkOpts { DEFINE_DEFAULT(hash_fn); - DEFINE_DEFAULT(convolve_vertically); - DEFINE_DEFAULT(convolve_horizontally); - DEFINE_DEFAULT(convolve_4_rows_horizontally); - #undef DEFINE_DEFAULT // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. @@ -99,7 +94,6 @@ namespace SkOpts { void Init_sse41(); void Init_sse42(); void Init_avx(); - void Init_hsw(); void Init_crc32(); static void init() { @@ -109,7 +103,6 @@ namespace SkOpts { if (SkCpu::Supports(SkCpu::SSE41)) { Init_sse41(); } if (SkCpu::Supports(SkCpu::SSE42)) { Init_sse42(); } if (SkCpu::Supports(SkCpu::AVX )) { Init_avx(); } - if (SkCpu::Supports(SkCpu::HSW )) { Init_hsw(); } #elif defined(SK_CPU_ARM64) if (SkCpu::Supports(SkCpu::CRC32)) { Init_crc32(); } diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h index a4f1ea284c..3bea740cb6 100644 --- a/src/core/SkOpts.h +++ b/src/core/SkOpts.h @@ -8,7 +8,6 @@ #ifndef SkOpts_DEFINED #define SkOpts_DEFINED -#include "SkConvolver.h" #include "SkRasterPipeline.h" #include "SkTypes.h" #include "SkXfermodePriv.h" @@ -62,15 +61,6 @@ namespace SkOpts { static inline uint32_t hash(const void* data, size_t bytes, uint32_t seed=0) { return hash_fn(data, bytes, seed); } - - extern void (*convolve_vertically)(const SkConvolutionFilter1D::ConvolutionFixed* filter_values, - int filter_length, unsigned char* const* source_data_rows, - int pixel_width, unsigned char* out_row, bool has_alpha); - extern void (*convolve_4_rows_horizontally)(const unsigned char* src_data[4], - const SkConvolutionFilter1D& filter, - unsigned char* out_row[4], size_t out_row_bytes); - extern void (*convolve_horizontally)(const unsigned char* src_data, const SkConvolutionFilter1D& filter, - unsigned char* out_row, bool has_alpha); } #endif//SkOpts_DEFINED diff --git a/src/opts/SkBitmapFilter_opts.h b/src/opts/SkBitmapFilter_opts.h deleted file mode 100644 index 4f21c579fb..0000000000 --- a/src/opts/SkBitmapFilter_opts.h +++ /dev/null @@ -1,940 +0,0 @@ -/* - * Copyright 2016 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SkBitmapFilter_opts_DEFINED -#define SkBitmapFilter_opts_DEFINED - -#include "SkConvolver.h" - -#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 - #include <immintrin.h> -#elif defined(SK_ARM_HAS_NEON) - #include <arm_neon.h> -#endif - -namespace SK_OPTS_NS { - -#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 - - static SK_ALWAYS_INLINE void AccumRemainder(const unsigned char* pixelsLeft, - const SkConvolutionFilter1D::ConvolutionFixed* filterValues, __m128i& accum, int r) { - int remainder[4] = {0}; - for (int i = 0; i < r; i++) { - SkConvolutionFilter1D::ConvolutionFixed coeff = filterValues[i]; - remainder[0] += coeff * pixelsLeft[i * 4 + 0]; - remainder[1] += coeff * pixelsLeft[i * 4 + 1]; - remainder[2] += coeff * pixelsLeft[i * 4 + 2]; - remainder[3] += coeff * pixelsLeft[i * 4 + 3]; - } - __m128i t = _mm_setr_epi32(remainder[0], remainder[1], remainder[2], remainder[3]); - accum = _mm_add_epi32(accum, t); - } - - // Convolves horizontally along a single row. The row data is given in - // |srcData| and continues for the numValues() of the filter. - void convolve_horizontally(const unsigned char* srcData, - const SkConvolutionFilter1D& filter, - unsigned char* outRow, - bool /*hasAlpha*/) { - // Output one pixel each iteration, calculating all channels (RGBA) together. - int numValues = filter.numValues(); - for (int outX = 0; outX < numValues; outX++) { - // Get the filter that determines the current output pixel. - int filterOffset, filterLength; - const SkConvolutionFilter1D::ConvolutionFixed* filterValues = - filter.FilterForValue(outX, &filterOffset, &filterLength); - - // Compute the first pixel in this row that the filter affects. It will - // touch |filterLength| pixels (4 bytes each) after this. - const unsigned char* rowToFilter = &srcData[filterOffset * 4]; - - __m128i zero = _mm_setzero_si128(); - __m128i accum = _mm_setzero_si128(); - - // We will load and accumulate with four coefficients per iteration. - for (int filterX = 0; filterX < filterLength >> 2; filterX++) { - // Load 4 coefficients => duplicate 1st and 2nd of them for all channels. - __m128i coeff, coeff16; - // [16] xx xx xx xx c3 c2 c1 c0 - coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filterValues)); - // [16] xx xx xx xx c1 c1 c0 c0 - coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0)); - // [16] c1 c1 c1 c1 c0 c0 c0 c0 - coeff16 = _mm_unpacklo_epi16(coeff16, coeff16); - - // Load four pixels => unpack the first two pixels to 16 bits => - // multiply with coefficients => accumulate the convolution result. - // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0 - __m128i src8 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(rowToFilter)); - // [16] a1 b1 g1 r1 a0 b0 g0 r0 - __m128i src16 = _mm_unpacklo_epi8(src8, zero); - __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16); - __m128i mul_lo = _mm_mullo_epi16(src16, coeff16); - // [32] a0*c0 b0*c0 g0*c0 r0*c0 - __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi); - accum = _mm_add_epi32(accum, t); - // [32] a1*c1 b1*c1 g1*c1 r1*c1 - t = _mm_unpackhi_epi16(mul_lo, mul_hi); - accum = _mm_add_epi32(accum, t); - - // Duplicate 3rd and 4th coefficients for all channels => - // unpack the 3rd and 4th pixels to 16 bits => multiply with coefficients - // => accumulate the convolution results. - // [16] xx xx xx xx c3 c3 c2 c2 - coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2)); - // [16] c3 c3 c3 c3 c2 c2 c2 c2 - coeff16 = _mm_unpacklo_epi16(coeff16, coeff16); - // [16] a3 g3 b3 r3 a2 g2 b2 r2 - src16 = _mm_unpackhi_epi8(src8, zero); - mul_hi = _mm_mulhi_epi16(src16, coeff16); - mul_lo = _mm_mullo_epi16(src16, coeff16); - // [32] a2*c2 b2*c2 g2*c2 r2*c2 - t = _mm_unpacklo_epi16(mul_lo, mul_hi); - accum = _mm_add_epi32(accum, t); - // [32] a3*c3 b3*c3 g3*c3 r3*c3 - t = _mm_unpackhi_epi16(mul_lo, mul_hi); - accum = _mm_add_epi32(accum, t); - - // Advance the pixel and coefficients pointers. - rowToFilter += 16; - filterValues += 4; - } - - // When |filterLength| is not divisible by 4, we accumulate the last 1 - 3 - // coefficients one at a time. - int r = filterLength & 3; - if (r) { - int remainderOffset = (filterOffset + filterLength - r) * 4; - AccumRemainder(srcData + remainderOffset, filterValues, accum, r); - } - - // Shift right for fixed point implementation. - accum = _mm_srai_epi32(accum, SkConvolutionFilter1D::kShiftBits); - - // Packing 32 bits |accum| to 16 bits per channel (signed saturation). - accum = _mm_packs_epi32(accum, zero); - // Packing 16 bits |accum| to 8 bits per channel (unsigned saturation). - accum = _mm_packus_epi16(accum, zero); - - // Store the pixel value of 32 bits. - *(reinterpret_cast<int*>(outRow)) = _mm_cvtsi128_si32(accum); - outRow += 4; - } - } - - // Convolves horizontally along four rows. The row data is given in - // |srcData| and continues for the numValues() of the filter. - // The algorithm is almost same as |convolve_horizontally|. Please - // refer to that function for detailed comments. - void convolve_4_rows_horizontally(const unsigned char* srcData[4], - const SkConvolutionFilter1D& filter, - unsigned char* outRow[4], - size_t outRowBytes) { - SkDEBUGCODE(const unsigned char* out_row_0_start = outRow[0];) - - // Output one pixel each iteration, calculating all channels (RGBA) together. - int numValues = filter.numValues(); - for (int outX = 0; outX < numValues; outX++) { - int filterOffset, filterLength; - const SkConvolutionFilter1D::ConvolutionFixed* filterValues = - filter.FilterForValue(outX, &filterOffset, &filterLength); - - __m128i zero = _mm_setzero_si128(); - - // four pixels in a column per iteration. - __m128i accum0 = _mm_setzero_si128(); - __m128i accum1 = _mm_setzero_si128(); - __m128i accum2 = _mm_setzero_si128(); - __m128i accum3 = _mm_setzero_si128(); - - int start = filterOffset * 4; - // We will load and accumulate with four coefficients per iteration. - for (int filterX = 0; filterX < (filterLength >> 2); filterX++) { - __m128i coeff, coeff16lo, coeff16hi; - // [16] xx xx xx xx c3 c2 c1 c0 - coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filterValues)); - // [16] xx xx xx xx c1 c1 c0 c0 - coeff16lo = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0)); - // [16] c1 c1 c1 c1 c0 c0 c0 c0 - coeff16lo = _mm_unpacklo_epi16(coeff16lo, coeff16lo); - // [16] xx xx xx xx c3 c3 c2 c2 - coeff16hi = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2)); - // [16] c3 c3 c3 c3 c2 c2 c2 c2 - coeff16hi = _mm_unpacklo_epi16(coeff16hi, coeff16hi); - - __m128i src8, src16, mul_hi, mul_lo, t; - -#define ITERATION(src, accum) \ - src8 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)); \ - src16 = _mm_unpacklo_epi8(src8, zero); \ - mul_hi = _mm_mulhi_epi16(src16, coeff16lo); \ - mul_lo = _mm_mullo_epi16(src16, coeff16lo); \ - t = _mm_unpacklo_epi16(mul_lo, mul_hi); \ - accum = _mm_add_epi32(accum, t); \ - t = _mm_unpackhi_epi16(mul_lo, mul_hi); \ - accum = _mm_add_epi32(accum, t); \ - src16 = _mm_unpackhi_epi8(src8, zero); \ - mul_hi = _mm_mulhi_epi16(src16, coeff16hi); \ - mul_lo = _mm_mullo_epi16(src16, coeff16hi); \ - t = _mm_unpacklo_epi16(mul_lo, mul_hi); \ - accum = _mm_add_epi32(accum, t); \ - t = _mm_unpackhi_epi16(mul_lo, mul_hi); \ - accum = _mm_add_epi32(accum, t) - - ITERATION(srcData[0] + start, accum0); - ITERATION(srcData[1] + start, accum1); - ITERATION(srcData[2] + start, accum2); - ITERATION(srcData[3] + start, accum3); - - start += 16; - filterValues += 4; - } - - int r = filterLength & 3; - if (r) { - int remainderOffset = (filterOffset + filterLength - r) * 4; - AccumRemainder(srcData[0] + remainderOffset, filterValues, accum0, r); - AccumRemainder(srcData[1] + remainderOffset, filterValues, accum1, r); - AccumRemainder(srcData[2] + remainderOffset, filterValues, accum2, r); - AccumRemainder(srcData[3] + remainderOffset, filterValues, accum3, r); - } - - accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits); - accum0 = _mm_packs_epi32(accum0, zero); - accum0 = _mm_packus_epi16(accum0, zero); - accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits); - accum1 = _mm_packs_epi32(accum1, zero); - accum1 = _mm_packus_epi16(accum1, zero); - accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits); - accum2 = _mm_packs_epi32(accum2, zero); - accum2 = _mm_packus_epi16(accum2, zero); - accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits); - accum3 = _mm_packs_epi32(accum3, zero); - accum3 = _mm_packus_epi16(accum3, zero); - - // We seem to be running off the edge here (chromium:491660). - SkASSERT(((size_t)outRow[0] - (size_t)out_row_0_start) < outRowBytes); - - *(reinterpret_cast<int*>(outRow[0])) = _mm_cvtsi128_si32(accum0); - *(reinterpret_cast<int*>(outRow[1])) = _mm_cvtsi128_si32(accum1); - *(reinterpret_cast<int*>(outRow[2])) = _mm_cvtsi128_si32(accum2); - *(reinterpret_cast<int*>(outRow[3])) = _mm_cvtsi128_si32(accum3); - - outRow[0] += 4; - outRow[1] += 4; - outRow[2] += 4; - outRow[3] += 4; - } - } - - // Does vertical convolution to produce one output row. The filter values and - // length are given in the first two parameters. These are applied to each - // of the rows pointed to in the |sourceDataRows| array, with each row - // being |pixelWidth| wide. - // - // The output must have room for |pixelWidth * 4| bytes. - template<bool hasAlpha> - void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues, - int filterLength, - unsigned char* const* sourceDataRows, - int pixelWidth, - unsigned char* outRow) { - // Output four pixels per iteration (16 bytes). - int width = pixelWidth & ~3; - __m128i zero = _mm_setzero_si128(); - for (int outX = 0; outX < width; outX += 4) { - // Accumulated result for each pixel. 32 bits per RGBA channel. - __m128i accum0 = _mm_setzero_si128(); - __m128i accum1 = _mm_setzero_si128(); - __m128i accum2 = _mm_setzero_si128(); - __m128i accum3 = _mm_setzero_si128(); - - // Convolve with one filter coefficient per iteration. - for (int filterY = 0; filterY < filterLength; filterY++) { - - // Duplicate the filter coefficient 8 times. - // [16] cj cj cj cj cj cj cj cj - __m128i coeff16 = _mm_set1_epi16(filterValues[filterY]); - - // Load four pixels (16 bytes) together. - // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0 - const __m128i* src = reinterpret_cast<const __m128i*>( - &sourceDataRows[filterY][outX << 2]); - __m128i src8 = _mm_loadu_si128(src); - - // Unpack 1st and 2nd pixels from 8 bits to 16 bits for each channels => - // multiply with current coefficient => accumulate the result. - // [16] a1 b1 g1 r1 a0 b0 g0 r0 - __m128i src16 = _mm_unpacklo_epi8(src8, zero); - __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16); - __m128i mul_lo = _mm_mullo_epi16(src16, coeff16); - // [32] a0 b0 g0 r0 - __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi); - accum0 = _mm_add_epi32(accum0, t); - // [32] a1 b1 g1 r1 - t = _mm_unpackhi_epi16(mul_lo, mul_hi); - accum1 = _mm_add_epi32(accum1, t); - - // Unpack 3rd and 4th pixels from 8 bits to 16 bits for each channels => - // multiply with current coefficient => accumulate the result. - // [16] a3 b3 g3 r3 a2 b2 g2 r2 - src16 = _mm_unpackhi_epi8(src8, zero); - mul_hi = _mm_mulhi_epi16(src16, coeff16); - mul_lo = _mm_mullo_epi16(src16, coeff16); - // [32] a2 b2 g2 r2 - t = _mm_unpacklo_epi16(mul_lo, mul_hi); - accum2 = _mm_add_epi32(accum2, t); - // [32] a3 b3 g3 r3 - t = _mm_unpackhi_epi16(mul_lo, mul_hi); - accum3 = _mm_add_epi32(accum3, t); - } - - // Shift right for fixed point implementation. - accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits); - accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits); - accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits); - accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits); - - // Packing 32 bits |accum| to 16 bits per channel (signed saturation). - // [16] a1 b1 g1 r1 a0 b0 g0 r0 - accum0 = _mm_packs_epi32(accum0, accum1); - // [16] a3 b3 g3 r3 a2 b2 g2 r2 - accum2 = _mm_packs_epi32(accum2, accum3); - - // Packing 16 bits |accum| to 8 bits per channel (unsigned saturation). - // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0 - accum0 = _mm_packus_epi16(accum0, accum2); - - if (hasAlpha) { - // Compute the max(ri, gi, bi) for each pixel. - // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0 - __m128i a = _mm_srli_epi32(accum0, 8); - // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0 - __m128i b = _mm_max_epu8(a, accum0); // Max of r and g. - // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0 - a = _mm_srli_epi32(accum0, 16); - // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0 - b = _mm_max_epu8(a, b); // Max of r and g and b. - // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00 - b = _mm_slli_epi32(b, 24); - - // Make sure the value of alpha channel is always larger than maximum - // value of color channels. - accum0 = _mm_max_epu8(b, accum0); - } else { - // Set value of alpha channels to 0xFF. - __m128i mask = _mm_set1_epi32(0xff000000); - accum0 = _mm_or_si128(accum0, mask); - } - - // Store the convolution result (16 bytes) and advance the pixel pointers. - _mm_storeu_si128(reinterpret_cast<__m128i*>(outRow), accum0); - outRow += 16; - } - - // When the width of the output is not divisible by 4, We need to save one - // pixel (4 bytes) each time. And also the fourth pixel is always absent. - int r = pixelWidth & 3; - if (r) { - __m128i accum0 = _mm_setzero_si128(); - __m128i accum1 = _mm_setzero_si128(); - __m128i accum2 = _mm_setzero_si128(); - for (int filterY = 0; filterY < filterLength; ++filterY) { - __m128i coeff16 = _mm_set1_epi16(filterValues[filterY]); - // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0 - const __m128i* src = reinterpret_cast<const __m128i*>( - &sourceDataRows[filterY][width << 2]); - __m128i src8 = _mm_loadu_si128(src); - // [16] a1 b1 g1 r1 a0 b0 g0 r0 - __m128i src16 = _mm_unpacklo_epi8(src8, zero); - __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16); - __m128i mul_lo = _mm_mullo_epi16(src16, coeff16); - // [32] a0 b0 g0 r0 - __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi); - accum0 = _mm_add_epi32(accum0, t); - // [32] a1 b1 g1 r1 - t = _mm_unpackhi_epi16(mul_lo, mul_hi); - accum1 = _mm_add_epi32(accum1, t); - // [16] a3 b3 g3 r3 a2 b2 g2 r2 - src16 = _mm_unpackhi_epi8(src8, zero); - mul_hi = _mm_mulhi_epi16(src16, coeff16); - mul_lo = _mm_mullo_epi16(src16, coeff16); - // [32] a2 b2 g2 r2 - t = _mm_unpacklo_epi16(mul_lo, mul_hi); - accum2 = _mm_add_epi32(accum2, t); - } - - accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits); - accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits); - accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits); - // [16] a1 b1 g1 r1 a0 b0 g0 r0 - accum0 = _mm_packs_epi32(accum0, accum1); - // [16] a3 b3 g3 r3 a2 b2 g2 r2 - accum2 = _mm_packs_epi32(accum2, zero); - // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0 - accum0 = _mm_packus_epi16(accum0, accum2); - if (hasAlpha) { - // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0 - __m128i a = _mm_srli_epi32(accum0, 8); - // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0 - __m128i b = _mm_max_epu8(a, accum0); // Max of r and g. - // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0 - a = _mm_srli_epi32(accum0, 16); - // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0 - b = _mm_max_epu8(a, b); // Max of r and g and b. - // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00 - b = _mm_slli_epi32(b, 24); - accum0 = _mm_max_epu8(b, accum0); - } else { - __m128i mask = _mm_set1_epi32(0xff000000); - accum0 = _mm_or_si128(accum0, mask); - } - - for (int i = 0; i < r; i++) { - *(reinterpret_cast<int*>(outRow)) = _mm_cvtsi128_si32(accum0); - accum0 = _mm_srli_si128(accum0, 4); - outRow += 4; - } - } - } - -#elif defined(SK_ARM_HAS_NEON) - - static SK_ALWAYS_INLINE void AccumRemainder(const unsigned char* pixelsLeft, - const SkConvolutionFilter1D::ConvolutionFixed* filterValues, int32x4_t& accum, int r) { - int remainder[4] = {0}; - for (int i = 0; i < r; i++) { - SkConvolutionFilter1D::ConvolutionFixed coeff = filterValues[i]; - remainder[0] += coeff * pixelsLeft[i * 4 + 0]; - remainder[1] += coeff * pixelsLeft[i * 4 + 1]; - remainder[2] += coeff * pixelsLeft[i * 4 + 2]; - remainder[3] += coeff * pixelsLeft[i * 4 + 3]; - } - int32x4_t t = {remainder[0], remainder[1], remainder[2], remainder[3]}; - accum += t; - } - - // Convolves horizontally along a single row. The row data is given in - // |srcData| and continues for the numValues() of the filter. - void convolve_horizontally(const unsigned char* srcData, - const SkConvolutionFilter1D& filter, - unsigned char* outRow, - bool /*hasAlpha*/) { - // Loop over each pixel on this row in the output image. - int numValues = filter.numValues(); - for (int outX = 0; outX < numValues; outX++) { - uint8x8_t coeff_mask0 = vcreate_u8(0x0100010001000100); - uint8x8_t coeff_mask1 = vcreate_u8(0x0302030203020302); - uint8x8_t coeff_mask2 = vcreate_u8(0x0504050405040504); - uint8x8_t coeff_mask3 = vcreate_u8(0x0706070607060706); - // Get the filter that determines the current output pixel. - int filterOffset, filterLength; - const SkConvolutionFilter1D::ConvolutionFixed* filterValues = - filter.FilterForValue(outX, &filterOffset, &filterLength); - - // Compute the first pixel in this row that the filter affects. It will - // touch |filterLength| pixels (4 bytes each) after this. - const unsigned char* rowToFilter = &srcData[filterOffset * 4]; - - // Apply the filter to the row to get the destination pixel in |accum|. - int32x4_t accum = vdupq_n_s32(0); - for (int filterX = 0; filterX < filterLength >> 2; filterX++) { - // Load 4 coefficients - int16x4_t coeffs, coeff0, coeff1, coeff2, coeff3; - coeffs = vld1_s16(filterValues); - coeff0 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask0)); - coeff1 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask1)); - coeff2 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask2)); - coeff3 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask3)); - - // Load pixels and calc - uint8x16_t pixels = vld1q_u8(rowToFilter); - int16x8_t p01_16 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pixels))); - int16x8_t p23_16 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pixels))); - - int16x4_t p0_src = vget_low_s16(p01_16); - int16x4_t p1_src = vget_high_s16(p01_16); - int16x4_t p2_src = vget_low_s16(p23_16); - int16x4_t p3_src = vget_high_s16(p23_16); - - int32x4_t p0 = vmull_s16(p0_src, coeff0); - int32x4_t p1 = vmull_s16(p1_src, coeff1); - int32x4_t p2 = vmull_s16(p2_src, coeff2); - int32x4_t p3 = vmull_s16(p3_src, coeff3); - - accum += p0; - accum += p1; - accum += p2; - accum += p3; - - // Advance the pointers - rowToFilter += 16; - filterValues += 4; - } - - int r = filterLength & 3; - if (r) { - int remainder_offset = (filterOffset + filterLength - r) * 4; - AccumRemainder(srcData + remainder_offset, filterValues, accum, r); - } - - // Bring this value back in range. All of the filter scaling factors - // are in fixed point with kShiftBits bits of fractional part. - accum = vshrq_n_s32(accum, SkConvolutionFilter1D::kShiftBits); - - // Pack and store the new pixel. - int16x4_t accum16 = vqmovn_s32(accum); - uint8x8_t accum8 = vqmovun_s16(vcombine_s16(accum16, accum16)); - vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow), vreinterpret_u32_u8(accum8), 0); - outRow += 4; - } - } - - // Convolves horizontally along four rows. The row data is given in - // |srcData| and continues for the numValues() of the filter. - // The algorithm is almost same as |convolve_horizontally|. Please - // refer to that function for detailed comments. - void convolve_4_rows_horizontally(const unsigned char* srcData[4], - const SkConvolutionFilter1D& filter, - unsigned char* outRow[4], - size_t outRowBytes) { - // Output one pixel each iteration, calculating all channels (RGBA) together. - int numValues = filter.numValues(); - for (int outX = 0; outX < numValues; outX++) { - - int filterOffset, filterLength; - const SkConvolutionFilter1D::ConvolutionFixed* filterValues = - filter.FilterForValue(outX, &filterOffset, &filterLength); - - // four pixels in a column per iteration. - int32x4_t accum0 = vdupq_n_s32(0); - int32x4_t accum1 = vdupq_n_s32(0); - int32x4_t accum2 = vdupq_n_s32(0); - int32x4_t accum3 = vdupq_n_s32(0); - - uint8x8_t coeff_mask0 = vcreate_u8(0x0100010001000100); - uint8x8_t coeff_mask1 = vcreate_u8(0x0302030203020302); - uint8x8_t coeff_mask2 = vcreate_u8(0x0504050405040504); - uint8x8_t coeff_mask3 = vcreate_u8(0x0706070607060706); - - int start = filterOffset * 4; - - // We will load and accumulate with four coefficients per iteration. - for (int filterX = 0; filterX < (filterLength >> 2); filterX++) { - int16x4_t coeffs, coeff0, coeff1, coeff2, coeff3; - - coeffs = vld1_s16(filterValues); - coeff0 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask0)); - coeff1 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask1)); - coeff2 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask2)); - coeff3 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask3)); - - uint8x16_t pixels; - int16x8_t p01_16, p23_16; - int32x4_t p0, p1, p2, p3; - -#define ITERATION(src, accum) \ - pixels = vld1q_u8(src); \ - p01_16 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pixels))); \ - p23_16 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pixels))); \ - p0 = vmull_s16(vget_low_s16(p01_16), coeff0); \ - p1 = vmull_s16(vget_high_s16(p01_16), coeff1); \ - p2 = vmull_s16(vget_low_s16(p23_16), coeff2); \ - p3 = vmull_s16(vget_high_s16(p23_16), coeff3); \ - accum += p0; \ - accum += p1; \ - accum += p2; \ - accum += p3 - - ITERATION(srcData[0] + start, accum0); - ITERATION(srcData[1] + start, accum1); - ITERATION(srcData[2] + start, accum2); - ITERATION(srcData[3] + start, accum3); - - start += 16; - filterValues += 4; - } - - int r = filterLength & 3; - if (r) { - int remainder_offset = (filterOffset + filterLength - r) * 4; - AccumRemainder(srcData[0] + remainder_offset, filterValues, accum0, r); - AccumRemainder(srcData[1] + remainder_offset, filterValues, accum1, r); - AccumRemainder(srcData[2] + remainder_offset, filterValues, accum2, r); - AccumRemainder(srcData[3] + remainder_offset, filterValues, accum3, r); - } - - int16x4_t accum16; - uint8x8_t res0, res1, res2, res3; - -#define PACK_RESULT(accum, res) \ - accum = vshrq_n_s32(accum, SkConvolutionFilter1D::kShiftBits); \ - accum16 = vqmovn_s32(accum); \ - res = vqmovun_s16(vcombine_s16(accum16, accum16)); - - PACK_RESULT(accum0, res0); - PACK_RESULT(accum1, res1); - PACK_RESULT(accum2, res2); - PACK_RESULT(accum3, res3); - - vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[0]), vreinterpret_u32_u8(res0), 0); - vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[1]), vreinterpret_u32_u8(res1), 0); - vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[2]), vreinterpret_u32_u8(res2), 0); - vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[3]), vreinterpret_u32_u8(res3), 0); - outRow[0] += 4; - outRow[1] += 4; - outRow[2] += 4; - outRow[3] += 4; - } - } - - - // Does vertical convolution to produce one output row. The filter values and - // length are given in the first two parameters. These are applied to each - // of the rows pointed to in the |sourceDataRows| array, with each row - // being |pixelWidth| wide. - // - // The output must have room for |pixelWidth * 4| bytes. - template<bool hasAlpha> - void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues, - int filterLength, - unsigned char* const* sourceDataRows, - int pixelWidth, - unsigned char* outRow) { - int width = pixelWidth & ~3; - - // Output four pixels per iteration (16 bytes). - for (int outX = 0; outX < width; outX += 4) { - - // Accumulated result for each pixel. 32 bits per RGBA channel. - int32x4_t accum0 = vdupq_n_s32(0); - int32x4_t accum1 = vdupq_n_s32(0); - int32x4_t accum2 = vdupq_n_s32(0); - int32x4_t accum3 = vdupq_n_s32(0); - - // Convolve with one filter coefficient per iteration. - for (int filterY = 0; filterY < filterLength; filterY++) { - - // Duplicate the filter coefficient 4 times. - // [16] cj cj cj cj - int16x4_t coeff16 = vdup_n_s16(filterValues[filterY]); - - // Load four pixels (16 bytes) together. - // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0 - uint8x16_t src8 = vld1q_u8(&sourceDataRows[filterY][outX << 2]); - - int16x8_t src16_01 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(src8))); - int16x8_t src16_23 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(src8))); - int16x4_t src16_0 = vget_low_s16(src16_01); - int16x4_t src16_1 = vget_high_s16(src16_01); - int16x4_t src16_2 = vget_low_s16(src16_23); - int16x4_t src16_3 = vget_high_s16(src16_23); - - accum0 += vmull_s16(src16_0, coeff16); - accum1 += vmull_s16(src16_1, coeff16); - accum2 += vmull_s16(src16_2, coeff16); - accum3 += vmull_s16(src16_3, coeff16); - } - - // Shift right for fixed point implementation. - accum0 = vshrq_n_s32(accum0, SkConvolutionFilter1D::kShiftBits); - accum1 = vshrq_n_s32(accum1, SkConvolutionFilter1D::kShiftBits); - accum2 = vshrq_n_s32(accum2, SkConvolutionFilter1D::kShiftBits); - accum3 = vshrq_n_s32(accum3, SkConvolutionFilter1D::kShiftBits); - - // Packing 32 bits |accum| to 16 bits per channel (signed saturation). - // [16] a1 b1 g1 r1 a0 b0 g0 r0 - int16x8_t accum16_0 = vcombine_s16(vqmovn_s32(accum0), vqmovn_s32(accum1)); - // [16] a3 b3 g3 r3 a2 b2 g2 r2 - int16x8_t accum16_1 = vcombine_s16(vqmovn_s32(accum2), vqmovn_s32(accum3)); - - // Packing 16 bits |accum| to 8 bits per channel (unsigned saturation). - // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0 - uint8x16_t accum8 = vcombine_u8(vqmovun_s16(accum16_0), vqmovun_s16(accum16_1)); - - if (hasAlpha) { - // Compute the max(ri, gi, bi) for each pixel. - // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0 - uint8x16_t a = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(accum8), 8)); - // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0 - uint8x16_t b = vmaxq_u8(a, accum8); // Max of r and g - // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0 - a = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(accum8), 16)); - // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0 - b = vmaxq_u8(a, b); // Max of r and g and b. - // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00 - b = vreinterpretq_u8_u32(vshlq_n_u32(vreinterpretq_u32_u8(b), 24)); - - // Make sure the value of alpha channel is always larger than maximum - // value of color channels. - accum8 = vmaxq_u8(b, accum8); - } else { - // Set value of alpha channels to 0xFF. - accum8 = vreinterpretq_u8_u32(vreinterpretq_u32_u8(accum8) | vdupq_n_u32(0xFF000000)); - } - - // Store the convolution result (16 bytes) and advance the pixel pointers. - vst1q_u8(outRow, accum8); - outRow += 16; - } - - // Process the leftovers when the width of the output is not divisible - // by 4, that is at most 3 pixels. - int r = pixelWidth & 3; - if (r) { - - int32x4_t accum0 = vdupq_n_s32(0); - int32x4_t accum1 = vdupq_n_s32(0); - int32x4_t accum2 = vdupq_n_s32(0); - - for (int filterY = 0; filterY < filterLength; ++filterY) { - int16x4_t coeff16 = vdup_n_s16(filterValues[filterY]); - - // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0 - uint8x16_t src8 = vld1q_u8(&sourceDataRows[filterY][width << 2]); - - int16x8_t src16_01 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(src8))); - int16x8_t src16_23 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(src8))); - int16x4_t src16_0 = vget_low_s16(src16_01); - int16x4_t src16_1 = vget_high_s16(src16_01); - int16x4_t src16_2 = vget_low_s16(src16_23); - - accum0 += vmull_s16(src16_0, coeff16); - accum1 += vmull_s16(src16_1, coeff16); - accum2 += vmull_s16(src16_2, coeff16); - } - - accum0 = vshrq_n_s32(accum0, SkConvolutionFilter1D::kShiftBits); - accum1 = vshrq_n_s32(accum1, SkConvolutionFilter1D::kShiftBits); - accum2 = vshrq_n_s32(accum2, SkConvolutionFilter1D::kShiftBits); - - int16x8_t accum16_0 = vcombine_s16(vqmovn_s32(accum0), vqmovn_s32(accum1)); - int16x8_t accum16_1 = vcombine_s16(vqmovn_s32(accum2), vqmovn_s32(accum2)); - - uint8x16_t accum8 = vcombine_u8(vqmovun_s16(accum16_0), vqmovun_s16(accum16_1)); - - if (hasAlpha) { - // Compute the max(ri, gi, bi) for each pixel. - // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0 - uint8x16_t a = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(accum8), 8)); - // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0 - uint8x16_t b = vmaxq_u8(a, accum8); // Max of r and g - // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0 - a = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(accum8), 16)); - // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0 - b = vmaxq_u8(a, b); // Max of r and g and b. - // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00 - b = vreinterpretq_u8_u32(vshlq_n_u32(vreinterpretq_u32_u8(b), 24)); - - // Make sure the value of alpha channel is always larger than maximum - // value of color channels. - accum8 = vmaxq_u8(b, accum8); - } else { - // Set value of alpha channels to 0xFF. - accum8 = vreinterpretq_u8_u32(vreinterpretq_u32_u8(accum8) | vdupq_n_u32(0xFF000000)); - } - - switch(r) { - case 1: - vst1q_lane_u32(reinterpret_cast<uint32_t*>(outRow), vreinterpretq_u32_u8(accum8), 0); - break; - case 2: - vst1_u32(reinterpret_cast<uint32_t*>(outRow), - vreinterpret_u32_u8(vget_low_u8(accum8))); - break; - case 3: - vst1_u32(reinterpret_cast<uint32_t*>(outRow), - vreinterpret_u32_u8(vget_low_u8(accum8))); - vst1q_lane_u32(reinterpret_cast<uint32_t*>(outRow+8), vreinterpretq_u32_u8(accum8), 2); - break; - } - } - } - -#else - - // Converts the argument to an 8-bit unsigned value by clamping to the range - // 0-255. - inline unsigned char ClampTo8(int a) { - if (static_cast<unsigned>(a) < 256) { - return a; // Avoid the extra check in the common case. - } - if (a < 0) { - return 0; - } - return 255; - } - - // Convolves horizontally along a single row. The row data is given in - // |srcData| and continues for the numValues() of the filter. - template<bool hasAlpha> - void ConvolveHorizontally(const unsigned char* srcData, - const SkConvolutionFilter1D& filter, - unsigned char* outRow) { - // Loop over each pixel on this row in the output image. - int numValues = filter.numValues(); - for (int outX = 0; outX < numValues; outX++) { - // Get the filter that determines the current output pixel. - int filterOffset, filterLength; - const SkConvolutionFilter1D::ConvolutionFixed* filterValues = - filter.FilterForValue(outX, &filterOffset, &filterLength); - - // Compute the first pixel in this row that the filter affects. It will - // touch |filterLength| pixels (4 bytes each) after this. - const unsigned char* rowToFilter = &srcData[filterOffset * 4]; - - // Apply the filter to the row to get the destination pixel in |accum|. - int accum[4] = {0}; - for (int filterX = 0; filterX < filterLength; filterX++) { - SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX]; - accum[0] += curFilter * rowToFilter[filterX * 4 + 0]; - accum[1] += curFilter * rowToFilter[filterX * 4 + 1]; - accum[2] += curFilter * rowToFilter[filterX * 4 + 2]; - if (hasAlpha) { - accum[3] += curFilter * rowToFilter[filterX * 4 + 3]; - } - } - - // Bring this value back in range. All of the filter scaling factors - // are in fixed point with kShiftBits bits of fractional part. - accum[0] >>= SkConvolutionFilter1D::kShiftBits; - accum[1] >>= SkConvolutionFilter1D::kShiftBits; - accum[2] >>= SkConvolutionFilter1D::kShiftBits; - if (hasAlpha) { - accum[3] >>= SkConvolutionFilter1D::kShiftBits; - } - - // Store the new pixel. - outRow[outX * 4 + 0] = ClampTo8(accum[0]); - outRow[outX * 4 + 1] = ClampTo8(accum[1]); - outRow[outX * 4 + 2] = ClampTo8(accum[2]); - if (hasAlpha) { - outRow[outX * 4 + 3] = ClampTo8(accum[3]); - } - } - } - - // Does vertical convolution to produce one output row. The filter values and - // length are given in the first two parameters. These are applied to each - // of the rows pointed to in the |sourceDataRows| array, with each row - // being |pixelWidth| wide. - // - // The output must have room for |pixelWidth * 4| bytes. - template<bool hasAlpha> - void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues, - int filterLength, - unsigned char* const* sourceDataRows, - int pixelWidth, - unsigned char* outRow) { - // We go through each column in the output and do a vertical convolution, - // generating one output pixel each time. - for (int outX = 0; outX < pixelWidth; outX++) { - // Compute the number of bytes over in each row that the current column - // we're convolving starts at. The pixel will cover the next 4 bytes. - int byteOffset = outX * 4; - - // Apply the filter to one column of pixels. - int accum[4] = {0}; - for (int filterY = 0; filterY < filterLength; filterY++) { - SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY]; - accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0]; - accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1]; - accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2]; - if (hasAlpha) { - accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3]; - } - } - - // Bring this value back in range. All of the filter scaling factors - // are in fixed point with kShiftBits bits of precision. - accum[0] >>= SkConvolutionFilter1D::kShiftBits; - accum[1] >>= SkConvolutionFilter1D::kShiftBits; - accum[2] >>= SkConvolutionFilter1D::kShiftBits; - if (hasAlpha) { - accum[3] >>= SkConvolutionFilter1D::kShiftBits; - } - - // Store the new pixel. - outRow[byteOffset + 0] = ClampTo8(accum[0]); - outRow[byteOffset + 1] = ClampTo8(accum[1]); - outRow[byteOffset + 2] = ClampTo8(accum[2]); - if (hasAlpha) { - unsigned char alpha = ClampTo8(accum[3]); - - // Make sure the alpha channel doesn't come out smaller than any of the - // color channels. We use premultipled alpha channels, so this should - // never happen, but rounding errors will cause this from time to time. - // These "impossible" colors will cause overflows (and hence random pixel - // values) when the resulting bitmap is drawn to the screen. - // - // We only need to do this when generating the final output row (here). - int maxColorChannel = SkTMax(outRow[byteOffset + 0], - SkTMax(outRow[byteOffset + 1], - outRow[byteOffset + 2])); - if (alpha < maxColorChannel) { - outRow[byteOffset + 3] = maxColorChannel; - } else { - outRow[byteOffset + 3] = alpha; - } - } else { - // No alpha channel, the image is opaque. - outRow[byteOffset + 3] = 0xff; - } - } - } - - // There's a bug somewhere here with GCC autovectorization (-ftree-vectorize). We originally - // thought this was 32 bit only, but subsequent tests show that some 64 bit gcc compiles - // suffer here too. - // - // Dropping to -O2 disables -ftree-vectorize. GCC 4.6 needs noinline. https://bug.skia.org/2575 -#if SK_HAS_ATTRIBUTE(optimize) && defined(SK_RELEASE) - #define SK_MAYBE_DISABLE_VECTORIZATION __attribute__((optimize("O2"), noinline)) -#else - #define SK_MAYBE_DISABLE_VECTORIZATION -#endif - - SK_MAYBE_DISABLE_VECTORIZATION - void convolve_horizontally(const unsigned char* srcData, - const SkConvolutionFilter1D& filter, - unsigned char* outRow, - bool hasAlpha) { - if (hasAlpha) { - ConvolveHorizontally<true>(srcData, filter, outRow); - } else { - ConvolveHorizontally<false>(srcData, filter, outRow); - } - } -#undef SK_MAYBE_DISABLE_VECTORIZATION - - void (*convolve_4_rows_horizontally)(const unsigned char* srcData[4], - const SkConvolutionFilter1D& filter, - unsigned char* outRow[4], - size_t outRowBytes) - = nullptr; - - -#endif - - void convolve_vertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues, - int filterLength, - unsigned char* const* sourceDataRows, - int pixelWidth, - unsigned char* outRow, - bool hasAlpha) { - if (hasAlpha) { - ConvolveVertically<true>(filterValues, filterLength, sourceDataRows, - pixelWidth, outRow); - } else { - ConvolveVertically<false>(filterValues, filterLength, sourceDataRows, - pixelWidth, outRow); - } - } - -} // namespace SK_OPTS_NS - -#endif//SkBitmapFilter_opts_DEFINED diff --git a/src/opts/SkBitmapProcState_opts_none.cpp b/src/opts/SkBitmapProcState_opts_none.cpp index 0d96e17133..1d83ddfe7c 100644 --- a/src/opts/SkBitmapProcState_opts_none.cpp +++ b/src/opts/SkBitmapProcState_opts_none.cpp @@ -5,7 +5,6 @@ * found in the LICENSE file. */ -#include "SkBitmapScaler.h" #include "SkBitmapProcState.h" /* A platform may optionally overwrite any of these with accelerated diff --git a/src/opts/SkOpts_hsw.cpp b/src/opts/SkOpts_hsw.cpp deleted file mode 100644 index dded64776a..0000000000 --- a/src/opts/SkOpts_hsw.cpp +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright 2016 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -// It is not safe to #include any header file here unless it has been vetted for ODR safety: -// all symbols used must be file-scoped static or in an anonymous namespace. This applies -// to _all_ header files: C standard library, C++ standard library, Skia... everything. - -#include <immintrin.h> // ODR safe -#include <stdint.h> // ODR safe - -#if defined(__AVX2__) - -namespace hsw { - - void convolve_vertically(const int16_t* filter, int filterLen, - uint8_t* const* srcRows, int width, - uint8_t* out, bool hasAlpha) { - // It's simpler to work with the output array in terms of 4-byte pixels. - auto dst = (int*)out; - - // Output up to eight pixels per iteration. - for (int x = 0; x < width; x += 8) { - // Accumulated result for 4 (non-adjacent) pairs of pixels, - // with each channel in signed 17.14 fixed point. - auto accum04 = _mm256_setzero_si256(), - accum15 = _mm256_setzero_si256(), - accum26 = _mm256_setzero_si256(), - accum37 = _mm256_setzero_si256(); - - // Convolve with the filter. (This inner loop is where we spend ~all our time.) - // While we can, we consume 2 filter coefficients and 2 rows of 8 pixels each at a time. - auto convolve_16_pixels = [&](__m256i interlaced_coeffs, - __m256i pixels_01234567, __m256i pixels_89ABCDEF) { - // Interlaced R0R8 G0G8 B0B8 A0A8 R1R9 G1G9... 32 8-bit values each. - auto _08194C5D = _mm256_unpacklo_epi8(pixels_01234567, pixels_89ABCDEF), - _2A3B6E7F = _mm256_unpackhi_epi8(pixels_01234567, pixels_89ABCDEF); - - // Still interlaced R0R8 G0G8... as above, each channel expanded to 16-bit lanes. - auto _084C = _mm256_unpacklo_epi8(_08194C5D, _mm256_setzero_si256()), - _195D = _mm256_unpackhi_epi8(_08194C5D, _mm256_setzero_si256()), - _2A6E = _mm256_unpacklo_epi8(_2A3B6E7F, _mm256_setzero_si256()), - _3B7F = _mm256_unpackhi_epi8(_2A3B6E7F, _mm256_setzero_si256()); - - // accum0_R += R0*coeff0 + R8*coeff1, etc. - accum04 = _mm256_add_epi32(accum04, _mm256_madd_epi16(_084C, interlaced_coeffs)); - accum15 = _mm256_add_epi32(accum15, _mm256_madd_epi16(_195D, interlaced_coeffs)); - accum26 = _mm256_add_epi32(accum26, _mm256_madd_epi16(_2A6E, interlaced_coeffs)); - accum37 = _mm256_add_epi32(accum37, _mm256_madd_epi16(_3B7F, interlaced_coeffs)); - }; - - int i = 0; - for (; i < filterLen/2*2; i += 2) { - convolve_16_pixels(_mm256_set1_epi32(*(const int32_t*)(filter+i)), - _mm256_loadu_si256((const __m256i*)(srcRows[i+0] + x*4)), - _mm256_loadu_si256((const __m256i*)(srcRows[i+1] + x*4))); - } - if (i < filterLen) { - convolve_16_pixels(_mm256_set1_epi32(*(const int16_t*)(filter+i)), - _mm256_loadu_si256((const __m256i*)(srcRows[i] + x*4)), - _mm256_setzero_si256()); - } - - // Trim the fractional parts off the accumulators. - accum04 = _mm256_srai_epi32(accum04, 14); - accum15 = _mm256_srai_epi32(accum15, 14); - accum26 = _mm256_srai_epi32(accum26, 14); - accum37 = _mm256_srai_epi32(accum37, 14); - - // Pack back down to 8-bit channels. - auto pixels = _mm256_packus_epi16(_mm256_packs_epi32(accum04, accum15), - _mm256_packs_epi32(accum26, accum37)); - - if (hasAlpha) { - // Clamp alpha to the max of r,g,b to make sure we stay premultiplied. - __m256i max_rg = _mm256_max_epu8(pixels, _mm256_srli_epi32(pixels, 8)), - max_rgb = _mm256_max_epu8(max_rg, _mm256_srli_epi32(pixels, 16)); - pixels = _mm256_max_epu8(pixels, _mm256_slli_epi32(max_rgb, 24)); - } else { - // Force opaque. - pixels = _mm256_or_si256(pixels, _mm256_set1_epi32(0xff000000)); - } - - // Normal path to store 8 pixels. - if (x + 8 <= width) { - _mm256_storeu_si256((__m256i*)dst, pixels); - dst += 8; - continue; - } - - // Store one pixel at a time on the last iteration. - for (int i = x; i < width; i++) { - *dst++ = _mm_cvtsi128_si32(_mm256_castsi256_si128(pixels)); - pixels = _mm256_permutevar8x32_epi32(pixels, _mm256_setr_epi32(1,2,3,4,5,6,7,0)); - } - } - } - -} - -namespace SkOpts { - // See SkOpts.h, writing SkConvolutionFilter1D::ConvolutionFixed as the underlying type. - extern void (*convolve_vertically)(const int16_t* filter, int filterLen, - uint8_t* const* srcRows, int width, - uint8_t* out, bool hasAlpha); - void Init_hsw() { - convolve_vertically = hsw::convolve_vertically; - } -} - -#else // defined(__AVX2__) is not true... - -namespace SkOpts { void Init_hsw() {} } - -#endif diff --git a/src/opts/opts_check_x86.cpp b/src/opts/opts_check_x86.cpp index 7917259554..ec4e9f66bd 100644 --- a/src/opts/opts_check_x86.cpp +++ b/src/opts/opts_check_x86.cpp @@ -7,7 +7,6 @@ #include "SkBitmapProcState_opts_SSE2.h" #include "SkBitmapProcState_opts_SSSE3.h" -#include "SkBitmapScaler.h" #include "SkBlitMask.h" #include "SkBlitRow.h" #include "SkBlitRow_opts_SSE2.h" diff --git a/src/shaders/SkImageShader.cpp b/src/shaders/SkImageShader.cpp index 35289127b0..b643f24b0b 100644 --- a/src/shaders/SkImageShader.cpp +++ b/src/shaders/SkImageShader.cpp @@ -256,7 +256,7 @@ bool SkImageShader::onAppendStages(SkRasterPipeline* p, SkColorSpace* dstCS, SkA auto quality = paint.getFilterQuality(); SkBitmapProvider provider(fImage.get(), dstCS); - SkDefaultBitmapController controller(SkDefaultBitmapController::CanShadeHQ::kYes); + SkDefaultBitmapController controller; std::unique_ptr<SkBitmapController::State> state { controller.requestBitmap(provider, matrix, quality) }; |