Assume HQ is handled by pipeline, delete legacy code-path

CQ_INCLUDE_TRYBOTS=skia.primary:Test-Debian9-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD

Bug: skia:
Change-Id: If6f0d0a57463bf99a66d674e65a62ce3931d0116
Reviewed-on: https://skia-review.googlesource.com/24644
Commit-Queue: Mike Reed <reed@google.com>
Reviewed-by: Mike Klein <mtklein@chromium.org>
author: Mike Reed <reed@google.com> 2017-07-19 17:20:37 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-07-20 00:43:37 +0000
commit: e32500f0642df381fd79731df2f7a4a4a71a46e2 (patch)
tree: 05747f712923791d6df14077714cede88d9d51ff
parent: 3e583cba8af153952e31925e0d4bfbc71cfa43b8 (diff)
26 files changed, 25 insertions, 2381 deletions
diff --git a/BUILD.gn b/BUILD.gn
index 2fac60ea90..70b8cb5c34 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -297,22 +297,6 @@ opts("avx") {
   }
 }
 
-opts("hsw") {
-  enabled = is_x86
-  sources = skia_opts.hsw_sources
-  if (is_win) {
-    cflags = [ "/arch:AVX2" ]
-  } else {
-    cflags = [
-      "-mavx2",
-      "-mbmi",
-      "-mbmi2",
-      "-mf16c",
-      "-mfma",
-    ]
-  }
-}
-
 # Any feature of Skia that requires third-party code should be optional and use this template.
 template("optional") {
   if (invoker.enabled) {
@@ -668,7 +652,6 @@ component("skia") {
     ":fontmgr_fontconfig",
     ":fontmgr_fuchsia",
     ":gpu",
-    ":hsw",
     ":jpeg",
     ":none",
     ":pdf",
diff --git a/bench/BitmapScaleBench.cpp b/bench/BitmapScaleBench.cpp
deleted file mode 100644
index e309d5162c..0000000000
--- a/bench/BitmapScaleBench.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright 2013 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "Benchmark.h"
-#include "SkBlurMask.h"
-#include "SkCanvas.h"
-#include "SkPaint.h"
-#include "SkRandom.h"
-#include "SkShader.h"
-#include "SkString.h"
-
-class BitmapScaleBench: public Benchmark {
-    int         fLoopCount;
-    int         fInputSize;
-    int         fOutputSize;
-    SkString    fName;
-
-public:
-    BitmapScaleBench( int is, int os)  {
-        fInputSize = is;
-        fOutputSize = os;
-
-        fLoopCount = 20;
-    }
-
-protected:
-
-    SkBitmap fInputBitmap, fOutputBitmap;
-    SkMatrix fMatrix;
-
-    const char* onGetName() override {
-        return fName.c_str();
-    }
-
-    int inputSize() const {
-        return fInputSize;
-    }
-
-    int outputSize() const {
-        return fOutputSize;
-    }
-
-    float scale() const {
-        return float(outputSize())/inputSize();
-    }
-
-    SkIPoint onGetSize() override {
-        return SkIPoint::Make( fOutputSize, fOutputSize );
-    }
-
-    void setName(const char * name) {
-        fName.printf( "bitmap_scale_%s_%d_%d", name, fInputSize, fOutputSize );
-    }
-
-    void onDelayedSetup() override {
-        fInputBitmap.allocN32Pixels(fInputSize, fInputSize, true);
-        fInputBitmap.eraseColor(SK_ColorWHITE);
-
-        fOutputBitmap.allocN32Pixels(fOutputSize, fOutputSize, true);
-
-        fMatrix.setScale( scale(), scale() );
-    }
-
-    void onDraw(int loops, SkCanvas*) override {
-        SkPaint paint;
-        this->setupPaint(&paint);
-
-        preBenchSetup();
-
-        for (int i = 0; i < loops; i++) {
-            doScaleImage();
-        }
-    }
-
-    virtual void doScaleImage() = 0;
-    virtual void preBenchSetup() {}
-private:
-    typedef Benchmark INHERITED;
-};
-
-class BitmapFilterScaleBench: public BitmapScaleBench {
- public:
-    BitmapFilterScaleBench( int is, int os) : INHERITED(is, os) {
-        setName( "filter" );
-    }
-protected:
-    void doScaleImage() override {
-        SkCanvas canvas( fOutputBitmap );
-        SkPaint paint;
-
-        paint.setFilterQuality(kHigh_SkFilterQuality);
-        fInputBitmap.notifyPixelsChanged();
-        canvas.concat(fMatrix);
-        canvas.drawBitmap(fInputBitmap, 0, 0, &paint );
-    }
-private:
-    typedef BitmapScaleBench INHERITED;
-};
-
-DEF_BENCH(return new BitmapFilterScaleBench(10, 90);)
-DEF_BENCH(return new BitmapFilterScaleBench(30, 90);)
-DEF_BENCH(return new BitmapFilterScaleBench(80, 90);)
-DEF_BENCH(return new BitmapFilterScaleBench(90, 90);)
-DEF_BENCH(return new BitmapFilterScaleBench(90, 80);)
-DEF_BENCH(return new BitmapFilterScaleBench(90, 30);)
-DEF_BENCH(return new BitmapFilterScaleBench(90, 10);)
-DEF_BENCH(return new BitmapFilterScaleBench(256, 64);)
-DEF_BENCH(return new BitmapFilterScaleBench(64, 256);)
-
-///////////////////////////////////////////////////////////////////////////////////////////////
-
-#include "SkBitmapScaler.h"
-
-class PixmapScalerBench: public Benchmark {
-    SkBitmapScaler::ResizeMethod    fMethod;
-    SkString                        fName;
-    SkBitmap                        fSrc, fDst;
-
-public:
-    PixmapScalerBench(SkBitmapScaler::ResizeMethod method, const char suffix[]) : fMethod(method) {
-        fName.printf("pixmapscaler_%s", suffix);
-    }
-
-protected:
-    const char* onGetName() override {
-        return fName.c_str();
-    }
-
-    SkIPoint onGetSize() override { return{ 100, 100 }; }
-
-    bool isSuitableFor(Backend backend) override {
-        return backend == kNonRendering_Backend;
-    }
-
-    void onDelayedSetup() override {
-        fSrc.allocN32Pixels(640, 480);
-        fSrc.eraseColor(SK_ColorWHITE);
-        fDst.allocN32Pixels(300, 250);
-    }
-
-    void onDraw(int loops, SkCanvas*) override {
-        SkPixmap src, dst;
-        fSrc.peekPixels(&src);
-        fDst.peekPixels(&dst);
-        for (int i = 0; i < loops * 16; i++) {
-            SkBitmapScaler::Resize(dst, src, fMethod);
-        }
-    }
-
-private:
-    typedef Benchmark INHERITED;
-};
-DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_LANCZOS3, "lanczos");  )
-DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_MITCHELL, "mitchell"); )
-DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_HAMMING,  "hamming");  )
-DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_TRIANGLE, "triangle"); )
-DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_BOX,      "box");      )
diff --git a/gm/filterindiabox.cpp b/gm/filterindiabox.cpp
index eef0b4a0d5..84f512dfe1 100644
--- a/gm/filterindiabox.cpp
+++ b/gm/filterindiabox.cpp
@@ -10,7 +10,6 @@
 
 #include "Resources.h"
 #include "SkBitmapProcState.h"
-#include "SkBitmapScaler.h"
 #include "SkGradientShader.h"
 #include "SkImageEncoder.h"
 #include "SkStream.h"
@@ -75,7 +74,7 @@ protected:
     }
 
     SkISize onISize() override {
-        return SkISize::Make(1024, 768);
+        return SkISize::Make(680, 130);
     }
 
     void onDraw(SkCanvas* canvas) override {
diff --git a/gm/showmiplevels.cpp b/gm/showmiplevels.cpp
index 57df948536..4f27365788 100644
--- a/gm/showmiplevels.cpp
+++ b/gm/showmiplevels.cpp
@@ -9,7 +9,6 @@
 #include "sk_tool_utils.h"
 
 #include "Resources.h"
-#include "SkBitmapScaler.h"
 #include "SkGradientShader.h"
 #include "SkTypeface.h"
 #include "SkStream.h"
@@ -114,9 +113,7 @@ protected:
         return str;
     }
 
-    SkISize onISize() override {
-        return { 824, 862 };
-    }
+    SkISize onISize() override { return { 150, 862 }; }
 
     static void DrawAndFrame(SkCanvas* canvas, const SkBitmap& orig, SkScalar x, SkScalar y) {
         SkBitmap bm;
@@ -169,25 +166,6 @@ protected:
             bm.installPixels(curr);
             return bm;
         });
-
-        const SkBitmapScaler::ResizeMethod methods[] = {
-            SkBitmapScaler::RESIZE_BOX,
-            SkBitmapScaler::RESIZE_TRIANGLE,
-            SkBitmapScaler::RESIZE_LANCZOS3,
-            SkBitmapScaler::RESIZE_HAMMING,
-            SkBitmapScaler::RESIZE_MITCHELL,
-        };
-
-        SkPixmap basePM;
-        orig.peekPixels(&basePM);
-        for (auto method : methods) {
-            canvas->translate(orig.width()/2 + 8.0f, 0);
-            drawLevels(canvas, orig, [method](const SkPixmap& prev, const SkPixmap& curr) {
-                SkBitmap bm;
-                SkBitmapScaler::Resize(&bm, prev, method, curr.width(), curr.height());
-                return bm;
-            });
-        }
     }
 
     void onOnceBeforeDraw() override {
diff --git a/gn/bench.gni b/gn/bench.gni
index 7560613051..a5ab7810eb 100644
--- a/gn/bench.gni
+++ b/gn/bench.gni
@@ -17,7 +17,6 @@ bench_sources = [
   "$_bench/BitmapBench.cpp",
   "$_bench/BitmapRectBench.cpp",
   "$_bench/BitmapRegionDecoderBench.cpp",
-  "$_bench/BitmapScaleBench.cpp",
   "$_bench/BlendmodeBench.cpp",
   "$_bench/BlurBench.cpp",
   "$_bench/BlurImageFilterBench.cpp",
diff --git a/gn/core.gni b/gn/core.gni
index 39a80fef85..43b972f45c 100644
--- a/gn/core.gni
+++ b/gn/core.gni
@@ -31,7 +31,6 @@ skia_core_sources = [
   "$_src/core/SkBitmapController.cpp",
   "$_src/core/SkBitmapDevice.cpp",
   "$_src/core/SkBitmapDevice.h",
-  "$_src/core/SkBitmapFilter.h",
   "$_src/core/SkBitmapProcState.cpp",
   "$_src/core/SkBitmapProcState.h",
   "$_src/core/SkBitmapProcState_filter.h",
@@ -44,8 +43,6 @@ skia_core_sources = [
   "$_src/core/SkBitmapProcState_utils.h",
   "$_src/core/SkBitmapProvider.cpp",
   "$_src/core/SkBitmapProvider.h",
-  "$_src/core/SkBitmapScaler.h",
-  "$_src/core/SkBitmapScaler.cpp",
   "$_src/core/SkBlendMode.cpp",
   "$_src/core/SkBlitBWMaskTemplate.h",
   "$_src/core/SkBlitMask.h",
@@ -89,8 +86,6 @@ skia_core_sources = [
   "$_src/core/SkColorTable.cpp",
   "$_src/core/SkConvertPixels.cpp",
   "$_src/core/SkConvertPixels.h",
-  "$_src/core/SkConvolver.cpp",
-  "$_src/core/SkConvolver.h",
   "$_src/core/SkCoreBlitters.h",
   "$_src/core/SkCpu.cpp",
   "$_src/core/SkCpu.h",
diff --git a/gn/gn_to_bp.py b/gn/gn_to_bp.py
index 3fc30a03b5..2056c4274f 100644
--- a/gn/gn_to_bp.py
+++ b/gn/gn_to_bp.py
@@ -294,8 +294,7 @@ with open('Android.bp', 'w') as f:
                                      defs['ssse3'] +
                                      defs['sse41'] +
                                      defs['sse42'] +
-                                     defs['avx'  ] +
-                                     defs['hsw'  ])),
+                                     defs['avx'  ])),
 
     'tool_cflags'       : bpfmt(8, tool_cflags),
     'tool_shared_libs'  : bpfmt(8, tool_shared_libs),
diff --git a/gn/opts.gni b/gn/opts.gni
index 34481db3b6..1cc6027af2 100644
--- a/gn/opts.gni
+++ b/gn/opts.gni
@@ -51,4 +51,3 @@ ssse3 = [
 sse41 = [ "$_src/opts/SkOpts_sse41.cpp" ]
 sse42 = [ "$_src/opts/SkOpts_sse42.cpp" ]
 avx = [ "$_src/opts/SkOpts_avx.cpp" ]
-hsw = [ "$_src/opts/SkOpts_hsw.cpp" ]
diff --git a/gn/shared_sources.gni b/gn/shared_sources.gni
index 6df999921a..29cac671a4 100644
--- a/gn/shared_sources.gni
+++ b/gn/shared_sources.gni
@@ -24,7 +24,7 @@ skia_opts = {
   sse41_sources = sse41
   sse42_sources = sse42
   avx_sources = avx
-  hsw_sources = hsw
+  hsw_sources = []  # remove after we update Chrome
 }
 
 # Skia Chromium defines. These flags will be defined in chromium If these
diff --git a/src/core/SkBitmapController.cpp b/src/core/SkBitmapController.cpp
index d3e47aeae9..586210d4b7 100644
--- a/src/core/SkBitmapController.cpp
+++ b/src/core/SkBitmapController.cpp
@@ -6,15 +6,13 @@
  */
 
 #include "SkBitmap.h"
+#include "SkBitmapCache.h"
 #include "SkBitmapController.h"
 #include "SkBitmapProvider.h"
 #include "SkMatrix.h"
-#include "SkPixelRef.h"
+#include "SkMipMap.h"
 #include "SkTemplates.h"
 
-// RESIZE_LANCZOS3 is another good option, but chrome prefers mitchell at the moment
-#define kHQ_RESIZE_METHOD   SkBitmapScaler::RESIZE_MITCHELL
-
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 SkBitmapController::State* SkBitmapController::requestBitmap(const SkBitmapProvider& provider,
@@ -33,70 +31,24 @@ SkBitmapController::State* SkBitmapController::requestBitmap(const SkBitmapProvi
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
-#include "SkBitmapCache.h"
-#include "SkBitmapScaler.h"
-#include "SkMipMap.h"
-#include "SkResourceCache.h"
-
 class SkDefaultBitmapControllerState : public SkBitmapController::State {
 public:
-    SkDefaultBitmapControllerState(const SkBitmapProvider&,
-                                   const SkMatrix& inv,
-                                   SkFilterQuality,
-                                   bool canShadeHQ);
+    SkDefaultBitmapControllerState(const SkBitmapProvider&, const SkMatrix& inv, SkFilterQuality);
 
 private:
-    SkBitmap                      fResultBitmap;
-    sk_sp<const SkMipMap>         fCurrMip;
-    bool                          fCanShadeHQ;
+    SkBitmap                fResultBitmap;
+    sk_sp<const SkMipMap>   fCurrMip;
 
-    bool processHQRequest(const SkBitmapProvider&);
+    bool processHighRequest(const SkBitmapProvider&);
     bool processMediumRequest(const SkBitmapProvider&);
 };
 
-// Check to see that the size of the bitmap that would be produced by
-// scaling by the given inverted matrix is less than the maximum allowed.
-static inline bool cache_size_okay(const SkBitmapProvider& provider, const SkMatrix& invMat) {
-    size_t maximumAllocation = SkResourceCache::GetEffectiveSingleAllocationByteLimit();
-    if (0 == maximumAllocation) {
-        return true;
-    }
-    // float matrixScaleFactor = 1.0 / (invMat.scaleX * invMat.scaleY);
-    // return ((origBitmapSize * matrixScaleFactor) < maximumAllocationSize);
-    // Skip the division step:
-    const size_t size = provider.info().getSafeSize(provider.info().minRowBytes());
-    SkScalar invScaleSqr = invMat.getScaleX() * invMat.getScaleY();
-    return size < (maximumAllocation * SkScalarAbs(invScaleSqr));
-}
-
-/*
- *  High quality is implemented by performing up-right scale-only filtering and then
- *  using bilerp for any remaining transformations.
- */
-bool SkDefaultBitmapControllerState::processHQRequest(const SkBitmapProvider& provider) {
+bool SkDefaultBitmapControllerState::processHighRequest(const SkBitmapProvider& provider) {
     if (fQuality != kHigh_SkFilterQuality) {
         return false;
     }
 
-    // Our default return state is to downgrade the request to Medium, w/ or w/o setting fBitmap
-    // to a valid bitmap. If we succeed, we will set this to Low instead.
     fQuality = kMedium_SkFilterQuality;
-#ifdef SK_USE_MIP_FOR_DOWNSCALE_HQ
-    return false;
-#endif
-
-    bool supported = false;
-    switch (provider.info().colorType()) {
-        case kRGBA_8888_SkColorType:
-        case kBGRA_8888_SkColorType:
-            supported = true;
-            break;
-        default:
-            break;
-    }
-    if (!supported || !cache_size_okay(provider, fInvMatrix) || fInvMatrix.hasPerspective()) {
-        return false; // can't handle the reqeust
-    }
 
     SkScalar invScaleX = fInvMatrix.getScaleX();
     SkScalar invScaleY = fInvMatrix.getScaleY();
@@ -111,68 +63,14 @@ bool SkDefaultBitmapControllerState::processHQRequest(const SkBitmapProvider& pr
     invScaleX = SkScalarAbs(invScaleX);
     invScaleY = SkScalarAbs(invScaleY);
 
-    if (SkScalarNearlyEqual(invScaleX, 1) && SkScalarNearlyEqual(invScaleY, 1)) {
-        return false; // no need for HQ
-    }
-
-    if (invScaleX > 1 || invScaleY > 1) {
-        return false; // only use HQ when upsampling
-    }
-
-    // If the shader can natively handle HQ filtering, let it do it.
-    if (fCanShadeHQ) {
-        fQuality = kHigh_SkFilterQuality;
-        SkAssertResult(provider.asBitmap(&fResultBitmap));
-        return true;
-    }
-
-    const int dstW = SkScalarRoundToScalar(provider.width() / invScaleX);
-    const int dstH = SkScalarRoundToScalar(provider.height() / invScaleY);
-    const SkBitmapCacheDesc desc = provider.makeCacheDesc(dstW, dstH);
-
-    if (!SkBitmapCache::Find(desc, &fResultBitmap)) {
-        SkBitmap orig;
-        if (!provider.asBitmap(&orig)) {
-            return false;
-        }
-        SkPixmap src;
-        if (!orig.peekPixels(&src)) {
-            return false;
-        }
-
-        SkPixmap dst;
-        SkBitmapCache::RecPtr rec;
-        const SkImageInfo info = SkImageInfo::Make(desc.fScaledWidth, desc.fScaledHeight,
-                                                   src.colorType(), src.alphaType());
-        if (provider.isVolatile()) {
-            if (!fResultBitmap.tryAllocPixels(info)) {
-                return false;
-            }
-            SkASSERT(fResultBitmap.getPixels());
-            fResultBitmap.peekPixels(&dst);
-            fResultBitmap.setImmutable();   // a little cheat, as we haven't resized yet, but ok
-        } else {
-            rec = SkBitmapCache::Alloc(desc, info, &dst);
-            if (!rec) {
-                return false;
-            }
-        }
-        if (!SkBitmapScaler::Resize(dst, src, kHQ_RESIZE_METHOD)) {
-            return false; // we failed to create fScaledBitmap
-        }
-        if (rec) {
-            SkBitmapCache::Add(std::move(rec), &fResultBitmap);
-            SkASSERT(fResultBitmap.getPixels());
-            provider.notifyAddedToCache();
-        }
+    if (invScaleX >= 1 - SK_ScalarNearlyZero || invScaleY >= 1 - SK_ScalarNearlyZero) {
+        // we're down-scaling so abort HQ
+        return false;
     }
 
-    SkASSERT(fResultBitmap.getPixels());
-    SkASSERT(fResultBitmap.isImmutable());
-
-    fInvMatrix.postScale(SkIntToScalar(dstW) / provider.width(),
-                         SkIntToScalar(dstH) / provider.height());
-    fQuality = kLow_SkFilterQuality;
+    // Confirmed that we can use HQ (w/ rasterpipeline)
+    fQuality = kHigh_SkFilterQuality;
+    (void)provider.asBitmap(&fResultBitmap);
     return true;
 }
 
@@ -235,20 +133,15 @@ bool SkDefaultBitmapControllerState::processMediumRequest(const SkBitmapProvider
 
 SkDefaultBitmapControllerState::SkDefaultBitmapControllerState(const SkBitmapProvider& provider,
                                                                const SkMatrix& inv,
-                                                               SkFilterQuality qual,
-                                                               bool canShadeHQ) {
+                                                               SkFilterQuality qual) {
     fInvMatrix = inv;
     fQuality = qual;
-    fCanShadeHQ = canShadeHQ;
-
-    bool processed = this->processHQRequest(provider) || this->processMediumRequest(provider);
 
-    if (processed) {
+    if (this->processHighRequest(provider) || this->processMediumRequest(provider)) {
         SkASSERT(fResultBitmap.getPixels());
     } else {
         (void)provider.asBitmap(&fResultBitmap);
     }
-    SkASSERT(fCanShadeHQ || fQuality <= kLow_SkFilterQuality);
 
     // fResultBitmap.getPixels() may be null, but our caller knows to check fPixmap.addr()
     // and will destroy us if it is nullptr.
@@ -259,6 +152,5 @@ SkBitmapController::State* SkDefaultBitmapController::onRequestBitmap(const SkBi
                                                                       const SkMatrix& inverse,
                                                                       SkFilterQuality quality,
                                                                       void* storage, size_t size) {
-    return SkInPlaceNewCheck<SkDefaultBitmapControllerState>(storage, size,
-                                                             bm, inverse, quality, fCanShadeHQ);
+    return SkInPlaceNewCheck<SkDefaultBitmapControllerState>(storage, size, bm, inverse, quality);
 }
diff --git a/src/core/SkBitmapController.h b/src/core/SkBitmapController.h
index 72fc721c53..9eff2d28ff 100644
--- a/src/core/SkBitmapController.h
+++ b/src/core/SkBitmapController.h
@@ -57,14 +57,11 @@ protected:
 
 class SkDefaultBitmapController : public SkBitmapController {
 public:
-    enum class CanShadeHQ { kNo, kYes };
-    SkDefaultBitmapController(CanShadeHQ canShadeHQ)
-        : fCanShadeHQ(canShadeHQ == CanShadeHQ::kYes) {}
+    SkDefaultBitmapController() {}
 
 protected:
     State* onRequestBitmap(const SkBitmapProvider&, const SkMatrix& inverse, SkFilterQuality,
                            void* storage, size_t storageSize) override;
-    bool fCanShadeHQ;
 };
 
 #endif
diff --git a/src/core/SkBitmapFilter.h b/src/core/SkBitmapFilter.h
deleted file mode 100644
index ca3e0930f2..0000000000
--- a/src/core/SkBitmapFilter.h
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright 2013 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkBitmapFilter_DEFINED
-#define SkBitmapFilter_DEFINED
-
-#include "SkFixed.h"
-#include "SkMath.h"
-#include "SkScalar.h"
-
-#include "SkNx.h"
-
-// size of the precomputed bitmap filter tables for high quality filtering.
-// Used to precompute the shape of the filter kernel.
-// Table size chosen from experiments to see where I could start to see a difference.
-
-#define SKBITMAP_FILTER_TABLE_SIZE 128
-
-class SkBitmapFilter {
-public:
-    SkBitmapFilter(float width) : fWidth(width), fInvWidth(1.f/width) {
-        fPrecomputed = false;
-        fLookupMultiplier = this->invWidth() * (SKBITMAP_FILTER_TABLE_SIZE-1);
-    }
-    virtual ~SkBitmapFilter() {}
-
-    SkScalar lookupScalar(float x) const {
-        if (!fPrecomputed) {
-            precomputeTable();
-        }
-        int filter_idx = int(sk_float_abs(x * fLookupMultiplier));
-        SkASSERT(filter_idx < SKBITMAP_FILTER_TABLE_SIZE);
-        return fFilterTableScalar[filter_idx];
-    }
-
-    float width() const { return fWidth; }
-    float invWidth() const { return fInvWidth; }
-    virtual float evaluate(float x) const = 0;
-
-    virtual float evaluate_n(float val, float diff, int count, float* output) const {
-        float sum = 0;
-        for (int index = 0; index < count; index++) {
-            float filterValue = evaluate(val);
-            *output++ = filterValue;
-            sum += filterValue;
-            val += diff;
-        }
-        return sum;
-    }
-
-protected:
-    float fWidth;
-    float fInvWidth;
-    float fLookupMultiplier;
-
-    mutable bool fPrecomputed;
-    mutable SkScalar fFilterTableScalar[SKBITMAP_FILTER_TABLE_SIZE];
-
-private:
-    void precomputeTable() const {
-        fPrecomputed = true;
-        SkScalar *ftpScalar = fFilterTableScalar;
-        for (int x = 0; x < SKBITMAP_FILTER_TABLE_SIZE; ++x) {
-            float fx = ((float)x + .5f) * this->width() / SKBITMAP_FILTER_TABLE_SIZE;
-            float filter_value = evaluate(fx);
-            *ftpScalar++ = filter_value;
-        }
-    }
-};
-
-class SkMitchellFilter final : public SkBitmapFilter {
-public:
-    SkMitchellFilter()
-        : INHERITED(2)
-        , fB(1.f / 3.f)
-        , fC(1.f / 3.f)
-        , fA1(-fB - 6*fC)
-        , fB1(6*fB + 30*fC)
-        , fC1(-12*fB - 48*fC)
-        , fD1(8*fB + 24*fC)
-        , fA2(12 - 9*fB - 6*fC)
-        , fB2(-18 + 12*fB + 6*fC)
-        , fD2(6 - 2*fB)
-    {}
-
-    float evaluate(float x) const override {
-        x = fabsf(x);
-        if (x > 2.f) {
-            return 0;
-        } else if (x > 1.f) {
-            return (((fA1 * x + fB1) * x + fC1) * x + fD1) * (1.f/6.f);
-        } else {
-            return ((fA2 * x + fB2) * x*x + fD2) * (1.f/6.f);
-        }
-    }
-
-    Sk4f evalcore_n(const Sk4f& val) const {
-        Sk4f x = val.abs();
-        Sk4f over2 = x > Sk4f(2);
-        Sk4f over1 = x > Sk4f(1);
-        Sk4f poly1 = (((Sk4f(fA1) * x + Sk4f(fB1)) * x + Sk4f(fC1)) * x + Sk4f(fD1))
-                     * Sk4f(1.f/6.f);
-        Sk4f poly0 = ((Sk4f(fA2) * x + Sk4f(fB2)) * x*x + Sk4f(fD2)) * Sk4f(1.f/6.f);
-        return over2.thenElse(Sk4f(0), over1.thenElse(poly1, poly0));
-    }
-
-    float evaluate_n(float val, float diff, int count, float* output) const override {
-        Sk4f sum(0);
-        while (count >= 4) {
-            float v0 = val;
-            float v1 = val += diff;
-            float v2 = val += diff;
-            float v3 = val += diff;
-            val += diff;
-            Sk4f filterValue = evalcore_n(Sk4f(v0, v1, v2, v3));
-            filterValue.store(output);
-            output += 4;
-            sum = sum + filterValue;
-            count -= 4;
-        }
-        float sums[4];
-        sum.store(sums);
-        float result = sums[0] + sums[1] + sums[2] + sums[3];
-        result += INHERITED::evaluate_n(val, diff, count, output);
-        return result;
-    }
-
-  protected:
-      float fB, fC;
-      float fA1, fB1, fC1, fD1;
-      float fA2, fB2, fD2;
-private:
-    typedef SkBitmapFilter INHERITED;
-};
-
-class SkGaussianFilter final : public SkBitmapFilter {
-    float fAlpha, fExpWidth;
-
-public:
-    SkGaussianFilter(float a, float width = 2)
-        : SkBitmapFilter(width)
-        , fAlpha(a)
-        , fExpWidth(expf(-a * width * width))
-    {}
-
-    float evaluate(float x) const override {
-        return SkTMax(0.f, float(expf(-fAlpha*x*x) - fExpWidth));
-    }
-};
-
-class SkTriangleFilter final : public SkBitmapFilter {
-public:
-    SkTriangleFilter(float width = 1) : SkBitmapFilter(width) {}
-
-    float evaluate(float x) const override {
-        return SkTMax(0.f, fWidth - fabsf(x));
-    }
-};
-
-class SkBoxFilter final : public SkBitmapFilter {
-public:
-    SkBoxFilter(float width = 0.5f) : SkBitmapFilter(width) {}
-
-    float evaluate(float x) const override {
-        return (x >= -fWidth && x < fWidth) ? 1.0f : 0.0f;
-    }
-};
-
-class SkHammingFilter final : public SkBitmapFilter {
-public:
-    SkHammingFilter(float width = 1) : SkBitmapFilter(width) {}
-
-    float evaluate(float x) const override {
-        if (x <= -fWidth || x >= fWidth) {
-            return 0.0f;  // Outside of the window.
-        }
-        if (x > -FLT_EPSILON && x < FLT_EPSILON) {
-            return 1.0f;  // Special case the sinc discontinuity at the origin.
-        }
-        const float xpi = x * static_cast<float>(SK_ScalarPI);
-
-        return ((sk_float_sin(xpi) / xpi) *  // sinc(x)
-                (0.54f + 0.46f * sk_float_cos(xpi / fWidth)));  // hamming(x)
-    }
-};
-
-class SkLanczosFilter final : public SkBitmapFilter {
-public:
-    SkLanczosFilter(float width = 3.f) : SkBitmapFilter(width) {}
-
-    float evaluate(float x) const override {
-        if (x <= -fWidth || x >= fWidth) {
-            return 0.0f;  // Outside of the window.
-        }
-        if (x > -FLT_EPSILON && x < FLT_EPSILON) {
-            return 1.0f;  // Special case the discontinuity at the origin.
-        }
-        float xpi = x * static_cast<float>(SK_ScalarPI);
-        return (sk_float_sin(xpi) / xpi) *  // sinc(x)
-               sk_float_sin(xpi / fWidth) / (xpi / fWidth);  // sinc(x/fWidth)
-    }
-};
-
-
-#endif
diff --git a/src/core/SkBitmapProcState.cpp b/src/core/SkBitmapProcState.cpp
index 302bd054d3..9bc90609b2 100644
--- a/src/core/SkBitmapProcState.cpp
+++ b/src/core/SkBitmapProcState.cpp
@@ -12,7 +12,6 @@
 #include "SkPaint.h"
 #include "SkShader.h"   // for tilemodes
 #include "SkUtilsArm.h"
-#include "SkBitmapScaler.h"
 #include "SkMipMap.h"
 #include "SkPixelRef.h"
 #include "SkImageEncoder.h"
@@ -90,7 +89,7 @@ bool SkBitmapProcInfo::init(const SkMatrix& inv, const SkPaint& paint) {
     fInvMatrix = inv;
     fFilterQuality = paint.getFilterQuality();
 
-    SkDefaultBitmapController controller(SkDefaultBitmapController::CanShadeHQ::kNo);
+    SkDefaultBitmapController controller;
     fBMState = controller.requestBitmap(fProvider, inv, paint.getFilterQuality(),
                                         fBMStateStorage.get(), fBMStateStorage.size());
     // Note : we allow the controller to return an empty (zero-dimension) result. Should we?
@@ -102,6 +101,7 @@ bool SkBitmapProcInfo::init(const SkMatrix& inv, const SkPaint& paint) {
     fRealInvMatrix = fBMState->invMatrix();
     fPaintColor = paint.getColor();
     fFilterQuality = fBMState->quality();
+    SkASSERT(fFilterQuality <= kLow_SkFilterQuality);
     SkASSERT(fPixmap.addr());
 
     // Most of the scanline procs deal with "unit" texture coordinates, as this
diff --git a/src/core/SkBitmapProcState.h b/src/core/SkBitmapProcState.h
index 73eaf4fb7c..c9376c60ae 100644
--- a/src/core/SkBitmapProcState.h
+++ b/src/core/SkBitmapProcState.h
@@ -10,8 +10,8 @@
 
 #include "SkBitmap.h"
 #include "SkBitmapController.h"
-#include "SkBitmapFilter.h"
 #include "SkBitmapProvider.h"
+#include "SkFixed.h"
 #include "SkFloatBits.h"
 #include "SkMatrix.h"
 #include "SkMipMap.h"
diff --git a/src/core/SkBitmapScaler.cpp b/src/core/SkBitmapScaler.cpp
deleted file mode 100644
index 9e82b92404..0000000000
--- a/src/core/SkBitmapScaler.cpp
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright 2015 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "SkBitmapScaler.h"
-#include "SkBitmapFilter.h"
-#include "SkConvolver.h"
-#include "SkImageInfo.h"
-#include "SkPixmap.h"
-#include "SkRect.h"
-#include "SkTArray.h"
-
-// SkResizeFilter ----------------------------------------------------------------
-
-// Encapsulates computation and storage of the filters required for one complete
-// resize operation.
-class SkResizeFilter {
-public:
-    SkResizeFilter(SkBitmapScaler::ResizeMethod method,
-                   int srcFullWidth, int srcFullHeight,
-                   float destWidth, float destHeight,
-                   const SkRect& destSubset);
-    ~SkResizeFilter() { delete fBitmapFilter; }
-
-    // Returns the filled filter values.
-    const SkConvolutionFilter1D& xFilter() { return fXFilter; }
-    const SkConvolutionFilter1D& yFilter() { return fYFilter; }
-
-private:
-
-    SkBitmapFilter* fBitmapFilter;
-
-    // Computes one set of filters either horizontally or vertically. The caller
-    // will specify the "min" and "max" rather than the bottom/top and
-    // right/bottom so that the same code can be re-used in each dimension.
-    //
-    // |srcDependLo| and |srcDependSize| gives the range for the source
-    // depend rectangle (horizontally or vertically at the caller's discretion
-    // -- see above for what this means).
-    //
-    // Likewise, the range of destination values to compute and the scale factor
-    // for the transform is also specified.
-
-    void computeFilters(int srcSize,
-                        float destSubsetLo, float destSubsetSize,
-                        float scale,
-                        SkConvolutionFilter1D* output);
-
-    SkConvolutionFilter1D fXFilter;
-    SkConvolutionFilter1D fYFilter;
-};
-
-SkResizeFilter::SkResizeFilter(SkBitmapScaler::ResizeMethod method,
-                               int srcFullWidth, int srcFullHeight,
-                               float destWidth, float destHeight,
-                               const SkRect& destSubset) {
-
-    SkASSERT(method >= SkBitmapScaler::RESIZE_FirstMethod &&
-             method <= SkBitmapScaler::RESIZE_LastMethod);
-
-    fBitmapFilter = nullptr;
-    switch(method) {
-        case SkBitmapScaler::RESIZE_BOX:
-            fBitmapFilter = new SkBoxFilter;
-            break;
-        case SkBitmapScaler::RESIZE_TRIANGLE:
-            fBitmapFilter = new SkTriangleFilter;
-            break;
-        case SkBitmapScaler::RESIZE_MITCHELL:
-            fBitmapFilter = new SkMitchellFilter;
-            break;
-        case SkBitmapScaler::RESIZE_HAMMING:
-            fBitmapFilter = new SkHammingFilter;
-            break;
-        case SkBitmapScaler::RESIZE_LANCZOS3:
-            fBitmapFilter = new SkLanczosFilter;
-            break;
-    }
-
-
-    float scaleX = destWidth / srcFullWidth;
-    float scaleY = destHeight / srcFullHeight;
-
-    this->computeFilters(srcFullWidth, destSubset.fLeft, destSubset.width(),
-                         scaleX, &fXFilter);
-    if (srcFullWidth == srcFullHeight &&
-        destSubset.fLeft == destSubset.fTop &&
-        destSubset.width() == destSubset.height()&&
-        scaleX == scaleY) {
-        fYFilter = fXFilter;
-    } else {
-        this->computeFilters(srcFullHeight, destSubset.fTop, destSubset.height(),
-                          scaleY, &fYFilter);
-    }
-}
-
-// TODO(egouriou): Take advantage of periods in the convolution.
-// Practical resizing filters are periodic outside of the border area.
-// For Lanczos, a scaling by a (reduced) factor of p/q (q pixels in the
-// source become p pixels in the destination) will have a period of p.
-// A nice consequence is a period of 1 when downscaling by an integral
-// factor. Downscaling from typical display resolutions is also bound
-// to produce interesting periods as those are chosen to have multiple
-// small factors.
-// Small periods reduce computational load and improve cache usage if
-// the coefficients can be shared. For periods of 1 we can consider
-// loading the factors only once outside the borders.
-void SkResizeFilter::computeFilters(int srcSize,
-                                    float destSubsetLo, float destSubsetSize,
-                                    float scale,
-                                    SkConvolutionFilter1D* output) {
-    float destSubsetHi = destSubsetLo + destSubsetSize;  // [lo, hi)
-
-    // When we're doing a magnification, the scale will be larger than one. This
-    // means the destination pixels are much smaller than the source pixels, and
-    // that the range covered by the filter won't necessarily cover any source
-    // pixel boundaries. Therefore, we use these clamped values (max of 1) for
-    // some computations.
-    float clampedScale = SkTMin(1.0f, scale);
-
-    // This is how many source pixels from the center we need to count
-    // to support the filtering function.
-    float srcSupport = fBitmapFilter->width() / clampedScale;
-
-    float invScale = 1.0f / scale;
-
-    SkSTArray<64, float, true> filterValuesArray;
-    SkSTArray<64, SkConvolutionFilter1D::ConvolutionFixed, true> fixedFilterValuesArray;
-
-    // Loop over all pixels in the output range. We will generate one set of
-    // filter values for each one. Those values will tell us how to blend the
-    // source pixels to compute the destination pixel.
-
-    // This is the pixel in the source directly under the pixel in the dest.
-    // Note that we base computations on the "center" of the pixels. To see
-    // why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
-    // downscale should "cover" the pixels around the pixel with *its center*
-    // at coordinates (2.5, 2.5) in the source, not those around (0, 0).
-    // Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
-    destSubsetLo = SkScalarFloorToScalar(destSubsetLo);
-    destSubsetHi = SkScalarCeilToScalar(destSubsetHi);
-    float srcPixel = (destSubsetLo + 0.5f) * invScale;
-    int destLimit = SkScalarTruncToInt(destSubsetHi - destSubsetLo);
-    output->reserveAdditional(destLimit, SkScalarCeilToInt(destLimit * srcSupport * 2));
-    for (int destI = 0; destI < destLimit; srcPixel += invScale, destI++) {
-        // Compute the (inclusive) range of source pixels the filter covers.
-        float srcBegin = SkTMax(0.f, SkScalarFloorToScalar(srcPixel - srcSupport));
-        float srcEnd = SkTMin(srcSize - 1.f, SkScalarCeilToScalar(srcPixel + srcSupport));
-
-        // Compute the unnormalized filter value at each location of the source
-        // it covers.
-
-        // Sum of the filter values for normalizing.
-        // Distance from the center of the filter, this is the filter coordinate
-        // in source space. We also need to consider the center of the pixel
-        // when comparing distance against 'srcPixel'. In the 5x downscale
-        // example used above the distance from the center of the filter to
-        // the pixel with coordinates (2, 2) should be 0, because its center
-        // is at (2.5, 2.5).
-        float destFilterDist = (srcBegin + 0.5f - srcPixel) * clampedScale;
-        int filterCount = SkScalarTruncToInt(srcEnd - srcBegin) + 1;
-        if (filterCount <= 0) {
-            // true when srcSize is equal to srcPixel - srcSupport; this may be a bug
-            return;
-        }
-        filterValuesArray.reset(filterCount);
-        float filterSum = fBitmapFilter->evaluate_n(destFilterDist, clampedScale, filterCount,
-                                                filterValuesArray.begin());
-
-        // The filter must be normalized so that we don't affect the brightness of
-        // the image. Convert to normalized fixed point.
-        int fixedSum = 0;
-        fixedFilterValuesArray.reset(filterCount);
-        const float* filterValues = filterValuesArray.begin();
-        SkConvolutionFilter1D::ConvolutionFixed* fixedFilterValues = fixedFilterValuesArray.begin();
-        float invFilterSum = 1 / filterSum;
-        for (int fixedI = 0; fixedI < filterCount; fixedI++) {
-            int curFixed = SkConvolutionFilter1D::FloatToFixed(filterValues[fixedI] * invFilterSum);
-            fixedSum += curFixed;
-            fixedFilterValues[fixedI] = SkToS16(curFixed);
-        }
-        SkASSERT(fixedSum <= 0x7FFF);
-
-        // The conversion to fixed point will leave some rounding errors, which
-        // we add back in to avoid affecting the brightness of the image. We
-        // arbitrarily add this to the center of the filter array (this won't always
-        // be the center of the filter function since it could get clipped on the
-        // edges, but it doesn't matter enough to worry about that case).
-        int leftovers = SkConvolutionFilter1D::FloatToFixed(1) - fixedSum;
-        fixedFilterValues[filterCount / 2] += leftovers;
-
-        // Now it's ready to go.
-        output->AddFilter(SkScalarFloorToInt(srcBegin), fixedFilterValues, filterCount);
-    }
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-static bool valid_for_resize(const SkPixmap& source, int dstW, int dstH) {
-    // TODO: Seems like we shouldn't care about the swizzle of source, just that it's 8888
-    return source.addr() && source.colorType() == kN32_SkColorType &&
-           source.width() >= 1 && source.height() >= 1 && dstW >= 1 && dstH >= 1;
-}
-
-bool SkBitmapScaler::Resize(const SkPixmap& result, const SkPixmap& source, ResizeMethod method) {
-    if (!valid_for_resize(source, result.width(), result.height())) {
-        return false;
-    }
-    if (!result.addr() || result.colorType() != source.colorType()) {
-        return false;
-    }
-
-    SkRect destSubset = SkRect::MakeIWH(result.width(), result.height());
-
-    SkResizeFilter filter(method, source.width(), source.height(),
-                          result.width(), result.height(), destSubset);
-
-    // Get a subset encompassing this touched area. We construct the
-    // offsets and row strides such that it looks like a new bitmap, while
-    // referring to the old data.
-    const uint8_t* sourceSubset = reinterpret_cast<const uint8_t*>(source.addr());
-
-    return BGRAConvolve2D(sourceSubset, static_cast<int>(source.rowBytes()),
-                          !source.isOpaque(), filter.xFilter(), filter.yFilter(),
-                          static_cast<int>(result.rowBytes()),
-                          static_cast<unsigned char*>(result.writable_addr()));
-}
-
-bool SkBitmapScaler::Resize(SkBitmap* resultPtr, const SkPixmap& source, ResizeMethod method,
-                            int destWidth, int destHeight, SkBitmap::Allocator* allocator) {
-    // Preflight some of the checks, to avoid allocating the result if we don't need it.
-    if (!valid_for_resize(source, destWidth, destHeight)) {
-        return false;
-    }
-
-    SkBitmap result;
-    // Note: pass along the profile information even thought this is no the right answer because
-    // this could be scaling in sRGB.
-    result.setInfo(SkImageInfo::MakeN32(destWidth, destHeight, source.alphaType(),
-                                        sk_ref_sp(source.info().colorSpace())));
-    result.allocPixels(allocator);
-
-    SkPixmap resultPM;
-    if (!result.peekPixels(&resultPM) || !Resize(resultPM, source, method)) {
-        return false;
-    }
-
-    *resultPtr = result;
-    SkASSERT(resultPtr->getPixels());
-    return true;
-}
diff --git a/src/core/SkBitmapScaler.h b/src/core/SkBitmapScaler.h
deleted file mode 100644
index c96be0dbf8..0000000000
--- a/src/core/SkBitmapScaler.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright 2013 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkBitmapScaler_DEFINED
-#define SkBitmapScaler_DEFINED
-
-#include "SkBitmap.h"
-#include "SkConvolver.h"
-
-/** \class SkBitmapScaler
-
-    Provides the interface for high quality image resampling.
- */
-
-class SK_API SkBitmapScaler {
-public:
-    enum ResizeMethod {
-        RESIZE_BOX,
-        RESIZE_TRIANGLE,
-        RESIZE_LANCZOS3,
-        RESIZE_HAMMING,
-        RESIZE_MITCHELL,
-
-        RESIZE_FirstMethod = RESIZE_BOX,
-        RESIZE_LastMethod = RESIZE_MITCHELL,
-    };
-
-    /**
-     *  Given already-allocated src and dst pixmaps, this will scale the src pixels using the
-     *  specified resize-method and write the results into the pixels pointed to by dst.
-     */
-    static bool Resize(const SkPixmap& dst, const SkPixmap& src, ResizeMethod method);
-
-    /**
-     *  Helper function that manages allocating a bitmap to hold the dst pixels, and then calls
-     *  the pixmap version of Resize.
-     */
-    static bool Resize(SkBitmap* result, const SkPixmap& src, ResizeMethod method,
-                       int dest_width, int dest_height, SkBitmap::Allocator* = nullptr);
-};
-
-#endif
diff --git a/src/core/SkBlitter.cpp b/src/core/SkBlitter.cpp
index 45a569835b..5abb08553a 100644
--- a/src/core/SkBlitter.cpp
+++ b/src/core/SkBlitter.cpp
@@ -798,21 +798,16 @@ bool SkBlitter::UseRasterPipelineBlitter(const SkPixmap& device, const SkPaint&
     if (paint.getColorFilter()) {
         return true;
     }
-#ifndef SK_SUPPORT_LEGACY_HQ_SCALER
     if (paint.getFilterQuality() == kHigh_SkFilterQuality) {
         return true;
     }
-#endif
     // ... unless the blend mode is complicated enough.
     if (paint.getBlendMode() > SkBlendMode::kLastSeparableMode) {
         return true;
     }
-
-    // ... or unless we have to deal with perspective.
     if (matrix.hasPerspective()) {
         return true;
     }
-
     // ... or unless the shader is raster pipeline-only.
     if (paint.getShader() && as_SB(paint.getShader())->isRasterPipelineOnly()) {
         return true;
diff --git a/src/core/SkConvolver.cpp b/src/core/SkConvolver.cpp
deleted file mode 100644
index 9f0cfea821..0000000000
--- a/src/core/SkConvolver.cpp
+++ /dev/null
@@ -1,272 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "SkConvolver.h"
-#include "SkOpts.h"
-#include "SkTArray.h"
-
-namespace {
-    // Stores a list of rows in a circular buffer. The usage is you write into it
-    // by calling AdvanceRow. It will keep track of which row in the buffer it
-    // should use next, and the total number of rows added.
-    class CircularRowBuffer {
-    public:
-        // The number of pixels in each row is given in |sourceRowPixelWidth|.
-        // The maximum number of rows needed in the buffer is |maxYFilterSize|
-        // (we only need to store enough rows for the biggest filter).
-        //
-        // We use the |firstInputRow| to compute the coordinates of all of the
-        // following rows returned by Advance().
-        CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize,
-                          int firstInputRow)
-            : fRowByteWidth(destRowPixelWidth * 4),
-              fNumRows(maxYFilterSize),
-              fNextRow(0),
-              fNextRowCoordinate(firstInputRow) {
-            fBuffer.reset(fRowByteWidth * maxYFilterSize);
-            fRowAddresses.reset(fNumRows);
-        }
-
-        // Moves to the next row in the buffer, returning a pointer to the beginning
-        // of it.
-        unsigned char* advanceRow() {
-            unsigned char* row = &fBuffer[fNextRow * fRowByteWidth];
-            fNextRowCoordinate++;
-
-            // Set the pointer to the next row to use, wrapping around if necessary.
-            fNextRow++;
-            if (fNextRow == fNumRows) {
-                fNextRow = 0;
-            }
-            return row;
-        }
-
-        // Returns a pointer to an "unrolled" array of rows. These rows will start
-        // at the y coordinate placed into |*firstRowIndex| and will continue in
-        // order for the maximum number of rows in this circular buffer.
-        //
-        // The |firstRowIndex_| may be negative. This means the circular buffer
-        // starts before the top of the image (it hasn't been filled yet).
-        unsigned char* const* GetRowAddresses(int* firstRowIndex) {
-            // Example for a 4-element circular buffer holding coords 6-9.
-            //   Row 0   Coord 8
-            //   Row 1   Coord 9
-            //   Row 2   Coord 6  <- fNextRow = 2, fNextRowCoordinate = 10.
-            //   Row 3   Coord 7
-            //
-            // The "next" row is also the first (lowest) coordinate. This computation
-            // may yield a negative value, but that's OK, the math will work out
-            // since the user of this buffer will compute the offset relative
-            // to the firstRowIndex and the negative rows will never be used.
-            *firstRowIndex = fNextRowCoordinate - fNumRows;
-
-            int curRow = fNextRow;
-            for (int i = 0; i < fNumRows; i++) {
-                fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth];
-
-                // Advance to the next row, wrapping if necessary.
-                curRow++;
-                if (curRow == fNumRows) {
-                    curRow = 0;
-                }
-            }
-            return &fRowAddresses[0];
-        }
-
-    private:
-        // The buffer storing the rows. They are packed, each one fRowByteWidth.
-        SkTArray<unsigned char> fBuffer;
-
-        // Number of bytes per row in the |buffer|.
-        int fRowByteWidth;
-
-        // The number of rows available in the buffer.
-        int fNumRows;
-
-        // The next row index we should write into. This wraps around as the
-        // circular buffer is used.
-        int fNextRow;
-
-        // The y coordinate of the |fNextRow|. This is incremented each time a
-        // new row is appended and does not wrap.
-        int fNextRowCoordinate;
-
-        // Buffer used by GetRowAddresses().
-        SkTArray<unsigned char*> fRowAddresses;
-    };
-
-}  // namespace
-
-// SkConvolutionFilter1D ---------------------------------------------------------
-
-SkConvolutionFilter1D::SkConvolutionFilter1D()
-: fMaxFilter(0) {
-}
-
-SkConvolutionFilter1D::~SkConvolutionFilter1D() {
-}
-
-void SkConvolutionFilter1D::AddFilter(int filterOffset,
-                                      const ConvolutionFixed* filterValues,
-                                      int filterLength) {
-    // It is common for leading/trailing filter values to be zeros. In such
-    // cases it is beneficial to only store the central factors.
-    // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
-    // a 1080p image this optimization gives a ~10% speed improvement.
-    int filterSize = filterLength;
-    int firstNonZero = 0;
-    while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) {
-        firstNonZero++;
-    }
-
-    if (firstNonZero < filterLength) {
-        // Here we have at least one non-zero factor.
-        int lastNonZero = filterLength - 1;
-        while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) {
-            lastNonZero--;
-        }
-
-        filterOffset += firstNonZero;
-        filterLength = lastNonZero + 1 - firstNonZero;
-        SkASSERT(filterLength > 0);
-
-        fFilterValues.append(filterLength, &filterValues[firstNonZero]);
-    } else {
-        // Here all the factors were zeroes.
-        filterLength = 0;
-    }
-
-    FilterInstance instance;
-
-    // We pushed filterLength elements onto fFilterValues
-    instance.fDataLocation = (static_cast<int>(fFilterValues.count()) -
-                                               filterLength);
-    instance.fOffset = filterOffset;
-    instance.fTrimmedLength = filterLength;
-    instance.fLength = filterSize;
-    fFilters.push(instance);
-
-    fMaxFilter = SkTMax(fMaxFilter, filterLength);
-}
-
-const SkConvolutionFilter1D::ConvolutionFixed* SkConvolutionFilter1D::GetSingleFilter(
-                                        int* specifiedFilterlength,
-                                        int* filterOffset,
-                                        int* filterLength) const {
-    const FilterInstance& filter = fFilters[0];
-    *filterOffset = filter.fOffset;
-    *filterLength = filter.fTrimmedLength;
-    *specifiedFilterlength = filter.fLength;
-    if (filter.fTrimmedLength == 0) {
-        return nullptr;
-    }
-
-    return &fFilterValues[filter.fDataLocation];
-}
-
-bool BGRAConvolve2D(const unsigned char* sourceData,
-                    int sourceByteRowStride,
-                    bool sourceHasAlpha,
-                    const SkConvolutionFilter1D& filterX,
-                    const SkConvolutionFilter1D& filterY,
-                    int outputByteRowStride,
-                    unsigned char* output) {
-
-    int maxYFilterSize = filterY.maxFilter();
-
-    // The next row in the input that we will generate a horizontally
-    // convolved row for. If the filter doesn't start at the beginning of the
-    // image (this is the case when we are only resizing a subset), then we
-    // don't want to generate any output rows before that. Compute the starting
-    // row for convolution as the first pixel for the first vertical filter.
-    int filterOffset, filterLength;
-    const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
-        filterY.FilterForValue(0, &filterOffset, &filterLength);
-    int nextXRow = filterOffset;
-
-    // We loop over each row in the input doing a horizontal convolution. This
-    // will result in a horizontally convolved image. We write the results into
-    // a circular buffer of convolved rows and do vertical convolution as rows
-    // are available. This prevents us from having to store the entire
-    // intermediate image and helps cache coherency.
-    // We will need four extra rows to allow horizontal convolution could be done
-    // simultaneously. We also pad each row in row buffer to be aligned-up to
-    // 32 bytes.
-    // TODO(jiesun): We do not use aligned load from row buffer in vertical
-    // convolution pass yet. Somehow Windows does not like it.
-    int rowBufferWidth = (filterX.numValues() + 31) & ~0x1F;
-    int rowBufferHeight = maxYFilterSize +
-                          (SkOpts::convolve_4_rows_horizontally != nullptr ? 4 : 0);
-
-    // check for too-big allocation requests : crbug.com/528628
-    {
-        int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight);
-        // need some limit, to avoid over-committing success from malloc, but then
-        // crashing when we try to actually use the memory.
-        // 100meg seems big enough to allow "normal" zoom factors and image sizes through
-        // while avoiding the crash seen by the bug (crbug.com/528628)
-        if (size > 100 * 1024 * 1024) {
-//            SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size);
-            return false;
-        }
-    }
-
-    CircularRowBuffer rowBuffer(rowBufferWidth,
-                                rowBufferHeight,
-                                filterOffset);
-
-    // Loop over every possible output row, processing just enough horizontal
-    // convolutions to run each subsequent vertical convolution.
-    SkASSERT(outputByteRowStride >= filterX.numValues() * 4);
-    int numOutputRows = filterY.numValues();
-
-    // We need to check which is the last line to convolve before we advance 4
-    // lines in one iteration.
-    int lastFilterOffset, lastFilterLength;
-    filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,
-                           &lastFilterLength);
-
-    for (int outY = 0; outY < numOutputRows; outY++) {
-        filterValues = filterY.FilterForValue(outY,
-                                              &filterOffset, &filterLength);
-
-        // Generate output rows until we have enough to run the current filter.
-        while (nextXRow < filterOffset + filterLength) {
-            if (SkOpts::convolve_4_rows_horizontally != nullptr &&
-                nextXRow + 3 < lastFilterOffset + lastFilterLength) {
-                const unsigned char* src[4];
-                unsigned char* outRow[4];
-                for (int i = 0; i < 4; ++i) {
-                    src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride];
-                    outRow[i] = rowBuffer.advanceRow();
-                }
-                SkOpts::convolve_4_rows_horizontally(src, filterX, outRow, 4*rowBufferWidth);
-                nextXRow += 4;
-            } else {
-                SkOpts::convolve_horizontally(
-                        &sourceData[(uint64_t)nextXRow * sourceByteRowStride],
-                        filterX, rowBuffer.advanceRow(), sourceHasAlpha);
-                nextXRow++;
-            }
-        }
-
-        // Compute where in the output image this row of final data will go.
-        unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride];
-
-        // Get the list of rows that the circular buffer has, in order.
-        int firstRowInCircularBuffer;
-        unsigned char* const* rowsToConvolve =
-            rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);
-
-        // Now compute the start of the subset of those rows that the filter needs.
-        unsigned char* const* firstRowForFilter =
-            &rowsToConvolve[filterOffset - firstRowInCircularBuffer];
-
-        SkOpts::convolve_vertically(filterValues, filterLength,
-                                    firstRowForFilter,
-                                    filterX.numValues(), curOutputRow,
-                                    sourceHasAlpha);
-    }
-    return true;
-}
diff --git a/src/core/SkConvolver.h b/src/core/SkConvolver.h
deleted file mode 100644
index 4c4b1fd711..0000000000
--- a/src/core/SkConvolver.h
+++ /dev/null
@@ -1,173 +0,0 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef SK_CONVOLVER_H
-#define SK_CONVOLVER_H
-
-#include "SkSize.h"
-#include "SkTDArray.h"
-
-// avoid confusion with Mac OS X's math library (Carbon)
-#if defined(__APPLE__)
-#undef FloatToConvolutionFixed
-#undef ConvolutionFixedToFloat
-#undef FloatToFixed
-#undef FixedToFloat
-#endif
-
-// Represents a filter in one dimension. Each output pixel has one entry in this
-// object for the filter values contributing to it. You build up the filter
-// list by calling AddFilter for each output pixel (in order).
-//
-// We do 2-dimensional convolution by first convolving each row by one
-// SkConvolutionFilter1D, then convolving each column by another one.
-//
-// Entries are stored in ConvolutionFixed point, shifted left by kShiftBits.
-class SkConvolutionFilter1D {
-public:
-    typedef short ConvolutionFixed;
-
-    // The number of bits that ConvolutionFixed point values are shifted by.
-    enum { kShiftBits = 14 };
-
-    SK_API SkConvolutionFilter1D();
-    SK_API ~SkConvolutionFilter1D();
-
-    // Convert between floating point and our ConvolutionFixed point representation.
-    static ConvolutionFixed FloatToFixed(float f) {
-        return static_cast<ConvolutionFixed>(f * (1 << kShiftBits));
-    }
-    static unsigned char FixedToChar(ConvolutionFixed x) {
-        return static_cast<unsigned char>(x >> kShiftBits);
-    }
-    static float FixedToFloat(ConvolutionFixed x) {
-        // The cast relies on ConvolutionFixed being a short, implying that on
-        // the platforms we care about all (16) bits will fit into
-        // the mantissa of a (32-bit) float.
-        static_assert(sizeof(ConvolutionFixed) == 2, "ConvolutionFixed_type_should_fit_in_float_mantissa");
-        float raw = static_cast<float>(x);
-        return ldexpf(raw, -kShiftBits);
-    }
-
-    // Returns the maximum pixel span of a filter.
-    int maxFilter() const { return fMaxFilter; }
-
-    // Returns the number of filters in this filter. This is the dimension of the
-    // output image.
-    int numValues() const { return static_cast<int>(fFilters.count()); }
-
-    void reserveAdditional(int filterCount, int filterValueCount) {
-        fFilters.setReserve(fFilters.count() + filterCount);
-        fFilterValues.setReserve(fFilterValues.count() + filterValueCount);
-    }
-
-    // Appends the given list of scaling values for generating a given output
-    // pixel. |filterOffset| is the distance from the edge of the image to where
-    // the scaling factors start. The scaling factors apply to the source pixels
-    // starting from this position, and going for the next |filterLength| pixels.
-    //
-    // You will probably want to make sure your input is normalized (that is,
-    // all entries in |filterValuesg| sub to one) to prevent affecting the overall
-    // brighness of the image.
-    //
-    // The filterLength must be > 0.
-    void AddFilter(int filterOffset,
-                   const ConvolutionFixed* filterValues,
-                   int filterLength);
-
-    // Retrieves a filter for the given |valueOffset|, a position in the output
-    // image in the direction we're convolving. The offset and length of the
-    // filter values are put into the corresponding out arguments (see AddFilter
-    // above for what these mean), and a pointer to the first scaling factor is
-    // returned. There will be |filterLength| values in this array.
-    inline const ConvolutionFixed* FilterForValue(int valueOffset,
-                                       int* filterOffset,
-                                       int* filterLength) const {
-        const FilterInstance& filter = fFilters[valueOffset];
-        *filterOffset = filter.fOffset;
-        *filterLength = filter.fTrimmedLength;
-        if (filter.fTrimmedLength == 0) {
-            return nullptr;
-        }
-        return &fFilterValues[filter.fDataLocation];
-    }
-
-  // Retrieves the filter for the offset 0, presumed to be the one and only.
-  // The offset and length of the filter values are put into the corresponding
-  // out arguments (see AddFilter). Note that |filterLegth| and
-  // |specifiedFilterLength| may be different if leading/trailing zeros of the
-  // original floating point form were clipped.
-  // There will be |filterLength| values in the return array.
-  // Returns nullptr if the filter is 0-length (for instance when all floating
-  // point values passed to AddFilter were clipped to 0).
-    SK_API const ConvolutionFixed* GetSingleFilter(int* specifiedFilterLength,
-        int* filterOffset,
-        int* filterLength) const;
-
-    // Add another value to the fFilterValues array -- useful for
-    // SIMD padding which happens outside of this class.
-
-    void addFilterValue( ConvolutionFixed val ) {
-        fFilterValues.push( val );
-    }
-private:
-    struct FilterInstance {
-        // Offset within filterValues for this instance of the filter.
-        int fDataLocation;
-
-        // Distance from the left of the filter to the center. IN PIXELS
-        int fOffset;
-
-        // Number of values in this filter instance.
-        int fTrimmedLength;
-
-        // Filter length as specified. Note that this may be different from
-        // 'trimmed_length' if leading/trailing zeros of the original floating
-        // point form were clipped differently on each tail.
-        int fLength;
-    };
-
-    // Stores the information for each filter added to this class.
-    SkTDArray<FilterInstance> fFilters;
-
-    // We store all the filter values in this flat list, indexed by
-    // |FilterInstance.data_location| to avoid the mallocs required for storing
-    // each one separately.
-    SkTDArray<ConvolutionFixed> fFilterValues;
-
-    // The maximum size of any filter we've added.
-    int fMaxFilter;
-};
-
-// Does a two-dimensional convolution on the given source image.
-//
-// It is assumed the source pixel offsets referenced in the input filters
-// reference only valid pixels, so the source image size is not required. Each
-// row of the source image starts |sourceByteRowStride| after the previous
-// one (this allows you to have rows with some padding at the end).
-//
-// The result will be put into the given output buffer. The destination image
-// size will be xfilter.numValues() * yfilter.numValues() pixels. It will be
-// in rows of exactly xfilter.numValues() * 4 bytes.
-//
-// |sourceHasAlpha| is a hint that allows us to avoid doing computations on
-// the alpha channel if the image is opaque. If you don't know, set this to
-// true and it will work properly, but setting this to false will be a few
-// percent faster if you know the image is opaque.
-//
-// The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order
-// (this is ARGB when loaded into 32-bit words on a little-endian machine).
-/**
- *  Returns false if it was unable to perform the convolution/rescale. in which case the output
- *  buffer is assumed to be undefined.
- */
-SK_API bool BGRAConvolve2D(const unsigned char* sourceData,
-    int sourceByteRowStride,
-    bool sourceHasAlpha,
-    const SkConvolutionFilter1D& xfilter,
-    const SkConvolutionFilter1D& yfilter,
-    int outputByteRowStride,
-    unsigned char* output);
-
-#endif  // SK_CONVOLVER_H
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index 33c3690f4e..e5e304cef9 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -36,7 +36,6 @@
     #define SK_OPTS_NS portable
 #endif
 
-#include "SkBitmapFilter_opts.h"
 #include "SkBlend_opts.h"
 #include "SkBlitMask_opts.h"
 #include "SkBlitRow_opts.h"
@@ -88,10 +87,6 @@ namespace SkOpts {
 
     DEFINE_DEFAULT(hash_fn);
 
-    DEFINE_DEFAULT(convolve_vertically);
-    DEFINE_DEFAULT(convolve_horizontally);
-    DEFINE_DEFAULT(convolve_4_rows_horizontally);
-
 #undef DEFINE_DEFAULT
 
     // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
@@ -99,7 +94,6 @@ namespace SkOpts {
     void Init_sse41();
     void Init_sse42();
     void Init_avx();
-    void Init_hsw();
     void Init_crc32();
 
     static void init() {
@@ -109,7 +103,6 @@ namespace SkOpts {
         if (SkCpu::Supports(SkCpu::SSE41)) { Init_sse41(); }
         if (SkCpu::Supports(SkCpu::SSE42)) { Init_sse42(); }
         if (SkCpu::Supports(SkCpu::AVX  )) { Init_avx();   }
-        if (SkCpu::Supports(SkCpu::HSW  )) { Init_hsw();   }
 
     #elif defined(SK_CPU_ARM64)
         if (SkCpu::Supports(SkCpu::CRC32)) { Init_crc32(); }
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index a4f1ea284c..3bea740cb6 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -8,7 +8,6 @@
 #ifndef SkOpts_DEFINED
 #define SkOpts_DEFINED
 
-#include "SkConvolver.h"
 #include "SkRasterPipeline.h"
 #include "SkTypes.h"
 #include "SkXfermodePriv.h"
@@ -62,15 +61,6 @@ namespace SkOpts {
     static inline uint32_t hash(const void* data, size_t bytes, uint32_t seed=0) {
         return hash_fn(data, bytes, seed);
     }
-
-    extern void (*convolve_vertically)(const SkConvolutionFilter1D::ConvolutionFixed* filter_values,
-                                       int filter_length, unsigned char* const* source_data_rows,
-                                       int pixel_width, unsigned char* out_row, bool has_alpha);
-    extern void (*convolve_4_rows_horizontally)(const unsigned char* src_data[4],
-                                                const SkConvolutionFilter1D& filter,
-                                                unsigned char* out_row[4], size_t out_row_bytes);
-    extern void (*convolve_horizontally)(const unsigned char* src_data, const SkConvolutionFilter1D& filter,
-                                         unsigned char* out_row, bool has_alpha);
 }
 
 #endif//SkOpts_DEFINED
diff --git a/src/opts/SkBitmapFilter_opts.h b/src/opts/SkBitmapFilter_opts.h
deleted file mode 100644
index 4f21c579fb..0000000000
--- a/src/opts/SkBitmapFilter_opts.h
+++ /dev/null
@@ -1,940 +0,0 @@
-/*
- * Copyright 2016 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkBitmapFilter_opts_DEFINED
-#define SkBitmapFilter_opts_DEFINED
-
-#include "SkConvolver.h"
-
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
-    #include <immintrin.h>
-#elif defined(SK_ARM_HAS_NEON)
-    #include <arm_neon.h>
-#endif
-
-namespace SK_OPTS_NS {
-
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
-
-    static SK_ALWAYS_INLINE void AccumRemainder(const unsigned char* pixelsLeft,
-            const SkConvolutionFilter1D::ConvolutionFixed* filterValues, __m128i& accum, int r) {
-        int remainder[4] = {0};
-        for (int i = 0; i < r; i++) {
-            SkConvolutionFilter1D::ConvolutionFixed coeff = filterValues[i];
-            remainder[0] += coeff * pixelsLeft[i * 4 + 0];
-            remainder[1] += coeff * pixelsLeft[i * 4 + 1];
-            remainder[2] += coeff * pixelsLeft[i * 4 + 2];
-            remainder[3] += coeff * pixelsLeft[i * 4 + 3];
-        }
-        __m128i t = _mm_setr_epi32(remainder[0], remainder[1], remainder[2], remainder[3]);
-        accum = _mm_add_epi32(accum, t);
-    }
-
-    // Convolves horizontally along a single row. The row data is given in
-    // |srcData| and continues for the numValues() of the filter.
-    void convolve_horizontally(const unsigned char* srcData,
-                               const SkConvolutionFilter1D& filter,
-                               unsigned char* outRow,
-                               bool /*hasAlpha*/) {
-        // Output one pixel each iteration, calculating all channels (RGBA) together.
-        int numValues = filter.numValues();
-        for (int outX = 0; outX < numValues; outX++) {
-            // Get the filter that determines the current output pixel.
-            int filterOffset, filterLength;
-            const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
-                filter.FilterForValue(outX, &filterOffset, &filterLength);
-
-            // Compute the first pixel in this row that the filter affects. It will
-            // touch |filterLength| pixels (4 bytes each) after this.
-            const unsigned char* rowToFilter = &srcData[filterOffset * 4];
-
-            __m128i zero = _mm_setzero_si128();
-            __m128i accum = _mm_setzero_si128();
-
-            // We will load and accumulate with four coefficients per iteration.
-            for (int filterX = 0; filterX < filterLength >> 2; filterX++) {
-                // Load 4 coefficients => duplicate 1st and 2nd of them for all channels.
-                __m128i coeff, coeff16;
-                // [16] xx xx xx xx c3 c2 c1 c0
-                coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filterValues));
-                // [16] xx xx xx xx c1 c1 c0 c0
-                coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
-                // [16] c1 c1 c1 c1 c0 c0 c0 c0
-                coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
-
-                // Load four pixels => unpack the first two pixels to 16 bits =>
-                // multiply with coefficients => accumulate the convolution result.
-                // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
-                __m128i src8 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(rowToFilter));
-                // [16] a1 b1 g1 r1 a0 b0 g0 r0
-                __m128i src16 = _mm_unpacklo_epi8(src8, zero);
-                __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
-                __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
-                // [32]  a0*c0 b0*c0 g0*c0 r0*c0
-                __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-                accum = _mm_add_epi32(accum, t);
-                // [32]  a1*c1 b1*c1 g1*c1 r1*c1
-                t = _mm_unpackhi_epi16(mul_lo, mul_hi);
-                accum = _mm_add_epi32(accum, t);
-
-                // Duplicate 3rd and 4th coefficients for all channels =>
-                // unpack the 3rd and 4th pixels to 16 bits => multiply with coefficients
-                // => accumulate the convolution results.
-                // [16] xx xx xx xx c3 c3 c2 c2
-                coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
-                // [16] c3 c3 c3 c3 c2 c2 c2 c2
-                coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
-                // [16] a3 g3 b3 r3 a2 g2 b2 r2
-                src16 = _mm_unpackhi_epi8(src8, zero);
-                mul_hi = _mm_mulhi_epi16(src16, coeff16);
-                mul_lo = _mm_mullo_epi16(src16, coeff16);
-                // [32]  a2*c2 b2*c2 g2*c2 r2*c2
-                t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-                accum = _mm_add_epi32(accum, t);
-                // [32]  a3*c3 b3*c3 g3*c3 r3*c3
-                t = _mm_unpackhi_epi16(mul_lo, mul_hi);
-                accum = _mm_add_epi32(accum, t);
-
-                // Advance the pixel and coefficients pointers.
-                rowToFilter += 16;
-                filterValues += 4;
-            }
-
-            // When |filterLength| is not divisible by 4, we accumulate the last 1 - 3
-            // coefficients one at a time.
-            int r = filterLength & 3;
-            if (r) {
-                int remainderOffset = (filterOffset + filterLength - r) * 4;
-                AccumRemainder(srcData + remainderOffset, filterValues, accum, r);
-            }
-
-            // Shift right for fixed point implementation.
-            accum = _mm_srai_epi32(accum, SkConvolutionFilter1D::kShiftBits);
-
-            // Packing 32 bits |accum| to 16 bits per channel (signed saturation).
-            accum = _mm_packs_epi32(accum, zero);
-            // Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
-            accum = _mm_packus_epi16(accum, zero);
-
-            // Store the pixel value of 32 bits.
-            *(reinterpret_cast<int*>(outRow)) = _mm_cvtsi128_si32(accum);
-            outRow += 4;
-        }
-    }
-
-    // Convolves horizontally along four rows. The row data is given in
-    // |srcData| and continues for the numValues() of the filter.
-    // The algorithm is almost same as |convolve_horizontally|. Please
-    // refer to that function for detailed comments.
-    void convolve_4_rows_horizontally(const unsigned char* srcData[4],
-                                      const SkConvolutionFilter1D& filter,
-                                      unsigned char* outRow[4],
-                                      size_t outRowBytes) {
-        SkDEBUGCODE(const unsigned char* out_row_0_start = outRow[0];)
-
-        // Output one pixel each iteration, calculating all channels (RGBA) together.
-        int numValues = filter.numValues();
-        for (int outX = 0; outX < numValues; outX++) {
-            int filterOffset, filterLength;
-            const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
-                filter.FilterForValue(outX, &filterOffset, &filterLength);
-
-            __m128i zero = _mm_setzero_si128();
-
-            // four pixels in a column per iteration.
-            __m128i accum0 = _mm_setzero_si128();
-            __m128i accum1 = _mm_setzero_si128();
-            __m128i accum2 = _mm_setzero_si128();
-            __m128i accum3 = _mm_setzero_si128();
-
-            int start = filterOffset * 4;
-            // We will load and accumulate with four coefficients per iteration.
-            for (int filterX = 0; filterX < (filterLength >> 2); filterX++) {
-                __m128i coeff, coeff16lo, coeff16hi;
-                // [16] xx xx xx xx c3 c2 c1 c0
-                coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filterValues));
-                // [16] xx xx xx xx c1 c1 c0 c0
-                coeff16lo = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
-                // [16] c1 c1 c1 c1 c0 c0 c0 c0
-                coeff16lo = _mm_unpacklo_epi16(coeff16lo, coeff16lo);
-                // [16] xx xx xx xx c3 c3 c2 c2
-                coeff16hi = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
-                // [16] c3 c3 c3 c3 c2 c2 c2 c2
-                coeff16hi = _mm_unpacklo_epi16(coeff16hi, coeff16hi);
-
-                __m128i src8, src16, mul_hi, mul_lo, t;
-
-#define ITERATION(src, accum)                                                    \
-                src8 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));   \
-                src16 = _mm_unpacklo_epi8(src8, zero);                           \
-                mul_hi = _mm_mulhi_epi16(src16, coeff16lo);                      \
-                mul_lo = _mm_mullo_epi16(src16, coeff16lo);                      \
-                t = _mm_unpacklo_epi16(mul_lo, mul_hi);                          \
-                accum = _mm_add_epi32(accum, t);                                 \
-                t = _mm_unpackhi_epi16(mul_lo, mul_hi);                          \
-                accum = _mm_add_epi32(accum, t);                                 \
-                src16 = _mm_unpackhi_epi8(src8, zero);                           \
-                mul_hi = _mm_mulhi_epi16(src16, coeff16hi);                      \
-                mul_lo = _mm_mullo_epi16(src16, coeff16hi);                      \
-                t = _mm_unpacklo_epi16(mul_lo, mul_hi);                          \
-                accum = _mm_add_epi32(accum, t);                                 \
-                t = _mm_unpackhi_epi16(mul_lo, mul_hi);                          \
-                accum = _mm_add_epi32(accum, t)
-
-                ITERATION(srcData[0] + start, accum0);
-                ITERATION(srcData[1] + start, accum1);
-                ITERATION(srcData[2] + start, accum2);
-                ITERATION(srcData[3] + start, accum3);
-
-                start += 16;
-                filterValues += 4;
-            }
-
-            int r = filterLength & 3;
-            if (r) {
-                int remainderOffset = (filterOffset + filterLength - r) * 4;
-                AccumRemainder(srcData[0] + remainderOffset, filterValues, accum0, r);
-                AccumRemainder(srcData[1] + remainderOffset, filterValues, accum1, r);
-                AccumRemainder(srcData[2] + remainderOffset, filterValues, accum2, r);
-                AccumRemainder(srcData[3] + remainderOffset, filterValues, accum3, r);
-            }
-
-            accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
-            accum0 = _mm_packs_epi32(accum0, zero);
-            accum0 = _mm_packus_epi16(accum0, zero);
-            accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
-            accum1 = _mm_packs_epi32(accum1, zero);
-            accum1 = _mm_packus_epi16(accum1, zero);
-            accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
-            accum2 = _mm_packs_epi32(accum2, zero);
-            accum2 = _mm_packus_epi16(accum2, zero);
-            accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
-            accum3 = _mm_packs_epi32(accum3, zero);
-            accum3 = _mm_packus_epi16(accum3, zero);
-
-            // We seem to be running off the edge here (chromium:491660).
-            SkASSERT(((size_t)outRow[0] - (size_t)out_row_0_start) < outRowBytes);
-
-            *(reinterpret_cast<int*>(outRow[0])) = _mm_cvtsi128_si32(accum0);
-            *(reinterpret_cast<int*>(outRow[1])) = _mm_cvtsi128_si32(accum1);
-            *(reinterpret_cast<int*>(outRow[2])) = _mm_cvtsi128_si32(accum2);
-            *(reinterpret_cast<int*>(outRow[3])) = _mm_cvtsi128_si32(accum3);
-
-            outRow[0] += 4;
-            outRow[1] += 4;
-            outRow[2] += 4;
-            outRow[3] += 4;
-        }
-    }
-
-    // Does vertical convolution to produce one output row. The filter values and
-    // length are given in the first two parameters. These are applied to each
-    // of the rows pointed to in the |sourceDataRows| array, with each row
-    // being |pixelWidth| wide.
-    //
-    // The output must have room for |pixelWidth * 4| bytes.
-    template<bool hasAlpha>
-    void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
-                            int filterLength,
-                            unsigned char* const* sourceDataRows,
-                            int pixelWidth,
-                            unsigned char* outRow) {
-        // Output four pixels per iteration (16 bytes).
-        int width = pixelWidth & ~3;
-        __m128i zero = _mm_setzero_si128();
-        for (int outX = 0; outX < width; outX += 4) {
-            // Accumulated result for each pixel. 32 bits per RGBA channel.
-            __m128i accum0 = _mm_setzero_si128();
-            __m128i accum1 = _mm_setzero_si128();
-            __m128i accum2 = _mm_setzero_si128();
-            __m128i accum3 = _mm_setzero_si128();
-
-            // Convolve with one filter coefficient per iteration.
-            for (int filterY = 0; filterY < filterLength; filterY++) {
-
-                // Duplicate the filter coefficient 8 times.
-                // [16] cj cj cj cj cj cj cj cj
-                __m128i coeff16 = _mm_set1_epi16(filterValues[filterY]);
-
-                // Load four pixels (16 bytes) together.
-                // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
-                const __m128i* src = reinterpret_cast<const __m128i*>(
-                    &sourceDataRows[filterY][outX << 2]);
-                __m128i src8 = _mm_loadu_si128(src);
-
-                // Unpack 1st and 2nd pixels from 8 bits to 16 bits for each channels =>
-                // multiply with current coefficient => accumulate the result.
-                // [16] a1 b1 g1 r1 a0 b0 g0 r0
-                __m128i src16 = _mm_unpacklo_epi8(src8, zero);
-                __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
-                __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
-                // [32] a0 b0 g0 r0
-                __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-                accum0 = _mm_add_epi32(accum0, t);
-                // [32] a1 b1 g1 r1
-                t = _mm_unpackhi_epi16(mul_lo, mul_hi);
-                accum1 = _mm_add_epi32(accum1, t);
-
-                // Unpack 3rd and 4th pixels from 8 bits to 16 bits for each channels =>
-                // multiply with current coefficient => accumulate the result.
-                // [16] a3 b3 g3 r3 a2 b2 g2 r2
-                src16 = _mm_unpackhi_epi8(src8, zero);
-                mul_hi = _mm_mulhi_epi16(src16, coeff16);
-                mul_lo = _mm_mullo_epi16(src16, coeff16);
-                // [32] a2 b2 g2 r2
-                t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-                accum2 = _mm_add_epi32(accum2, t);
-                // [32] a3 b3 g3 r3
-                t = _mm_unpackhi_epi16(mul_lo, mul_hi);
-                accum3 = _mm_add_epi32(accum3, t);
-            }
-
-            // Shift right for fixed point implementation.
-            accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
-            accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
-            accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
-            accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
-
-            // Packing 32 bits |accum| to 16 bits per channel (signed saturation).
-            // [16] a1 b1 g1 r1 a0 b0 g0 r0
-            accum0 = _mm_packs_epi32(accum0, accum1);
-            // [16] a3 b3 g3 r3 a2 b2 g2 r2
-            accum2 = _mm_packs_epi32(accum2, accum3);
-
-            // Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
-            // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
-            accum0 = _mm_packus_epi16(accum0, accum2);
-
-            if (hasAlpha) {
-                // Compute the max(ri, gi, bi) for each pixel.
-                // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
-                __m128i a = _mm_srli_epi32(accum0, 8);
-                // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
-                __m128i b = _mm_max_epu8(a, accum0);  // Max of r and g.
-                // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
-                a = _mm_srli_epi32(accum0, 16);
-                // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
-                b = _mm_max_epu8(a, b);  // Max of r and g and b.
-                // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
-                b = _mm_slli_epi32(b, 24);
-
-                // Make sure the value of alpha channel is always larger than maximum
-                // value of color channels.
-                accum0 = _mm_max_epu8(b, accum0);
-            } else {
-                // Set value of alpha channels to 0xFF.
-                __m128i mask = _mm_set1_epi32(0xff000000);
-                accum0 = _mm_or_si128(accum0, mask);
-            }
-
-            // Store the convolution result (16 bytes) and advance the pixel pointers.
-            _mm_storeu_si128(reinterpret_cast<__m128i*>(outRow), accum0);
-            outRow += 16;
-        }
-
-        // When the width of the output is not divisible by 4, We need to save one
-        // pixel (4 bytes) each time. And also the fourth pixel is always absent.
-        int r = pixelWidth & 3;
-        if (r) {
-            __m128i accum0 = _mm_setzero_si128();
-            __m128i accum1 = _mm_setzero_si128();
-            __m128i accum2 = _mm_setzero_si128();
-            for (int filterY = 0; filterY < filterLength; ++filterY) {
-                __m128i coeff16 = _mm_set1_epi16(filterValues[filterY]);
-                // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
-                const __m128i* src = reinterpret_cast<const __m128i*>(
-                    &sourceDataRows[filterY][width << 2]);
-                __m128i src8 = _mm_loadu_si128(src);
-                // [16] a1 b1 g1 r1 a0 b0 g0 r0
-                __m128i src16 = _mm_unpacklo_epi8(src8, zero);
-                __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
-                __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
-                // [32] a0 b0 g0 r0
-                __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-                accum0 = _mm_add_epi32(accum0, t);
-                // [32] a1 b1 g1 r1
-                t = _mm_unpackhi_epi16(mul_lo, mul_hi);
-                accum1 = _mm_add_epi32(accum1, t);
-                // [16] a3 b3 g3 r3 a2 b2 g2 r2
-                src16 = _mm_unpackhi_epi8(src8, zero);
-                mul_hi = _mm_mulhi_epi16(src16, coeff16);
-                mul_lo = _mm_mullo_epi16(src16, coeff16);
-                // [32] a2 b2 g2 r2
-                t = _mm_unpacklo_epi16(mul_lo, mul_hi);
-                accum2 = _mm_add_epi32(accum2, t);
-            }
-
-            accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
-            accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
-            accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
-            // [16] a1 b1 g1 r1 a0 b0 g0 r0
-            accum0 = _mm_packs_epi32(accum0, accum1);
-            // [16] a3 b3 g3 r3 a2 b2 g2 r2
-            accum2 = _mm_packs_epi32(accum2, zero);
-            // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
-            accum0 = _mm_packus_epi16(accum0, accum2);
-            if (hasAlpha) {
-                // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
-                __m128i a = _mm_srli_epi32(accum0, 8);
-                // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
-                __m128i b = _mm_max_epu8(a, accum0);  // Max of r and g.
-                // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
-                a = _mm_srli_epi32(accum0, 16);
-                // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
-                b = _mm_max_epu8(a, b);  // Max of r and g and b.
-                // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
-                b = _mm_slli_epi32(b, 24);
-                accum0 = _mm_max_epu8(b, accum0);
-            } else {
-                __m128i mask = _mm_set1_epi32(0xff000000);
-                accum0 = _mm_or_si128(accum0, mask);
-            }
-
-            for (int i = 0; i < r; i++) {
-                *(reinterpret_cast<int*>(outRow)) = _mm_cvtsi128_si32(accum0);
-                accum0 = _mm_srli_si128(accum0, 4);
-                outRow += 4;
-            }
-        }
-    }
-
-#elif defined(SK_ARM_HAS_NEON)
-
-    static SK_ALWAYS_INLINE void AccumRemainder(const unsigned char* pixelsLeft,
-            const SkConvolutionFilter1D::ConvolutionFixed* filterValues, int32x4_t& accum, int r) {
-        int remainder[4] = {0};
-        for (int i = 0; i < r; i++) {
-            SkConvolutionFilter1D::ConvolutionFixed coeff = filterValues[i];
-            remainder[0] += coeff * pixelsLeft[i * 4 + 0];
-            remainder[1] += coeff * pixelsLeft[i * 4 + 1];
-            remainder[2] += coeff * pixelsLeft[i * 4 + 2];
-            remainder[3] += coeff * pixelsLeft[i * 4 + 3];
-        }
-        int32x4_t t = {remainder[0], remainder[1], remainder[2], remainder[3]};
-        accum += t;
-    }
-
-    // Convolves horizontally along a single row. The row data is given in
-    // |srcData| and continues for the numValues() of the filter.
-    void convolve_horizontally(const unsigned char* srcData,
-                               const SkConvolutionFilter1D& filter,
-                               unsigned char* outRow,
-                               bool /*hasAlpha*/) {
-        // Loop over each pixel on this row in the output image.
-        int numValues = filter.numValues();
-        for (int outX = 0; outX < numValues; outX++) {
-            uint8x8_t coeff_mask0 = vcreate_u8(0x0100010001000100);
-            uint8x8_t coeff_mask1 = vcreate_u8(0x0302030203020302);
-            uint8x8_t coeff_mask2 = vcreate_u8(0x0504050405040504);
-            uint8x8_t coeff_mask3 = vcreate_u8(0x0706070607060706);
-            // Get the filter that determines the current output pixel.
-            int filterOffset, filterLength;
-            const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
-                filter.FilterForValue(outX, &filterOffset, &filterLength);
-
-            // Compute the first pixel in this row that the filter affects. It will
-            // touch |filterLength| pixels (4 bytes each) after this.
-            const unsigned char* rowToFilter = &srcData[filterOffset * 4];
-
-            // Apply the filter to the row to get the destination pixel in |accum|.
-            int32x4_t accum = vdupq_n_s32(0);
-            for (int filterX = 0; filterX < filterLength >> 2; filterX++) {
-                // Load 4 coefficients
-                int16x4_t coeffs, coeff0, coeff1, coeff2, coeff3;
-                coeffs = vld1_s16(filterValues);
-                coeff0 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask0));
-                coeff1 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask1));
-                coeff2 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask2));
-                coeff3 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask3));
-
-                // Load pixels and calc
-                uint8x16_t pixels = vld1q_u8(rowToFilter);
-                int16x8_t p01_16 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pixels)));
-                int16x8_t p23_16 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pixels)));
-
-                int16x4_t p0_src = vget_low_s16(p01_16);
-                int16x4_t p1_src = vget_high_s16(p01_16);
-                int16x4_t p2_src = vget_low_s16(p23_16);
-                int16x4_t p3_src = vget_high_s16(p23_16);
-
-                int32x4_t p0 = vmull_s16(p0_src, coeff0);
-                int32x4_t p1 = vmull_s16(p1_src, coeff1);
-                int32x4_t p2 = vmull_s16(p2_src, coeff2);
-                int32x4_t p3 = vmull_s16(p3_src, coeff3);
-
-                accum += p0;
-                accum += p1;
-                accum += p2;
-                accum += p3;
-
-                // Advance the pointers
-                rowToFilter += 16;
-                filterValues += 4;
-            }
-
-            int r = filterLength & 3;
-            if (r) {
-                int remainder_offset = (filterOffset + filterLength - r) * 4;
-                AccumRemainder(srcData + remainder_offset, filterValues, accum, r);
-            }
-
-            // Bring this value back in range. All of the filter scaling factors
-            // are in fixed point with kShiftBits bits of fractional part.
-            accum = vshrq_n_s32(accum, SkConvolutionFilter1D::kShiftBits);
-
-            // Pack and store the new pixel.
-            int16x4_t accum16 = vqmovn_s32(accum);
-            uint8x8_t accum8 = vqmovun_s16(vcombine_s16(accum16, accum16));
-            vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow), vreinterpret_u32_u8(accum8), 0);
-            outRow += 4;
-        }
-    }
-
-    // Convolves horizontally along four rows. The row data is given in
-    // |srcData| and continues for the numValues() of the filter.
-    // The algorithm is almost same as |convolve_horizontally|. Please
-    // refer to that function for detailed comments.
-    void convolve_4_rows_horizontally(const unsigned char* srcData[4],
-                                      const SkConvolutionFilter1D& filter,
-                                      unsigned char* outRow[4],
-                                      size_t outRowBytes) {
-        // Output one pixel each iteration, calculating all channels (RGBA) together.
-        int numValues = filter.numValues();
-        for (int outX = 0; outX < numValues; outX++) {
-
-            int filterOffset, filterLength;
-            const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
-            filter.FilterForValue(outX, &filterOffset, &filterLength);
-
-            // four pixels in a column per iteration.
-            int32x4_t accum0 = vdupq_n_s32(0);
-            int32x4_t accum1 = vdupq_n_s32(0);
-            int32x4_t accum2 = vdupq_n_s32(0);
-            int32x4_t accum3 = vdupq_n_s32(0);
-
-            uint8x8_t coeff_mask0 = vcreate_u8(0x0100010001000100);
-            uint8x8_t coeff_mask1 = vcreate_u8(0x0302030203020302);
-            uint8x8_t coeff_mask2 = vcreate_u8(0x0504050405040504);
-            uint8x8_t coeff_mask3 = vcreate_u8(0x0706070607060706);
-
-            int start = filterOffset * 4;
-
-            // We will load and accumulate with four coefficients per iteration.
-            for (int filterX = 0; filterX < (filterLength >> 2); filterX++) {
-                int16x4_t coeffs, coeff0, coeff1, coeff2, coeff3;
-
-                coeffs = vld1_s16(filterValues);
-                coeff0 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask0));
-                coeff1 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask1));
-                coeff2 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask2));
-                coeff3 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask3));
-
-                uint8x16_t pixels;
-                int16x8_t p01_16, p23_16;
-                int32x4_t p0, p1, p2, p3;
-
-#define ITERATION(src, accum)                                                   \
-                pixels = vld1q_u8(src);                                         \
-                p01_16 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pixels)));  \
-                p23_16 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pixels))); \
-                p0 = vmull_s16(vget_low_s16(p01_16), coeff0);                   \
-                p1 = vmull_s16(vget_high_s16(p01_16), coeff1);                  \
-                p2 = vmull_s16(vget_low_s16(p23_16), coeff2);                   \
-                p3 = vmull_s16(vget_high_s16(p23_16), coeff3);                  \
-                accum += p0;                                                    \
-                accum += p1;                                                    \
-                accum += p2;                                                    \
-                accum += p3
-
-                ITERATION(srcData[0] + start, accum0);
-                ITERATION(srcData[1] + start, accum1);
-                ITERATION(srcData[2] + start, accum2);
-                ITERATION(srcData[3] + start, accum3);
-
-                start += 16;
-                filterValues += 4;
-            }
-
-            int r = filterLength & 3;
-            if (r) {
-                int remainder_offset = (filterOffset + filterLength - r) * 4;
-                AccumRemainder(srcData[0] + remainder_offset, filterValues, accum0, r);
-                AccumRemainder(srcData[1] + remainder_offset, filterValues, accum1, r);
-                AccumRemainder(srcData[2] + remainder_offset, filterValues, accum2, r);
-                AccumRemainder(srcData[3] + remainder_offset, filterValues, accum3, r);
-            }
-
-            int16x4_t accum16;
-            uint8x8_t res0, res1, res2, res3;
-
-#define PACK_RESULT(accum, res)                                             \
-            accum = vshrq_n_s32(accum, SkConvolutionFilter1D::kShiftBits);  \
-            accum16 = vqmovn_s32(accum);                                    \
-            res = vqmovun_s16(vcombine_s16(accum16, accum16));
-
-            PACK_RESULT(accum0, res0);
-            PACK_RESULT(accum1, res1);
-            PACK_RESULT(accum2, res2);
-            PACK_RESULT(accum3, res3);
-
-            vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[0]), vreinterpret_u32_u8(res0), 0);
-            vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[1]), vreinterpret_u32_u8(res1), 0);
-            vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[2]), vreinterpret_u32_u8(res2), 0);
-            vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[3]), vreinterpret_u32_u8(res3), 0);
-            outRow[0] += 4;
-            outRow[1] += 4;
-            outRow[2] += 4;
-            outRow[3] += 4;
-        }
-    }
-
-
-    // NEON vertical convolution producing one output row of |pixelWidth| 4-byte pixels.
-    // |filterValues| holds |filterLength| fixed-point coefficients; coefficient j weights
-    // row sourceDataRows[j]. Sums are shifted right by kShiftBits and saturated to 8 bits.
-    // If |hasAlpha|, alpha is raised to at least max of the color bytes; else it is set to 0xFF.
-    //
-    // The output must have room for |pixelWidth * 4| bytes.
-    template<bool hasAlpha>
-    void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
-                            int filterLength,
-                            unsigned char* const* sourceDataRows,
-                            int pixelWidth,
-                            unsigned char* outRow) {
-        int width = pixelWidth & ~3;  // pixelWidth rounded down to a multiple of 4.
-
-        // Output four pixels per iteration (16 bytes).
-        for (int outX = 0; outX < width; outX += 4) {
-
-            // Accumulated result for each pixel. 32 bits per RGBA channel.
-            int32x4_t accum0 = vdupq_n_s32(0);
-            int32x4_t accum1 = vdupq_n_s32(0);
-            int32x4_t accum2 = vdupq_n_s32(0);
-            int32x4_t accum3 = vdupq_n_s32(0);
-
-            // Convolve with one filter coefficient per iteration.
-            for (int filterY = 0; filterY < filterLength; filterY++) {
-
-                // Duplicate the filter coefficient 4 times.
-                // [16] cj cj cj cj
-                int16x4_t coeff16 = vdup_n_s16(filterValues[filterY]);
-
-                // Load four pixels (16 bytes) together.
-                // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
-                uint8x16_t src8 = vld1q_u8(&sourceDataRows[filterY][outX << 2]);
-
-                int16x8_t src16_01 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(src8)));
-                int16x8_t src16_23 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(src8)));
-                int16x4_t src16_0 = vget_low_s16(src16_01);
-                int16x4_t src16_1 = vget_high_s16(src16_01);
-                int16x4_t src16_2 = vget_low_s16(src16_23);
-                int16x4_t src16_3 = vget_high_s16(src16_23);
-                // Widening multiply (16x16 -> 32 bits per channel), then accumulate.
-                accum0 += vmull_s16(src16_0, coeff16);
-                accum1 += vmull_s16(src16_1, coeff16);
-                accum2 += vmull_s16(src16_2, coeff16);
-                accum3 += vmull_s16(src16_3, coeff16);
-            }
-
-            // Shift right for fixed point implementation.
-            accum0 = vshrq_n_s32(accum0, SkConvolutionFilter1D::kShiftBits);
-            accum1 = vshrq_n_s32(accum1, SkConvolutionFilter1D::kShiftBits);
-            accum2 = vshrq_n_s32(accum2, SkConvolutionFilter1D::kShiftBits);
-            accum3 = vshrq_n_s32(accum3, SkConvolutionFilter1D::kShiftBits);
-
-            // Packing 32 bits |accum| to 16 bits per channel (signed saturation).
-            // [16] a1 b1 g1 r1 a0 b0 g0 r0
-            int16x8_t accum16_0 = vcombine_s16(vqmovn_s32(accum0), vqmovn_s32(accum1));
-            // [16] a3 b3 g3 r3 a2 b2 g2 r2
-            int16x8_t accum16_1 = vcombine_s16(vqmovn_s32(accum2), vqmovn_s32(accum3));
-
-            // Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
-            // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
-            uint8x16_t accum8 = vcombine_u8(vqmovun_s16(accum16_0), vqmovun_s16(accum16_1));
-
-            if (hasAlpha) {
-                // Compute the max(ri, gi, bi) for each pixel.
-                // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
-                uint8x16_t a = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(accum8), 8));
-                // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
-                uint8x16_t b = vmaxq_u8(a, accum8); // Max of r and g
-                // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
-                a = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(accum8), 16));
-                // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
-                b = vmaxq_u8(a, b); // Max of r and g and b.
-                // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
-                b = vreinterpretq_u8_u32(vshlq_n_u32(vreinterpretq_u32_u8(b), 24));
-
-                // Make sure the value of alpha channel is never smaller than the maximum
-                // value of color channels.
-                accum8 = vmaxq_u8(b, accum8);
-            } else {
-                // Set value of alpha channels to 0xFF.
-                accum8 = vreinterpretq_u8_u32(vreinterpretq_u32_u8(accum8) | vdupq_n_u32(0xFF000000));
-            }
-
-            // Store the convolution result (16 bytes) and advance the pixel pointers.
-            vst1q_u8(outRow, accum8);
-            outRow += 16;
-        }
-
-        // Process the leftover 1-3 pixels when pixelWidth is not a multiple of 4. Note: the
-        // loop below still loads a full 16 bytes per row, although only r pixels are stored.
-        int r = pixelWidth & 3;
-        if (r) {
-            // At most three pixels remain, so three accumulators suffice.
-            int32x4_t accum0 = vdupq_n_s32(0);
-            int32x4_t accum1 = vdupq_n_s32(0);
-            int32x4_t accum2 = vdupq_n_s32(0);
-
-            for (int filterY = 0; filterY < filterLength; ++filterY) {
-                int16x4_t coeff16 = vdup_n_s16(filterValues[filterY]);
-
-                // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
-                uint8x16_t src8 = vld1q_u8(&sourceDataRows[filterY][width << 2]);
-
-                int16x8_t src16_01 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(src8)));
-                int16x8_t src16_23 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(src8)));
-                int16x4_t src16_0 = vget_low_s16(src16_01);
-                int16x4_t src16_1 = vget_high_s16(src16_01);
-                int16x4_t src16_2 = vget_low_s16(src16_23);
-
-                accum0 += vmull_s16(src16_0, coeff16);
-                accum1 += vmull_s16(src16_1, coeff16);
-                accum2 += vmull_s16(src16_2, coeff16);
-            }
-
-            accum0 = vshrq_n_s32(accum0, SkConvolutionFilter1D::kShiftBits);
-            accum1 = vshrq_n_s32(accum1, SkConvolutionFilter1D::kShiftBits);
-            accum2 = vshrq_n_s32(accum2, SkConvolutionFilter1D::kShiftBits);
-            // The fourth pixel slot just duplicates accum2; it is never stored below.
-            int16x8_t accum16_0 = vcombine_s16(vqmovn_s32(accum0), vqmovn_s32(accum1));
-            int16x8_t accum16_1 = vcombine_s16(vqmovn_s32(accum2), vqmovn_s32(accum2));
-
-            uint8x16_t accum8 = vcombine_u8(vqmovun_s16(accum16_0), vqmovun_s16(accum16_1));
-
-            if (hasAlpha) {
-                // Compute the max(ri, gi, bi) for each pixel.
-                // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
-                uint8x16_t a = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(accum8), 8));
-                // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
-                uint8x16_t b = vmaxq_u8(a, accum8); // Max of r and g
-                // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
-                a = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(accum8), 16));
-                // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
-                b = vmaxq_u8(a, b); // Max of r and g and b.
-                // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
-                b = vreinterpretq_u8_u32(vshlq_n_u32(vreinterpretq_u32_u8(b), 24));
-
-                // Make sure the value of alpha channel is never smaller than the maximum
-                // value of color channels.
-                accum8 = vmaxq_u8(b, accum8);
-            } else {
-                // Set value of alpha channels to 0xFF.
-                accum8 = vreinterpretq_u8_u32(vreinterpretq_u32_u8(accum8) | vdupq_n_u32(0xFF000000));
-            }
-
-            switch(r) {
-            case 1:  // Store pixel 0 only (32-bit lane 0).
-                vst1q_lane_u32(reinterpret_cast<uint32_t*>(outRow), vreinterpretq_u32_u8(accum8), 0);
-                break;
-            case 2:  // Store pixels 0-1 (the low 8 bytes).
-                vst1_u32(reinterpret_cast<uint32_t*>(outRow),
-                         vreinterpret_u32_u8(vget_low_u8(accum8)));
-                break;
-            case 3:  // Store pixels 0-1, then pixel 2 (lane 2) at byte offset 8.
-                vst1_u32(reinterpret_cast<uint32_t*>(outRow),
-                         vreinterpret_u32_u8(vget_low_u8(accum8)));
-                vst1q_lane_u32(reinterpret_cast<uint32_t*>(outRow+8), vreinterpretq_u32_u8(accum8), 2);
-                break;
-            }
-        }
-    }
-
-#else
-
-    // Converts the argument to an 8-bit unsigned value by clamping to the range
-    // 0-255: negative inputs give 0, inputs above 255 give 255.
-    inline unsigned char ClampTo8(int a) {
-        if (static_cast<unsigned>(a) < 256) {  // One unsigned compare covers 0 <= a <= 255.
-            return a;  // Avoid the extra check in the common case.
-        }
-        if (a < 0) {
-            return 0;
-        }
-        return 255;
-    }
-
-    // Convolves horizontally along the single row |srcData| (4 bytes per pixel), writing
-    // filter.numValues() pixels to |outRow|. If |hasAlpha| is false, byte 3 of each output is not written.
-    template<bool hasAlpha>
-    void ConvolveHorizontally(const unsigned char* srcData,
-                              const SkConvolutionFilter1D& filter,
-                              unsigned char* outRow) {
-        // Loop over each pixel on this row in the output image.
-        int numValues = filter.numValues();
-        for (int outX = 0; outX < numValues; outX++) {
-            // Get the filter that determines the current output pixel.
-            int filterOffset, filterLength;
-            const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
-                filter.FilterForValue(outX, &filterOffset, &filterLength);
-
-            // Compute the first pixel in this row that the filter affects. It will
-            // touch |filterLength| pixels (4 bytes each) after this.
-            const unsigned char* rowToFilter = &srcData[filterOffset * 4];
-
-            // Apply the filter to the row to get the destination pixel in |accum|.
-            int accum[4] = {0};  // One running sum per channel byte.
-            for (int filterX = 0; filterX < filterLength; filterX++) {
-                SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX];
-                accum[0] += curFilter * rowToFilter[filterX * 4 + 0];
-                accum[1] += curFilter * rowToFilter[filterX * 4 + 1];
-                accum[2] += curFilter * rowToFilter[filterX * 4 + 2];
-                if (hasAlpha) {
-                    accum[3] += curFilter * rowToFilter[filterX * 4 + 3];
-                }
-            }
-
-            // Bring this value back in range. All of the filter scaling factors
-            // are in fixed point with kShiftBits bits of fractional part.
-            accum[0] >>= SkConvolutionFilter1D::kShiftBits;
-            accum[1] >>= SkConvolutionFilter1D::kShiftBits;
-            accum[2] >>= SkConvolutionFilter1D::kShiftBits;
-            if (hasAlpha) {
-                accum[3] >>= SkConvolutionFilter1D::kShiftBits;
-            }
-
-            // Store the new pixel.
-            outRow[outX * 4 + 0] = ClampTo8(accum[0]);
-            outRow[outX * 4 + 1] = ClampTo8(accum[1]);
-            outRow[outX * 4 + 2] = ClampTo8(accum[2]);
-            if (hasAlpha) {
-                outRow[outX * 4 + 3] = ClampTo8(accum[3]);
-            }
-        }
-    }
-
-    // Does vertical convolution to produce one output row. Coefficient filterValues[j]
-    // (for j in [0, filterLength)) weights row sourceDataRows[j]; each row is
-    // |pixelWidth| 4-byte pixels wide. If |hasAlpha| is false the output alpha byte
-    // is set to 0xff and the source alpha is ignored.
-    //
-    // The output must have room for |pixelWidth * 4| bytes.
-    template<bool hasAlpha>
-    void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
-                            int filterLength,
-                            unsigned char* const* sourceDataRows,
-                            int pixelWidth,
-                            unsigned char* outRow) {
-        // We go through each column in the output and do a vertical convolution,
-        // generating one output pixel each time.
-        for (int outX = 0; outX < pixelWidth; outX++) {
-            // Compute the number of bytes over in each row that the current column
-            // we're convolving starts at. The pixel will cover the next 4 bytes.
-            int byteOffset = outX * 4;
-
-            // Apply the filter to one column of pixels.
-            int accum[4] = {0};  // One running sum per channel byte.
-            for (int filterY = 0; filterY < filterLength; filterY++) {
-                SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY];
-                accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0];
-                accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1];
-                accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2];
-                if (hasAlpha) {
-                    accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3];
-                }
-            }
-
-            // Bring this value back in range. All of the filter scaling factors
-            // are in fixed point with kShiftBits bits of precision.
-            accum[0] >>= SkConvolutionFilter1D::kShiftBits;
-            accum[1] >>= SkConvolutionFilter1D::kShiftBits;
-            accum[2] >>= SkConvolutionFilter1D::kShiftBits;
-            if (hasAlpha) {
-                accum[3] >>= SkConvolutionFilter1D::kShiftBits;
-            }
-
-            // Store the new pixel.
-            outRow[byteOffset + 0] = ClampTo8(accum[0]);
-            outRow[byteOffset + 1] = ClampTo8(accum[1]);
-            outRow[byteOffset + 2] = ClampTo8(accum[2]);
-            if (hasAlpha) {
-                unsigned char alpha = ClampTo8(accum[3]);
-
-                // Make sure the alpha channel doesn't come out smaller than any of the
-                // color channels. We use premultiplied alpha channels, so this should
-                // never happen, but rounding errors will cause this from time to time.
-                // These "impossible" colors will cause overflows (and hence random pixel
-                // values) when the resulting bitmap is drawn to the screen.
-                //
-                // We only need to do this when generating the final output row (here).
-                int maxColorChannel = SkTMax(outRow[byteOffset + 0],
-                                               SkTMax(outRow[byteOffset + 1],
-                                                      outRow[byteOffset + 2]));
-                if (alpha < maxColorChannel) {
-                    outRow[byteOffset + 3] = maxColorChannel;
-                } else {
-                    outRow[byteOffset + 3] = alpha;
-                }
-            } else {
-                // No alpha channel, the image is opaque.
-                outRow[byteOffset + 3] = 0xff;
-            }
-        }
-    }
-
-    // There's a bug somewhere here with GCC autovectorization (-ftree-vectorize).  We originally
-    // thought this was 32 bit only, but subsequent tests show that some 64 bit gcc compiles
-    // suffer here too.  Workaround (release builds with the optimize attribute only):
-    // compile convolve_horizontally at -O2 and keep it out of line.
-    // Dropping to -O2 disables -ftree-vectorize.  GCC 4.6 needs noinline.  https://bug.skia.org/2575
-#if SK_HAS_ATTRIBUTE(optimize) && defined(SK_RELEASE)
-        #define SK_MAYBE_DISABLE_VECTORIZATION __attribute__((optimize("O2"), noinline))
-#else
-        #define SK_MAYBE_DISABLE_VECTORIZATION
-#endif
-    // Runtime dispatch on |hasAlpha| to the matching ConvolveHorizontally instantiation.
-    SK_MAYBE_DISABLE_VECTORIZATION
-    void convolve_horizontally(const unsigned char* srcData,
-                               const SkConvolutionFilter1D& filter,
-                               unsigned char* outRow,
-                               bool hasAlpha) {
-        if (hasAlpha) {
-            ConvolveHorizontally<true>(srcData, filter, outRow);
-        } else {
-            ConvolveHorizontally<false>(srcData, filter, outRow);
-        }
-    }
-#undef SK_MAYBE_DISABLE_VECTORIZATION
-
-    void (*convolve_4_rows_horizontally)(const unsigned char* srcData[4],
-                                         const SkConvolutionFilter1D& filter,
-                                         unsigned char* outRow[4],
-                                         size_t outRowBytes)
-        = nullptr;  // No four-row kernel in this #else branch; presumably callers null-check — TODO confirm.
-
-
-#endif
-    // Runtime dispatch on |hasAlpha| to the matching ConvolveVertically instantiation.
-    void convolve_vertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
-                             int filterLength,
-                             unsigned char* const* sourceDataRows,
-                             int pixelWidth,
-                             unsigned char* outRow,
-                             bool hasAlpha) {
-        if (hasAlpha) {
-            ConvolveVertically<true>(filterValues, filterLength, sourceDataRows,
-                                     pixelWidth, outRow);
-        } else {
-            ConvolveVertically<false>(filterValues, filterLength, sourceDataRows,
-                                      pixelWidth, outRow);
-        }
-    }
-
-}  // namespace SK_OPTS_NS
-
-#endif//SkBitmapFilter_opts_DEFINED
diff --git a/src/opts/SkBitmapProcState_opts_none.cpp b/src/opts/SkBitmapProcState_opts_none.cpp
index 0d96e17133..1d83ddfe7c 100644
--- a/src/opts/SkBitmapProcState_opts_none.cpp
+++ b/src/opts/SkBitmapProcState_opts_none.cpp
@@ -5,7 +5,6 @@
  * found in the LICENSE file.
  */
 
-#include "SkBitmapScaler.h"
 #include "SkBitmapProcState.h"
 
 /*  A platform may optionally overwrite any of these with accelerated
diff --git a/src/opts/SkOpts_hsw.cpp b/src/opts/SkOpts_hsw.cpp
deleted file mode 100644
index dded64776a..0000000000
--- a/src/opts/SkOpts_hsw.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright 2016 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-// It is not safe to #include any header file here unless it has been vetted for ODR safety:
-// all symbols used must be file-scoped static or in an anonymous namespace.  This applies
-// to _all_ header files:  C standard library, C++ standard library, Skia... everything.
-
-#include <immintrin.h>   // ODR safe
-#include <stdint.h>      // ODR safe
-
-#if defined(__AVX2__)
-
-namespace hsw {
-    // AVX2 vertical convolution of |width| 4-byte pixels; alpha handling depends on |hasAlpha|.
-    void convolve_vertically(const int16_t* filter, int filterLen,
-                             uint8_t* const* srcRows, int width,
-                             uint8_t* out, bool hasAlpha) {
-        // It's simpler to work with the output array in terms of 4-byte pixels.
-        auto dst = (int*)out;
-
-        // Output up to eight pixels per iteration.
-        for (int x = 0; x < width; x += 8) {
-            // Accumulated result for 4 (non-adjacent) pairs of pixels,
-            // with each channel in signed 17.14 fixed point.
-            auto accum04 = _mm256_setzero_si256(),
-                 accum15 = _mm256_setzero_si256(),
-                 accum26 = _mm256_setzero_si256(),
-                 accum37 = _mm256_setzero_si256();
-
-            // Convolve with the filter.  (This inner loop is where we spend ~all our time.)
-            // While we can, we consume 2 filter coefficients and 2 rows of 8 pixels each at a time.
-            auto convolve_16_pixels = [&](__m256i interlaced_coeffs,
-                                          __m256i pixels_01234567, __m256i pixels_89ABCDEF) {
-                // Interlaced R0R8 G0G8 B0B8 A0A8 R1R9 G1G9... 32 8-bit values each.
-                auto _08194C5D = _mm256_unpacklo_epi8(pixels_01234567, pixels_89ABCDEF),
-                     _2A3B6E7F = _mm256_unpackhi_epi8(pixels_01234567, pixels_89ABCDEF);
-
-                // Still interlaced R0R8 G0G8... as above, each channel expanded to 16-bit lanes.
-                auto _084C = _mm256_unpacklo_epi8(_08194C5D, _mm256_setzero_si256()),
-                     _195D = _mm256_unpackhi_epi8(_08194C5D, _mm256_setzero_si256()),
-                     _2A6E = _mm256_unpacklo_epi8(_2A3B6E7F, _mm256_setzero_si256()),
-                     _3B7F = _mm256_unpackhi_epi8(_2A3B6E7F, _mm256_setzero_si256());
-
-                // accum0_R += R0*coeff0 + R8*coeff1, etc.
-                accum04 = _mm256_add_epi32(accum04, _mm256_madd_epi16(_084C, interlaced_coeffs));
-                accum15 = _mm256_add_epi32(accum15, _mm256_madd_epi16(_195D, interlaced_coeffs));
-                accum26 = _mm256_add_epi32(accum26, _mm256_madd_epi16(_2A6E, interlaced_coeffs));
-                accum37 = _mm256_add_epi32(accum37, _mm256_madd_epi16(_3B7F, interlaced_coeffs));
-            };
-            // Note: each load below reads 32 bytes (8 pixels), even on a final partial iteration.
-            int i = 0;
-            for (; i < filterLen/2*2; i += 2) {
-                convolve_16_pixels(_mm256_set1_epi32(*(const int32_t*)(filter+i)),
-                                   _mm256_loadu_si256((const __m256i*)(srcRows[i+0] + x*4)),
-                                   _mm256_loadu_si256((const __m256i*)(srcRows[i+1] + x*4)));
-            }
-            if (i < filterLen) {  // Odd filterLen: one coefficient is left over.
-                convolve_16_pixels(_mm256_set1_epi32(*(const int16_t*)(filter+i)),
-                                   _mm256_loadu_si256((const __m256i*)(srcRows[i] + x*4)),
-                                   _mm256_setzero_si256());  // Zero pixels stand in for the missing second row.
-            }
-
-            // Trim the 14 fractional bits off the accumulators (arithmetic shift keeps the sign).
-            accum04 = _mm256_srai_epi32(accum04, 14);
-            accum15 = _mm256_srai_epi32(accum15, 14);
-            accum26 = _mm256_srai_epi32(accum26, 14);
-            accum37 = _mm256_srai_epi32(accum37, 14);
-
-            // Pack back down to 8-bit channels.
-            auto pixels = _mm256_packus_epi16(_mm256_packs_epi32(accum04, accum15),
-                                              _mm256_packs_epi32(accum26, accum37));
-
-            if (hasAlpha) {
-                // Clamp alpha to the max of r,g,b to make sure we stay premultiplied.
-                __m256i max_rg  = _mm256_max_epu8(pixels, _mm256_srli_epi32(pixels,  8)),
-                        max_rgb = _mm256_max_epu8(max_rg, _mm256_srli_epi32(pixels, 16));
-                pixels = _mm256_max_epu8(pixels, _mm256_slli_epi32(max_rgb, 24));
-            } else {
-                // Force opaque.
-                pixels = _mm256_or_si256(pixels, _mm256_set1_epi32(0xff000000));
-            }
-
-            // Normal path to store 8 pixels.
-            if (x + 8 <= width) {
-                _mm256_storeu_si256((__m256i*)dst, pixels);
-                dst += 8;
-                continue;
-            }
-
-            // Store one pixel at a time on the last iteration.
-            for (int i = x; i < width; i++) {  // (This i shadows the filter index above.)
-                *dst++ = _mm_cvtsi128_si32(_mm256_castsi256_si128(pixels));
-                pixels = _mm256_permutevar8x32_epi32(pixels, _mm256_setr_epi32(1,2,3,4,5,6,7,0));
-            }
-        }
-    }
-
-}
-
-namespace SkOpts {
-    // See SkOpts.h; SkConvolutionFilter1D::ConvolutionFixed is written as the underlying type (int16_t).
-    extern void (*convolve_vertically)(const int16_t* filter, int filterLen,
-                                       uint8_t* const* srcRows, int width,
-                                       uint8_t* out, bool hasAlpha);
-    void Init_hsw() {  // Points the SkOpts hook at the AVX2 implementation above.
-        convolve_vertically = hsw::convolve_vertically;
-    }
-}
-
-#else  // defined(__AVX2__) is not true...
-
-namespace SkOpts { void Init_hsw() {} }  // No-op stub when built without AVX2.
-
-#endif
diff --git a/src/opts/opts_check_x86.cpp b/src/opts/opts_check_x86.cpp
index 7917259554..ec4e9f66bd 100644
--- a/src/opts/opts_check_x86.cpp
+++ b/src/opts/opts_check_x86.cpp
@@ -7,7 +7,6 @@
 
 #include "SkBitmapProcState_opts_SSE2.h"
 #include "SkBitmapProcState_opts_SSSE3.h"
-#include "SkBitmapScaler.h"
 #include "SkBlitMask.h"
 #include "SkBlitRow.h"
 #include "SkBlitRow_opts_SSE2.h"
diff --git a/src/shaders/SkImageShader.cpp b/src/shaders/SkImageShader.cpp
index 35289127b0..b643f24b0b 100644
--- a/src/shaders/SkImageShader.cpp
+++ b/src/shaders/SkImageShader.cpp
@@ -256,7 +256,7 @@ bool SkImageShader::onAppendStages(SkRasterPipeline* p, SkColorSpace* dstCS, SkA
     auto quality = paint.getFilterQuality();
 
     SkBitmapProvider provider(fImage.get(), dstCS);
-    SkDefaultBitmapController controller(SkDefaultBitmapController::CanShadeHQ::kYes);
+    SkDefaultBitmapController controller;
     std::unique_ptr<SkBitmapController::State> state {
         controller.requestBitmap(provider, matrix, quality)
     };
author	Mike Reed <reed@google.com>	2017-07-19 17:20:37 -0400
committer	Skia Commit-Bot <skia-commit-bot@chromium.org>	2017-07-20 00:43:37 +0000
commit	e32500f0642df381fd79731df2f7a4a4a71a46e2 (patch)
tree	05747f712923791d6df14077714cede88d9d51ff
parent	3e583cba8af153952e31925e0d4bfbc71cfa43b8 (diff)