aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/effects/SkBlurImageFilter.cpp
diff options
context:
space:
mode:
authorGravatar mtklein <mtklein@chromium.org>2015-08-04 08:49:21 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2015-08-04 08:49:21 -0700
commit dce5ce4276e2825efc6d8c4daa819c965794cd12 (patch)
tree 5bfdc110936e1f3d854d52a9a35f88dcef9b8136 /src/effects/SkBlurImageFilter.cpp
parent 562a66b093c45e2788aab1572d6138eb39c6dc37 (diff)
Port SkBlurImage opts to SkOpts.
+268 -535 lines

I also rearranged the code a little bit to encapsulate itself better, mostly replacing static helper functions with lambdas. This also let me merge the SSE2 and SSE4.1 code paths.

BUG=skia:4117
Review URL: https://codereview.chromium.org/1264103004
Diffstat (limited to 'src/effects/SkBlurImageFilter.cpp')
-rw-r--r-- src/effects/SkBlurImageFilter.cpp | 131
1 files changed, 33 insertions, 98 deletions
diff --git a/src/effects/SkBlurImageFilter.cpp b/src/effects/SkBlurImageFilter.cpp
index 17f8d3a337..a867971a0c 100644
--- a/src/effects/SkBlurImageFilter.cpp
+++ b/src/effects/SkBlurImageFilter.cpp
@@ -8,10 +8,10 @@
#include "SkBitmap.h"
#include "SkBlurImageFilter.h"
#include "SkColorPriv.h"
+#include "SkGpuBlurUtils.h"
+#include "SkOpts.h"
#include "SkReadBuffer.h"
#include "SkWriteBuffer.h"
-#include "SkGpuBlurUtils.h"
-#include "SkBlurImage_opts.h"
#if SK_SUPPORT_GPU
#include "GrContext.h"
#endif
@@ -51,83 +51,6 @@ void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const {
buffer.writeScalar(fSigma.fHeight);
}
-enum BlurDirection {
- kX, kY
-};
-
-/**
- *
- * In order to make memory accesses cache-friendly, we reorder the passes to
- * use contiguous memory reads wherever possible.
- *
- * For example, the 6 passes of the X-and-Y blur case are rewritten as
- * follows. Instead of 3 passes in X and 3 passes in Y, we perform
- * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X,
- * then 1 pass in X transposed to Y on write.
- *
- * +----+       +----+       +----+        +---+       +---+       +---+        +----+
- * + AB + ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB |
- * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+
- *                                         +---+       +---+       +---+
- *
- * In this way, two of the y-blurs become x-blurs applied to transposed
- * images, and all memory reads are contiguous.
- */
-
-template<BlurDirection srcDirection, BlurDirection dstDirection>
-static void boxBlur(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize,
- int leftOffset, int rightOffset, int width, int height)
-{
- int rightBorder = SkMin32(rightOffset + 1, width);
- int srcStrideX = srcDirection == kX ? 1 : srcStride;
- int dstStrideX = dstDirection == kX ? 1 : height;
- int srcStrideY = srcDirection == kX ? srcStride : 1;
- int dstStrideY = dstDirection == kX ? width : 1;
- uint32_t scale = (1 << 24) / kernelSize;
- uint32_t half = 1 << 23;
- for (int y = 0; y < height; ++y) {
- int sumA = 0, sumR = 0, sumG = 0, sumB = 0;
- const SkPMColor* p = src;
- for (int i = 0; i < rightBorder; ++i) {
- sumA += SkGetPackedA32(*p);
- sumR += SkGetPackedR32(*p);
- sumG += SkGetPackedG32(*p);
- sumB += SkGetPackedB32(*p);
- p += srcStrideX;
- }
-
- const SkPMColor* sptr = src;
- SkColor* dptr = dst;
- for (int x = 0; x < width; ++x) {
- *dptr = SkPackARGB32((sumA * scale + half) >> 24,
- (sumR * scale + half) >> 24,
- (sumG * scale + half) >> 24,
- (sumB * scale + half) >> 24);
- if (x >= leftOffset) {
- SkColor l = *(sptr - leftOffset * srcStrideX);
- sumA -= SkGetPackedA32(l);
- sumR -= SkGetPackedR32(l);
- sumG -= SkGetPackedG32(l);
- sumB -= SkGetPackedB32(l);
- }
- if (x + rightOffset + 1 < width) {
- SkColor r = *(sptr + (rightOffset + 1) * srcStrideX);
- sumA += SkGetPackedA32(r);
- sumR += SkGetPackedR32(r);
- sumG += SkGetPackedG32(r);
- sumB += SkGetPackedB32(r);
- }
- sptr += srcStrideX;
- if (srcDirection == kY) {
- SK_PREFETCH(sptr + (rightOffset + 1) * srcStrideX);
- }
- dptr += dstStrideX;
- }
- src += srcStrideY;
- dst += dstStrideY;
- }
-}
-
static void getBox3Params(SkScalar s, int *kernelSize, int* kernelSize3, int *lowOffset,
int *highOffset)
{
@@ -204,28 +127,40 @@ bool SkBlurImageFilter::onFilterImage(Proxy* proxy,
SkPMColor* d = dst->getAddr32(0, 0);
int w = dstBounds.width(), h = dstBounds.height();
int sw = src.rowBytesAsPixels();
- SkBoxBlurProc boxBlurX, boxBlurXY, boxBlurYX;
- if (!SkBoxBlurGetPlatformProcs(&boxBlurX, &boxBlurXY, &boxBlurYX)) {
- boxBlurX = boxBlur<kX, kX>;
- boxBlurXY = boxBlur<kX, kY>;
- boxBlurYX = boxBlur<kY, kX>;
- }
+ /**
+ *
+ * In order to make memory accesses cache-friendly, we reorder the passes to
+ * use contiguous memory reads wherever possible.
+ *
+ * For example, the 6 passes of the X-and-Y blur case are rewritten as
+ * follows. Instead of 3 passes in X and 3 passes in Y, we perform
+ * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X,
+ * then 1 pass in X transposed to Y on write.
+ *
+ * +----+       +----+       +----+        +---+       +---+       +---+        +----+
+ * + AB + ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB |
+ * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+
+ *                                         +---+       +---+       +---+
+ *
+ * In this way, two of the y-blurs become x-blurs applied to transposed
+ * images, and all memory reads are contiguous.
+ */
if (kernelSizeX > 0 && kernelSizeY > 0) {
- boxBlurX(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h);
- boxBlurX(t, w, d, kernelSizeX, highOffsetX, lowOffsetX, w, h);
- boxBlurXY(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w, h);
- boxBlurX(t, h, d, kernelSizeY, lowOffsetY, highOffsetY, h, w);
- boxBlurX(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w);
- boxBlurXY(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w);
+ SkOpts::box_blur_xx(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h);
+ SkOpts::box_blur_xx(t, w, d, kernelSizeX, highOffsetX, lowOffsetX, w, h);
+ SkOpts::box_blur_xy(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w, h);
+ SkOpts::box_blur_xx(t, h, d, kernelSizeY, lowOffsetY, highOffsetY, h, w);
+ SkOpts::box_blur_xx(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w);
+ SkOpts::box_blur_xy(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w);
} else if (kernelSizeX > 0) {
- boxBlurX(s, sw, d, kernelSizeX, lowOffsetX, highOffsetX, w, h);
- boxBlurX(d, w, t, kernelSizeX, highOffsetX, lowOffsetX, w, h);
- boxBlurX(t, w, d, kernelSizeX3, highOffsetX, highOffsetX, w, h);
+ SkOpts::box_blur_xx(s, sw, d, kernelSizeX, lowOffsetX, highOffsetX, w, h);
+ SkOpts::box_blur_xx(d, w, t, kernelSizeX, highOffsetX, lowOffsetX, w, h);
+ SkOpts::box_blur_xx(t, w, d, kernelSizeX3, highOffsetX, highOffsetX, w, h);
} else if (kernelSizeY > 0) {
- boxBlurYX(s, sw, d, kernelSizeY, lowOffsetY, highOffsetY, h, w);
- boxBlurX(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w);
- boxBlurXY(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w);
+ SkOpts::box_blur_yx(s, sw, d, kernelSizeY, lowOffsetY, highOffsetY, h, w);
+ SkOpts::box_blur_xx(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w);
+ SkOpts::box_blur_xy(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w);
}
return true;
}
@@ -260,7 +195,7 @@ bool SkBlurImageFilter::filterImageGPU(Proxy* proxy, const SkBitmap& src, const
#if SK_SUPPORT_GPU
SkBitmap input = src;
SkIPoint srcOffset = SkIPoint::Make(0, 0);
- if (this->getInput(0) &&
+ if (this->getInput(0) &&
!this->getInput(0)->getInputResultGPU(proxy, src, ctx, &input, &srcOffset)) {
return false;
}