aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--src/effects/SkBlurMask.cpp114
1 files changed, 104 insertions, 10 deletions
diff --git a/src/effects/SkBlurMask.cpp b/src/effects/SkBlurMask.cpp
index 99bf2d5003..c97916bbb0 100644
--- a/src/effects/SkBlurMask.cpp
+++ b/src/effects/SkBlurMask.cpp
@@ -22,12 +22,51 @@
#define UNROLL_SEPARABLE_LOOPS
+#define SK_DISABLE_BLUR_ROUNDING
+
/**
* This function performs a box blur in X, of the given radius. If the
* "transpose" parameter is true, it will transpose the pixels on write,
* such that X and Y are swapped. Reads are always performed from contiguous
* memory in X, for speed. The destination buffer (dst) must be at least
* (width + leftRadius + rightRadius) * height bytes in size.
+ *
+ * This is what the inner loop looks like before unrolling, and with the two
+ * cases broken out separately (width < diameter, width >= diameter):
+ *
+ * if (width < diameter) {
+ * for (int x = 0; x < width; ++x) {
+ * sum += *right++;
+ * *dptr = (sum * scale + half) >> 24;
+ * dptr += dst_x_stride;
+ * }
+ * for (int x = width; x < diameter; ++x) {
+ * *dptr = (sum * scale + half) >> 24;
+ * dptr += dst_x_stride;
+ * }
+ * for (int x = 0; x < width; ++x) {
+ * *dptr = (sum * scale + half) >> 24;
+ * sum -= *left++;
+ * dptr += dst_x_stride;
+ * }
+ * } else {
+ * for (int x = 0; x < diameter; ++x) {
+ * sum += *right++;
+ * *dptr = (sum * scale + half) >> 24;
+ * dptr += dst_x_stride;
+ * }
+ * for (int x = diameter; x < width; ++x) {
+ * sum += *right++;
+ * *dptr = (sum * scale + half) >> 24;
+ * sum -= *left++;
+ * dptr += dst_x_stride;
+ * }
+ * for (int x = 0; x < diameter; ++x) {
+ * *dptr = (sum * scale + half) >> 24;
+ * sum -= *left++;
+ * dptr += dst_x_stride;
+ * }
+ * }
*/
static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
int leftRadius, int rightRadius, int width, int height,
@@ -40,8 +79,13 @@ static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
int dst_x_stride = transpose ? height : 1;
int dst_y_stride = transpose ? 1 : new_width;
+#ifndef SK_DISABLE_BLUR_ROUNDING
+ uint32_t half = 1 << 23;
+#else
+ uint32_t half = 0;
+#endif
for (int y = 0; y < height; ++y) {
- int sum = 0;
+ uint32_t sum = 0;
uint8_t* dptr = dst + y * dst_y_stride;
const uint8_t* right = src + y * src_y_stride;
const uint8_t* left = right;
@@ -51,7 +95,7 @@ static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
}
#define LEFT_BORDER_ITER \
sum += *right++; \
- *dptr = (sum * scale) >> 24; \
+ *dptr = (sum * scale + half) >> 24; \
dptr += dst_x_stride;
int x = 0;
@@ -80,7 +124,7 @@ static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
}
#undef LEFT_BORDER_ITER
#define TRIVIAL_ITER \
- *dptr = (sum * scale) >> 24; \
+ *dptr = (sum * scale + half) >> 24; \
dptr += dst_x_stride;
x = width;
#ifdef UNROLL_SEPARABLE_LOOPS
@@ -109,7 +153,7 @@ static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
#undef TRIVIAL_ITER
#define CENTER_ITER \
sum += *right++; \
- *dptr = (sum * scale) >> 24; \
+ *dptr = (sum * scale + half) >> 24; \
sum -= *left++; \
dptr += dst_x_stride;
@@ -139,7 +183,7 @@ static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
}
#undef CENTER_ITER
#define RIGHT_BORDER_ITER \
- *dptr = (sum * scale) >> 24; \
+ *dptr = (sum * scale + half) >> 24; \
sum -= *left++; \
dptr += dst_x_stride;
@@ -184,7 +228,52 @@ static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
* interpolates between them. In float this would be:
* outer_weight * outer_sum / kernelSize +
* (1.0 - outer_weight) * innerSum / (kernelSize - 2)
+ *
+ * This is what the inner loop looks like before unrolling, and with the two
+ * cases broken out separately (width < diameter, width >= diameter):
+ *
+ * if (width < diameter) {
+ * for (int x = 0; x < width; x++) {
+ * inner_sum = outer_sum;
+ * outer_sum += *right++;
+ * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
+ * dptr += dst_x_stride;
+ * }
+ * for (int x = width; x < diameter; ++x) {
+ * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
+ * dptr += dst_x_stride;
+ * }
+ * for (int x = 0; x < width; x++) {
+ * inner_sum = outer_sum - *left++;
+ * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
+ * dptr += dst_x_stride;
+ * outer_sum = inner_sum;
+ * }
+ * } else {
+ * for (int x = 0; x < diameter; x++) {
+ * inner_sum = outer_sum;
+ * outer_sum += *right++;
+ * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
+ * dptr += dst_x_stride;
+ * }
+ * for (int x = diameter; x < width; ++x) {
+ * inner_sum = outer_sum - *left;
+ * outer_sum += *right++;
+ * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
+ * dptr += dst_x_stride;
+ * outer_sum -= *left++;
+ * }
+ * for (int x = 0; x < diameter; x++) {
+ * inner_sum = outer_sum - *left++;
+ * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
+ * dptr += dst_x_stride;
+ * outer_sum = inner_sum;
+ * }
+ * }
+ * }
+ * return new_width;
*/
+
static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
int radius, int width, int height,
bool transpose, uint8_t outer_weight)
@@ -197,11 +286,16 @@ static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
inner_weight += inner_weight >> 7;
uint32_t outer_scale = (outer_weight << 16) / kernelSize;
uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
+#ifndef SK_DISABLE_BLUR_ROUNDING
+ uint32_t half = 1 << 23;
+#else
+ uint32_t half = 0;
+#endif
int new_width = width + diameter;
int dst_x_stride = transpose ? height : 1;
int dst_y_stride = transpose ? 1 : new_width;
for (int y = 0; y < height; ++y) {
- int outer_sum = 0, inner_sum = 0;
+ uint32_t outer_sum = 0, inner_sum = 0;
uint8_t* dptr = dst + y * dst_y_stride;
const uint8_t* right = src + y * src_y_stride;
const uint8_t* left = right;
@@ -210,7 +304,7 @@ static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
#define LEFT_BORDER_ITER \
inner_sum = outer_sum; \
outer_sum += *right++; \
- *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
+ *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
dptr += dst_x_stride;
#ifdef UNROLL_SEPARABLE_LOOPS
@@ -239,7 +333,7 @@ static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
}
#undef LEFT_BORDER_ITER
for (int x = width; x < diameter; ++x) {
- *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24;
+ *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
dptr += dst_x_stride;
}
x = diameter;
@@ -247,7 +341,7 @@ static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
#define CENTER_ITER \
inner_sum = outer_sum - *left; \
outer_sum += *right++; \
- *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
+ *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
dptr += dst_x_stride; \
outer_sum -= *left++;
@@ -278,7 +372,7 @@ static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
#define RIGHT_BORDER_ITER \
inner_sum = outer_sum - *left++; \
- *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
+ *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
dptr += dst_x_stride; \
outer_sum = inner_sum;