aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar tomhudson@google.com <tomhudson@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>2012-06-11 12:42:24 +0000
committerGravatar tomhudson@google.com <tomhudson@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>2012-06-11 12:42:24 +0000
commit f8a2289667e2ad2a4776e43485ce29f32e584d0b (patch)
tree 8caee1281a58cdbd14a034213e285dffdcdbcdda
parent 23113dd4206abadd95d401821e90ceb66232e4ed (diff)
Manually unroll the loop in the convolution (blur) effect. Most GL drivers
unroll it themselves, but some don't, and unrolling it by hand leads to a 20-30% benchmark speedup.
http://codereview.appspot.com/6308057/
git-svn-id: http://skia.googlecode.com/svn/trunk@4218 2bbb7eff-a529-9590-31e7-b0007b416f81
-rw-r--r-- src/gpu/effects/GrConvolutionEffect.cpp 32
1 files changed, 14 insertions, 18 deletions
diff --git a/src/gpu/effects/GrConvolutionEffect.cpp b/src/gpu/effects/GrConvolutionEffect.cpp
index 14e817fcec..5aaa1858a0 100644
--- a/src/gpu/effects/GrConvolutionEffect.cpp
+++ b/src/gpu/effects/GrConvolutionEffect.cpp
@@ -84,29 +84,25 @@ void GrGLConvolutionEffect::emitFS(GrGLShaderBuilder* state,
const char* inputColor,
const char* samplerName) {
GrStringBuilder* code = &state->fFSCode;
- // const char* texFunc = "texture2D";
- // bool complexCoord = false;
- state->fFSCode.appendf("\t\tvec4 sum = vec4(0, 0, 0, 0);\n");
+ code->appendf("\t\tvec4 sum = vec4(0, 0, 0, 0);\n");
code->appendf("\t\tvec2 coord = %s;\n", state->fSampleCoords.c_str());
- code->appendf("\t\tfor (int i = 0; i < %d; i++) {\n", this->width());
- // Creates the string "kernel[i]" with workarounds for
- // possible driver bugs
- GrStringBuilder kernelIndex;
- fKernelVar->appendArrayAccess("i", &kernelIndex);
- state->fFSCode.appendf("\t\t\tsum += ");
- state->emitTextureLookup(samplerName, "coord");
- state->fFSCode.appendf(" * %s;\n", kernelIndex.c_str());
-
- code->appendf("\t\t\tcoord += %s;\n",
- fImageIncrementVar->getName().c_str());
- code->appendf("\t\t}\n");
+ // Manually unroll loop because some drivers don't; yields 20-30% speedup.
+ for (int i = 0; i < this->width(); i++) {
+ GrStringBuilder index;
+ GrStringBuilder kernelIndex;
+ index.appendS32(i);
+ fKernelVar->appendArrayAccess(index.c_str(), &kernelIndex);
+ code->appendf("\t\tsum += ");
+ state->emitTextureLookup(samplerName, "coord");
+ code->appendf(" * %s;\n", kernelIndex.c_str());
+ code->appendf("\t\tcoord += %s;\n",
+ fImageIncrementVar->getName().c_str());
+ }
- state->fFSCode.appendf("\t\t%s = sum%s;\n",
- outputColor,
- state->fModulate.c_str());
+ code->appendf("\t\t%s = sum%s;\n", outputColor, state->fModulate.c_str());
}
void GrGLConvolutionEffect::initUniforms(const GrGLInterface* gl,