diff options
author | tomhudson@google.com <tomhudson@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> | 2012-06-11 12:42:24 +0000 |
---|---|---|
committer | tomhudson@google.com <tomhudson@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> | 2012-06-11 12:42:24 +0000 |
commit | f8a2289667e2ad2a4776e43485ce29f32e584d0b (patch) | |
tree | 8caee1281a58cdbd14a034213e285dffdcdbcdda | |
parent | 23113dd4206abadd95d401821e90ceb66232e4ed (diff) |
Manually unroll the loop in the convolution (blur) shader. Most GL drivers
unroll it themselves, but some don't, and unrolling it by hand leads to a
20-30% benchmark speedup.
http://codereview.appspot.com/6308057/
git-svn-id: http://skia.googlecode.com/svn/trunk@4218 2bbb7eff-a529-9590-31e7-b0007b416f81
-rw-r--r-- | src/gpu/effects/GrConvolutionEffect.cpp | 32 |
1 file changed, 14 insertions, 18 deletions
diff --git a/src/gpu/effects/GrConvolutionEffect.cpp b/src/gpu/effects/GrConvolutionEffect.cpp index 14e817fcec..5aaa1858a0 100644 --- a/src/gpu/effects/GrConvolutionEffect.cpp +++ b/src/gpu/effects/GrConvolutionEffect.cpp @@ -84,29 +84,25 @@ void GrGLConvolutionEffect::emitFS(GrGLShaderBuilder* state, const char* inputColor, const char* samplerName) { GrStringBuilder* code = &state->fFSCode; - // const char* texFunc = "texture2D"; - // bool complexCoord = false; - state->fFSCode.appendf("\t\tvec4 sum = vec4(0, 0, 0, 0);\n"); + code->appendf("\t\tvec4 sum = vec4(0, 0, 0, 0);\n"); code->appendf("\t\tvec2 coord = %s;\n", state->fSampleCoords.c_str()); - code->appendf("\t\tfor (int i = 0; i < %d; i++) {\n", this->width()); - // Creates the string "kernel[i]" with workarounds for - // possible driver bugs - GrStringBuilder kernelIndex; - fKernelVar->appendArrayAccess("i", &kernelIndex); - state->fFSCode.appendf("\t\t\tsum += "); - state->emitTextureLookup(samplerName, "coord"); - state->fFSCode.appendf(" * %s;\n", kernelIndex.c_str()); - - code->appendf("\t\t\tcoord += %s;\n", - fImageIncrementVar->getName().c_str()); - code->appendf("\t\t}\n"); + // Manually unroll loop because some drivers don't; yields 20-30% speedup. + for (int i = 0; i < this->width(); i++) { + GrStringBuilder index; + GrStringBuilder kernelIndex; + index.appendS32(i); + fKernelVar->appendArrayAccess(index.c_str(), &kernelIndex); + code->appendf("\t\tsum += "); + state->emitTextureLookup(samplerName, "coord"); + code->appendf(" * %s;\n", kernelIndex.c_str()); + code->appendf("\t\tcoord += %s;\n", + fImageIncrementVar->getName().c_str()); + } - state->fFSCode.appendf("\t\t%s = sum%s;\n", - outputColor, - state->fModulate.c_str()); + code->appendf("\t\t%s = sum%s;\n", outputColor, state->fModulate.c_str()); } void GrGLConvolutionEffect::initUniforms(const GrGLInterface* gl, |