From 91abe10af417148939548551e210c001022d3bda Mon Sep 17 00:00:00 2001 From: ericrk Date: Fri, 10 Jul 2015 15:05:13 -0700 Subject: Bilinear optimization for 1D convolution. Splits GrGLConvolutionEffect into GrGLBilerpConvolutionEffect and GrGLBoundedConvolutionEffect. When doing a non-bounded convolution we now always use the GrGLBilerpConvolutionEffect which uses bilinear filtering to perform half as many samples in the texture. BUG=skia:3986 Review URL: https://codereview.chromium.org/1216623003 --- src/gpu/effects/Gr1DKernelEffect.h | 14 ++ src/gpu/effects/GrConvolutionEffect.cpp | 347 ++++++++++++++++++++++++-------- 2 files changed, 276 insertions(+), 85 deletions(-) diff --git a/src/gpu/effects/Gr1DKernelEffect.h b/src/gpu/effects/Gr1DKernelEffect.h index 0aec4b1336..4fc52b1385 100644 --- a/src/gpu/effects/Gr1DKernelEffect.h +++ b/src/gpu/effects/Gr1DKernelEffect.h @@ -28,6 +28,8 @@ public: kY_Direction, }; + // Constructor using default nearest-neighbor sampling for the input texture + // filter mode. Gr1DKernelEffect(GrProcessorDataManager* procDataManager, GrTexture* texture, Direction direction, @@ -36,6 +38,18 @@ public: , fDirection(direction) , fRadius(radius) {} + Gr1DKernelEffect(GrProcessorDataManager* procDataManager, + GrTexture* texture, + Direction direction, + int radius, + GrTextureParams::FilterMode filterMode) + : INHERITED(procDataManager, + texture, + GrCoordTransform::MakeDivByTextureWHMatrix(texture), + filterMode) + , fDirection(direction) + , fRadius(radius) {} + virtual ~Gr1DKernelEffect() {}; static int WidthFromRadius(int radius) { return 2 * radius + 1; } diff --git a/src/gpu/effects/GrConvolutionEffect.cpp b/src/gpu/effects/GrConvolutionEffect.cpp index f5b5e22ce1..1e1c477cd5 100644 --- a/src/gpu/effects/GrConvolutionEffect.cpp +++ b/src/gpu/effects/GrConvolutionEffect.cpp @@ -13,9 +13,73 @@ // For brevity typedef GrGLProgramDataManager::UniformHandle UniformHandle; +/** + * Base class with shared functionality for GrGLBoundedConvolutionEffect and + * GrGLLerpConvolutionEffect. + */ class GrGLConvolutionEffect : public GrGLFragmentProcessor { public: GrGLConvolutionEffect(const GrProcessor&); + static inline void GenKey(const GrProcessor&, const GrGLSLCaps&, GrProcessorKeyBuilder*); + +protected: + int radius() const { return fRadius; } + int width() const { return Gr1DKernelEffect::WidthFromRadius(fRadius); } + Gr1DKernelEffect::Direction direction() const { return fDirection; } + void getImageIncrement(const GrConvolutionEffect&, float (*)[2]) const; + +private: + int fRadius; + Gr1DKernelEffect::Direction fDirection; + + typedef GrGLFragmentProcessor INHERITED; +}; + +GrGLConvolutionEffect::GrGLConvolutionEffect(const GrProcessor& processor) { + const GrConvolutionEffect& c = processor.cast(); + fRadius = c.radius(); + fDirection = c.direction(); +} + +void GrGLConvolutionEffect::GenKey(const GrProcessor& processor, + const GrGLSLCaps&, + GrProcessorKeyBuilder* b) { + const GrConvolutionEffect& conv = processor.cast(); + uint32_t key = conv.radius(); + key <<= 2; + if (conv.useBounds()) { + key |= 0x2; + key |= GrConvolutionEffect::kY_Direction == conv.direction() ? 0x1 : 0x0; + } + b->add32(key); +} + +void GrGLConvolutionEffect::getImageIncrement(const GrConvolutionEffect& conv, + float (*imageIncrement)[2]) const { + GrTexture& texture = *conv.texture(0); + (*imageIncrement)[0] = (*imageIncrement)[1] = 0; + float ySign = texture.origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f; + switch (conv.direction()) { + case Gr1DKernelEffect::kX_Direction: + (*imageIncrement)[0] = 1.0f / texture.width(); + break; + case Gr1DKernelEffect::kY_Direction: + (*imageIncrement)[1] = ySign / texture.height(); + break; + default: + SkFAIL("Unknown filter direction."); + } +} + +/////////////////////////////////////////////////////////////////////////////// + +/** + * Applies a convolution effect which restricts samples to the provided bounds + * using shader logic. + */ +class GrGLBoundedConvolutionEffect : public GrGLConvolutionEffect { +public: + GrGLBoundedConvolutionEffect(const GrProcessor& processor) : INHERITED(processor) {} virtual void emitCode(GrGLFPBuilder*, const GrFragmentProcessor&, @@ -26,58 +90,41 @@ public: void setData(const GrGLProgramDataManager& pdman, const GrProcessor&) override; - static inline void GenKey(const GrProcessor&, const GrGLSLCaps&, GrProcessorKeyBuilder*); - private: - int width() const { return Gr1DKernelEffect::WidthFromRadius(fRadius); } - bool useBounds() const { return fUseBounds; } - Gr1DKernelEffect::Direction direction() const { return fDirection; } - - int fRadius; - bool fUseBounds; - Gr1DKernelEffect::Direction fDirection; UniformHandle fKernelUni; UniformHandle fImageIncrementUni; UniformHandle fBoundsUni; - typedef GrGLFragmentProcessor INHERITED; + typedef GrGLConvolutionEffect INHERITED; }; -GrGLConvolutionEffect::GrGLConvolutionEffect(const GrProcessor& processor) { - const GrConvolutionEffect& c = processor.cast(); - fRadius = c.radius(); - fUseBounds = c.useBounds(); - fDirection = c.direction(); -} +void GrGLBoundedConvolutionEffect::emitCode(GrGLFPBuilder* builder, + const GrFragmentProcessor& processor, + const char* outputColor, + const char* inputColor, + const TransformedCoordsArray& coords, + const TextureSamplerArray& samplers) { + fImageIncrementUni = + builder->addUniform(GrGLProgramBuilder::kFragment_Visibility, kVec2f_GrSLType, + kDefault_GrSLPrecision, "ImageIncrement"); -void GrGLConvolutionEffect::emitCode(GrGLFPBuilder* builder, - const GrFragmentProcessor&, - const char* outputColor, - const char* inputColor, - const TransformedCoordsArray& coords, - const TextureSamplerArray& samplers) { - fImageIncrementUni = builder->addUniform(GrGLProgramBuilder::kFragment_Visibility, - kVec2f_GrSLType, kDefault_GrSLPrecision, - "ImageIncrement"); - if (this->useBounds()) { - fBoundsUni = builder->addUniform(GrGLProgramBuilder::kFragment_Visibility, - kVec2f_GrSLType, kDefault_GrSLPrecision, - "Bounds"); - } - fKernelUni = builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility, - kFloat_GrSLType, kDefault_GrSLPrecision, - "Kernel", this->width()); + fBoundsUni = builder->addUniform(GrGLProgramBuilder::kFragment_Visibility, kVec2f_GrSLType, + kDefault_GrSLPrecision, "Bounds"); + + fKernelUni = builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility, kFloat_GrSLType, + kDefault_GrSLPrecision, "Kernel", this->width()); GrGLFragmentBuilder* fsBuilder = builder->getFragmentShaderBuilder(); SkString coords2D = fsBuilder->ensureFSCoords2D(coords, 0); - fsBuilder->codeAppendf("\t\t%s = vec4(0, 0, 0, 0);\n", outputColor); + fsBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", outputColor); int width = this->width(); const GrGLShaderVar& kernel = builder->getUniformVariable(fKernelUni); const char* imgInc = builder->getUniformCStr(fImageIncrementUni); - fsBuilder->codeAppendf("\t\tvec2 coord = %s - %d.0 * %s;\n", coords2D.c_str(), fRadius, imgInc); + fsBuilder->codeAppendf("vec2 coord = %s - %d.0 * %s;\n", coords2D.c_str(), this->radius(), + imgInc); // Manually unroll loop because some drivers don't; yields 20-30% speedup. for (int i = 0; i < width; i++) { @@ -85,23 +132,18 @@ void GrGLConvolutionEffect::emitCode(GrGLFPBuilder* builder, SkString kernelIndex; index.appendS32(i); kernel.appendArrayAccess(index.c_str(), &kernelIndex); - - if (this->useBounds()) { - // We used to compute a bool indicating whether we're in bounds or not, cast it to a - // float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems - // to have a bug that caused corruption. - const char* bounds = builder->getUniformCStr(fBoundsUni); - const char* component = this->direction() == Gr1DKernelEffect::kY_Direction ? "y" : "x"; - fsBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {", - component, bounds, component, bounds); - } - fsBuilder->codeAppendf("\t\t%s += ", outputColor); + // We used to compute a bool indicating whether we're in bounds or not, cast it to a + // float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems + // to have a bug that caused corruption. + const char* bounds = builder->getUniformCStr(fBoundsUni); + const char* component = this->direction() == Gr1DKernelEffect::kY_Direction ? "y" : "x"; + fsBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {", + component, bounds, component, bounds); + fsBuilder->codeAppendf("%s += ", outputColor); fsBuilder->appendTextureLookup(samplers[0], "coord"); fsBuilder->codeAppendf(" * %s;\n", kernelIndex.c_str()); - if (this->useBounds()) { - fsBuilder->codeAppend("}"); - } - fsBuilder->codeAppendf("\t\tcoord += %s;\n", imgInc); + fsBuilder->codeAppend("}"); + fsBuilder->codeAppendf("coord += %s;\n", imgInc); } SkString modulate; @@ -109,47 +151,162 @@ void GrGLConvolutionEffect::emitCode(GrGLFPBuilder* builder, fsBuilder->codeAppend(modulate.c_str()); } -void GrGLConvolutionEffect::setData(const GrGLProgramDataManager& pdman, - const GrProcessor& processor) { +void GrGLBoundedConvolutionEffect::setData(const GrGLProgramDataManager& pdman, + const GrProcessor& processor) { const GrConvolutionEffect& conv = processor.cast(); - GrTexture& texture = *conv.texture(0); + // the code we generated was for a specific kernel radius - SkASSERT(conv.radius() == fRadius); - float imageIncrement[2] = { 0 }; - float ySign = texture.origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f; - switch (conv.direction()) { - case Gr1DKernelEffect::kX_Direction: - imageIncrement[0] = 1.0f / texture.width(); - break; - case Gr1DKernelEffect::kY_Direction: - imageIncrement[1] = ySign / texture.height(); - break; - default: - SkFAIL("Unknown filter direction."); - } + SkASSERT(conv.radius() == this->radius()); + + // the code we generated was for a specific bounding mode. + SkASSERT(conv.useBounds()); + + GrTexture& texture = *conv.texture(0); + float imageIncrement[2]; + getImageIncrement(conv, &imageIncrement); pdman.set2fv(fImageIncrementUni, 1, imageIncrement); - if (conv.useBounds()) { - const float* bounds = conv.bounds(); - if (Gr1DKernelEffect::kY_Direction == conv.direction() && - texture.origin() != kTopLeft_GrSurfaceOrigin) { - pdman.set2f(fBoundsUni, 1.0f - bounds[1], 1.0f - bounds[0]); - } else { - pdman.set2f(fBoundsUni, bounds[0], bounds[1]); - } + const float* bounds = conv.bounds(); + if (Gr1DKernelEffect::kY_Direction == conv.direction() && + texture.origin() != kTopLeft_GrSurfaceOrigin) { + pdman.set2f(fBoundsUni, 1.0f - bounds[1], 1.0f - bounds[0]); + } else { + pdman.set2f(fBoundsUni, bounds[0], bounds[1]); } pdman.set1fv(fKernelUni, this->width(), conv.kernel()); } -void GrGLConvolutionEffect::GenKey(const GrProcessor& processor, const GrGLSLCaps&, - GrProcessorKeyBuilder* b) { +/////////////////////////////////////////////////////////////////////////////// + +/** + * Applies a convolution effect which applies the convolution using a linear + * interpolation optimization to use half as many samples. + */ +class GrGLLerpConvolutionEffect : public GrGLConvolutionEffect { +public: + GrGLLerpConvolutionEffect(const GrProcessor& processor) : INHERITED(processor) {} + + virtual void emitCode(GrGLFPBuilder*, + const GrFragmentProcessor&, + const char* outputColor, + const char* inputColor, + const TransformedCoordsArray&, + const TextureSamplerArray&) override; + + void setData(const GrGLProgramDataManager& pdman, const GrProcessor&) override; + +private: + int bilerpSampleCount() const; + + // Bounded uniforms + UniformHandle fSampleWeightUni; + UniformHandle fSampleOffsetUni; + + typedef GrGLConvolutionEffect INHERITED; +}; + +void GrGLLerpConvolutionEffect::emitCode(GrGLFPBuilder* builder, + const GrFragmentProcessor& processor, + const char* outputColor, + const char* inputColor, + const TransformedCoordsArray& coords, + const TextureSamplerArray& samplers) { + int sampleCount = bilerpSampleCount(); + + // We use 2 * sampleCount uniforms. The maximum allowed by PS2.0 is 32, so + // ensure we don't exceed this. Note that it is currently impossible to + // exceed this as bilerpSampleCount = (kernelWidth + 1) / 2, and kernelWidth + // maxes out at 25, resulting in a max sampleCount of 26. + SkASSERT(sampleCount < 16); + + fSampleOffsetUni = + builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility, kVec2f_GrSLType, + kDefault_GrSLPrecision, "SampleOffset", sampleCount); + fSampleWeightUni = + builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility, kFloat_GrSLType, + kDefault_GrSLPrecision, "SampleWeight", sampleCount); + + GrGLFragmentBuilder* fsBuilder = builder->getFragmentShaderBuilder(); + SkString coords2D = fsBuilder->ensureFSCoords2D(coords, 0); + + fsBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", outputColor); + + const GrGLShaderVar& kernel = builder->getUniformVariable(fSampleWeightUni); + const GrGLShaderVar& imgInc = builder->getUniformVariable(fSampleOffsetUni); + + fsBuilder->codeAppendf("vec2 coord; \n"); + + // Manually unroll loop because some drivers don't; yields 20-30% speedup. + for (int i = 0; i < sampleCount; i++) { + SkString index; + SkString weightIndex; + SkString offsetIndex; + index.appendS32(i); + kernel.appendArrayAccess(index.c_str(), &weightIndex); + imgInc.appendArrayAccess(index.c_str(), &offsetIndex); + fsBuilder->codeAppendf("coord = %s + %s;\n", coords2D.c_str(), offsetIndex.c_str()); + fsBuilder->codeAppendf("%s += ", outputColor); + fsBuilder->appendTextureLookup(samplers[0], "coord"); + fsBuilder->codeAppendf(" * %s;\n", weightIndex.c_str()); + } + + SkString modulate; + GrGLSLMulVarBy4f(&modulate, outputColor, inputColor); + fsBuilder->codeAppend(modulate.c_str()); +} + +void GrGLLerpConvolutionEffect::setData(const GrGLProgramDataManager& pdman, + const GrProcessor& processor) { const GrConvolutionEffect& conv = processor.cast(); - uint32_t key = conv.radius(); - key <<= 2; - if (conv.useBounds()) { - key |= 0x2; - key |= GrConvolutionEffect::kY_Direction == conv.direction() ? 0x1 : 0x0; + + // the code we generated was for a specific kernel radius + SkASSERT(conv.radius() == this->radius()); + + // the code we generated was for a specific bounding mode. + SkASSERT(!conv.useBounds()); + + int sampleCount = bilerpSampleCount(); + SkAutoTArray imageIncrements(sampleCount * 2); // X and Y floats per sample. + SkAutoTArray kernel(sampleCount); + + float baseImageIncrement[2]; + getImageIncrement(conv, &baseImageIncrement); + + for (int i = 0; i < sampleCount; i++) { + int sampleIndex1 = i * 2; + int sampleIndex2 = sampleIndex1 + 1; + + // If we have an odd number of samples in our filter, the last sample won't use + // the linear interpolation optimization (it will be pixel aligned). + if (sampleIndex2 >= this->width()) { + sampleIndex2 = sampleIndex1; + } + + float kernelWeight1 = conv.kernel()[sampleIndex1]; + float kernelWeight2 = conv.kernel()[sampleIndex2]; + + float totalKernelWeight = + (sampleIndex1 == sampleIndex2) ? kernelWeight1 : (kernelWeight1 + kernelWeight2); + + float sampleRatio = + (sampleIndex1 == sampleIndex2) ? 0 : kernelWeight2 / (kernelWeight1 + kernelWeight2); + + imageIncrements[i * 2] = (-this->radius() + i * 2 + sampleRatio) * baseImageIncrement[0]; + imageIncrements[i * 2 + 1] = + (-this->radius() + i * 2 + sampleRatio) * baseImageIncrement[1]; + + kernel[i] = totalKernelWeight; } - b->add32(key); + pdman.set2fv(fSampleOffsetUni, sampleCount, imageIncrements.get()); + pdman.set1fv(fSampleWeightUni, sampleCount, kernel.get()); +} + +int GrGLLerpConvolutionEffect::bilerpSampleCount() const { + // We use a linear interpolation optimization to only sample once for each + // two pixel aligned samples in the kernel. If we have an odd number of + // samples, we will have to skip this optimization for the last sample. + // Because of this we always round up our sample count (by adding 1 before + // dividing). + return (this->width() + 1) / 2; } /////////////////////////////////////////////////////////////////////////////// @@ -161,7 +318,13 @@ GrConvolutionEffect::GrConvolutionEffect(GrProcessorDataManager* procDataManager const float* kernel, bool useBounds, float bounds[2]) - : INHERITED(procDataManager, texture, direction, radius), fUseBounds(useBounds) { + : INHERITED(procDataManager, + texture, + direction, + radius, + useBounds ? GrTextureParams::FilterMode::kNone_FilterMode + : GrTextureParams::FilterMode::kBilerp_FilterMode) + , fUseBounds(useBounds) { this->initClassID(); SkASSERT(radius <= kMaxKernelRadius); SkASSERT(kernel); @@ -179,7 +342,13 @@ GrConvolutionEffect::GrConvolutionEffect(GrProcessorDataManager* procDataManager float gaussianSigma, bool useBounds, float bounds[2]) - : INHERITED(procDataManager, texture, direction, radius), fUseBounds(useBounds) { + : INHERITED(procDataManager, + texture, + direction, + radius, + useBounds ? GrTextureParams::FilterMode::kNone_FilterMode + : GrTextureParams::FilterMode::kBilerp_FilterMode) + , fUseBounds(useBounds) { this->initClassID(); SkASSERT(radius <= kMaxKernelRadius); int width = this->width(); @@ -210,7 +379,15 @@ void GrConvolutionEffect::getGLProcessorKey(const GrGLSLCaps& caps, } GrGLFragmentProcessor* GrConvolutionEffect::createGLInstance() const { - return SkNEW_ARGS(GrGLConvolutionEffect, (*this)); + // We support a linear interpolation optimization which (when feasible) uses + // half the number of samples to apply the kernel. This is not always + // applicable, as the linear interpolation optimization does not support + // bounded sampling. + if (this->useBounds()) { + return SkNEW_ARGS(GrGLBoundedConvolutionEffect, (*this)); + } else { + return SkNEW_ARGS(GrGLLerpConvolutionEffect, (*this)); + } } bool GrConvolutionEffect::onIsEqual(const GrFragmentProcessor& sBase) const { -- cgit v1.2.3