/*
 * Copyright 2012 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "GrConvolutionEffect.h"
#include "gl/GrGLProcessor.h"
#include "gl/GrGLTexture.h"
#include "gl/builders/GrGLProgramBuilder.h"

// For brevity
typedef GrGLProgramDataManager::UniformHandle UniformHandle;

/**
 * Base class with shared functionality for GrGLBoundedConvolutionEffect and
 * GrGLLerpConvolutionEffect. It captures the kernel radius and direction of the
 * processor it was created from so that subclasses can generate code for them.
 */
class GrGLConvolutionEffect : public GrGLFragmentProcessor {
public:
    GrGLConvolutionEffect(const GrProcessor&);

    /**
     * Appends a single 32-bit key describing the code variant that will be generated.
     * See the definition for the bit layout.
     */
    static inline void GenKey(const GrProcessor&, const GrGLSLCaps&, GrProcessorKeyBuilder*);

protected:
    int radius() const { return fRadius; }
    int width() const { return Gr1DKernelEffect::WidthFromRadius(fRadius); }
    Gr1DKernelEffect::Direction direction() const { return fDirection; }

    /**
     * Writes the step between adjacent kernel taps into the two-element output array.
     * Only the component matching the effect's direction is non-zero.
     */
    void getImageIncrement(const GrConvolutionEffect&, float (*)[2]) const;

private:
    int                         fRadius;
    Gr1DKernelEffect::Direction fDirection;

    typedef GrGLFragmentProcessor INHERITED;
};

GrGLConvolutionEffect::GrGLConvolutionEffect(const GrProcessor& processor) {
    const GrConvolutionEffect& c = processor.cast<GrConvolutionEffect>();
    fRadius = c.radius();
    fDirection = c.direction();
}

void GrGLConvolutionEffect::GenKey(const GrProcessor& processor, const GrGLSLCaps&,
                                   GrProcessorKeyBuilder* b) {
    const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();
    // Key layout: the radius occupies the bits above the low two; bit 1 marks the bounded
    // variant and bit 0 marks a bounded Y-direction kernel.
    uint32_t key = conv.radius();
    key <<= 2;
    if (conv.useBounds()) {
        key |= 0x2;
        // Direction is only keyed for the bounded variant: its generated code names the
        // "x" or "y" component explicitly, whereas the lerp variant's generated code does
        // not reference the direction at all.
        key |= GrConvolutionEffect::kY_Direction == conv.direction() ? 0x1 : 0x0;
    }
    b->add32(key);
}

void GrGLConvolutionEffect::getImageIncrement(const GrConvolutionEffect& conv,
                                              float (*imageIncrement)[2]) const {
    GrTexture& texture = *conv.texture(0);
    (*imageIncrement)[0] = (*imageIncrement)[1] = 0;
    // The Y step is negated when the texture's origin is top-left.
    float ySign = texture.origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f;
    switch (conv.direction()) {
        case Gr1DKernelEffect::kX_Direction:
            (*imageIncrement)[0] = 1.0f / texture.width();
            break;
        case Gr1DKernelEffect::kY_Direction:
            (*imageIncrement)[1] = ySign / texture.height();
            break;
        default:
            SkFAIL("Unknown filter direction.");
    }
}

///////////////////////////////////////////////////////////////////////////////

/**
 * Applies a convolution effect which restricts samples to the provided bounds
 * using shader logic.
 */
class GrGLBoundedConvolutionEffect : public GrGLConvolutionEffect {
public:
    GrGLBoundedConvolutionEffect(const GrProcessor& processor) : INHERITED(processor) {}

    void emitCode(GrGLFPBuilder*,
                  const GrFragmentProcessor&,
                  const char* outputColor,
                  const char* inputColor,
                  const TransformedCoordsArray&,
                  const TextureSamplerArray&) override;

    void setData(const GrGLProgramDataManager& pdman, const GrProcessor&) override;

private:
    UniformHandle fKernelUni;          // float[width()]: one weight per tap
    UniformHandle fImageIncrementUni;  // vec2: step between adjacent taps
    UniformHandle fBoundsUni;          // vec2: inclusive [min, max] along the direction

    typedef GrGLConvolutionEffect INHERITED;
};

/**
 * Emits one texture lookup per kernel tap. Each lookup is wrapped in an "if" that skips
 * taps whose coordinate falls outside the bounds uniform, and the accumulated result is
 * finally modulated by the input color.
 */
void GrGLBoundedConvolutionEffect::emitCode(GrGLFPBuilder* builder,
                                            const GrFragmentProcessor& processor,
                                            const char* outputColor,
                                            const char* inputColor,
                                            const TransformedCoordsArray& coords,
                                            const TextureSamplerArray& samplers) {
    fImageIncrementUni = builder->addUniform(GrGLProgramBuilder::kFragment_Visibility,
                                             kVec2f_GrSLType, kDefault_GrSLPrecision,
                                             "ImageIncrement");
    fBoundsUni = builder->addUniform(GrGLProgramBuilder::kFragment_Visibility,
                                     kVec2f_GrSLType, kDefault_GrSLPrecision,
                                     "Bounds");
    fKernelUni = builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility,
                                          kFloat_GrSLType, kDefault_GrSLPrecision,
                                          "Kernel", this->width());

    GrGLFragmentBuilder* fsBuilder = builder->getFragmentShaderBuilder();
    SkString coords2D = fsBuilder->ensureFSCoords2D(coords, 0);

    fsBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", outputColor);

    int width = this->width();
    const GrGLShaderVar& kernel = builder->getUniformVariable(fKernelUni);
    const char* imgInc = builder->getUniformCStr(fImageIncrementUni);
    // These do not vary per tap, so look them up once rather than on every iteration.
    const char* bounds = builder->getUniformCStr(fBoundsUni);
    const char* component = this->direction() == Gr1DKernelEffect::kY_Direction ? "y" : "x";

    // Start at the first tap, "radius" steps before the current coordinate.
    fsBuilder->codeAppendf("vec2 coord = %s - %d.0 * %s;\n", coords2D.c_str(), this->radius(),
                           imgInc);

    // Manually unroll loop because some drivers don't; yields 20-30% speedup.
    for (int i = 0; i < width; i++) {
        SkString index;
        SkString kernelIndex;
        index.appendS32(i);
        kernel.appendArrayAccess(index.c_str(), &kernelIndex);

        // We used to compute a bool indicating whether we're in bounds or not, cast it to a
        // float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems
        // to have a bug that caused corruption.
        fsBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {",
                               component, bounds, component, bounds);
        fsBuilder->codeAppendf("%s += ", outputColor);
        fsBuilder->appendTextureLookup(samplers[0], "coord");
        fsBuilder->codeAppendf(" * %s;\n", kernelIndex.c_str());
        fsBuilder->codeAppend("}");

        fsBuilder->codeAppendf("coord += %s;\n", imgInc);
    }

    SkString modulate;
    GrGLSLMulVarBy4f(&modulate, outputColor, inputColor);
    fsBuilder->codeAppend(modulate.c_str());
}

/**
 * Uploads the image increment, the bounds and the kernel weights for the given processor.
 */
void GrGLBoundedConvolutionEffect::setData(const GrGLProgramDataManager& pdman,
                                           const GrProcessor& processor) {
    const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();

    // the code we generated was for a specific kernel radius
    SkASSERT(conv.radius() == this->radius());

    // the code we generated was for a specific bounding mode.
    SkASSERT(conv.useBounds());

    GrTexture& texture = *conv.texture(0);
    float imageIncrement[2];
    getImageIncrement(conv, &imageIncrement);
    pdman.set2fv(fImageIncrementUni, 1, imageIncrement);

    const float* bounds = conv.bounds();
    if (Gr1DKernelEffect::kY_Direction == conv.direction() &&
        texture.origin() != kTopLeft_GrSurfaceOrigin) {
        // Flip the interval about 1.0 (and swap its ends so min stays first) for vertical
        // kernels on textures whose origin is not top-left.
        pdman.set2f(fBoundsUni, 1.0f - bounds[1], 1.0f - bounds[0]);
    } else {
        pdman.set2f(fBoundsUni, bounds[0], bounds[1]);
    }
    pdman.set1fv(fKernelUni, this->width(), conv.kernel());
}

///////////////////////////////////////////////////////////////////////////////

/**
 * Applies a convolution effect which applies the convolution using a linear
 * interpolation optimization to use half as many samples.
 */
class GrGLLerpConvolutionEffect : public GrGLConvolutionEffect {
public:
    GrGLLerpConvolutionEffect(const GrProcessor& processor) : INHERITED(processor) {}

    void emitCode(GrGLFPBuilder*,
                  const GrFragmentProcessor&,
                  const char* outputColor,
                  const char* inputColor,
                  const TransformedCoordsArray&,
                  const TextureSamplerArray&) override;

    void setData(const GrGLProgramDataManager& pdman, const GrProcessor&) override;

private:
    /** Number of texture lookups emitted: one per pair of kernel taps, rounded up. */
    int bilerpSampleCount() const;

    // Lerp uniforms
    UniformHandle fSampleWeightUni;  // float[bilerpSampleCount()]: combined weight per lookup
    UniformHandle fSampleOffsetUni;  // vec2[bilerpSampleCount()]: coordinate offset per lookup

    typedef GrGLConvolutionEffect INHERITED;
};

/**
 * Emits one texture lookup per entry of the offset/weight uniform arrays, accumulates the
 * weighted results and finally modulates by the input color.
 */
void GrGLLerpConvolutionEffect::emitCode(GrGLFPBuilder* builder,
                                         const GrFragmentProcessor& processor,
                                         const char* outputColor,
                                         const char* inputColor,
                                         const TransformedCoordsArray& coords,
                                         const TextureSamplerArray& samplers) {
    int sampleCount = bilerpSampleCount();

    // We use 2 * sampleCount uniforms. The maximum allowed by PS2.0 is 32, so
    // ensure we don't exceed this. Note that it is currently impossible to
    // exceed this as bilerpSampleCount = (kernelWidth + 1) / 2, and kernelWidth
    // maxes out at 25, resulting in a max sampleCount of 13 (i.e. 26 uniforms).
    SkASSERT(sampleCount < 16);

    fSampleOffsetUni = builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility,
                                                kVec2f_GrSLType, kDefault_GrSLPrecision,
                                                "SampleOffset", sampleCount);
    fSampleWeightUni = builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility,
                                                kFloat_GrSLType, kDefault_GrSLPrecision,
                                                "SampleWeight", sampleCount);

    GrGLFragmentBuilder* fsBuilder = builder->getFragmentShaderBuilder();
    SkString coords2D = fsBuilder->ensureFSCoords2D(coords, 0);

    fsBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", outputColor);

    const GrGLShaderVar& kernel = builder->getUniformVariable(fSampleWeightUni);
    const GrGLShaderVar& imgInc = builder->getUniformVariable(fSampleOffsetUni);

    fsBuilder->codeAppendf("vec2 coord; \n");

    // Manually unroll loop because some drivers don't; yields 20-30% speedup.
    for (int i = 0; i < sampleCount; i++) {
        SkString index;
        SkString weightIndex;
        SkString offsetIndex;
        index.appendS32(i);
        kernel.appendArrayAccess(index.c_str(), &weightIndex);
        imgInc.appendArrayAccess(index.c_str(), &offsetIndex);
        fsBuilder->codeAppendf("coord = %s + %s;\n", coords2D.c_str(), offsetIndex.c_str());
        fsBuilder->codeAppendf("%s += ", outputColor);
        fsBuilder->appendTextureLookup(samplers[0], "coord");
        fsBuilder->codeAppendf(" * %s;\n", weightIndex.c_str());
    }

    SkString modulate;
    GrGLSLMulVarBy4f(&modulate, outputColor, inputColor);
    fsBuilder->codeAppend(modulate.c_str());
}

/**
 * Collapses each pair of adjacent kernel taps into a single lookup and uploads the
 * resulting offsets and weights. Lookup i covers taps 2i and 2i + 1; its weight is their
 * sum and its offset is placed between them in proportion to the second tap's share.
 */
void GrGLLerpConvolutionEffect::setData(const GrGLProgramDataManager& pdman,
                                        const GrProcessor& processor) {
    const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();

    // the code we generated was for a specific kernel radius
    SkASSERT(conv.radius() == this->radius());

    // the code we generated was for a specific bounding mode.
    SkASSERT(!conv.useBounds());

    int sampleCount = bilerpSampleCount();
    SkAutoTArray<float> imageIncrements(sampleCount * 2); // X and Y floats per sample.
    SkAutoTArray<float> kernel(sampleCount);

    float baseImageIncrement[2];
    getImageIncrement(conv, &baseImageIncrement);

    for (int i = 0; i < sampleCount; i++) {
        int sampleIndex1 = i * 2;
        int sampleIndex2 = sampleIndex1 + 1;

        // If we have an odd number of samples in our filter, the last sample won't use
        // the linear interpolation optimization (it will be pixel aligned).
        if (sampleIndex2 >= this->width()) {
            sampleIndex2 = sampleIndex1;
        }
        const bool isPaired = sampleIndex1 != sampleIndex2;

        float kernelWeight1 = conv.kernel()[sampleIndex1];
        float kernelWeight2 = conv.kernel()[sampleIndex2];

        float totalKernelWeight = isPaired ? (kernelWeight1 + kernelWeight2) : kernelWeight1;

        // Guard the division: when the two weights cancel, the ratio would be inf/NaN and
        // that value would be uploaded as a texture coordinate offset. The combined weight
        // is zero in that case, so the position of the lookup does not matter; keep it
        // pixel aligned.
        // NOTE(review): with weights of opposite sign the ratio falls outside [0, 1], where
        // a single filtered lookup presumably no longer matches the two-tap result --
        // confirm that callers only supply same-signed kernels to the unbounded path.
        float sampleRatio = 0.0f;
        if (isPaired && totalKernelWeight != 0.0f) {
            sampleRatio = kernelWeight2 / totalKernelWeight;
        }

        // Offset of this lookup from the center tap, in units of the base increment.
        const float tapOffset = (-this->radius() + i * 2 + sampleRatio);
        imageIncrements[i * 2] = tapOffset * baseImageIncrement[0];
        imageIncrements[i * 2 + 1] = tapOffset * baseImageIncrement[1];

        kernel[i] = totalKernelWeight;
    }
    pdman.set2fv(fSampleOffsetUni, sampleCount, imageIncrements.get());
    pdman.set1fv(fSampleWeightUni, sampleCount, kernel.get());
}

int GrGLLerpConvolutionEffect::bilerpSampleCount() const {
    // We use a linear interpolation optimization to only sample once for each
    // two pixel aligned samples in the kernel. If we have an odd number of
    // samples, we will have to skip this optimization for the last sample.
    // Because of this we always round up our sample count (by adding 1 before
    // dividing).
    return (this->width() + 1) / 2;
}

///////////////////////////////////////////////////////////////////////////////

/**
 * Builds the effect from an explicit kernel. The first width() entries of "kernel" are
 * copied. "bounds" is copied unconditionally, so it must point at two floats even when
 * useBounds is false. Bounded effects request unfiltered sampling; unbounded ones request
 * bilerp filtering.
 */
GrConvolutionEffect::GrConvolutionEffect(GrProcessorDataManager* procDataManager,
                                         GrTexture* texture,
                                         Direction direction,
                                         int radius,
                                         const float* kernel,
                                         bool useBounds,
                                         float bounds[2])
    : INHERITED(procDataManager, texture, direction, radius,
                useBounds ? GrTextureParams::FilterMode::kNone_FilterMode
                          : GrTextureParams::FilterMode::kBilerp_FilterMode)
    , fUseBounds(useBounds) {
    this->initClassID();
    SkASSERT(radius <= kMaxKernelRadius);
    SkASSERT(kernel);
    int width = this->width();
    for (int i = 0; i < width; i++) {
        fKernel[i] = kernel[i];
    }
    memcpy(fBounds, bounds, sizeof(fBounds));
}

/**
 * Builds the effect from a Gaussian with the given sigma, sampled at integer offsets in
 * [-radius, radius] and normalized so the weights sum to one. "bounds" is copied
 * unconditionally, so it must point at two floats even when useBounds is false.
 */
GrConvolutionEffect::GrConvolutionEffect(GrProcessorDataManager* procDataManager,
                                         GrTexture* texture,
                                         Direction direction,
                                         int radius,
                                         float gaussianSigma,
                                         bool useBounds,
                                         float bounds[2])
    : INHERITED(procDataManager, texture, direction, radius,
                useBounds ? GrTextureParams::FilterMode::kNone_FilterMode
                          : GrTextureParams::FilterMode::kBilerp_FilterMode)
    , fUseBounds(useBounds) {
    this->initClassID();
    SkASSERT(radius <= kMaxKernelRadius);
    int width = this->width();

    float sum = 0.0f;
    // NOTE(review): gaussianSigma is used as a divisor without a check; presumably callers
    // guarantee it is non-zero -- confirm.
    float denom = 1.0f / (2.0f * gaussianSigma * gaussianSigma);
    for (int i = 0; i < width; ++i) {
        float x = static_cast<float>(i - this->radius());
        // Note that the constant term (1/(sqrt(2*pi*sigma^2)) of the Gaussian
        // is dropped here, since we renormalize the kernel below.
        fKernel[i] = sk_float_exp(- x * x * denom);
        sum += fKernel[i];
    }
    // Normalize the kernel
    float scale = 1.0f / sum;
    for (int i = 0; i < width; ++i) {
        fKernel[i] *= scale;
    }
    memcpy(fBounds, bounds, sizeof(fBounds));
}

GrConvolutionEffect::~GrConvolutionEffect() {
}

void GrConvolutionEffect::getGLProcessorKey(const GrGLSLCaps& caps,
                                            GrProcessorKeyBuilder* b) const {
    GrGLConvolutionEffect::GenKey(*this, caps, b);
}

GrGLFragmentProcessor* GrConvolutionEffect::createGLInstance() const {
    // We support a linear interpolation optimization which (when feasible) uses
    // half the number of samples to apply the kernel. This is not always
    // applicable, as the linear interpolation optimization does not support
    // bounded sampling.
    if (this->useBounds()) {
        return SkNEW_ARGS(GrGLBoundedConvolutionEffect, (*this));
    } else {
        return SkNEW_ARGS(GrGLLerpConvolutionEffect, (*this));
    }
}

/**
 * Two effects are equal when radius, direction, bounding mode, bounds and the first
 * width() kernel weights all match.
 */
bool GrConvolutionEffect::onIsEqual(const GrFragmentProcessor& sBase) const {
    const GrConvolutionEffect& s = sBase.cast<GrConvolutionEffect>();
    return (this->radius() == s.radius() &&
            this->direction() == s.direction() &&
            this->useBounds() == s.useBounds() &&
            0 == memcmp(fBounds, s.fBounds, sizeof(fBounds)) &&
            0 == memcmp(fKernel, s.fKernel, this->width() * sizeof(float)));
}

///////////////////////////////////////////////////////////////////////////////

GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrConvolutionEffect);

/**
 * Creates an effect with a random texture, direction, radius, kernel, bounds and
 * bounding mode for the processor unit tests.
 */
GrFragmentProcessor* GrConvolutionEffect::TestCreate(GrProcessorTestData* d) {
    int texIdx = d->fRandom->nextBool() ? GrProcessorUnitTest::kSkiaPMTextureIdx :
                                          GrProcessorUnitTest::kAlphaTextureIdx;
    Direction dir = d->fRandom->nextBool() ? kX_Direction : kY_Direction;
    int radius = d->fRandom->nextRangeU(1, kMaxKernelRadius);
    float kernel[kMaxKernelWidth];
    for (size_t i = 0; i < SK_ARRAY_COUNT(kernel); ++i) {
        kernel[i] = d->fRandom->nextSScalar1();
    }
    float bounds[2];
    for (size_t i = 0; i < SK_ARRAY_COUNT(bounds); ++i) {
        bounds[i] = d->fRandom->nextF();
    }

    bool useBounds = d->fRandom->nextBool();
    return GrConvolutionEffect::Create(d->fProcDataManager,
                                       d->fTextures[texIdx],
                                       dir,
                                       radius,
                                       kernel,
                                       useBounds,
                                       bounds);
}