From 0d28e574ac73fef8bf75cab083ffe23f2d8860a1 Mon Sep 17 00:00:00 2001 From: csmartdalton Date: Wed, 6 Jul 2016 09:59:43 -0700 Subject: Fix caching of sample locations The original caching logic for sample locations wishfully assumed that the GPU would always use the same sample pattern for render targets that had the same number of samples. It turns out we can't rely on that. This change improves the caching logic to handle mismatched simple patterns with the same count, and adds a unit test that emulates different sample patterns observed on real hardware. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2111423002 Committed: https://skia.googlesource.com/skia/+/09d49a3bfe2d1e652a648ce1ea0962b38d10d166 Review-Url: https://codereview.chromium.org/2111423002 --- src/gpu/GrGpu.cpp | 103 ++++++++++++++------------- src/gpu/GrGpu.h | 40 +++++++---- src/gpu/GrRenderTargetPriv.h | 1 + src/gpu/gl/GrGLGpu.cpp | 12 ++-- src/gpu/gl/GrGLGpu.h | 6 +- src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp | 2 +- src/gpu/vk/GrVkGpu.cpp | 2 +- src/gpu/vk/GrVkGpu.h | 6 +- 8 files changed, 92 insertions(+), 80 deletions(-) (limited to 'src/gpu') diff --git a/src/gpu/GrGpu.cpp b/src/gpu/GrGpu.cpp index 1397845f42..4cb96edd88 100644 --- a/src/gpu/GrGpu.cpp +++ b/src/gpu/GrGpu.cpp @@ -46,8 +46,8 @@ GrMesh& GrMesh::operator =(const GrMesh& di) { GrGpu::GrGpu(GrContext* context) : fResetTimestamp(kExpiredTimestamp+1) , fResetBits(kAll_GrBackendState) - , fMultisampleSpecsAllocator(1) , fContext(context) { + fMultisampleSpecs.emplace_back(0, 0, nullptr); // Index 0 is an invalid unique id. } GrGpu::~GrGpu() {} @@ -425,58 +425,63 @@ void GrGpu::didWriteToSurface(GrSurface* surface, const SkIRect* bounds, uint32_ } } -inline static uint8_t multisample_specs_id(uint8_t numSamples, GrSurfaceOrigin origin, - const GrCaps& caps) { - if (!caps.sampleLocationsSupport()) { - return numSamples; +const GrGpu::MultisampleSpecs& GrGpu::getMultisampleSpecs(GrRenderTarget* rt, + const GrStencilSettings& stencil) { + SkASSERT(rt->desc().fSampleCnt > 1); + +#ifndef SK_DEBUG + // In debug mode we query the multisample info every time to verify the caching is correct. + if (uint8_t id = rt->renderTargetPriv().accessMultisampleSpecsID()) { + SkASSERT(id > 0 && id < fMultisampleSpecs.count()); + return fMultisampleSpecs[id]; } +#endif - SkASSERT(numSamples < 128); - SkASSERT(kTopLeft_GrSurfaceOrigin == origin || kBottomLeft_GrSurfaceOrigin == origin); - return (numSamples << 1) | (origin - 1); + int effectiveSampleCnt; + SkSTArray<16, SkPoint, true> pattern; + this->onGetMultisampleSpecs(rt, stencil, &effectiveSampleCnt, &pattern); + SkASSERT(effectiveSampleCnt >= rt->desc().fSampleCnt); + + uint8_t id; + if (this->caps()->sampleLocationsSupport()) { + SkASSERT(pattern.count() == effectiveSampleCnt); + const auto& insertResult = fMultisampleSpecsIdMap.insert( + MultisampleSpecsIdMap::value_type(pattern, SkTMin(fMultisampleSpecs.count(), 255))); + id = insertResult.first->second; + if (insertResult.second) { + // This means the insert did not find the pattern in the map already, and therefore an + // actual insertion took place. (We don't expect to see many unique sample patterns.) + const SkPoint* sampleLocations = insertResult.first->first.begin(); + SkASSERT(id == fMultisampleSpecs.count()); + fMultisampleSpecs.emplace_back(id, effectiveSampleCnt, sampleLocations); + } + } else { + id = effectiveSampleCnt; + for (int i = fMultisampleSpecs.count(); i <= id; ++i) { + fMultisampleSpecs.emplace_back(i, i, nullptr); + } + } + SkASSERT(id > 0); + SkASSERT(!rt->renderTargetPriv().accessMultisampleSpecsID() || + rt->renderTargetPriv().accessMultisampleSpecsID() == id); - GR_STATIC_ASSERT(1 == kTopLeft_GrSurfaceOrigin); - GR_STATIC_ASSERT(2 == kBottomLeft_GrSurfaceOrigin); + rt->renderTargetPriv().accessMultisampleSpecsID() = id; + return fMultisampleSpecs[id]; } -const GrGpu::MultisampleSpecs& GrGpu::getMultisampleSpecs(GrRenderTarget* rt, - const GrStencilSettings& stencil) { - const GrSurfaceDesc& desc = rt->desc(); - uint8_t surfDescKey = multisample_specs_id(desc.fSampleCnt, desc.fOrigin, *this->caps()); - if (fMultisampleSpecsMap.count() > surfDescKey && fMultisampleSpecsMap[surfDescKey]) { -#if !defined(SK_DEBUG) - // In debug mode we query the multisample info every time and verify the caching is correct. - return *fMultisampleSpecsMap[surfDescKey]; -#endif +bool GrGpu::SamplePatternComparator::operator()(const SamplePattern& a, + const SamplePattern& b) const { + if (a.count() != b.count()) { + return a.count() < b.count(); } - int effectiveSampleCnt; - SkAutoTDeleteArray locations(nullptr); - this->onGetMultisampleSpecs(rt, stencil, &effectiveSampleCnt, &locations); - SkASSERT(effectiveSampleCnt && effectiveSampleCnt >= desc.fSampleCnt); - uint8_t effectiveKey = multisample_specs_id(effectiveSampleCnt, desc.fOrigin, *this->caps()); - if (fMultisampleSpecsMap.count() > effectiveKey && fMultisampleSpecsMap[effectiveKey]) { - const MultisampleSpecs& specs = *fMultisampleSpecsMap[effectiveKey]; - SkASSERT(effectiveKey == specs.fUniqueID); - SkASSERT(effectiveSampleCnt == specs.fEffectiveSampleCnt); - SkASSERT(!this->caps()->sampleLocationsSupport() || - !memcmp(locations.get(), specs.fSampleLocations.get(), - effectiveSampleCnt * sizeof(SkPoint))); - SkASSERT(surfDescKey <= effectiveKey); - SkASSERT(!fMultisampleSpecsMap[surfDescKey] || fMultisampleSpecsMap[surfDescKey] == &specs); - fMultisampleSpecsMap[surfDescKey] = &specs; - return specs; - } - const MultisampleSpecs& specs = *new (&fMultisampleSpecsAllocator) - MultisampleSpecs{effectiveKey, effectiveSampleCnt, locations.release()}; - if (fMultisampleSpecsMap.count() <= effectiveKey) { - int n = 1 + effectiveKey - fMultisampleSpecsMap.count(); - fMultisampleSpecsMap.push_back_n(n, (const MultisampleSpecs*) nullptr); - } - fMultisampleSpecsMap[effectiveKey] = &specs; - if (effectiveSampleCnt != desc.fSampleCnt) { - SkASSERT(surfDescKey < effectiveKey); - fMultisampleSpecsMap[surfDescKey] = &specs; - } - return specs; + for (int i = 0; i < a.count(); ++i) { + // This doesn't have geometric meaning. We just need to define an ordering for std::map. + if (a[i].x() != b[i].x()) { + return a[i].x() < b[i].x(); + } + if (a[i].y() != b[i].y()) { + return a[i].y() < b[i].y(); + } + } + return false; // Equal. } - diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h index a733f45e3c..e7c2196c62 100644 --- a/src/gpu/GrGpu.h +++ b/src/gpu/GrGpu.h @@ -17,6 +17,7 @@ #include "GrXferProcessor.h" #include "SkPath.h" #include "SkTArray.h" +#include class GrBatchTracker; class GrBuffer; @@ -332,14 +333,19 @@ public: const SkIPoint& dstPoint); struct MultisampleSpecs { + MultisampleSpecs(uint8_t uniqueID, int effectiveSampleCnt, const SkPoint* locations) + : fUniqueID(uniqueID), + fEffectiveSampleCnt(effectiveSampleCnt), + fSampleLocations(locations) {} + // Nonzero ID that uniquely identifies these multisample specs. - uint8_t fUniqueID; + uint8_t fUniqueID; // The actual number of samples the GPU will run. NOTE: this value can be greater than the // the render target's sample count. - int fEffectiveSampleCnt; - // If sample locations are supported, contains the subpixel locations at which the GPU will - // sample. Pixel center is at (.5, .5) and (0, 0) indicates the top left corner. - SkAutoTDeleteArray fSampleLocations; + int fEffectiveSampleCnt; + // If sample locations are supported, points to the subpixel locations at which the GPU will + // sample. Pixel center is at (.5, .5), and (0, 0) indicates the top left corner. + const SkPoint* fSampleLocations; }; // Finds a render target's multisample specs. The stencil settings are only needed to flush the @@ -495,6 +501,8 @@ protected: // Subclass must initialize this in its constructor. SkAutoTUnref fCaps; + typedef SkTArray SamplePattern; + private: // called when the 3D context state is unknown. Subclass should emit any // assumed 3D context state and dirty any state cache. @@ -560,10 +568,8 @@ private: const SkIPoint& dstPoint) = 0; // overridden by backend specific derived class to perform the multisample queries - virtual void onGetMultisampleSpecs(GrRenderTarget*, - const GrStencilSettings&, - int* effectiveSampleCnt, - SkAutoTDeleteArray* sampleLocations) = 0; + virtual void onGetMultisampleSpecs(GrRenderTarget*, const GrStencilSettings&, + int* effectiveSampleCnt, SamplePattern*) = 0; void resetContext() { this->onResetContext(fResetBits); @@ -571,12 +577,18 @@ private: ++fResetTimestamp; } - ResetTimestamp fResetTimestamp; - uint32_t fResetBits; - SkTArray fMultisampleSpecsMap; - GrTAllocator fMultisampleSpecsAllocator; + struct SamplePatternComparator { + bool operator()(const SamplePattern&, const SamplePattern&) const; + }; + + typedef std::map MultisampleSpecsIdMap; + + ResetTimestamp fResetTimestamp; + uint32_t fResetBits; + MultisampleSpecsIdMap fMultisampleSpecsIdMap; + SkSTArray<1, MultisampleSpecs, true> fMultisampleSpecs; // The context owns us, not vice-versa, so this ptr is not ref'ed by Gpu. - GrContext* fContext; + GrContext* fContext; friend class GrPathRendering; typedef SkRefCnt INHERITED; diff --git a/src/gpu/GrRenderTargetPriv.h b/src/gpu/GrRenderTargetPriv.h index 52eed69a6b..db66bc3316 100644 --- a/src/gpu/GrRenderTargetPriv.h +++ b/src/gpu/GrRenderTargetPriv.h @@ -33,6 +33,7 @@ public: int numStencilBits() const; const GrGpu::MultisampleSpecs& getMultisampleSpecs(const GrStencilSettings& stencil) const; + uint8_t& accessMultisampleSpecsID() { return fRenderTarget->fMultisampleSpecsID; } GrRenderTarget::SampleConfig sampleConfig() const { return fRenderTarget->fSampleConfig; } diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp index 5b24d8a382..a37d72e26b 100644 --- a/src/gpu/gl/GrGLGpu.cpp +++ b/src/gpu/gl/GrGLGpu.cpp @@ -4425,10 +4425,8 @@ bool GrGLGpu::generateMipmap(GrGLTexture* texture, bool gammaCorrect) { return true; } -void GrGLGpu::onGetMultisampleSpecs(GrRenderTarget* rt, - const GrStencilSettings& stencil, - int* effectiveSampleCnt, - SkAutoTDeleteArray* sampleLocations) { +void GrGLGpu::onGetMultisampleSpecs(GrRenderTarget* rt, const GrStencilSettings& stencil, + int* effectiveSampleCnt, SamplePattern* samplePattern) { SkASSERT(!rt->hasMixedSamples() || rt->renderTargetPriv().getStencilAttachment() || stencil.isDisabled()); @@ -4445,14 +4443,14 @@ void GrGLGpu::onGetMultisampleSpecs(GrRenderTarget* rt, SkASSERT(*effectiveSampleCnt >= rt->desc().fSampleCnt); if (this->caps()->sampleLocationsSupport()) { - sampleLocations->reset(new SkPoint[*effectiveSampleCnt]); + samplePattern->reset(*effectiveSampleCnt); for (int i = 0; i < *effectiveSampleCnt; ++i) { GrGLfloat pos[2]; GL_CALL(GetMultisamplefv(GR_GL_SAMPLE_POSITION, i, pos)); if (kTopLeft_GrSurfaceOrigin == rt->origin()) { - (*sampleLocations)[i].set(pos[0], pos[1]); + (*samplePattern)[i].set(pos[0], pos[1]); } else { - (*sampleLocations)[i].set(pos[0], 1 - pos[1]); + (*samplePattern)[i].set(pos[0], 1 - pos[1]); } } } diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h index 013613ad27..c8edbb9c28 100644 --- a/src/gpu/gl/GrGLGpu.h +++ b/src/gpu/gl/GrGLGpu.h @@ -216,10 +216,8 @@ private: const SkIRect& srcRect, const SkIPoint& dstPoint) override; - void onGetMultisampleSpecs(GrRenderTarget*, - const GrStencilSettings&, - int* effectiveSampleCnt, - SkAutoTDeleteArray* sampleLocations) override; + void onGetMultisampleSpecs(GrRenderTarget*, const GrStencilSettings&, + int* effectiveSampleCnt, SamplePattern*) override; // binds texture unit in GL void setTextureUnit(int unitIdx); diff --git a/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp b/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp index a52b1a6386..5d1ba511b7 100644 --- a/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp +++ b/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp @@ -365,7 +365,7 @@ void GrGLSLFragmentShaderBuilder::defineSampleOffsetArray(const char* name, cons const GrGpu::MultisampleSpecs& specs = rtp.getMultisampleSpecs(pipeline.getStencil()); SkSTArray<16, SkPoint, true> offsets; offsets.push_back_n(specs.fEffectiveSampleCnt); - m.mapPoints(offsets.begin(), specs.fSampleLocations.get(), specs.fEffectiveSampleCnt); + m.mapPoints(offsets.begin(), specs.fSampleLocations, specs.fEffectiveSampleCnt); this->definitions().append("const "); if (fProgramBuilder->glslCaps()->usesPrecisionModifiers()) { this->definitions().append("highp "); diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp index 6875824f45..fdec861076 100644 --- a/src/gpu/vk/GrVkGpu.cpp +++ b/src/gpu/vk/GrVkGpu.cpp @@ -1367,7 +1367,7 @@ bool GrVkGpu::initCopySurfaceDstDesc(const GrSurface* src, GrSurfaceDesc* desc) } void GrVkGpu::onGetMultisampleSpecs(GrRenderTarget* rt, const GrStencilSettings&, - int* effectiveSampleCnt, SkAutoTDeleteArray*) { + int* effectiveSampleCnt, SamplePattern*) { // TODO: stub. SkASSERT(!this->caps()->sampleLocationsSupport()); *effectiveSampleCnt = rt->desc().fSampleCnt; diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h index df1aae1824..29a3acad45 100644 --- a/src/gpu/vk/GrVkGpu.h +++ b/src/gpu/vk/GrVkGpu.h @@ -79,10 +79,8 @@ public: const SkIRect& srcRect, const SkIPoint& dstPoint) override; - void onGetMultisampleSpecs(GrRenderTarget* rt, - const GrStencilSettings&, - int* effectiveSampleCnt, - SkAutoTDeleteArray*) override; + void onGetMultisampleSpecs(GrRenderTarget* rt, const GrStencilSettings&, + int* effectiveSampleCnt, SamplePattern*) override; bool initCopySurfaceDstDesc(const GrSurface* src, GrSurfaceDesc* desc) const override; -- cgit v1.2.3