aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorGravatar csmartdalton <csmartdalton@google.com>2016-07-04 15:55:17 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2016-07-04 15:55:17 -0700
commit09d49a3bfe2d1e652a648ce1ea0962b38d10d166 (patch)
treecc7ab96fee76e22b6180bcfb9b8073dd80d020f6 /src
parent2b2810b4eb423cc6a368b5f8b011a3508fef7a63 (diff)
Fix caching of sample locations
The original caching logic for sample locations wishfully assumed that the GPU would always use the same sample pattern for render targets that had the same number of samples. It turns out we can't rely on that. This change improves the caching logic to handle mismatched sample patterns with the same count, and adds a unit test that emulates different sample patterns observed on real hardware. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2111423002 Review-Url: https://codereview.chromium.org/2111423002
Diffstat (limited to 'src')
-rw-r--r--src/gpu/GrGpu.cpp103
-rw-r--r--src/gpu/GrGpu.h38
-rw-r--r--src/gpu/GrRenderTargetPriv.h1
-rw-r--r--src/gpu/gl/GrGLGpu.cpp12
-rw-r--r--src/gpu/gl/GrGLGpu.h6
-rw-r--r--src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp2
-rw-r--r--src/gpu/vk/GrVkGpu.cpp2
-rw-r--r--src/gpu/vk/GrVkGpu.h6
8 files changed, 90 insertions, 80 deletions
diff --git a/src/gpu/GrGpu.cpp b/src/gpu/GrGpu.cpp
index 1397845f42..812b20a1ca 100644
--- a/src/gpu/GrGpu.cpp
+++ b/src/gpu/GrGpu.cpp
@@ -46,8 +46,8 @@ GrMesh& GrMesh::operator =(const GrMesh& di) {
GrGpu::GrGpu(GrContext* context)
: fResetTimestamp(kExpiredTimestamp+1)
, fResetBits(kAll_GrBackendState)
- , fMultisampleSpecsAllocator(1)
, fContext(context) {
+ fMultisampleSpecs.emplace_back(0, 0, nullptr); // Index 0 is an invalid unique id.
}
GrGpu::~GrGpu() {}
@@ -425,58 +425,63 @@ void GrGpu::didWriteToSurface(GrSurface* surface, const SkIRect* bounds, uint32_
}
}
-inline static uint8_t multisample_specs_id(uint8_t numSamples, GrSurfaceOrigin origin,
- const GrCaps& caps) {
- if (!caps.sampleLocationsSupport()) {
- return numSamples;
+const GrGpu::MultisampleSpecs& GrGpu::getMultisampleSpecs(GrRenderTarget* rt,
+ const GrStencilSettings& stencil) {
+ SkASSERT(rt->desc().fSampleCnt > 1);
+
+#ifndef SK_DEBUG
+ // In debug mode we query the multisample info every time to verify the caching is correct.
+ if (uint8_t id = rt->renderTargetPriv().accessMultisampleSpecsID()) {
+ SkASSERT(id > 0 && id < fMultisampleSpecs.count());
+ return fMultisampleSpecs[id];
}
+#endif
- SkASSERT(numSamples < 128);
- SkASSERT(kTopLeft_GrSurfaceOrigin == origin || kBottomLeft_GrSurfaceOrigin == origin);
- return (numSamples << 1) | (origin - 1);
+ int effectiveSampleCnt;
+ SkSTArray<16, SkPoint, true> pattern;
+ this->onGetMultisampleSpecs(rt, stencil, &effectiveSampleCnt, &pattern);
+ SkASSERT(effectiveSampleCnt >= rt->desc().fSampleCnt);
+
+ uint8_t id;
+ if (this->caps()->sampleLocationsSupport()) {
+ SkASSERT(pattern.count() == effectiveSampleCnt);
+ const auto& emplaceResult =
+ fMultisampleSpecsIdMap.emplace(pattern, SkTMin(fMultisampleSpecs.count(), 255));
+ id = emplaceResult.first->second;
+ if (emplaceResult.second) {
+ // This means the emplace did not find the pattern in the map already, and therefore an
+ // actual insertion took place. (We don't expect to see many unique sample patterns.)
+ const SkPoint* sampleLocations = emplaceResult.first->first.begin();
+ SkASSERT(id == fMultisampleSpecs.count());
+ fMultisampleSpecs.emplace_back(id, effectiveSampleCnt, sampleLocations);
+ }
+ } else {
+ id = effectiveSampleCnt;
+ for (int i = fMultisampleSpecs.count(); i <= id; ++i) {
+ fMultisampleSpecs.emplace_back(i, i, nullptr);
+ }
+ }
+ SkASSERT(id > 0);
+ SkASSERT(!rt->renderTargetPriv().accessMultisampleSpecsID() ||
+ rt->renderTargetPriv().accessMultisampleSpecsID() == id);
- GR_STATIC_ASSERT(1 == kTopLeft_GrSurfaceOrigin);
- GR_STATIC_ASSERT(2 == kBottomLeft_GrSurfaceOrigin);
+ rt->renderTargetPriv().accessMultisampleSpecsID() = id;
+ return fMultisampleSpecs[id];
}
-const GrGpu::MultisampleSpecs& GrGpu::getMultisampleSpecs(GrRenderTarget* rt,
- const GrStencilSettings& stencil) {
- const GrSurfaceDesc& desc = rt->desc();
- uint8_t surfDescKey = multisample_specs_id(desc.fSampleCnt, desc.fOrigin, *this->caps());
- if (fMultisampleSpecsMap.count() > surfDescKey && fMultisampleSpecsMap[surfDescKey]) {
-#if !defined(SK_DEBUG)
- // In debug mode we query the multisample info every time and verify the caching is correct.
- return *fMultisampleSpecsMap[surfDescKey];
-#endif
+bool GrGpu::SamplePatternComparator::operator()(const SamplePattern& a,
+ const SamplePattern& b) const {
+ if (a.count() != b.count()) {
+ return a.count() < b.count();
}
- int effectiveSampleCnt;
- SkAutoTDeleteArray<SkPoint> locations(nullptr);
- this->onGetMultisampleSpecs(rt, stencil, &effectiveSampleCnt, &locations);
- SkASSERT(effectiveSampleCnt && effectiveSampleCnt >= desc.fSampleCnt);
- uint8_t effectiveKey = multisample_specs_id(effectiveSampleCnt, desc.fOrigin, *this->caps());
- if (fMultisampleSpecsMap.count() > effectiveKey && fMultisampleSpecsMap[effectiveKey]) {
- const MultisampleSpecs& specs = *fMultisampleSpecsMap[effectiveKey];
- SkASSERT(effectiveKey == specs.fUniqueID);
- SkASSERT(effectiveSampleCnt == specs.fEffectiveSampleCnt);
- SkASSERT(!this->caps()->sampleLocationsSupport() ||
- !memcmp(locations.get(), specs.fSampleLocations.get(),
- effectiveSampleCnt * sizeof(SkPoint)));
- SkASSERT(surfDescKey <= effectiveKey);
- SkASSERT(!fMultisampleSpecsMap[surfDescKey] || fMultisampleSpecsMap[surfDescKey] == &specs);
- fMultisampleSpecsMap[surfDescKey] = &specs;
- return specs;
- }
- const MultisampleSpecs& specs = *new (&fMultisampleSpecsAllocator)
- MultisampleSpecs{effectiveKey, effectiveSampleCnt, locations.release()};
- if (fMultisampleSpecsMap.count() <= effectiveKey) {
- int n = 1 + effectiveKey - fMultisampleSpecsMap.count();
- fMultisampleSpecsMap.push_back_n(n, (const MultisampleSpecs*) nullptr);
- }
- fMultisampleSpecsMap[effectiveKey] = &specs;
- if (effectiveSampleCnt != desc.fSampleCnt) {
- SkASSERT(surfDescKey < effectiveKey);
- fMultisampleSpecsMap[surfDescKey] = &specs;
- }
- return specs;
+ for (int i = 0; i < a.count(); ++i) {
+ // This doesn't have geometric meaning. We just need to define an ordering for std::map.
+ if (a[i].x() != b[i].x()) {
+ return a[i].x() < b[i].x();
+ }
+ if (a[i].y() != b[i].y()) {
+ return a[i].y() < b[i].y();
+ }
+ }
+ return false; // Equal.
}
-
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index 032edc3fc9..e77f29346f 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -17,6 +17,7 @@
#include "GrXferProcessor.h"
#include "SkPath.h"
#include "SkTArray.h"
+#include <map>
class GrBatchTracker;
class GrBuffer;
@@ -341,14 +342,19 @@ public:
const SkIPoint& dstPoint);
struct MultisampleSpecs {
+ MultisampleSpecs(uint8_t uniqueID, int effectiveSampleCnt, const SkPoint* locations)
+ : fUniqueID(uniqueID),
+ fEffectiveSampleCnt(effectiveSampleCnt),
+ fSampleLocations(locations) {}
+
// Nonzero ID that uniquely identifies these multisample specs.
- uint8_t fUniqueID;
+ uint8_t fUniqueID;
// The actual number of samples the GPU will run. NOTE: this value can be greater than the
// render target's sample count.
- int fEffectiveSampleCnt;
- // If sample locations are supported, contains the subpixel locations at which the GPU will
- // sample. Pixel center is at (.5, .5) and (0, 0) indicates the top left corner.
- SkAutoTDeleteArray<const SkPoint> fSampleLocations;
+ int fEffectiveSampleCnt;
+ // If sample locations are supported, points to the subpixel locations at which the GPU will
+ // sample. Pixel center is at (.5, .5), and (0, 0) indicates the top left corner.
+ const SkPoint* fSampleLocations;
};
// Finds a render target's multisample specs. The stencil settings are only needed to flush the
@@ -504,6 +510,8 @@ protected:
// Subclass must initialize this in its constructor.
SkAutoTUnref<const GrCaps> fCaps;
+ typedef SkTArray<SkPoint, true> SamplePattern;
+
private:
// called when the 3D context state is unknown. Subclass should emit any
// assumed 3D context state and dirty any state cache.
@@ -569,10 +577,8 @@ private:
const SkIPoint& dstPoint) = 0;
// overridden by backend specific derived class to perform the multisample queries
- virtual void onGetMultisampleSpecs(GrRenderTarget*,
- const GrStencilSettings&,
- int* effectiveSampleCnt,
- SkAutoTDeleteArray<SkPoint>* sampleLocations) = 0;
+ virtual void onGetMultisampleSpecs(GrRenderTarget*, const GrStencilSettings&,
+ int* effectiveSampleCnt, SamplePattern*) = 0;
void resetContext() {
this->onResetContext(fResetBits);
@@ -580,12 +586,16 @@ private:
++fResetTimestamp;
}
- ResetTimestamp fResetTimestamp;
- uint32_t fResetBits;
- SkTArray<const MultisampleSpecs*, true> fMultisampleSpecsMap;
- GrTAllocator<MultisampleSpecs> fMultisampleSpecsAllocator;
+ struct SamplePatternComparator {
+ bool operator()(const SamplePattern&, const SamplePattern&) const;
+ };
+
+ ResetTimestamp fResetTimestamp;
+ uint32_t fResetBits;
+ std::map<SamplePattern, uint8_t, SamplePatternComparator> fMultisampleSpecsIdMap;
+ SkSTArray<1, MultisampleSpecs, true> fMultisampleSpecs;
// The context owns us, not vice-versa, so this ptr is not ref'ed by Gpu.
- GrContext* fContext;
+ GrContext* fContext;
friend class GrPathRendering;
friend class gr_instanced::InstancedRendering;
diff --git a/src/gpu/GrRenderTargetPriv.h b/src/gpu/GrRenderTargetPriv.h
index 52eed69a6b..db66bc3316 100644
--- a/src/gpu/GrRenderTargetPriv.h
+++ b/src/gpu/GrRenderTargetPriv.h
@@ -33,6 +33,7 @@ public:
int numStencilBits() const;
const GrGpu::MultisampleSpecs& getMultisampleSpecs(const GrStencilSettings& stencil) const;
+ uint8_t& accessMultisampleSpecsID() { return fRenderTarget->fMultisampleSpecsID; }
GrRenderTarget::SampleConfig sampleConfig() const { return fRenderTarget->fSampleConfig; }
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index 2fb66804e0..9972690487 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -4432,10 +4432,8 @@ bool GrGLGpu::generateMipmap(GrGLTexture* texture, bool gammaCorrect) {
return true;
}
-void GrGLGpu::onGetMultisampleSpecs(GrRenderTarget* rt,
- const GrStencilSettings& stencil,
- int* effectiveSampleCnt,
- SkAutoTDeleteArray<SkPoint>* sampleLocations) {
+void GrGLGpu::onGetMultisampleSpecs(GrRenderTarget* rt, const GrStencilSettings& stencil,
+ int* effectiveSampleCnt, SamplePattern* samplePattern) {
SkASSERT(!rt->hasMixedSamples() || rt->renderTargetPriv().getStencilAttachment() ||
stencil.isDisabled());
@@ -4452,14 +4450,14 @@ void GrGLGpu::onGetMultisampleSpecs(GrRenderTarget* rt,
SkASSERT(*effectiveSampleCnt >= rt->desc().fSampleCnt);
if (this->caps()->sampleLocationsSupport()) {
- sampleLocations->reset(new SkPoint[*effectiveSampleCnt]);
+ samplePattern->reset(*effectiveSampleCnt);
for (int i = 0; i < *effectiveSampleCnt; ++i) {
GrGLfloat pos[2];
GL_CALL(GetMultisamplefv(GR_GL_SAMPLE_POSITION, i, pos));
if (kTopLeft_GrSurfaceOrigin == rt->origin()) {
- (*sampleLocations)[i].set(pos[0], pos[1]);
+ (*samplePattern)[i].set(pos[0], pos[1]);
} else {
- (*sampleLocations)[i].set(pos[0], 1 - pos[1]);
+ (*samplePattern)[i].set(pos[0], 1 - pos[1]);
}
}
}
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index b7daa431d8..5cc0facea6 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -220,10 +220,8 @@ private:
const SkIRect& srcRect,
const SkIPoint& dstPoint) override;
- void onGetMultisampleSpecs(GrRenderTarget*,
- const GrStencilSettings&,
- int* effectiveSampleCnt,
- SkAutoTDeleteArray<SkPoint>* sampleLocations) override;
+ void onGetMultisampleSpecs(GrRenderTarget*, const GrStencilSettings&,
+ int* effectiveSampleCnt, SamplePattern*) override;
// binds texture unit in GL
void setTextureUnit(int unitIdx);
diff --git a/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp b/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp
index a52b1a6386..5d1ba511b7 100644
--- a/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp
+++ b/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp
@@ -365,7 +365,7 @@ void GrGLSLFragmentShaderBuilder::defineSampleOffsetArray(const char* name, cons
const GrGpu::MultisampleSpecs& specs = rtp.getMultisampleSpecs(pipeline.getStencil());
SkSTArray<16, SkPoint, true> offsets;
offsets.push_back_n(specs.fEffectiveSampleCnt);
- m.mapPoints(offsets.begin(), specs.fSampleLocations.get(), specs.fEffectiveSampleCnt);
+ m.mapPoints(offsets.begin(), specs.fSampleLocations, specs.fEffectiveSampleCnt);
this->definitions().append("const ");
if (fProgramBuilder->glslCaps()->usesPrecisionModifiers()) {
this->definitions().append("highp ");
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index c1cec9d96f..fb2c4a49ed 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -1365,7 +1365,7 @@ bool GrVkGpu::initCopySurfaceDstDesc(const GrSurface* src, GrSurfaceDesc* desc)
}
void GrVkGpu::onGetMultisampleSpecs(GrRenderTarget* rt, const GrStencilSettings&,
- int* effectiveSampleCnt, SkAutoTDeleteArray<SkPoint>*) {
+ int* effectiveSampleCnt, SamplePattern*) {
// TODO: stub.
SkASSERT(!this->caps()->sampleLocationsSupport());
*effectiveSampleCnt = rt->desc().fSampleCnt;
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index 542917e9d0..5e5510c3ae 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -79,10 +79,8 @@ public:
const SkIRect& srcRect,
const SkIPoint& dstPoint) override;
- void onGetMultisampleSpecs(GrRenderTarget* rt,
- const GrStencilSettings&,
- int* effectiveSampleCnt,
- SkAutoTDeleteArray<SkPoint>*) override;
+ void onGetMultisampleSpecs(GrRenderTarget* rt, const GrStencilSettings&,
+ int* effectiveSampleCnt, SamplePattern*) override;
bool initCopySurfaceDstDesc(const GrSurface* src, GrSurfaceDesc* desc) const override;