aboutsummaryrefslogtreecommitdiffhomepage
path: root/video
diff options
context:
space:
mode:
authorGravatar Niklas Haas <git@haasn.xyz>2017-07-15 23:54:20 +0200
committerGravatar Niklas Haas <git@haasn.xyz>2017-07-15 23:54:20 +0200
commit8e20ef4292719233d4b249ab714433e845cf6da2 (patch)
tree9c720443b388fc7d7d5e770c7bcccbba91c65b16 /video
parentb93bcce5df343de20096deb6d37e971bfa9a3072 (diff)
vo_opengl: update BufferData usage hints
STREAM is better than DYNAMIC because we're only using it once per frame. As for COPY vs DRAW, that was pretty much incorrect to begin with - but surprisngly, COPY is actually faster (sometimes significantly so, e.g. on my NUMA system). After testing, the best I can gather is that it has to do with the fact that COPY requires fewer redundant memcpy()s, and also 3x reduce RAM bandwidth (in theory). Anyway, that bit shouldn't introduce any regressions, it's just a documentation update. Maybe I'll change my mind about the comment again the future, it's really hard to tell. Vulkan, please save us!
Diffstat (limited to 'video')
-rw-r--r--video/out/opengl/utils.c13
1 files changed, 11 insertions, 2 deletions
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index 9e786c6dcc..9870936bc5 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -232,7 +232,7 @@ void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num)
if (ptr) {
gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer);
- gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_DYNAMIC_DRAW);
+ gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_STREAM_DRAW);
gl->BindBuffer(GL_ARRAY_BUFFER, 0);
}
@@ -1328,8 +1328,17 @@ void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
pbo->buffer_size = buffer_size;
gl->GenBuffers(1, &pbo->buffer);
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
+ // Magic time: Because we memcpy once from RAM to the buffer, and then
+ // the GPU needs to read from this anyway, we actually *don't* want
+ // this buffer to be allocated in RAM. If we allocate it in VRAM
+ // instead, we can reduce this to a single copy: from RAM into VRAM.
+ // Unfortunately, drivers e.g. nvidia will think GL_STREAM_DRAW is best
+ // allocated on host memory instead of device memory, so we lie about
+ // the usage to fool the driver into giving us a buffer in VRAM instead
+ // of RAM, which can be significantly faster for our use case.
+ // Seriously, fuck OpenGL.
gl->BufferData(GL_PIXEL_UNPACK_BUFFER, NUM_PBO_BUFFERS * buffer_size,
- NULL, GL_DYNAMIC_COPY);
+ NULL, GL_STREAM_COPY);
}
size_t offset = buffer_size * pbo->index;