diff options
author | wm4 <wm4@nowhere> | 2015-09-25 18:58:49 +0200 |
---|---|---|
committer | wm4 <wm4@nowhere> | 2015-09-25 19:18:16 +0200 |
commit | ac5538c7df460f218d9eaf10f7c0ee17a300ee7b (patch) | |
tree | 4713c373ee8a5d109545ebeefcab7768e57941af | |
parent | 0ae8aebb89b5d0b2226a5d3852a9c72cd52da2ff (diff) |
vaapi: use GPU memcpy for reading back from HW surface
This makes the readback much faster when the surface is actually mapped
from GPU memory. It is slightly slower than the system memcpy when the
surface is in system memory. We cannot reliably tell which type of memory
the surface is located in, so we use the GPU memcpy in all cases.
Fixes #2317.
-rw-r--r-- | video/vaapi.c | 6 | ||||
-rw-r--r-- | video/vaapi.h | 1 | ||||
-rw-r--r-- | wscript | 2 |
3 files changed, 7 insertions, 2 deletions
diff --git a/video/vaapi.c b/video/vaapi.c
index c2a298f556..16954f7313 100644
--- a/video/vaapi.c
+++ b/video/vaapi.c
@@ -439,7 +439,11 @@ static struct mp_image *try_download(struct mp_image *src,
     mp_image_set_size(&tmp, src->w, src->h); // copy only visible part
     dst = mp_image_pool_get(pool, tmp.imgfmt, tmp.w, tmp.h);
     if (dst) {
-        mp_image_copy(dst, &tmp);
+        va_lock(p->ctx);
+        mp_check_gpu_memcpy(p->ctx->log, &p->ctx->gpu_memcpy_message);
+        va_unlock(p->ctx);
+
+        mp_image_copy_gpu(dst, &tmp);
         mp_image_copy_attributes(dst, src);
     }
     va_image_unmap(p->ctx, image);
diff --git a/video/vaapi.h b/video/vaapi.h
index c9692f61e1..5f27e782b4 100644
--- a/video/vaapi.h
+++ b/video/vaapi.h
@@ -48,6 +48,7 @@ struct mp_vaapi_ctx {
     struct mp_log *log;
     VADisplay display;
     struct va_image_formats *image_formats;
+    bool gpu_memcpy_message;
     pthread_mutex_t lock;
 };
 
diff --git a/wscript b/wscript
--- a/wscript
+++ b/wscript
@@ -772,7 +772,7 @@ hwaccel_features = [
     }, {
         'name': 'sse4-intrinsics',
         'desc': 'GCC SSE4 intrinsics for GPU memcpy',
-        'deps_any': [ 'dxva2-hwaccel' ],
+        'deps_any': [ 'dxva2-hwaccel', 'vaapi-hwaccel' ],
         'func': check_cc(fragment=load_fragment('sse.c')),
     }
 ]