about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: wm4 <wm4@nowhere>, 2015-09-25 18:58:49 +0200
committer: wm4 <wm4@nowhere>, 2015-09-25 19:18:16 +0200
commit: ac5538c7df460f218d9eaf10f7c0ee17a300ee7b (patch)
tree: 4713c373ee8a5d109545ebeefcab7768e57941af
parent: 0ae8aebb89b5d0b2226a5d3852a9c72cd52da2ff (diff)
vaapi: use GPU memcpy for reading back from HW surface
This makes the readback much faster when the surface is actually mapped from GPU memory. It is slightly slower than the system memcpy when used on system memory. We cannot reliably determine which type of memory the surface is located in, so we use the GPU memcpy in all cases. Fixes #2317.
-rw-r--r-- video/vaapi.c 6
-rw-r--r-- video/vaapi.h 1
-rw-r--r-- wscript 2
3 files changed, 7 insertions, 2 deletions
diff --git a/video/vaapi.c b/video/vaapi.c
index c2a298f556..16954f7313 100644
--- a/video/vaapi.c
+++ b/video/vaapi.c
@@ -439,7 +439,11 @@ static struct mp_image *try_download(struct mp_image *src,
mp_image_set_size(&tmp, src->w, src->h); // copy only visible part
dst = mp_image_pool_get(pool, tmp.imgfmt, tmp.w, tmp.h);
if (dst) {
- mp_image_copy(dst, &tmp);
+ va_lock(p->ctx);
+ mp_check_gpu_memcpy(p->ctx->log, &p->ctx->gpu_memcpy_message);
+ va_unlock(p->ctx);
+
+ mp_image_copy_gpu(dst, &tmp);
mp_image_copy_attributes(dst, src);
}
va_image_unmap(p->ctx, image);
diff --git a/video/vaapi.h b/video/vaapi.h
index c9692f61e1..5f27e782b4 100644
--- a/video/vaapi.h
+++ b/video/vaapi.h
@@ -48,6 +48,7 @@ struct mp_vaapi_ctx {
struct mp_log *log;
VADisplay display;
struct va_image_formats *image_formats;
+ bool gpu_memcpy_message;
pthread_mutex_t lock;
};
diff --git a/wscript b/wscript
index 63c74686fa..6f04545995 100644
--- a/wscript
+++ b/wscript
@@ -772,7 +772,7 @@ hwaccel_features = [
}, {
'name': 'sse4-intrinsics',
'desc': 'GCC SSE4 intrinsics for GPU memcpy',
- 'deps_any': [ 'dxva2-hwaccel' ],
+ 'deps_any': [ 'dxva2-hwaccel', 'vaapi-hwaccel' ],
'func': check_cc(fragment=load_fragment('sse.c')),
}
]