d3d12decoder: Remove CPU-side waiting

Sets decoder command queue's fence to memory instead of waiting
from decoder's output thread. CPU-side waiting will happen
only if download is required.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/6749>
This commit is contained in:
Seungha Yang 2024-04-27 23:34:35 +09:00 committed by GStreamer Marge Bot
parent a05961ab7b
commit b7844ef307

View file

@ -445,7 +445,7 @@ gst_d3d12_decoder_open (GstD3D12Decoder * decoder, GstElement * element)
desc.Type = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE;
desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
cmd->queue = gst_d3d12_command_queue_new (cmd->device.Get (), &desc,
D3D12_FENCE_FLAG_NONE, 4);
D3D12_FENCE_FLAG_NONE, ASYNC_DEPTH * 2);
if (!cmd->queue) {
GST_ERROR_OBJECT (element, "Couldn't create command queue");
return FALSE;
@ -1299,6 +1299,17 @@ gst_d3d12_decoder_end_picture (GstD3D12Decoder * decoder,
}
decoder_pic->fence_val = priv->cmd->fence_val;
auto fence_handle =
gst_d3d12_command_queue_get_fence_handle (priv->cmd->queue);
dmem = (GstD3D12Memory *) gst_buffer_peek_memory (decoder_pic->buffer, 0);
gst_d3d12_memory_set_external_fence (dmem,
fence_handle, priv->cmd->fence_val);
if (decoder_pic->output_buffer) {
dmem = (GstD3D12Memory *)
gst_buffer_peek_memory (decoder_pic->output_buffer, 0);
gst_d3d12_memory_set_external_fence (dmem,
fence_handle, priv->cmd->fence_val);
}
GstD3D12FenceData *fence_data;
gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, &fence_data);
@ -1519,18 +1530,28 @@ gst_d3d12_decoder_process_output (GstD3D12Decoder * self,
}
guint64 copy_fence_val = 0;
GstD3D12FenceData *fence_data = nullptr;
D3D12_COMMAND_LIST_TYPE queue_type = D3D12_COMMAND_LIST_TYPE_COPY;
if (out_resource)
if (out_resource) {
queue_type = D3D12_COMMAND_LIST_TYPE_DIRECT;
gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, &fence_data);
gst_d3d12_fence_data_add_notify_mini_object (fence_data,
gst_buffer_ref (buffer));
}
auto fence_handle =
gst_d3d12_command_queue_get_fence_handle (priv->cmd->queue);
gst_d3d12_device_copy_texture_region (self->device, copy_args.size (),
copy_args.data (), nullptr, nullptr, 0, queue_type, &copy_fence_val);
gst_d3d12_device_fence_wait (self->device, queue_type,
copy_fence_val, priv->copy_event_handle);
copy_args.data (), fence_data, fence_handle, decoder_pic->fence_val,
queue_type, &copy_fence_val);
if (!out_resource) {
guint8 *map_data;
GstVideoFrame vframe;
gst_d3d12_device_fence_wait (self->device, queue_type,
copy_fence_val, priv->copy_event_handle);
hr = priv->session->staging->Map (0, nullptr, (void **) &map_data);
if (!gst_d3d12_result (hr, self->device)) {
ret = GST_FLOW_ERROR;
@ -1555,6 +1576,8 @@ gst_d3d12_decoder_process_output (GstD3D12Decoder * self,
priv->session->staging->Unmap (0, nullptr);
gst_video_frame_unmap (&vframe);
} else {
gst_d3d12_buffer_after_write (frame->output_buffer, copy_fence_val);
}
}
@ -1592,8 +1615,6 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self)
GST_DEBUG_OBJECT (self, "Entering output thread");
HANDLE event_handle = CreateEventEx (nullptr, nullptr, 0, EVENT_ALL_ACCESS);
while (true) {
DecoderOutputData output_data;
{
@ -1613,8 +1634,6 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self)
auto decoder_pic = get_decoder_picture (output_data.picture);
g_assert (decoder_pic);
gst_d3d12_command_queue_fence_wait (priv->cmd->queue,
decoder_pic->fence_val, event_handle);
if (priv->flushing) {
GST_DEBUG_OBJECT (self, "Drop framem, we are flushing");
@ -1640,8 +1659,6 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self)
GST_DEBUG_OBJECT (self, "Leaving output thread");
CloseHandle (event_handle);
return nullptr;
}