d3d12: Update copy_texture_region() method

Pass the external fence and its value, if any, and allow passing fence
data so that dependent resources can be released once the copy is
done.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/6749>
This commit is contained in:
Seungha Yang 2024-04-27 17:54:38 +09:00 committed by GStreamer Marge Bot
parent d8aa2eea83
commit 478e49dd73
8 changed files with 97 additions and 27 deletions

View file

@ -39,9 +39,16 @@ GST_D3D12_API
gboolean gst_d3d12_device_copy_texture_region (GstD3D12Device * device,
guint num_args,
const GstD3D12CopyTextureRegionArgs * args,
GstD3D12FenceData * fence_data,
ID3D12Fence * fence_to_wait,
guint64 fence_value_to_wait,
D3D12_COMMAND_LIST_TYPE command_type,
guint64 * fence_value);
GST_D3D12_API
gboolean gst_d3d12_device_acquire_fence_data (GstD3D12Device * device,
GstD3D12FenceData ** fence_data);
GST_D3D12_API
void gst_d3d12_device_clear_yuv_texture (GstD3D12Device * device,
GstMemory * mem);

View file

@ -120,6 +120,8 @@ struct DeviceInner
gst_clear_object (&copy_ca_pool);
gst_clear_object (&copy_cl_pool);
gst_clear_object (&fence_data_pool);
factory = nullptr;
adapter = nullptr;
@ -222,6 +224,8 @@ struct DeviceInner
GstD3D12CommandListPool *copy_cl_pool = nullptr;
GstD3D12CommandAllocatorPool *copy_ca_pool = nullptr;
GstD3D12FenceDataPool *fence_data_pool = nullptr;
guint rtv_inc_size;
guint adapter_index = 0;
@ -910,6 +914,8 @@ gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data)
priv->rtv_inc_size =
device->GetDescriptorHandleIncrementSize (D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
priv->fence_data_pool = gst_d3d12_fence_data_pool_new ();
GST_OBJECT_FLAG_SET (priv->direct_queue, GST_OBJECT_FLAG_MAY_BE_LEAKED);
GST_OBJECT_FLAG_SET (priv->direct_cl_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED);
GST_OBJECT_FLAG_SET (priv->direct_ca_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED);
@ -918,6 +924,8 @@ gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data)
GST_OBJECT_FLAG_SET (priv->copy_cl_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED);
GST_OBJECT_FLAG_SET (priv->copy_ca_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED);
GST_OBJECT_FLAG_SET (priv->fence_data_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED);
hr = device->CreateFence (0,
D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS (&priv->dev_removed_fence));
if (FAILED (hr)) {
@ -1270,6 +1278,8 @@ gst_d3d12_device_fence_wait (GstD3D12Device * device,
gboolean
gst_d3d12_device_copy_texture_region (GstD3D12Device * device,
guint num_args, const GstD3D12CopyTextureRegionArgs * args,
GstD3D12FenceData * fence_data,
ID3D12Fence * fence_to_wait, guint64 fence_value_to_wait,
D3D12_COMMAND_LIST_TYPE command_type, guint64 * fence_value)
{
g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), FALSE);
@ -1285,6 +1295,9 @@ gst_d3d12_device_copy_texture_region (GstD3D12Device * device,
GstD3D12CommandQueue *queue = nullptr;
guint64 fence_val = 0;
if (!fence_data)
gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, &fence_data);
switch (command_type) {
case D3D12_COMMAND_LIST_TYPE_DIRECT:
queue = priv->direct_queue;
@ -1299,21 +1312,25 @@ gst_d3d12_device_copy_texture_region (GstD3D12Device * device,
default:
GST_ERROR_OBJECT (device, "Not supported command list type %d",
command_type);
gst_d3d12_fence_data_unref (fence_data);
return FALSE;
}
gst_d3d12_command_allocator_pool_acquire (ca_pool, &gst_ca);
if (!gst_ca) {
GST_ERROR_OBJECT (device, "Couldn't acquire command allocator");
gst_d3d12_fence_data_unref (fence_data);
return FALSE;
}
gst_d3d12_fence_data_add_notify_mini_object (fence_data, gst_ca);
auto ca = gst_d3d12_command_allocator_get_handle (gst_ca);
gst_d3d12_command_list_pool_acquire (cl_pool, ca, &gst_cl);
if (!gst_cl) {
GST_ERROR_OBJECT (device, "Couldn't acquire command list");
gst_clear_d3d12_command_allocator (&gst_ca);
gst_d3d12_fence_data_unref (fence_data);
return FALSE;
}
@ -1333,23 +1350,23 @@ gst_d3d12_device_copy_texture_region (GstD3D12Device * device,
if (!gst_d3d12_result (hr, device)) {
GST_ERROR_OBJECT (device, "Couldn't close command list");
gst_clear_d3d12_command_list (&gst_cl);
gst_clear_d3d12_command_allocator (&gst_ca);
gst_d3d12_fence_data_unref (fence_data);
return FALSE;
}
ID3D12CommandList *cmd_list[] = { cl.Get () };
hr = gst_d3d12_command_queue_execute_command_lists (queue,
1, cmd_list, &fence_val);
hr = gst_d3d12_command_queue_execute_wait_and_command_lists (queue,
fence_to_wait, fence_value_to_wait, 1, cmd_list, &fence_val);
auto ret = gst_d3d12_result (hr, device);
/* We can release command list since command list pool will hold it */
gst_d3d12_command_list_unref (gst_cl);
if (ret) {
gst_d3d12_command_queue_set_notify (queue, fence_val, gst_ca,
(GDestroyNotify) gst_d3d12_command_allocator_unref);
gst_d3d12_command_queue_set_notify (queue, fence_val, fence_data,
(GDestroyNotify) gst_d3d12_fence_data_unref);
} else {
gst_d3d12_command_allocator_unref (gst_ca);
gst_d3d12_fence_data_unref (fence_data);
}
if (fence_value)
@ -1358,6 +1375,18 @@ gst_d3d12_device_copy_texture_region (GstD3D12Device * device,
return ret;
}
/**
 * gst_d3d12_device_acquire_fence_data:
 * @device: a #GstD3D12Device
 * @fence_data: (out): location that receives the acquired #GstD3D12FenceData
 *
 * Acquires a #GstD3D12FenceData from the fence data pool held by @device.
 *
 * @fence_data is reset to %NULL before anything else can fail, so a caller
 * that declares an uninitialised pointer and does not check the return value
 * never ends up holding an indeterminate pointer.
 *
 * Returns: %FALSE if an argument is invalid, otherwise the value returned by
 * gst_d3d12_fence_data_pool_acquire()
 */
gboolean
gst_d3d12_device_acquire_fence_data (GstD3D12Device * device,
    GstD3D12FenceData ** fence_data)
{
  /* Validate the out-pointer first so it can be cleared on every later
   * failure path, including an invalid @device */
  g_return_val_if_fail (fence_data, FALSE);
  *fence_data = nullptr;

  g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), FALSE);

  auto priv = device->priv->inner;

  return gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, fence_data);
}
static inline GstDebugLevel
d3d12_message_severity_to_gst (D3D12_MESSAGE_SEVERITY level)
{

View file

@ -332,9 +332,14 @@ gst_d3d12_frame_copy (GstD3D12Frame * dest, const GstD3D12Frame * src,
args[i].src_box = &src_box[i];
}
GstD3D12FenceData *fence_data;
gst_d3d12_device_acquire_fence_data (dest->device, &fence_data);
gst_d3d12_fence_data_add_notify_mini_object (fence_data,
gst_buffer_ref (src->buffer));
return gst_d3d12_device_copy_texture_region (dest->device,
GST_VIDEO_INFO_N_PLANES (&dest->info), args,
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_value);
GST_VIDEO_INFO_N_PLANES (&dest->info), args, fence_data,
nullptr, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, fence_value);
}
/**
@ -373,6 +378,11 @@ gst_d3d12_frame_copy_plane (GstD3D12Frame * dest, const GstD3D12Frame * src,
gst_d3d12_frame_build_copy_args (dest, src, plane, &args, &src_box);
args.src_box = &src_box;
GstD3D12FenceData *fence_data;
gst_d3d12_device_acquire_fence_data (dest->device, &fence_data);
gst_d3d12_fence_data_add_notify_mini_object (fence_data,
gst_buffer_ref (src->buffer));
return gst_d3d12_device_copy_texture_region (dest->device, 1, &args,
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_value);
fence_data, nullptr, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, fence_value);
}

View file

@ -479,13 +479,22 @@ gst_d3d12_memory_download (GstD3D12Memory * dmem)
copy_args.push_back (args);
}
gst_d3d12_memory_wait_gpu (dmem, D3D12_COMMAND_LIST_TYPE_DIRECT,
dmem->fence_value);
if (priv->external_fence) {
auto cq = gst_d3d12_device_get_command_queue (dmem->device,
D3D12_COMMAND_LIST_TYPE_COPY);
gst_d3d12_command_queue_execute_wait (cq, priv->external_fence.Get (),
priv->external_fence_val);
}
auto cq = gst_d3d12_device_get_command_queue (dmem->device,
D3D12_COMMAND_LIST_TYPE_DIRECT);
auto direct_fence = gst_d3d12_command_queue_get_fence_handle (cq);
guint64 fence_val = 0;
/* Use async copy queue when downloading */
if (!gst_d3d12_device_copy_texture_region (dmem->device, copy_args.size (),
copy_args.data (), D3D12_COMMAND_LIST_TYPE_COPY, &fence_val)) {
copy_args.data (), nullptr, direct_fence, dmem->fence_value,
D3D12_COMMAND_LIST_TYPE_COPY, &fence_val)) {
GST_ERROR_OBJECT (dmem->device, "Couldn't download texture to staging");
return FALSE;
}
@ -521,7 +530,8 @@ gst_d3d12_memory_upload (GstD3D12Memory * dmem)
}
if (!gst_d3d12_device_copy_texture_region (dmem->device, copy_args.size (),
copy_args.data (), D3D12_COMMAND_LIST_TYPE_DIRECT,
copy_args.data (), nullptr, priv->external_fence.Get (),
priv->external_fence_val, D3D12_COMMAND_LIST_TYPE_DIRECT,
&dmem->fence_value)) {
GST_ERROR_OBJECT (dmem->device, "Couldn't upload texture");
return FALSE;
@ -1166,11 +1176,25 @@ gst_d3d12_memory_copy (GstMemory * mem, gssize offset, gssize size)
mem_priv->subresource_index[i]);
copy_args.push_back (args);
}
gst_memory_unmap (mem, &info);
ComPtr < ID3D12Fence > fence_to_wait;
guint64 fence_value_to_wait;
{
std::lock_guard < std::mutex > lk (mem_priv->lock);
fence_to_wait = mem_priv->external_fence;
fence_value_to_wait = mem_priv->external_fence_val;
}
GstD3D12FenceData *fence_data;
gst_d3d12_device_acquire_fence_data (dmem->device, &fence_data);
gst_d3d12_fence_data_add_notify_mini_object (fence_data,
gst_memory_ref (mem));
gst_d3d12_device_copy_texture_region (dmem->device,
copy_args.size (), copy_args.data (), D3D12_COMMAND_LIST_TYPE_DIRECT,
copy_args.size (), copy_args.data (), fence_data, fence_to_wait.Get (),
fence_value_to_wait, D3D12_COMMAND_LIST_TYPE_DIRECT,
&dst_dmem->fence_value);
gst_memory_unmap (mem, &info);
GST_MINI_OBJECT_FLAG_SET (dst, GST_D3D12_MEMORY_TRANSFER_NEED_DOWNLOAD);

View file

@ -1523,7 +1523,7 @@ gst_d3d12_decoder_process_output (GstD3D12Decoder * self,
if (out_resource)
queue_type = D3D12_COMMAND_LIST_TYPE_DIRECT;
gst_d3d12_device_copy_texture_region (self->device, copy_args.size (),
copy_args.data (), queue_type, &copy_fence_val);
copy_args.data (), nullptr, nullptr, 0, queue_type, &copy_fence_val);
gst_d3d12_device_fence_wait (self->device, queue_type,
copy_fence_val, priv->copy_event_handle);

View file

@ -771,7 +771,8 @@ gst_d3d12_encoder_upload_frame (GstD3D12Encoder * self, GstBuffer * buffer)
guint64 fence_val = 0;
gst_d3d12_device_copy_texture_region (self->device, copy_args.size (),
copy_args.data (), D3D12_COMMAND_LIST_TYPE_DIRECT, &fence_val);
copy_args.data (), nullptr, nullptr, 0, D3D12_COMMAND_LIST_TYPE_DIRECT,
&fence_val);
gst_d3d12_buffer_after_write (upload, fence_val);
} else {
GstVideoFrame src_frame, dst_frame;

View file

@ -513,7 +513,8 @@ gst_d3d12_ipc_client_have_data (GstD3D12IpcClient * self)
guint64 copy_fence_val;
gst_d3d12_device_copy_texture_region (priv->device, copy_args.size (),
copy_args.data (), D3D12_COMMAND_LIST_TYPE_DIRECT, &copy_fence_val);
copy_args.data (), nullptr, nullptr, 0, D3D12_COMMAND_LIST_TYPE_DIRECT,
&copy_fence_val);
auto data = new GstD3D12IpcReleaseData ();
data->self = (GstD3D12IpcClient *) gst_object_ref (self);

View file

@ -363,12 +363,6 @@ gst_dwrite_d3d12_render_draw_layout (GstDWriteRender * render,
args.dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
args.dst.pResource = texture;
gst_d3d12_device_copy_texture_region (priv->device,
1, &args, D3D12_COMMAND_LIST_TYPE_DIRECT, &priv->fence_val);
priv->scheduled.push (priv->fence_val);
dmem->fence_value = priv->fence_val;
GstD3D12FenceData *fence_data;
gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, &fence_data);
auto resource_clone = priv->layout_resource;
@ -377,8 +371,12 @@ gst_dwrite_d3d12_render_draw_layout (GstDWriteRender * render,
gst_d3d12_fence_data_add_notify_com (fence_data, resource_clone.Detach ());
gst_d3d12_fence_data_add_notify_com (fence_data, wrapped_clone.Detach ());
gst_d3d12_device_set_fence_notify (priv->device,
D3D12_COMMAND_LIST_TYPE_DIRECT, dmem->fence_value, fence_data);
gst_d3d12_device_copy_texture_region (priv->device,
1, &args, fence_data, nullptr, 0, D3D12_COMMAND_LIST_TYPE_DIRECT,
&priv->fence_val);
priv->scheduled.push (priv->fence_val);
dmem->fence_value = priv->fence_val;
GST_MINI_OBJECT_FLAG_SET (dmem, GST_D3D12_MEMORY_TRANSFER_NEED_DOWNLOAD);
GST_MINI_OBJECT_FLAG_UNSET (dmem, GST_D3D12_MEMORY_TRANSFER_NEED_UPLOAD);