d3d12: Add convert element

Implement converter object with convert element

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/5870>
This commit is contained in:
Seungha Yang 2023-12-29 21:22:41 +09:00 committed by GStreamer Marge Bot
parent 4198bd6932
commit 660f2d7d27
21 changed files with 7244 additions and 4 deletions

View file

@ -23,8 +23,37 @@
#include "gstd3d12device.h"
#include "gstd3d12format.h"
/*
* Preferred sorting order in a tier
* - number of components
* - depth
* - subsampling
* - supports both SRV and RTV
* - prefer smaller number of planes
* - prefer non-complex formats
* - prefer YUV formats over RGB ones
* - prefer I420 over YV12
* - format name
*/
/* DXGI (semi) native formats */
#define GST_D3D12_TIER_0_FORMATS \
    "RGBA64_LE, RGB10A2_LE, Y410, VUYA, RGBA, BGRA, RBGA, P016_LE, P012_LE, " \
    "P010_10LE, RGBx, BGRx, NV12"
/* both SRV and RTV are supported */
#define GST_D3D12_TIER_1_FORMATS \
    "AYUV64, GBRA_12LE, GBRA_10LE, AYUV, ABGR, ARGB, GBRA, Y444_16LE, " \
    "GBR_16LE, Y444_12LE, GBR_12LE, I422_12LE, I420_12LE, Y444_10LE, GBR_10LE, " \
    "I422_10LE, I420_10LE, Y444, BGRP, GBR, RGBP, xBGR, xRGB, Y42B, NV21, " \
    "I420, YV12, GRAY16_LE, GRAY8"
/* Tier 0 formats followed by tier 1 formats, comma separated */
#define GST_D3D12_COMMON_FORMATS \
    GST_D3D12_TIER_0_FORMATS ", " \
    GST_D3D12_TIER_1_FORMATS
/* Brace-enclosed list of every common format. The previous hard-coded
 * "{ NV12, P010_10LE, P016_LE }" list is superseded by the tiered lists
 * above; leaving it in place (without a line continuation) made the macro
 * expand to the old list and left a stray string literal at file scope. */
#define GST_D3D12_ALL_FORMATS \
    "{ " GST_D3D12_COMMON_FORMATS " }"
#define MAKE_FORMAT_MAP_YUV(g,d,r0,r1,r2,r3) \
{ GST_VIDEO_FORMAT_ ##g, DXGI_FORMAT_ ##d, \

View file

@ -25,6 +25,7 @@
#include "gstd3d12commandallocatorpool.h"
#include "gstd3d12commandlistpool.h"
#include "gstd3d12commandqueue.h"
#include "gstd3d12converter.h"
#include "gstd3d12descriptorpool.h"
#include "gstd3d12device.h"
#include "gstd3d12fencedatapool.h"

View file

@ -54,5 +54,9 @@ typedef struct _GstD3D12BufferPool GstD3D12BufferPool;
typedef struct _GstD3D12BufferPoolClass GstD3D12BufferPoolClass;
typedef struct _GstD3D12BufferPoolPrivate GstD3D12BufferPoolPrivate;
typedef struct _GstD3D12Converter GstD3D12Converter;
typedef struct _GstD3D12ConverterClass GstD3D12ConverterClass;
typedef struct _GstD3D12ConverterPrivate GstD3D12ConverterPrivate;
G_END_DECLS

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,32 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <gst/gst.h>
#include "gstd3d12basefilter.h"
G_BEGIN_DECLS
/* GType accessor for the convert element */
#define GST_TYPE_D3D12_CONVERT (gst_d3d12_convert_get_type())
/* Declares GstD3D12Convert as a final (non-derivable) type whose parent
 * instance type is GstD3D12BaseFilter */
G_DECLARE_FINAL_TYPE (GstD3D12Convert, gst_d3d12_convert,
GST, D3D12_CONVERT, GstD3D12BaseFilter)
G_END_DECLS

View file

@ -0,0 +1,579 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstd3d12.h"
#include "gstd3d12converter-builder.h"
#include <directx/d3dx12.h>
#include <map>
#include <mutex>
#include <string>
#include <utility>
#include <memory>
#include "PSMainConverter.h"
#include "VSMain_converter.h"
GST_DEBUG_CATEGORY_EXTERN (gst_d3d12_converter_debug);
#define GST_CAT_DEFAULT gst_d3d12_converter_debug
/* *INDENT-OFF* */
using namespace Microsoft::WRL;
/* Output layout of a pixel shader pass. The number of render targets
 * noted per value is the one returned by ps_output_get_num_rtv() */
enum class PS_OUTPUT
{
PACKED, /* 1 render target */
LUMA, /* 1 render target */
CHROMA, /* 1 render target */
CHROMA_PLANAR, /* 2 render targets */
PLANAR, /* 3 render targets */
PLANAR_FULL, /* 4 render targets */
};
/* Maps @output to its "PS_OUTPUT_*" identifier string.
 * Unknown values trip g_assert_not_reached() and yield an empty string. */
static const std::string
ps_output_to_string (PS_OUTPUT output)
{
  const char *label = "";

  switch (output) {
    case PS_OUTPUT::PACKED:
      label = "PS_OUTPUT_PACKED";
      break;
    case PS_OUTPUT::LUMA:
      label = "PS_OUTPUT_LUMA";
      break;
    case PS_OUTPUT::CHROMA:
      label = "PS_OUTPUT_CHROMA";
      break;
    case PS_OUTPUT::CHROMA_PLANAR:
      label = "PS_OUTPUT_CHROMA_PLANAR";
      break;
    case PS_OUTPUT::PLANAR:
      label = "PS_OUTPUT_PLANAR";
      break;
    case PS_OUTPUT::PLANAR_FULL:
      label = "PS_OUTPUT_PLANAR_FULL";
      break;
    default:
      g_assert_not_reached ();
      break;
  }

  return label;
}
/* Returns how many render targets a shader with the given @output layout
 * writes. Unknown values trip g_assert_not_reached() and yield 0. */
static guint
ps_output_get_num_rtv (PS_OUTPUT output)
{
  guint num_rtv = 0;

  switch (output) {
    case PS_OUTPUT::PACKED:
    case PS_OUTPUT::LUMA:
    case PS_OUTPUT::CHROMA:
      num_rtv = 1;
      break;
    case PS_OUTPUT::CHROMA_PLANAR:
      num_rtv = 2;
      break;
    case PS_OUTPUT::PLANAR:
      num_rtv = 3;
      break;
    case PS_OUTPUT::PLANAR_FULL:
      num_rtv = 4;
      break;
    default:
      g_assert_not_reached ();
      break;
  }

  return num_rtv;
}
/* Returns the input-sampler name used as the first component of a pixel
 * shader entry point ("PSMain_<input>_...") for @format.
 *
 * @premul only matters for the RGBA-like packed formats, where it selects
 * "RGBAPremul" instead of "RGBA". Unsupported formats trip
 * g_assert_not_reached() and yield an empty string. */
static std::string
make_input (GstVideoFormat format, gboolean premul)
{
  const char *name = "";

  switch (format) {
    case GST_VIDEO_FORMAT_RGBA:
    case GST_VIDEO_FORMAT_RGBA64_LE:
    case GST_VIDEO_FORMAT_RGB10A2_LE:
    case GST_VIDEO_FORMAT_BGRA:
      name = premul ? "RGBAPremul" : "RGBA";
      break;
    case GST_VIDEO_FORMAT_RGBx:
    case GST_VIDEO_FORMAT_BGRx:
      name = "RGBx";
      break;
    case GST_VIDEO_FORMAT_ARGB:
      name = "ARGB";
      break;
    case GST_VIDEO_FORMAT_xRGB:
      name = "xRGB";
      break;
    case GST_VIDEO_FORMAT_ABGR:
      name = "ABGR";
      break;
    case GST_VIDEO_FORMAT_xBGR:
      name = "xBGR";
      break;
    case GST_VIDEO_FORMAT_VUYA:
      name = "VUYA";
      break;
    case GST_VIDEO_FORMAT_AYUV:
    case GST_VIDEO_FORMAT_AYUV64:
      name = "AYUV";
      break;
    case GST_VIDEO_FORMAT_NV12:
    case GST_VIDEO_FORMAT_P010_10LE:
    case GST_VIDEO_FORMAT_P012_LE:
    case GST_VIDEO_FORMAT_P016_LE:
      name = "NV12";
      break;
    case GST_VIDEO_FORMAT_NV21:
      name = "NV21";
      break;
    case GST_VIDEO_FORMAT_I420:
    case GST_VIDEO_FORMAT_Y42B:
    case GST_VIDEO_FORMAT_Y444:
    case GST_VIDEO_FORMAT_Y444_16LE:
      name = "I420";
      break;
    case GST_VIDEO_FORMAT_YV12:
      name = "YV12";
      break;
    case GST_VIDEO_FORMAT_I420_10LE:
    case GST_VIDEO_FORMAT_I422_10LE:
    case GST_VIDEO_FORMAT_Y444_10LE:
      name = "I420_10";
      break;
    case GST_VIDEO_FORMAT_I420_12LE:
    case GST_VIDEO_FORMAT_I422_12LE:
    case GST_VIDEO_FORMAT_Y444_12LE:
      name = "I420_12";
      break;
    case GST_VIDEO_FORMAT_Y410:
      name = "Y410";
      break;
    case GST_VIDEO_FORMAT_GRAY8:
    case GST_VIDEO_FORMAT_GRAY16_LE:
      name = "GRAY";
      break;
    case GST_VIDEO_FORMAT_RGBP:
      name = "RGBP";
      break;
    case GST_VIDEO_FORMAT_BGRP:
      name = "BGRP";
      break;
    case GST_VIDEO_FORMAT_GBR:
    case GST_VIDEO_FORMAT_GBR_16LE:
      name = "GBR";
      break;
    case GST_VIDEO_FORMAT_GBR_10LE:
      name = "GBR_10";
      break;
    case GST_VIDEO_FORMAT_GBR_12LE:
      name = "GBR_12";
      break;
    case GST_VIDEO_FORMAT_GBRA:
      name = "GBRA";
      break;
    case GST_VIDEO_FORMAT_GBRA_10LE:
      name = "GBRA_10";
      break;
    case GST_VIDEO_FORMAT_GBRA_12LE:
      name = "GBRA_12";
      break;
    case GST_VIDEO_FORMAT_Y412_LE:
      name = "Y412";
      break;
    case GST_VIDEO_FORMAT_BGR10A2_LE:
      name = "BGR10A2";
      break;
    case GST_VIDEO_FORMAT_BGRA64_LE:
      name = "BGRA64";
      break;
    case GST_VIDEO_FORMAT_RBGA:
      name = "RBGA";
      break;
    default:
      g_assert_not_reached ();
      break;
  }

  return name;
}
/* Returns the list of (output layout, output name) pairs needed to render
 * @format, in the order they were appended. Formats that need separate
 * luma and chroma passes produce two entries; all others produce one.
 *
 * @premul only matters for the RGBA-like packed formats, where it selects
 * "RGBAPremul" instead of "RGBA". Unsupported formats trip
 * g_assert_not_reached() and yield an empty list. */
static std::vector<std::pair<PS_OUTPUT, std::string>>
make_output (GstVideoFormat format, gboolean premul)
{
  std::vector<std::pair<PS_OUTPUT, std::string>> passes;
  auto add = [&passes] (PS_OUTPUT layout, const char * name) {
    passes.emplace_back (layout, name);
  };

  switch (format) {
    case GST_VIDEO_FORMAT_RGBA:
    case GST_VIDEO_FORMAT_RGBA64_LE:
    case GST_VIDEO_FORMAT_RGB10A2_LE:
    case GST_VIDEO_FORMAT_BGRA:
      add (PS_OUTPUT::PACKED, premul ? "RGBAPremul" : "RGBA");
      break;
    case GST_VIDEO_FORMAT_RGBx:
    case GST_VIDEO_FORMAT_BGRx:
      add (PS_OUTPUT::PACKED, "RGBx");
      break;
    case GST_VIDEO_FORMAT_ARGB:
      add (PS_OUTPUT::PACKED, "ARGB");
      break;
    case GST_VIDEO_FORMAT_xRGB:
      add (PS_OUTPUT::PACKED, "xRGB");
      break;
    case GST_VIDEO_FORMAT_ABGR:
      add (PS_OUTPUT::PACKED, "ABGR");
      break;
    case GST_VIDEO_FORMAT_xBGR:
      add (PS_OUTPUT::PACKED, "xBGR");
      break;
    case GST_VIDEO_FORMAT_VUYA:
      add (PS_OUTPUT::PACKED, "VUYA");
      break;
    case GST_VIDEO_FORMAT_AYUV:
    case GST_VIDEO_FORMAT_AYUV64:
      add (PS_OUTPUT::PACKED, "AYUV");
      break;
    case GST_VIDEO_FORMAT_NV12:
    case GST_VIDEO_FORMAT_P010_10LE:
    case GST_VIDEO_FORMAT_P012_LE:
    case GST_VIDEO_FORMAT_P016_LE:
      add (PS_OUTPUT::LUMA, "Luma");
      add (PS_OUTPUT::CHROMA, "ChromaNV12");
      break;
    case GST_VIDEO_FORMAT_NV21:
      add (PS_OUTPUT::LUMA, "Luma");
      add (PS_OUTPUT::CHROMA, "ChromaNV21");
      break;
    case GST_VIDEO_FORMAT_I420:
    case GST_VIDEO_FORMAT_Y42B:
      add (PS_OUTPUT::LUMA, "Luma");
      add (PS_OUTPUT::CHROMA_PLANAR, "ChromaI420");
      break;
    case GST_VIDEO_FORMAT_Y444:
    case GST_VIDEO_FORMAT_Y444_16LE:
      add (PS_OUTPUT::PLANAR, "Y444");
      break;
    case GST_VIDEO_FORMAT_YV12:
      add (PS_OUTPUT::LUMA, "Luma");
      add (PS_OUTPUT::CHROMA_PLANAR, "ChromaYV12");
      break;
    case GST_VIDEO_FORMAT_I420_10LE:
    case GST_VIDEO_FORMAT_I422_10LE:
      add (PS_OUTPUT::LUMA, "Luma_10");
      add (PS_OUTPUT::CHROMA_PLANAR, "ChromaI420_10");
      break;
    case GST_VIDEO_FORMAT_Y444_10LE:
      add (PS_OUTPUT::PLANAR, "Y444_10");
      break;
    case GST_VIDEO_FORMAT_I420_12LE:
    case GST_VIDEO_FORMAT_I422_12LE:
      add (PS_OUTPUT::LUMA, "Luma_12");
      add (PS_OUTPUT::CHROMA_PLANAR, "ChromaI420_12");
      break;
    case GST_VIDEO_FORMAT_Y444_12LE:
      add (PS_OUTPUT::PLANAR, "Y444_12");
      break;
    case GST_VIDEO_FORMAT_GRAY8:
    case GST_VIDEO_FORMAT_GRAY16_LE:
      add (PS_OUTPUT::LUMA, "Luma");
      break;
    case GST_VIDEO_FORMAT_RGBP:
      add (PS_OUTPUT::PLANAR, "RGBP");
      break;
    case GST_VIDEO_FORMAT_BGRP:
      add (PS_OUTPUT::PLANAR, "BGRP");
      break;
    case GST_VIDEO_FORMAT_GBR:
    case GST_VIDEO_FORMAT_GBR_16LE:
      add (PS_OUTPUT::PLANAR, "GBR");
      break;
    case GST_VIDEO_FORMAT_GBR_10LE:
      add (PS_OUTPUT::PLANAR, "GBR_10");
      break;
    case GST_VIDEO_FORMAT_GBR_12LE:
      add (PS_OUTPUT::PLANAR, "GBR_12");
      break;
    case GST_VIDEO_FORMAT_GBRA:
      add (PS_OUTPUT::PLANAR_FULL, "GBRA");
      break;
    case GST_VIDEO_FORMAT_GBRA_10LE:
      add (PS_OUTPUT::PLANAR_FULL, "GBRA_10");
      break;
    case GST_VIDEO_FORMAT_GBRA_12LE:
      add (PS_OUTPUT::PLANAR_FULL, "GBRA_12");
      break;
    case GST_VIDEO_FORMAT_RBGA:
      add (PS_OUTPUT::PACKED, "RBGA");
      break;
    default:
      g_assert_not_reached ();
      break;
  }

  return passes;
}
/* Looks up the precompiled pixel shader(s) converting @in_format to
 * @out_format with the given conversion @type.
 *
 * One blob is returned per output pass reported by make_output(), each
 * found under the entry point "PSMain_<input>_<type>_<output>". Blobs are
 * memoized in a function-local cache guarded by a mutex. If any entry
 * point has no precompiled bytecode, an error is logged and an empty list
 * is returned. */
PixelShaderBlobList
gst_d3d12_get_converter_pixel_shader_blob (GstVideoFormat in_format,
    GstVideoFormat out_format, gboolean in_premul, gboolean out_premul,
    CONVERT_TYPE type)
{
  const std::string input = make_input (in_format, in_premul);
  const auto passes = make_output (out_format, out_premul);
  static std::mutex cache_lock;
  static std::map<std::string, std::shared_ptr<PixelShaderBlob>> ps_cache;

  std::string conv_type;
  switch (type) {
    case CONVERT_TYPE::IDENTITY:
      conv_type = "Identity";
      break;
    case CONVERT_TYPE::SIMPLE:
      conv_type = "Simple";
      break;
    case CONVERT_TYPE::RANGE:
      conv_type = "Range";
      break;
    case CONVERT_TYPE::GAMMA:
      conv_type = "Gamma";
      break;
    case CONVERT_TYPE::PRIMARY:
      conv_type = "Primary";
      break;
  }

  PixelShaderBlobList blobs;
  for (const auto & pass : passes) {
    const std::string entry_point =
        "PSMain_" + input + "_" + conv_type + "_" + pass.second;
    std::shared_ptr<PixelShaderBlob> blob;

    /* The lock is held for the remainder of this iteration */
    std::lock_guard<std::mutex> lk (cache_lock);

    const auto hit = ps_cache.find (entry_point);
    if (hit == ps_cache.end ()) {
      const auto bytecode = precompiled_bytecode.find (entry_point);
      if (bytecode == precompiled_bytecode.end ()) {
        GST_ERROR ("Couldn't find precompiled %s", entry_point.c_str ());
        return PixelShaderBlobList ();
      }

      blob = std::make_shared<PixelShaderBlob> ();
      blob->bytecode.pShaderBytecode = bytecode->second.first;
      blob->bytecode.BytecodeLength = bytecode->second.second;
      blob->num_rtv = ps_output_get_num_rtv (pass.first);
      ps_cache[entry_point] = blob;
    } else {
      blob = hit->second;
    }

    blobs.push_back (*blob);
  }

  return blobs;
}
/* *INDENT-ON* */
/* Fills @vs with the precompiled converter vertex shader (g_VSMain_converter)
 * and @input_desc with its two-element input layout:
 *   [0] POSITION, DXGI_FORMAT_R32G32B32_FLOAT
 *   [1] TEXCOORD, DXGI_FORMAT_R32G32_FLOAT
 * Both elements use input slot 0, per-vertex data and appended offsets.
 * Always returns S_OK. */
HRESULT
gst_d3d12_get_converter_vertex_shader_blob (D3D12_SHADER_BYTECODE * vs,
    D3D12_INPUT_ELEMENT_DESC input_desc[2])
{
  /* Settings shared by both elements */
  for (guint idx = 0; idx < 2; idx++) {
    D3D12_INPUT_ELEMENT_DESC *elem = &input_desc[idx];

    elem->SemanticIndex = 0;
    elem->InputSlot = 0;
    elem->AlignedByteOffset = D3D12_APPEND_ALIGNED_ELEMENT;
    elem->InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
    elem->InstanceDataStepRate = 0;
  }

  input_desc[0].SemanticName = "POSITION";
  input_desc[0].Format = DXGI_FORMAT_R32G32B32_FLOAT;

  input_desc[1].SemanticName = "TEXCOORD";
  input_desc[1].Format = DXGI_FORMAT_R32G32_FLOAT;

  vs->pShaderBytecode = g_VSMain_converter;
  vs->BytecodeLength = sizeof (g_VSMain_converter);

  return S_OK;
}
/* root signature
*
* +-----+---------+--------------+
* | RS | size in | |
* | idx | DWORD | |
* +-----+---------+--------------+
* | 0 | 1 | table (SRV) |
* +-----+---------+--------------+
* | 1 | 16 | VS matrix |
* +-----+---------+--------------+
* | 2 | 1 | PS alpha |
* +-----+---------+--------------+
* | 3 | 2 | PS CBV |
* +-----+---------+--------------+
*/
/* Template static sampler copied by the ConverterRootSignature constructor,
 * which then overrides Filter / MaxAnisotropy / ShaderRegister as needed.
 * Field names below follow the declaration order of
 * D3D12_STATIC_SAMPLER_DESC in the D3D12 headers. */
static const D3D12_STATIC_SAMPLER_DESC static_sampler_desc_ = {
D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT, /* Filter */
D3D12_TEXTURE_ADDRESS_MODE_CLAMP, /* AddressU */
D3D12_TEXTURE_ADDRESS_MODE_CLAMP, /* AddressV */
D3D12_TEXTURE_ADDRESS_MODE_CLAMP, /* AddressW */
0, /* MipLODBias */
1, /* MaxAnisotropy */
D3D12_COMPARISON_FUNC_ALWAYS, /* ComparisonFunc */
D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK, /* BorderColor */
0, /* MinLOD */
D3D12_FLOAT32_MAX, /* MaxLOD */
0, /* ShaderRegister */
0, /* RegisterSpace */
D3D12_SHADER_VISIBILITY_PIXEL /* ShaderVisibility */
};
/* Root signature flags passed to both the 1.0 and 1.1 descriptions built by
 * the ConverterRootSignature constructor: the input assembler layout is
 * allowed, while root access is denied for the hull, domain, geometry,
 * amplification and mesh shader stages. */
static const D3D12_ROOT_SIGNATURE_FLAGS rs_flags_ =
D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT |
D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS |
D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS |
D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS |
D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS |
D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS;
/* Builds and serializes the converter root signature.
 *
 * @version: root signature description version to build; 1.1 selects the
 *   *1 descriptor types with explicit range flags, anything else builds a
 *   1.0 description
 * @num_srv: number of single-descriptor SRV ranges placed in the pixel
 *   shader descriptor table (shader registers 0 .. num_srv - 1)
 * @filter: filter for the first static sampler
 * @build_lut: when true, adds a second static sampler (shader register 1)
 *   and two extra SRV ranges at shader registers 4 and 5
 *
 * The root parameter indices are recorded in ps_srv_, vs_root_const_,
 * ps_root_const_ and ps_cbv_ in the order the parameters are appended, so
 * the push_back order below must not change. The serialization result is
 * stored in hr_ and the blob in blob_; failures are logged, not thrown. */
ConverterRootSignature::ConverterRootSignature (D3D_ROOT_SIGNATURE_VERSION
version, UINT num_srv, D3D12_FILTER filter, bool build_lut)
{
D3D12_VERSIONED_ROOT_SIGNATURE_DESC desc = { };
num_srv_ = num_srv;
have_lut_ = build_lut;
std::vector < D3D12_STATIC_SAMPLER_DESC > static_sampler;
/* First sampler: template with the caller's filter applied */
D3D12_STATIC_SAMPLER_DESC sampler_desc = static_sampler_desc_;
sampler_desc.Filter = filter;
if (filter == D3D12_FILTER_ANISOTROPIC)
sampler_desc.MaxAnisotropy = 16;
static_sampler.push_back (sampler_desc);
/* Second sampler (LUT case only): unmodified template at register 1 */
if (build_lut) {
sampler_desc = static_sampler_desc_;
sampler_desc.ShaderRegister = 1;
static_sampler.push_back (sampler_desc);
}
/* Ranges and parameters live in these vectors until serialization below,
 * because the description only stores pointers into them */
std::vector < D3D12_DESCRIPTOR_RANGE1 > range_v1_1;
std::vector < D3D12_ROOT_PARAMETER1 > param_list_v1_1;
std::vector < D3D12_DESCRIPTOR_RANGE > range_v1_0;
std::vector < D3D12_ROOT_PARAMETER > param_list_v1_0;
if (version == D3D_ROOT_SIGNATURE_VERSION_1_1) {
CD3DX12_ROOT_PARAMETER1 param;
/* The SRV descriptor table is always the first root parameter */
ps_srv_ = 0;
for (UINT i = 0; i < num_srv; i++) {
range_v1_1.push_back (CD3DX12_DESCRIPTOR_RANGE1
(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, i, 0,
D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE |
D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE));
}
if (build_lut) {
range_v1_1.push_back (CD3DX12_DESCRIPTOR_RANGE1
(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 4, 0,
D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE |
D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE));
range_v1_1.push_back (CD3DX12_DESCRIPTOR_RANGE1
(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 5, 0,
D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE |
D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE));
}
param.InitAsDescriptorTable (range_v1_1.size (),
range_v1_1.data (), D3D12_SHADER_VISIBILITY_PIXEL);
param_list_v1_1.push_back (param);
/* VS root const, maybe updated */
vs_root_const_ = (UINT) param_list_v1_1.size ();
param.InitAsConstants (16, 0, 1, D3D12_SHADER_VISIBILITY_VERTEX);
param_list_v1_1.push_back (param);
/* PS alpha constant value, maybe updated */
ps_root_const_ = (UINT) param_list_v1_1.size ();
param.InitAsConstants (1, 0, 0, D3D12_SHADER_VISIBILITY_PIXEL);
param_list_v1_1.push_back (param);
/* PS CBV, this is static */
ps_cbv_ = (UINT) param_list_v1_1.size ();
param.InitAsConstantBufferView (1, 0,
D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE,
D3D12_SHADER_VISIBILITY_PIXEL);
param_list_v1_1.push_back (param);
CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC::Init_1_1 (desc,
param_list_v1_1.size (), param_list_v1_1.data (),
static_sampler.size (), static_sampler.data (), rs_flags_);
} else {
/* Version 1.0 layout: same parameters in the same order, without the
 * range / root descriptor flags that only exist in version 1.1 */
CD3DX12_ROOT_PARAMETER param;
ps_srv_ = 0;
for (UINT i = 0; i < num_srv; i++) {
range_v1_0.push_back (CD3DX12_DESCRIPTOR_RANGE
(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, i));
}
if (build_lut) {
range_v1_0.push_back (CD3DX12_DESCRIPTOR_RANGE
(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 4));
range_v1_0.push_back (CD3DX12_DESCRIPTOR_RANGE
(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 5));
}
param.InitAsDescriptorTable (range_v1_0.size (),
range_v1_0.data (), D3D12_SHADER_VISIBILITY_PIXEL);
param_list_v1_0.push_back (param);
/* VS root const, maybe updated */
vs_root_const_ = (UINT) param_list_v1_0.size ();
param.InitAsConstants (16, 0, 1, D3D12_SHADER_VISIBILITY_VERTEX);
param_list_v1_0.push_back (param);
/* PS alpha constant value, maybe updated */
ps_root_const_ = (UINT) param_list_v1_0.size ();
param.InitAsConstants (1, 0, 0, D3D12_SHADER_VISIBILITY_PIXEL);
param_list_v1_0.push_back (param);
/* PS CBV, this is static */
ps_cbv_ = (UINT) param_list_v1_0.size ();
param.InitAsConstantBufferView (1, 0, D3D12_SHADER_VISIBILITY_PIXEL);
param_list_v1_0.push_back (param);
CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC::Init_1_0 (desc,
param_list_v1_0.size (), param_list_v1_0.data (),
static_sampler.size (), static_sampler.data (), rs_flags_);
}
/* NOTE(review): the maximum version passed here is always 1.1, regardless
 * of @version - confirm this is intended for the 1.0 fallback path */
ComPtr < ID3DBlob > error_blob;
hr_ = D3DX12SerializeVersionedRootSignature (&desc,
D3D_ROOT_SIGNATURE_VERSION_1_1, &blob_, &error_blob);
if (FAILED (hr_)) {
const gchar *error_msg = nullptr;
if (error_blob)
error_msg = (const gchar *) error_blob->GetBufferPointer ();
GST_ERROR ("Couldn't serialize root signature, hr: 0x%x, error detail: %s",
(guint) hr_, GST_STR_NULL (error_msg));
}
}
/* Creates the serialized root signature used by the converter.
 *
 * @device: device queried for the highest supported root signature version
 * @in_format: input video format; its plane count determines the number
 *   of SRVs in the descriptor table
 * @type: conversion type; GAMMA and PRIMARY additionally request the LUT
 *   SRVs and sampler
 * @filter: sampler filter for the input textures
 *
 * Returns: the root signature, or nullptr if serialization failed */
ConverterRootSignaturePtr
gst_d3d12_get_converter_root_signature (GstD3D12Device * device,
    GstVideoFormat in_format, CONVERT_TYPE type, D3D12_FILTER filter)
{
  auto info = gst_video_format_get_info (in_format);
  auto num_planes = GST_VIDEO_FORMAT_INFO_N_PLANES (info);
  auto device_handle = gst_d3d12_device_get_device_handle (device);

  /* HighestVersion is an in/out field: we ask for 1.1 and the runtime
   * writes back the highest version (up to the requested one) that is
   * actually available. A successful call therefore does not by itself
   * mean that 1.1 is supported, so the returned value must be checked. */
  D3D12_FEATURE_DATA_ROOT_SIGNATURE feature_data = { };
  feature_data.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_1;
  auto hr = device_handle->CheckFeatureSupport (D3D12_FEATURE_ROOT_SIGNATURE,
      &feature_data, sizeof (feature_data));

  D3D_ROOT_SIGNATURE_VERSION rs_version = D3D_ROOT_SIGNATURE_VERSION_1_0;
  if (SUCCEEDED (hr) &&
      feature_data.HighestVersion == D3D_ROOT_SIGNATURE_VERSION_1_1) {
    rs_version = D3D_ROOT_SIGNATURE_VERSION_1_1;
    GST_INFO_OBJECT (device, "Device supports version 1.1 root signature");
  }

  /* Gamma and primaries conversion sample lookup tables in the shader */
  bool build_lut = (type == CONVERT_TYPE::GAMMA ||
      type == CONVERT_TYPE::PRIMARY);

  auto rs = std::make_shared < ConverterRootSignature >
      (rs_version, num_planes, filter, build_lut);
  if (!rs->IsValid ())
    return nullptr;

  return rs;
}

View file

@ -0,0 +1,127 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <gst/gst.h>
#include <gst/video/video.h>
#include "gstd3d12_fwd.h"
#include <vector>
#include <wrl.h>
#include <memory>
#include <vector>
enum class CONVERT_TYPE
{
IDENTITY,
SIMPLE,
RANGE,
GAMMA,
PRIMARY,
};
/* A precompiled pixel shader together with its render target count */
struct PixelShaderBlob
{
/* Pointer to and length of the precompiled shader bytecode */
D3D12_SHADER_BYTECODE bytecode;
/* Number of render targets the shader writes */
guint num_rtv;
};
/* Serialized root signature for the converter plus the root parameter
 * indices needed to bind resources against it.
 *
 * The constructor builds and serializes the signature; check IsValid()
 * (or the result of GetBlob()) before using the object. All accessors are
 * read-only and const-qualified so they can be used through const
 * references and pointers. */
class ConverterRootSignature
{
public:
  ConverterRootSignature () = delete;
  ConverterRootSignature (D3D_ROOT_SIGNATURE_VERSION version, UINT num_srv,
      D3D12_FILTER filter, bool build_lut);

  /* Root parameter index of the pixel shader SRV descriptor table */
  UINT GetPsSrvIdx () const
  {
    return ps_srv_;
  }

  /* Number of input SRVs requested at construction */
  UINT GetNumSrv () const
  {
    return num_srv_;
  }

  /* Whether the signature was built with LUT resources */
  bool HaveLut () const
  {
    return have_lut_;
  }

  /* Root parameter index of the vertex shader root constants */
  UINT GetVsRootConstIdx () const
  {
    return vs_root_const_;
  }

  /* Root parameter index of the pixel shader root constant */
  UINT GetPsRootConstIdx () const
  {
    return ps_root_const_;
  }

  /* Root parameter index of the pixel shader constant buffer view */
  UINT GetPsCbvIdx () const
  {
    return ps_cbv_;
  }

  /* True when serialization succeeded */
  bool IsValid () const
  {
    return SUCCEEDED (hr_);
  }

  /* Hands out a new reference to the serialized blob.
   *
   * Returns E_POINTER if @blob is null; otherwise returns the
   * serialization result. On failure *blob is set to nullptr so callers
   * never observe an uninitialized out value. */
  HRESULT GetBlob (ID3DBlob ** blob) const
  {
    if (!blob)
      return E_POINTER;

    *blob = nullptr;
    if (SUCCEEDED (hr_) && blob_) {
      *blob = blob_.Get ();
      (*blob)->AddRef ();
    }

    return hr_;
  }

private:
  Microsoft::WRL::ComPtr<ID3DBlob> blob_;
  UINT ps_srv_ = 0;
  UINT ps_cbv_ = 0;
  UINT vs_root_const_ = 0;
  UINT num_srv_ = 0;
  bool have_lut_ = false;
  UINT ps_root_const_ = 0;
  HRESULT hr_ = S_OK;
};
/* List of pixel shader blobs, one per output pass */
typedef std::vector<PixelShaderBlob> PixelShaderBlobList;
/* Shared handle to a serialized converter root signature */
typedef std::shared_ptr<ConverterRootSignature> ConverterRootSignaturePtr;
/* Returns the precompiled pixel shader(s) converting in_format to
 * out_format for the given conversion type. The list is empty when a
 * required precompiled shader cannot be found. */
PixelShaderBlobList
gst_d3d12_get_converter_pixel_shader_blob (GstVideoFormat in_format,
GstVideoFormat out_format,
gboolean in_premul,
gboolean out_premul,
CONVERT_TYPE type);
/* Fills vs with the precompiled converter vertex shader and layout with
 * its two input elements (POSITION and TEXCOORD) */
HRESULT
gst_d3d12_get_converter_vertex_shader_blob (D3D12_SHADER_BYTECODE * vs,
D3D12_INPUT_ELEMENT_DESC layout[2]);
/* Builds the converter root signature for in_format / type / filter.
 * Returns nullptr when the signature could not be serialized. */
ConverterRootSignaturePtr
gst_d3d12_get_converter_root_signature (GstD3D12Device * device,
GstVideoFormat in_format,
CONVERT_TYPE type,
D3D12_FILTER filter);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,147 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <gst/gst.h>
#include <gst/video/video.h>
#include "gstd3d12_fwd.h"
#include "gstd3d12fencedatapool.h"
G_BEGIN_DECLS
#define GST_TYPE_D3D12_CONVERTER (gst_d3d12_converter_get_type())
#define GST_D3D12_CONVERTER(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_D3D12_CONVERTER,GstD3D12Converter))
#define GST_D3D12_CONVERTER_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_D3D12_CONVERTER,GstD3D12ConverterClass))
#define GST_D3D12_CONVERTER_GET_CLASS(obj) (GST_D3D12_CONVERTER_CLASS(G_OBJECT_GET_CLASS(obj)))
#define GST_IS_D3D12_CONVERTER(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_D3D12_CONVERTER))
#define GST_IS_D3D12_CONVERTER_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_D3D12_CONVERTER))
#define GST_D3D12_CONVERTER_CAST(obj) ((GstD3D12Converter*)(obj))
/**
* GST_D3D12_CONVERTER_OPT_GAMMA_MODE:
*
* #GstVideoGammaMode, set the gamma mode.
* Default is #GST_VIDEO_GAMMA_MODE_NONE
*/
#define GST_D3D12_CONVERTER_OPT_GAMMA_MODE "GstD3D12Converter.gamma-mode"
/**
* GST_D3D12_CONVERTER_OPT_PRIMARIES_MODE:
*
* #GstVideoPrimariesMode, set the primaries conversion mode.
* Default is #GST_VIDEO_PRIMARIES_MODE_NONE.
*/
#define GST_D3D12_CONVERTER_OPT_PRIMARIES_MODE "GstD3D12Converter.primaries-mode"
/**
* GST_D3D12_CONVERTER_OPT_SAMPLER_FILTER:
*
* #D3D12_FILTER, set sampler filter.
*
* Supported values are:
* @D3D12_FILTER_MIN_MAG_MIP_POINT
* @D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT
* @D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT
* @D3D12_FILTER_ANISOTROPIC
*
* Default is #D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT.
*/
#define GST_D3D12_CONVERTER_OPT_SAMPLER_FILTER "GstD3D12Converter.sampler-filter"
GType gst_d3d12_converter_sampler_filter_get_type (void);
#define GST_TYPE_D3D12_CONVERTER_SAMPLER_FILTER (gst_d3d12_converter_sampler_filter_get_type())
/**
* GstD3D12ConverterAlphaMode:
* @GST_D3D12_CONVERTER_ALPHA_MODE_UNSPECIFIED: Unspecified alpha mode
* @GST_D3D12_CONVERTER_ALPHA_MODE_PREMULTIPLIED: Premultiplied alpha
* @GST_D3D12_CONVERTER_ALPHA_MODE_STRAIGHT: Straight alpha
*
* Alpha mode. Enum values are identical to DXGI_ALPHA_MODE
*/
typedef enum
{
GST_D3D12_CONVERTER_ALPHA_MODE_UNSPECIFIED = 0,
GST_D3D12_CONVERTER_ALPHA_MODE_PREMULTIPLIED = 1,
GST_D3D12_CONVERTER_ALPHA_MODE_STRAIGHT = 2,
} GstD3D12ConverterAlphaMode;
GType gst_d3d12_converter_alpha_mode_get_type (void);
#define GST_TYPE_D3D12_CONVERTER_ALPHA_MODE (gst_d3d12_converter_alpha_mode_get_type())
/**
* GST_D3D12_CONVERTER_OPT_SRC_ALPHA_MODE:
*
* Set the source alpha mode.
* Default is #GST_D3D12_CONVERTER_ALPHA_MODE_UNSPECIFIED.
*/
#define GST_D3D12_CONVERTER_OPT_SRC_ALPHA_MODE "GstD3D12Converter.src-alpha-mode"
/**
* GST_D3D12_CONVERTER_OPT_DEST_ALPHA_MODE:
*
* Set the destination alpha mode.
* Default is #GST_D3D12_CONVERTER_ALPHA_MODE_UNSPECIFIED.
*/
#define GST_D3D12_CONVERTER_OPT_DEST_ALPHA_MODE "GstD3D12Converter.dest-alpha-mode"
/**
* GstD3D12Converter:
*
* Opaque GstD3D12Converter struct
*/
struct _GstD3D12Converter
{
GstObject parent;
GstD3D12Device *device;
/*< private >*/
GstD3D12ConverterPrivate *priv;
gpointer _gst_reserved[GST_PADDING];
};
/**
* GstD3D12ConverterClass:
*
* Opaque GstD3D12ConverterClass struct
*/
struct _GstD3D12ConverterClass
{
GstObjectClass parent_class;
/*< private >*/
gpointer _gst_reserved[GST_PADDING];
};
GType gst_d3d12_converter_get_type (void);
GstD3D12Converter * gst_d3d12_converter_new (GstD3D12Device * device,
const GstVideoInfo * in_info,
const GstVideoInfo * out_info,
GstStructure * config);
gboolean gst_d3d12_converter_convert_buffer (GstD3D12Converter * converter,
GstBuffer * in_buf,
GstBuffer * out_buf,
GstD3D12FenceData * fence_data,
ID3D12GraphicsCommandList * command_list);
G_END_DECLS

View file

@ -106,3 +106,771 @@ gst_d3d12_dxgi_format_to_resource_formats (DXGI_FORMAT format,
return TRUE;
}
/* Formats @matrix (3x3 matrix, offset, min and max vectors) as a
 * human-readable multi-line string.
 *
 * Returns: a newly allocated string from g_strdup_printf(), or nullptr
 * when @matrix is null */
char *
gst_d3d12_dump_color_matrix (GstD3D12ColorMatrix * matrix)
{
  g_return_val_if_fail (matrix != nullptr, nullptr);

  /* *INDENT-OFF* */
  static const gchar layout[] =
      "[MATRIX]\n"
      "|% .6f, % .6f, % .6f|\n"
      "|% .6f, % .6f, % .6f|\n"
      "|% .6f, % .6f, % .6f|\n"
      "[OFFSET]\n"
      "|% .6f, % .6f, % .6f|\n"
      "[MIN]\n"
      "|% .6f, % .6f, % .6f|\n"
      "[MAX]\n"
      "|% .6f, % .6f, % .6f|";
  /* *INDENT-ON* */

  const auto & m = matrix->matrix;
  const auto & off = matrix->offset;
  const auto & lo = matrix->min;
  const auto & hi = matrix->max;

  return g_strdup_printf (layout,
      m[0][0], m[0][1], m[0][2],
      m[1][0], m[1][1], m[1][2],
      m[2][0], m[2][1], m[2][2],
      off[0], off[1], off[2],
      lo[0], lo[1], lo[2],
      hi[0], hi[1], hi[2]);
}
/* Copies the 3x3 matrix coefficients from @src to @dst.
 * Only the matrix member is copied; offset, min and max are untouched. */
static void
color_matrix_copy (GstD3D12ColorMatrix * dst, const GstD3D12ColorMatrix * src)
{
  for (guint cell = 0; cell < 9; cell++) {
    const guint row = cell / 3;
    const guint col = cell % 3;

    dst->matrix[row][col] = src->matrix[row][col];
  }
}
/* Stores the 3x3 matrix product a * b into @dst.
 * The product is accumulated in a temporary, so @dst may alias @a or @b. */
static void
color_matrix_multiply (GstD3D12ColorMatrix * dst, GstD3D12ColorMatrix * a,
    GstD3D12ColorMatrix * b)
{
  GstD3D12ColorMatrix product;

  for (guint row = 0; row < 3; row++) {
    for (guint col = 0; col < 3; col++) {
      gdouble sum = 0;

      for (guint n = 0; n < 3; n++)
        sum += a->matrix[row][n] * b->matrix[n][col];

      product.matrix[row][col] = sum;
    }
  }

  color_matrix_copy (dst, &product);
}
/* Sets the 3x3 matrix of @m to identity: ones on the diagonal, zeros
 * elsewhere. The other members of @m are left as they are. */
static void
color_matrix_identity (GstD3D12ColorMatrix * m)
{
  for (guint row = 0; row < 3; row++) {
    for (guint col = 0; col < 3; col++)
      m->matrix[row][col] = (row == col) ? 1.0 : 0.0;
  }
}
/* Resets @matrix to a pass-through state: identity matrix, zero offset,
 * and a clamp range of [0, 1] for every component. */
void
gst_d3d12_color_matrix_init (GstD3D12ColorMatrix * matrix)
{
  g_return_if_fail (matrix);

  for (guint comp = 0; comp < 3; comp++) {
    matrix->offset[comp] = 0;
    matrix->min[comp] = 0;
    matrix->max[comp] = 1;
  }

  color_matrix_identity (matrix);
}
/* Inverts the 3x3 matrix of @src into @dst using the adjugate divided by
 * the determinant.
 *
 * Returns: FALSE, leaving @dst untouched, when the determinant is exactly
 * zero; TRUE otherwise. @dst may alias @src since the result is built in a
 * temporary. */
static gboolean
color_matrix_invert (GstD3D12ColorMatrix * dst, GstD3D12ColorMatrix * src)
{
  GstD3D12ColorMatrix adj;

  color_matrix_identity (&adj);

  /* adj[j][i] is the cofactor obtained by dropping row i and column j;
   * the cyclic indexing makes the cofactor signs come out right */
  for (guint j = 0; j < 3; j++) {
    const guint c1 = (j + 1) % 3;
    const guint c2 = (j + 2) % 3;

    for (guint i = 0; i < 3; i++) {
      const guint r1 = (i + 1) % 3;
      const guint r2 = (i + 2) % 3;

      adj.matrix[j][i] =
          src->matrix[r1][c1] * src->matrix[r2][c2] -
          src->matrix[r1][c2] * src->matrix[r2][c1];
    }
  }

  const gdouble determinant = adj.matrix[0][0] * src->matrix[0][0] +
      adj.matrix[0][1] * src->matrix[1][0] +
      adj.matrix[0][2] * src->matrix[2][0];
  if (determinant == 0)
    return FALSE;

  for (guint row = 0; row < 3; row++) {
    for (guint col = 0; col < 3; col++)
      adj.matrix[row][col] /= determinant;
  }

  color_matrix_copy (dst, &adj);

  return TRUE;
}
/**
* gst_d3d12_color_range_adjust_matrix_unorm:
* @in_info: a #GstVideoInfo
* @out_info: a #GstVideoInfo
* @matrix: a #GstD3D12ColorMatrix
*
* Calculates matrix for color range adjustment. Both input and output
* signals are in normalized [0.0..1.0] space.
*
* Resulting values can be calculated by
* | Yout |                           | Yin |   | matrix.offset[0] |
* | Uout | = clamp ( matrix.matrix * | Uin | + | matrix.offset[1] |, matrix.min, matrix.max )
* | Vout |                           | Vin |   | matrix.offset[2] |
*
* Returns: %TRUE if successful
*/
gboolean
gst_d3d12_color_range_adjust_matrix_unorm (const GstVideoInfo * in_info,
const GstVideoInfo * out_info, GstD3D12ColorMatrix * matrix)
{
gboolean in_rgb, out_rgb;
gint in_offset[GST_VIDEO_MAX_COMPONENTS];
gint in_scale[GST_VIDEO_MAX_COMPONENTS];
gint out_offset[GST_VIDEO_MAX_COMPONENTS];
gint out_scale[GST_VIDEO_MAX_COMPONENTS];
GstVideoColorRange in_range;
GstVideoColorRange out_range;
gdouble src_fullscale, dst_fullscale;
g_return_val_if_fail (in_info != nullptr, FALSE);
g_return_val_if_fail (out_info != nullptr, FALSE);
g_return_val_if_fail (matrix != nullptr, FALSE);
memset (matrix, 0, sizeof (GstD3D12ColorMatrix));
for (guint i = 0; i < 3; i++) {
matrix->matrix[i][i] = 1.0;
matrix->matrix[i][i] = 1.0;
matrix->matrix[i][i] = 1.0;
matrix->max[i] = 1.0;
}
in_rgb = GST_VIDEO_INFO_IS_RGB (in_info);
out_rgb = GST_VIDEO_INFO_IS_RGB (out_info);
if (in_rgb != out_rgb) {
GST_WARNING ("Invalid format conversion");
return FALSE;
}
in_range = in_info->colorimetry.range;
out_range = out_info->colorimetry.range;
if (in_range == GST_VIDEO_COLOR_RANGE_UNKNOWN) {
GST_WARNING ("Unknown input color range");
if (in_rgb || GST_VIDEO_INFO_IS_GRAY (in_info))
in_range = GST_VIDEO_COLOR_RANGE_0_255;
else
in_range = GST_VIDEO_COLOR_RANGE_16_235;
}
if (out_range == GST_VIDEO_COLOR_RANGE_UNKNOWN) {
GST_WARNING ("Unknown output color range");
if (out_rgb || GST_VIDEO_INFO_IS_GRAY (out_info))
out_range = GST_VIDEO_COLOR_RANGE_0_255;
else
out_range = GST_VIDEO_COLOR_RANGE_16_235;
}
src_fullscale = (gdouble) ((1 << in_info->finfo->depth[0]) - 1);
dst_fullscale = (gdouble) ((1 << out_info->finfo->depth[0]) - 1);
gst_video_color_range_offsets (in_range, in_info->finfo, in_offset, in_scale);
gst_video_color_range_offsets (out_range,
out_info->finfo, out_offset, out_scale);
matrix->min[0] = matrix->min[1] = matrix->min[2] =
(gdouble) out_offset[0] / dst_fullscale;
matrix->max[0] = (out_scale[0] + out_offset[0]) / dst_fullscale;
matrix->max[1] = matrix->max[2] =
(out_scale[1] + out_offset[0]) / dst_fullscale;
if (in_info->colorimetry.range == out_info->colorimetry.range) {
GST_DEBUG ("Same color range");
return TRUE;
}
/* Formula
*
* 1) Scales and offset compensates input to [0..1] range
* SRC_NORM[i] = (src[i] * src_fullscale - in_offset[i]) / in_scale[i]
* = (src[i] * src_fullscale / in_scale[i]) - in_offset[i] / in_scale[i]
*
* 2) Reverse to output UNIT scale
* DST_UINT[i] = SRC_NORM[i] * out_scale[i] + out_offset[i]
* = src[i] * src_fullscale * out_scale[i] / in_scale[i]
* - in_offset[i] * out_scale[i] / in_scale[i]
* + out_offset[i]
*
* 3) Back to [0..1] scale
* dst[i] = DST_UINT[i] / dst_fullscale
* = COEFF[i] * src[i] + OFF[i]
* where
* src_fullscale * out_scale[i]
* COEFF[i] = ------------------------------
* dst_fullscale * in_scale[i]
*
* out_offset[i] in_offset[i] * out_scale[i]
* OFF[i] = -------------- - ------------------------------
* dst_fullscale dst_fullscale * in_scale[i]
*/
for (guint i = 0; i < 3; i++) {
matrix->matrix[i][i] = (src_fullscale * out_scale[i]) /
(dst_fullscale * in_scale[i]);
matrix->offset[i] = (out_offset[i] / dst_fullscale) -
((gdouble) in_offset[i] * out_scale[i] / (dst_fullscale * in_scale[i]));
}
return TRUE;
}
/**
 * gst_d3d12_yuv_to_rgb_matrix_unorm:
 * @in_yuv_info: a #GstVideoInfo of input YUV signal
 * @out_rgb_info: a #GstVideoInfo of output RGB signal
 * @matrix: a #GstD3D12ColorMatrix
 *
 * Calculates transform matrix from YUV to RGB conversion. Both input and output
 * signals are in normalized [0.0..1.0] space and additional gamma decoding
 * or primary/transfer function transform is not performed by this matrix.
 *
 * Resulting non-linear RGB values can be calculated by
 * | R' |                           | Y' |   | matrix.offset[0] |
 * | G' | = clamp ( matrix.matrix * | Cb | + | matrix.offset[1] |, matrix.min, matrix.max )
 * | B' |                           | Cr |   | matrix.offset[2] |
 *
 * If the input color matrix is unknown, an identity matrix is produced.
 *
 * Returns: %TRUE if successful
 */
gboolean
gst_d3d12_yuv_to_rgb_matrix_unorm (const GstVideoInfo * in_yuv_info,
    const GstVideoInfo * out_rgb_info, GstD3D12ColorMatrix * matrix)
{
  gint offset[4], scale[4];
  gdouble Kr, Kb, Kg;

  g_return_val_if_fail (in_yuv_info != nullptr, FALSE);
  g_return_val_if_fail (out_rgb_info != nullptr, FALSE);
  g_return_val_if_fail (matrix != nullptr, FALSE);

  /*
   * <Formula>
   *
   * Input: Unsigned normalized Y'CbCr(unorm), [0.0..1.0] range
   * Output: Unsigned normalized non-linear R'G'B'(unorm), [0.0..1.0] range
   *
   * 1) Y'CbCr(unorm) to scaled Y'CbCr
   * | Y' |     | Y'(unorm) |
   * | Cb | = S | Cb(unorm) |
   * | Cr |     | Cr(unorm) |
   * where S = (2 ^ bitdepth) - 1
   *
   * 2) Y'CbCr to YPbPr
   * Y  = (Y' - offsetY )    / scaleY
   * Pb = [(Cb - offsetCbCr) / scaleCbCr]
   * Pr = [(Cr - offsetCbCr) / scaleCbCr]
   * =>
   * Y  = Y'(unorm) * Sy  + Oy
   * Pb = Cb(unorm) * Suv + Ouv
   * Pr = Cr(unorm) * Suv + Ouv
   * where
   * Sy  = S / scaleY
   * Suv = S / scaleCbCr
   * Oy  = -(offsetY / scaleY)
   * Ouv = -(offsetCbCr / scaleCbCr)
   *
   * 3) YPbPr to R'G'B'
   * | R' |      | Y  |
   * | G' | = M *| Pb |
   * | B' |      | Pr |
   * where
   *     | vecR |
   * M = | vecG |
   *     | vecB |
   * vecR = | 1,         0           ,       2(1 - Kr)      |
   * vecG = | 1, -(Kb/Kg) * 2(1 - Kb), -(Kr/Kg) * 2(1 - Kr) |
   * vecB = | 1,       2(1 - Kb)     ,          0           |
   * =>
   * R' = dot(vecR, (Syuv * Y'CbCr(unorm))) + dot(vecR, Offset)
   * G' = dot(vecG, (Syuv * Y'CbCr(unorm))) + dot(vecG, Offset)
   * B' = dot(vecB, (Syuv * Y'CbCr(unorm))) + dot(vecB, Offset)
   * where
   *        | Sy,   0,   0 |
   * Syuv = |  0, Suv,   0 |
   *        |  0    0, Suv |
   *
   *          | Oy  |
   * Offset = | Ouv |
   *          | Ouv |
   *
   * 4) YUV -> RGB matrix
   * | R' |            | Y'(unorm) |   | offsetA |
   * | G' | = Matrix * | Cb(unorm) | + | offsetB |
   * | B' |            | Cr(unorm) |   | offsetC |
   *
   * where
   *          | vecR |
   * Matrix = | vecG | * Syuv
   *          | vecB |
   *
   * offsetA = dot(vecR, Offset)
   * offsetB = dot(vecG, Offset)
   * offsetC = dot(vecB, Offset)
   *
   * 5) Consider 16-235 scale RGB
   * RGBfull(0..255) -> RGBstudio(16..235) matrix is represented by
   * | Rs |      | Rf |   | Or |
   * | Gs | = Ms | Gf | + | Og |
   * | Bs |      | Bf |   | Ob |
   *
   * Combining all matrix into
   * | Rs |                   | Y'(unorm) |   | offsetA |     | Or |
   * | Gs | = Ms * ( Matrix * | Cb(unorm) | + | offsetB | ) + | Og |
   * | Bs |                   | Cr(unorm) |   | offsetC |     | Ob |
   *
   *                        | Y'(unorm) |      | offsetA |   | Or |
   *        = Ms * Matrix * | Cb(unorm) | + Ms | offsetB | + | Og |
   *                        | Cr(unorm) |      | offsetC |   | Ob |
   */

  memset (matrix, 0, sizeof (GstD3D12ColorMatrix));
  for (guint i = 0; i < 3; i++)
    matrix->max[i] = 1.0;

  gst_video_color_range_offsets (in_yuv_info->colorimetry.range,
      in_yuv_info->finfo, offset, scale);

  if (!gst_video_color_matrix_get_Kr_Kb (in_yuv_info->colorimetry.matrix,
          &Kr, &Kb)) {
    /* Unknown matrix */
    for (guint i = 0; i < 3; i++)
      matrix->matrix[i][i] = 1.0;

    return TRUE;
  }

  Kg = 1.0 - Kr - Kb;

  /* Rows are vecR, vecG and vecB of step 3) */
  const gdouble vec[3][3] = {
    {1.0, 0, 2 * (1 - Kr)},
    {1.0, -(Kb / Kg) * 2 * (1 - Kb), -(Kr / Kg) * 2 * (1 - Kr)},
    {1.0, 2 * (1 - Kb), 0},
  };

  /* Assume all components has the same bitdepth */
  const guint S = (1 << in_yuv_info->finfo->depth[0]) - 1;
  const gdouble Sy = (gdouble) S / scale[0];
  const gdouble Suv = (gdouble) S / scale[1];
  const gdouble Oy = -((gdouble) offset[0] / scale[0]);
  const gdouble Ouv = -((gdouble) offset[1] / scale[1]);

  for (guint row = 0; row < 3; row++) {
    matrix->matrix[row][0] = Sy * vec[row][0];
    matrix->matrix[row][1] = Suv * vec[row][1];
    matrix->matrix[row][2] = Suv * vec[row][2];

    matrix->offset[row] =
        vec[row][0] * Oy + vec[row][1] * Ouv + vec[row][2] * Ouv;
  }

  /* Apply RGB range scale matrix */
  if (out_rgb_info->colorimetry.range != GST_VIDEO_COLOR_RANGE_16_235)
    return TRUE;

  GstD3D12ColorMatrix scale_matrix, combined;
  GstVideoInfo full_rgb = *out_rgb_info;

  full_rgb.colorimetry.range = GST_VIDEO_COLOR_RANGE_0_255;

  if (!gst_d3d12_color_range_adjust_matrix_unorm (&full_rgb,
          out_rgb_info, &scale_matrix)) {
    return TRUE;
  }

  /* Ms * Matrix */
  color_matrix_multiply (&combined, &scale_matrix, matrix);

  /* Ms * transform offsets */
  for (guint row = 0; row < 3; row++) {
    gdouble acc = 0;

    for (guint col = 0; col < 3; col++)
      acc += scale_matrix.matrix[row][col] * matrix->offset[col];

    combined.offset[row] = acc + scale_matrix.offset[row];
  }

  /* copy back to output matrix; done only after all offsets are computed
   * because the loop above reads the untouched matrix->offset */
  for (guint row = 0; row < 3; row++) {
    for (guint col = 0; col < 3; col++)
      matrix->matrix[row][col] = combined.matrix[row][col];

    matrix->offset[row] = combined.offset[row];
    matrix->min[row] = scale_matrix.min[row];
    matrix->max[row] = scale_matrix.max[row];
  }

  return TRUE;
}
/**
 * gst_d3d12_rgb_to_yuv_matrix_unorm:
 * @in_rgb_info: a #GstVideoInfo of input RGB signal
 * @out_yuv_info: a #GstVideoInfo of output YUV signal
 * @matrix: a #GstD3D12ColorMatrix
 *
 * Calculates transform matrix from RGB to YUV conversion. Both input and output
 * signals are in normalized [0.0..1.0] space and additional gamma decoding
 * or primary/transfer function transform is not performed by this matrix.
 *
 * Resulting YUV values can be calculated by
 * | Y' |                           | R' |   | matrix.offset[0] |
 * | Cb | = clamp ( matrix.matrix * | G' | + | matrix.offset[1] |, matrix.min, matrix.max )
 * | Cr |                           | B' |   | matrix.offset[2] |
 *
 * If the output color matrix is unknown, an identity matrix is produced.
 *
 * Returns: %TRUE if successful
 */
gboolean
gst_d3d12_rgb_to_yuv_matrix_unorm (const GstVideoInfo * in_rgb_info,
    const GstVideoInfo * out_yuv_info, GstD3D12ColorMatrix * matrix)
{
  gint offset[4], scale[4];
  gdouble Kr, Kb, Kg;

  g_return_val_if_fail (in_rgb_info != nullptr, FALSE);
  g_return_val_if_fail (out_yuv_info != nullptr, FALSE);
  g_return_val_if_fail (matrix != nullptr, FALSE);

  /*
   * <Formula>
   *
   * Input: Unsigned normalized non-linear R'G'B'(unorm), [0.0..1.0] range
   * Output: Unsigned normalized Y'CbCr(unorm), [0.0..1.0] range
   *
   * 1) R'G'B' to YPbPr
   * | Y  |      | R' |
   * | Pb | = M *| G' |
   * | Pr |      | B' |
   * where
   *     | vecY |
   * M = | vecU |
   *     | vecV |
   * vecY = |       Kr      ,       Kg      ,      Kb       |
   * vecU = | -0.5*Kr/(1-Kb), -0.5*Kg/(1-Kb),     0.5       |
   * vecV = |      0.5      , -0.5*Kg/(1-Kr), -0.5*Kb/(1-Kr) |
   *
   * 2) YPbPr to Y'CbCr(unorm)
   * Y'(unorm) = (Y  * scaleY + offsetY)       / S
   * Cb(unorm) = (Pb * scaleCbCr + offsetCbCr) / S
   * Cr(unorm) = (Pr * scaleCbCr + offsetCbCr) / S
   * =>
   * Y'(unorm) = (Y  * scaleY    / S) + (offsetY    / S)
   * Cb(unorm) = (Pb * scaleCbCr / S) + (offsetCbCr / S)
   * Cr(unorm) = (Pr * scaleCbCr / S) + (offsetCbCr / S)
   * where S = (2 ^ bitdepth) - 1
   *
   * 3) RGB -> YUV matrix
   * | Y'(unorm) |            | R' |   | offsetA |
   * | Cb(unorm) | = Matrix * | G' | + | offsetB |
   * | Cr(unorm) |            | B' |   | offsetC |
   *
   * where
   *          | (scaleY/S)    * vecY |
   * Matrix = | (scaleCbCr/S) * vecU |
   *          | (scaleCbCr/S) * vecV |
   *
   * offsetA = offsetY    / S
   * offsetB = offsetCbCr / S
   * offsetC = offsetCbCr / S
   *
   * 4) Consider 16-235 scale RGB
   * RGBstudio(16..235) -> RGBfull(0..255) matrix is represented by
   * | Rf |      | Rs |   | Or |
   * | Gf | = Ms | Gs | + | Og |
   * | Bf |      | Bs |   | Ob |
   *
   * Combining all matrix into
   * | Y'(unorm) |                 | Rs |   | Or |     | offsetA |
   * | Cb(unorm) | = Matrix * ( Ms | Gs | + | Og | ) + | offsetB |
   * | Cr(unorm) |                 | Bs |   | Ob |     | offsetC |
   *
   *                             | Rs |          | Or |   | offsetA |
   *               = Matrix * Ms | Gs | + Matrix | Og | + | offsetB |
   *                             | Bs |          | Ob |   | offsetC |
   */

  memset (matrix, 0, sizeof (GstD3D12ColorMatrix));
  for (guint i = 0; i < 3; i++)
    matrix->max[i] = 1.0;

  gst_video_color_range_offsets (out_yuv_info->colorimetry.range,
      out_yuv_info->finfo, offset, scale);

  if (!gst_video_color_matrix_get_Kr_Kb (out_yuv_info->colorimetry.matrix,
          &Kr, &Kb)) {
    /* Unknown matrix */
    for (guint i = 0; i < 3; i++)
      matrix->matrix[i][i] = 1.0;

    return TRUE;
  }

  Kg = 1.0 - Kr - Kb;

  /* Rows are vecY, vecU and vecV of step 1) */
  const gdouble vec[3][3] = {
    {Kr, Kg, Kb},
    {-0.5 * Kr / (1 - Kb), -0.5 * Kg / (1 - Kb), 0.5},
    {0.5, -0.5 * Kg / (1 - Kr), -0.5 * Kb / (1 - Kr)},
  };

  /* Assume all components has the same bitdepth */
  const guint S = (1 << out_yuv_info->finfo->depth[0]) - 1;
  const gdouble Sy = (gdouble) scale[0] / S;
  const gdouble Suv = (gdouble) scale[1] / S;
  const gdouble Oy = (gdouble) offset[0] / S;
  const gdouble Ouv = (gdouble) offset[1] / S;

  /* Per-row scale: luma row uses Sy, both chroma rows use Suv */
  const gdouble row_scale[3] = { Sy, Suv, Suv };

  for (guint row = 0; row < 3; row++) {
    for (guint col = 0; col < 3; col++)
      matrix->matrix[row][col] = row_scale[row] * vec[row][col];
  }

  matrix->offset[0] = Oy;
  matrix->offset[1] = Ouv;
  matrix->offset[2] = Ouv;

  /* Clamp range: the luma offset is the lower bound of every component,
   * upper bounds are the component scale on top of the luma offset */
  for (guint i = 0; i < 3; i++)
    matrix->min[i] = Oy;

  matrix->max[0] = ((gdouble) scale[0] + offset[0]) / S;
  matrix->max[1] = ((gdouble) scale[1] + offset[0]) / S;
  matrix->max[2] = ((gdouble) scale[1] + offset[0]) / S;

  /* Apply RGB range scale matrix */
  if (in_rgb_info->colorimetry.range != GST_VIDEO_COLOR_RANGE_16_235)
    return TRUE;

  GstD3D12ColorMatrix scale_matrix, combined;
  GstVideoInfo full_rgb = *in_rgb_info;

  full_rgb.colorimetry.range = GST_VIDEO_COLOR_RANGE_0_255;

  if (!gst_d3d12_color_range_adjust_matrix_unorm (in_rgb_info,
          &full_rgb, &scale_matrix)) {
    return TRUE;
  }

  /* Matrix * Ms */
  color_matrix_multiply (&combined, matrix, &scale_matrix);

  /* Matrix * scale offsets */
  for (guint row = 0; row < 3; row++) {
    gdouble acc = 0;

    for (guint col = 0; col < 3; col++)
      acc += matrix->matrix[row][col] * scale_matrix.offset[col];

    combined.offset[row] = acc + matrix->offset[row];
  }

  /* copy back to output matrix; min/max keep describing the YUV output */
  for (guint row = 0; row < 3; row++) {
    for (guint col = 0; col < 3; col++)
      matrix->matrix[row][col] = combined.matrix[row][col];

    matrix->offset[row] = combined.offset[row];
  }

  return TRUE;
}
/* Builds the RGB -> XYZ matrix for the primaries described by @info and
 * stores its 3x3 coefficients into @matrix.
 *
 * Each column is the XYZ of one primary (X = x / y, Y = 1,
 * Z = (1 - x - y) / y) scaled by Sr/Sg/Sb, where the scale factors are
 * chosen so that the white point maps as inv(primaries) * white.
 *
 * Returns FALSE when the primaries cannot be used (zero chromaticity
 * coordinates or a singular primaries matrix); @matrix is left untouched
 * in that case. */
static gboolean
rgb_to_xyz_matrix (const GstVideoColorPrimariesInfo * info,
    GstD3D12ColorMatrix * matrix)
{
  GstD3D12ColorMatrix m, im;
  gdouble Sr, Sg, Sb;
  gdouble Xw, Yw, Zw;

  /* Ry, Gy, By and Wy are used as divisors below, so all of them must be
   * non-zero. The Rx/Gx checks of the previous implementation are kept so
   * that every set of primaries rejected before is still rejected. */
  if (info->Rx == 0 || info->Gx == 0 || info->By == 0 || info->Wy == 0 ||
      info->Ry == 0 || info->Gy == 0) {
    return FALSE;
  }

  color_matrix_identity (&m);

  m.matrix[0][0] = info->Rx / info->Ry;
  m.matrix[1][0] = 1.0;
  m.matrix[2][0] = (1.0 - info->Rx - info->Ry) / info->Ry;

  m.matrix[0][1] = info->Gx / info->Gy;
  m.matrix[1][1] = 1.0;
  m.matrix[2][1] = (1.0 - info->Gx - info->Gy) / info->Gy;

  m.matrix[0][2] = info->Bx / info->By;
  m.matrix[1][2] = 1.0;
  m.matrix[2][2] = (1.0 - info->Bx - info->By) / info->By;

  if (!color_matrix_invert (&im, &m))
    return FALSE;

  /* XYZ of the white point, normalized to Y = 1 */
  Xw = info->Wx / info->Wy;
  Yw = 1.0;
  Zw = (1.0 - info->Wx - info->Wy) / info->Wy;

  /* (Sr, Sg, Sb) = inv (m) * (Xw, Yw, Zw) */
  Sr = im.matrix[0][0] * Xw + im.matrix[0][1] * Yw + im.matrix[0][2] * Zw;
  Sg = im.matrix[1][0] * Xw + im.matrix[1][1] * Yw + im.matrix[1][2] * Zw;
  Sb = im.matrix[2][0] * Xw + im.matrix[2][1] * Yw + im.matrix[2][2] * Zw;

  /* Scale each primary's column by its factor */
  for (guint i = 0; i < 3; i++) {
    m.matrix[i][0] *= Sr;
    m.matrix[i][1] *= Sg;
    m.matrix[i][2] *= Sb;
  }

  color_matrix_copy (matrix, &m);

  return TRUE;
}
/**
 * gst_d3d12_color_primaries_matrix_unorm:
 * @in_info: a #GstVideoColorPrimariesInfo of input signal
 * @out_info: a #GstVideoColorPrimariesInfo of output signal
 * @matrix: a #GstD3D12ColorMatrix
 *
 * Calculates color primaries conversion matrix
 *
 * Resulting RGB values can be calculated by
 * | Rout |                              | Rin |
 * | Gout | = saturate ( matrix.matrix * | Gin | )
 * | Bout |                              | Bin |
 *
 * Returns: %TRUE if successful, %FALSE if the RGB -> XYZ matrix of either
 * side cannot be calculated or inverted
 */
gboolean
gst_d3d12_color_primaries_matrix_unorm (const GstVideoColorPrimariesInfo *
    in_info, const GstVideoColorPrimariesInfo * out_info,
    GstD3D12ColorMatrix * matrix)
{
  GstD3D12ColorMatrix src_to_xyz, xyz_to_dst, result;

  g_return_val_if_fail (in_info != nullptr, FALSE);
  g_return_val_if_fail (out_info != nullptr, FALSE);
  g_return_val_if_fail (matrix != nullptr, FALSE);

  /*
   * <Formula>
   *
   * 1) RGB -> XYZ conversion
   * | X |     | R |
   * | Y | = M | G |
   * | Z |     | B |
   * where
   *     | SrXr, SgXg, SbXb |
   * M = | SrYr, SgYg, SbYb |
   *     | SrZr, SgZg, SbZb |
   *
   * Xr = xr / yr
   * Yr = 1
   * Zr = (1 - xr - yr) / yr
   * xr and yr are xy coordinates of red primary in the CIE 1931 color space.
   * The same applies to the G and B components
   *
   * | Sr |        | Xr, Xg, Xb |     | Xw |
   * | Sg | = inv( | Yr, Yg, Yb | ) * | Yw |
   * | Sb |        | Zr, Zg, Zb |     | Zw |
   *
   * 2) XYZsrc -> XYZdst conversion
   * Apply chromatic adaptation
   * | Xdst |      | Xsrc |
   * | Ydst | = Mc | Ysrc |
   * | Zdst |      | Zsrc |
   * where
   *      | Xwdst / Xwsrc,       0      ,       0       |
   * Mc = |       0      , Ywdst / Ywsrc,       0       |
   *      |       0      ,       0      , Zwdst / Zwsrc |
   *
   * 3) Final matrix
   * | Rd |                      | Rs |
   * | Gd | = inv (Md) * Mc * Ms | Gs |
   * | Bd |                      | Bs |
   */

  memset (matrix, 0, sizeof (GstD3D12ColorMatrix));
  for (guint i = 0; i < 3; i++)
    matrix->max[i] = 1.0;

  /* Ms */
  if (!rgb_to_xyz_matrix (in_info, &src_to_xyz)) {
    GST_WARNING ("Failed to get src XYZ matrix");
    return FALSE;
  }

  /* inv (Md), inverted in place */
  if (!rgb_to_xyz_matrix (out_info, &xyz_to_dst) ||
      !color_matrix_invert (&xyz_to_dst, &xyz_to_dst)) {
    GST_WARNING ("Failed to get dst XYZ matrix");
    return FALSE;
  }

  if (in_info->Wx == out_info->Wx && in_info->Wy == out_info->Wy) {
    /* Identical white points, no chromatic adaptation needed */
    color_matrix_copy (&result, &src_to_xyz);
  } else {
    GstD3D12ColorMatrix Mc;
    const gdouble dst_Xw = out_info->Wx / out_info->Wy;
    const gdouble src_Xw = in_info->Wx / in_info->Wy;
    const gdouble dst_Zw = (1.0 - out_info->Wx - out_info->Wy) / out_info->Wy;
    const gdouble src_Zw = (1.0 - in_info->Wx - in_info->Wy) / in_info->Wy;

    color_matrix_identity (&Mc);
    Mc.matrix[0][0] = dst_Xw / src_Xw;
    /* Yw == 1.0 */
    Mc.matrix[2][2] = dst_Zw / src_Zw;

    /* Mc * Ms */
    color_matrix_multiply (&result, &Mc, &src_to_xyz);
  }

  /* inv (Md) * (Mc * Ms) */
  color_matrix_multiply (&result, &xyz_to_dst, &result);
  color_matrix_copy (matrix, &result);

  return TRUE;
}

View file

@ -48,10 +48,39 @@ struct _GstD3D12Format
guint padding[GST_PADDING_LARGE];
};
/* Color transform description. The matrix builders declared below document
 * its use as: out = clamp (matrix * in + offset, min, max) */
typedef struct _GstD3D12ColorMatrix
{
/* 3x3 coefficients, indexed as [row][column] */
gdouble matrix[3][3];
/* per-component offset added after the matrix multiplication */
gdouble offset[3];
/* per-component lower clamp bound */
gdouble min[3];
/* per-component upper clamp bound */
gdouble max[3];
} GstD3D12ColorMatrix;
GstVideoFormat gst_d3d12_dxgi_format_to_gst (DXGI_FORMAT format);
gboolean gst_d3d12_dxgi_format_to_resource_formats (DXGI_FORMAT format,
DXGI_FORMAT resource_format[GST_VIDEO_MAX_PLANES]);
void gst_d3d12_color_matrix_init (GstD3D12ColorMatrix * matrix);
gchar * gst_d3d12_dump_color_matrix (GstD3D12ColorMatrix * matrix);
gboolean gst_d3d12_color_range_adjust_matrix_unorm (const GstVideoInfo * in_info,
const GstVideoInfo * out_info,
GstD3D12ColorMatrix * matrix);
gboolean gst_d3d12_yuv_to_rgb_matrix_unorm (const GstVideoInfo * in_yuv_info,
const GstVideoInfo * out_rgb_info,
GstD3D12ColorMatrix * matrix);
gboolean gst_d3d12_rgb_to_yuv_matrix_unorm (const GstVideoInfo * in_rgb_info,
const GstVideoInfo * out_yuv_info,
GstD3D12ColorMatrix * matrix);
gboolean gst_d3d12_color_primaries_matrix_unorm (const GstVideoColorPrimariesInfo * in_info,
const GstVideoColorPrimariesInfo * out_info,
GstD3D12ColorMatrix * matrix);
G_END_DECLS

View file

@ -161,6 +161,17 @@ gst_d3d12_allocation_params_set_resource_flags (GstD3D12AllocationParams *
return TRUE;
}
/* Clears every bit of @resource_flags from the resource flags stored in
 * @params. Returns FALSE only when @params is NULL. */
gboolean
gst_d3d12_allocation_params_unset_resource_flags (GstD3D12AllocationParams *
    params, D3D12_RESOURCE_FLAGS resource_flags)
{
  g_return_val_if_fail (params, FALSE);

  const auto keep_mask = ~resource_flags;
  params->resource_flags &= keep_mask;

  return TRUE;
}
gboolean
gst_d3d12_allocation_params_set_array_size (GstD3D12AllocationParams * params,
guint size)

View file

@ -101,6 +101,9 @@ gboolean gst_d3d12_allocation_params_alignment (GstD3D12Alloca
gboolean gst_d3d12_allocation_params_set_resource_flags (GstD3D12AllocationParams * params,
D3D12_RESOURCE_FLAGS resource_flags);
gboolean gst_d3d12_allocation_params_unset_resource_flags (GstD3D12AllocationParams * params,
D3D12_RESOURCE_FLAGS resource_flags);
gboolean gst_d3d12_allocation_params_set_array_size (GstD3D12AllocationParams * params,
guint size);

View file

@ -0,0 +1,71 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstd3d12pluginutils.h"
/* Returns the GType of the GstD3D12SamplingMethod enum. The enum is
 * registered inside the GST_D3D12_CALL_ONCE block on first use. */
GType
gst_d3d12_sampling_method_get_type (void)
{
  static const GEnumValue sampling_methods[] = {
    {GST_D3D12_SAMPLING_METHOD_NEAREST,
        "Nearest Neighbour", "nearest-neighbour"},
    {GST_D3D12_SAMPLING_METHOD_BILINEAR,
        "Bilinear", "bilinear"},
    {GST_D3D12_SAMPLING_METHOD_LINEAR_MINIFICATION,
        "Linear minification, point magnification", "linear-minification"},
    {GST_D3D12_SAMPLING_METHOD_ANISOTROPIC, "Anisotropic", "anisotropic"},
    {0, nullptr, nullptr},
  };
  static GType method_type = 0;

  GST_D3D12_CALL_ONCE_BEGIN {
    method_type =
        g_enum_register_static ("GstD3D12SamplingMethod", sampling_methods);
  } GST_D3D12_CALL_ONCE_END;

  return method_type;
}
/* One entry of the sampling method -> D3D12 filter lookup table */
struct SamplingMethodMap
{
/* plugin-level sampling method */
GstD3D12SamplingMethod method;
/* native filter used for that method */
D3D12_FILTER filter;
};
/* Lookup table searched linearly by gst_d3d12_sampling_method_to_native() */
static const SamplingMethodMap sampling_method_map[] = {
{GST_D3D12_SAMPLING_METHOD_NEAREST, D3D12_FILTER_MIN_MAG_MIP_POINT},
{GST_D3D12_SAMPLING_METHOD_BILINEAR, D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT},
{GST_D3D12_SAMPLING_METHOD_LINEAR_MINIFICATION,
D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT},
{GST_D3D12_SAMPLING_METHOD_ANISOTROPIC, D3D12_FILTER_ANISOTROPIC},
};
/* Translates @method into the matching D3D12_FILTER. Values without an
 * entry in sampling_method_map fall back to point sampling. */
D3D12_FILTER
gst_d3d12_sampling_method_to_native (GstD3D12SamplingMethod method)
{
  for (const auto & entry : sampling_method_map) {
    if (entry.method == method)
      return entry.filter;
  }

  return D3D12_FILTER_MIN_MAG_MIP_POINT;
}

View file

@ -22,3 +22,20 @@
#include <gst/gst.h>
#include "gstd3d12.h"
#include "gstd3d12-private.h"
G_BEGIN_DECLS
/* Texture sampling methods; gst_d3d12_sampling_method_to_native() maps each
 * value to a D3D12_FILTER */
typedef enum
{
/* mapped to D3D12_FILTER_MIN_MAG_MIP_POINT */
GST_D3D12_SAMPLING_METHOD_NEAREST,
/* mapped to D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT */
GST_D3D12_SAMPLING_METHOD_BILINEAR,
/* mapped to D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT */
GST_D3D12_SAMPLING_METHOD_LINEAR_MINIFICATION,
/* mapped to D3D12_FILTER_ANISOTROPIC */
GST_D3D12_SAMPLING_METHOD_ANISOTROPIC,
} GstD3D12SamplingMethod;
#define GST_TYPE_D3D12_SAMPLING_METHOD (gst_d3d12_sampling_method_get_type())
GType gst_d3d12_sampling_method_get_type (void);
D3D12_FILTER gst_d3d12_sampling_method_to_native (GstD3D12SamplingMethod method);
G_END_DECLS

View file

@ -0,0 +1,966 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
/* Constant buffer b0: global alpha multiplier. OutputRGBA multiplies the
 * sample's alpha by this value when writing the render target. */
cbuffer PsAlphaFactor : register(b0, space0)
{
float alphaFactor;
};
/* One color transform as consumed by the converters below:
 *   out = clamp (float3 (dot (CoeffX, in), dot (CoeffY, in), dot (CoeffZ, in))
 *                + Offset, Min, Max) */
struct PSColorSpace
{
/* matrix rows producing the x, y and z output components */
float3 CoeffX;
float3 CoeffY;
float3 CoeffZ;
/* added after the matrix multiplication */
float3 Offset;
/* clamp bounds of the result */
float3 Min;
float3 Max;
/* not read by any converter in this file; presumably layout padding
 * matching the CPU-side struct - TODO confirm */
float padding;
};
/* Constant buffer b1: color transforms used by the converter classes */
cbuffer PsConstBuffer : register(b1, space0)
{
/* applied before the gamma decode LUT (ConverterGamma/ConverterPrimary) */
PSColorSpace preCoeff;
/* applied as the last step of every non-identity converter */
PSColorSpace postCoeff;
/* applied between gamma decode and gamma encode (ConverterPrimary) */
PSColorSpace primariesCoeff;
};
/* Input textures. Each NUM_SRV_<n> define declares n textures bound to
 * t0..t(n-1); every branch redeclares shaderTexture_0, so the defines are
 * not meant to be combined. */
#ifdef NUM_SRV_1
Texture2D shaderTexture_0 : register(t0, space0);
#endif
#ifdef NUM_SRV_2
Texture2D shaderTexture_0 : register(t0, space0);
Texture2D shaderTexture_1 : register(t1, space0);
#endif
#ifdef NUM_SRV_3
Texture2D shaderTexture_0 : register(t0, space0);
Texture2D shaderTexture_1 : register(t1, space0);
Texture2D shaderTexture_2 : register(t2, space0);
#endif
#ifdef NUM_SRV_4
Texture2D shaderTexture_0 : register(t0, space0);
Texture2D shaderTexture_1 : register(t1, space0);
Texture2D shaderTexture_2 : register(t2, space0);
Texture2D shaderTexture_3 : register(t3, space0);
#endif
/* Sampler used for all input textures */
SamplerState samplerState : register(s0, space0);
/* Gamma lookup tables, only available when BUILD_LUT is defined.
 * ConverterGamma/ConverterPrimary sample gammaDecLUT first and gammaEncLUT
 * afterwards, both through lutSamplerState. */
#ifdef BUILD_LUT
Texture1D<float> gammaDecLUT : register(t4, space0);
Texture1D<float> gammaEncLUT : register(t5, space0);
SamplerState lutSamplerState : register(s1, space0);
#endif
/* Pixel shader input: position and texture coordinate */
struct PS_INPUT
{
float4 Position: SV_POSITION;
float2 Texture: TEXCOORD;
};
/* Pixel shader outputs. The variants differ only in the number of render
 * targets written (SV_TARGET0..SV_TARGET3). */

/* one render target */
struct PS_OUTPUT_PACKED
{
float4 Plane0: SV_TARGET0;
};
/* one render target */
struct PS_OUTPUT_LUMA
{
float4 Plane0: SV_TARGET0;
};
/* one render target */
struct PS_OUTPUT_CHROMA
{
float4 Plane0: SV_TARGET0;
};
/* two render targets */
struct PS_OUTPUT_CHROMA_PLANAR
{
float4 Plane0: SV_TARGET0;
float4 Plane1: SV_TARGET1;
};
/* three render targets */
struct PS_OUTPUT_PLANAR
{
float4 Plane0: SV_TARGET0;
float4 Plane1: SV_TARGET1;
float4 Plane2: SV_TARGET2;
};
/* four render targets */
struct PS_OUTPUT_PLANAR_FULL
{
float4 Plane0: SV_TARGET0;
float4 Plane1: SV_TARGET1;
float4 Plane2: SV_TARGET2;
float4 Plane3: SV_TARGET3;
};
/* Converts straight alpha to premultiplied alpha: color channels are
 * multiplied by alpha, alpha itself is passed through. */
float4 DoAlphaPremul (float4 sample)
{
  return float4 (sample.rgb * sample.a, sample.a);
}
/* Converts premultiplied alpha back to straight alpha. Samples with alpha
 * 0 (division by zero) or 1 (nothing to do) are returned unchanged; the
 * divided color is saturated to [0..1]. */
float4 DoAlphaUnpremul (float4 sample)
{
  if (sample.a != 0 && sample.a != 1)
    return float4 (saturate (sample.rgb / sample.a), sample.a);

  return sample;
}
/* Samples the input texture(s) at uv and returns the texel with its
 * channels reordered by the format-specific implementation */
interface ISampler
{
float4 Execute (float2 uv);
};
#ifdef NUM_SRV_1
/* Samplers for formats stored in a single texture. Each one reads
 * shaderTexture_0 and reorders (or drops) channels. */

/* Texel returned as is */
class SamplerRGBA : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return texel;
  }
};

/* Premultiplied texel converted to straight alpha */
class SamplerRGBAPremul : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return DoAlphaUnpremul (texel);
  }
};

class SamplerRBGA : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return texel.rbga;
  }
};

class SamplerVUYA : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return texel.zyxw;
  }
};

/* Texture alpha is ignored, output alpha is forced to 1 */
class SamplerY410 : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return float4 (texel.yxz, 1.0);
  }
};

class SamplerY412 : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return texel.grba;
  }
};

class SamplerAYUV : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return texel.yzwx;
  }
};

/* Padding channel replaced by alpha 1 */
class SamplerRGBx : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return float4 (texel.rgb, 1.0);
  }
};

/* Padding channel replaced by alpha 1 */
class SamplerxRGB : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return float4 (texel.gba, 1.0);
  }
};

class SamplerARGB : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return texel.gbar;
  }
};

/* Padding channel replaced by alpha 1 */
class SamplerxBGR : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return float4 (texel.abg, 1.0);
  }
};

class SamplerABGR : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return texel.abgr;
  }
};

/* Texture alpha is ignored, output alpha is forced to 1 */
class SamplerBGR10A2 : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return float4 (texel.zyx, 1.0);
  }
};

class SamplerBGRA64 : ISampler
{
  float4 Execute (float2 uv)
  {
    float4 texel = shaderTexture_0.Sample (samplerState, uv);
    return texel.bgra;
  }
};

/* First channel only; the other two components are fixed to 0.5 and
 * alpha to 1 */
class SamplerGRAY : ISampler
{
  float4 Execute (float2 uv)
  {
    float luma = shaderTexture_0.Sample (samplerState, uv).x;
    return float4 (luma, 0.5, 0.5, 1.0);
  }
};
#endif
#ifdef NUM_SRV_2
/* Samplers for two-texture formats: the first component comes from
 * shaderTexture_0, the next two from shaderTexture_1. Alpha is 1. */

class SamplerNV12 : ISampler
{
  float4 Execute (float2 uv)
  {
    float luma = shaderTexture_0.Sample (samplerState, uv).x;
    float2 chroma = shaderTexture_1.Sample (samplerState, uv).xy;
    return float4 (luma, chroma, 1.0);
  }
};

/* Same as NV12 with the two chroma channels swapped */
class SamplerNV21 : ISampler
{
  float4 Execute (float2 uv)
  {
    float luma = shaderTexture_0.Sample (samplerState, uv).x;
    float2 chroma = shaderTexture_1.Sample (samplerState, uv).yx;
    return float4 (luma, chroma, 1.0);
  }
};
#endif
#ifdef NUM_SRV_3
/* Samplers for three-texture (planar) formats. Every texture contributes
 * its first channel; alpha is 1. The _10 and _12 variants scale the
 * sampled value by 64 and 16 respectively and saturate the result. */

class SamplerI420 : ISampler
{
  float4 Execute (float2 uv)
  {
    float y = shaderTexture_0.Sample (samplerState, uv).x;
    float u = shaderTexture_1.Sample (samplerState, uv).x;
    float v = shaderTexture_2.Sample (samplerState, uv).x;
    return float4 (y, u, v, 1.0);
  }
};

/* Same as I420 with the second and third textures swapped */
class SamplerYV12 : ISampler
{
  float4 Execute (float2 uv)
  {
    float y = shaderTexture_0.Sample (samplerState, uv).x;
    float v = shaderTexture_1.Sample (samplerState, uv).x;
    float u = shaderTexture_2.Sample (samplerState, uv).x;
    return float4 (y, u, v, 1.0);
  }
};

class SamplerI420_10 : ISampler
{
  float4 Execute (float2 uv)
  {
    float y = shaderTexture_0.Sample (samplerState, uv).x;
    float u = shaderTexture_1.Sample (samplerState, uv).x;
    float v = shaderTexture_2.Sample (samplerState, uv).x;
    return float4 (saturate (float3 (y, u, v) * 64.0), 1.0);
  }
};

class SamplerI420_12 : ISampler
{
  float4 Execute (float2 uv)
  {
    float y = shaderTexture_0.Sample (samplerState, uv).x;
    float u = shaderTexture_1.Sample (samplerState, uv).x;
    float v = shaderTexture_2.Sample (samplerState, uv).x;
    return float4 (saturate (float3 (y, u, v) * 16.0), 1.0);
  }
};

/* Texture order is G, B, R */
class SamplerGBR : ISampler
{
  float4 Execute (float2 uv)
  {
    float g = shaderTexture_0.Sample (samplerState, uv).x;
    float b = shaderTexture_1.Sample (samplerState, uv).x;
    float r = shaderTexture_2.Sample (samplerState, uv).x;
    return float4 (r, g, b, 1.0);
  }
};

class SamplerGBR_10 : ISampler
{
  float4 Execute (float2 uv)
  {
    float g = shaderTexture_0.Sample (samplerState, uv).x;
    float b = shaderTexture_1.Sample (samplerState, uv).x;
    float r = shaderTexture_2.Sample (samplerState, uv).x;
    return float4 (saturate (float3 (r, g, b) * 64.0), 1.0);
  }
};

class SamplerGBR_12 : ISampler
{
  float4 Execute (float2 uv)
  {
    float g = shaderTexture_0.Sample (samplerState, uv).x;
    float b = shaderTexture_1.Sample (samplerState, uv).x;
    float r = shaderTexture_2.Sample (samplerState, uv).x;
    return float4 (saturate (float3 (r, g, b) * 16.0), 1.0);
  }
};

/* Texture order is R, G, B */
class SamplerRGBP : ISampler
{
  float4 Execute (float2 uv)
  {
    float r = shaderTexture_0.Sample (samplerState, uv).x;
    float g = shaderTexture_1.Sample (samplerState, uv).x;
    float b = shaderTexture_2.Sample (samplerState, uv).x;
    return float4 (r, g, b, 1.0);
  }
};

/* Texture order is B, G, R */
class SamplerBGRP : ISampler
{
  float4 Execute (float2 uv)
  {
    float b = shaderTexture_0.Sample (samplerState, uv).x;
    float g = shaderTexture_1.Sample (samplerState, uv).x;
    float r = shaderTexture_2.Sample (samplerState, uv).x;
    return float4 (r, g, b, 1.0);
  }
};
#endif
#ifdef NUM_SRV_4
/* Samplers for four-texture (planar with alpha) formats. Texture order is
 * G, B, R, A. The _10 and _12 variants scale all four channels by 64 and
 * 16 respectively and saturate the result. */

class SamplerGBRA : ISampler
{
  float4 Execute (float2 uv)
  {
    float g = shaderTexture_0.Sample (samplerState, uv).x;
    float b = shaderTexture_1.Sample (samplerState, uv).x;
    float r = shaderTexture_2.Sample (samplerState, uv).x;
    float a = shaderTexture_3.Sample (samplerState, uv).x;
    return float4 (r, g, b, a);
  }
};

class SamplerGBRA_10 : ISampler
{
  float4 Execute (float2 uv)
  {
    float g = shaderTexture_0.Sample (samplerState, uv).x;
    float b = shaderTexture_1.Sample (samplerState, uv).x;
    float r = shaderTexture_2.Sample (samplerState, uv).x;
    float a = shaderTexture_3.Sample (samplerState, uv).x;
    return saturate (float4 (r, g, b, a) * 64.0);
  }
};

class SamplerGBRA_12 : ISampler
{
  float4 Execute (float2 uv)
  {
    float g = shaderTexture_0.Sample (samplerState, uv).x;
    float b = shaderTexture_1.Sample (samplerState, uv).x;
    float r = shaderTexture_2.Sample (samplerState, uv).x;
    float a = shaderTexture_3.Sample (samplerState, uv).x;
    return saturate (float4 (r, g, b, a) * 16.0);
  }
};
#endif
/* Color conversion step applied to a sampled texel. The implementations in
 * this file transform xyz and pass alpha through unchanged. */
interface IConverter
{
float4 Execute (float4 sample);
};
/* No conversion: the sample is returned unchanged */
class ConverterIdentity : IConverter
{
float4 Execute (float4 sample)
{
return sample;
}
};
/* Range adjustment: only the diagonal of postCoeff is used, i.e. every
 * component is scaled and offset independently, then clamped. */
class ConverterRange : IConverter
{
  float4 Execute (float4 sample)
  {
    float3 diagonal = float3 (postCoeff.CoeffX.x, postCoeff.CoeffY.y,
        postCoeff.CoeffZ.z);
    float3 adjusted = diagonal * sample.xyz;

    adjusted += postCoeff.Offset;
    adjusted = clamp (adjusted, postCoeff.Min, postCoeff.Max);

    return float4 (adjusted, sample.a);
  }
};
/* Single matrix conversion: full postCoeff matrix, offset and clamp */
class ConverterSimple : IConverter
{
  float4 Execute (float4 sample)
  {
    float3 converted = float3 (dot (postCoeff.CoeffX, sample.xyz),
        dot (postCoeff.CoeffY, sample.xyz),
        dot (postCoeff.CoeffZ, sample.xyz));

    converted += postCoeff.Offset;
    converted = clamp (converted, postCoeff.Min, postCoeff.Max);

    return float4 (converted, sample.a);
  }
};
#ifdef BUILD_LUT
/* Conversion with gamma remapping:
 *   1) preCoeff matrix + offset + clamp
 *   2) gamma decode LUT, then gamma encode LUT, per component
 *   3) postCoeff matrix + offset + clamp
 * Alpha is passed through unchanged. */
class ConverterGamma : IConverter
{
  float4 Execute (float4 sample)
  {
    float3 out_space;
    float3 encoded;

    out_space.x = dot (preCoeff.CoeffX, sample.xyz);
    out_space.y = dot (preCoeff.CoeffY, sample.xyz);
    out_space.z = dot (preCoeff.CoeffZ, sample.xyz);
    out_space += preCoeff.Offset;
    out_space = clamp (out_space, preCoeff.Min, preCoeff.Max);

    out_space.x = gammaDecLUT.Sample (lutSamplerState, out_space.x);
    out_space.y = gammaDecLUT.Sample (lutSamplerState, out_space.y);
    out_space.z = gammaDecLUT.Sample (lutSamplerState, out_space.z);

    encoded.x = gammaEncLUT.Sample (lutSamplerState, out_space.x);
    encoded.y = gammaEncLUT.Sample (lutSamplerState, out_space.y);
    encoded.z = gammaEncLUT.Sample (lutSamplerState, out_space.z);

    /* The matrix source must stay intact while the three rows are
     * evaluated: writing the result into the vector being read would make
     * the y and z rows use an already transformed x (and y) component */
    out_space.x = dot (postCoeff.CoeffX, encoded);
    out_space.y = dot (postCoeff.CoeffY, encoded);
    out_space.z = dot (postCoeff.CoeffZ, encoded);
    out_space += postCoeff.Offset;

    return float4 (clamp (out_space, postCoeff.Min, postCoeff.Max), sample.a);
  }
};
/* Gamma + colour primaries converter (only compiled when BUILD_LUT is
 * defined).
 *
 * Pipeline: preCoeff matrix + offset + clamp -> gammaDecLUT ->
 * primariesCoeff matrix -> gammaEncLUT -> postCoeff matrix + offset + clamp.
 * Alpha is forwarded as is. */
class ConverterPrimary : IConverter
{
  float4 Execute (float4 sample)
  {
    float3 out_space;
    float3 tmp;

    /* Pre matrix: every row reads the untouched input colour */
    out_space.x = dot (preCoeff.CoeffX, sample.xyz);
    out_space.y = dot (preCoeff.CoeffY, sample.xyz);
    out_space.z = dot (preCoeff.CoeffZ, sample.xyz);
    out_space += preCoeff.Offset;
    out_space = clamp (out_space, preCoeff.Min, preCoeff.Max);

    /* Per-component LUT lookup, in-place update is safe */
    out_space.x = gammaDecLUT.Sample (lutSamplerState, out_space.x);
    out_space.y = gammaDecLUT.Sample (lutSamplerState, out_space.y);
    out_space.z = gammaDecLUT.Sample (lutSamplerState, out_space.z);

    /* Primaries matrix: out_space is the input, tmp the output */
    tmp.x = dot (primariesCoeff.CoeffX, out_space);
    tmp.y = dot (primariesCoeff.CoeffY, out_space);
    tmp.z = dot (primariesCoeff.CoeffZ, out_space);

    tmp.x = gammaEncLUT.Sample (lutSamplerState, tmp.x);
    tmp.y = gammaEncLUT.Sample (lutSamplerState, tmp.y);
    tmp.z = gammaEncLUT.Sample (lutSamplerState, tmp.z);

    /* Post matrix: tmp is the input, out_space the output. Input and output
     * are kept in distinct variables so that the .y and .z rows do not read
     * components that were already overwritten by the previous rows. */
    out_space.x = dot (postCoeff.CoeffX, tmp);
    out_space.y = dot (postCoeff.CoeffY, tmp);
    out_space.z = dot (postCoeff.CoeffZ, tmp);
    out_space += postCoeff.Offset;
    return float4 (clamp (out_space, postCoeff.Min, postCoeff.Max), sample.a);
  }
};
#endif
/* Rescale by 1023 / 65535, i.e. map a value expressed on a 16-bit unorm
 * scale onto a 10-bit one. Overloaded for scalar and 2/3/4-component
 * vectors; the arithmetic is component-wise and identical in every overload. */
float UnormTo10bit (float sample)
{
  float rescaled = sample * 1023.0 / 65535.0;
  return rescaled;
}

float2 UnormTo10bit (float2 sample)
{
  float2 rescaled = sample * 1023.0 / 65535.0;
  return rescaled;
}

float3 UnormTo10bit (float3 sample)
{
  float3 rescaled = sample * 1023.0 / 65535.0;
  return rescaled;
}

float4 UnormTo10bit (float4 sample)
{
  float4 rescaled = sample * 1023.0 / 65535.0;
  return rescaled;
}
/* Rescale by 4095 / 65535, i.e. map a value expressed on a 16-bit unorm
 * scale onto a 12-bit one. Overloaded for scalar and 2/3/4-component
 * vectors; the arithmetic is component-wise and identical in every overload. */
float UnormTo12bit (float sample)
{
  float rescaled = sample * 4095.0 / 65535.0;
  return rescaled;
}

float2 UnormTo12bit (float2 sample)
{
  float2 rescaled = sample * 4095.0 / 65535.0;
  return rescaled;
}

float3 UnormTo12bit (float3 sample)
{
  float3 rescaled = sample * 4095.0 / 65535.0;
  return rescaled;
}

float4 UnormTo12bit (float4 sample)
{
  float4 rescaled = sample * 4095.0 / 65535.0;
  return rescaled;
}
/* Output stage for builders that return PS_OUTPUT_PACKED: turns one
 * converted texel into the value written by the pixel shader. */
interface IOutputPacked
{
PS_OUTPUT_PACKED Build (float4 sample);
};
/* RGBA order; alpha is multiplied by alphaFactor. */
class OutputRGBA : IOutputPacked
{
  PS_OUTPUT_PACKED Build (float4 sample)
  {
    PS_OUTPUT_PACKED result;
    float4 texel = sample;
    texel.a *= alphaFactor;
    result.Plane0 = texel;
    return result;
  }
};
/* RGBA order; alpha is multiplied by alphaFactor first and the result is
 * then passed through DoAlphaPremul(). */
class OutputRGBAPremul : IOutputPacked
{
  PS_OUTPUT_PACKED Build (float4 sample)
  {
    PS_OUTPUT_PACKED result;
    float4 scaled = float4 (sample.rgb, sample.a * alphaFactor);
    result.Plane0 = DoAlphaPremul (scaled);
    return result;
  }
};
/* RGB order with the fourth component forced to 1.0. */
class OutputRGBx : IOutputPacked
{
  PS_OUTPUT_PACKED Build (float4 sample)
  {
    PS_OUTPUT_PACKED result;
    float4 texel = sample;
    texel.a = 1.0;
    result.Plane0 = texel;
    return result;
  }
};
/* First component forced to 0.0, followed by R, G, B. */
class OutputxRGB : IOutputPacked
{
  PS_OUTPUT_PACKED Build (float4 sample)
  {
    PS_OUTPUT_PACKED result;
    result.Plane0 = float4 (0.0, sample.r, sample.g, sample.b);
    return result;
  }
};
/* A, R, G, B order. Alpha is multiplied by alphaFactor before swizzling,
 * like every other alpha-carrying builder in this file (OutputRGBA,
 * OutputVUYA, OutputAYUV, OutputRBGA, OutputGBRA*). */
class OutputARGB : IOutputPacked
{
  PS_OUTPUT_PACKED Build (float4 sample)
  {
    PS_OUTPUT_PACKED output;
    sample.a *= alphaFactor;
    output.Plane0 = sample.argb;
    return output;
  }
};
/* First component forced to 0.0, followed by B, G, R. */
class OutputxBGR : IOutputPacked
{
  PS_OUTPUT_PACKED Build (float4 sample)
  {
    PS_OUTPUT_PACKED result;
    result.Plane0 = float4 (0.0, sample.b, sample.g, sample.r);
    return result;
  }
};
/* A, B, G, R order. Alpha is multiplied by alphaFactor before swizzling,
 * like every other alpha-carrying builder in this file (OutputRGBA,
 * OutputVUYA, OutputAYUV, OutputRBGA, OutputGBRA*). */
class OutputABGR : IOutputPacked
{
  PS_OUTPUT_PACKED Build (float4 sample)
  {
    PS_OUTPUT_PACKED output;
    sample.a *= alphaFactor;
    output.Plane0 = sample.abgr;
    return output;
  }
};
/* Writes the sample's components in z, y, x, w order; w (alpha) is
 * multiplied by alphaFactor. */
class OutputVUYA : IOutputPacked
{
  PS_OUTPUT_PACKED Build (float4 sample)
  {
    PS_OUTPUT_PACKED result;
    result.Plane0 = float4 (sample.z, sample.y, sample.x,
        sample.w * alphaFactor);
    return result;
  }
};
/* Writes the sample's components in w, x, y, z order; w (alpha) is
 * multiplied by alphaFactor. */
class OutputAYUV : IOutputPacked
{
  PS_OUTPUT_PACKED Build (float4 sample)
  {
    PS_OUTPUT_PACKED result;
    result.Plane0 = float4 (sample.w * alphaFactor, sample.x, sample.y,
        sample.z);
    return result;
  }
};
/* R, B, G, A order; alpha is multiplied by alphaFactor. */
class OutputRBGA : IOutputPacked
{
  PS_OUTPUT_PACKED Build (float4 sample)
  {
    PS_OUTPUT_PACKED result;
    result.Plane0 = float4 (sample.r, sample.b, sample.g,
        sample.a * alphaFactor);
    return result;
  }
};
/* Output stage for builders that return PS_OUTPUT_LUMA. */
interface IOutputLuma
{
PS_OUTPUT_LUMA Build (float4 sample);
};
/* Writes the first component of the sample; the other components are zero. */
class OutputLuma : IOutputLuma
{
  PS_OUTPUT_LUMA Build (float4 sample)
  {
    PS_OUTPUT_LUMA result;
    float luma = sample.x;
    result.Plane0 = float4 (luma, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Same as OutputLuma, with the value rescaled through UnormTo10bit(). */
class OutputLuma_10 : IOutputLuma
{
  PS_OUTPUT_LUMA Build (float4 sample)
  {
    PS_OUTPUT_LUMA result;
    float luma = UnormTo10bit (sample.x);
    result.Plane0 = float4 (luma, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Same as OutputLuma, with the value rescaled through UnormTo12bit(). */
class OutputLuma_12 : IOutputLuma
{
  PS_OUTPUT_LUMA Build (float4 sample)
  {
    PS_OUTPUT_LUMA result;
    float luma = UnormTo12bit (sample.x);
    result.Plane0 = float4 (luma, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Output stage for builders that return PS_OUTPUT_CHROMA. */
interface IOutputChroma
{
PS_OUTPUT_CHROMA Build (float4 sample);
};
/* Writes the sample's second and third components, in that order. */
class OutputChromaNV12 : IOutputChroma
{
  PS_OUTPUT_CHROMA Build (float4 sample)
  {
    PS_OUTPUT_CHROMA result;
    result.Plane0 = float4 (sample.y, sample.z, 0.0, 0.0);
    return result;
  }
};
/* Writes the sample's third and second components, in that (swapped) order. */
class OutputChromaNV21 : IOutputChroma
{
  PS_OUTPUT_CHROMA Build (float4 sample)
  {
    PS_OUTPUT_CHROMA result;
    result.Plane0 = float4 (sample.z, sample.y, 0.0, 0.0);
    return result;
  }
};
/* Output stage for builders that return PS_OUTPUT_CHROMA_PLANAR
 * (implementations below fill Plane0 and Plane1). */
interface IOutputChromaPlanar
{
PS_OUTPUT_CHROMA_PLANAR Build (float4 sample);
};
/* Plane0 <- sample.y, Plane1 <- sample.z. */
class OutputChromaI420 : IOutputChromaPlanar
{
  PS_OUTPUT_CHROMA_PLANAR Build (float4 sample)
  {
    PS_OUTPUT_CHROMA_PLANAR result;
    float2 chroma = sample.yz;
    result.Plane0 = float4 (chroma.x, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (chroma.y, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Plane0 <- sample.z, Plane1 <- sample.y (plane order swapped compared to
 * OutputChromaI420). */
class OutputChromaYV12 : IOutputChromaPlanar
{
  PS_OUTPUT_CHROMA_PLANAR Build (float4 sample)
  {
    PS_OUTPUT_CHROMA_PLANAR result;
    float2 chroma = sample.zy;
    result.Plane0 = float4 (chroma.x, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (chroma.y, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Plane0 <- sample.y, Plane1 <- sample.z, both rescaled through
 * UnormTo10bit(). */
class OutputChromaI420_10 : IOutputChromaPlanar
{
  PS_OUTPUT_CHROMA_PLANAR Build (float4 sample)
  {
    PS_OUTPUT_CHROMA_PLANAR result;
    float first = UnormTo10bit (sample.y);
    float second = UnormTo10bit (sample.z);
    result.Plane0 = float4 (first, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (second, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Plane0 <- sample.y, Plane1 <- sample.z, both rescaled through
 * UnormTo12bit(). */
class OutputChromaI420_12 : IOutputChromaPlanar
{
  PS_OUTPUT_CHROMA_PLANAR Build (float4 sample)
  {
    PS_OUTPUT_CHROMA_PLANAR result;
    float first = UnormTo12bit (sample.y);
    float second = UnormTo12bit (sample.z);
    result.Plane0 = float4 (first, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (second, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Output stage for builders that return PS_OUTPUT_PLANAR
 * (implementations below fill Plane0, Plane1 and Plane2). */
interface IOutputPlanar
{
PS_OUTPUT_PLANAR Build (float4 sample);
};
/* Plane0 <- sample.x, Plane1 <- sample.y, Plane2 <- sample.z. */
class OutputY444 : IOutputPlanar
{
  PS_OUTPUT_PLANAR Build (float4 sample)
  {
    PS_OUTPUT_PLANAR result;
    float3 planes = sample.xyz;
    result.Plane0 = float4 (planes.x, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (planes.y, 0.0, 0.0, 0.0);
    result.Plane2 = float4 (planes.z, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Same plane mapping as OutputY444, rescaled through UnormTo10bit(). */
class OutputY444_10 : IOutputPlanar
{
  PS_OUTPUT_PLANAR Build (float4 sample)
  {
    PS_OUTPUT_PLANAR result;
    float3 planes = UnormTo10bit (sample.xyz);
    result.Plane0 = float4 (planes.x, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (planes.y, 0.0, 0.0, 0.0);
    result.Plane2 = float4 (planes.z, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Same plane mapping as OutputY444, rescaled through UnormTo12bit(). */
class OutputY444_12 : IOutputPlanar
{
  PS_OUTPUT_PLANAR Build (float4 sample)
  {
    PS_OUTPUT_PLANAR result;
    float3 planes = UnormTo12bit (sample.xyz);
    result.Plane0 = float4 (planes.x, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (planes.y, 0.0, 0.0, 0.0);
    result.Plane2 = float4 (planes.z, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Plane0 <- G, Plane1 <- B, Plane2 <- R. */
class OutputGBR : IOutputPlanar
{
  PS_OUTPUT_PLANAR Build (float4 sample)
  {
    PS_OUTPUT_PLANAR result;
    float3 gbr = sample.gbr;
    result.Plane0 = float4 (gbr.x, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (gbr.y, 0.0, 0.0, 0.0);
    result.Plane2 = float4 (gbr.z, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Plane0 <- G, Plane1 <- B, Plane2 <- R, rescaled through UnormTo10bit(). */
class OutputGBR_10 : IOutputPlanar
{
  PS_OUTPUT_PLANAR Build (float4 sample)
  {
    PS_OUTPUT_PLANAR result;
    float3 gbr = UnormTo10bit (sample.gbr);
    result.Plane0 = float4 (gbr.x, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (gbr.y, 0.0, 0.0, 0.0);
    result.Plane2 = float4 (gbr.z, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Plane0 <- G, Plane1 <- B, Plane2 <- R, rescaled through UnormTo12bit(). */
class OutputGBR_12 : IOutputPlanar
{
  PS_OUTPUT_PLANAR Build (float4 sample)
  {
    PS_OUTPUT_PLANAR result;
    float3 gbr = UnormTo12bit (sample.gbr);
    result.Plane0 = float4 (gbr.x, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (gbr.y, 0.0, 0.0, 0.0);
    result.Plane2 = float4 (gbr.z, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Plane0 <- R, Plane1 <- G, Plane2 <- B. */
class OutputRGBP : IOutputPlanar
{
  PS_OUTPUT_PLANAR Build (float4 sample)
  {
    PS_OUTPUT_PLANAR result;
    float3 rgb = sample.rgb;
    result.Plane0 = float4 (rgb.x, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (rgb.y, 0.0, 0.0, 0.0);
    result.Plane2 = float4 (rgb.z, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Plane0 <- B, Plane1 <- G, Plane2 <- R. */
class OutputBGRP : IOutputPlanar
{
  PS_OUTPUT_PLANAR Build (float4 sample)
  {
    PS_OUTPUT_PLANAR result;
    float3 bgr = sample.bgr;
    result.Plane0 = float4 (bgr.x, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (bgr.y, 0.0, 0.0, 0.0);
    result.Plane2 = float4 (bgr.z, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Output stage for builders that return PS_OUTPUT_PLANAR_FULL
 * (implementations below fill Plane0 through Plane3). */
interface IOutputPlanarFull
{
PS_OUTPUT_PLANAR_FULL Build (float4 sample);
};
/* Plane0 <- G, Plane1 <- B, Plane2 <- R, Plane3 <- A * alphaFactor. */
class OutputGBRA : IOutputPlanarFull
{
  PS_OUTPUT_PLANAR_FULL Build (float4 sample)
  {
    PS_OUTPUT_PLANAR_FULL result;
    float4 gbra = float4 (sample.gbr, sample.a * alphaFactor);
    result.Plane0 = float4 (gbra.x, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (gbra.y, 0.0, 0.0, 0.0);
    result.Plane2 = float4 (gbra.z, 0.0, 0.0, 0.0);
    result.Plane3 = float4 (gbra.w, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Same plane mapping as OutputGBRA; alpha is multiplied by alphaFactor and
 * all four values are then rescaled through UnormTo10bit(). */
class OutputGBRA_10 : IOutputPlanarFull
{
  PS_OUTPUT_PLANAR_FULL Build (float4 sample)
  {
    PS_OUTPUT_PLANAR_FULL result;
    float4 gbra = UnormTo10bit (float4 (sample.gbr, sample.a * alphaFactor));
    result.Plane0 = float4 (gbra.x, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (gbra.y, 0.0, 0.0, 0.0);
    result.Plane2 = float4 (gbra.z, 0.0, 0.0, 0.0);
    result.Plane3 = float4 (gbra.w, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Same plane mapping as OutputGBRA; alpha is multiplied by alphaFactor and
 * all four values are then rescaled through UnormTo12bit(). */
class OutputGBRA_12 : IOutputPlanarFull
{
  PS_OUTPUT_PLANAR_FULL Build (float4 sample)
  {
    PS_OUTPUT_PLANAR_FULL result;
    float4 gbra = UnormTo12bit (float4 (sample.gbr, sample.a * alphaFactor));
    result.Plane0 = float4 (gbra.x, 0.0, 0.0, 0.0);
    result.Plane1 = float4 (gbra.y, 0.0, 0.0, 0.0);
    result.Plane2 = float4 (gbra.z, 0.0, 0.0, 0.0);
    result.Plane3 = float4 (gbra.w, 0.0, 0.0, 0.0);
    return result;
  }
};
/* Pixel shader entry point. OUTPUT_TYPE, ENTRY_POINT, SAMPLER, CONVERTER and
 * OUTPUT_BUILDER are preprocessor macros supplied on the compiler command
 * line; the three stages run as sample -> convert -> build output. */
OUTPUT_TYPE ENTRY_POINT (PS_INPUT input)
{
  SAMPLER g_sampler;
  CONVERTER g_converter;
  OUTPUT_BUILDER g_builder;

  float4 sampled = g_sampler.Execute (input.Texture);
  float4 converted = g_converter.Execute (sampled);
  return g_builder.Build (converted);
}

View file

@ -0,0 +1,45 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
/* Vertex shader constants, bound at register b0 in register space 1. */
cbuffer VsConstBuffer : register(b0, space1)
{
/* Matrix applied to every input vertex position by VSMain_converter */
matrix Transform;
};
/* Per-vertex input: a position and a 2D texture coordinate. */
struct VS_INPUT
{
float4 Position : POSITION;
float2 Texture : TEXCOORD;
};
/* Vertex shader output: the transformed position (SV_POSITION) and the
 * texture coordinate forwarded from the input. */
struct VS_OUTPUT
{
float4 Position : SV_POSITION;
float2 Texture : TEXCOORD;
};
/* Vertex shader: multiplies the position by Transform and forwards the
 * texture coordinate untouched. */
VS_OUTPUT VSMain_converter (VS_INPUT input)
{
  VS_OUTPUT vertex;
  vertex.Texture = input.Texture;
  vertex.Position = mul (Transform, input.Position);
  return vertex;
}

View file

@ -0,0 +1,64 @@
#!/usr/bin/env python3
# GStreamer
# Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301, USA.
import sys
import os
import argparse
# Prologue written at the top of the generated header.
start_header = """/*
* This file is autogenerated by collect_hlsl_header.py
*/
#pragma once
"""

# Opens the name -> (bytecode pointer, size) map in the generated header.
start_map = """
#define MAKE_BYTECODE(name) { G_STRINGIFY (name), { g_##name, sizeof (g_##name)} }
static const std::map<std::string, std::pair<const BYTE *, SIZE_T>> precompiled_bytecode = {
"""

# Closes the map opened by start_map.
end_map = """};
#undef MAKE_BYTECODE
"""


def main(args):
    """Collect precompiled HLSL headers into one aggregate header.

    Every file in the ``--input`` directory whose name starts with
    ``--prefix`` and ends with ``.h`` is ``#include``d by the generated
    ``--output`` file and registered in its ``precompiled_bytecode`` map,
    keyed by the file name without extension.

    Args:
        args: command line arguments, without the program name.

    Returns:
        0 on success. argparse exits the process when an option is missing.
    """
    parser = argparse.ArgumentParser(description='Read precompiled HLSL headers from directory and make single header')
    # All three options are mandatory: without --input os.listdir() would
    # silently scan the current directory, and without --prefix the filter
    # below would raise a TypeError.
    parser.add_argument("--input", required=True, help="the precompiled HLSL header directory")
    parser.add_argument("--output", required=True, help="output header file location")
    parser.add_argument("--prefix", required=True, help="HLSL header filename prefix")
    args = parser.parse_args(args)

    # Scan precompiled PSMain_*.h headers in build directory
    # and generate single header.
    # os.listdir() returns entries in arbitrary order, so sort them to keep
    # the generated header identical from one build to the next.
    hlsl_headers = sorted(
        name for name in os.listdir(args.input)
        if name.startswith(args.prefix) and name.endswith(".h"))

    with open(args.output, 'w', newline='\n', encoding='utf8') as f:
        f.write(start_header)
        for name in hlsl_headers:
            f.write("#include \"")
            f.write(name)
            f.write("\"\n")
        f.write(start_map)
        for name in hlsl_headers:
            f.write(" MAKE_BYTECODE ({}),\n".format(os.path.splitext(name)[0]))
        f.write(end_map)

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))

View file

@ -0,0 +1,159 @@
# Pixel shader source shared by every converter variant compiled below.
hlsl_conv_ps_source = files('PSMain_converter.hlsl')
# Input (sampler) variants, one row per variant:
#   [0] name: appended to 'Sampler' for the SAMPLER define and used in the
#       entry point name
#   [1] boolean compared with column [2] of the output table to choose the
#       converter set (rows suggest true = RGB family, false = YUV/GRAY;
#       TODO confirm)
#   [2] integer used to build the NUM_SRV_<n>=1 define
hlsl_conv_ps_input_formats = [
['NV12', false, 2],
['NV21', false, 2],
['I420', false, 3],
['YV12', false, 3],
['I420_10', false, 3],
['I420_12', false, 3],
['VUYA', false, 1],
['Y410', false, 1],
['AYUV', false, 1],
['Y412', false, 1],
['RGBA', true, 1],
['RGBAPremul', true, 1],
['RGBx', true, 1],
['GBR', true, 3],
['GBR_10', true, 3],
['GBR_12', true, 3],
['GBRA', true, 4],
['GBRA_10', true, 4],
['GBRA_12', true, 4],
['RGBP', true, 3],
['BGRP', true, 3],
['xRGB', true, 1],
['ARGB', true, 1],
['xBGR', true, 1],
['ABGR', true, 1],
['BGR10A2', true, 1],
['BGRA64', true, 1],
['RBGA', true, 1],
['GRAY', false, 1],
]
# Output (builder) variants, one row per variant:
#   [0] value of the OUTPUT_TYPE define (the shader's return type)
#   [1] name: appended to 'Output' for the OUTPUT_BUILDER define and used in
#       the entry point name
#   [2] boolean compared with column [1] of the input table to choose the
#       converter set (rows suggest true = RGB family, false = YUV;
#       TODO confirm)
hlsl_conv_ps_output_formats = [
['PS_OUTPUT_LUMA', 'Luma', false],
['PS_OUTPUT_LUMA', 'Luma_10', false],
['PS_OUTPUT_LUMA', 'Luma_12', false],
['PS_OUTPUT_CHROMA', 'ChromaNV12', false],
['PS_OUTPUT_CHROMA', 'ChromaNV21', false],
['PS_OUTPUT_CHROMA_PLANAR', 'ChromaI420', false],
['PS_OUTPUT_CHROMA_PLANAR', 'ChromaYV12', false],
['PS_OUTPUT_CHROMA_PLANAR', 'ChromaI420_10', false],
['PS_OUTPUT_CHROMA_PLANAR', 'ChromaI420_12', false],
['PS_OUTPUT_PLANAR', 'Y444', false],
['PS_OUTPUT_PLANAR', 'Y444_10', false],
['PS_OUTPUT_PLANAR', 'Y444_12', false],
['PS_OUTPUT_PLANAR', 'GBR', true],
['PS_OUTPUT_PLANAR', 'GBR_10', true],
['PS_OUTPUT_PLANAR', 'GBR_12', true],
['PS_OUTPUT_PLANAR', 'RGBP', true],
['PS_OUTPUT_PLANAR', 'BGRP', true],
['PS_OUTPUT_PLANAR_FULL', 'GBRA', true],
['PS_OUTPUT_PLANAR_FULL', 'GBRA_10', true],
['PS_OUTPUT_PLANAR_FULL', 'GBRA_12', true],
['PS_OUTPUT_PACKED', 'RGBA', true],
['PS_OUTPUT_PACKED', 'RGBAPremul', true],
['PS_OUTPUT_PACKED', 'RBGA', true],
['PS_OUTPUT_PACKED', 'RGBx', true],
['PS_OUTPUT_PACKED', 'VUYA', false],
['PS_OUTPUT_PACKED', 'AYUV', false],
['PS_OUTPUT_PACKED', 'xRGB', true],
['PS_OUTPUT_PACKED', 'ARGB', true],
['PS_OUTPUT_PACKED', 'xBGR', true],
['PS_OUTPUT_PACKED', 'ABGR', true],
]
# Converters compiled for every input/output pair regardless of the flag
# columns; their shaders are built with the BUILD_LUT define.
extra_converters = [
'Gamma',
'Primary',
]
# Script that merges the generated PSMain_*.h headers into a single header.
header_collector = find_program('collect_hlsl_header.py')
# Compile one pixel shader header per (input, converter, output) combination.
# When the input and output flags match, the 'Identity' and 'Range'
# converters are built, otherwise 'Simple'; the extra (LUT based) converters
# are always built and additionally get BUILD_LUT=1.
foreach input_format : hlsl_conv_ps_input_formats
  in_format = input_format[0]
  in_flag = input_format[1]
  num_srv = input_format[2]

  foreach output_format : hlsl_conv_ps_output_formats
    output_type = output_format[0]
    output_builder = output_format[1]

    if in_flag == output_format[2]
      converters = ['Identity', 'Range']
    else
      converters = ['Simple']
    endif

    # Regular converters first, extra ones afterwards, so that the targets
    # are appended to hlsl_precompiled in the same order as before.
    foreach conv : converters + extra_converters
      lut_defines = extra_converters.contains(conv) ? ['/D', 'BUILD_LUT=1'] : []
      entry_point = 'PSMain_@0@_@1@_@2@'.format(in_format, conv, output_builder)
      header = '@0@.h'.format(entry_point)
      compiled_shader = custom_target(header,
        input : hlsl_conv_ps_source,
        output : header,
        command : [dxc, '/Fh', '@OUTPUT@',
                   '/E', entry_point,
                   '/T', 'ps_6_0',
                   '/D', 'OUTPUT_TYPE=@0@'.format(output_type),
                   '/D', 'ENTRY_POINT=@0@'.format(entry_point),
                   '/D', 'SAMPLER=Sampler@0@'.format(in_format),
                   '/D', 'CONVERTER=Converter@0@'.format(conv),
                   '/D', 'OUTPUT_BUILDER=Output@0@'.format(output_builder),
                   '/D', 'NUM_SRV_@0@=1'.format(num_srv)]
                  + lut_defines
                  + ['/all-resources-bound', '@INPUT@'])
      hlsl_precompiled += [compiled_shader]
    endforeach
  endforeach
endforeach
# Aggregate header: runs collect_hlsl_header.py over the current build
# directory and writes PSMainConverter.h. The precompiled shader targets are
# listed as inputs of this target.
# NOTE(review): the script selects files by name prefix in the build
# directory, so headers left over from earlier configurations would
# presumably be picked up too -- confirm.
header_collection = 'PSMainConverter.h'
generated_collection = custom_target(header_collection,
input : hlsl_precompiled,
output : header_collection,
command : [header_collector,
'--input', meson.current_build_dir(),
'--prefix', 'PSMain_',
'--output', '@OUTPUT@'
])
hlsl_precompiled += generated_collection
# Remaining shaders, one row per shader:
#   [0] entry point, which is also the basename of the .hlsl source file
#   [1] dxc target profile
hlsl_sources = [
  ['VSMain_converter', 'vs_6_0'],
]

foreach shader : hlsl_sources
  entry_point = shader[0]
  target_profile = shader[1]
  source = files('@0@.hlsl'.format(entry_point))
  header = '@0@.h'.format(entry_point)
  compiled_shader = custom_target(header,
    input : source,
    output : header,
    command : [dxc, '/Fh', '@OUTPUT@',
               '/E', entry_point,
               '/T', target_profile,
               '/all-resources-bound',
               '@INPUT@'])
  hlsl_precompiled += [compiled_shader]
endforeach

View file

@ -5,6 +5,9 @@ d3d12_sources = [
'gstd3d12commandallocatorpool.cpp',
'gstd3d12commandlistpool.cpp',
'gstd3d12commandqueue.cpp',
'gstd3d12converter-builder.cpp',
'gstd3d12converter.cpp',
'gstd3d12convert.cpp',
'gstd3d12decoder.cpp',
'gstd3d12descriptorpool.cpp',
'gstd3d12device.cpp',
@ -14,12 +17,15 @@ d3d12_sources = [
'gstd3d12h264dec.cpp',
'gstd3d12h265dec.cpp',
'gstd3d12memory.cpp',
'gstd3d12pluginutils.cpp',
'gstd3d12upload.cpp',
'gstd3d12utils.cpp',
'gstd3d12vp9dec.cpp',
'plugin.cpp',
]
hlsl_precompiled = []
extra_args = [
'-DGST_USE_UNSTABLE_API',
# Disable this warning error. Otherwise d3dx12.h will break build
@ -40,12 +46,16 @@ endif
d3d12_lib = cc.find_library('d3d12', required : d3d12_option)
dxgi_lib = cc.find_library('dxgi', required : d3d12_option)
d3dcompiler_lib = cc.find_library('d3dcompiler', required : d3d12_option)
dx_headers_dep = dependency('DirectX-Headers',
version: '>= 1.611',
allow_fallback: true,
required: d3d12_option)
dxc = find_program('dxc', required : d3d12_option)
if not gstdxva_dep.found() or not d3d12_lib.found() or not dxgi_lib.found() or not dx_headers_dep.found()
if not gstdxva_dep.found() or not d3d12_lib.found() or not dxgi_lib.found() \
or not dx_headers_dep.found() or not d3dcompiler_lib.found() \
or not dxc.found()
if d3d12_option.enabled()
error('The d3d12 was enabled explicitly, but required GstD3D11 dependencies were not found.')
endif
@ -54,6 +64,8 @@ endif
d3d12_headers = [
'dxgi1_6.h',
'd3dcompiler.h',
'DirectXMath.h',
]
have_d3d12_headers = true
@ -70,13 +82,23 @@ if not have_d3d12_headers
subdir_done()
endif
hlsl_precompiled = []
subdir('hlsl')
# https://learn.microsoft.com/en-us/windows/win32/dxmath/pg-xnamath-internals#windows-sse-versus-sse2
# x86 with Windows 7 or older may not support SSE2
if host_machine.cpu_family() != 'x86'
extra_args += ['-DHAVE_DIRECTX_MATH_SIMD']
endif
gstd3d12 = library('gstd3d12',
d3d12_sources,
d3d12_sources + hlsl_precompiled,
c_args : gst_plugins_bad_args + extra_args,
cpp_args: gst_plugins_bad_args + extra_args,
include_directories : [configinc],
dependencies : [gstbase_dep, gstvideo_dep, gstcodecs_dep,
gstdxva_dep, d3d12_lib, dxgi_lib, dx_headers_dep],
gstdxva_dep, d3d12_lib, dxgi_lib, d3dcompiler_lib,
dx_headers_dep],
install : true,
install_dir : plugins_install_dir,
)

View file

@ -29,6 +29,7 @@
#include <gst/gst.h>
#include "gstd3d12.h"
#include "gstd3d12convert.h"
#include "gstd3d12download.h"
#include "gstd3d12upload.h"
#include "gstd3d12h264dec.h"
@ -103,6 +104,8 @@ plugin_init (GstPlugin * plugin)
gst_object_unref (device);
}
gst_element_register (plugin,
"d3d12convert", GST_RANK_NONE, GST_TYPE_D3D12_CONVERT);
gst_element_register (plugin,
"d3d12download", GST_RANK_NONE, GST_TYPE_D3D12_DOWNLOAD);
gst_element_register (plugin,