gst-plugins-rs/net/onvif/src/onvifoverlay/imp.rs
Mathieu Duponchelle a4a5caec53 net/onvif: implement ONVIF metadata processing elements
- RTP payloader and depayloader

- Aggregator to pair per-frame metadata with media frames

- Overlay to render detected shapes
2022-05-06 11:17:04 +03:00

759 lines
26 KiB
Rust

use gst::glib;
use gst::prelude::*;
use gst::subclass::prelude::*;
use gst_video::prelude::*;
use pango::prelude::*;
use once_cell::sync::Lazy;
use std::collections::HashSet;
use std::sync::Mutex;
use minidom::Element;
static CAT: Lazy<gst::DebugCategory> = Lazy::new(|| {
gst::DebugCategory::new(
"onvifoverlay",
gst::DebugColorFlags::empty(),
Some("ONVIF overlay element"),
)
});
const DEFAULT_FONT_DESC: &str = "monospace 12";
// Shape description in cairo coordinates (0, 0) is top left
#[derive(Debug)]
struct Shape {
x: u32,
y: u32,
width: u32,
height: u32,
// Optional text rendered from top left of rectangle
tag: Option<String>,
}
#[derive(Default)]
struct State {
video_info: Option<gst_video::VideoInfo>,
composition: Option<gst_video::VideoOverlayComposition>,
layout: Option<pango::Layout>,
attach: bool,
}
// SAFETY: Required because `pango::Layout` is not `Send` but the whole `State` needs to be.
// We ensure that no additional references to the layout are ever created, which makes it safe
// to send it to other threads as long as only a single thread uses it concurrently.
unsafe impl Send for State {}
struct Settings {
font_desc: String,
}
impl Default for Settings {
fn default() -> Self {
Self {
font_desc: String::from(DEFAULT_FONT_DESC),
}
}
}
pub struct OnvifOverlay {
srcpad: gst::Pad,
sinkpad: gst::Pad,
state: Mutex<State>,
settings: Mutex<Settings>,
}
impl OnvifOverlay {
fn negotiate(&self, element: &super::OnvifOverlay) -> Result<gst::FlowSuccess, gst::FlowError> {
let video_info = match self.state.lock().unwrap().video_info.as_ref() {
Some(video_info) => Ok(video_info.clone()),
None => {
gst::element_error!(
element,
gst::CoreError::Negotiation,
["Element hasn't received valid video caps at negotiation time"]
);
Err(gst::FlowError::NotNegotiated)
}
}?;
let mut caps = video_info.to_caps().unwrap();
let mut downstream_accepts_meta = false;
let upstream_has_meta = caps
.features(0)
.map(|f| f.contains(&gst_video::CAPS_FEATURE_META_GST_VIDEO_OVERLAY_COMPOSITION))
.unwrap_or(false);
if !upstream_has_meta {
let mut caps_clone = caps.clone();
let overlay_caps = caps_clone.make_mut();
if let Some(features) = overlay_caps.features_mut(0) {
features.add(&gst_video::CAPS_FEATURE_META_GST_VIDEO_OVERLAY_COMPOSITION);
let peercaps = self.srcpad.peer_query_caps(Some(&caps_clone));
downstream_accepts_meta = !peercaps.is_empty();
if downstream_accepts_meta {
caps = caps_clone;
}
}
}
gst::debug!(
CAT,
obj: element,
"upstream has meta: {}, downstream accepts meta: {}",
upstream_has_meta,
downstream_accepts_meta
);
if upstream_has_meta || downstream_accepts_meta {
let mut query = gst::query::Allocation::new(&caps, false);
if !self.srcpad.push_event(gst::event::Caps::new(&caps)) {
return Err(gst::FlowError::NotNegotiated);
}
if !self.srcpad.peer_query(&mut query)
&& self.srcpad.pad_flags().contains(gst::PadFlags::FLUSHING)
{
return Err(gst::FlowError::NotNegotiated);
}
let attach = query
.find_allocation_meta::<gst_video::VideoOverlayCompositionMeta>()
.is_some();
gst::debug!(CAT, obj: element, "attach meta: {}", attach);
self.state.lock().unwrap().attach = attach;
Ok(gst::FlowSuccess::Ok)
} else {
self.state.lock().unwrap().attach = false;
if !self.srcpad.push_event(gst::event::Caps::new(&caps)) {
Err(gst::FlowError::NotNegotiated)
} else {
Ok(gst::FlowSuccess::Ok)
}
}
}
fn render_shape_buffer(
&self,
state: &mut State,
width: u32,
height: u32,
tag: Option<&str>,
) -> Option<(gst::Buffer, u32, u32)> {
let mut text_width = 0;
let mut text_height = 0;
// If we have text to render, update the layout first in order to compute
// the final size
let layout = tag.and_then(|tag| {
state.layout.as_ref().unwrap().set_text(tag);
state.layout.clone()
});
if let Some(ref layout) = layout {
let (_ink_rect, logical_rect) = layout.extents();
text_width = logical_rect.width() / pango::SCALE;
text_height = logical_rect.height() / pango::SCALE;
}
let total_height = height.max(text_height as u32);
let total_width = width.max(text_width as u32);
let mut buffer = gst::Buffer::with_size((total_width * total_height) as usize * 4).ok()?;
gst_video::VideoMeta::add(
buffer.get_mut().unwrap(),
gst_video::VideoFrameFlags::empty(),
#[cfg(target_endian = "little")]
gst_video::VideoFormat::Bgra,
#[cfg(target_endian = "big")]
gst_video::VideoFormat::Argb,
total_width as u32,
total_height as u32,
)
.ok()?;
let buffer = buffer.into_mapped_buffer_writable().unwrap();
// Pass ownership of the buffer to the cairo surface but keep around
// a raw pointer so we can later retrieve it again when the surface
// is done
let buffer_ptr = unsafe { buffer.buffer().as_ptr() };
let surface = cairo::ImageSurface::create_for_data(
buffer,
cairo::Format::ARgb32,
total_width as i32,
total_height as i32,
total_width as i32 * 4,
)
.ok()?;
let cr = cairo::Context::new(&surface).ok()?;
let line_width = 1.;
// Clear background
cr.set_operator(cairo::Operator::Source);
cr.set_source_rgba(0.0, 0.0, 0.0, 0.0);
cr.paint().ok()?;
cr.save().ok()?;
// Now render the rectangle
cr.move_to(line_width, line_width);
cr.line_to(line_width, height as f64 - line_width);
cr.line_to(width as f64 - line_width, height as f64 - line_width);
cr.line_to(width as f64 - line_width, line_width);
cr.close_path();
cr.set_source_rgba(1., 0., 0., 1.);
cr.set_line_width(line_width);
let _ = cr.stroke();
cr.restore().ok()?;
// Finally render the text, if any
if let Some(layout) = layout {
cr.save().ok()?;
cr.move_to(0., 0.);
cr.set_operator(cairo::Operator::Over);
cr.set_source_rgba(1.0, 1.0, 1.0, 1.0);
pangocairo::functions::layout_path(&cr, &layout);
cr.stroke().ok()?;
cr.restore().ok()?;
cr.save().ok()?;
cr.move_to(0., 0.);
cr.set_source_rgba(0.0, 0.0, 0.0, 1.0);
pangocairo::functions::show_layout(&cr, &layout);
cr.restore().ok()?;
}
drop(cr);
// Safety: The surface still owns a mutable reference to the buffer but our reference
// to the surface here is the last one. After dropping the surface the buffer would be
// freed, so we keep an additional strong reference here before dropping the surface,
// which is then returned. As such it's guaranteed that nothing is using the buffer
// anymore mutably.
unsafe {
assert_eq!(
cairo::ffi::cairo_surface_get_reference_count(surface.to_raw_none()),
1
);
let buffer = glib::translate::from_glib_none(buffer_ptr);
drop(surface);
Some((buffer, total_width, total_height))
}
}
// Update our overlay composition with a set of rectangles
fn overlay_shapes(&self, state: &mut State, element: &super::OnvifOverlay, shapes: Vec<Shape>) {
if shapes.is_empty() {
state.composition = None;
return;
}
if state.layout.is_none() {
let fontmap = match pangocairo::FontMap::new() {
Some(fontmap) => Ok(fontmap),
None => {
gst::element_error!(
element,
gst::LibraryError::Failed,
["Failed to create pangocairo font map"]
);
Err(gst::FlowError::Error)
}
}
.unwrap();
let context = match fontmap.create_context() {
Some(context) => Ok(context),
None => {
gst::element_error!(
element,
gst::LibraryError::Failed,
["Failed to create font map context"]
);
Err(gst::FlowError::Error)
}
}
.unwrap();
context.set_language(&pango::Language::from_string("en_US"));
context.set_base_dir(pango::Direction::Ltr);
let layout = pango::Layout::new(&context);
layout.set_alignment(pango::Alignment::Left);
let font_desc =
pango::FontDescription::from_string(&self.settings.lock().unwrap().font_desc);
layout.set_font_description(Some(&font_desc));
state.layout = Some(layout);
}
let mut composition = gst_video::VideoOverlayComposition::default();
let composition_mut = composition.get_mut().unwrap();
for shape in &shapes {
// Sanity check: don't render 0-sized shapes
if shape.width == 0 || shape.height == 0 {
continue;
}
gst::debug!(
CAT,
obj: element,
"Rendering shape with tag {:?} x {} y {} width {} height {}",
shape.tag,
shape.x,
shape.y,
shape.width,
shape.height
);
let (buffer, width, height) = match self.render_shape_buffer(
state,
shape.width,
shape.height,
shape.tag.as_deref(),
) {
Some(ret) => ret,
None => {
gst::error!(CAT, obj: element, "Failed to render buffer");
state.composition = None;
return;
}
};
let rect = gst_video::VideoOverlayRectangle::new_raw(
&buffer,
shape.x as i32,
shape.y as i32,
width,
height,
gst_video::VideoOverlayFormatFlags::PREMULTIPLIED_ALPHA,
);
composition_mut.add_rectangle(&rect);
}
state.composition = Some(composition);
}
fn sink_chain(
&self,
pad: &gst::Pad,
element: &super::OnvifOverlay,
mut buffer: gst::Buffer,
) -> Result<gst::FlowSuccess, gst::FlowError> {
gst::trace!(CAT, obj: pad, "Handling buffer {:?}", buffer);
if self.srcpad.check_reconfigure() {
if let Err(err) = self.negotiate(element) {
if self.srcpad.pad_flags().contains(gst::PadFlags::FLUSHING) {
self.srcpad.mark_reconfigure();
return Ok(gst::FlowSuccess::Ok);
} else {
return Err(err);
}
}
}
let mut state = self.state.lock().unwrap();
let video_info = state.video_info.as_ref().unwrap();
let width = video_info.width() as i32;
let height = video_info.height() as i32;
if let Ok(meta) = gst::meta::CustomMeta::from_buffer(&buffer, "OnvifXMLFrameMeta") {
let s = meta.structure();
let mut shapes: Vec<Shape> = Vec::new();
if let Ok(frames) = s.get::<gst::BufferList>("frames") {
gst::log!(CAT, obj: element, "Overlaying {} frames", frames.len());
// Metadata for multiple frames may be attached to this frame, either because:
//
// * Multiple analytics modules are producing metadata
// * The metadata for two frames produced by the same module is attached
// to this frame, for instance because of resynchronization or other
// timing-related situations
//
// We want to display all detected objects for the first case, but only the
// latest version for the second case. As frames are sorted in increasing temporal
// order, we iterate them in reverse to start with the most recent, and deduplicate
// by object id.
let mut object_ids = HashSet::new();
for buffer in frames.iter().rev() {
let buffer = buffer.map_readable().map_err(|_| {
gst::element_error!(
element,
gst::ResourceError::Read,
["Failed to map buffer readable"]
);
gst::FlowError::Error
})?;
let utf8 = std::str::from_utf8(buffer.as_ref()).map_err(|err| {
gst::element_error!(
element,
gst::StreamError::Format,
["Failed to decode buffer as UTF-8: {}", err]
);
gst::FlowError::Error
})?;
let root = utf8.parse::<Element>().map_err(|err| {
gst::element_error!(
element,
gst::ResourceError::Read,
["Failed to parse buffer as XML: {}", err]
);
gst::FlowError::Error
})?;
for object in root.children() {
if object.is("Object", "http://www.onvif.org/ver10/schema") {
gst::trace!(CAT, obj: element, "Handling object {:?}", object);
let object_id = match object.attr("ObjectId") {
Some(id) => id.to_string(),
None => {
gst::warning!(CAT, obj: element, "XML Object with no ObjectId");
continue;
}
};
if !object_ids.insert(object_id.clone()) {
gst::debug!(CAT, "Skipping older version of object {}", object_id);
continue;
}
let appearance = match object
.get_child("Appearance", "http://www.onvif.org/ver10/schema")
{
Some(appearance) => appearance,
None => continue,
};
let shape = match appearance
.get_child("Shape", "http://www.onvif.org/ver10/schema")
{
Some(shape) => shape,
None => continue,
};
let tag = appearance
.get_child("Class", "http://www.onvif.org/ver10/schema")
.and_then(|class| {
class.get_child("Type", "http://www.onvif.org/ver10/schema")
})
.map(|t| t.text());
let bbox = match shape
.get_child("BoundingBox", "http://www.onvif.org/ver10/schema")
{
Some(bbox) => bbox,
None => {
gst::warning!(
CAT,
obj: element,
"XML Shape with no BoundingBox"
);
continue;
}
};
let left: f64 = match bbox.attr("left").and_then(|val| val.parse().ok())
{
Some(val) => val,
None => {
gst::warning!(
CAT,
obj: element,
"BoundingBox with no left attribute"
);
continue;
}
};
let right: f64 =
match bbox.attr("right").and_then(|val| val.parse().ok()) {
Some(val) => val,
None => {
gst::warning!(
CAT,
obj: element,
"BoundingBox with no right attribute"
);
continue;
}
};
let top: f64 = match bbox.attr("top").and_then(|val| val.parse().ok()) {
Some(val) => val,
None => {
gst::warning!(
CAT,
obj: element,
"BoundingBox with no top attribute"
);
continue;
}
};
let bottom: f64 =
match bbox.attr("bottom").and_then(|val| val.parse().ok()) {
Some(val) => val,
None => {
gst::warning!(
CAT,
obj: element,
"BoundingBox with no bottom attribute"
);
continue;
}
};
let x1 = width / 2 + ((left * (width / 2) as f64) as i32);
let y1 = height / 2 - ((top * (height / 2) as f64) as i32);
let x2 = width / 2 + ((right * (width / 2) as f64) as i32);
let y2 = height / 2 - ((bottom * (height / 2) as f64) as i32);
shapes.push(Shape {
x: x1 as u32,
y: y1 as u32,
width: (x2 - x1) as u32,
height: (y2 - y1) as u32,
tag,
});
}
}
}
if !frames.is_empty() {
self.overlay_shapes(&mut state, element, shapes);
}
}
}
if let Some(composition) = &state.composition {
let buffer = buffer.make_mut();
if state.attach {
gst_video::VideoOverlayCompositionMeta::add(buffer, composition);
} else {
let mut frame = gst_video::VideoFrameRef::from_buffer_ref_writable(
buffer,
state.video_info.as_ref().unwrap(),
)
.unwrap();
if composition.blend(&mut frame).is_err() {
gst::error!(CAT, obj: pad, "Failed to blend composition");
}
}
}
drop(state);
self.srcpad.push(buffer)
}
fn sink_event(&self, pad: &gst::Pad, element: &super::OnvifOverlay, event: gst::Event) -> bool {
use gst::EventView;
gst::log!(CAT, obj: pad, "Handling event {:?}", event);
match event.view() {
EventView::Caps(c) => {
let mut state = self.state.lock().unwrap();
state.video_info = gst_video::VideoInfo::from_caps(c.caps()).ok();
drop(state);
self.srcpad.check_reconfigure();
match self.negotiate(element) {
Ok(_) => true,
Err(_) => {
self.srcpad.mark_reconfigure();
true
}
}
}
EventView::FlushStop(..) => {
let mut state = self.state.lock().unwrap();
state.composition = None;
pad.event_default(Some(element), event)
}
_ => pad.event_default(Some(element), event),
}
}
}
#[glib::object_subclass]
impl ObjectSubclass for OnvifOverlay {
const NAME: &'static str = "GstOnvifOverlay";
type Type = super::OnvifOverlay;
type ParentType = gst::Element;
fn with_class(klass: &Self::Class) -> Self {
let templ = klass.pad_template("sink").unwrap();
let sinkpad = gst::Pad::builder_with_template(&templ, Some("sink"))
.chain_function(|pad, parent, buffer| {
OnvifOverlay::catch_panic_pad_function(
parent,
|| Err(gst::FlowError::Error),
|overlay, element| overlay.sink_chain(pad, element, buffer),
)
})
.event_function(|pad, parent, event| {
OnvifOverlay::catch_panic_pad_function(
parent,
|| false,
|overlay, element| overlay.sink_event(pad, element, event),
)
})
.flags(gst::PadFlags::PROXY_CAPS)
.flags(gst::PadFlags::PROXY_ALLOCATION)
.build();
let templ = klass.pad_template("src").unwrap();
let srcpad = gst::Pad::builder_with_template(&templ, Some("src"))
.flags(gst::PadFlags::PROXY_CAPS)
.flags(gst::PadFlags::PROXY_ALLOCATION)
.build();
Self {
srcpad,
sinkpad,
state: Mutex::new(State::default()),
settings: Mutex::new(Settings::default()),
}
}
}
impl ObjectImpl for OnvifOverlay {
fn properties() -> &'static [glib::ParamSpec] {
static PROPERTIES: Lazy<Vec<glib::ParamSpec>> = Lazy::new(|| {
vec![glib::ParamSpecString::new(
"font-desc",
"Font Description",
"Pango font description of font to be used for rendering",
Some(DEFAULT_FONT_DESC),
glib::ParamFlags::READWRITE,
)]
});
PROPERTIES.as_ref()
}
fn set_property(
&self,
_obj: &Self::Type,
_id: usize,
value: &glib::Value,
pspec: &glib::ParamSpec,
) {
match pspec.name() {
"font-desc" => {
self.settings.lock().unwrap().font_desc = value
.get::<Option<String>>()
.expect("type checked upstream")
.unwrap_or_else(|| DEFAULT_FONT_DESC.into());
self.state.lock().unwrap().layout.take();
}
_ => unimplemented!(),
};
}
fn property(&self, _obj: &Self::Type, _id: usize, pspec: &glib::ParamSpec) -> glib::Value {
match pspec.name() {
"font-desc" => self.settings.lock().unwrap().font_desc.to_value(),
_ => unimplemented!(),
}
}
fn constructed(&self, obj: &Self::Type) {
self.parent_constructed(obj);
obj.add_pad(&self.sinkpad).unwrap();
obj.add_pad(&self.srcpad).unwrap();
}
}
impl GstObjectImpl for OnvifOverlay {}
impl ElementImpl for OnvifOverlay {
fn metadata() -> Option<&'static gst::subclass::ElementMetadata> {
static ELEMENT_METADATA: Lazy<gst::subclass::ElementMetadata> = Lazy::new(|| {
gst::subclass::ElementMetadata::new(
"ONVIF overlay",
"Video/Overlay",
"Renders ONVIF analytics meta over raw video frames",
"Mathieu Duponchelle <mathieu@centricular.com>",
)
});
Some(&*ELEMENT_METADATA)
}
fn pad_templates() -> &'static [gst::PadTemplate] {
static PAD_TEMPLATES: Lazy<Vec<gst::PadTemplate>> = Lazy::new(|| {
let caps = gst_video::VideoFormat::iter_raw()
.into_video_caps()
.unwrap()
.build();
let sink_pad_template = gst::PadTemplate::new(
"sink",
gst::PadDirection::Sink,
gst::PadPresence::Always,
&caps,
)
.unwrap();
let src_pad_template = gst::PadTemplate::new(
"src",
gst::PadDirection::Src,
gst::PadPresence::Always,
&caps,
)
.unwrap();
vec![src_pad_template, sink_pad_template]
});
PAD_TEMPLATES.as_ref()
}
fn change_state(
&self,
element: &Self::Type,
transition: gst::StateChange,
) -> Result<gst::StateChangeSuccess, gst::StateChangeError> {
gst::trace!(CAT, obj: element, "Changing state {:?}", transition);
match transition {
gst::StateChange::ReadyToPaused | gst::StateChange::PausedToReady => {
// Reset the whole state
let mut state = self.state.lock().unwrap();
*state = State::default();
}
_ => (),
}
self.parent_change_state(element, transition)
}
}