gstreamer/ext/mpg123/gstmpg123audiodec.c

/*  MP3 decoding plugin for GStreamer using the mpg123 library
 *  Copyright (C) 2012 Carlos Rafael Giani
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "gstmpg123audiodec.h"

#include <stdlib.h>
#include <string.h>

GST_DEBUG_CATEGORY_STATIC (mpg123_debug);
#define GST_CAT_DEFAULT mpg123_debug

/*
 * Omitted sample formats that mpg123 supports (or at least can support):
 *  - 8bit integer signed
 *  - 8bit integer unsigned
 *  - a-law
 *  - mu-law
 *  - 64bit float
 *
 * The first four formats are not supported by the GstAudioDecoder base class.
 * (The internal gst_audio_format_from_caps_structure() call fails.)
 *
 * The 64bit float issue is tricky. mpg123 actually decodes to "real",
 * not necessarily to "float".
 *
 * "real" can be fixed point, 32bit float, 64bit float. There seems to be
 * no way how to find out which one of them is actually used.
 *
 * However, in all known installations, "real" equals 32bit float, so that's
 * what is used.
 */

static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("audio/mpeg, "
        "mpegversion = (int) { 1 }, "
        "layer = (int) [ 1, 3 ], "
        "rate = (int) { 8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000 }, "
        "channels = (int) [ 1, 2 ], " "parsed = (boolean) true ")
    );

static void gst_mpg123_audio_dec_finalize (GObject * object);
static gboolean gst_mpg123_audio_dec_start (GstAudioDecoder * dec);
static gboolean gst_mpg123_audio_dec_stop (GstAudioDecoder * dec);
static GstFlowReturn gst_mpg123_audio_dec_push_decoded_bytes (GstMpg123AudioDec
    * mpg123_decoder, unsigned char const *decoded_bytes,
    size_t const num_decoded_bytes);
static GstFlowReturn gst_mpg123_audio_dec_handle_frame (GstAudioDecoder * dec,
    GstBuffer * buffer);
static gboolean gst_mpg123_audio_dec_set_format (GstAudioDecoder * dec,
    GstCaps * incoming_caps);
static void gst_mpg123_audio_dec_flush (GstAudioDecoder * dec, gboolean hard);

G_DEFINE_TYPE (GstMpg123AudioDec, gst_mpg123_audio_dec, GST_TYPE_AUDIO_DECODER);

static void
gst_mpg123_audio_dec_class_init (GstMpg123AudioDecClass * klass)
{
  GObjectClass *object_class;
  GstAudioDecoderClass *base_class;
  GstElementClass *element_class;
  GstPadTemplate *src_template;
  int error;

  GST_DEBUG_CATEGORY_INIT (mpg123_debug, "mpg123", 0, "mpg123 mp3 decoder");

  object_class = G_OBJECT_CLASS (klass);
  base_class = GST_AUDIO_DECODER_CLASS (klass);
  element_class = GST_ELEMENT_CLASS (klass);

  object_class->finalize = gst_mpg123_audio_dec_finalize;

  gst_element_class_set_static_metadata (element_class,
      "mpg123 mp3 decoder",
      "Codec/Decoder/Audio",
      "Decodes mp3 streams using the mpg123 library",
      "Carlos Rafael Giani <dv@pseudoterminal.org>");

  /*
     Not using static pad template for srccaps, since the comma-separated list of formats needs to be
     created depending on whatever mpg123 supports
   */
  {
    const int *format_list;
    const long *rates_list;
    size_t num, i;
    GString *s;

    s = g_string_new ("audio/x-raw, ");

    mpg123_encodings (&format_list, &num);
    g_string_append (s, "format = { ");
    for (i = 0; i < num; ++i) {
      switch (format_list[i]) {
        case MPG123_ENC_SIGNED_16:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (S16));
          break;
        case MPG123_ENC_UNSIGNED_16:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (U16));
          break;
        case MPG123_ENC_SIGNED_24:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (S24));
          break;
        case MPG123_ENC_UNSIGNED_24:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (U24));
          break;
        case MPG123_ENC_SIGNED_32:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (S32));
          break;
        case MPG123_ENC_UNSIGNED_32:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (U32));
          break;
        case MPG123_ENC_FLOAT_32:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (F32));
          break;
        default:
          GST_DEBUG ("Ignoring mpg123 format %d", format_list[i]);
          break;
      }
    }
    g_string_append (s, " }, ");

    mpg123_rates (&rates_list, &num);
    g_string_append (s, "rate = (int) { ");
    for (i = 0; i < num; ++i) {
      g_string_append_printf (s, "%s%lu", (i > 0) ? ", " : "", rates_list[i]);
    }
    g_string_append (s, "}, ");

    g_string_append (s, "channels = (int) [ 1, 2 ], ");
    g_string_append (s, "layout = (string) interleaved");

    src_template = gst_pad_template_new ("src", GST_PAD_SRC, GST_PAD_ALWAYS,
        gst_caps_from_string (s->str));

    g_string_free (s, TRUE);
  }

  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get (&sink_template));
  gst_element_class_add_pad_template (element_class, src_template);

  base_class->start = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_start);
  base_class->stop = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_stop);
  base_class->handle_frame =
      GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_handle_frame);
  base_class->set_format = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_set_format);
  base_class->flush = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_flush);

  error = mpg123_init ();
  if (G_UNLIKELY (error != MPG123_OK))
    GST_ERROR ("Could not initialize mpg123 library: %s",
        mpg123_plain_strerror (error));
  else
    GST_TRACE ("mpg123 library initialized");
}


void
gst_mpg123_audio_dec_init (GstMpg123AudioDec * mpg123_decoder)
{
  mpg123_decoder->handle = NULL;
}


static void
gst_mpg123_audio_dec_finalize (GObject * object)
{
  GstMpg123AudioDec *mpg123_decoder = GST_MPG123_AUDIO_DEC (object);
  if (G_LIKELY (mpg123_decoder->handle != NULL)) {
    mpg123_delete (mpg123_decoder->handle);
    mpg123_decoder->handle = NULL;
  }
}


static gboolean
gst_mpg123_audio_dec_start (GstAudioDecoder * dec)
{
  GstMpg123AudioDec *mpg123_decoder;
  int error;

  mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);
  error = 0;

  mpg123_decoder->handle = mpg123_new (NULL, &error);
  mpg123_decoder->has_next_audioinfo = FALSE;
  mpg123_decoder->frame_offset = 0;

  /*
     Initially, the mpg123 handle comes with a set of default formats supported. This clears this set. 
     This is necessary, since only one format shall be supported (see set_format for more).
   */
  mpg123_format_none (mpg123_decoder->handle);

  mpg123_param (mpg123_decoder->handle, MPG123_REMOVE_FLAGS, MPG123_GAPLESS, 0);        /* Built-in mpg123 support for gapless decoding is disabled for now, since it does not work well with seeking */
  mpg123_param (mpg123_decoder->handle, MPG123_ADD_FLAGS, MPG123_SEEKBUFFER, 0);        /* Tells mpg123 to use a small read-ahead buffer for better MPEG sync; essential for MP3 radio streams */
  mpg123_param (mpg123_decoder->handle, MPG123_RESYNC_LIMIT, -1, 0);    /* Sets the resync limit to the end of the stream (e.g. don't give up prematurely) */

  /* Open in feed mode (= encoded data is fed manually into the handle). */
  error = mpg123_open_feed (mpg123_decoder->handle);

  if (G_UNLIKELY (error != MPG123_OK)) {
    GstElement *element = GST_ELEMENT (dec);
    GST_ELEMENT_ERROR (element, STREAM, DECODE, (NULL),
        ("Error opening mpg123 feed: %s", mpg123_plain_strerror (error)));
    mpg123_close (mpg123_decoder->handle);
    mpg123_delete (mpg123_decoder->handle);
    mpg123_decoder->handle = NULL;
    return FALSE;
  }

  GST_DEBUG_OBJECT (dec, "mpg123 decoder started");

  return TRUE;
}


static gboolean
gst_mpg123_audio_dec_stop (GstAudioDecoder * dec)
{
  GstMpg123AudioDec *mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);

  if (G_LIKELY (mpg123_decoder->handle != NULL)) {
    mpg123_close (mpg123_decoder->handle);
    mpg123_delete (mpg123_decoder->handle);
    mpg123_decoder->handle = NULL;
  }

  GST_DEBUG_OBJECT (dec, "mpg123 decoder stopped");

  return TRUE;
}


static GstFlowReturn
gst_mpg123_audio_dec_push_decoded_bytes (GstMpg123AudioDec * mpg123_decoder,
    unsigned char const *decoded_bytes, size_t const num_decoded_bytes)
{
  GstBuffer *output_buffer;
  GstFlowReturn alloc_error;
  GstAudioDecoder *dec;

  output_buffer = NULL;
  dec = GST_AUDIO_DECODER (mpg123_decoder);

  if ((num_decoded_bytes == 0) || (decoded_bytes == NULL)) {
    /* This occurs in the first few frames, which do not carry data; once MPG123_AUDIO_DEC_NEW_FORMAT is received, the empty frames stop occurring */
    GST_TRACE_OBJECT (mpg123_decoder,
        "Nothing was decoded -> no output buffer to push");
    return GST_FLOW_OK;
  }

  output_buffer = gst_buffer_new_allocate (NULL, num_decoded_bytes, NULL);
  alloc_error = (output_buffer == NULL) ? GST_FLOW_ERROR : GST_FLOW_OK;

  if (alloc_error != GST_FLOW_OK) {
    /* This is necessary to advance playback in time, even when nothing was decoded. */
    return gst_audio_decoder_finish_frame (dec, NULL, 1);
  } else {
    GstMapInfo info;

    if (gst_buffer_map (output_buffer, &info, GST_MAP_WRITE)) {
      if (info.size != num_decoded_bytes)
        GST_ERROR_OBJECT (mpg123_decoder,
            "Mapped memory region has size %u instead of expected size %u",
            info.size, num_decoded_bytes);
      else
        memcpy (info.data, decoded_bytes, num_decoded_bytes);

      gst_buffer_unmap (output_buffer, &info);
    } else
      GST_ERROR_OBJECT (mpg123_decoder, "Could not map buffer");

    return gst_audio_decoder_finish_frame (dec, output_buffer, 1);
  }
}


static GstFlowReturn
gst_mpg123_audio_dec_handle_frame (GstAudioDecoder * dec, GstBuffer * buffer)
{
  GstMpg123AudioDec *mpg123_decoder;
  int decode_error;
  unsigned char *decoded_bytes;
  size_t num_decoded_bytes;

  if (G_UNLIKELY (!buffer))
    return GST_FLOW_OK;

  mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);

  if (G_UNLIKELY (mpg123_decoder->handle == NULL)) {
    GstElement *element = GST_ELEMENT (dec);
    GST_ELEMENT_ERROR (element, STREAM, DECODE, (NULL),
        ("mpg123 handle is NULL"));
    return GST_FLOW_ERROR;
  }

  /* The actual decoding */
  {
    unsigned char const *inmemory;
    size_t inmemsize;
    GstMemory *memory;
    GstMapInfo info;

    memory = gst_buffer_get_all_memory (buffer);
    if (memory == NULL)
      return GST_FLOW_ERROR;

    if (!gst_memory_map (memory, &info, GST_MAP_READ)) {
      gst_memory_unref (memory);
      return GST_FLOW_ERROR;
    }

    inmemory = info.data;
    inmemsize = info.size;

    mpg123_feed (mpg123_decoder->handle, inmemory, inmemsize);
    decoded_bytes = NULL;
    num_decoded_bytes = 0;
    decode_error = mpg123_decode_frame (mpg123_decoder->handle,
        &mpg123_decoder->frame_offset, &decoded_bytes, &num_decoded_bytes);

    gst_memory_unmap (memory, &info);
    gst_memory_unref (memory);
  }

  switch (decode_error) {
    case MPG123_NEW_FORMAT:
      /*
         As mentioned in gst_mpg123_audio_dec_set_format(), the next audioinfo is not set immediately;
         instead, the code waits for mpg123 to take note of the new format, and then sets the audioinfo
         This fixes glitches with mp3s containing several format headers (for example, first half using 44.1kHz, second half 32 kHz)
       */

      GST_DEBUG_OBJECT (dec,
          "mpg123 reported a new format -> setting next srccaps");

      gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder, decoded_bytes,
          num_decoded_bytes);

      /*
         If there is a next audioinfo, use it, then set has_next_audioinfo to FALSE, to make sure
         gst_audio_decoder_set_output_format() isn't called again until set_format is called by the base class
       */
      if (mpg123_decoder->has_next_audioinfo) {
        if (!gst_audio_decoder_set_output_format (dec,
                &(mpg123_decoder->next_audioinfo))) {
          GstElement *element = GST_ELEMENT (dec);
          GST_ELEMENT_ERROR (element, STREAM, DECODE, (NULL),
              ("Unable to set output format"));
        }
        mpg123_decoder->has_next_audioinfo = FALSE;
      }

      break;

    case MPG123_NEED_MORE:
    case MPG123_OK:
      return gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder,
          decoded_bytes, num_decoded_bytes);

      /* If this happens, then the upstream parser somehow missed the ending of the bitstream */
    case MPG123_DONE:
      GST_DEBUG_OBJECT (dec, "mpg123 is done decoding");
      gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder, decoded_bytes,
          num_decoded_bytes);
      return GST_FLOW_EOS;

      /* Anything else is considered an error */
    default:
    {
      GstElement *element = GST_ELEMENT (dec);
      GST_ELEMENT_ERROR (element, STREAM, DECODE, (NULL), ("Decoding error: %s",
              mpg123_plain_strerror (decode_error)));

      return GST_FLOW_ERROR;
    }
  }

  return GST_FLOW_OK;
}


static gboolean
gst_mpg123_audio_dec_set_format (GstAudioDecoder * dec, GstCaps * incoming_caps)
{
/*
	Using the parsed information upstream, and the list of allowed caps downstream, this code
	tries to find a suitable audio info. It is important to keep in mind that the rate and number of channels
	should never deviate from the one the bitstream has, otherwise mpg123 has to mix channels and/or
	resample (and as its docs say, its internal resampler is very crude). The sample format, however,
	can be chosen freely, because the MPEG specs do not mandate any special format.
	Therefore, rate and number of channels are taken from upstream (which parsed the MPEG frames, so
	the incoming_caps contain exactly the rate and number of channels the bitstream actually has), while
	the sample format is chosen by trying out all caps that are allowed by downstream. This way, the output
	is adjusted to what the downstream prefers.

	Also, the new output audio info is not set immediately. Instead, it is considered the "next audioinfo".
	The code waits for mpg123 to notice the new format (= when mpg123_decode_frame() returns MPG123_AUDIO_DEC_NEW_FORMAT),
	and then sets the next audioinfo. Otherwise, the next audioinfo is set too soon, which may cause problems with
	mp3s containing several format headers. One example would be an mp3 with the first 30 seconds using 44.1 kHz,
	then the next 30 seconds using 32 kHz. Rare, but possible.

	STEPS:

	1. get rate and channels from incoming_caps
	2. get allowed caps from src pad
	3. for each structure in allowed caps:
	3.1. take format
	3.2. if the combination of format with rate and channels is unsupported by mpg123, go to (3),
	     or exit with error if there are no more structures to try
	3.3. create next audioinfo out of rate,channels,format, and exit
*/


  int rate, channels;
  GstMpg123AudioDec *mpg123_decoder;
  GstCaps *allowed_srccaps;
  guint structure_nr;
  gboolean match_found = FALSE;

  mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);

  if (G_UNLIKELY (mpg123_decoder->handle == NULL)) {
    GstElement *element = GST_ELEMENT (dec);
    GST_ELEMENT_ERROR (element, STREAM, DECODE, (NULL),
        ("mpg123 handle is NULL"));
    return FALSE;
  }

  mpg123_decoder->has_next_audioinfo = FALSE;

  /* Get rate and channels from incoming_caps */
  {
    GstStructure *structure;
    gboolean err = FALSE;

    /* Only the first structure is used (multiple incoming structures don't make sense */
    structure = gst_caps_get_structure (incoming_caps, 0);

    if (!gst_structure_get_int (structure, "rate", &rate)) {
      err = TRUE;
      GST_ERROR_OBJECT (dec, "Incoming caps do not have a rate value");
    }
    if (!gst_structure_get_int (structure, "channels", &channels)) {
      err = TRUE;
      GST_ERROR_OBJECT (dec, "Incoming caps do not have a channel value");
    }

    if (err)
      return FALSE;
  }

  /* Get the caps that are allowed by downstream */
  {
    GstCaps *allowed_srccaps_unnorm =
        gst_pad_get_allowed_caps (GST_AUDIO_DECODER_SRC_PAD (dec));
    allowed_srccaps = gst_caps_normalize (allowed_srccaps_unnorm);
    /* TODO: this causes errors with 1.0 - perhaps a bug? */
    /*gst_caps_unref(allowed_srccaps_unnorm); */
  }

  /* Go through all allowed caps, pick the first one that matches */
  for (structure_nr = 0; structure_nr < gst_caps_get_size (allowed_srccaps);
      ++structure_nr) {
    GstStructure *structure;
    gchar const *format_str;
    GstAudioFormat format;
    int encoding;

    structure = gst_caps_get_structure (allowed_srccaps, structure_nr);

    format_str = gst_structure_get_string (structure, "format");
    if (format_str == NULL) {
      GST_DEBUG_OBJECT (dec, "Could not get format from src caps");
      continue;
    }

    format = gst_audio_format_from_string (format_str);
    if (format == GST_AUDIO_FORMAT_UNKNOWN) {
      GST_DEBUG_OBJECT (dec, "Unknown format %s", format_str);
      continue;
    }

    switch (format) {
      case GST_AUDIO_FORMAT_S16:
        encoding = MPG123_ENC_SIGNED_16;
        break;
      case GST_AUDIO_FORMAT_S24:
        encoding = MPG123_ENC_SIGNED_24;
        break;
      case GST_AUDIO_FORMAT_S32:
        encoding = MPG123_ENC_SIGNED_32;
        break;
      case GST_AUDIO_FORMAT_U16:
        encoding = MPG123_ENC_UNSIGNED_16;
        break;
      case GST_AUDIO_FORMAT_U24:
        encoding = MPG123_ENC_UNSIGNED_24;
        break;
      case GST_AUDIO_FORMAT_U32:
        encoding = MPG123_ENC_UNSIGNED_32;
        break;
      case GST_AUDIO_FORMAT_F32:
        encoding = MPG123_ENC_FLOAT_32;
        break;
      default:
        GST_DEBUG_OBJECT (dec,
            "Format %s in srccaps is not supported", format_str);
        continue;
    }

    {
      int err;

      /* Cleanup old formats & set new one */
      mpg123_format_none (mpg123_decoder->handle);
      err = mpg123_format (mpg123_decoder->handle, rate, channels, encoding);
      if (err != MPG123_OK) {
        GST_DEBUG_OBJECT (dec,
            "mpg123 cannot use caps %" GST_PTR_FORMAT
            " because mpg123_format() failed: %s", structure,
            mpg123_plain_strerror (err));
        continue;
      }
    }

    gst_audio_info_init (&(mpg123_decoder->next_audioinfo));
    gst_audio_info_set_format (&(mpg123_decoder->next_audioinfo), format, rate,
        channels, NULL);
    GST_DEBUG_OBJECT (dec, "The next audio format is: %s, %u Hz, %u channels",
        format_str, rate, channels);
    mpg123_decoder->has_next_audioinfo = TRUE;

    match_found = TRUE;

    break;
  }

  gst_caps_unref (allowed_srccaps);

  return match_found;
}


static void
gst_mpg123_audio_dec_flush (GstAudioDecoder * dec, gboolean hard)
{
  int error;
  GstMpg123AudioDec *mpg123_decoder;

  hard = hard;

  GST_DEBUG_OBJECT (dec, "Flushing decoder");

  mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);

  if (G_UNLIKELY (mpg123_decoder->handle == NULL)) {
    GstElement *element = GST_ELEMENT (dec);
    GST_ELEMENT_ERROR (element, STREAM, DECODE, (NULL),
        ("mpg123 handle is NULL"));
    return;
  }

  /* Flush by reopening the feed */
  mpg123_close (mpg123_decoder->handle);
  error = mpg123_open_feed (mpg123_decoder->handle);

  if (G_UNLIKELY (error != MPG123_OK)) {
    GstElement *element = GST_ELEMENT (dec);
    GST_ELEMENT_ERROR (element, STREAM, DECODE, (NULL),
        ("Error reopening mpg123 feed: %s", mpg123_plain_strerror (error)));
    mpg123_close (mpg123_decoder->handle);
    mpg123_delete (mpg123_decoder->handle);
    mpg123_decoder->handle = NULL;
  }

  mpg123_decoder->has_next_audioinfo = FALSE;

  /*
     opening/closing feeds do not affect the format defined by the mpg123_format() call that was made in gst_mpg123_audio_dec_set_format(),
     and since the up/downstream caps are not expected to change here, no mpg123_format() calls are done
   */
}

static gboolean
plugin_init (GstPlugin * plugin)
{
  return gst_element_register (plugin, "mpg123audiodec",
      GST_RANK_MARGINAL, gst_mpg123_audio_dec_get_type ());
}

GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
    GST_VERSION_MINOR,
    mpg123, "mp3 decoding based on the mpg123 library",
    plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN)