diff --git a/docs/plugins/gst_plugins_cache.json b/docs/plugins/gst_plugins_cache.json
index c6e6e16b..982e0d02 100644
--- a/docs/plugins/gst_plugins_cache.json
+++ b/docs/plugins/gst_plugins_cache.json
@@ -650,6 +650,12 @@
                         "direction": "src",
                         "presence": "request",
                         "type": "GstTranslationSrcPad"
+                    },
+                    "translation_src_%%u": {
+                        "caps": "text/x-raw:\n         format: utf8\n",
+                        "direction": "src",
+                        "presence": "request",
+                        "type": "GstTranslationSrcPad"
                     }
                 },
                 "properties": {
@@ -773,7 +779,7 @@
                         "construct": false,
                         "construct-only": false,
                         "controllable": false,
-                        "default": "3000",
+                        "default": "5000",
                         "max": "-1",
                         "min": "0",
                         "mutable": "ready",
@@ -858,6 +864,21 @@
                     }
                 ]
             },
+            "GstAwsTranscriberTranslationTokenizationMethod": {
+                "kind": "enum",
+                "values": [
+                    {
+                        "desc": "None: don't tokenize translations",
+                        "name": "none",
+                        "value": "0"
+                    },
+                    {
+                        "desc": "Span based: insert spans in the transript text and use the resulting spans in the translations to reproduce speech pacing.",
+                        "name": "span-based",
+                        "value": "1"
+                    }
+                ]
+            },
             "GstAwsTranscriberVocabularyFilterMethod": {
                 "kind": "enum",
                 "values": [
@@ -919,6 +940,18 @@
                         "readable": true,
                         "type": "gchararray",
                         "writable": true
+                    },
+                    "tokenization-method": {
+                        "blurb": "The tokenization method to apply to translations",
+                        "conditionally-available": false,
+                        "construct": false,
+                        "construct-only": false,
+                        "controllable": false,
+                        "default": "none (0)",
+                        "mutable": "ready",
+                        "readable": true,
+                        "type": "GstAwsTranscriberTranslationTokenizationMethod",
+                        "writable": true
                     }
                 }
             }
diff --git a/net/aws/src/transcriber/imp.rs b/net/aws/src/transcriber/imp.rs
index d856e6d4..b8238dff 100644
--- a/net/aws/src/transcriber/imp.rs
+++ b/net/aws/src/transcriber/imp.rs
@@ -35,7 +35,10 @@ use once_cell::sync::Lazy;
 
 use super::transcribe::{TranscriberLoop, TranscriptEvent, TranscriptItem, TranscriptionSettings};
 use super::translate::{TranslatedItem, TranslationLoop, TranslationQueue};
-use super::{AwsTranscriberResultStability, AwsTranscriberVocabularyFilterMethod, CAT};
+use super::{
+    AwsTranscriberResultStability, AwsTranscriberVocabularyFilterMethod,
+    TranslationTokenizationMethod, CAT,
+};
 
 static RUNTIME: Lazy<runtime::Runtime> = Lazy::new(|| {
     runtime::Builder::new_multi_thread()
@@ -73,6 +76,8 @@ pub const GRANULARITY: gst::ClockTime = gst::ClockTime::from_mseconds(100);
 const OUTPUT_LANG_CODE_PROPERTY: &str = "language-code";
 const DEFAULT_OUTPUT_LANG_CODE: Option<&str> = None;
 
+const TRANSLATION_TOKENIZATION_PROPERTY: &str = "tokenization-method";
+
 #[derive(Debug, Clone)]
 pub(super) struct Settings {
     transcribe_latency: gst::ClockTime,
@@ -850,8 +855,8 @@ struct TranslationPadTask {
     needs_translate: bool,
     translation_queue: TranslationQueue,
     translation_loop_handle: Option<task::JoinHandle<Result<(), gst::ErrorMessage>>>,
-    to_translation_tx: Option<mpsc::Sender<TranscriptItem>>,
-    from_translation_rx: Option<mpsc::Receiver<TranslatedItem>>,
+    to_translation_tx: Option<mpsc::Sender<Vec<TranscriptItem>>>,
+    from_translation_rx: Option<mpsc::Receiver<Vec<TranslatedItem>>>,
     translate_latency: gst::ClockTime,
     transcript_lookahead: gst::ClockTime,
     send_events: bool,
@@ -991,14 +996,14 @@ impl TranslationPadTask {
             // before current latency budget is exhausted.
             futures::select_biased! {
                 _ = timeout => return Ok(()),
-                translated_item = from_translation_rx.next() => {
-                    let Some(translated_item) = translated_item else {
+                translated_items = from_translation_rx.next() => {
+                    let Some(translated_items) = translated_items else {
                         const ERR: &str = "translation chan terminated";
                         gst::debug!(CAT, imp: self.pad, "{ERR}");
                         return Err(gst::error_msg!(gst::StreamError::Failed, ["{ERR}"]));
                     };
 
-                    self.translated_items.push_back(translated_item);
+                    self.translated_items.extend(translated_items);
                     self.pending_translations = self.pending_translations.saturating_sub(1);
 
                     return Ok(());
@@ -1027,9 +1032,9 @@ impl TranslationPadTask {
             }
         };
 
-        for item in transcript_items.iter() {
-            if let Some(ready_item) = self.translation_queue.push(item) {
-                self.send_for_translation(ready_item).await?;
+        for items in transcript_items.iter() {
+            if let Some(ready_items) = self.translation_queue.push(items) {
+                self.send_for_translation(ready_items).await?;
             }
         }
 
@@ -1072,19 +1077,12 @@ impl TranslationPadTask {
 
             let deadline = translation_eta.saturating_sub(max_delay);
 
-            if let Some(ready_item) = self
+            if let Some(ready_items) = self
                 .translation_queue
                 .dequeue(deadline, self.transcript_lookahead)
             {
-                gst::debug!(
-                    CAT,
-                    imp: self.pad,
-                    "Forcing transcript at pts {} with duration {} to translation",
-                    ready_item.pts,
-                    ready_item.duration,
-                );
-
-                if self.send_for_translation(ready_item).await.is_err() {
+                gst::debug!(CAT, imp: self.pad, "Forcing  {} transcripts to translation", ready_items.len());
+                if self.send_for_translation(ready_items).await.is_err() {
                     return false;
                 }
             }
@@ -1240,13 +1238,13 @@ impl TranslationPadTask {
 
     async fn send_for_translation(
         &mut self,
-        transcript_item: TranscriptItem,
+        transcript_items: Vec<TranscriptItem>,
     ) -> Result<(), gst::ErrorMessage> {
         let res = self
             .to_translation_tx
             .as_mut()
             .expect("to_translation chan must be available in translation mode")
-            .send(transcript_item)
+            .send(transcript_items)
             .await;
 
         if res.is_err() {
@@ -1346,6 +1344,7 @@ impl TranslationPadTask {
                     &self.pad,
                     &elem_settings.language_code,
                     pad_settings.language_code.as_deref().unwrap(),
+                    pad_settings.tokenization_method,
                     to_translation_rx,
                     from_translation_tx,
                 ));
@@ -1384,6 +1383,7 @@ impl Default for TranslationPadState {
 #[derive(Debug, Default, Clone)]
 struct TranslationPadSettings {
     language_code: Option<String>,
+    tokenization_method: TranslationTokenizationMethod, // backs the "tokenization-method" pad property
 }
 
 #[derive(Debug, Default)]
@@ -1566,12 +1566,20 @@ impl ObjectSubclass for TranslationSrcPad {
 impl ObjectImpl for TranslationSrcPad {
     fn properties() -> &'static [glib::ParamSpec] {
         static PROPERTIES: Lazy<Vec<glib::ParamSpec>> = Lazy::new(|| {
-            vec![glib::ParamSpecString::builder(OUTPUT_LANG_CODE_PROPERTY)
-                .nick("Language Code")
-                .blurb("The Language the Stream must be translated to")
-                .default_value(DEFAULT_OUTPUT_LANG_CODE)
-                .mutable_ready()
-                .build()]
+            vec![
+                glib::ParamSpecString::builder(OUTPUT_LANG_CODE_PROPERTY)
+                    .nick("Language Code")
+                    .blurb("The Language the Stream must be translated to")
+                    .default_value(DEFAULT_OUTPUT_LANG_CODE)
+                    .mutable_ready()
+                    .build(),
+                glib::ParamSpecEnum::builder(TRANSLATION_TOKENIZATION_PROPERTY)
+                    .nick("Translations tokenization method")
+                    .blurb("The tokenization method to apply to translations")
+                    .default_value(TranslationTokenizationMethod::default())
+                    .mutable_ready()
+                    .build(),
+            ]
         });
 
         PROPERTIES.as_ref()
@@ -1582,6 +1590,9 @@ impl ObjectImpl for TranslationSrcPad {
             OUTPUT_LANG_CODE_PROPERTY => {
                 self.settings.lock().unwrap().language_code = value.get().unwrap()
             }
+            TRANSLATION_TOKENIZATION_PROPERTY => {
+                self.settings.lock().unwrap().tokenization_method = value.get().unwrap()
+            }
             _ => unimplemented!(),
         }
     }
@@ -1589,6 +1600,9 @@ impl ObjectImpl for TranslationSrcPad {
     fn property(&self, _id: usize, pspec: &glib::ParamSpec) -> glib::Value {
         match pspec.name() {
             OUTPUT_LANG_CODE_PROPERTY => self.settings.lock().unwrap().language_code.to_value(),
+            TRANSLATION_TOKENIZATION_PROPERTY => {
+                self.settings.lock().unwrap().tokenization_method.to_value()
+            }
             _ => unimplemented!(),
         }
     }
diff --git a/net/aws/src/transcriber/mod.rs b/net/aws/src/transcriber/mod.rs
index eb2a28f7..faad2748 100644
--- a/net/aws/src/transcriber/mod.rs
+++ b/net/aws/src/transcriber/mod.rs
@@ -79,6 +79,21 @@ impl From<AwsTranscriberVocabularyFilterMethod> for VocabularyFilterMethod {
     }
 }
 
+#[derive(Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy, glib::Enum)]
+#[repr(u32)]
+#[enum_type(name = "GstAwsTranscriberTranslationTokenizationMethod")] // registered type name
+#[non_exhaustive]
+pub enum TranslationTokenizationMethod { // tokenization method to apply to translations
+    #[default]
+    #[enum_value(name = "None: don't tokenize translations", nick = "none")]
+    None = 0, // default value: the translation is not tokenized
+    #[enum_value(
+        name = "Span based: insert spans in the transript text and use the resulting spans in the translations to reproduce speech pacing.",
+        nick = "span-based"
+    )] // NOTE(review): "transript" looks like a typo for "transcript"; the docs cache repeats it
+    SpanBased = 1, // spans are inserted in the transcript and used to split the translation
+}
+
 glib::wrapper! {
     pub struct Transcriber(ObjectSubclass<imp::Transcriber>) @extends gst::Element, gst::Object, @implements gst::ChildProxy;
 }
@@ -94,6 +109,8 @@ pub fn register(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
             .mark_as_plugin_api(gst::PluginAPIFlags::empty());
         AwsTranscriberVocabularyFilterMethod::static_type()
             .mark_as_plugin_api(gst::PluginAPIFlags::empty());
+        TranslationTokenizationMethod::static_type()
+            .mark_as_plugin_api(gst::PluginAPIFlags::empty());
         TranslationSrcPad::static_type().mark_as_plugin_api(gst::PluginAPIFlags::empty());
     }
     gst::Element::register(
diff --git a/net/aws/src/transcriber/transcribe.rs b/net/aws/src/transcriber/transcribe.rs
index 7b683f3b..97301380 100644
--- a/net/aws/src/transcriber/transcribe.rs
+++ b/net/aws/src/transcriber/transcribe.rs
@@ -69,18 +69,6 @@ impl TranscriptItem {
             is_punctuation: matches!(item.r#type, Some(model::ItemType::Punctuation)),
         })
     }
-
-    #[inline]
-    pub fn push(&mut self, item: &TranscriptItem) {
-        self.duration += item.duration;
-
-        self.is_punctuation &= item.is_punctuation;
-        if !item.is_punctuation {
-            self.content.push(' ');
-        }
-
-        self.content.push_str(&item.content);
-    }
 }
 
 #[derive(Clone)]
diff --git a/net/aws/src/transcriber/translate.rs b/net/aws/src/transcriber/translate.rs
index b689bd63..fc49674d 100644
--- a/net/aws/src/transcriber/translate.rs
+++ b/net/aws/src/transcriber/translate.rs
@@ -18,8 +18,12 @@ use std::collections::VecDeque;
 
 use super::imp::TranslationSrcPad;
 use super::transcribe::TranscriptItem;
-use super::CAT;
+use super::{TranslationTokenizationMethod, CAT};
 
+const SPAN_START: &str = "<span>";
+const SPAN_END: &str = "</span>";
+
+#[derive(Debug)]
 pub struct TranslatedItem {
     pub pts: gst::ClockTime,
     pub duration: gst::ClockTime,
@@ -49,7 +53,7 @@ impl TranslationQueue {
     /// Pushes the provided item.
     ///
     /// Returns `Some(..)` if items are ready for translation.
-    pub fn push(&mut self, transcript_item: &TranscriptItem) -> Option<TranscriptItem> {
+    pub fn push(&mut self, transcript_item: &TranscriptItem) -> Option<Vec<TranscriptItem>> {
         // Keep track of the item individually so we can schedule translation precisely.
         self.items.push_back(transcript_item.clone());
 
@@ -57,16 +61,7 @@ impl TranslationQueue {
             // This makes it a good chunk for translation.
             // Concatenate as a single item for translation
 
-            let mut items = self.items.drain(..);
-
-            let mut item_acc = items.next()?;
-            for item in items {
-                item_acc.push(&item);
-            }
-
-            item_acc.push(transcript_item);
-
-            return Some(item_acc);
+            return Some(self.items.drain(..).collect());
         }
 
         // Regular case: no separator detected, don't push transcript items
@@ -78,12 +73,12 @@ impl TranslationQueue {
 
     /// Dequeues items from the specified `deadline` up to `lookahead`.
     ///
-    /// Returns `Some(..)` with the accumulated items matching the criteria.
+    /// Returns `Some(..)` if some items match the criteria.
     pub fn dequeue(
         &mut self,
         deadline: gst::ClockTime,
         lookahead: gst::ClockTime,
-    ) -> Option<TranscriptItem> {
+    ) -> Option<Vec<TranscriptItem>> {
         if self.items.front()?.pts < deadline {
             // First item is too early to be sent to translation now
             // we can wait for more items to accumulate.
@@ -94,17 +89,16 @@ impl TranslationQueue {
         // Try to get up to lookahead more items to improve translation accuracy
         let limit = deadline + lookahead;
 
-        let mut item_acc = self.items.pop_front().unwrap();
+        let mut items_acc = vec![self.items.pop_front().unwrap()];
         while let Some(item) = self.items.front() {
             if item.pts > limit {
                 break;
             }
 
-            let item = self.items.pop_front().unwrap();
-            item_acc.push(&item);
+            items_acc.push(self.items.pop_front().unwrap());
         }
 
-        Some(item_acc)
+        Some(items_acc)
     }
 }
 
@@ -113,8 +107,9 @@ pub struct TranslationLoop {
     client: aws_translate::Client,
     input_lang: String,
     output_lang: String,
-    transcript_rx: mpsc::Receiver<TranscriptItem>,
-    translation_tx: mpsc::Sender<TranslatedItem>,
+    tokenization_method: TranslationTokenizationMethod,
+    transcript_rx: mpsc::Receiver<Vec<TranscriptItem>>,
+    translation_tx: mpsc::Sender<Vec<TranslatedItem>>,
 }
 
 impl TranslationLoop {
@@ -123,8 +118,9 @@ impl TranslationLoop {
         pad: &TranslationSrcPad,
         input_lang: &str,
         output_lang: &str,
-        transcript_rx: mpsc::Receiver<TranscriptItem>,
-        translation_tx: mpsc::Sender<TranslatedItem>,
+        tokenization_method: TranslationTokenizationMethod,
+        transcript_rx: mpsc::Receiver<Vec<TranscriptItem>>,
+        translation_tx: mpsc::Sender<Vec<TranslatedItem>>,
     ) -> Self {
         let aws_config = imp.aws_config.lock().unwrap();
         let aws_config = aws_config
@@ -136,6 +132,7 @@ impl TranslationLoop {
             client: aws_sdk_translate::Client::new(aws_config),
             input_lang: input_lang.to_string(),
             output_lang: output_lang.to_string(),
+            tokenization_method,
             transcript_rx,
             translation_tx,
         }
@@ -167,40 +164,70 @@ impl TranslationLoop {
     }
 
     pub async fn run(mut self) -> Result<(), gst::ErrorMessage> {
-        while let Some(transcript_item) = self.transcript_rx.next().await {
-            let TranscriptItem {
-                pts,
-                duration,
-                content,
-                ..
-            } = transcript_item;
+        use TranslationTokenizationMethod as Tokenization;
 
-            let translated_text = if content.is_empty() {
-                content
-            } else {
-                self.client
-                    .translate_text()
-                    .set_source_language_code(Some(self.input_lang.clone()))
-                    .set_target_language_code(Some(self.output_lang.clone()))
-                    .set_text(Some(content))
-                    .send()
-                    .await
-                    .map_err(|err| {
-                        let err = format!("Failed to call translation service: {err}");
-                        gst::info!(CAT, imp: self.pad, "{err}");
-                        gst::error_msg!(gst::LibraryError::Failed, ["{err}"])
-                    })?
-                    .translated_text
-                    .unwrap_or_default()
+        while let Some(transcript_items) = self.transcript_rx.next().await {
+            if transcript_items.is_empty() {
+                continue;
+            }
+
+            // Words must be space separated (except before punctuation), outside of spans.
+            let mut ts_duration_list = Vec::with_capacity(transcript_items.len());
+            let mut content = String::new();
+            for item in transcript_items {
+                if !content.is_empty() && !item.is_punctuation {
+                    content.push(' ');
+                }
+                ts_duration_list.push((item.pts, item.duration));
+                match self.tokenization_method {
+                    Tokenization::None => content.push_str(&item.content),
+                    Tokenization::SpanBased => {
+                        content.extend([SPAN_START, item.content.as_str(), SPAN_END])
+                    }
+                }
+            }
+
+            gst::trace!(CAT, imp: self.pad, "Translating {content} with {ts_duration_list:?}");
+
+            let translated_text = self
+                .client
+                .translate_text()
+                .set_source_language_code(Some(self.input_lang.clone()))
+                .set_target_language_code(Some(self.output_lang.clone()))
+                .set_text(Some(content))
+                .send()
+                .await
+                .map_err(|err| {
+                    let err = format!("Failed to call translation service: {err}");
+                    gst::info!(CAT, imp: self.pad, "{err}");
+                    gst::error_msg!(gst::LibraryError::Failed, ["{err}"])
+                })?
+                .translated_text
+                .unwrap_or_default();
+
+            gst::trace!(CAT, imp: self.pad, "Got translation {translated_text}");
+
+            let translated_items = match self.tokenization_method {
+                Tokenization::None => {
+                    // Push translation as a single item
+                    let mut ts_duration_iter = ts_duration_list.into_iter().peekable();
+
+                    let &(first_pts, _) = ts_duration_iter.peek().expect("at least one item");
+                    let (last_pts, last_duration) =
+                        ts_duration_iter.last().expect("at least one item");
+
+                    vec![TranslatedItem {
+                        pts: first_pts,
+                        duration: last_pts.saturating_sub(first_pts) + last_duration,
+                        content: translated_text,
+                    }]
+                }
+                Tokenization::SpanBased => span_tokenize_items(&translated_text, ts_duration_list),
             };
 
-            let translated_item = TranslatedItem {
-                pts,
-                duration,
-                content: translated_text,
-            };
+            gst::trace!(CAT, imp: self.pad, "Sending {translated_items:?}");
 
-            if self.translation_tx.send(translated_item).await.is_err() {
+            if self.translation_tx.send(translated_items).await.is_err() {
                 gst::info!(
                     CAT,
                     imp: self.pad,
@@ -213,3 +240,374 @@ impl TranslationLoop {
         Ok(())
     }
 }
+
+/// Parses translated items from the `translation` `String` using `span` tags.
+///
+/// The `translation` is expected to have been returned by the `Translate` ws.
+/// It can contain id-less `<span>` and `</span>` tags, matching similar
+/// id-less tags from the content submitted to the `Translate` ws.
+///
+/// This parser accepts both serial `<span></span>` as well as nested
+/// `<span><span></span></span>`. Any tag, opening or closing, terminates the
+/// content accumulated so far.
+///
+/// A `<` which doesn't start one of these tags is kept as regular content,
+/// and doesn't prevent the detection of a tag starting right after it.
+///
+/// The parsed items are assigned the ts and duration from `ts_duration_list`
+/// in their order of appearance.
+///
+/// If more parsed items are found, the last item will concatenate the remaining items.
+///
+/// If less parsed items are found, the last item will be assigned the remaining
+/// duration from the `ts_duration_list`.
+fn span_tokenize_items(
+    translation: &str,
+    ts_duration_list: impl IntoIterator<Item = (gst::ClockTime, gst::ClockTime)>,
+) -> Vec<TranslatedItem> {
+    let mut translated_items = vec![];
+
+    let mut ts_duration_iter = ts_duration_list.into_iter();
+
+    // Content for a translated item
+    let mut content = String::new();
+
+    // Alleged span chunk: when not empty, it is a strict prefix of a span tag
+    let mut chunk = String::new();
+
+    for c in translation.chars() {
+        if content.is_empty() && chunk.is_empty() && c.is_whitespace() {
+            // ignore leading whitespaces
+            continue;
+        }
+
+        if chunk.is_empty() && c != '<' {
+            // Regular content
+            content.push(c);
+            continue;
+        }
+
+        // Either start or continue an alleged span chunk
+        chunk.push(c);
+
+        let is_tag_prefix =
+            SPAN_START.starts_with(chunk.as_str()) || SPAN_END.starts_with(chunk.as_str());
+        if !is_tag_prefix {
+            // Can no longer be a span: what was accumulated is regular content.
+            // Comparing to the tags prefixes (instead of waiting for the chunk to
+            // reach the tags length) ensures an actual tag following a stray '<'
+            // is not swallowed. If the current char is a '<', it is kept in the
+            // chunk since it might start an actual tag.
+            let flush_len = if c == '<' {
+                chunk.len() - 1
+            } else {
+                chunk.len()
+            };
+            content.extend(chunk.drain(..flush_len));
+            continue;
+        }
+
+        if chunk != SPAN_START && chunk != SPAN_END {
+            // Not a complete tag yet
+            continue;
+        }
+
+        // got a span
+        chunk.clear();
+
+        if content.is_empty() {
+            continue;
+        }
+
+        // Add pending content
+        // assign it the next pts and duration from the input list
+        if let Some((pts, duration)) = ts_duration_iter.next() {
+            translated_items.push(TranslatedItem {
+                pts,
+                duration,
+                content,
+            });
+
+            content = String::new();
+        } else if let Some(last_item) = translated_items.last_mut() {
+            // exhausted available pts and duration
+            // add content to last item
+            if !last_item.content.ends_with(' ') {
+                last_item.content.push(' ');
+            }
+            last_item.content.extend(content.drain(..));
+        }
+    }
+
+    // Whatever remains in the chunk is an incomplete tag:
+    // handle it as regular content
+    content.extend(chunk.drain(..));
+
+    if !content.is_empty() {
+        // Add last content
+        if let Some((pts, mut duration)) = ts_duration_iter.next() {
+            if let Some((last_pts, last_duration)) = ts_duration_iter.last() {
+                // Fix remaining duration
+                duration = last_pts.saturating_sub(pts) + last_duration;
+            }
+
+            translated_items.push(TranslatedItem {
+                pts,
+                duration,
+                content,
+            });
+        } else if let Some(last_item) = translated_items.last_mut() {
+            // No more pts and duration in the index
+            // Add remaining content to the last item pushed
+            if !last_item.content.ends_with(' ') {
+                last_item.content.push(' ');
+            }
+            last_item.content.push_str(&content);
+        }
+    } else if let Some((last_pts, last_duration)) = ts_duration_iter.last() {
+        if let Some(last_item) = translated_items.last_mut() {
+            // No more content, but need to fix last item's duration
+            last_item.duration = last_pts.saturating_sub(last_item.pts) + last_duration;
+        }
+    }
+
+    translated_items
+}
+
+#[cfg(test)]
+mod tests {
+    use super::span_tokenize_items; // the span parser exercised by every test below
+    use gst::prelude::*;
+
+    #[test]
+    fn serial_spans() { // as many serial spans as (pts, duration) entries
+        let input = "<span>first</span> <span>second</span> <span>third</span>";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 2.seconds()),
+            (4.seconds(), 3.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let second = items.next().unwrap();
+        assert_eq!(second.pts, 1.seconds());
+        assert_eq!(second.duration, 2.seconds());
+        assert_eq!(second.content, "second");
+
+        let third = items.next().unwrap();
+        assert_eq!(third.pts, 4.seconds());
+        assert_eq!(third.duration, 3.seconds());
+        assert_eq!(third.content, "third");
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn serial_and_nested_spans() { // an opening nested span also ends the pending content
+        let input = "<span>first</span> <span>second <span>third</span></span> <span>fourth</span>";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 2.seconds()),
+            (3.seconds(), 1.seconds()),
+            (4.seconds(), 2.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let second = items.next().unwrap();
+        assert_eq!(second.pts, 1.seconds());
+        assert_eq!(second.duration, 2.seconds());
+        assert_eq!(second.content, "second "); // trailing whitespace is kept as is
+
+        let third = items.next().unwrap();
+        assert_eq!(third.pts, 3.seconds());
+        assert_eq!(third.duration, 1.seconds());
+        assert_eq!(third.content, "third");
+
+        let fourth = items.next().unwrap();
+        assert_eq!(fourth.pts, 4.seconds());
+        assert_eq!(fourth.duration, 2.seconds());
+        assert_eq!(fourth.content, "fourth");
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn nonspaned_serial_and_nested_spans() { // text outside of any span forms items too
+        let input = "Initial <span>first</span> <span>second <span>third</span></span> <span>fourth</span> final";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 1.seconds()),
+            (2.seconds(), 1.seconds()),
+            (3.seconds(), 1.seconds()),
+            (4.seconds(), 1.seconds()),
+            (5.seconds(), 1.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let init = items.next().unwrap();
+        assert_eq!(init.pts, 0.seconds());
+        assert_eq!(init.duration, 1.seconds());
+        assert_eq!(init.content, "Initial ");
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 1.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let second = items.next().unwrap();
+        assert_eq!(second.pts, 2.seconds());
+        assert_eq!(second.duration, 1.seconds());
+        assert_eq!(second.content, "second ");
+
+        let third = items.next().unwrap();
+        assert_eq!(third.pts, 3.seconds());
+        assert_eq!(third.duration, 1.seconds());
+        assert_eq!(third.content, "third");
+
+        let fourth = items.next().unwrap();
+        assert_eq!(fourth.pts, 4.seconds());
+        assert_eq!(fourth.duration, 1.seconds());
+        assert_eq!(fourth.content, "fourth");
+
+        let final_ = items.next().unwrap();
+        assert_eq!(final_.pts, 5.seconds());
+        assert_eq!(final_.duration, 1.seconds());
+        assert_eq!(final_.content, "final");
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn more_parsed_items() { // four spans for only three (pts, duration) entries
+        let input = "<span>first</span> <span>second</span> <span>third</span> <span>fourth</span>";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 2.seconds()),
+            (4.seconds(), 3.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let second = items.next().unwrap();
+        assert_eq!(second.pts, 1.seconds());
+        assert_eq!(second.duration, 2.seconds());
+        assert_eq!(second.content, "second");
+
+        let third = items.next().unwrap();
+        assert_eq!(third.pts, 4.seconds());
+        assert_eq!(third.duration, 3.seconds());
+        assert_eq!(third.content, "third fourth"); // extra span appended to the last item
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn more_parsed_items_nonspan_final() { // trailing plain text once the entries are exhausted
+        let input = "<span>first</span> <span>second</span> <span>third</span> final";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 2.seconds()),
+            (4.seconds(), 3.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let second = items.next().unwrap();
+        assert_eq!(second.pts, 1.seconds());
+        assert_eq!(second.duration, 2.seconds());
+        assert_eq!(second.content, "second");
+
+        let third = items.next().unwrap();
+        assert_eq!(third.pts, 4.seconds());
+        assert_eq!(third.duration, 3.seconds());
+        assert_eq!(third.content, "third final"); // trailing text appended to the last item
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn less_parsed_items() { // two spans for three (pts, duration) entries
+        let input = "<span>first</span> <span>second</span>";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 2.seconds()),
+            (4.seconds(), 3.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let second = items.next().unwrap();
+        assert_eq!(second.pts, 1.seconds());
+        assert_eq!(second.duration, 6.seconds()); // 4s - 1s + 3s: up to the end of the last entry
+        assert_eq!(second.content, "second");
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn less_parsed_items_nonspan_final() { // trailing plain text takes the remaining entries
+        let input = "<span>first</span> final";
+        let ts_duration_list = vec![
+            (0.seconds(), 1.seconds()),
+            (1.seconds(), 2.seconds()),
+            (4.seconds(), 3.seconds()),
+        ];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "first");
+
+        let final_ = items.next().unwrap();
+        assert_eq!(final_.pts, 1.seconds());
+        assert_eq!(final_.duration, 6.seconds()); // 4s - 1s + 3s: up to the end of the last entry
+        assert_eq!(final_.content, "final");
+
+        assert!(items.next().is_none());
+    }
+
+    #[test]
+    fn utf8_input() { // multi-byte characters and no span at all
+        let input = "caractères accentués";
+        let ts_duration_list = vec![(0.seconds(), 1.seconds())];
+
+        let mut items = span_tokenize_items(input, ts_duration_list).into_iter();
+
+        let first = items.next().unwrap();
+        assert_eq!(first.pts, 0.seconds());
+        assert_eq!(first.duration, 1.seconds());
+        assert_eq!(first.content, "caractères accentués");
+
+        assert!(items.next().is_none());
+    }
+}