From 04a095491541cd47355e17e3636bff8fe86490cd Mon Sep 17 00:00:00 2001 From: Sacha Chua Date: Sat, 29 Oct 2022 08:29:16 -0400 Subject: Unbreak my script --- roles/caption/templates/process-captions.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py index 66f39dd..f3f317d 100755 --- a/roles/caption/templates/process-captions.py +++ b/roles/caption/templates/process-captions.py @@ -32,7 +32,10 @@ import datetime import sys import webvtt import xml.etree.ElementTree as ET -from lhotse import RecordinRecording, AudioSource, SupervisionSegment, SupervisionSet, create_cut_set_e +from lhotse import RecordingSet, Recording, AudioSource, SupervisionSegment, SupervisionSet, create_cut_set_eager, align_with_torchaudio, CutSet, annotate_with_whisper +import whisper +import re +import os import json import torch @@ -153,7 +156,7 @@ def generate_srv2(work): captions.append(SupervisionSegment(id=rec_id + '-sup' + '%05d' % i, channel=recs[0].channel_ids[0], recording_id=rec_id, start=to_sec(caption.start), duration=to_sec(caption.end) - to_sec(caption.start), text=caption.text, language='English')) sups = SupervisionSet.from_segments(captions) main = CutSet.from_manifests(recordings=recs, supervisions=sups) - work['cuts'] = main.trim_to_supervisions(keep_all_channels=True) + work['cuts'] = main.trim_to_supervisions(keep_overlapping=False, keep_all_channels=True) cuts_aligned = align_with_torchaudio(work['cuts']) root = ET.Element("timedtext") doc = ET.SubElement(root, "window") -- cgit v1.2.3