From 0b07963c81155b621dd45b878b869a78b8c9de49 Mon Sep 17 00:00:00 2001 From: Sacha Chua Date: Sat, 5 Nov 2022 07:55:05 -0400 Subject: Caption updates --- roles/caption/templates/process-captions.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'roles/caption/templates/process-captions.py') diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py index 223531b..1b6515c 100755 --- a/roles/caption/templates/process-captions.py +++ b/roles/caption/templates/process-captions.py @@ -81,6 +81,8 @@ def get_files_to_work_on(directory): info[slug]['vtt'] = f elif re.search('srv2$', filename): info[slug]['srv2'] = f + elif re.search('txt$', filename): + info[slug]['txt'] = f needs_work = [] if JSON_FILE: with open(JSON_FILE) as f: @@ -108,10 +110,11 @@ def extract_audio(work): if 'Audio: vorbis' in output.decode(): extension = 'ogg' new_file = work['base'] + '.' + extension - acodec = 'copy' if re.search('\\.(webm|mp4|mkv)$', work['video']) else 'libopus' + acodec = 'copy' if re.search('\\.webm$', work['video']) else 'libopus' log("Extracting audio from %s acodec %s" % (work['video'], acodec)) output = subprocess.check_output(['ffmpeg', '-y', '-i', work['video'], '-acodec', acodec, '-vn', new_file], stderr=subprocess.STDOUT) work['audio'] = new_file + subprocess.call(["/data/emacsconf/2022/scripts/upload.sh", work['audio']]) return work def to_sec(time_str): @@ -142,18 +145,21 @@ def generate_captions(work): result = clean_up_timestamps(result) with open(new_file, 'w') as vtt: whisper.utils.write_vtt(result['segments'], file=vtt) - with open(work['base'] + '.txt') as txt: + with open(work['base'] + '.txt', 'w') as txt: whisper.utils.write_txt(result['segments'], file=txt) work['vtt'] = new_file + work['txt'] = work['base'] + '.txt' + subprocess.call(["/data/emacsconf/2022/scripts/upload.sh", work['vtt'], work['txt']]) if 'srv2' in work: del work['srv2'] return work def generate_text(work): - with open(work['base'] + '.txt') as txt: + with open(work['base'] + '.txt', 'w') as txt: for i, caption in enumerate(webvtt.read(work['vtt'])): - txt.write(caption.text) - work['text'] = work['base'] + '.txt' - + txt.write(caption.text + "\n") + work['txt'] = work['base'] + '.txt' + return work + def generate_srv2(work): """Generate a SRV2 file.""" log("Generating SRV2") -- cgit v1.2.3