From 0b07963c81155b621dd45b878b869a78b8c9de49 Mon Sep 17 00:00:00 2001
From: Sacha Chua <sacha@sachachua.com>
Date: Sat, 5 Nov 2022 07:55:05 -0400
Subject: Caption updates

---
 roles/caption/templates/process-captions.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'roles/caption/templates/process-captions.py')

diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py
index 223531b..1b6515c 100755
--- a/roles/caption/templates/process-captions.py
+++ b/roles/caption/templates/process-captions.py
@@ -81,6 +81,8 @@ def get_files_to_work_on(directory):
                 info[slug]['vtt'] = f
             elif re.search('srv2$', filename):
                 info[slug]['srv2'] = f
+            elif re.search('txt$', filename):
+                info[slug]['txt'] = f
     needs_work = []
     if JSON_FILE:
         with open(JSON_FILE) as f:
@@ -108,10 +110,11 @@ def extract_audio(work):
     if 'Audio: vorbis' in output.decode():
         extension = 'ogg'
     new_file = work['base'] + '.' + extension
-    acodec = 'copy' if re.search('\\.(webm|mp4|mkv)$', work['video']) else 'libopus'
+    acodec = 'copy' if re.search('\\.webm$', work['video']) else 'libopus'
     log("Extracting audio from %s acodec %s" % (work['video'], acodec))
     output = subprocess.check_output(['ffmpeg', '-y', '-i', work['video'], '-acodec', acodec, '-vn', new_file], stderr=subprocess.STDOUT)
     work['audio'] = new_file
+    subprocess.call(["/data/emacsconf/2022/scripts/upload.sh", work['audio']])
     return work
 
 def to_sec(time_str):
@@ -142,18 +145,21 @@ def generate_captions(work):
     result = clean_up_timestamps(result)
     with open(new_file, 'w') as vtt:
         whisper.utils.write_vtt(result['segments'], file=vtt)
-    with open(work['base'] + '.txt') as txt:
+    with open(work['base'] + '.txt', 'w') as txt:
         whisper.utils.write_txt(result['segments'], file=txt)
     work['vtt'] = new_file
+    work['txt'] = work['base'] + '.txt'
+    subprocess.call(["/data/emacsconf/2022/scripts/upload.sh", work['vtt'], work['txt']])
     if 'srv2' in work: del work['srv2']
     return work
 
 def generate_text(work):
-    with open(work['base'] + '.txt') as txt:
+    with open(work['base'] + '.txt', 'w') as txt:
         for i, caption in enumerate(webvtt.read(work['vtt'])):
-            txt.write(caption.text)
-    work['text'] = work['base'] + '.txt'
-    
+            txt.write(caption.text + "\n")
+    work['txt'] = work['base'] + '.txt'
+    return work
+
 def generate_srv2(work):
     """Generate a SRV2 file."""
     log("Generating SRV2")
-- 
cgit v1.2.3