copy scripts

author: Sacha Chua <sacha@sachachua.com> 2023-10-10 10:21:30 -0400
committer: Sacha Chua <sacha@sachachua.com> 2023-10-10 10:21:30 -0400
commit: 4ef6627aa71a2c94a2ae7615d5e93d6689ac8b5a (patch)
tree: 9dee331d9ba8328df4dc8c448c1e1554e82ec721 /roles/caption/templates/process-captions.py
parent: f9b00ef5d578e331f0c8269e6138cdac45fc4e99 (diff)
download: emacsconf-ansible-4ef6627aa71a2c94a2ae7615d5e93d6689ac8b5a.tar.xz emacsconf-ansible-4ef6627aa71a2c94a2ae7615d5e93d6689ac8b5a.zip
1 file changed, 9 insertions, 7 deletions
diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py
index dc7ef29..a42439b 100755
--- a/roles/caption/templates/process-captions.py
+++ b/roles/caption/templates/process-captions.py
@@ -93,7 +93,7 @@ def get_files_to_work_on(directory):
             talk = next(filter(lambda talk: talk['slug'] == val['slug'], talks), None)
         if talk:
             val['base'] = os.path.join(os.path.dirname(val['video'] or val['audio']),
-                                       base_name(talk['video-slug']))
+                                       base_name(talk['file-prefix']))
         else:
             val['base'] = os.path.join(os.path.dirname(val['video'] or val['audio']),
                                        base_name(val['video'] or val['audio']))
@@ -114,7 +114,8 @@ def extract_audio(work):
     log("Extracting audio from %s acodec %s" % (work['video'], acodec))
     output = subprocess.check_output(['ffmpeg', '-y', '-i', work['video'], '-acodec', acodec, '-vn', new_file], stderr=subprocess.STDOUT)
     work['audio'] = new_file
-    subprocess.call(["/data/emacsconf/{{ emacsconf_year }}/scripts/upload.sh", work['audio']])
+    if os.path.isfile("/data/emacsconf/{{ emacsconf_year }}/scripts/upload.sh"):
+        subprocess.call(["/data/emacsconf/{{ emacsconf_year }}/scripts/upload.sh", work['audio']])
     return work
 
 def to_sec(time_str):
@@ -143,13 +144,14 @@ def generate_captions(work):
         audio = whisper.pad_or_trim(audio)
     result = model.transcribe(audio, verbose=True, language="en")
     result = clean_up_timestamps(result)
-    with open(new_file, 'w') as vtt:
-        whisper.utils.write_vtt(result['segments'], file=vtt)
-    with open(work['base'] + '.txt', 'w') as txt:
-        whisper.utils.write_txt(result['segments'], file=txt)
+    vtt_writer = whisper.utils.get_writer('vtt', os.path.dirname(new_file))
+    txt_writer = whisper.utils.get_writer('txt', os.path.dirname(new_file))
+    vtt_writer(result, work['audio'], {'max_line_width': 60, 'max_line_count': None, 'highlight_words': None})
+    txt_writer(result, work['audio'], {'max_line_width': 60, 'max_line_count': None, 'highlight_words': None})
     work['vtt'] = new_file
     work['txt'] = work['base'] + '.txt'
-    subprocess.call(["/data/emacsconf/{{ emacsconf_year }}/scripts/upload.sh", work['vtt'], work['txt']])
+    if os.path.isfile("/data/emacsconf/{{ emacsconf_year }}/scripts/upload.sh"):
+        subprocess.call(["/data/emacsconf/{{ emacsconf_year }}/scripts/upload.sh", work['vtt'], work['txt']])
     if 'srv2' in work: del work['srv2']
     return work
author	Sacha Chua <sacha@sachachua.com>	2023-10-10 10:21:30 -0400
committer	Sacha Chua <sacha@sachachua.com>	2023-10-10 10:21:30 -0400
commit	4ef6627aa71a2c94a2ae7615d5e93d6689ac8b5a (patch)
tree	9dee331d9ba8328df4dc8c448c1e1554e82ec721 /roles/caption/templates/process-captions.py
parent	f9b00ef5d578e331f0c8269e6138cdac45fc4e99 (diff)
download	emacsconf-ansible-4ef6627aa71a2c94a2ae7615d5e93d6689ac8b5a.tar.xz emacsconf-ansible-4ef6627aa71a2c94a2ae7615d5e93d6689ac8b5a.zip