aeneas, caption updates

author: Sacha Chua <sacha@sachachua.com> 2022-10-30 10:01:09 -0400
committer: Sacha Chua <sacha@sachachua.com> 2022-10-30 10:01:09 -0400
commit: dbb0b8498e2f4eb5aa168c38c9e25a3edc0bf741 (patch)
tree: a91c17218c45ee07087618c1713193c806acec05 /roles/caption/templates/process-captions.py
parent: 1bd3049f1692273a49eca0a92ec6710f2a2d0ded (diff)
download: emacsconf-ansible-dbb0b8498e2f4eb5aa168c38c9e25a3edc0bf741.tar.xz
emacsconf-ansible-dbb0b8498e2f4eb5aa168c38c9e25a3edc0bf741.zip
1 files changed, 5 insertions, 2 deletions
diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py
index f3f317d..6f6b013 100755
--- a/roles/caption/templates/process-captions.py
+++ b/roles/caption/templates/process-captions.py
@@ -47,6 +47,7 @@ TRIM_AUDIO = False
 MODEL = os.environ.get('MODEL', 'large')  # Set to tiny for testing
 WORK_DIR = "{{ emacsconf_caption_dir }}"
 JSON_FILE = os.path.join(WORK_DIR, 'talks.json')
+DO_SRV2 = False
 
 # ----------------------------------------------------------------
 
@@ -94,7 +95,7 @@ def get_files_to_work_on(directory):
         else:
             val['base'] = os.path.join(os.path.dirname(val['video'] or val['audio']),
                                        base_name(val['video'] or val['audio']))
-        if ALWAYS or (not 'vtt' in val or not 'srv2' in val):
+        if ALWAYS or (not 'vtt' in val or (DO_SRV2 and not 'srv2' in val)):
             if not 'audio' in val and 'video' in val:
                 # No audio, need to convert it
                 val = extract_audio(val)
@@ -141,6 +142,8 @@ def generate_captions(work):
     result = clean_up_timestamps(result)
     with open(new_file, 'w') as vtt:
         whisper.utils.write_vtt(result['segments'], file=vtt)
+    with open(work['base'] + '.txt', 'w') as txt:
+        whisper.utils.write_txt(result['segments'], file=txt)
     work['vtt'] = new_file
     if 'srv2' in work: del work['srv2']
     return work
@@ -209,7 +212,7 @@ if len(needs_work) > 0:
                 if work['audio']:
                     if ALWAYS or not 'vtt' in work:
                         work = generate_captions(work)
-                    if ALWAYS or not 'srv2' in work:
+                    if DO_SRV2 and (ALWAYS or not 'srv2' in work):
                         work = generate_srv2(work)
                         #     print("Aligning words", audio_file, datetime.datetime.now())
                         #     word_cuts = align_words(cuts)
author	Sacha Chua <sacha@sachachua.com>	2022-10-30 10:01:09 -0400
committer	Sacha Chua <sacha@sachachua.com>	2022-10-30 10:01:09 -0400
commit	dbb0b8498e2f4eb5aa168c38c9e25a3edc0bf741 (patch)
tree	a91c17218c45ee07087618c1713193c806acec05 /roles/caption/templates/process-captions.py
parent	1bd3049f1692273a49eca0a92ec6710f2a2d0ded (diff)
download	emacsconf-ansible-dbb0b8498e2f4eb5aa168c38c9e25a3edc0bf741.tar.xz emacsconf-ansible-dbb0b8498e2f4eb5aa168c38c9e25a3edc0bf741.zip