diff options
author | Sacha Chua <sacha@sachachua.com> | 2022-10-30 10:01:09 -0400 |
---|---|---|
committer | Sacha Chua <sacha@sachachua.com> | 2022-10-30 10:01:09 -0400 |
commit | dbb0b8498e2f4eb5aa168c38c9e25a3edc0bf741 (patch) | |
tree | a91c17218c45ee07087618c1713193c806acec05 /roles/caption/templates | |
parent | 1bd3049f1692273a49eca0a92ec6710f2a2d0ded (diff) | |
download | emacsconf-ansible-dbb0b8498e2f4eb5aa168c38c9e25a3edc0bf741.tar.xz emacsconf-ansible-dbb0b8498e2f4eb5aa168c38c9e25a3edc0bf741.zip |
aeneas, caption updates
Diffstat (limited to 'roles/caption/templates')
-rwxr-xr-x | roles/caption/templates/process-captions.py | 7 |
1 files changed, 5 insertions, 2 deletions
```diff
diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py
index f3f317d..6f6b013 100755
--- a/roles/caption/templates/process-captions.py
+++ b/roles/caption/templates/process-captions.py
@@ -47,6 +47,7 @@ TRIM_AUDIO = False
 MODEL = os.environ.get('MODEL', 'large') # Set to tiny for testing
 WORK_DIR = "{{ emacsconf_caption_dir }}"
 JSON_FILE = os.path.join(WORK_DIR, 'talks.json')
+DO_SRV2 = False
 
 # ----------------------------------------------------------------
 
@@ -94,7 +95,7 @@ def get_files_to_work_on(directory):
             else:
                 val['base'] = os.path.join(os.path.dirname(val['video'] or val['audio']),
                                            base_name(val['video'] or val['audio']))
-            if ALWAYS or (not 'vtt' in val or not 'srv2' in val):
+            if ALWAYS or (not 'vtt' in val or (DO_SRV2 and not 'srv2' in val)):
                 if not 'audio' in val and 'video' in val:
                     # No audio, need to convert it
                     val = extract_audio(val)
@@ -141,6 +142,8 @@ def generate_captions(work):
     result = clean_up_timestamps(result)
     with open(new_file, 'w') as vtt:
         whisper.utils.write_vtt(result['segments'], file=vtt)
+    with open(work['base'] + '.txt')
+        whisper.utils.write_txt(result['segments'], file=txt)
     work['vtt'] = new_file
     if 'srv2' in work: del work['srv2']
     return work
@@ -209,7 +212,7 @@ if len(needs_work) > 0:
         if work['audio']:
             if ALWAYS or not 'vtt' in work:
                 work = generate_captions(work)
-            if ALWAYS or not 'srv2' in work:
+            if DO_SRV2 and (ALWAYS or not 'srv2' in work):
                 work = generate_srv2(work)
             # print("Aligning words", audio_file, datetime.datetime.now())
             # word_cuts = align_words(cuts)
```