summary refs log tree commit diff stats
path: root/roles/caption/templates/process-captions.py
diff options
context:
space:
mode:
author Sacha Chua <sacha@sachachua.com> 2022-11-02 13:08:03 -0400
committer Sacha Chua <sacha@sachachua.com> 2022-11-02 13:08:03 -0400
commit c569e02d8ab5ebcbb5a44e10c14799fa097c9bc7 (patch)
tree 05528005f12160fd505696cb5d89f22d30249f32 /roles/caption/templates/process-captions.py
parent 4696aa1fed214fe1084d7c3deecaaa289bc0a1fd (diff)
download emacsconf-ansible-c569e02d8ab5ebcbb5a44e10c14799fa097c9bc7.tar.xz
emacsconf-ansible-c569e02d8ab5ebcbb5a44e10c14799fa097c9bc7.zip
OBS overlays, starting to get publish and edit on res
Diffstat (limited to 'roles/caption/templates/process-captions.py')
-rwxr-xr-x roles/caption/templates/process-captions.py 12
1 files changed, 10 insertions, 2 deletions
diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py
index 50c62d1..223531b 100755
--- a/roles/caption/templates/process-captions.py
+++ b/roles/caption/templates/process-captions.py
@@ -95,7 +95,7 @@ def get_files_to_work_on(directory):
else:
val['base'] = os.path.join(os.path.dirname(val['video'] or val['audio']),
base_name(val['video'] or val['audio']))
- if ALWAYS or (not 'vtt' in val or (DO_SRV2 and not 'srv2' in val)):
+ if ALWAYS or (not 'vtt' in val or (DO_SRV2 and not 'srv2' in val) or (not 'txt' in val)):
if not 'audio' in val and 'video' in val:
# No audio, need to convert it
val = extract_audio(val)
@@ -142,12 +142,18 @@ def generate_captions(work):
result = clean_up_timestamps(result)
with open(new_file, 'w') as vtt:
whisper.utils.write_vtt(result['segments'], file=vtt)
- with open(work['base'] + '.txt'):
+ with open(work['base'] + '.txt') as txt:
whisper.utils.write_txt(result['segments'], file=txt)
work['vtt'] = new_file
if 'srv2' in work: del work['srv2']
return work
+def generate_text(work):
+ with open(work['base'] + '.txt') as txt:
+ for i, caption in enumerate(webvtt.read(work['vtt'])):
+ txt.write(caption.text)
+ work['text'] = work['base'] + '.txt'
+
def generate_srv2(work):
"""Generate a SRV2 file."""
log("Generating SRV2")
@@ -218,6 +224,8 @@ if len(needs_work) > 0:
# word_cuts = align_words(cuts)
# convert_cuts_to_word_timing(audio_file, word_cuts)
log("Done %s" % str(work['base']))
+ if not 'txt' in work:
+ work = generate_text(work)
needs_work = get_files_to_work_on(directory)
else:
log("No work needed.")