diff options
Diffstat (limited to '')
-rwxr-xr-x | roles/caption/templates/process-captions.py | 12 |
1 files changed, 10 insertions, 2 deletions
diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py index 50c62d1..223531b 100755 --- a/roles/caption/templates/process-captions.py +++ b/roles/caption/templates/process-captions.py @@ -95,7 +95,7 @@ def get_files_to_work_on(directory): else: val['base'] = os.path.join(os.path.dirname(val['video'] or val['audio']), base_name(val['video'] or val['audio'])) - if ALWAYS or (not 'vtt' in val or (DO_SRV2 and not 'srv2' in val)): + if ALWAYS or (not 'vtt' in val or (DO_SRV2 and not 'srv2' in val) or (not 'txt' in val)): if not 'audio' in val and 'video' in val: # No audio, need to convert it val = extract_audio(val) @@ -142,12 +142,18 @@ def generate_captions(work): result = clean_up_timestamps(result) with open(new_file, 'w') as vtt: whisper.utils.write_vtt(result['segments'], file=vtt) - with open(work['base'] + '.txt'): + with open(work['base'] + '.txt') as txt: whisper.utils.write_txt(result['segments'], file=txt) work['vtt'] = new_file if 'srv2' in work: del work['srv2'] return work +def generate_text(work): + with open(work['base'] + '.txt') as txt: + for i, caption in enumerate(webvtt.read(work['vtt'])): + txt.write(caption.text) + work['text'] = work['base'] + '.txt' + def generate_srv2(work): """Generate a SRV2 file.""" log("Generating SRV2") @@ -218,6 +224,8 @@ if len(needs_work) > 0: # word_cuts = align_words(cuts) # convert_cuts_to_word_timing(audio_file, word_cuts) log("Done %s" % str(work['base'])) + if not 'txt' in work: + work = generate_text(work) needs_work = get_files_to_work_on(directory) else: log("No work needed.") |