diff options
author | Sacha Chua <sacha@sachachua.com> | 2022-11-05 07:55:05 -0400 |
---|---|---|
committer | Sacha Chua <sacha@sachachua.com> | 2022-11-05 07:55:05 -0400 |
commit | 0b07963c81155b621dd45b878b869a78b8c9de49 (patch) | |
tree | 86c8f7756784cb71d246e40967856c528a3edad4 /roles/caption/templates/process-captions.py | |
parent | ffe7c227fa5a617e23f74cab5f41758e00a6be5c (diff) | |
download | emacsconf-ansible-0b07963c81155b621dd45b878b869a78b8c9de49.tar.xz emacsconf-ansible-0b07963c81155b621dd45b878b869a78b8c9de49.zip |
Caption updates
Diffstat (limited to '')
-rwxr-xr-x | roles/caption/templates/process-captions.py | 18 |
1 files changed, 12 insertions, 6 deletions
diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py index 223531b..1b6515c 100755 --- a/roles/caption/templates/process-captions.py +++ b/roles/caption/templates/process-captions.py @@ -81,6 +81,8 @@ def get_files_to_work_on(directory): info[slug]['vtt'] = f elif re.search('srv2$', filename): info[slug]['srv2'] = f + elif re.search('txt$', filename): + info[slug]['txt'] = f needs_work = [] if JSON_FILE: with open(JSON_FILE) as f: @@ -108,10 +110,11 @@ def extract_audio(work): if 'Audio: vorbis' in output.decode(): extension = 'ogg' new_file = work['base'] + '.' + extension - acodec = 'copy' if re.search('\\.(webm|mp4|mkv)$', work['video']) else 'libopus' + acodec = 'copy' if re.search('\\.webm$', work['video']) else 'libopus' log("Extracting audio from %s acodec %s" % (work['video'], acodec)) output = subprocess.check_output(['ffmpeg', '-y', '-i', work['video'], '-acodec', acodec, '-vn', new_file], stderr=subprocess.STDOUT) work['audio'] = new_file + subprocess.call(["/data/emacsconf/2022/scripts/upload.sh", work['audio']]) return work def to_sec(time_str): @@ -142,18 +145,21 @@ def generate_captions(work): result = clean_up_timestamps(result) with open(new_file, 'w') as vtt: whisper.utils.write_vtt(result['segments'], file=vtt) - with open(work['base'] + '.txt') as txt: + with open(work['base'] + '.txt', 'w') as txt: whisper.utils.write_txt(result['segments'], file=txt) work['vtt'] = new_file + work['txt'] = work['base'] + '.txt' + subprocess.call(["/data/emacsconf/2022/scripts/upload.sh", work['vtt'], work['txt']]) if 'srv2' in work: del work['srv2'] return work def generate_text(work): - with open(work['base'] + '.txt') as txt: + with open(work['base'] + '.txt', 'w') as txt: for i, caption in enumerate(webvtt.read(work['vtt'])): - txt.write(caption.text) - work['text'] = work['base'] + '.txt' - + txt.write(caption.text + "\n") + work['txt'] = work['base'] + '.txt' + return work + def generate_srv2(work): """Generate a SRV2 file.""" log("Generating SRV2") |