summary | refs | log | tree | commit | diff | stats
path: root/roles/caption/templates/process-captions.py
diff options
context:
space:
mode:
Diffstat (limited to 'roles/caption/templates/process-captions.py')
-rwxr-xr-x roles/caption/templates/process-captions.py 18
1 files changed, 12 insertions, 6 deletions
diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py
index 223531b..1b6515c 100755
--- a/roles/caption/templates/process-captions.py
+++ b/roles/caption/templates/process-captions.py
@@ -81,6 +81,8 @@ def get_files_to_work_on(directory):
info[slug]['vtt'] = f
elif re.search('srv2$', filename):
info[slug]['srv2'] = f
+ elif re.search('txt$', filename):
+ info[slug]['txt'] = f
needs_work = []
if JSON_FILE:
with open(JSON_FILE) as f:
@@ -108,10 +110,11 @@ def extract_audio(work):
if 'Audio: vorbis' in output.decode():
extension = 'ogg'
new_file = work['base'] + '.' + extension
- acodec = 'copy' if re.search('\\.(webm|mp4|mkv)$', work['video']) else 'libopus'
+ acodec = 'copy' if re.search('\\.webm$', work['video']) else 'libopus'
log("Extracting audio from %s acodec %s" % (work['video'], acodec))
output = subprocess.check_output(['ffmpeg', '-y', '-i', work['video'], '-acodec', acodec, '-vn', new_file], stderr=subprocess.STDOUT)
work['audio'] = new_file
+ subprocess.call(["/data/emacsconf/2022/scripts/upload.sh", work['audio']])
return work
def to_sec(time_str):
@@ -142,18 +145,21 @@ def generate_captions(work):
result = clean_up_timestamps(result)
with open(new_file, 'w') as vtt:
whisper.utils.write_vtt(result['segments'], file=vtt)
- with open(work['base'] + '.txt') as txt:
+ with open(work['base'] + '.txt', 'w') as txt:
whisper.utils.write_txt(result['segments'], file=txt)
work['vtt'] = new_file
+ work['txt'] = work['base'] + '.txt'
+ subprocess.call(["/data/emacsconf/2022/scripts/upload.sh", work['vtt'], work['txt']])
if 'srv2' in work: del work['srv2']
return work
def generate_text(work):
- with open(work['base'] + '.txt') as txt:
+ with open(work['base'] + '.txt', 'w') as txt:
for i, caption in enumerate(webvtt.read(work['vtt'])):
- txt.write(caption.text)
- work['text'] = work['base'] + '.txt'
-
+ txt.write(caption.text + "\n")
+ work['txt'] = work['base'] + '.txt'
+ return work
+
def generate_srv2(work):
"""Generate a SRV2 file."""
log("Generating SRV2")