diff options
Diffstat (limited to 'roles/caption')
-rwxr-xr-x | roles/caption/templates/process-captions.py | 20 |
1 files changed, 10 insertions, 10 deletions
diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py index 6f6b013..da99203 100755 --- a/roles/caption/templates/process-captions.py +++ b/roles/caption/templates/process-captions.py @@ -40,8 +40,8 @@ import json import torch THREADS = {{ cpus }} -VIDEO_REGEXP = '\.(webm|mov|mp4)$' -AUDIO_REGEXP = '\.(ogg|opus)$' +VIDEO_REGEXP = '\\.(webm|mov|mp4|mkv)$' +AUDIO_REGEXP = '\\.(ogg|opus)$' ALWAYS = False TRIM_AUDIO = False MODEL = os.environ.get('MODEL', 'large') # Set to tiny for testing @@ -57,12 +57,12 @@ def get_slug_from_filename(filename): return m.group(1) else: return os.path.basename(os.path.dirname(filename)) - + def get_files_to_work_on(directory): """Return the list of audio files to work on. The specified directory is checked recursively. Skip any videos that already have caption files. - + Convert any videos that don't already have audio files, and return the audio files instead. When there are multiple videos and audio files for a talk, pick one. """ @@ -75,11 +75,11 @@ def get_files_to_work_on(directory): info[slug]['slug'] = slug if re.search(AUDIO_REGEXP, filename): info[slug]['audio'] = f - elif re.search(VIDEO_REGEXP, filename): + elif re.search(VIDEO_REGEXP, filename): info[slug]['video'] = f - elif re.search('vtt$', filename): + elif re.search('vtt$', filename): info[slug]['vtt'] = f - elif re.search('srv2$', filename): + elif re.search('srv2$', filename): info[slug]['srv2'] = f needs_work = [] if JSON_FILE: @@ -108,7 +108,7 @@ def extract_audio(work): if 'Audio: vorbis' in output.decode(): extension = 'ogg' new_file = work['base'] + '.' + extension - acodec = 'copy' if re.search('webm$', work['video']) else 'libopus' + acodec = 'copy' if re.search('\\.(webm|mp4|mkv)$', work['video']) else 'libopus' log("Extracting audio from %s acodec %s" % (work['video'], acodec)) output = subprocess.check_output(['ffmpeg', '-y', '-i', work['video'], '-acodec', acodec, '-vn', new_file], stderr=subprocess.STDOUT) work['audio'] = new_file @@ -129,7 +129,7 @@ def clean_up_timestamps(result): seg['end'] = min(segs[i + 1]['start'] - 0.001, seg['end']) result['segments'] = segs return result - + def generate_captions(work): """Generate a VTT file based on the audio file.""" log("Generating captions") @@ -147,7 +147,7 @@ def generate_captions(work): work['vtt'] = new_file if 'srv2' in work: del work['srv2'] return work - + def generate_srv2(work): """Generate a SRV2 file.""" log("Generating SRV2") |