diff options
| author | Sacha Chua <sacha@sachachua.com> | 2022-10-30 10:01:19 -0400 | 
|---|---|---|
| committer | Sacha Chua <sacha@sachachua.com> | 2022-10-30 10:01:19 -0400 | 
| commit | a1e9bd2ba2cabd37a298c4ed951dfe2344bd750f (patch) | |
| tree | 9fdf368fa3fbb693a929339fe6232dd5a5a380e8 | |
| parent | dbb0b8498e2f4eb5aa168c38c9e25a3edc0bf741 (diff) | |
| parent | 48c4a80dfd735e6c85c45783dff42a8ee6f9d468 (diff) | |
| download | emacsconf-ansible-a1e9bd2ba2cabd37a298c4ed951dfe2344bd750f.tar.xz emacsconf-ansible-a1e9bd2ba2cabd37a298c4ed951dfe2344bd750f.zip | |
Merge branch 'main' of git.emacsconf.org:pub/emacsconf-ansible into main
Diffstat (limited to '')
| -rwxr-xr-x | roles/caption/templates/process-captions.py | 20 | 
1 files changed, 10 insertions, 10 deletions
| diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py index 6f6b013..da99203 100755 --- a/roles/caption/templates/process-captions.py +++ b/roles/caption/templates/process-captions.py @@ -40,8 +40,8 @@ import json  import torch  THREADS = {{ cpus }} -VIDEO_REGEXP = '\.(webm|mov|mp4)$' -AUDIO_REGEXP = '\.(ogg|opus)$' +VIDEO_REGEXP = '\\.(webm|mov|mp4|mkv)$' +AUDIO_REGEXP = '\\.(ogg|opus)$'  ALWAYS = False  TRIM_AUDIO = False  MODEL = os.environ.get('MODEL', 'large')  # Set to tiny for testing @@ -57,12 +57,12 @@ def get_slug_from_filename(filename):          return m.group(1)      else:          return os.path.basename(os.path.dirname(filename)) -         +  def get_files_to_work_on(directory):      """Return the list of audio files to work on.      The specified directory is checked recursively.      Skip any videos that already have caption files. -     +      Convert any videos that don't already have audio files, and return the audio files instead.      When there are multiple videos and audio files for a talk, pick one.      """ @@ -75,11 +75,11 @@ def get_files_to_work_on(directory):              info[slug]['slug'] = slug              if re.search(AUDIO_REGEXP, filename):                  info[slug]['audio'] = f -            elif re.search(VIDEO_REGEXP, filename):  +            elif re.search(VIDEO_REGEXP, filename):                  info[slug]['video'] = f -            elif re.search('vtt$', filename):  +            elif re.search('vtt$', filename):                  info[slug]['vtt'] = f -            elif re.search('srv2$', filename):  +            elif re.search('srv2$', filename):                  info[slug]['srv2'] = f      needs_work = []      if JSON_FILE: @@ -108,7 +108,7 @@ def extract_audio(work):      if 'Audio: vorbis' in output.decode():          extension = 'ogg'      new_file = work['base'] + '.' + extension -    acodec = 'copy' if re.search('webm$', work['video']) else 'libopus' +    acodec = 'copy' if re.search('\\.(webm|mp4|mkv)$', work['video']) else 'libopus'      log("Extracting audio from %s acodec %s" % (work['video'], acodec))      output = subprocess.check_output(['ffmpeg', '-y', '-i', work['video'], '-acodec', acodec, '-vn', new_file], stderr=subprocess.STDOUT)      work['audio'] = new_file @@ -129,7 +129,7 @@ def clean_up_timestamps(result):          seg['end'] = min(segs[i + 1]['start'] - 0.001, seg['end'])      result['segments'] = segs      return result -     +  def generate_captions(work):      """Generate a VTT file based on the audio file."""      log("Generating captions") @@ -147,7 +147,7 @@ def generate_captions(work):      work['vtt'] = new_file      if 'srv2' in work: del work['srv2']      return work -     +  def generate_srv2(work):      """Generate a SRV2 file."""      log("Generating SRV2") | 
