From 9ec328a0ad4e91f8ac366e5e9c4dd2eaee10ce66 Mon Sep 17 00:00:00 2001
From: Leo Vivier <zaeph@zaeph.net>
Date: Sun, 30 Oct 2022 06:41:32 +0100
Subject: Remove trailing whitespace

Truly an invaluable commit.
---
 roles/caption/templates/process-captions.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'roles')

diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py
index f3f317d..f2ba2f7 100755
--- a/roles/caption/templates/process-captions.py
+++ b/roles/caption/templates/process-captions.py
@@ -56,12 +56,12 @@ def get_slug_from_filename(filename):
         return m.group(1)
     else:
         return os.path.basename(os.path.dirname(filename))
-        
+
 def get_files_to_work_on(directory):
     """Return the list of audio files to work on.
     The specified directory is checked recursively.
     Skip any videos that already have caption files.
-    
+
     Convert any videos that don't already have audio files, and return the audio files instead.
     When there are multiple videos and audio files for a talk, pick one.
     """
@@ -74,11 +74,11 @@ def get_files_to_work_on(directory):
             info[slug]['slug'] = slug
             if re.search(AUDIO_REGEXP, filename):
                 info[slug]['audio'] = f
-            elif re.search(VIDEO_REGEXP, filename): 
+            elif re.search(VIDEO_REGEXP, filename):
                 info[slug]['video'] = f
-            elif re.search('vtt$', filename): 
+            elif re.search('vtt$', filename):
                 info[slug]['vtt'] = f
-            elif re.search('srv2$', filename): 
+            elif re.search('srv2$', filename):
                 info[slug]['srv2'] = f
     needs_work = []
     if JSON_FILE:
@@ -128,7 +128,7 @@ def clean_up_timestamps(result):
         seg['end'] = min(segs[i + 1]['start'] - 0.001, seg['end'])
     result['segments'] = segs
     return result
-    
+
 def generate_captions(work):
     """Generate a VTT file based on the audio file."""
     log("Generating captions")
@@ -144,7 +144,7 @@ def generate_captions(work):
     work['vtt'] = new_file
     if 'srv2' in work: del work['srv2']
     return work
-    
+
 def generate_srv2(work):
     """Generate a SRV2 file."""
     log("Generating SRV2")
-- 
cgit v1.2.3


From 7e051db8aa749ae689af8013c4af0a54c8479201 Mon Sep 17 00:00:00 2001
From: Leo Vivier <zaeph@zaeph.net>
Date: Sun, 30 Oct 2022 06:42:20 +0100
Subject: Add another ext to VIDEO_REGEXP

---
 roles/caption/templates/process-captions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'roles')

diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py
index f2ba2f7..67f82ce 100755
--- a/roles/caption/templates/process-captions.py
+++ b/roles/caption/templates/process-captions.py
@@ -40,7 +40,7 @@ import json
 import torch
 
 THREADS = {{ cpus }}
-VIDEO_REGEXP = '\.(webm|mov|mp4)$'
+VIDEO_REGEXP = '\.(webm|mov|mp4|mkv)$'
 AUDIO_REGEXP = '\.(ogg|opus)$'
 ALWAYS = False
 TRIM_AUDIO = False
-- 
cgit v1.2.3


From 48c4a80dfd735e6c85c45783dff42a8ee6f9d468 Mon Sep 17 00:00:00 2001
From: Leo Vivier <zaeph@zaeph.net>
Date: Sun, 30 Oct 2022 07:10:28 +0100
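Subject: Try to fix regexps

Double the backslash in VIDEO_REGEXP and AUDIO_REGEXP so the string
literals spell out an escaped dot explicitly. In extract_audio, match
on the dotted file extension and copy the audio stream for .mp4 and
.mkv inputs as well as .webm; other inputs are still re-encoded with
libopus.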
---
 roles/caption/templates/process-captions.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'roles')

diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py
index 67f82ce..b650bec 100755
--- a/roles/caption/templates/process-captions.py
+++ b/roles/caption/templates/process-captions.py
@@ -40,8 +40,8 @@ import json
 import torch
 
 THREADS = {{ cpus }}
-VIDEO_REGEXP = '\.(webm|mov|mp4|mkv)$'
-AUDIO_REGEXP = '\.(ogg|opus)$'
+VIDEO_REGEXP = '\\.(webm|mov|mp4|mkv)$'
+AUDIO_REGEXP = '\\.(ogg|opus)$'
 ALWAYS = False
 TRIM_AUDIO = False
 MODEL = os.environ.get('MODEL', 'large')  # Set to tiny for testing
@@ -107,7 +107,7 @@ def extract_audio(work):
     if 'Audio: vorbis' in output.decode():
         extension = 'ogg'
     new_file = work['base'] + '.' + extension
-    acodec = 'copy' if re.search('webm$', work['video']) else 'libopus'
+    acodec = 'copy' if re.search('\\.(webm|mp4|mkv)$', work['video']) else 'libopus'
     log("Extracting audio from %s acodec %s" % (work['video'], acodec))
     output = subprocess.check_output(['ffmpeg', '-y', '-i', work['video'], '-acodec', acodec, '-vn', new_file], stderr=subprocess.STDOUT)
     work['audio'] = new_file
-- 
cgit v1.2.3