summary refs log tree commit diff stats
path: root/roles/caption
diff options
context:
space:
mode:
author Sacha Chua <sacha@sachachua.com> 2022-11-05 07:55:05 -0400
committer Sacha Chua <sacha@sachachua.com> 2022-11-05 07:55:05 -0400
commit 0b07963c81155b621dd45b878b869a78b8c9de49 (patch)
tree 86c8f7756784cb71d246e40967856c528a3edad4 /roles/caption
parent ffe7c227fa5a617e23f74cab5f41758e00a6be5c (diff)
download emacsconf-ansible-0b07963c81155b621dd45b878b869a78b8c9de49.tar.xz
emacsconf-ansible-0b07963c81155b621dd45b878b869a78b8c9de49.zip
Caption updates
Diffstat (limited to 'roles/caption')
-rw-r--r-- roles/caption/tasks/main.yml 39
-rwxr-xr-x roles/caption/templates/process-captions.py 18
-rwxr-xr-x roles/caption/templates/process-prerec.sh 18
-rwxr-xr-x roles/caption/templates/reencode.sh 43
-rwxr-xr-x roles/caption/templates/run-aeneas.sh 14
-rwxr-xr-x roles/caption/templates/update-task-status.sh 10
-rwxr-xr-x roles/caption/templates/upload.sh 6
7 files changed, 133 insertions, 15 deletions
diff --git a/roles/caption/tasks/main.yml b/roles/caption/tasks/main.yml
index c1511cf..a69d848 100644
--- a/roles/caption/tasks/main.yml
+++ b/roles/caption/tasks/main.yml
@@ -30,20 +30,41 @@
state: present
- name: Ensure the directory exists
file:
- path: "{{ emacsconf_caption_dir }}"
+ path: "{{ emacsconf_caption_dir }}/scripts"
state: directory
-- name: Copy the script for processing the files
- tags: process-captions
+ owner: "{{ emacsconf_user }}"
+ group: "{{ emacsconf_group }}"
+- name: Recreate encoding script
+ tags: process-prerec
template:
- src: process-captions.py
- dest: "{{ emacsconf_caption_dir }}/process-captions.py"
+ src: "{{ item }}"
+ dest: "{{ emacsconf_caption_dir }}/scripts/{{ item }}"
+ owner: "{{ emacsconf_user }}"
+ group: "{{ emacsconf_group }}"
+ force: no
mode: 0775
-- name: Copy the inotify script
- tags: process-captions
+ loop:
+ - reencode.sh
+- name: Copy scripts for processing
+ tags: process-prerec
template:
- src: inotify-process-captions.sh
- dest: "{{ emacsconf_caption_dir }}/inotify-process-captions.sh"
+ src: "{{ item }}"
+ dest: "{{ emacsconf_caption_dir }}/scripts/{{ item }}"
+ owner: "{{ emacsconf_user }}"
+ group: "{{ emacsconf_group }}"
mode: 0775
+ loop:
+ - process-captions.py
+ - process-prerec.sh
+ - update-task-status.sh
+ - upload.sh
+ - run-aeneas.sh
+# - name: Copy the inotify script
+# tags: process-captions
+# template:
+# src: inotify-process-captions.sh
+# dest: "{{ emacsconf_caption_dir }}/inotify-process-captions.sh"
+# mode: 0775
- name: Copy talks.json
tags: talks-json
template:
diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py
index 223531b..1b6515c 100755
--- a/roles/caption/templates/process-captions.py
+++ b/roles/caption/templates/process-captions.py
@@ -81,6 +81,8 @@ def get_files_to_work_on(directory):
info[slug]['vtt'] = f
elif re.search('srv2$', filename):
info[slug]['srv2'] = f
+ elif re.search('txt$', filename):
+ info[slug]['txt'] = f
needs_work = []
if JSON_FILE:
with open(JSON_FILE) as f:
@@ -108,10 +110,11 @@ def extract_audio(work):
if 'Audio: vorbis' in output.decode():
extension = 'ogg'
new_file = work['base'] + '.' + extension
- acodec = 'copy' if re.search('\\.(webm|mp4|mkv)$', work['video']) else 'libopus'
+ acodec = 'copy' if re.search('\\.webm$', work['video']) else 'libopus'
log("Extracting audio from %s acodec %s" % (work['video'], acodec))
output = subprocess.check_output(['ffmpeg', '-y', '-i', work['video'], '-acodec', acodec, '-vn', new_file], stderr=subprocess.STDOUT)
work['audio'] = new_file
+ subprocess.call(["/data/emacsconf/2022/scripts/upload.sh", work['audio']])
return work
def to_sec(time_str):
@@ -142,18 +145,21 @@ def generate_captions(work):
result = clean_up_timestamps(result)
with open(new_file, 'w') as vtt:
whisper.utils.write_vtt(result['segments'], file=vtt)
- with open(work['base'] + '.txt') as txt:
+ with open(work['base'] + '.txt', 'w') as txt:
whisper.utils.write_txt(result['segments'], file=txt)
work['vtt'] = new_file
+ work['txt'] = work['base'] + '.txt'
+ subprocess.call(["/data/emacsconf/2022/scripts/upload.sh", work['vtt'], work['txt']])
if 'srv2' in work: del work['srv2']
return work
def generate_text(work):
- with open(work['base'] + '.txt') as txt:
+ with open(work['base'] + '.txt', 'w') as txt:
for i, caption in enumerate(webvtt.read(work['vtt'])):
- txt.write(caption.text)
- work['text'] = work['base'] + '.txt'
-
+ txt.write(caption.text + "\n")
+ work['txt'] = work['base'] + '.txt'
+ return work
+
def generate_srv2(work):
"""Generate a SRV2 file."""
log("Generating SRV2")
diff --git a/roles/caption/templates/process-prerec.sh b/roles/caption/templates/process-prerec.sh
new file mode 100755
index 0000000..e49aa72
--- /dev/null
+++ b/roles/caption/templates/process-prerec.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# {{ ansible_managed }}
+
+ORIGINAL=$1
+REENCODED=$(echo "$ORIGINAL" | perl -pe 's/^(emacsconf-[0-9]*-.*?--.*?--.*?--).*/$1reencoded.webm/')
+SLUG=$(echo "$ORIGINAL" | perl -ne '/^emacsconf-[0-9]*-(.*?)--/ && print $1')
+MAIN=$(echo "$ORIGINAL" | perl -pe 's/^(emacsconf-[0-9]*-.*?--.*?--.*?--).*/$1main.webm/')
+SCREEN=reencode-$SLUG
+if ! ( screen -ls | grep -q $SLUG ); then
+ screen -dmS $SCREEN
+fi
+( cd /data/emacsconf/cache; ./update-cache )
+/data/emacsconf/2022/scripts/update-task-status.sh $SLUG "WAITING_FOR_PREREC" "PROCESSING"
+#if [[ ! -f "$REENCODED" ]]; then
+screen -S $SCREEN -X screen -t reencode-$SLUG /bin/bash -c "/data/emacsconf/2022/scripts/reencode.sh \"$ORIGINAL\" \"$REENCODED\" && /data/emacsconf/2022/scripts/upload.sh $REENCODED && exec /bin/bash" &
+#fi
+screen -S $SCREEN -X screen -t captions-$SLUG /bin/bash -c "/data/emacsconf/2022/scripts/process-captions.py $(dirname $ORIGINAL); /data/emacsconf/2022/scripts/update-task-status.sh $SLUG PROCESSING TO_ASSIGN; exec /bin/bash"
+screen -x $SCREEN
diff --git a/roles/caption/templates/reencode.sh b/roles/caption/templates/reencode.sh
new file mode 100755
index 0000000..e3a82eb
--- /dev/null
+++ b/roles/caption/templates/reencode.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Defaults
+q=32
+cpu=4
+time_limit=""
+print_only=false
+
+while getopts :q:c:t:s OPT; do
+ case $OPT in
+ q|+q)
+ q="$OPTARG"
+ ;;
+ c|+c)
+ cpu="$OPTARG"
+ ;;
+ t|+t)
+ time_limit="-to $OPTARG"
+ ;;
+ s)
+ print_only=true
+ ;;
+ *)
+ echo "usage: `basename $0` [+-q ARG] [+-c ARG} [--] ARGS..."
+ exit 2
+ esac
+done
+shift `expr $OPTIND - 1`
+OPTIND=1
+
+command="$(cat<<EOF
+ffmpeg -y -i "$1" $time_limit -c:v libvpx-vp9 -b:v 0 -crf $q -an -row-mt 1 -tile-columns 2 -tile-rows 2 -cpu-used $cpu -g 240 -pass 1 -f webm -threads $cpu /dev/null &&
+ ffmpeg -y -i "$1" $time_limit -c:v libvpx-vp9 -b:v 0 -crf $q -c:a libopus -row-mt 1 -tile-columns 2 -tile-rows 2 -cpu-used $cpu -pass 2 -g 240 -threads $cpu "$2"
+EOF
+)"
+
+if [ $print_only == true ]; then
+ echo "$command"
+else
+ eval "$command"
+fi
diff --git a/roles/caption/templates/run-aeneas.sh b/roles/caption/templates/run-aeneas.sh
new file mode 100755
index 0000000..6f40134
--- /dev/null
+++ b/roles/caption/templates/run-aeneas.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/bash
+#
+# {{ ansible_managed }}
+#
+AUDIO=$(ls *.opus *.ogg | head -n1)
+BASE=$(echo $AUDIO | perl -pe 's/^(emacsconf-2022-.*?--.*?--.*?)--.*/$1/')
+echo $AUDIO
+echo $BASE
+if [ ! -f $BASE--whisper.vtt ]; then
+ cp ${BASE}--main.vtt ${BASE}--whisper.vtt
+fi
+python3 -m aeneas.tools.execute_task $AUDIO *.txt "task_language=eng|os_task_file_format=vtt|is_text_type=plain" ${BASE}--aeneas.vtt
+cp ${BASE}--aeneas.vtt ${BASE}--main.vtt
+/data/emacsconf/2022/scripts/upload.sh ${BASE}--main.vtt
diff --git a/roles/caption/templates/update-task-status.sh b/roles/caption/templates/update-task-status.sh
new file mode 100755
index 0000000..18b7c1b
--- /dev/null
+++ b/roles/caption/templates/update-task-status.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# {{ ansible_managed }}
+SLUG="$1"
+FROM_STATUS="$2"
+TO_STATUS="$3"
+cd ~/emacsconf-2022-private
+git pull
+emacsclient --eval "(progn (emacsconf-update-talk-status \"$SLUG\" \"$FROM_STATUS\" \"$TO_STATUS\") (emacsconf-with-talk-heading \"$SLUG\" (emacsconf-cache-video-data (emacsconf-get-talk-info-for-subtree)) (save-buffer)))"
+git commit -m "Update task status for $SLUG" conf.org
+git push
diff --git a/roles/caption/templates/upload.sh b/roles/caption/templates/upload.sh
new file mode 100755
index 0000000..f723d29
--- /dev/null
+++ b/roles/caption/templates/upload.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# {{ ansible_managed }}
+
+scp $* orga@media.emacsconf.org:~/backstage
+emacsclient --eval "(emacsconf-publish-backstage-index)"
+rsync -avze ssh orga@media.emacsconf.org:~/backstage/ /data/emacsconf/cache/