diff options
| author | Sacha Chua <sacha@sachachua.com> | 2022-11-05 07:55:05 -0400 | 
|---|---|---|
| committer | Sacha Chua <sacha@sachachua.com> | 2022-11-05 07:55:05 -0400 | 
| commit | 0b07963c81155b621dd45b878b869a78b8c9de49 (patch) | |
| tree | 86c8f7756784cb71d246e40967856c528a3edad4 /roles | |
| parent | ffe7c227fa5a617e23f74cab5f41758e00a6be5c (diff) | |
| download | emacsconf-ansible-0b07963c81155b621dd45b878b869a78b8c9de49.tar.xz emacsconf-ansible-0b07963c81155b621dd45b878b869a78b8c9de49.zip  | |
Caption updates
Diffstat (limited to 'roles')
| -rw-r--r-- | roles/caption/tasks/main.yml | 39 | ||||
| -rwxr-xr-x | roles/caption/templates/process-captions.py | 18 | ||||
| -rwxr-xr-x | roles/caption/templates/process-prerec.sh | 18 | ||||
| -rwxr-xr-x | roles/caption/templates/reencode.sh | 43 | ||||
| -rwxr-xr-x | roles/caption/templates/run-aeneas.sh | 14 | ||||
| -rwxr-xr-x | roles/caption/templates/update-task-status.sh | 10 | ||||
| -rwxr-xr-x | roles/caption/templates/upload.sh | 6 | 
7 files changed, 133 insertions, 15 deletions
diff --git a/roles/caption/tasks/main.yml b/roles/caption/tasks/main.yml index c1511cf..a69d848 100644 --- a/roles/caption/tasks/main.yml +++ b/roles/caption/tasks/main.yml @@ -30,20 +30,41 @@      state: present  - name: Ensure the directory exists    file: -    path: "{{ emacsconf_caption_dir }}" +    path: "{{ emacsconf_caption_dir }}/scripts"      state: directory -- name: Copy the script for processing the files -  tags: process-captions +    owner: "{{ emacsconf_user }}" +    group: "{{ emacsconf_group }}" +- name: Recreate encoding script +  tags: process-prerec    template: -    src: process-captions.py -    dest: "{{ emacsconf_caption_dir }}/process-captions.py" +    src: "{{ item }}" +    dest: "{{ emacsconf_caption_dir }}/scripts/{{ item }}" +    owner: "{{ emacsconf_user }}" +    group: "{{ emacsconf_group }}" +    force: no      mode: 0775 -- name: Copy the inotify script -  tags: process-captions +  loop: +    - reencode.sh +- name: Copy scripts for processing +  tags: process-prerec    template: -    src: inotify-process-captions.sh -    dest: "{{ emacsconf_caption_dir }}/inotify-process-captions.sh" +    src: "{{ item }}" +    dest: "{{ emacsconf_caption_dir }}/scripts/{{ item }}" +    owner: "{{ emacsconf_user }}" +    group: "{{ emacsconf_group }}"      mode: 0775 +  loop: +    - process-captions.py +    - process-prerec.sh +    - update-task-status.sh +    - upload.sh +    - run-aeneas.sh +# - name: Copy the inotify script +#   tags: process-captions +#   template: +#     src: inotify-process-captions.sh +#     dest: "{{ emacsconf_caption_dir }}/inotify-process-captions.sh" +#     mode: 0775  - name: Copy talks.json    tags: talks-json    template: diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py index 223531b..1b6515c 100755 --- a/roles/caption/templates/process-captions.py +++ b/roles/caption/templates/process-captions.py @@ -81,6 +81,8 @@ def get_files_to_work_on(directory):                  info[slug]['vtt'] = f              elif re.search('srv2$', filename):                  info[slug]['srv2'] = f +            elif re.search('txt$', filename): +                info[slug]['txt'] = f      needs_work = []      if JSON_FILE:          with open(JSON_FILE) as f: @@ -108,10 +110,11 @@ def extract_audio(work):      if 'Audio: vorbis' in output.decode():          extension = 'ogg'      new_file = work['base'] + '.' + extension -    acodec = 'copy' if re.search('\\.(webm|mp4|mkv)$', work['video']) else 'libopus' +    acodec = 'copy' if re.search('\\.webm$', work['video']) else 'libopus'      log("Extracting audio from %s acodec %s" % (work['video'], acodec))      output = subprocess.check_output(['ffmpeg', '-y', '-i', work['video'], '-acodec', acodec, '-vn', new_file], stderr=subprocess.STDOUT)      work['audio'] = new_file +    subprocess.call(["/data/emacsconf/2022/scripts/upload.sh", work['audio']])      return work  def to_sec(time_str): @@ -142,18 +145,21 @@ def generate_captions(work):      result = clean_up_timestamps(result)      with open(new_file, 'w') as vtt:          whisper.utils.write_vtt(result['segments'], file=vtt) -    with open(work['base'] + '.txt') as txt: +    with open(work['base'] + '.txt', 'w') as txt:          whisper.utils.write_txt(result['segments'], file=txt)      work['vtt'] = new_file +    work['txt'] = work['base'] + '.txt' +    subprocess.call(["/data/emacsconf/2022/scripts/upload.sh", work['vtt'], work['txt']])      if 'srv2' in work: del work['srv2']      return work  def generate_text(work): -    with open(work['base'] + '.txt') as txt: +    with open(work['base'] + '.txt', 'w') as txt:          for i, caption in enumerate(webvtt.read(work['vtt'])): -            txt.write(caption.text) -    work['text'] = work['base'] + '.txt' -     +            txt.write(caption.text + "\n") +    work['txt'] = work['base'] + '.txt' +    return work +  def generate_srv2(work):      """Generate a SRV2 file."""      log("Generating SRV2") diff --git a/roles/caption/templates/process-prerec.sh b/roles/caption/templates/process-prerec.sh new file mode 100755 index 0000000..e49aa72 --- /dev/null +++ b/roles/caption/templates/process-prerec.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# {{ ansible_managed }} + +ORIGINAL=$1 +REENCODED=$(echo "$ORIGINAL" | perl -pe 's/^(emacsconf-[0-9]*-.*?--.*?--.*?--).*/$1reencoded.webm/') +SLUG=$(echo "$ORIGINAL" | perl -ne '/^emacsconf-[0-9]*-(.*?)--/ && print $1') +MAIN=$(echo "$ORIGINAL" | perl -pe 's/^(emacsconf-[0-9]*-.*?--.*?--.*?--).*/$1main.webm/') +SCREEN=reencode-$SLUG +if ! ( screen -ls | grep -q $SLUG ); then +    screen -dmS $SCREEN +fi +( cd /data/emacsconf/cache; ./update-cache ) +/data/emacsconf/2022/scripts/update-task-status.sh $SLUG "WAITING_FOR_PREREC" "PROCESSING" +#if [[ ! -f "$REENCODED" ]]; then +screen -S $SCREEN -X screen -t reencode-$SLUG /bin/bash -c "/data/emacsconf/2022/scripts/reencode.sh \"$ORIGINAL\" \"$REENCODED\" && /data/emacsconf/2022/scripts/upload.sh $REENCODED && exec /bin/bash" & +#fi +screen -S $SCREEN -X screen -t captions-$SLUG /bin/bash -c "/data/emacsconf/2022/scripts/process-captions.py $(dirname $ORIGINAL); /data/emacsconf/2022/scripts/update-task-status.sh $SLUG PROCESSING TO_ASSIGN; exec /bin/bash" +screen -x $SCREEN diff --git a/roles/caption/templates/reencode.sh b/roles/caption/templates/reencode.sh new file mode 100755 index 0000000..e3a82eb --- /dev/null +++ b/roles/caption/templates/reencode.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Defaults +q=32 +cpu=4 +time_limit="" +print_only=false + +while getopts :q:c:t:s OPT; do +    case $OPT in +        q|+q) +            q="$OPTARG" +            ;; +        c|+c) +            cpu="$OPTARG" +            ;; +        t|+t) +            time_limit="-to $OPTARG" +            ;; +        s) +            print_only=true +            ;; +        *) +            echo "usage: `basename $0` [+-q ARG] [+-c ARG} [--] ARGS..." +            exit 2 +    esac +done +shift `expr $OPTIND - 1` +OPTIND=1 + +command="$(cat<<EOF +ffmpeg -y -i "$1" $time_limit -c:v libvpx-vp9 -b:v 0 -crf $q -an -row-mt 1 -tile-columns 2 -tile-rows 2 -cpu-used $cpu -g 240 -pass 1 -f webm -threads $cpu /dev/null && +    ffmpeg -y -i "$1" $time_limit -c:v libvpx-vp9 -b:v 0 -crf $q -c:a libopus -row-mt 1 -tile-columns 2 -tile-rows 2 -cpu-used $cpu -pass 2 -g 240 -threads $cpu "$2" +EOF +)" + +if [ $print_only == true ]; then +    echo "$command" +else +    eval "$command" +fi diff --git a/roles/caption/templates/run-aeneas.sh b/roles/caption/templates/run-aeneas.sh new file mode 100755 index 0000000..6f40134 --- /dev/null +++ b/roles/caption/templates/run-aeneas.sh @@ -0,0 +1,14 @@ +#!/usr/bin/bash +# +# {{ ansible_managed }} +# +AUDIO=$(ls *.opus *.ogg | head -n1) +BASE=$(echo $AUDIO | perl -pe 's/^(emacsconf-2022-.*?--.*?--.*?)--.*/$1/') +echo $AUDIO +echo $BASE +if [ ! -f $BASE--whisper.vtt ]; then +  cp ${BASE}--main.vtt ${BASE}--whisper.vtt +fi +python3 -m aeneas.tools.execute_task $AUDIO *.txt "task_language=eng|os_task_file_format=vtt|is_text_type=plain" ${BASE}--aeneas.vtt +cp ${BASE}--aeneas.vtt ${BASE}--main.vtt +/data/emacsconf/2022/scripts/upload.sh ${BASE}--main.vtt diff --git a/roles/caption/templates/update-task-status.sh b/roles/caption/templates/update-task-status.sh new file mode 100755 index 0000000..18b7c1b --- /dev/null +++ b/roles/caption/templates/update-task-status.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# {{ ansible_managed }} +SLUG="$1" +FROM_STATUS="$2" +TO_STATUS="$3" +cd ~/emacsconf-2022-private +git pull +emacsclient --eval "(progn (emacsconf-update-talk-status \"$SLUG\" \"$FROM_STATUS\" \"$TO_STATUS\") (emacsconf-with-talk-heading \"$SLUG\" (emacsconf-cache-video-data (emacsconf-get-talk-info-for-subtree)) (save-buffer)))" +git commit -m "Update task status for $SLUG" conf.org +git push diff --git a/roles/caption/templates/upload.sh b/roles/caption/templates/upload.sh new file mode 100755 index 0000000..f723d29 --- /dev/null +++ b/roles/caption/templates/upload.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# {{ ansible_managed }} + +scp $* orga@media.emacsconf.org:~/backstage +emacsclient --eval "(emacsconf-publish-backstage-index)" +rsync -avze ssh orga@media.emacsconf.org:~/backstage/ /data/emacsconf/cache/  | 
