diff options
Diffstat (limited to '')
-rw-r--r-- | roles/caption/tasks/main.yml | 11 | ||||
-rwxr-xr-x | roles/caption/templates/process-captions.py | 16 |
2 files changed, 20 insertions, 7 deletions
diff --git a/roles/caption/tasks/main.yml b/roles/caption/tasks/main.yml index b34a67d..fea78f4 100644 --- a/roles/caption/tasks/main.yml +++ b/roles/caption/tasks/main.yml @@ -34,6 +34,17 @@ state: directory owner: "{{ emacsconf_user }}" group: "{{ emacsconf_group }}" +- name: Copy scripts for processing + tags: process-prerec + template: + src: "{{ item }}" + dest: "{{ emacsconf_caption_dir }}/scripts/{{ item }}" + owner: "{{ emacsconf_user }}" + group: "{{ emacsconf_group }}" + mode: 0775 + loop: + - process-captions.py + - split-captions.py # - name: Copy the inotify script # tags: process-captions # template: diff --git a/roles/caption/templates/process-captions.py b/roles/caption/templates/process-captions.py index dc7ef29..a42439b 100755 --- a/roles/caption/templates/process-captions.py +++ b/roles/caption/templates/process-captions.py @@ -93,7 +93,7 @@ def get_files_to_work_on(directory): talk = next(filter(lambda talk: talk['slug'] == val['slug'], talks), None) if talk: val['base'] = os.path.join(os.path.dirname(val['video'] or val['audio']), - base_name(talk['video-slug'])) + base_name(talk['file-prefix'])) else: val['base'] = os.path.join(os.path.dirname(val['video'] or val['audio']), base_name(val['video'] or val['audio'])) @@ -114,7 +114,8 @@ def extract_audio(work): log("Extracting audio from %s acodec %s" % (work['video'], acodec)) output = subprocess.check_output(['ffmpeg', '-y', '-i', work['video'], '-acodec', acodec, '-vn', new_file], stderr=subprocess.STDOUT) work['audio'] = new_file - subprocess.call(["/data/emacsconf/{{ emacsconf_year }}/scripts/upload.sh", work['audio']]) + if os.path.isfile("/data/emacsconf/{{ emacsconf_year }}/scripts/upload.sh"): + subprocess.call(["/data/emacsconf/{{ emacsconf_year }}/scripts/upload.sh", work['audio']]) return work def to_sec(time_str): @@ -143,13 +144,14 @@ def generate_captions(work): audio = whisper.pad_or_trim(audio) result = model.transcribe(audio, verbose=True, language="en") result = clean_up_timestamps(result) - with open(new_file, 'w') as vtt: - whisper.utils.write_vtt(result['segments'], file=vtt) - with open(work['base'] + '.txt', 'w') as txt: - whisper.utils.write_txt(result['segments'], file=txt) + vtt_writer = whisper.utils.get_writer('vtt', os.path.dirname(new_file)) + txt_writer = whisper.utils.get_writer('txt', os.path.dirname(new_file)) + vtt_writer(result, work['audio'], {'max_line_width': 60, 'max_line_count': None, 'highlight_words': None}) + txt_writer(result, work['audio'], {'max_line_width': 60, 'max_line_count': None, 'highlight_words': None}) work['vtt'] = new_file work['txt'] = work['base'] + '.txt' - subprocess.call(["/data/emacsconf/{{ emacsconf_year }}/scripts/upload.sh", work['vtt'], work['txt']]) + if os.path.isfile("/data/emacsconf/{{ emacsconf_year }}/scripts/upload.sh"): + subprocess.call(["/data/emacsconf/{{ emacsconf_year }}/scripts/upload.sh", work['vtt'], work['txt']]) if 'srv2' in work: del work['srv2'] return work |