summaryrefslogtreecommitdiff
path: root/config/mpv/scripts/subs2srs/utils
diff options
context:
space:
mode:
authornavewindre <boneyaard@gmail.com>2025-04-05 03:00:29 +0200
committernavewindre <boneyaard@gmail.com>2025-04-05 03:00:29 +0200
commitd6c4365b8de32b621ac46074a9b69908b95686c0 (patch)
tree495cb5b1aa7e68ab6ec07fa5fb09904a8c7e47e7 /config/mpv/scripts/subs2srs/utils
parentb24463f3d045783b8f4e72926054d53b908e150f (diff)
a
Diffstat (limited to 'config/mpv/scripts/subs2srs/utils')
-rw-r--r--config/mpv/scripts/subs2srs/utils/base64.lua46
-rw-r--r--config/mpv/scripts/subs2srs/utils/filename_factory.lua89
-rw-r--r--config/mpv/scripts/subs2srs/utils/forvo.lua145
-rw-r--r--config/mpv/scripts/subs2srs/utils/pause_timer.lua33
-rw-r--r--config/mpv/scripts/subs2srs/utils/play_control.lua61
-rw-r--r--config/mpv/scripts/subs2srs/utils/switch.lua38
-rw-r--r--config/mpv/scripts/subs2srs/utils/timings.lua28
7 files changed, 440 insertions, 0 deletions
diff --git a/config/mpv/scripts/subs2srs/utils/base64.lua b/config/mpv/scripts/subs2srs/utils/base64.lua
new file mode 100644
index 0000000..0fe2d06
--- /dev/null
+++ b/config/mpv/scripts/subs2srs/utils/base64.lua
@@ -0,0 +1,46 @@
+--[[
+Copyright: Ren Tatsumoto and contributors
+License: GNU GPL, version 3 or later; http://www.gnu.org/licenses/gpl.html
+
+Encoding and decoding in base64
+]]
+
+-- http://lua-users.org/wiki/BaseSixtyFour
+
+-- Base64 alphabet: the character at position n+1 stands for the 6-bit value n (enc indexes it, dec searches it).
+local b = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+
+-- Encodes the string `data` as base64 text, followed by '=' padding selected by #data % 3.
+local function enc(data)
+ -- Stage 1: expand every byte of `data` into eight '0'/'1' characters, most significant bit first.
+ return ((data:gsub('.', function(x)
+ local r,b='',x:byte() -- NOTE: this `b` (the byte value) shadows the alphabet string inside this callback only
+ for i=8,1,-1 do r=r..(b%2^i-b%2^(i-1)>0 and '1' or '0') end
+ return r;
+ -- Stage 2: append '0000', then replace each complete group of six bits by its alphabet character.
+ end)..'0000'):gsub('%d%d%d?%d?%d?%d?', function(x)
+ if (#x < 6) then return '' end -- incomplete trailing group: drop it
+ local c=0
+ for i=1,6 do c=c+(x:sub(i,i)=='1' and 2^(6-i) or 0) end
+ return b:sub(c+1,c+1) -- here `b` is the module-level alphabet again
+ end)..({ '', '==', '=' })[#data%3+1]) -- padding: '', '==' or '=' for #data % 3 == 0, 1 or 2
+end
+
+-- Decodes the base64 string `data` and returns the decoded bytes as a string.
+local function dec(data)
+ data = string.gsub(data, '[^'..b..'=]', '') -- drop every character that is neither in the alphabet nor '='
+ return (data:gsub('.', function(x)
+ if (x == '=') then return '' end -- padding contributes no bits
+ local r,f='',(b:find(x)-1) -- f: 0-based position of the character in the alphabet, i.e. its 6-bit value
+ for i=6,1,-1 do r=r..(f%2^i-f%2^(i-1)>0 and '1' or '0') end -- append those six bits, most significant first
+ return r;
+ end):gsub('%d%d%d?%d?%d?%d?%d?%d?', function(x)
+ if (#x ~= 8) then return '' end -- leftover bits that do not fill a whole byte are discarded
+ local c=0
+ for i=1,8 do c=c+(x:sub(i,i)=='1' and 2^(8-i) or 0) end
+ return string.char(c)
+ end)) -- the outer parentheses keep only the decoded string, not gsub's substitution count
+end
+
+-- Public interface of this module.
+return {
+ enc = enc, -- enc(data): string -> base64 text
+ dec = dec, -- dec(data): base64 text -> string
+}
diff --git a/config/mpv/scripts/subs2srs/utils/filename_factory.lua b/config/mpv/scripts/subs2srs/utils/filename_factory.lua
new file mode 100644
index 0000000..a794fc7
--- /dev/null
+++ b/config/mpv/scripts/subs2srs/utils/filename_factory.lua
@@ -0,0 +1,89 @@
+--[[
+Copyright: Ren Tatsumoto and contributors
+License: GNU GPL, version 3 or later; http://www.gnu.org/licenses/gpl.html
+
+Creates image and audio filenames compatible with Anki.
+]]
+
+local mp = require('mp')
+local h = require('helpers')
+
+local filename -- cleaned-up name of the loaded media file; assigned by make_media_filename, read by make_filename
+
+local anki_compatible_length = (function()
+    -- Anki's `Tools->Check Media...` forcibly renames every file whose name is longer than 119 bytes.
+    local max_total_bytes = 119
+    -- Suffix length that is assumed when the caller does not pass a timestamp.
+    local default_suffix_bytes = #'_99h99m99s999ms-99h99m99s999ms.webp'
+
+    -- Shortens `str` so that `str .. timestamp` stays within the byte limit.
+    -- Returns `str` unchanged when it already fits. If the awk call used for
+    -- shortening fails, returns 'subs2srs_' followed by the current os.time().
+    return function(str, timestamp)
+        local suffix_bytes = timestamp and #timestamp or default_suffix_bytes
+        local limit_bytes = max_total_bytes - suffix_bytes
+        if #str <= limit_bytes then
+            return str
+        end
+
+        -- Character budget, derived from the byte length of one sample character.
+        local char_width = h.contains_non_latin_letters(str) and #'車' or #'z'
+        local limit_chars = math.floor(limit_bytes / char_width)
+        if limit_chars == limit_bytes then
+            -- one byte per character: a plain byte cut is enough
+            return str:sub(1, limit_bytes)
+        end
+
+        -- Otherwise the cut is delegated to awk's substr().
+        local ret = h.subprocess {
+            'awk',
+            '-v', string.format('str=%s', str),
+            '-v', string.format('limit=%d', limit_chars),
+            'BEGIN{print substr(str, 1, limit); exit}'
+        }
+        if ret.status ~= 0 then
+            return 'subs2srs_' .. os.time()
+        end
+        ret.stdout = h.remove_leading_trailing_spaces(h.remove_newlines(ret.stdout))
+        return ret.stdout
+    end
+end)()
+
+local function make_media_filename()
+    -- Rebuilds the module-level `filename` from mpv's "filename" property (file name without path)
+    -- by passing it through the helper clean-up functions in a fixed order.
+    local name = h.remove_extension((mp.get_property("filename")))
+    name = h.remove_filename_text_in_parentheses(name)
+    name = h.remove_text_in_brackets(name)
+    filename = h.remove_special_characters(name)
+end
+
+local function timestamp_range(start_timestamp, end_timestamp, extension)
+    -- Builds the filename suffix for media that covers a time range.
+    -- The result has the form _<start>_<end><extension>, where both times are rendered by
+    -- h.human_readable_time and joined by an underscore.
+    -- Extension must already contain the dot.
+    local from_time = h.human_readable_time(start_timestamp)
+    local to_time = h.human_readable_time(end_timestamp)
+    return string.format('_%s_%s%s', from_time, to_time, extension)
+end
+
+local function timestamp_static(timestamp, extension)
+    -- Builds the filename suffix for media taken at a single point in time.
+    -- The result has the form _<time><extension>, with the time rendered by h.human_readable_time.
+    -- Extension must already contain the dot.
+    local time_part = h.human_readable_time(timestamp)
+    return string.format('_%s%s', time_part, extension)
+end
+
+local function make_filename(...)
+    -- Returns the lower-cased media file name: the cached base name (shortened if needed) plus a
+    -- timestamp suffix. Fewer than three arguments select the single-timestamp suffix
+    -- (timestamp, extension); otherwise the range suffix (start, end, extension) is used.
+    local suffix = #{ ... } < 3 and timestamp_static(...) or timestamp_range(...)
+    local base_name = anki_compatible_length(filename, suffix)
+    return (base_name .. suffix):lower()
+end
+
+-- Recompute the cached base name each time mpv fires the "file-loaded" event.
+mp.register_event("file-loaded", make_media_filename)
+
+-- Public interface of this module.
+return {
+ make_filename = make_filename, -- make_filename(timestamp, extension) or make_filename(start, end, extension)
+}
diff --git a/config/mpv/scripts/subs2srs/utils/forvo.lua b/config/mpv/scripts/subs2srs/utils/forvo.lua
new file mode 100644
index 0000000..09bc596
--- /dev/null
+++ b/config/mpv/scripts/subs2srs/utils/forvo.lua
@@ -0,0 +1,145 @@
+--[[
+Copyright: Ren Tatsumoto and contributors
+License: GNU GPL, version 3 or later; http://www.gnu.org/licenses/gpl.html
+
+Utils for downloading pronunciations from Forvo
+]]
+
+local utils = require('mp.utils')
+local msg = require('mp.msg')
+local h = require('helpers')
+local base64 = require('utils.base64')
+-- Module state. init() later adds the `config` and `platform` fields.
+local self = {
+ output_dir_path = nil, -- directory that receives the re-encoded audio; assigned by set_output_dir()
+}
+
+local function url_encode(url)
+    -- Percent-encodes `url` for use inside a URL; spaces become '+'.
+    -- Returns nothing when `url` is nil.
+    -- Based on https://gist.github.com/liukun/f9ce7d6d14fa45fe9b924a3eed5c3d99
+    if url == nil then
+        return
+    end
+
+    -- Line feeds are expanded to CR LF before escaping.
+    local with_crlf = url:gsub("\n", "\r\n")
+    -- Every character other than alphanumerics, space, '_', '%', '-', '.' and '~'
+    -- becomes %XX with two uppercase hex digits.
+    local escaped = with_crlf:gsub("([^%w _%%%-%.~])", function(ch)
+        return ("%%%02X"):format(ch:byte())
+    end)
+    -- Parentheses drop gsub's second result (the substitution count).
+    return (escaped:gsub(" ", "+"))
+end
+
+local function reencode(source_path, dest_path)
+    -- Runs a separate mpv process that reads `source_path` and writes `dest_path` (-o=).
+    -- Codec, container format and bitrate are taken from the config.
+    -- Returns whatever h.subprocess returns.
+    local cfg = self.config
+    local argv = {
+        'mpv', source_path,
+        -- playback behaviour and stream selection
+        '--loop-file=no', '--keep-open=no', '--video=no',
+        '--no-ocopy-metadata', '--no-sub',
+        '--audio-channels=mono',
+        -- fixed encoder options and audio filter
+        '--oacopts-add=vbr=on',
+        '--oacopts-add=application=voip',
+        '--oacopts-add=compression_level=10',
+        '--af-append=silenceremove=1:0:-50dB',
+        -- values that depend on the config / the destination
+        table.concat { '--oac=', cfg.audio_codec },
+        table.concat { '--of=', cfg.audio_format },
+        table.concat { '--oacopts-add=b=', cfg.audio_bitrate },
+        table.concat { '-o=', dest_path },
+    }
+    return h.subprocess(argv)
+end
+
+local function reencode_and_store(source_path, filename)
+    -- Re-encodes `source_path` into `filename` inside self.output_dir_path.
+    -- Returns true when the subprocess result has status 0.
+    local target_path = utils.join_path(self.output_dir_path, filename)
+    return reencode(source_path, target_path).status == 0
+end
+
+local function curl_save(source_url, save_location)
+    -- Fetches `source_url` with curl (-s, -L) and writes it to `save_location` (-o).
+    -- Returns true when the subprocess result has status 0.
+    local outcome = h.subprocess({ 'curl', source_url, '-s', '-L', '-o', save_location })
+    return outcome.status == 0
+end
+
+local function get_pronunciation_url(word)
+    -- Fetches the Forvo search page for `word` (the .../ja URL) and returns the URL of the audio
+    -- file referenced by the first Play(...) call found on that page.
+    -- Returns nil when the page could not be fetched, contains no Play(...) call, or has no
+    -- entry for the configured audio extension.
+    local file_format = self.config.audio_extension:sub(2) -- extension without its first character (the dot)
+    local search_url = string.format('https://forvo.com/search/%s/ja', url_encode(word))
+    local forvo_page = h.subprocess { 'curl', '-s', search_url }.stdout
+    if type(forvo_page) ~= 'string' then
+        -- curl produced no output; string.match would raise on a non-string value
+        return nil
+    end
+
+    local play_params = string.match(forvo_page, "Play%((.-)%);")
+    if not play_params then
+        return nil
+    end
+
+    -- The first two quoted parameters are taken as the mp3 and ogg entries, in that order.
+    local iter = string.gmatch(play_params, "'(.-)'")
+    local mp3_encoded = iter()
+    local ogg_encoded = iter()
+    local encoded_path = ({ mp3 = mp3_encoded, ogg = ogg_encoded })[file_format]
+    if encoded_path == nil or encoded_path == '' then
+        -- only mp3 and ogg entries exist; any other extension (or a missing entry) has
+        -- nothing to decode, and base64.dec would raise on nil
+        return nil
+    end
+    return string.format('https://audio00.forvo.com/%s/%s', file_format, base64.dec(encoded_path))
+end
+
+local function make_forvo_filename(word)
+    -- File name of the form forvo_<stem><audio_extension>. The stem is the current os.time()
+    -- when self.platform.windows is set, otherwise the word itself.
+    local stem = self.platform.windows and os.time() or word
+    return ('forvo_%s%s'):format(stem, self.config.audio_extension)
+end
+
+local function get_forvo_pronunciation(word)
+    -- Downloads the Forvo pronunciation of `word`, re-encodes it into the output directory and
+    -- returns self.config.audio_template formatted with the new file name.
+    -- Returns nothing when no audio URL was found, and nil when downloading or re-encoding failed.
+    local audio_url = get_pronunciation_url(word)
+    if h.is_empty(audio_url) then
+        msg.warn(string.format("Seems like Forvo doesn't have audio for word %s.", word))
+        return
+    end
+
+    local filename = make_forvo_filename(word)
+    local tmp_filepath = utils.join_path(self.platform.tmp_dir(), filename)
+    -- Download first; re-encode only if the download succeeded.
+    local stored = curl_save(audio_url, tmp_filepath) and reencode_and_store(tmp_filepath, filename)
+
+    local result
+    if not stored then
+        msg.warn(string.format("Couldn't download audio for word %s from Forvo.", word))
+    else
+        result = string.format(self.config.audio_template, filename)
+    end
+
+    -- The temporary download is removed whether or not the steps above succeeded.
+    os.remove(tmp_filepath)
+    return result
+end
+
+local append = function(new_data, stored_data)
+    -- Adds a Forvo pronunciation of the vocab word to `new_data` and returns `new_data`.
+    -- `stored_data` is only read (vocab word and existing vocab audio, looked up by the field
+    -- names from the config); `new_data` is the table that gets modified.
+    -- `new_data` is returned untouched when Forvo is disabled, the vocab audio field is not
+    -- present as a string, the vocab word is empty, audio already exists (unless use_forvo is
+    -- 'always'), or no pronunciation could be obtained.
+    local cfg = self.config
+
+    if cfg.use_forvo == 'no' then
+        -- forvo functionality was disabled in the config file
+        return new_data
+    end
+
+    if type(stored_data[cfg.vocab_audio_field]) ~= 'string' then
+        -- there is no field configured to store forvo pronunciation
+        return new_data
+    end
+
+    local word = stored_data[cfg.vocab_field]
+    if h.is_empty(word) then
+        -- target word field is empty. can't continue.
+        return new_data
+    end
+
+    if cfg.use_forvo ~= 'always' and not h.is_empty(stored_data[cfg.vocab_audio_field]) then
+        -- the field already holds audio and overwriting was not requested
+        return new_data
+    end
+
+    local forvo_pronunciation = get_forvo_pronunciation(word)
+    if h.is_empty(forvo_pronunciation) then
+        return new_data
+    end
+
+    if cfg.vocab_audio_field == cfg.audio_field then
+        -- improperly configured fields. don't lose sentence audio
+        -- new_data may carry no value for this field; treat that as an empty string
+        -- instead of raising on the concatenation.
+        new_data[cfg.audio_field] = forvo_pronunciation .. (new_data[cfg.audio_field] or '')
+    else
+        new_data[cfg.vocab_audio_field] = forvo_pronunciation
+    end
+
+    return new_data
+end
+
+local function set_output_dir(dir_path)
+    -- Remembers the directory into which reencode_and_store() writes its files.
+    self.output_dir_path = dir_path
+end
+
+local function init(config, platform)
+    -- Stores the config table and the platform object that the other functions of this
+    -- module read through `self`.
+    self.config, self.platform = config, platform
+end
+
+-- Public interface of this module.
+return {
+ append = append, -- append(new_data, stored_data): returns new_data, possibly with a pronunciation added
+ init = init, -- init(config, platform): must run before append, which reads self.config
+ set_output_dir = set_output_dir, -- set_output_dir(dir_path): where re-encoded audio is stored
+}
diff --git a/config/mpv/scripts/subs2srs/utils/pause_timer.lua b/config/mpv/scripts/subs2srs/utils/pause_timer.lua
new file mode 100644
index 0000000..e37b0ea
--- /dev/null
+++ b/config/mpv/scripts/subs2srs/utils/pause_timer.lua
@@ -0,0 +1,33 @@
+--[[
+Copyright: Ren Tatsumoto and contributors
+License: GNU GPL, version 3 or later; http://www.gnu.org/licenses/gpl.html
+
+Pause timer stops playback when reaching a set timing.
+]]
+
+local mp = require('mp')
+local stop_time = -1 -- "time-pos" value after which playback is paused; reset to -1 by stop()
+local check_stop -- forward declaration: set_stop_time and stop reference it before it is defined below
+
+local function set_stop_time(time)
+    -- Arms the timer: stores `time` as the stop position and registers check_stop as an
+    -- observer of the "time-pos" property.
+    stop_time = time
+    mp.observe_property("time-pos", "number", check_stop)
+end
+
+local function stop()
+    -- Disarms the timer: resets the stop position and removes the check_stop observer.
+    -- The player's pause state is not touched here.
+    stop_time = -1
+    mp.unobserve_property(check_stop)
+end
+
+check_stop = function(_, time)
+    -- Observer callback for "time-pos": once the position passes stop_time, disarms the
+    -- timer and pauses playback.
+    -- mpv passes nil while the property is unavailable; skip those calls instead of
+    -- raising an error on the comparison below.
+    if time == nil then
+        return
+    end
+    if time > stop_time then
+        stop()
+        mp.set_property("pause", "yes")
+    end
+end
+
+-- Public interface of this module.
+return {
+ set_stop_time = set_stop_time, -- set_stop_time(time): pause once "time-pos" passes `time`
+ check_stop = check_stop, -- the "time-pos" observer callback itself
+ stop = stop, -- stop(): cancel a pending pause
+}
diff --git a/config/mpv/scripts/subs2srs/utils/play_control.lua b/config/mpv/scripts/subs2srs/utils/play_control.lua
new file mode 100644
index 0000000..901377d
--- /dev/null
+++ b/config/mpv/scripts/subs2srs/utils/play_control.lua
@@ -0,0 +1,61 @@
+--[[
+Copyright: Ren Tatsumoto and contributors
+License: GNU GPL, version 3 or later; http://www.gnu.org/licenses/gpl.html
+
+Provides additional methods for controlling playback.
+]]
+
+local mp = require('mp')
+local h = require('helpers')
+local pause_timer = require('utils.pause_timer')
+local Subtitle = require('subtitles.subtitle')
+
+local current_sub -- subtitle captured by play_till_next_sub_end; check_sub compares later subtitles against it
+
+local function stop_at_the_end(sub)
+    -- Schedules a pause 0.050 before the end time of `sub` and notifies the user.
+    local stop_position = sub['end'] - 0.050
+    pause_timer.set_stop_time(stop_position)
+    h.notify("Playing till the end of the sub...", "info", 3)
+end
+
+local function play_till_sub_end()
+    -- Replays the current subtitle: seeks to its start, resumes playback and schedules a
+    -- pause shortly before its end.
+    local sub = Subtitle:now()
+    if not sub then
+        -- Subtitle:now() can yield nothing (check_sub guards against that as well);
+        -- indexing it would raise, so tell the user and do nothing.
+        h.notify("No subtitle at the current position.", "info", 2)
+        return
+    end
+    mp.commandv('seek', sub['start'], 'absolute')
+    mp.set_property("pause", "no")
+    stop_at_the_end(sub)
+end
+
+local function sub_seek(direction, pause)
+    -- Jumps one subtitle backward (direction == 'backward') or forward (anything else),
+    -- optionally pauses, and cancels any pending pause timer.
+    local step = '1'
+    if direction == 'backward' then
+        step = '-1'
+    end
+    mp.commandv("sub_seek", step)
+    -- Follow up with a small exact relative seek of 0.015
+    -- (presumably so the position lands inside the subtitle - confirm).
+    mp.commandv("seek", "0.015", "relative+exact")
+
+    if pause then
+        mp.set_property("pause", "yes")
+    end
+    pause_timer.stop()
+end
+
+local function sub_rewind()
+    -- Seeks to 0.015 past the start of the current subtitle and cancels any pending pause timer.
+    local sub = Subtitle:now()
+    if sub then
+        mp.commandv('seek', sub['start'] + 0.015, 'absolute')
+    end
+    -- Without a current subtitle there is nothing to seek to (indexing nil would raise);
+    -- the pending pause is cancelled either way.
+    pause_timer.stop()
+end
+
+local function check_sub()
+    -- Observer callback registered by play_till_next_sub_end. Once a subtitle is available
+    -- that differs from `current_sub`, it unregisters itself and schedules a pause at the
+    -- end of that subtitle.
+    -- NOTE(review): the comparison uses Lua equality on the values returned by Subtitle:now();
+    -- whether that means identity or value equality depends on Subtitle, which is not visible here.
+    local sub = Subtitle:now()
+    if not sub or sub == current_sub then
+        return
+    end
+    mp.unobserve_property(check_sub)
+    stop_at_the_end(sub)
+end
+
+local function play_till_next_sub_end()
+ -- Resumes playback and waits for a different subtitle to appear; check_sub then schedules
+ -- a pause at the end of that subtitle.
+ current_sub = Subtitle:now() -- remember what is shown right now (check_sub treats a missing subtitle as possible)
+ mp.observe_property("sub-text", "string", check_sub) -- check_sub runs on every "sub-text" change
+ mp.set_property("pause", "no")
+ h.notify("Waiting till next sub...", "info", 10)
+end
+
+-- Public interface of this module.
+return {
+ play_till_sub_end = play_till_sub_end, -- replay the current subtitle and pause at its end
+ play_till_next_sub_end = play_till_next_sub_end, -- play on and pause at the end of the next subtitle
+ sub_seek = sub_seek, -- sub_seek(direction, pause)
+ sub_rewind = sub_rewind, -- jump back to the start of the current subtitle
+}
diff --git a/config/mpv/scripts/subs2srs/utils/switch.lua b/config/mpv/scripts/subs2srs/utils/switch.lua
new file mode 100644
index 0000000..5dac1c6
--- /dev/null
+++ b/config/mpv/scripts/subs2srs/utils/switch.lua
@@ -0,0 +1,38 @@
+--[[
+Copyright: Ren Tatsumoto and contributors
+License: GNU GPL, version 3 or later; http://www.gnu.org/licenses/gpl.html
+
+Switch cycles between values in a table.
+]]
+
+local function make_switch(states)
+    -- Creates a switch over the array `states`; the first state is selected initially.
+    -- The returned functions are plain closures (they take no `self` parameter).
+    local position = 1
+    local switch = {}
+
+    -- Advances to the next state, wrapping around to the first one after the last.
+    function switch.bump()
+        position = position + 1
+        if position > #states then
+            position = 1
+        end
+    end
+
+    -- Returns the currently selected state.
+    function switch.get()
+        return states[position]
+    end
+
+    -- Selects `new_state` if it occurs in `states`; otherwise the selection is unchanged.
+    -- When the value occurs more than once, the last occurrence wins.
+    function switch.set(new_state)
+        for idx, candidate in ipairs(states) do
+            if candidate == new_state then
+                position = idx
+            end
+        end
+    end
+
+    return switch
+end
+
+-- Public interface of this module.
+return {
+ new = make_switch -- new(states): returns a table with bump(), get() and set(new_state)
+}
diff --git a/config/mpv/scripts/subs2srs/utils/timings.lua b/config/mpv/scripts/subs2srs/utils/timings.lua
new file mode 100644
index 0000000..d2408d6
--- /dev/null
+++ b/config/mpv/scripts/subs2srs/utils/timings.lua
@@ -0,0 +1,28 @@
+--[[
+Copyright: Ren Tatsumoto and contributors
+License: GNU GPL, version 3 or later; http://www.gnu.org/licenses/gpl.html
+
+Object that remembers manually set timings.
+]]
+
+local function new_timings()
+    -- Creates an object that remembers two timings, 'start' and 'end'.
+    -- Both begin at -1, which is_set() reports as "not set".
+    local marks = { ['start'] = -1, ['end'] = -1 }
+    local timings = {}
+
+    -- True when the timing stored under `position` is zero or greater.
+    function timings.is_set(position)
+        return marks[position] >= 0
+    end
+
+    -- Stores `time` under `position`.
+    function timings.set(position, time)
+        marks[position] = time
+    end
+
+    -- Returns the timing stored under `position`.
+    function timings.get(position)
+        return marks[position]
+    end
+
+    return timings
+end
+
+-- Public interface of this module.
+return {
+ new = new_timings, -- new(): returns a table with is_set(position), set(position, time) and get(position)
+}