add code
This commit is contained in:
parent
6a9e7d8764
commit
50307575b2
1 changed files with 205 additions and 0 deletions
205
video2anki.py
Normal file
205
video2anki.py
Normal file
|
@ -0,0 +1,205 @@
|
|||
# GNU AFFERO GENERAL PUBLIC LICENSE v3.0 (see LICENSE or https://www.gnu.org/licenses/agpl-3.0.txt)
|
||||
|
||||
import pysrt, sys, getopt, numbers, genanki, random, tempfile, shutil, traceback, datetime, subprocess, multiprocessing, html
|
||||
from pathlib import Path
|
||||
from tqdm import tqdm
|
||||
from subprocess import Popen, PIPE
|
||||
|
||||
TrackA = 0
|
||||
TrackB = 0
|
||||
Media = ""
|
||||
CJK = "none"
|
||||
Encoding = "iso-8859-1"
|
||||
Title = "Unknown Title"
|
||||
|
||||
TMP = tempfile.mkdtemp()+"/"
|
||||
#TMP = "TMP/"
|
||||
|
||||
AnkiModel = genanki.Model(random.randrange(1 << 30, 1 << 31),
|
||||
'Video2Anki With Media',
|
||||
fields = [{'name' : 'Question'},
|
||||
{'name' : 'Answer'},
|
||||
{'name' : 'Voiceline'}],
|
||||
templates = [{'name' : 'Card',
|
||||
'qfmt' : '{{Question}}<br><{{Voiceline}}',
|
||||
'afmt' : '{{FrontSide}}<hr id="answer">{{Answer}}'}])
|
||||
|
||||
def parse_args():
|
||||
opt = "m:a:b:c:e:t:"
|
||||
lopt = ["media-file=", "track-a=", "track-b=", "cjk-mode=", "encoding=", "title="]
|
||||
args, vals = getopt.getopt(sys.argv[1:], opt, lopt)
|
||||
try:
|
||||
for arg, val in args:
|
||||
print("<"+arg+"> -> "+val)
|
||||
if arg in ("-m", "--media-file"):
|
||||
global Media
|
||||
Media = val
|
||||
print("media set to ", Media)
|
||||
elif arg in ("-a", "--track-a"):
|
||||
global TrackA
|
||||
TrackA = val
|
||||
elif arg in ("-b", "--track-b"):
|
||||
global TrackB
|
||||
TrackB = val
|
||||
elif arg in ("-e", "--encoding"):
|
||||
global Encoding
|
||||
Encoding = val
|
||||
elif arg in ("-t", "--title"):
|
||||
global Title
|
||||
Title = val
|
||||
elif arg in ("-c", "--cjk-mode"):
|
||||
if val in ("c", "j", "k"):
|
||||
global CJK
|
||||
CJK = val
|
||||
else:
|
||||
raise Exception('invalid cjk value: ', val)
|
||||
except getopt.error as err:
|
||||
print(str(err))
|
||||
|
||||
def to_seconds(t):
|
||||
s = t.hour*60*60 + t.minute*60 + t.second
|
||||
m = int(t.microsecond/1000)
|
||||
return str(s)+'.'+str(m)[0]
|
||||
|
||||
def to_duration(start, end):
|
||||
t = start
|
||||
sa = t.hour*60*60 + t.minute*60 + t.second
|
||||
ma = int(t.microsecond/1000)
|
||||
t = end
|
||||
se = t.hour*60*60 + t.minute*60 + t.second
|
||||
me = int(t.microsecond/1000)
|
||||
s = se-sa
|
||||
m = me-ma
|
||||
if m < 0:
|
||||
s = s - 1
|
||||
m = 1000-m
|
||||
return str(s)+'.'+str(m)[0]
|
||||
|
||||
def extract_audio(name, start, end):
|
||||
cmds = ["ffmpeg", "-y", "-threads", str(multiprocessing.cpu_count()), "-i", Media, "-ss", to_seconds(start), "-t", to_duration(start, end), name]
|
||||
cmd = ""
|
||||
for c in cmds:
|
||||
cmd = cmd + str(c) + " "
|
||||
subprocess.check_call(cmd,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.STDOUT,
|
||||
shell=True)
|
||||
|
||||
def extract_track(idx):
|
||||
cmds = ["ffmpeg", "-i", Media, "-map=0:s:"+str(idx), TMP+Media+"_"+str(idx)+".srt"]
|
||||
cmd = ""
|
||||
for c in cmds:
|
||||
cmd = cmd + str(c) + " "
|
||||
subprocess.check_call(cmd,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.STDOUT,
|
||||
shell=True)
|
||||
return TMP+Media+"_"+idx+".srt"
|
||||
|
||||
def get_track(track):
|
||||
if isinstance(track, numbers.Number):
|
||||
track = extract_srt(track)
|
||||
return pysrt.open(track)
|
||||
|
||||
def overlaps(start_a, end_a, start_b, end_b):
|
||||
start_c = start_b if start_a < start_b else start_a
|
||||
end_c = end_b if end_a > end_b else end_b
|
||||
return start_c < end_c
|
||||
|
||||
def apply_cjk(original):
|
||||
text = ""
|
||||
lookup = ""
|
||||
if CJK=="c":
|
||||
lookup = "https://strokeorder.com/chinese/"
|
||||
elif CJK=="j":
|
||||
lookup = "https://japandict.com/kanji/"
|
||||
elif CJK=="k":
|
||||
lookup = "https://koreanhanja.app/"
|
||||
else:
|
||||
return html.escape(original)
|
||||
for p in original:
|
||||
escaped_p = html.escape(p)
|
||||
if is_cjk(p) and lookup!="":
|
||||
text = text + "<a href=\"" + lookup + escaped_p + "\">" + escaped_p + "</a>"
|
||||
else:
|
||||
text = text + escaped_p
|
||||
return text
|
||||
|
||||
def is_cjk(p):
|
||||
cjk_ranges = [
|
||||
( 0x4E00, 0x62FF),
|
||||
( 0x6300, 0x77FF),
|
||||
( 0x7800, 0x8CFF),
|
||||
( 0x8D00, 0x9FCC),
|
||||
( 0x3400, 0x4DB5),
|
||||
(0x20000, 0x215FF),
|
||||
(0x21600, 0x230FF),
|
||||
(0x23100, 0x245FF),
|
||||
(0x24600, 0x260FF),
|
||||
(0x26100, 0x275FF),
|
||||
(0x27600, 0x290FF),
|
||||
(0x29100, 0x2A6DF),
|
||||
(0x2A700, 0x2B734),
|
||||
(0x2B740, 0x2B81D),
|
||||
(0x2B820, 0x2CEAF),
|
||||
(0x2CEB0, 0x2EBEF),
|
||||
(0x2F800, 0x2FA1F)
|
||||
]
|
||||
cp = ord(p)
|
||||
for bottom, top in cjk_ranges:
|
||||
if cp >= bottom and cp <= top:
|
||||
return True
|
||||
return False
|
||||
|
||||
def match_tracks(a, b):
|
||||
matched = []
|
||||
media = []
|
||||
matchbar = tqdm(total=len(a) if len(a) > len(b) else len(b))
|
||||
for sub_a in a:
|
||||
start_a = sub_a.start.to_time()
|
||||
end_a = sub_a.end.to_time()
|
||||
for sub_b in b:
|
||||
start_b = sub_b.start.to_time()
|
||||
end_b = sub_b.end.to_time()
|
||||
if start_b > end_a:
|
||||
# skip a, b is past a
|
||||
break
|
||||
if end_b < start_a:
|
||||
# skip b, a is past b
|
||||
matchbar.update(1)
|
||||
continue
|
||||
elif overlaps(start_a, end_a, start_b, end_b):
|
||||
# match
|
||||
audio_name = str(len(matched)) + ".opus"
|
||||
media.append(TMP + audio_name)
|
||||
extract_audio(TMP + audio_name, start_a, end_a)
|
||||
text_b = apply_cjk(html.escape(sub_b.text))
|
||||
matched.append(genanki.Note(model=AnkiModel, fields=[text_b, html.escape(sub_a.text), "[sound:"+audio_name+"]"]))
|
||||
matchbar.update(1)
|
||||
matchbar.close()
|
||||
return matched, media
|
||||
|
||||
try:
|
||||
parse_args()
|
||||
if Path(Media).is_file()==False:
|
||||
raise Exception('file <', Media, '> does not exist')
|
||||
global AnkiDeck
|
||||
AnkiDeck = genanki.Deck(random.randrange(1 << 30, 1 << 31),Title)
|
||||
subs_a = get_track(TrackA)
|
||||
subs_b = get_track(TrackB)
|
||||
print("matching subtitles of both tracks...")
|
||||
notes, media = match_tracks(subs_a, subs_b)
|
||||
for note in notes:
|
||||
AnkiDeck.add_note(note)
|
||||
print("generating anki deck")
|
||||
package = genanki.Package(AnkiDeck)
|
||||
package.media_files = media
|
||||
print("writing apkg file...")
|
||||
package.write_to_file(Title+".apkg")
|
||||
print("done")
|
||||
except Exception as e:
|
||||
print(traceback.format_exc())
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(e.output)
|
||||
|
||||
shutil.rmtree(TMP)
|
Loading…
Reference in a new issue