2020-08-08 15:18:14 +02:00
|
|
|
import hashlib
|
2020-08-08 15:26:13 +02:00
|
|
|
import os
|
2020-03-08 14:48:04 +01:00
|
|
|
import re
|
2020-03-07 10:45:39 +01:00
|
|
|
from subprocess import run
|
|
|
|
|
|
|
|
import youtube_dl
|
2020-04-15 18:11:45 +02:00
|
|
|
from peewee import DoesNotExist
|
2020-03-07 10:45:39 +01:00
|
|
|
|
2020-08-08 15:18:14 +02:00
|
|
|
from models import Episode, Series, Line, Phrase
|
|
|
|
from utils import srtdir, pretty_title
|
2020-03-07 10:45:39 +01:00
|
|
|
|
2020-04-15 18:11:45 +02:00
|
|
|
# Playlists to import. Every entry needs a "name" (used as the Series title;
# names containing "Campaign" are treated as campaigns) and a YouTube
# "playlist_id". "single_speaker" is optional and counts as false when absent.
series_data = [
    {
        "name": "Campaign 1",
        "playlist_id": "PL1tiwbzkOjQz7D0l_eLJGAISVtcL7oRu_",
    },
    {
        "name": "Campaign 2",
        "playlist_id": "PL1tiwbzkOjQxD0jjAE7PsWoaCrs0EkBH2",
    },
    {
        "name": "Handbooker Helper",
        "playlist_id": "PL1tiwbzkOjQyr6-gqJ8r29j_rJkR49uDN",
        "single_speaker": True,
    },
]
|
2020-03-08 18:48:14 +01:00
|
|
|
|
|
|
|
|
|
|
|
# Extracts the episode number from a video title. The alternation also
# accepts the misspelling "Epsiode".
_EPISODE_NUMBER_RE = re.compile(r"Ep(?:is|si)ode (\d+)")


def _sha256_of_file(path):
    """Return the hex SHA-256 digest of the file at *path*, read in 8 KiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _get_or_create_series(entry):
    """Load the Series titled ``entry["name"]`` (creating it if missing), refresh its flags and save it."""
    name = entry["name"]
    try:
        series = Series.select().where(Series.title == name).get()
    except DoesNotExist:
        series = Series()
        series.title = name

    series.is_campaign = "Campaign" in name
    series.single_speaker = entry.get("single_speaker", False)
    series.save()
    return series


def _fetch_playlist_videos(playlist_id):
    """Return the playlist's entries as a list, without downloading any video."""
    ydl_opts = {
        'extract_flat': True
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        playlist = ydl.extract_info("https://www.youtube.com/playlist?list=" + playlist_id, download=False)
    # Materialise so the entries can be both printed and iterated afterwards.
    return list(playlist["entries"])


def _import_episode(series, nr, video):
    """Create or update the Episode for playlist position *nr* and refresh its subtitles.

    Downloads the English subtitles with youtube_dl, converts them to ``.srt``
    with ffmpeg and stores the SHA-256 of the result. When the hash differs
    from the stored one, the episode's Line and Phrase rows are deleted and
    the import flags are reset. If the subtitle files are missing, the
    episode is marked as not downloaded.
    """
    title = video["title"]
    try:
        episode = Episode.select().where((Episode.series == series) & (Episode.video_number == nr)).get()
    except DoesNotExist:
        episode = Episode()
        episode.series = series
        episode.video_number = nr

    episode.title = title
    episode.pretty_title = pretty_title(title)
    if series.is_campaign:
        match = _EPISODE_NUMBER_RE.search(title)
        try:
            episode.episode_number = int(match.group(1))
        except AttributeError:
            # `match` is None: the title carries no episode number.
            if series.title == "Campaign 1":  # one-shots at the end of campaign 1
                episode.episode_number = episode.video_number - 3
            else:
                raise
    else:
        episode.episode_number = episode.video_number
    episode.youtube_id = video["url"]
    # Save before touching files: episode.id is used as the subtitle file name.
    episode.save()

    base_path = srtdir / str(episode.id)
    vtt_path = base_path.with_suffix(".en.vtt")
    srt_path = base_path.with_suffix(".srt")

    ydl_opts = {
        "writesubtitles": True,
        "subtitleslangs": ["en"],
        "skip_download": True,
        "outtmpl": str(base_path),
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([f'https://www.youtube.com/watch?v={episode.youtube_id}'])
    run(["ffmpeg", "-y", "-i", vtt_path, srt_path])

    episode.downloaded = True
    try:
        vtt_path.unlink()
        subtitle_hash = _sha256_of_file(srt_path)
    except FileNotFoundError:
        # No subtitle file was produced for this video.
        episode.downloaded = False
    else:
        if episode.subtitle_hash != subtitle_hash:
            # peewee delete queries only run once .execute() is called.
            # NOTE(review): Line is deleted before Phrase, as in the original
            # statement order; confirm this suits the foreign keys in models.
            Line.delete().where(Line.episode == episode).execute()
            Phrase.delete().where(Phrase.episode == episode).execute()
            episode.phrases_imported = False
            episode.text_imported = False
            episode.subtitle_hash = subtitle_hash
    episode.save()


def main():
    """Synchronise every playlist in ``series_data`` with the database.

    For each configured series the Series row is created or updated, the
    playlist entries are fetched, and every video is imported as an Episode
    together with its English subtitles.

    Raises:
        AttributeError: if a campaign video title contains no episode number
            and the series is not "Campaign 1".
    """
    # Add 15 to the process niceness so the import yields CPU to other work.
    os.nice(15)
    for entry in series_data:
        series = _get_or_create_series(entry)
        videos = _fetch_playlist_videos(entry["playlist_id"])

        print([video["url"] for video in videos])

        for nr, video in enumerate(videos, 1):
            _import_episode(series, nr, video)
|
2020-03-08 18:48:14 +01:00
|
|
|
|
|
|
|
|
|
|
|
# Run the import only when this file is executed as a script.
if __name__ == "__main__":
    main()
|