mirror of
https://github.com/Findus23/cr-search.git
synced 2024-09-19 15:23:44 +02:00
49 lines
1.7 KiB
Python
49 lines
1.7 KiB
Python
from html import unescape
|
|
|
|
from peewee import fn
|
|
from srt import parse
|
|
|
|
from models import Person, Line, Episode, db
|
|
from typo import fix_typo
|
|
from utils import td_to_milliseconds, get_filename
|
|
|
|
# Import the subtitle files of one campaign into the database: every subtitle
# cue becomes one `Line` row attached to its `Episode` and, where a speaker
# could be identified, to a `Person`.
#
# NOTE(review): the indentation of this script was lost in the copy this was
# reconstructed from. The nesting below is the most natural reading; in
# particular the `else:` branch is taken to pair with `if ":" in text`
# (not with `if name.isupper()`) -- confirm against the upstream file.
campaign = 2

for episode_nr in range(1, 95):
    file = get_filename(campaign, episode_nr)
    # Decode explicitly instead of relying on the platform's locale encoding.
    # Assumes `file` is a pathlib-style path and the subtitles are UTF-8 --
    # TODO confirm.
    srt_text = file.read_text(encoding="utf-8")
    subtitlelines = parse(srt_text)
    print(episode_nr)

    # The current speaker carries over from one cue to the next until a new
    # speaker prefix, a note or a meta line resets it.
    person = None
    episode = Episode.get(season=campaign, episode_number=episode_nr)

    # One transaction per episode: a failure part-way through an episode
    # leaves none of its lines behind.
    with db.atomic():
        for expected_index, line in enumerate(subtitlelines, start=1):
            # Cues must be numbered 1, 2, 3, ... without gaps. Raise
            # explicitly rather than `assert`, which is removed when Python
            # runs with -O.
            if line.index != expected_index:
                raise ValueError(
                    f"{file}: expected subtitle index {expected_index}, "
                    f"got {line.index}"
                )

            text = unescape(line.content)
            dbline = Line()

            if ":" in text:
                name, resttext = text.split(":", maxsplit=1)
                # Only an all-uppercase prefix is treated as a speaker name;
                # any other colon is left as part of the spoken text and the
                # previous speaker is kept.
                if name.isupper():
                    formatted_name = fix_typo(name.strip()).title()
                    if formatted_name == "San":
                        # Presumably a debugging aid for a truncated or
                        # misspelled speaker name -- prints the raw prefix.
                        print(name.title())
                    person, _ = Person.get_or_create(name=formatted_name)
                    text = resttext.strip()
            else:
                # Lines fully wrapped in (...) or [...] are flagged as note
                # or meta lines and are not attributed to any speaker.
                if text.startswith("(") and text.endswith(")"):
                    dbline.isnote = True
                    person = None
                elif text.startswith("[") and text.endswith("]"):
                    dbline.ismeta = True
                    person = None

            # Store multi-line cues as a single line of text.
            text = text.replace("\n", " ")
            dbline.text = text
            dbline.search_text = fn.to_tsvector('english', text)
            dbline.person = person
            dbline.starttime = td_to_milliseconds(line.start)
            dbline.endtime = td_to_milliseconds(line.end)
            dbline.episode = episode
            dbline.order = line.index
            dbline.save()
|