1
0
Fork 0
mirror of https://github.com/Findus23/cr-search.git synced 2024-09-19 15:23:44 +02:00
cr-search/main.py
2020-03-07 10:45:39 +01:00

49 lines
1.7 KiB
Python

from html import unescape
from peewee import fn
from srt import parse
from models import Person, Line, Episode, db
from typo import fix_typo
from utils import td_to_milliseconds, get_filename
# Import the subtitle files of one campaign into the database, episode by
# episode. Every subtitle entry becomes one ``Line`` row linked to its
# ``Episode`` and, when a speaker is known, to a ``Person``.
campaign = 2

for episode_nr in range(1, 95):
    file = get_filename(campaign, episode_nr)
    # NOTE(review): read_text() is called without an explicit encoding, so the
    # platform default is used (assuming `file` is a pathlib.Path) -- confirm
    # this matches the encoding of the subtitle files.
    subtitlelines = parse(file.read_text())
    print(episode_nr)
    # Current speaker. It is deliberately NOT reset for every subtitle line:
    # an entry without a "NAME:" prefix keeps the speaker of the previous
    # entry. It is reset at the start of each episode and by note/meta lines.
    person = None
    episode = Episode.get(season=campaign, episode_number=episode_nr)
    # All lines of one episode are saved inside a single db.atomic() block.
    with db.atomic():
        for expected_index, line in enumerate(subtitlelines, start=1):
            # Subtitle indices must be consecutive and start at 1. This is an
            # explicit check rather than an `assert`, because asserts are
            # removed when Python runs with -O and bad input would then be
            # imported silently.
            if line.index != expected_index:
                raise ValueError(
                    "episode {}: expected subtitle index {}, got {}".format(
                        episode_nr, expected_index, line.index
                    )
                )
            text = unescape(line.content)
            dbline = Line()
            if ":" in text:
                name, resttext = text.split(":", maxsplit=1)
                # Only an all-uppercase prefix is treated as a speaker name;
                # any other text containing a colon is stored unchanged and
                # keeps the current speaker.
                if name.isupper():
                    formatted_name = fix_typo(name.strip()).title()
                    if formatted_name == "San":
                        # Prints the raw (title-cased) name whenever the
                        # corrected name is "San" -- presumably a debugging
                        # aid for a recurring typo; verify before removing.
                        print(name.title())
                    person, _ = Person.get_or_create(name=formatted_name)
                    text = resttext.strip()
            elif text.startswith("(") and text.endswith(")"):
                # Entry fully wrapped in parentheses: flagged as a note and
                # not attributed to any speaker.
                dbline.isnote = True
                person = None
            elif text.startswith("[") and text.endswith("]"):
                # Entry fully wrapped in square brackets: flagged as meta and
                # not attributed to any speaker.
                dbline.ismeta = True
                person = None
            # Collapse multi-line subtitle entries into a single line of text.
            text = text.replace("\n", " ")
            dbline.text = text
            # SQL function expression built with peewee's `fn`; it is
            # evaluated by the database when the row is saved.
            dbline.search_text = fn.to_tsvector('english', text)
            dbline.person = person
            dbline.starttime = td_to_milliseconds(line.start)
            dbline.endtime = td_to_milliseconds(line.end)
            dbline.episode = episode
            dbline.order = line.index
            dbline.save()