mirror of
https://github.com/Findus23/cr-search.git
synced 2024-09-11 06:03:45 +02:00
generalise data model to series
This commit is contained in:
parent
3cd78c3514
commit
df3c1b62eb
12 changed files with 165 additions and 97 deletions
12
colors.py
12
colors.py
|
@ -1,16 +1,6 @@
|
|||
from data import colors_c2
|
||||
from models import Person
|
||||
|
||||
colors_c2 = {
|
||||
"Laura": "#59c3f9",
|
||||
"Marisha": "#00146e",
|
||||
"Liam": "#fe8413",
|
||||
"Taliesin": "#be1c0d",
|
||||
"Ashley": "#868984",
|
||||
"Sam": "#dae1dd",
|
||||
"Travis": "#076708",
|
||||
"Matt": "#471f0e" # random color
|
||||
}
|
||||
|
||||
p: Person
|
||||
for p in Person.select():
|
||||
print(p)
|
||||
|
|
|
@ -7,5 +7,3 @@ dbauth = {
|
|||
}
|
||||
|
||||
sentryDSN = None
|
||||
|
||||
skip_download = False
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from sys import argv
|
||||
|
||||
from models import db, Phrase, Episode, Person, Line
|
||||
from models import db, Series, Phrase, Episode, Person, Line
|
||||
|
||||
|
||||
def confirm(message: str) -> None:
|
||||
|
@ -14,8 +14,8 @@ mode = argv[1]
|
|||
|
||||
if mode == "all":
|
||||
confirm("Delete all Data? ")
|
||||
db.drop_tables([Episode, Person, Line, Phrase])
|
||||
db.create_tables([Episode, Person, Line, Phrase])
|
||||
db.drop_tables([Series, Episode, Person, Line, Phrase])
|
||||
db.create_tables([Series, Episode, Person, Line, Phrase])
|
||||
elif mode == "phrases":
|
||||
confirm("Delete all Phrases? ")
|
||||
db.drop_tables([Phrase])
|
||||
|
|
86
fetch.py
86
fetch.py
|
@ -2,25 +2,49 @@ import re
|
|||
from subprocess import run
|
||||
|
||||
import youtube_dl
|
||||
from peewee import DoesNotExist
|
||||
|
||||
import config
|
||||
from models import Episode
|
||||
from models import Episode, Series
|
||||
# https://www.youtube.com/playlist?list=
|
||||
from utils import srtdir
|
||||
|
||||
campaign_playlists = {
|
||||
1: "https://www.youtube.com/playlist?list=PL1tiwbzkOjQz7D0l_eLJGAISVtcL7oRu_",
|
||||
2: "https://www.youtube.com/playlist?list=PL1tiwbzkOjQxD0jjAE7PsWoaCrs0EkBH2"
|
||||
}
|
||||
# Playlists to import, one dict per series.
#   "name"           - stored as Series.title; main() marks the series as a
#                      campaign when the name contains "Campaign".
#   "playlist_id"    - appended to "https://www.youtube.com/playlist?list="
#                      to build the playlist URL.
#   "single_speaker" - optional; main() treats a missing key as False.
series_data = [
    {
        "name": "Campaign 1",
        "playlist_id": "PL1tiwbzkOjQz7D0l_eLJGAISVtcL7oRu_",
    },
    {
        "name": "Campaign 2",
        "playlist_id": "PL1tiwbzkOjQxD0jjAE7PsWoaCrs0EkBH2",
    },
    {
        "name": "Handbooker Helper",
        "playlist_id": "PL1tiwbzkOjQyr6-gqJ8r29j_rJkR49uDN",
        "single_speaker": True,
    },
]
|
||||
|
||||
|
||||
def main():
|
||||
for campaign in range(1, 3):
|
||||
for series in series_data:
|
||||
name = series["name"]
|
||||
playlist_id = series["playlist_id"]
|
||||
is_campaign = "Campaign" in name
|
||||
try:
|
||||
s = Series.select().where(Series.title == name).get()
|
||||
except DoesNotExist:
|
||||
s = Series()
|
||||
s.title = name
|
||||
|
||||
s.is_campaign = is_campaign
|
||||
s.single_speaker = "single_speaker" in series and series["single_speaker"]
|
||||
s.save()
|
||||
ydl_opts = {
|
||||
'extract_flat': True
|
||||
}
|
||||
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
playlist = ydl.extract_info(campaign_playlists[campaign], download=False)
|
||||
playlist = ydl.extract_info("https://www.youtube.com/playlist?list=" + playlist_id, download=False)
|
||||
videos = playlist["entries"]
|
||||
|
||||
print(v["url"] for v in videos)
|
||||
|
@ -33,30 +57,42 @@ def main():
|
|||
regex = re.compile(r"Ep(?:is|si)ode (\d+)")
|
||||
|
||||
for nr, video in enumerate(videos, 1):
|
||||
vttfile = srtdir / f"C{campaign}E{nr}"
|
||||
ydl_opts["outtmpl"] = str(vttfile)
|
||||
if Episode.select().where((Episode.season == campaign) & (Episode.video_number == nr)).count() == 1:
|
||||
print(f"already imported {vttfile}")
|
||||
continue
|
||||
e = Episode()
|
||||
e.season = campaign
|
||||
e.video_number = nr
|
||||
# if Episode.select().where((Episode.season == campaign) & (Episode.video_number == nr)).count() == 1:
|
||||
# print(f"already imported {vttfile}")
|
||||
# continue
|
||||
try:
|
||||
match = regex.search(video["title"])
|
||||
e.episode_number = int(match.group(1))
|
||||
except AttributeError:
|
||||
if campaign == 1: # one-shots at the end of campaign 1
|
||||
e.episode_number = e.video_number - 3
|
||||
else:
|
||||
raise
|
||||
e = Episode.select().where((Episode.series == s) & (Episode.video_number == nr)).get()
|
||||
except DoesNotExist:
|
||||
e = Episode()
|
||||
e.series = s
|
||||
e.video_number = nr
|
||||
e.title = video["title"]
|
||||
if s.is_campaign:
|
||||
try:
|
||||
match = regex.search(video["title"])
|
||||
e.episode_number = int(match.group(1))
|
||||
except AttributeError:
|
||||
if s.title == "Campaign 1": # one-shots at the end of campaign 1
|
||||
e.episode_number = e.video_number - 3
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
e.episode_number = e.video_number
|
||||
e.youtube_id = video["url"]
|
||||
e.save()
|
||||
if config.skip_download:
|
||||
vttfile = srtdir / str(e.id)
|
||||
ydl_opts["outtmpl"] = str(vttfile)
|
||||
if e.downloaded:
|
||||
continue
|
||||
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.download([f'https://www.youtube.com/watch?v={e.youtube_id}'])
|
||||
run(["ffmpeg", "-i", vttfile.with_suffix(".en.vtt"), vttfile.with_suffix(".srt")])
|
||||
vttfile.with_suffix(".en.vtt").unlink()
|
||||
e.downloaded = True
|
||||
try:
|
||||
vttfile.with_suffix(".en.vtt").unlink()
|
||||
except FileNotFoundError:
|
||||
e.downloaded = False
|
||||
e.save()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
60
import.py
60
import.py
|
@ -5,9 +5,9 @@ from alive_progress import alive_bar
|
|||
from peewee import fn
|
||||
from srt import parse
|
||||
|
||||
from models import Person, Line, Episode, db
|
||||
from models import Person, Line, Episode, db, Series
|
||||
from typo import fix_typo
|
||||
from utils import td_to_milliseconds, get_filename
|
||||
from utils import td_to_milliseconds, srtdir, episode_speaker
|
||||
|
||||
|
||||
def is_invalid_name(name: str) -> bool:
|
||||
|
@ -19,14 +19,16 @@ def is_invalid_name(name: str) -> bool:
|
|||
|
||||
def main():
|
||||
all_people = set()
|
||||
for campaign in range(1, 3):
|
||||
for episode in Episode.select().where((Episode.text_imported == False) & (Episode.season == campaign)):
|
||||
for series in Series.select():
|
||||
for episode in Episode.select().where(
|
||||
(Episode.text_imported == False) & (Episode.series == series) & (Episode.downloaded)
|
||||
):
|
||||
with open("names.txt", "w") as f:
|
||||
f.write("\n".join(sorted(p for p in all_people if "\n" not in p)))
|
||||
file = get_filename(campaign, episode.video_number)
|
||||
file = srtdir / f"{episode.id}.srt"
|
||||
text = file.read_text()
|
||||
subtitlelines = list(parse(text))
|
||||
print(episode.video_number, episode.episode_number)
|
||||
print(episode.video_number, episode.title)
|
||||
person = None
|
||||
with db.atomic():
|
||||
with alive_bar(len(subtitlelines)) as bar:
|
||||
|
@ -37,28 +39,32 @@ def main():
|
|||
assert i == line.index
|
||||
text = unescape(line.content)
|
||||
dbline = Line()
|
||||
if ":" in text:
|
||||
name, resttext = text.split(":", maxsplit=1)
|
||||
if name and name[-1].isupper() and not is_invalid_name(name):
|
||||
people = []
|
||||
name = name.lower()
|
||||
for word in re.split('[,&/]|and| an ', name):
|
||||
word = word.strip()
|
||||
word = fix_typo(word).title()
|
||||
word = word.strip()
|
||||
if word:
|
||||
people.append(word)
|
||||
all_people.update(people)
|
||||
formatted_name = ", ".join(people)
|
||||
person, created = Person.get_or_create(name=formatted_name, season=campaign)
|
||||
text = resttext.strip()
|
||||
if not series.single_speaker:
|
||||
if ":" in text:
|
||||
name, resttext = text.split(":", maxsplit=1)
|
||||
if name and name[-1].isupper() and not is_invalid_name(name):
|
||||
people = []
|
||||
name = name.lower()
|
||||
for word in re.split('[,&/]|and| an ', name):
|
||||
word = word.strip()
|
||||
word = fix_typo(word).title()
|
||||
word = word.strip()
|
||||
if word:
|
||||
people.append(word)
|
||||
all_people.update(people)
|
||||
formatted_name = ", ".join(people)
|
||||
person, created = Person.get_or_create(name=formatted_name, series=series)
|
||||
text = resttext.strip()
|
||||
else:
|
||||
if text.startswith("(") and text.endswith(")"):
|
||||
dbline.isnote = True
|
||||
person = None
|
||||
elif text.startswith("[") and text.endswith("]"):
|
||||
dbline.ismeta = True
|
||||
person = None
|
||||
else:
|
||||
if text.startswith("(") and text.endswith(")"):
|
||||
dbline.isnote = True
|
||||
person = None
|
||||
elif text.startswith("[") and text.endswith("]"):
|
||||
dbline.ismeta = True
|
||||
person = None
|
||||
person_name = episode_speaker(series.title, episode.video_number)
|
||||
person,created = Person.get_or_create(name=person_name, series=series)
|
||||
text = text.replace("\n", " ")
|
||||
dbline.text = text
|
||||
dbline.search_text = fn.to_tsvector('english', text)
|
||||
|
|
17
models.py
17
models.py
|
@ -12,16 +12,24 @@ class BaseModel(Model):
|
|||
database = db
|
||||
|
||||
|
||||
class Series(BaseModel):
    """A group of episodes that are imported and searched together."""

    # Human-readable name of the series (at most 100 characters).
    title = CharField(max_length=100)

    # Whether the series is a numbered campaign.
    is_campaign = BooleanField()

    # Whether every line of an episode is attributed to one speaker.
    # NOTE(review): presumably because such transcripts carry no "NAME:"
    # prefixes - confirm against the import script.
    single_speaker = BooleanField()
|
||||
|
||||
|
||||
class Episode(BaseModel):
|
||||
season = IntegerField()
|
||||
series = ForeignKeyField(Series, backref="episodes")
|
||||
episode_number = IntegerField()
|
||||
video_number = IntegerField()
|
||||
youtube_id = CharField(max_length=11)
|
||||
title = CharField(max_length=100)
|
||||
downloaded = BooleanField(default=False)
|
||||
text_imported = BooleanField(default=False)
|
||||
phrases_imported = BooleanField(default=False)
|
||||
|
||||
class Meta:
|
||||
indexes = ((("season", "video_number"), True),)
|
||||
indexes = ((("series", "video_number"), True),)
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
|
@ -31,9 +39,10 @@ class Episode(BaseModel):
|
|||
class Person(BaseModel):
|
||||
name = CharField()
|
||||
color = CharField(null=True)
|
||||
season = IntegerField()
|
||||
series = ForeignKeyField(Series)
|
||||
|
||||
class Meta:
|
||||
indexes = ((("name", "season"), True),)
|
||||
indexes = ((("name", "series"), True),)
|
||||
|
||||
|
||||
FULL_TEXT_SEARCH = '''SELECT id, text, ts_rank_cd(search_text, query) AS rank
|
||||
|
|
|
@ -12,7 +12,7 @@ from stopwords import STOP_WORDS
|
|||
nlp: English = spacy.load("en_core_web_sm", disable=["ner", "textcat"])
|
||||
nlp.Defaults.stop_words = STOP_WORDS
|
||||
for episode in Episode.select().where((Episode.phrases_imported == False) & (Episode.text_imported == True)):
|
||||
print(f"Campaign {episode.season} Episode {episode.episode_number}")
|
||||
print(episode.video_number, episode.title)
|
||||
person = None
|
||||
text = ""
|
||||
line_select = Line.select().where(Line.episode == episode)
|
||||
|
|
14
server.py
14
server.py
|
@ -19,20 +19,20 @@ def add_cors(response):
|
|||
return response
|
||||
|
||||
|
||||
global_excludes = [Line.search_text, Episode.phrases_imported, Episode.text_imported]
|
||||
global_excludes = [Line.search_text, Episode.phrases_imported, Episode.text_imported, Person.series]
|
||||
|
||||
|
||||
@app.route("/api/suggest")
|
||||
def question():
|
||||
query: str = request.args.get('query')
|
||||
until = request.args.get('until')
|
||||
season = request.args.get('season')
|
||||
if not query or not until or not season:
|
||||
series = request.args.get('series')
|
||||
if not query or not until or not series:
|
||||
return "no suggest query", 400
|
||||
if len(query) > 50:
|
||||
return "too long query", 400
|
||||
phrases = Phrase.select(Phrase.text, Alias(fn.SUM(Phrase.count), "total_count")).join(Episode).where(
|
||||
(Episode.season == season) &
|
||||
(Episode.series == series) &
|
||||
(Episode.episode_number <= until) &
|
||||
(Phrase.text.contains(query))
|
||||
).group_by(Phrase.text).order_by(SQL("total_count DESC")).limit(10)
|
||||
|
@ -43,8 +43,8 @@ def question():
|
|||
def search():
|
||||
query = request.args.get('query')
|
||||
until = request.args.get('until')
|
||||
season = request.args.get('season')
|
||||
if not query or not until or not season:
|
||||
series = request.args.get('series')
|
||||
if not query or not until or not series:
|
||||
return "no suggest query", 400
|
||||
if len(query) > 50:
|
||||
return "too long query", 400
|
||||
|
@ -56,7 +56,7 @@ def search():
|
|||
&
|
||||
(Episode.episode_number <= until)
|
||||
&
|
||||
(Episode.season == season)
|
||||
(Episode.series == series)
|
||||
).order_by(SQL("rank DESC")).join(Person).switch(Line).join(Episode).limit(20)
|
||||
|
||||
if len(results) == 0:
|
||||
|
|
10
utils.py
10
utils.py
|
@ -1,5 +1,8 @@
|
|||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from data import single_speaker
|
||||
|
||||
srtdir = Path("./data/subtitles/")
|
||||
|
||||
|
@ -12,5 +15,8 @@ def milliseconds_to_td(ms: int) -> timedelta:
|
|||
return timedelta(milliseconds=ms)
|
||||
|
||||
|
||||
def get_filename(campaign: int, episode: int) -> Path:
|
||||
return srtdir / f"C{campaign}E{episode}.srt"
|
||||
def episode_speaker(series_title: str, episode: int) -> Optional[str]:
    """Return the speaker configured for one episode of a single-speaker series.

    The lookup goes through ``single_speaker`` (imported from ``data``): first
    by ``series_title``, then by ``episode`` inside that entry.

    Args:
        series_title: Key of the series in ``single_speaker``.
        episode: Episode key to look up within that series' entry.

    Returns:
        The stored speaker name, or ``None`` when ``episode`` is not listed
        for the series.

    Note:
        A ``series_title`` missing from ``single_speaker`` is not handled here;
        presumably this raises ``KeyError`` (assuming a dict) - TODO confirm.
    """
    speakers = single_speaker[series_title]
    return speakers[episode] if episode in speakers else None
|
||||
|
|
|
@ -4,10 +4,18 @@ export interface Person {
|
|||
"color": string;
|
||||
}
|
||||
|
||||
/**
 * Series information attached to an episode.
 *
 * `is_campaign` decides how an episode is labelled in the result list:
 * campaign episodes are shown by episode number, all others by their title.
 */
export interface Series {
    "id": number;
    "is_campaign": boolean;
    "title": string;
}
|
||||
|
||||
export interface Episode {
|
||||
"episode_number": number;
|
||||
"id": number;
|
||||
"season": number;
|
||||
"series": Series;
|
||||
"title": string;
|
||||
"video_number": number;
|
||||
"youtube_id": string;
|
||||
}
|
||||
|
||||
|
@ -34,3 +42,13 @@ export interface ServerMessage {
|
|||
status: string;
|
||||
message: string;
|
||||
}
|
||||
|
||||
/**
 * Minimal description of a series: its id and display title.
 * Used as the element type of `ServerData.series`.
 */
export interface SeriesNames {
    "id": number;
    "title": string;
}
|
||||
|
||||
/**
 * Static data describing what the server offers.
 *
 * `series` lists every selectable series by id and title.
 */
export interface ServerData {
    "series": SeriesNames[];
}
|
||||
|
|
|
@ -21,7 +21,7 @@ export default new Router({
|
|||
redirect: "/search/2/10/",
|
||||
},
|
||||
{
|
||||
path: "/search/:season/:episode/:keyword?",
|
||||
path: "/search/:series/:episode/:keyword?",
|
||||
name: "search",
|
||||
component: Home,
|
||||
// props: true,
|
||||
|
|
|
@ -78,15 +78,16 @@
|
|||
class="form-control" type="number" v-model="episode"
|
||||
min="1" max="300">
|
||||
<span>in</span>
|
||||
<select title="campaign selection" class="custom-select" v-model="season">
|
||||
<option value="1">Campaign 1</option>
|
||||
<option value="2">Campaign 2</option>
|
||||
<select title="campaign selection" class="custom-select" v-model="series">
|
||||
<option v-for="series in serverData.series" v-bind:value="series.id">
|
||||
{{ series.title }}
|
||||
</option>
|
||||
</select>
|
||||
</div>
|
||||
<b-alert v-if="error" show :variant="error.status">{{error.message}}</b-alert>
|
||||
<div class="entry" v-for="result in searchResult">
|
||||
<div class="title">
|
||||
<div>{{episodeName(firstLine(result))}} {{formatTimestamp(firstLine(result).starttime)}}</div>
|
||||
<div>{{formatTimestamp(firstLine(result).starttime)}} {{episodeName(firstLine(result))}}</div>
|
||||
<div class="buttons">
|
||||
<button class="btn" @click="playVideo(result)" title="View video on YouTube">
|
||||
<b-icon-play-fill></b-icon-play-fill>
|
||||
|
@ -114,7 +115,7 @@
|
|||
// @ts-ignore
|
||||
import Autocomplete from "@trevoreyre/autocomplete-vue";
|
||||
// import "@trevoreyre/autocomplete-vue/dist/style.css";
|
||||
import {Line, Result, ServerMessage} from "@/interfaces";
|
||||
import {Line, Result, ServerData, ServerMessage} from "@/interfaces";
|
||||
import {BAlert, BIcon, BIconPlayFill} from "bootstrap-vue";
|
||||
// @ts-ignore
|
||||
import VueYoutube from "vue-youtube";
|
||||
|
@ -134,9 +135,10 @@
|
|||
},
|
||||
data() {
|
||||
return {
|
||||
serverData : require("../../data.json") as ServerData,
|
||||
searchResult: [] as Result[],
|
||||
keyword: this.$route.params.keyword,
|
||||
season: this.$route.params.season,
|
||||
series: this.$route.params.series,
|
||||
episode: this.$route.params.episode,
|
||||
error: undefined as ServerMessage | undefined,
|
||||
ytOptIn: false,
|
||||
|
@ -155,8 +157,8 @@
|
|||
if (localStorage.ytOptIn) {
|
||||
this.ytOptIn = localStorage.ytOptIn;
|
||||
}
|
||||
if (this.season == null) {
|
||||
this.season = "2";
|
||||
if (this.series == null) {
|
||||
this.series = "2";
|
||||
}
|
||||
if (this.episode == null) {
|
||||
this.episode = "10";
|
||||
|
@ -172,7 +174,7 @@
|
|||
},
|
||||
methods: {
|
||||
suggest(input: string) {
|
||||
const url = baseURL + "suggest?query=" + input + "&until=" + this.episode + "&season=" + this.season;
|
||||
const url = baseURL + "suggest?query=" + input + "&until=" + this.episode + "&series=" + this.series;
|
||||
|
||||
return new Promise((resolve) => {
|
||||
if (input.length < 1) {
|
||||
|
@ -193,7 +195,7 @@
|
|||
if (!this.keyword) {
|
||||
return;
|
||||
}
|
||||
const url = baseURL + "search?query=" + this.keyword + "&until=" + this.episode + "&season=" + this.season;
|
||||
const url = baseURL + "search?query=" + this.keyword + "&until=" + this.episode + "&series=" + this.series;
|
||||
|
||||
fetch(url)
|
||||
.then((response) => response.json())
|
||||
|
@ -225,7 +227,10 @@
|
|||
return result.lines[0];
|
||||
},
|
||||
episodeName(line: Line): string {
|
||||
return `C${line.episode.season}E${line.episode.episode_number}`;
|
||||
if (line.episode.series.is_campaign) {
|
||||
return `Episode ${line.episode.episode_number}`;
|
||||
}
|
||||
return line.episode.title;
|
||||
},
|
||||
formatTimestamp(ts: number) {
|
||||
return new Date(ts).toISOString().substr(11, 8);
|
||||
|
@ -301,8 +306,8 @@
|
|||
// @ts-ignore
|
||||
this.$router.replace({params: {...this.$route.params, episode: val}});
|
||||
}, 300),
|
||||
season(val: string): void {
|
||||
this.$router.replace({params: {...this.$route.params, season: val}});
|
||||
series(val: string): void {
|
||||
this.$router.replace({params: {...this.$route.params, series: val}});
|
||||
},
|
||||
keyword(val: string): void {
|
||||
this.$router.replace({params: {...this.$route.params, keyword: val}});
|
||||
|
|
Loading…
Reference in a new issue