mirror of
https://github.com/Findus23/cr-search.git
synced 2024-09-19 15:23:44 +02:00
pyupgrade
This commit is contained in:
parent
9cff4968c1
commit
dece8e5328
12 changed files with 55 additions and 38 deletions
|
@ -1,7 +1,6 @@
|
||||||
import json
|
import json
|
||||||
import shutil
|
import shutil
|
||||||
from statistics import mean, stdev
|
from statistics import mean, stdev
|
||||||
from typing import Tuple
|
|
||||||
|
|
||||||
from alive_progress import alive_bar
|
from alive_progress import alive_bar
|
||||||
from peewee import SelectQuery
|
from peewee import SelectQuery
|
||||||
|
@ -11,7 +10,7 @@ from app import db
|
||||||
from server import search, suggest, exact_search
|
from server import search, suggest, exact_search
|
||||||
|
|
||||||
|
|
||||||
def benchmark_query(query: SelectQuery, filename: str = None) -> Tuple[float, float]:
|
def benchmark_query(query: SelectQuery, filename: str = None) -> tuple[float, float]:
|
||||||
query, params = query.sql()
|
query, params = query.sql()
|
||||||
|
|
||||||
query = "EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) " + query
|
query = "EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) " + query
|
||||||
|
|
7
data.py
7
data.py
|
@ -1,5 +1,4 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Optional, List
|
|
||||||
|
|
||||||
colors = {
|
colors = {
|
||||||
"campaign1": {
|
"campaign1": {
|
||||||
|
@ -94,10 +93,10 @@ assert set(single_speaker["Handbooker Helper"].keys()) == set(range(1, 44 + 1))
|
||||||
class SeriesData:
|
class SeriesData:
|
||||||
name: str
|
name: str
|
||||||
slug: str
|
slug: str
|
||||||
playlist_id: Optional[str] = None
|
playlist_id: str | None = None
|
||||||
videos: Optional[List[str]] = None
|
videos: list[str] | None = None
|
||||||
single_speaker: bool = False
|
single_speaker: bool = False
|
||||||
initial_speaker: Optional[str] = None
|
initial_speaker: str | None = None
|
||||||
|
|
||||||
|
|
||||||
series_data = [
|
series_data = [
|
||||||
|
|
2
fetch.py
2
fetch.py
|
@ -64,7 +64,7 @@ def main(args: argparse.Namespace) -> None:
|
||||||
f.write(r.content)
|
f.write(r.content)
|
||||||
changed = False
|
changed = False
|
||||||
try:
|
try:
|
||||||
e = Episode.select().where((Episode.youtube_id == url)).get()
|
e = Episode.select().where(Episode.youtube_id == url).get()
|
||||||
if args.skip_existing and e.downloaded:
|
if args.skip_existing and e.downloaded:
|
||||||
continue
|
continue
|
||||||
except DoesNotExist:
|
except DoesNotExist:
|
||||||
|
|
13
import.py
13
import.py
|
@ -1,7 +1,6 @@
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from html import unescape
|
from html import unescape
|
||||||
from typing import List, Optional, Set, Union
|
|
||||||
|
|
||||||
from alive_progress import alive_bar
|
from alive_progress import alive_bar
|
||||||
from peewee import fn, chunked
|
from peewee import fn, chunked
|
||||||
|
@ -29,13 +28,13 @@ def add_to_text(text: str, add: str) -> str:
|
||||||
return add
|
return add
|
||||||
|
|
||||||
|
|
||||||
def line_key(line: Line) -> Union[str, Line]:
|
def line_key(line: Line) -> str | Line:
|
||||||
if line.ismeta or line.isnote:
|
if line.ismeta or line.isnote:
|
||||||
return line
|
return line
|
||||||
return line.person
|
return line.person
|
||||||
|
|
||||||
|
|
||||||
def group_lines(dblines: List[Line]) -> List[Line]:
|
def group_lines(dblines: list[Line]) -> list[Line]:
|
||||||
final_lines = []
|
final_lines = []
|
||||||
order = 0
|
order = 0
|
||||||
|
|
||||||
|
@ -74,7 +73,7 @@ def group_lines(dblines: List[Line]) -> List[Line]:
|
||||||
return final_lines
|
return final_lines
|
||||||
|
|
||||||
|
|
||||||
def insert_subtitle(text: str, person: Optional[Person], subline: Subtitle, episode: Episode, order: int,
|
def insert_subtitle(text: str, person: Person | None, subline: Subtitle, episode: Episode, order: int,
|
||||||
isnote: bool = False, ismeta: bool = False) -> Line:
|
isnote: bool = False, ismeta: bool = False) -> Line:
|
||||||
dbline = Line()
|
dbline = Line()
|
||||||
if not text:
|
if not text:
|
||||||
|
@ -94,7 +93,7 @@ def insert_subtitle(text: str, person: Optional[Person], subline: Subtitle, epis
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
os.nice(15)
|
os.nice(15)
|
||||||
all_people: Set[str] = set()
|
all_people: set[str] = set()
|
||||||
for series in Series.select().order_by(Series.id):
|
for series in Series.select().order_by(Series.id):
|
||||||
for episode in Episode.select().where(
|
for episode in Episode.select().where(
|
||||||
(Episode.text_imported == False) & (Episode.series == series) & (Episode.downloaded)
|
(Episode.text_imported == False) & (Episode.series == series) & (Episode.downloaded)
|
||||||
|
@ -103,9 +102,9 @@ def main() -> None:
|
||||||
f.write("\n".join(sorted(p for p in all_people if "\n" not in p)))
|
f.write("\n".join(sorted(p for p in all_people if "\n" not in p)))
|
||||||
file = srtdir / f"{episode.id}.srt"
|
file = srtdir / f"{episode.id}.srt"
|
||||||
strtext = file.read_text()
|
strtext = file.read_text()
|
||||||
subtitlelines: List[Subtitle] = list(parse(strtext))
|
subtitlelines: list[Subtitle] = list(parse(strtext))
|
||||||
print(episode.video_number, episode.pretty_title)
|
print(episode.video_number, episode.pretty_title)
|
||||||
person: Optional[Person] = None
|
person: Person | None = None
|
||||||
with db.atomic():
|
with db.atomic():
|
||||||
dblines = []
|
dblines = []
|
||||||
i = 0
|
i = 0
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
import os
|
import os
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Dict
|
|
||||||
|
|
||||||
import en_core_web_md
|
import en_core_web_md
|
||||||
from alive_progress import alive_bar
|
from alive_progress import alive_bar
|
||||||
|
@ -23,7 +22,7 @@ class Noun:
|
||||||
count: int = 1
|
count: int = 1
|
||||||
|
|
||||||
|
|
||||||
lemma_cache: Dict[str, str] = {}
|
lemma_cache: dict[str, str] = {}
|
||||||
|
|
||||||
nlp: Language = en_core_web_md.load(disable=["ner", "textcat"])
|
nlp: Language = en_core_web_md.load(disable=["ner", "textcat"])
|
||||||
nlp.Defaults.stop_words = STOP_WORDS
|
nlp.Defaults.stop_words = STOP_WORDS
|
||||||
|
@ -48,7 +47,7 @@ for episode in Episode.select().where((Episode.phrases_imported == False) & (Epi
|
||||||
print("run nlp")
|
print("run nlp")
|
||||||
doc = nlp(text)
|
doc = nlp(text)
|
||||||
print("nlp finished")
|
print("nlp finished")
|
||||||
nouns: Dict[str, Noun] = {}
|
nouns: dict[str, Noun] = {}
|
||||||
chunk: Span
|
chunk: Span
|
||||||
noun_chunks = list(doc.noun_chunks)
|
noun_chunks = list(doc.noun_chunks)
|
||||||
with alive_bar(len(noun_chunks), title='lemmatizing and counting') as bar:
|
with alive_bar(len(noun_chunks), title='lemmatizing and counting') as bar:
|
||||||
|
|
10
server.py
10
server.py
|
@ -1,6 +1,5 @@
|
||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
from typing import List
|
|
||||||
|
|
||||||
from flask import request, jsonify, Response, abort, g
|
from flask import request, jsonify, Response, abort, g
|
||||||
from peewee import fn, Alias, SQL, DoesNotExist, Expression, ModelSelect, JOIN
|
from peewee import fn, Alias, SQL, DoesNotExist, Expression, ModelSelect, JOIN
|
||||||
|
@ -19,6 +18,7 @@ from suggestions import suggestions
|
||||||
|
|
||||||
app.register_blueprint(ssr_routes)
|
app.register_blueprint(ssr_routes)
|
||||||
|
|
||||||
|
|
||||||
def add_cors(response: Response) -> Response:
|
def add_cors(response: Response) -> Response:
|
||||||
header = response.headers
|
header = response.headers
|
||||||
header['Access-Control-Allow-Origin'] = '*'
|
header['Access-Control-Allow-Origin'] = '*'
|
||||||
|
@ -34,7 +34,7 @@ def before_request():
|
||||||
def after_request(response: Response):
|
def after_request(response: Response):
|
||||||
diff = time.perf_counter() - g.start
|
diff = time.perf_counter() - g.start
|
||||||
if response.response:
|
if response.response:
|
||||||
response.headers.set("Server-Timing", f"server;dur={diff *1000 :.5f}")
|
response.headers.set("Server-Timing", f"server;dur={diff * 1000 :.5f}")
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
@ -194,11 +194,11 @@ def series():
|
||||||
@app.route("/api/episodes")
|
@app.route("/api/episodes")
|
||||||
@cache.cached(timeout=60 * 60 * 24)
|
@cache.cached(timeout=60 * 60 * 24)
|
||||||
def api_episodes():
|
def api_episodes():
|
||||||
all_series: List[Series] = Series.select().order_by(Series.order)
|
all_series: list[Series] = Series.select().order_by(Series.order)
|
||||||
data = []
|
data = []
|
||||||
for series in all_series:
|
for series in all_series:
|
||||||
|
|
||||||
episodes: List[Episode] = Episode.select().where(Episode.series == series).order_by(Episode.video_number)
|
episodes: list[Episode] = Episode.select().where(Episode.series == series).order_by(Episode.video_number)
|
||||||
|
|
||||||
series_data = []
|
series_data = []
|
||||||
for episode in episodes:
|
for episode in episodes:
|
||||||
|
@ -241,7 +241,7 @@ def transcript():
|
||||||
(Episode.series.slug == series)
|
(Episode.series.slug == series)
|
||||||
).join(Series).get()
|
).join(Series).get()
|
||||||
|
|
||||||
lines: List[Line] = Line.select(Line, Person).where(
|
lines: list[Line] = Line.select(Line, Person).where(
|
||||||
(Episode.episode_number == episode_number)
|
(Episode.episode_number == episode_number)
|
||||||
&
|
&
|
||||||
(Episode.series.slug == series)
|
(Episode.series.slug == series)
|
||||||
|
|
2
stats.py
2
stats.py
|
@ -27,7 +27,7 @@ class Stats(ABC):
|
||||||
|
|
||||||
class MultiColumnStats(Stats):
|
class MultiColumnStats(Stats):
|
||||||
|
|
||||||
def as_data(self) -> List[Dict[str, Any]]:
|
def as_data(self) -> list[dict[str, Any]]:
|
||||||
data = []
|
data = []
|
||||||
cur = self.execute()
|
cur = self.execute()
|
||||||
column_names = [d.name for d in cur.description]
|
column_names = [d.name for d in cur.description]
|
||||||
|
|
|
@ -28,14 +28,13 @@ That said, I try to only use phrases that don't contain spoilers themselves.
|
||||||
#
|
#
|
||||||
"""
|
"""
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Suggestion:
|
class Suggestion:
|
||||||
text: str
|
text: str
|
||||||
# only show this suggestion to people who have watched at least this episode
|
# only show this suggestion to people who have watched at least this episode
|
||||||
episode: Optional[int] = None
|
episode: int | None = None
|
||||||
|
|
||||||
|
|
||||||
suggestions = {
|
suggestions = {
|
||||||
|
|
31
tests.sql
31
tests.sql
|
@ -1,4 +1,7 @@
|
||||||
select e.pretty_title, text,char_length(line.text) as len from line join episode e on e.id = line.episode_id order by len desc;
|
select e.pretty_title, text, char_length(line.text) as len
|
||||||
|
from line
|
||||||
|
join episode e on e.id = line.episode_id
|
||||||
|
order by len desc;
|
||||||
|
|
||||||
|
|
||||||
SELECT pg_size_pretty(pg_relation_size('phrase'));
|
SELECT pg_size_pretty(pg_relation_size('phrase'));
|
||||||
|
@ -6,12 +9,15 @@ SELECT pg_size_pretty(pg_relation_size('phrase'));
|
||||||
delete
|
delete
|
||||||
from phrase;
|
from phrase;
|
||||||
|
|
||||||
delete from line;
|
delete
|
||||||
|
from line;
|
||||||
|
|
||||||
update episode
|
update episode
|
||||||
set text_imported= False, phrases_imported=False;
|
set text_imported= False,
|
||||||
|
phrases_imported= False;
|
||||||
|
|
||||||
update person set color=null;
|
update person
|
||||||
|
set color=null;
|
||||||
|
|
||||||
EXPLAIN analyse
|
EXPLAIN analyse
|
||||||
SELECT text, sum(count) as total_count
|
SELECT text, sum(count) as total_count
|
||||||
|
@ -76,3 +82,20 @@ SELECT *
|
||||||
FROM ts_stat('SELECT search_text from line')
|
FROM ts_stat('SELECT search_text from line')
|
||||||
order by nentry desc
|
order by nentry desc
|
||||||
limit 500;
|
limit 500;
|
||||||
|
|
||||||
|
SELECT *, ts_rank("search_text", websearch_to_tsquery('english', 'I cast regret')) AS "rank"
|
||||||
|
FROM line
|
||||||
|
INNER JOIN person ON (line.person_id = person.id)
|
||||||
|
INNER JOIN episode ON (line.episode_id = episode.id)
|
||||||
|
WHERE (
|
||||||
|
(line.search_text @@ websearch_to_tsquery('english', 'I cast regret')) AND
|
||||||
|
(episode.episode_number <= 1000) AND
|
||||||
|
(episode.series_id = 2)
|
||||||
|
)
|
||||||
|
ORDER BY rank DESC
|
||||||
|
LIMIT 20;
|
||||||
|
|
||||||
|
select websearch_to_tsquery('english', 'I cast regret');
|
||||||
|
|
||||||
|
INSERT INTO line (text, search_text, ...) values ('This is a longer example text', to_tsvector('english', 'This is a longer example text'));
|
||||||
|
select to_tsvector('english', 'This is a longer example text');
|
||||||
|
|
3
utils.py
3
utils.py
|
@ -1,7 +1,6 @@
|
||||||
import re
|
import re
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from app import cache
|
from app import cache
|
||||||
from data import single_speaker
|
from data import single_speaker
|
||||||
|
@ -17,7 +16,7 @@ def milliseconds_to_td(ms: int) -> timedelta:
|
||||||
return timedelta(milliseconds=ms)
|
return timedelta(milliseconds=ms)
|
||||||
|
|
||||||
|
|
||||||
def episode_speaker(series_title: str, episode: int) -> Optional[str]:
|
def episode_speaker(series_title: str, episode: int) -> str | None:
|
||||||
try:
|
try:
|
||||||
series = single_speaker[series_title]
|
series = single_speaker[series_title]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
|
12
web/package-lock.json
generated
12
web/package-lock.json
generated
|
@ -3976,9 +3976,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/caniuse-lite": {
|
"node_modules/caniuse-lite": {
|
||||||
"version": "1.0.30001363",
|
"version": "1.0.30001458",
|
||||||
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001363.tgz",
|
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001458.tgz",
|
||||||
"integrity": "sha512-HpQhpzTGGPVMnCjIomjt+jvyUu8vNFo3TaDiZ/RcoTrlOq/5+tC8zHdsbgFB6MxmaY+jCpsH09aD80Bb4Ow3Sg==",
|
"integrity": "sha512-lQ1VlUUq5q9ro9X+5gOEyH7i3vm+AYVT1WDCVB69XOZ17KZRhnZ9J0Sqz7wTHQaLBJccNCHq8/Ww5LlOIZbB0w==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
|
@ -15459,9 +15459,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"caniuse-lite": {
|
"caniuse-lite": {
|
||||||
"version": "1.0.30001363",
|
"version": "1.0.30001458",
|
||||||
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001363.tgz",
|
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001458.tgz",
|
||||||
"integrity": "sha512-HpQhpzTGGPVMnCjIomjt+jvyUu8vNFo3TaDiZ/RcoTrlOq/5+tC8zHdsbgFB6MxmaY+jCpsH09aD80Bb4Ow3Sg==",
|
"integrity": "sha512-lQ1VlUUq5q9ro9X+5gOEyH7i3vm+AYVT1WDCVB69XOZ17KZRhnZ9J0Sqz7wTHQaLBJccNCHq8/Ww5LlOIZbB0w==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"case-sensitive-paths-webpack-plugin": {
|
"case-sensitive-paths-webpack-plugin": {
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
experimental transcript view.</p>
|
experimental transcript view.</p>
|
||||||
<p>If you have any feedback, ideas for improvements or bugs, feel free to contact me at
|
<p>If you have any feedback, ideas for improvements or bugs, feel free to contact me at
|
||||||
<a href="mailto:cr@lw1.at">cr@lw1.at</a> or <a href="https://twitter.com/lw1_at">on Twitter</a>.</p>
|
<a href="mailto:cr@lw1.at">cr@lw1.at</a> or <a href="https://twitter.com/lw1_at">on Twitter</a>.</p>
|
||||||
|
<p>You can learn more about this website <a href="https://lw1.at/en/cr-search/">here</a>.</p>
|
||||||
<div class="footnote-list">
|
<div class="footnote-list">
|
||||||
<ol>
|
<ol>
|
||||||
<li>all episodes with manually created subtitles (including Shows and One-Shots)</li>
|
<li>all episodes with manually created subtitles (including Shows and One-Shots)</li>
|
||||||
|
|
Loading…
Reference in a new issue