1
0
Fork 0
mirror of https://github.com/Findus23/cr-search.git synced 2024-09-19 15:23:44 +02:00

pyupgrade

This commit is contained in:
Lukas Winkler 2023-04-25 22:51:19 +02:00
parent 9cff4968c1
commit dece8e5328
Signed by: lukas
GPG key ID: 54DE4D798D244853
12 changed files with 55 additions and 38 deletions

View file

@ -1,7 +1,6 @@
import json import json
import shutil import shutil
from statistics import mean, stdev from statistics import mean, stdev
from typing import Tuple
from alive_progress import alive_bar from alive_progress import alive_bar
from peewee import SelectQuery from peewee import SelectQuery
@ -11,7 +10,7 @@ from app import db
from server import search, suggest, exact_search from server import search, suggest, exact_search
def benchmark_query(query: SelectQuery, filename: str = None) -> Tuple[float, float]: def benchmark_query(query: SelectQuery, filename: str = None) -> tuple[float, float]:
query, params = query.sql() query, params = query.sql()
query = "EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) " + query query = "EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) " + query

View file

@ -1,5 +1,4 @@
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, List
colors = { colors = {
"campaign1": { "campaign1": {
@ -94,10 +93,10 @@ assert set(single_speaker["Handbooker Helper"].keys()) == set(range(1, 44 + 1))
class SeriesData: class SeriesData:
name: str name: str
slug: str slug: str
playlist_id: Optional[str] = None playlist_id: str | None = None
videos: Optional[List[str]] = None videos: list[str] | None = None
single_speaker: bool = False single_speaker: bool = False
initial_speaker: Optional[str] = None initial_speaker: str | None = None
series_data = [ series_data = [

View file

@ -64,7 +64,7 @@ def main(args: argparse.Namespace) -> None:
f.write(r.content) f.write(r.content)
changed = False changed = False
try: try:
e = Episode.select().where((Episode.youtube_id == url)).get() e = Episode.select().where(Episode.youtube_id == url).get()
if args.skip_existing and e.downloaded: if args.skip_existing and e.downloaded:
continue continue
except DoesNotExist: except DoesNotExist:

View file

@ -1,7 +1,6 @@
import os import os
import re import re
from html import unescape from html import unescape
from typing import List, Optional, Set, Union
from alive_progress import alive_bar from alive_progress import alive_bar
from peewee import fn, chunked from peewee import fn, chunked
@ -29,13 +28,13 @@ def add_to_text(text: str, add: str) -> str:
return add return add
def line_key(line: Line) -> Union[str, Line]: def line_key(line: Line) -> str | Line:
if line.ismeta or line.isnote: if line.ismeta or line.isnote:
return line return line
return line.person return line.person
def group_lines(dblines: List[Line]) -> List[Line]: def group_lines(dblines: list[Line]) -> list[Line]:
final_lines = [] final_lines = []
order = 0 order = 0
@ -74,7 +73,7 @@ def group_lines(dblines: List[Line]) -> List[Line]:
return final_lines return final_lines
def insert_subtitle(text: str, person: Optional[Person], subline: Subtitle, episode: Episode, order: int, def insert_subtitle(text: str, person: Person | None, subline: Subtitle, episode: Episode, order: int,
isnote: bool = False, ismeta: bool = False) -> Line: isnote: bool = False, ismeta: bool = False) -> Line:
dbline = Line() dbline = Line()
if not text: if not text:
@ -94,7 +93,7 @@ def insert_subtitle(text: str, person: Optional[Person], subline: Subtitle, epis
def main() -> None: def main() -> None:
os.nice(15) os.nice(15)
all_people: Set[str] = set() all_people: set[str] = set()
for series in Series.select().order_by(Series.id): for series in Series.select().order_by(Series.id):
for episode in Episode.select().where( for episode in Episode.select().where(
(Episode.text_imported == False) & (Episode.series == series) & (Episode.downloaded) (Episode.text_imported == False) & (Episode.series == series) & (Episode.downloaded)
@ -103,9 +102,9 @@ def main() -> None:
f.write("\n".join(sorted(p for p in all_people if "\n" not in p))) f.write("\n".join(sorted(p for p in all_people if "\n" not in p)))
file = srtdir / f"{episode.id}.srt" file = srtdir / f"{episode.id}.srt"
strtext = file.read_text() strtext = file.read_text()
subtitlelines: List[Subtitle] = list(parse(strtext)) subtitlelines: list[Subtitle] = list(parse(strtext))
print(episode.video_number, episode.pretty_title) print(episode.video_number, episode.pretty_title)
person: Optional[Person] = None person: Person | None = None
with db.atomic(): with db.atomic():
dblines = [] dblines = []
i = 0 i = 0

View file

@ -1,6 +1,5 @@
import os import os
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict
import en_core_web_md import en_core_web_md
from alive_progress import alive_bar from alive_progress import alive_bar
@ -23,7 +22,7 @@ class Noun:
count: int = 1 count: int = 1
lemma_cache: Dict[str, str] = {} lemma_cache: dict[str, str] = {}
nlp: Language = en_core_web_md.load(disable=["ner", "textcat"]) nlp: Language = en_core_web_md.load(disable=["ner", "textcat"])
nlp.Defaults.stop_words = STOP_WORDS nlp.Defaults.stop_words = STOP_WORDS
@ -48,7 +47,7 @@ for episode in Episode.select().where((Episode.phrases_imported == False) & (Epi
print("run nlp") print("run nlp")
doc = nlp(text) doc = nlp(text)
print("nlp finished") print("nlp finished")
nouns: Dict[str, Noun] = {} nouns: dict[str, Noun] = {}
chunk: Span chunk: Span
noun_chunks = list(doc.noun_chunks) noun_chunks = list(doc.noun_chunks)
with alive_bar(len(noun_chunks), title='lemmatizing and counting') as bar: with alive_bar(len(noun_chunks), title='lemmatizing and counting') as bar:

View file

@ -1,6 +1,5 @@
import random import random
import time import time
from typing import List
from flask import request, jsonify, Response, abort, g from flask import request, jsonify, Response, abort, g
from peewee import fn, Alias, SQL, DoesNotExist, Expression, ModelSelect, JOIN from peewee import fn, Alias, SQL, DoesNotExist, Expression, ModelSelect, JOIN
@ -19,6 +18,7 @@ from suggestions import suggestions
app.register_blueprint(ssr_routes) app.register_blueprint(ssr_routes)
def add_cors(response: Response) -> Response: def add_cors(response: Response) -> Response:
header = response.headers header = response.headers
header['Access-Control-Allow-Origin'] = '*' header['Access-Control-Allow-Origin'] = '*'
@ -34,7 +34,7 @@ def before_request():
def after_request(response: Response): def after_request(response: Response):
diff = time.perf_counter() - g.start diff = time.perf_counter() - g.start
if response.response: if response.response:
response.headers.set("Server-Timing", f"server;dur={diff *1000 :.5f}") response.headers.set("Server-Timing", f"server;dur={diff * 1000 :.5f}")
return response return response
@ -194,11 +194,11 @@ def series():
@app.route("/api/episodes") @app.route("/api/episodes")
@cache.cached(timeout=60 * 60 * 24) @cache.cached(timeout=60 * 60 * 24)
def api_episodes(): def api_episodes():
all_series: List[Series] = Series.select().order_by(Series.order) all_series: list[Series] = Series.select().order_by(Series.order)
data = [] data = []
for series in all_series: for series in all_series:
episodes: List[Episode] = Episode.select().where(Episode.series == series).order_by(Episode.video_number) episodes: list[Episode] = Episode.select().where(Episode.series == series).order_by(Episode.video_number)
series_data = [] series_data = []
for episode in episodes: for episode in episodes:
@ -241,7 +241,7 @@ def transcript():
(Episode.series.slug == series) (Episode.series.slug == series)
).join(Series).get() ).join(Series).get()
lines: List[Line] = Line.select(Line, Person).where( lines: list[Line] = Line.select(Line, Person).where(
(Episode.episode_number == episode_number) (Episode.episode_number == episode_number)
& &
(Episode.series.slug == series) (Episode.series.slug == series)

View file

@ -27,7 +27,7 @@ class Stats(ABC):
class MultiColumnStats(Stats): class MultiColumnStats(Stats):
def as_data(self) -> List[Dict[str, Any]]: def as_data(self) -> list[dict[str, Any]]:
data = [] data = []
cur = self.execute() cur = self.execute()
column_names = [d.name for d in cur.description] column_names = [d.name for d in cur.description]

View file

@ -28,14 +28,13 @@ That said, I try to only use phrases that don't contain spoilers themselves.
# #
""" """
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional
@dataclass @dataclass
class Suggestion: class Suggestion:
text: str text: str
# only show this suggestion to people who have watched at least this episode # only show this suggestion to people who have watched at least this episode
episode: Optional[int] = None episode: int | None = None
suggestions = { suggestions = {

View file

@ -1,4 +1,7 @@
select e.pretty_title, text,char_length(line.text) as len from line join episode e on e.id = line.episode_id order by len desc; select e.pretty_title, text, char_length(line.text) as len
from line
join episode e on e.id = line.episode_id
order by len desc;
SELECT pg_size_pretty(pg_relation_size('phrase')); SELECT pg_size_pretty(pg_relation_size('phrase'));
@ -6,12 +9,15 @@ SELECT pg_size_pretty(pg_relation_size('phrase'));
delete delete
from phrase; from phrase;
delete from line; delete
from line;
update episode update episode
set text_imported= False, phrases_imported=False; set text_imported= False,
phrases_imported= False;
update person set color=null; update person
set color=null;
EXPLAIN analyse EXPLAIN analyse
SELECT text, sum(count) as total_count SELECT text, sum(count) as total_count
@ -76,3 +82,20 @@ SELECT *
FROM ts_stat('SELECT search_text from line') FROM ts_stat('SELECT search_text from line')
order by nentry desc order by nentry desc
limit 500; limit 500;
SELECT *, ts_rank("search_text", websearch_to_tsquery('english', 'I cast regret')) AS "rank"
FROM line
INNER JOIN person ON (line.person_id = person.id)
INNER JOIN episode ON (line.episode_id = episode.id)
WHERE (
(line.search_text @@ websearch_to_tsquery('english', 'I cast regret')) AND
(episode.episode_number <= 1000) AND
(episode.series_id = 2)
)
ORDER BY rank DESC
LIMIT 20;
select websearch_to_tsquery('english', 'I cast regret');
INSERT INTO line (text, search_text, ...) values ('This is a longer example text', to_tsvector('english', 'This is a longer example text'));
select to_tsvector('english', 'This is a longer example text');

View file

@ -1,7 +1,6 @@
import re import re
from datetime import timedelta from datetime import timedelta
from pathlib import Path from pathlib import Path
from typing import Optional
from app import cache from app import cache
from data import single_speaker from data import single_speaker
@ -17,7 +16,7 @@ def milliseconds_to_td(ms: int) -> timedelta:
return timedelta(milliseconds=ms) return timedelta(milliseconds=ms)
def episode_speaker(series_title: str, episode: int) -> Optional[str]: def episode_speaker(series_title: str, episode: int) -> str | None:
try: try:
series = single_speaker[series_title] series = single_speaker[series_title]
except KeyError: except KeyError:

12
web/package-lock.json generated
View file

@ -3976,9 +3976,9 @@
} }
}, },
"node_modules/caniuse-lite": { "node_modules/caniuse-lite": {
"version": "1.0.30001363", "version": "1.0.30001458",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001363.tgz", "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001458.tgz",
"integrity": "sha512-HpQhpzTGGPVMnCjIomjt+jvyUu8vNFo3TaDiZ/RcoTrlOq/5+tC8zHdsbgFB6MxmaY+jCpsH09aD80Bb4Ow3Sg==", "integrity": "sha512-lQ1VlUUq5q9ro9X+5gOEyH7i3vm+AYVT1WDCVB69XOZ17KZRhnZ9J0Sqz7wTHQaLBJccNCHq8/Ww5LlOIZbB0w==",
"dev": true, "dev": true,
"funding": [ "funding": [
{ {
@ -15459,9 +15459,9 @@
} }
}, },
"caniuse-lite": { "caniuse-lite": {
"version": "1.0.30001363", "version": "1.0.30001458",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001363.tgz", "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001458.tgz",
"integrity": "sha512-HpQhpzTGGPVMnCjIomjt+jvyUu8vNFo3TaDiZ/RcoTrlOq/5+tC8zHdsbgFB6MxmaY+jCpsH09aD80Bb4Ow3Sg==", "integrity": "sha512-lQ1VlUUq5q9ro9X+5gOEyH7i3vm+AYVT1WDCVB69XOZ17KZRhnZ9J0Sqz7wTHQaLBJccNCHq8/Ww5LlOIZbB0w==",
"dev": true "dev": true
}, },
"case-sensitive-paths-webpack-plugin": { "case-sensitive-paths-webpack-plugin": {

View file

@ -11,7 +11,7 @@
experimental transcript view.</p> experimental transcript view.</p>
<p>If you have any feedback, ideas for improvements or bugs, feel free to contact me at <p>If you have any feedback, ideas for improvements or bugs, feel free to contact me at
<a href="mailto:cr@lw1.at">cr@lw1.at</a> or <a href="https://twitter.com/lw1_at">on Twitter</a>.</p> <a href="mailto:cr@lw1.at">cr@lw1.at</a> or <a href="https://twitter.com/lw1_at">on Twitter</a>.</p>
<p>You can learn more about this website <a href="https://lw1.at/en/cr-search/">here</a>.</p>
<div class="footnote-list"> <div class="footnote-list">
<ol> <ol>
<li>all episodes with manually created subtitles (including Shows and One-Shots)</li> <li>all episodes with manually created subtitles (including Shows and One-Shots)</li>