diff --git a/benchmark.py b/benchmark.py
index cf7c578..60cfea3 100644
--- a/benchmark.py
+++ b/benchmark.py
@@ -1,7 +1,6 @@
 import json
 import shutil
 from statistics import mean, stdev
-from typing import Tuple
 
 from alive_progress import alive_bar
 from peewee import SelectQuery
@@ -11,7 +10,7 @@
 from app import db
 from server import search, suggest, exact_search
 
 
-def benchmark_query(query: SelectQuery, filename: str = None) -> Tuple[float, float]:
+def benchmark_query(query: SelectQuery, filename: str = None) -> tuple[float, float]:
     query, params = query.sql()
     query = "EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) " + query
diff --git a/data.py b/data.py
index f15907c..694d655 100644
--- a/data.py
+++ b/data.py
@@ -1,5 +1,4 @@
 from dataclasses import dataclass
-from typing import Optional, List
 
 colors = {
     "campaign1": {
@@ -94,10 +93,10 @@ assert set(single_speaker["Handbooker Helper"].keys()) == set(range(1, 44 + 1))
 class SeriesData:
     name: str
     slug: str
-    playlist_id: Optional[str] = None
-    videos: Optional[List[str]] = None
+    playlist_id: str | None = None
+    videos: list[str] | None = None
     single_speaker: bool = False
-    initial_speaker: Optional[str] = None
+    initial_speaker: str | None = None
 
 
 series_data = [
diff --git a/fetch.py b/fetch.py
index cc44fe2..bcba184 100644
--- a/fetch.py
+++ b/fetch.py
@@ -64,7 +64,7 @@ def main(args: argparse.Namespace) -> None:
             f.write(r.content)
         changed = False
         try:
-            e = Episode.select().where((Episode.youtube_id == url)).get()
+            e = Episode.select().where(Episode.youtube_id == url).get()
             if args.skip_existing and e.downloaded:
                 continue
         except DoesNotExist:
diff --git a/import.py b/import.py
index a92532b..8a66008 100644
--- a/import.py
+++ b/import.py
@@ -1,7 +1,6 @@
 import os
 import re
 from html import unescape
-from typing import List, Optional, Set, Union
 
 from alive_progress import alive_bar
 from peewee import fn, chunked
@@ -29,13 +28,13 @@ def add_to_text(text: str, add: str) -> str:
     return add
 
 
-def line_key(line: Line) -> Union[str, Line]:
+def line_key(line: Line) -> str | Line:
     if line.ismeta or line.isnote:
         return line
     return line.person
 
 
-def group_lines(dblines: List[Line]) -> List[Line]:
+def group_lines(dblines: list[Line]) -> list[Line]:
     final_lines = []
     order = 0
 
@@ -74,7 +73,7 @@
     return final_lines
 
 
-def insert_subtitle(text: str, person: Optional[Person], subline: Subtitle, episode: Episode, order: int,
+def insert_subtitle(text: str, person: Person | None, subline: Subtitle, episode: Episode, order: int,
                     isnote: bool = False, ismeta: bool = False) -> Line:
     dbline = Line()
     if not text:
@@ -94,7 +93,7 @@
 def main() -> None:
     os.nice(15)
 
-    all_people: Set[str] = set()
+    all_people: set[str] = set()
     for series in Series.select().order_by(Series.id):
         for episode in Episode.select().where(
                 (Episode.text_imported == False) & (Episode.series == series) & (Episode.downloaded)
@@ -103,9 +102,9 @@ def main() -> None:
                 f.write("\n".join(sorted(p for p in all_people if "\n" not in p)))
             file = srtdir / f"{episode.id}.srt"
             strtext = file.read_text()
-            subtitlelines: List[Subtitle] = list(parse(strtext))
+            subtitlelines: list[Subtitle] = list(parse(strtext))
             print(episode.video_number, episode.pretty_title)
-            person: Optional[Person] = None
+            person: Person | None = None
             with db.atomic():
                 dblines = []
                 i = 0
diff --git a/phrases.py b/phrases.py
index 0d1f5ad..1fa941b 100644
--- a/phrases.py
+++ b/phrases.py
@@ -1,6 +1,5 @@
 import os
 from dataclasses import dataclass
-from typing import Dict
 
 import en_core_web_md
 from alive_progress import alive_bar
@@ -23,7 +22,7 @@ class Noun:
     count: int = 1
 
 
-lemma_cache: Dict[str, str] = {}
+lemma_cache: dict[str, str] = {}
 nlp: Language = en_core_web_md.load(disable=["ner", "textcat"])
 nlp.Defaults.stop_words = STOP_WORDS
 
@@ -48,7 +47,7 @@ for episode in Episode.select().where((Episode.phrases_imported == False) & (Epi
     print("run nlp")
     doc = nlp(text)
     print("nlp finished")
-    nouns: Dict[str, Noun] = {}
+    nouns: dict[str, Noun] = {}
     chunk: Span
     noun_chunks = list(doc.noun_chunks)
     with alive_bar(len(noun_chunks), title='lemmatizing and counting') as bar:
diff --git a/server.py b/server.py
index f00b158..4360904 100644
--- a/server.py
+++ b/server.py
@@ -1,6 +1,5 @@
 import random
 import time
-from typing import List
 
 from flask import request, jsonify, Response, abort, g
 from peewee import fn, Alias, SQL, DoesNotExist, Expression, ModelSelect, JOIN
@@ -19,6 +18,7 @@ from suggestions import suggestions
 
 app.register_blueprint(ssr_routes)
 
+
 def add_cors(response: Response) -> Response:
     header = response.headers
     header['Access-Control-Allow-Origin'] = '*'
@@ -34,7 +34,7 @@ def before_request():
 def after_request(response: Response):
     diff = time.perf_counter() - g.start
     if response.response:
-        response.headers.set("Server-Timing", f"server;dur={diff *1000 :.5f}")
+        response.headers.set("Server-Timing", f"server;dur={diff * 1000 :.5f}")
     return response
 
 
@@ -194,11 +194,11 @@ def series():
 @app.route("/api/episodes")
 @cache.cached(timeout=60 * 60 * 24)
 def api_episodes():
-    all_series: List[Series] = Series.select().order_by(Series.order)
+    all_series: list[Series] = Series.select().order_by(Series.order)
     data = []
 
     for series in all_series:
-        episodes: List[Episode] = Episode.select().where(Episode.series == series).order_by(Episode.video_number)
+        episodes: list[Episode] = Episode.select().where(Episode.series == series).order_by(Episode.video_number)
         series_data = []
 
         for episode in episodes:
@@ -241,7 +241,7 @@ def transcript():
         (Episode.series.slug == series)
     ).join(Series).get()
 
-    lines: List[Line] = Line.select(Line, Person).where(
+    lines: list[Line] = Line.select(Line, Person).where(
         (Episode.episode_number == episode_number) &
         (Episode.series.slug == series)
diff --git a/stats.py b/stats.py
index 05a4967..0b68f4e 100644
--- a/stats.py
+++ b/stats.py
@@ -27,7 +27,7 @@ class Stats(ABC):
 
 
 class MultiColumnStats(Stats):
-    def as_data(self) -> List[Dict[str, Any]]:
+    def as_data(self) -> list[dict[str, Any]]:
         data = []
         cur = self.execute()
         column_names = [d.name for d in cur.description]
diff --git a/suggestions.py b/suggestions.py
index d3877fa..54cfc43 100644
--- a/suggestions.py
+++ b/suggestions.py
@@ -28,14 +28,13 @@ That said, I try to only use phrases that don't contain spoilers themselves.
 # """
 
 from dataclasses import dataclass
-from typing import Optional
 
 
 @dataclass
 class Suggestion:
     text: str
     # only show this suggestion to people who have watched at least this episode
-    episode: Optional[int] = None
+    episode: int | None = None
 
 
 suggestions = {
diff --git a/tests.sql b/tests.sql
index 5b65d14..344fca8 100644
--- a/tests.sql
+++ b/tests.sql
@@ -1,4 +1,7 @@
-select e.pretty_title, text,char_length(line.text) as len from line join episode e on e.id = line.episode_id order by len desc;
+select e.pretty_title, text, char_length(line.text) as len
+from line
+    join episode e on e.id = line.episode_id
+order by len desc;
 
 SELECT pg_size_pretty(pg_relation_size('phrase'));
 
@@ -6,12 +9,15 @@
 
 delete from phrase;
 
-delete from line;
+delete
+from line;
 
 update episode
-set text_imported= False, phrases_imported=False;
+set text_imported= False,
+    phrases_imported= False;
 
-update person set color=null;
+update person
+set color=null;
 
 EXPLAIN analyse
 SELECT text, sum(count) as total_count
@@ -76,3 +82,20 @@ SELECT *
 FROM ts_stat('SELECT search_text from line')
 order by nentry desc
 limit 500;
+
+SELECT *, ts_rank("search_text", websearch_to_tsquery('english', 'I cast regret')) AS "rank"
+FROM line
+    INNER JOIN person ON (line.person_id = person.id)
+    INNER JOIN episode ON (line.episode_id = episode.id)
+WHERE (
+    (line.search_text @@ websearch_to_tsquery('english', 'I cast regret')) AND
+    (episode.episode_number <= 1000) AND
+    (episode.series_id = 2)
+    )
+ORDER BY rank DESC
+LIMIT 20;
+
+select websearch_to_tsquery('english', 'I cast regret');
+
+INSERT INTO line (text, search_text, ...) values ('This is a longer example text', to_tsvector('english', 'This is a longer example text'));
+select to_tsvector('english', 'This is a longer example text');
diff --git a/utils.py b/utils.py
index acda9f9..c021c79 100644
--- a/utils.py
+++ b/utils.py
@@ -1,7 +1,6 @@
 import re
 from datetime import timedelta
 from pathlib import Path
-from typing import Optional
 
 from app import cache
 from data import single_speaker
@@ -17,7 +16,7 @@ def milliseconds_to_td(ms: int) -> timedelta:
     return timedelta(milliseconds=ms)
 
 
-def episode_speaker(series_title: str, episode: int) -> Optional[str]:
+def episode_speaker(series_title: str, episode: int) -> str | None:
     try:
         series = single_speaker[series_title]
     except KeyError:
diff --git a/web/package-lock.json b/web/package-lock.json
index a110005..4639093 100644
--- a/web/package-lock.json
+++ b/web/package-lock.json
@@ -3976,9 +3976,9 @@
       }
     },
     "node_modules/caniuse-lite": {
-      "version": "1.0.30001363",
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001363.tgz",
-      "integrity": "sha512-HpQhpzTGGPVMnCjIomjt+jvyUu8vNFo3TaDiZ/RcoTrlOq/5+tC8zHdsbgFB6MxmaY+jCpsH09aD80Bb4Ow3Sg==",
+      "version": "1.0.30001458",
+      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001458.tgz",
+      "integrity": "sha512-lQ1VlUUq5q9ro9X+5gOEyH7i3vm+AYVT1WDCVB69XOZ17KZRhnZ9J0Sqz7wTHQaLBJccNCHq8/Ww5LlOIZbB0w==",
       "dev": true,
       "funding": [
         {
@@ -15459,9 +15459,9 @@
      }
    },
    "caniuse-lite": {
-      "version": "1.0.30001363",
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001363.tgz",
-      "integrity": "sha512-HpQhpzTGGPVMnCjIomjt+jvyUu8vNFo3TaDiZ/RcoTrlOq/5+tC8zHdsbgFB6MxmaY+jCpsH09aD80Bb4Ow3Sg==",
+      "version": "1.0.30001458",
+      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001458.tgz",
+      "integrity": "sha512-lQ1VlUUq5q9ro9X+5gOEyH7i3vm+AYVT1WDCVB69XOZ17KZRhnZ9J0Sqz7wTHQaLBJccNCHq8/Ww5LlOIZbB0w==",
      "dev": true
    },
    "case-sensitive-paths-webpack-plugin": {
diff --git a/web/src/components/Intro.vue b/web/src/components/Intro.vue
index e17fcd4..94e5285 100644
--- a/web/src/components/Intro.vue
+++ b/web/src/components/Intro.vue
@@ -11,7 +11,7 @@ experimental transcript view.

 If you have any feedback, ideas for improvements or bugs, feel free to contact me at cr@lw1.at or on Twitter.
 
-
+
 
 You can learn more about this website here.
 
   1. all episodes with manually created subtitles (including Shows and One-Shots)