mirror of
https://github.com/Findus23/cr-search.git
synced 2024-09-11 06:03:45 +02:00
add redis caching and better database
This commit is contained in:
parent
76dd2177a6
commit
4b3e829dee
11 changed files with 103 additions and 34 deletions
9
app.py
9
app.py
|
@ -1,4 +1,5 @@
|
|||
from flask import Flask
|
||||
from flask_caching import Cache
|
||||
from playhouse.flask_utils import FlaskDB
|
||||
from playhouse.pool import PooledPostgresqlDatabase
|
||||
|
||||
|
@ -9,15 +10,21 @@ DATABASE = PooledPostgresqlDatabase(**config.dbauth)
|
|||
if config.sentryDSN:
|
||||
import sentry_sdk
|
||||
from sentry_sdk.integrations.flask import FlaskIntegration
|
||||
|
||||
sentry_sdk.init(
|
||||
dsn=config.sentryDSN,
|
||||
integrations=[FlaskIntegration()]
|
||||
)
|
||||
|
||||
CACHE_TYPE = "RedisCache"
|
||||
|
||||
if config.production:
|
||||
CACHE_REDIS_URL = "unix:///run/redis-crsearch/redis-server.sock"
|
||||
|
||||
# Create a Flask WSGI app and configure it using values from the module.
|
||||
app = Flask(__name__)
|
||||
app.config.from_object(__name__)
|
||||
|
||||
cache = Cache(app)
|
||||
flask_db = FlaskDB(app)
|
||||
|
||||
db = flask_db.database
|
||||
|
|
16
benchmark.py
16
benchmark.py
|
@ -7,12 +7,12 @@ from alive_progress import alive_bar
|
|||
from peewee import SelectQuery
|
||||
from psycopg2._psycopg import cursor
|
||||
|
||||
from models import db
|
||||
from server import search, suggest
|
||||
from app import db
|
||||
from server import search, suggest, exact_search
|
||||
|
||||
|
||||
def benchmark_query(query: SelectQuery, filename: str = None) -> Tuple[float, float]:
|
||||
query, params = test_search.sql()
|
||||
query, params = query.sql()
|
||||
|
||||
query = "EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) " + query
|
||||
|
||||
|
@ -45,12 +45,14 @@ def statistics(query: SelectQuery, filename: str, repeats: int = 500) -> None:
|
|||
print(mean(execution_times), stdev(execution_times))
|
||||
|
||||
|
||||
test_search = search("hello", 1000, 1, 200)
|
||||
test_search = search("hello", 1000, "campaign2", 200)
|
||||
statistics(test_search, filename="search_hello")
|
||||
test_search = search("a very long search query with a lot of stop word", 1000, 1, 200)
|
||||
test_search = exact_search("hello", 1000, "campaign2", 200)
|
||||
statistics(test_search, filename="exact_search", repeats=50)
|
||||
test_search = search("a very long search query with a lot of stop word", 1000, "campaign2", 200)
|
||||
statistics(test_search, filename="search_long")
|
||||
|
||||
test_search = suggest("gnoll", 1000, 1)
|
||||
test_search = suggest("gnoll", 1000, "campaign2")
|
||||
statistics(test_search, filename="suggest_simple")
|
||||
test_search = suggest("gu", 1000, 1)
|
||||
test_search = suggest("gu", 1000, "campaign2")
|
||||
statistics(test_search, filename="suggest_two_letter", repeats=100)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from sys import argv
|
||||
|
||||
from models import db, Series, Phrase, Episode, Person, Line
|
||||
from app import db
|
||||
from models import Series, Phrase, Episode, Person, Line
|
||||
|
||||
|
||||
def confirm(message: str) -> None:
|
||||
|
|
4
fetch.py
4
fetch.py
|
@ -12,7 +12,7 @@ from peewee import DoesNotExist
|
|||
|
||||
from data import series_data
|
||||
from models import Episode, Series, Line, Phrase
|
||||
from utils import srtdir, pretty_title, title_to_episodenumber
|
||||
from utils import srtdir, pretty_title, title_to_episodenumber, clear_cache
|
||||
|
||||
static_path = Path("static")
|
||||
|
||||
|
@ -112,7 +112,7 @@ def main(args) -> None:
|
|||
except FileNotFoundError:
|
||||
e.downloaded = False
|
||||
e.save()
|
||||
|
||||
clear_cache()
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description="fetch episode data from YouTube")
|
||||
|
|
|
@ -7,9 +7,10 @@ from alive_progress import alive_bar
|
|||
from peewee import fn, chunked
|
||||
from srt import parse, Subtitle
|
||||
|
||||
from models import Person, Line, Episode, db, Series
|
||||
from app import db
|
||||
from models import Person, Line, Episode, Series
|
||||
from typo import fix_typo
|
||||
from utils import td_to_milliseconds, srtdir, episode_speaker
|
||||
from utils import td_to_milliseconds, srtdir, episode_speaker, clear_cache
|
||||
|
||||
|
||||
def is_invalid_name(name: str) -> bool:
|
||||
|
@ -134,6 +135,7 @@ def main() -> None:
|
|||
|
||||
episode.text_imported = True
|
||||
episode.save()
|
||||
clear_cache()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
12
models.py
12
models.py
|
@ -1,18 +1,14 @@
|
|||
from datetime import datetime
|
||||
|
||||
from peewee import PostgresqlDatabase, Model, IntegerField, CharField, BooleanField, ForeignKeyField, DateTimeField, \
|
||||
from peewee import IntegerField, CharField, BooleanField, ForeignKeyField, DateTimeField, \
|
||||
DateField
|
||||
from playhouse.postgres_ext import TSVectorField
|
||||
|
||||
from config import dbauth
|
||||
|
||||
db = PostgresqlDatabase(**dbauth)
|
||||
db.connect()
|
||||
from app import flask_db
|
||||
|
||||
|
||||
class BaseModel(Model):
|
||||
class Meta:
|
||||
database = db
|
||||
class BaseModel(flask_db.Model):
|
||||
...
|
||||
|
||||
|
||||
class Series(BaseModel):
|
||||
|
|
|
@ -9,8 +9,10 @@ from spacy.lang.en import Language
|
|||
from spacy.tokens.span import Span
|
||||
from spacy.tokens.token import Token
|
||||
|
||||
from models import Episode, Line, db, Phrase
|
||||
from app import db
|
||||
from models import Episode, Line, Phrase
|
||||
from stopwords import STOP_WORDS
|
||||
from utils import clear_cache
|
||||
|
||||
os.nice(15)
|
||||
|
||||
|
@ -81,3 +83,4 @@ for episode in Episode.select().where((Episode.phrases_imported == False) & (Epi
|
|||
|
||||
episode.phrases_imported = True
|
||||
episode.save()
|
||||
clear_cache()
|
||||
|
|
67
poetry.lock
generated
67
poetry.lock
generated
|
@ -57,6 +57,14 @@ category = "main"
|
|||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.4"
|
||||
description = "Cross-platform colored terminal text."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
|
||||
[[package]]
|
||||
name = "cymem"
|
||||
version = "2.0.5"
|
||||
|
@ -79,6 +87,7 @@ spacy = ">=3.0.0,<3.1.0"
|
|||
[package.source]
|
||||
type = "url"
|
||||
url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.0.0/en_core_web_md-3.0.0.tar.gz"
|
||||
|
||||
[[package]]
|
||||
name = "flask"
|
||||
version = "2.0.1"
|
||||
|
@ -97,6 +106,17 @@ Werkzeug = ">=2.0"
|
|||
async = ["asgiref (>=3.2)"]
|
||||
dotenv = ["python-dotenv"]
|
||||
|
||||
[[package]]
|
||||
name = "flask-caching"
|
||||
version = "1.10.1"
|
||||
description = "Adds caching support to your Flask application"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.5"
|
||||
|
||||
[package.dependencies]
|
||||
Flask = "*"
|
||||
|
||||
[[package]]
|
||||
name = "gunicorn"
|
||||
version = "20.1.0"
|
||||
|
@ -243,6 +263,17 @@ category = "main"
|
|||
optional = false
|
||||
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
|
||||
|
||||
[[package]]
|
||||
name = "redis"
|
||||
version = "3.5.3"
|
||||
description = "Python client for Redis key-value store"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
|
||||
[package.extras]
|
||||
hiredis = ["hiredis (>=0.1.3)"]
|
||||
|
||||
[[package]]
|
||||
name = "requests"
|
||||
version = "2.25.1"
|
||||
|
@ -263,7 +294,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
|
|||
|
||||
[[package]]
|
||||
name = "sentry-sdk"
|
||||
version = "1.1.0"
|
||||
version = "1.2.0"
|
||||
description = "Python client for Sentry (https://sentry.io)"
|
||||
category = "main"
|
||||
optional = false
|
||||
|
@ -284,6 +315,7 @@ chalice = ["chalice (>=1.16.0)"]
|
|||
django = ["django (>=1.8)"]
|
||||
falcon = ["falcon (>=1.4)"]
|
||||
flask = ["flask (>=0.11)", "blinker (>=1.1)"]
|
||||
httpx = ["httpx (>=0.16.0)"]
|
||||
pure_eval = ["pure-eval", "executing", "asttokens"]
|
||||
pyspark = ["pyspark (>=2.4.4)"]
|
||||
rq = ["rq (>=0.6)"]
|
||||
|
@ -367,7 +399,7 @@ transformers = ["spacy-transformers (>=1.0.1,<1.1.0)"]
|
|||
|
||||
[[package]]
|
||||
name = "spacy-legacy"
|
||||
version = "3.0.6"
|
||||
version = "3.0.7"
|
||||
description = "Legacy registered functions for spaCy backwards compatibility"
|
||||
category = "main"
|
||||
optional = false
|
||||
|
@ -429,12 +461,15 @@ torch = ["torch (>=1.5.0)"]
|
|||
|
||||
[[package]]
|
||||
name = "tqdm"
|
||||
version = "4.61.1"
|
||||
version = "4.61.2"
|
||||
description = "Fast, Extensible Progress Meter"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7"
|
||||
|
||||
[package.dependencies]
|
||||
colorama = {version = "*", markers = "platform_system == \"Windows\""}
|
||||
|
||||
[package.extras]
|
||||
dev = ["py-make (>=0.1.0)", "twine", "wheel"]
|
||||
notebook = ["ipywidgets (>=6)"]
|
||||
|
@ -500,7 +535,7 @@ python-versions = "*"
|
|||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.8"
|
||||
content-hash = "86eee8fba06068f62881fc46114eb1861a7eb3e4130c7ac0f9ffc457645b7157"
|
||||
content-hash = "08d267706eac1a2c5f03bac0f5a705d627399f2e75e64cbd5f98c1a34fa0a86c"
|
||||
|
||||
[metadata.files]
|
||||
alive-progress = [
|
||||
|
@ -541,6 +576,10 @@ click = [
|
|||
{file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"},
|
||||
{file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"},
|
||||
]
|
||||
colorama = [
|
||||
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
|
||||
{file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
|
||||
]
|
||||
cymem = [
|
||||
{file = "cymem-2.0.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:9d72d69f7a62a280199c3aa7bc550685c47d6d0689b2d299e6492253b86d2437"},
|
||||
{file = "cymem-2.0.5-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:8ea57e6923f40eb51012352161bb5707c14a5a5ce901ff72021e59df06221655"},
|
||||
|
@ -561,6 +600,10 @@ flask = [
|
|||
{file = "Flask-2.0.1-py3-none-any.whl", hash = "sha256:a6209ca15eb63fc9385f38e452704113d679511d9574d09b2cf9183ae7d20dc9"},
|
||||
{file = "Flask-2.0.1.tar.gz", hash = "sha256:1c4c257b1892aec1398784c63791cbaa43062f1f7aeb555c4da961b20ee68f55"},
|
||||
]
|
||||
flask-caching = [
|
||||
{file = "Flask-Caching-1.10.1.tar.gz", hash = "sha256:cf19b722fcebc2ba03e4ae7c55b532ed53f0cbf683ce36fafe5e881789a01c00"},
|
||||
{file = "Flask_Caching-1.10.1-py3-none-any.whl", hash = "sha256:bcda8acbc7508e31e50f63e9b1ab83185b446f6b6318bd9dd1d45626fba2e903"},
|
||||
]
|
||||
gunicorn = [
|
||||
{file = "gunicorn-20.1.0.tar.gz", hash = "sha256:e0a968b5ba15f8a328fdfd7ab1fcb5af4470c28aaf7e55df02a99bc13138e6e8"},
|
||||
]
|
||||
|
@ -730,13 +773,17 @@ pyparsing = [
|
|||
{file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"},
|
||||
{file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"},
|
||||
]
|
||||
redis = [
|
||||
{file = "redis-3.5.3-py2.py3-none-any.whl", hash = "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24"},
|
||||
{file = "redis-3.5.3.tar.gz", hash = "sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2"},
|
||||
]
|
||||
requests = [
|
||||
{file = "requests-2.25.1-py2.py3-none-any.whl", hash = "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"},
|
||||
{file = "requests-2.25.1.tar.gz", hash = "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804"},
|
||||
]
|
||||
sentry-sdk = [
|
||||
{file = "sentry-sdk-1.1.0.tar.gz", hash = "sha256:c1227d38dca315ba35182373f129c3e2722e8ed999e52584e6aca7d287870739"},
|
||||
{file = "sentry_sdk-1.1.0-py2.py3-none-any.whl", hash = "sha256:c7d380a21281e15be3d9f67a3c4fbb4f800c481d88ff8d8931f39486dd7b4ada"},
|
||||
{file = "sentry-sdk-1.2.0.tar.gz", hash = "sha256:9907adbdd30a55b818914512cc143e6beae0bb3ba78b2649f4b079752eb0e424"},
|
||||
{file = "sentry_sdk-1.2.0-py2.py3-none-any.whl", hash = "sha256:593f6118cc6d3eba4786c3f802567c937bdb81b3c8e90436e8a29e84071c6936"},
|
||||
]
|
||||
setproctitle = [
|
||||
{file = "setproctitle-1.2.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:9106bcbacae534b6f82955b176723f1b2ca6514518aab44dffec05a583f8dca8"},
|
||||
|
@ -781,8 +828,8 @@ spacy = [
|
|||
{file = "spacy-3.0.6.tar.gz", hash = "sha256:5628ab89f1f568099c880b12a9c37f4ece29ab89260660cfdf728c02711879c5"},
|
||||
]
|
||||
spacy-legacy = [
|
||||
{file = "spacy-legacy-3.0.6.tar.gz", hash = "sha256:76d102a840cae96dbcc2637e5baca0c0c001e6c43b5879112c6b4431eb0efbdb"},
|
||||
{file = "spacy_legacy-3.0.6-py2.py3-none-any.whl", hash = "sha256:3029826f8cdd0da11331917a389d53d586f274837f35ed24b9263297a4574f50"},
|
||||
{file = "spacy-legacy-3.0.7.tar.gz", hash = "sha256:c46a23d8eb8b8e95a3fd087cce6fb91d090f5c6bd2710159035f08a1fdd982e4"},
|
||||
{file = "spacy_legacy-3.0.7-py2.py3-none-any.whl", hash = "sha256:e53fea9f11a67c1b6484062bef1a11484871de3132ffc77206f5e3e5ca9c92f4"},
|
||||
]
|
||||
srsly = [
|
||||
{file = "srsly-2.4.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f7c3374184bfb1aa852bcb8e45747b02f2dde0ebe62b4ddf4b0141affeab32e1"},
|
||||
|
@ -818,8 +865,8 @@ thinc = [
|
|||
{file = "thinc-8.0.7.tar.gz", hash = "sha256:7c4382f22a5a3864b88cafe6e64f7a69f64b32147575b5f1e6f2211a8adb11f9"},
|
||||
]
|
||||
tqdm = [
|
||||
{file = "tqdm-4.61.1-py2.py3-none-any.whl", hash = "sha256:aa0c29f03f298951ac6318f7c8ce584e48fa22ec26396e6411e43d038243bdb2"},
|
||||
{file = "tqdm-4.61.1.tar.gz", hash = "sha256:24be966933e942be5f074c29755a95b315c69a91f839a29139bf26ffffe2d3fd"},
|
||||
{file = "tqdm-4.61.2-py2.py3-none-any.whl", hash = "sha256:5aa445ea0ad8b16d82b15ab342de6b195a722d75fc1ef9934a46bba6feafbc64"},
|
||||
{file = "tqdm-4.61.2.tar.gz", hash = "sha256:8bb94db0d4468fea27d004a0f1d1c02da3cdedc00fe491c0de986b76a04d6b0a"},
|
||||
]
|
||||
typer = [
|
||||
{file = "typer-0.3.2-py3-none-any.whl", hash = "sha256:ba58b920ce851b12a2d790143009fa00ac1d05b3ff3257061ff69dbdfc3d161b"},
|
||||
|
|
|
@ -18,6 +18,8 @@ youtube_dl = "^2021.1.3"
|
|||
setproctitle = "^1.2.1"
|
||||
sentry-sdk = {extras = ["flask"], version = "^1.0.0"}
|
||||
blinker = "^1.4" # tmp workaround to make flask-sentry work
|
||||
Flask-Caching = "^1.10.1"
|
||||
redis = "^3.5.3"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ from playhouse.postgres_ext import TS_MATCH
|
|||
from playhouse.shortcuts import model_to_dict
|
||||
from psycopg2._psycopg import cursor
|
||||
|
||||
from app import app
|
||||
from app import app, db, cache
|
||||
from models import *
|
||||
|
||||
|
||||
|
@ -83,6 +83,7 @@ def api_search():
|
|||
until = 1000
|
||||
series = request.args.get('series')
|
||||
exact = request.args.get('exact', False)
|
||||
exact = False # don't allow exact searches
|
||||
if not query or not until or not series:
|
||||
return "no suggest query", 400
|
||||
if len(query) > 50:
|
||||
|
@ -110,7 +111,8 @@ def api_search():
|
|||
d: Line
|
||||
ri = 0
|
||||
for d in results:
|
||||
entry = model_to_dict(d, extra_attrs=[] if exact else ["rank"] , exclude=global_excludes + [Episode.subtitle_hash])
|
||||
entry = model_to_dict(d, extra_attrs=[] if exact else ["rank"],
|
||||
exclude=global_excludes + [Episode.subtitle_hash])
|
||||
if not exact:
|
||||
entry["rank"] = float(entry["rank"])
|
||||
data.append({"centerID": d.id, "resultID": ri, "offset": 1, "lines": [entry]})
|
||||
|
@ -145,6 +147,7 @@ def api_expand():
|
|||
|
||||
|
||||
@app.route("/api/series")
|
||||
@cache.cached(timeout=60 * 60 * 24)
|
||||
def series():
|
||||
series_list = []
|
||||
|
||||
|
@ -162,6 +165,7 @@ def series():
|
|||
|
||||
|
||||
@app.route("/api/episodes")
|
||||
@cache.cached(timeout=60 * 60 * 24)
|
||||
def api_episodes():
|
||||
all_series: List[Series] = Series.select().order_by(Series.id)
|
||||
data = []
|
||||
|
|
5
utils.py
5
utils.py
|
@ -3,6 +3,7 @@ from datetime import timedelta
|
|||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from app import cache
|
||||
from data import single_speaker
|
||||
|
||||
srtdir = Path("./data/subtitles/")
|
||||
|
@ -59,3 +60,7 @@ def pretty_title(title: str) -> str:
|
|||
return title.split("-")[0].strip()
|
||||
else:
|
||||
return title.strip()
|
||||
|
||||
|
||||
def clear_cache() -> None:
|
||||
cache.clear()
|
||||
|
|
Loading…
Reference in a new issue