1
0
Fork 0
mirror of https://github.com/Findus23/cr-search.git synced 2024-09-19 15:23:44 +02:00
cr-search/server.py

285 lines
8.8 KiB
Python
Raw Normal View History

2021-07-13 22:21:26 +02:00
import random
2023-02-26 23:43:17 +01:00
import time
2020-03-08 18:48:14 +01:00
2023-02-26 23:43:17 +01:00
from flask import request, jsonify, Response, abort, g
2021-07-16 11:50:56 +02:00
from peewee import fn, Alias, SQL, DoesNotExist, Expression, ModelSelect, JOIN
from playhouse.postgres_ext import TS_MATCH
2020-03-07 10:45:39 +01:00
from playhouse.shortcuts import model_to_dict
from psycopg2._psycopg import cursor
2021-07-07 17:27:21 +02:00
from app import app, db, cache
2020-03-07 10:45:39 +01:00
from models import *
2020-08-15 12:27:16 +02:00
# logger = logging.getLogger('peewee')
# logger.addHandler(logging.StreamHandler())
# logger.setLevel(logging.DEBUG)
2021-11-28 19:37:43 +01:00
from ssr import ssr_routes
2023-02-26 23:43:17 +01:00
from stats import aggregate_stats
2021-07-13 22:21:26 +02:00
from suggestions import suggestions
2020-03-08 14:48:04 +01:00
2021-11-28 19:37:43 +01:00
# Attach the routes from the imported ``ssr_routes`` blueprint to the application.
app.register_blueprint(ssr_routes)
2020-03-08 14:48:04 +01:00
2023-04-25 22:51:19 +02:00
2020-08-30 22:11:28 +02:00
def add_cors(response: Response) -> Response:
    """Allow any origin to read ``response``.

    Sets ``Access-Control-Allow-Origin: *`` on the response in place and
    returns the same object, so the function can be used as an
    ``after_request`` hook.
    """
    response.headers['Access-Control-Allow-Origin'] = '*'
    return response
2023-02-26 23:43:17 +01:00
@app.before_request
def before_request():
    """Remember when handling of the current request started.

    The timestamp is stored on ``g.start`` so that it can be read again
    once the response has been produced.
    """
    started_at = time.perf_counter()
    g.start = started_at
@app.after_request
def after_request(response: Response):
    """Report the request handling time in a ``Server-Timing`` header.

    The duration is measured from ``g.start`` and written in milliseconds.
    Responses whose ``response`` attribute is empty are returned untouched.
    """
    elapsed_ms = (time.perf_counter() - g.start) * 1000
    if not response.response:
        return response
    response.headers.set("Server-Timing", f"server;dur={elapsed_ms:.5f}")
    return response
def suggest(query: str, until: int, series: str, limit: int = 10) -> ModelSelect:
    """Build the query for phrase suggestions containing ``query``.

    Phrases are restricted to episodes of the series with slug ``series``
    whose episode number is at most ``until``. Rows are grouped by phrase
    text, ordered by the summed phrase count (highest first) and capped at
    ``limit`` rows. The query is returned unexecuted.
    """
    total_count = Alias(fn.SUM(Phrase.count), "total_count")
    matches_request = (
        (Episode.series.slug == series)
        & (Episode.episode_number <= until)
        & (Phrase.text.contains(query))
    )
    return (
        Phrase.select(Phrase.text, total_count)
        .join(Episode)
        .join(Series)
        .where(matches_request)
        .group_by(Phrase.text)
        .order_by(SQL("total_count DESC"))
        .limit(limit)
    )
def search(query: str, until: int, series: str, limit: int = 50) -> ModelSelect:
    """Build the ranked full-text search query over transcript lines.

    ``query`` is parsed with PostgreSQL's ``websearch_to_tsquery`` (English
    configuration) and matched against ``Line.search_text``. Only lines from
    the series with slug ``series`` and an episode number of at most
    ``until`` are considered. Each row carries a ``rank`` column and the
    best-ranked ``limit`` rows are selected. The query is returned unexecuted.
    """
    ts_query = fn.websearch_to_tsquery('english', query)
    # 1 + 4 is the ts_rank_cd normalization bitmask: 1 divides the rank by
    # 1 + log(document length), 4 by the mean harmonic distance between extents.
    rank = Alias(fn.ts_rank_cd(Line.search_text, ts_query, 1 + 4), "rank")
    matches_request = (
        Expression(Line.search_text, TS_MATCH, ts_query)
        & (Episode.episode_number <= until)
        & (Episode.series.slug == series)
    )
    return (
        Line.select(Line, Person, Episode, Series, rank)
        .join(Person, join_type=JOIN.FULL)
        .switch(Line)
        .join(Episode)
        .join(Series)
        .where(matches_request)
        .order_by(SQL("rank DESC"))
        .limit(limit)
    )
2021-07-06 17:39:25 +02:00
def exact_search(query: str, until: int, series: str, limit: int = 50) -> ModelSelect:
    """Build the substring search query over transcript lines.

    Selects lines whose ``text`` contains ``query``, limited to the series
    with slug ``series`` and episodes numbered at most ``until``. Rows are
    ordered by the episode's video number and then by line order, capped at
    ``limit``. The query is returned unexecuted.
    """
    matches_request = (
        (Episode.episode_number <= until)
        & (Episode.series.slug == series)
        & (Line.text.contains(query))
    )
    return (
        Line.select(Line, Person, Episode, Series)
        .join(Person)
        .switch(Line)
        .join(Episode)
        .join(Series)
        .where(matches_request)
        .order_by(Episode.video_number, Line.order)
        .limit(limit)
    )
# Model fields that the search, expand and transcript endpoints leave out of
# their model_to_dict() output.
global_excludes = [
    Line.search_text,
    Episode.phrases_imported,
    Episode.text_imported,
    Person.series,
    Episode.title,
]
2020-04-07 11:08:52 +02:00
2020-03-08 14:48:04 +01:00
@app.route("/api/suggest")
def api_question():
    """Return phrase suggestions for a partial query as a JSON list of strings.

    Query parameters:
        query:  text the suggested phrases must contain (required, at most
                500 characters).
        until:  highest episode number to consider, or "-" for no practical
                limit (required).
        series: slug of the series to search in (required).

    Responds with 400 when a parameter is missing, when ``until`` is neither
    "-" nor an integer, or when ``query`` is too long. Results for queries
    shorter than three characters are cached for a week.
    """
    query: str = request.args.get('query')
    until = request.args.get('until')
    series = request.args.get('series')
    if not query or not until or not series:
        return "no suggest query", 400
    if len(query) > 500:
        return "too long query", 400
    if until == "-":
        until = 1000
    else:
        # suggest() expects an integer; reject garbage here instead of letting
        # it reach the database and surface as a server error.
        try:
            until = int(until)
        except ValueError:
            return "invalid until", 400

    cacheable = len(query) < 3
    cache_key = f"suggest_{until}_{series}_{query}"
    if cacheable:
        result = cache.get(cache_key)
        # Compare against None so that a cached empty list counts as a hit.
        if result is not None:
            return jsonify(result)
    result = [phrase.text for phrase in suggest(query, until, series)]
    if cacheable:
        cache.set(cache_key, result, timeout=60 * 60 * 24 * 7)
    return jsonify(result)
2020-03-07 10:45:39 +01:00
2020-03-08 14:48:04 +01:00
@app.route("/api/search")
def api_search():
    """Run a full-text search over transcript lines and return the hits as JSON.

    Query parameters:
        query:  search text (required, at most 500 characters).
        until:  highest episode number to consider, or "-" for no practical
                limit (required).
        series: slug of the series to search in (required).

    On success the response is a JSON list with one object per hit, holding
    ``centerID`` (id of the matching line), ``resultID`` (position in the
    result list), ``offset`` (always 1) and ``lines`` (a one-element list
    with the serialised line, including its ``rank``).

    Responds with 400 when a parameter is missing, when ``until`` is neither
    "-" nor an integer, or when ``query`` is too long. When nothing matches,
    a warning object is returned: with status 200 if the parsed query is
    empty (only stop words), otherwise with status 404.
    """
    query = request.args.get('query')
    until = request.args.get('until')
    series = request.args.get('series')
    exact = False  # don't allow exact searches
    if not query or not until or not series:
        return "no search query", 400
    if len(query) > 500:
        return "too long query", 400
    if until == "-":
        until = 1000
    else:
        # The query builders expect an integer; reject garbage here instead of
        # letting it reach the database and surface as a server error.
        try:
            until = int(until)
        except ValueError:
            return "invalid until", 400

    if exact:
        results = exact_search(query, until, series)
    else:
        results = search(query, until, series)

    if len(results) == 0:
        # Ask PostgreSQL how it parsed the query so the user can be told why
        # nothing was found.
        result: cursor = db.execute_sql("select websearch_to_tsquery('english',%s)", [query])
        parsed = result.fetchone()[0]
        if not parsed:
            return jsonify({
                "status": "warning",
                "message": "Only stop words were used. Please try to add a less common word to the search."
            })
        resp: Response = jsonify({"status": "warning", "message": f"No results were found for {parsed}"})
        resp.status_code = 404
        return resp

    extra_attrs = [] if exact else ["rank"]
    excluded = global_excludes + [Episode.subtitle_hash]
    data = []
    line: Line
    for result_id, line in enumerate(results):
        entry = model_to_dict(line, extra_attrs=extra_attrs, exclude=excluded)
        if not exact:
            entry["rank"] = float(entry["rank"])
        data.append({"centerID": line.id, "resultID": result_id, "offset": 1, "lines": [entry]})
    return jsonify(data)
2020-03-08 14:48:04 +01:00
@app.route("/api/expand")
def api_expand():
    """Return the lines surrounding a search hit as a JSON list.

    Query parameters:
        centerID: id of the line to expand around (required).
        offset:   distance from the centre line (integer, defaults to 1).

    Only the lines of the same episode whose ``order`` is exactly ``offset``
    below or above the centre line's are returned, not the whole range in
    between.

    Responds with 400 when ``centerID`` is missing or ``offset`` is not an
    integer, and with 404 when no line has the given id.
    """
    center_id = request.args.get('centerID')
    if not center_id:
        return "no central line ID", 400
    try:
        offset = int(request.args.get('offset', 1))
    except ValueError:
        # A non-numeric offset is a client error, not a server error.
        return "invalid offset", 400
    try:
        center: Line = Line.select().where(Line.id == center_id).get()
    except DoesNotExist:
        return "not found", 404
    neighbours = Line.select().where(
        (Line.episode == center.episode)
        & (Line.order << [center.order - offset, center.order + offset])
    )
    return jsonify([model_to_dict(line, exclude=global_excludes) for line in neighbours])
2021-05-25 20:53:09 +02:00
@app.route("/api/series")
@cache.cached(timeout=60 * 60 * 24)
def series():
    """List all series that have at least one episode, as JSON.

    Series are visited in ``Series.order`` order. Each entry is the
    serialised series (without its ``order`` field) extended with
    ``last_upload`` (upload date of the most recently uploaded episode,
    ``YYYY-MM-DD``) and ``length`` (number of episodes). The response is
    cached for one day.
    """
    payload = []
    for current in Series.select().order_by(Series.order):
        episodes_of_series = Episode.select().where(Episode.series == current)
        try:
            newest: Episode = (
                episodes_of_series
                .order_by(Episode.upload_date.desc())
                .limit(1)
                .get()
            )
        except DoesNotExist:
            # A series without any episode is left out of the listing.
            continue
        info = model_to_dict(current, exclude=[Series.order])
        info["last_upload"] = newest.upload_date.strftime("%Y-%m-%d")
        info["length"] = episodes_of_series.count()
        payload.append(info)
    return jsonify({"series": payload})
2020-08-07 22:29:54 +02:00
@app.route("/api/episodes")
@cache.cached(timeout=60 * 60 * 24)
def api_episodes():
    """Return every series together with its episodes as JSON.

    The result is a list with one object per series (in ``Series.order``
    order) holding ``meta`` (the serialised series) and ``episodes`` (its
    episodes ordered by video number, without the ``series`` and ``title``
    fields, with ``upload_date`` formatted as ``YYYY-MM-DD`` when set).
    The response is cached for one day.
    """

    def serialise(episode: Episode) -> dict:
        # Turn one episode into a JSON-ready dict with a formatted upload date.
        entry = model_to_dict(episode, exclude=[Episode.series, Episode.title])
        upload_date = entry["upload_date"]
        if upload_date:
            entry["upload_date"] = upload_date.strftime("%Y-%m-%d")
        return entry

    data = []
    for current_series in Series.select().order_by(Series.order):
        episodes_query = (
            Episode.select()
            .where(Episode.series == current_series)
            .order_by(Episode.video_number)
        )
        episode_entries = [serialise(episode) for episode in episodes_query]
        data.append({
            "meta": model_to_dict(current_series),
            "episodes": episode_entries
        })
    return jsonify(data)
2021-07-13 22:21:26 +02:00
@app.route("/api/suggestion")
def api_suggestion():
    """Return one randomly chosen example suggestion as plain text.

    Query parameters:
        series: key into ``suggestions`` selecting the series (required).
        until:  highest episode a suggestion may belong to, or "-" to allow
                all of them (required).

    Aborts with 404 when the series has no suggestions or none of them is
    allowed by ``until``, and with 400 when ``until`` is missing or is
    neither "-" nor an integer.
    """
    until = request.args.get('until')
    series = request.args.get('series')
    if series not in suggestions:
        abort(404)
    all_suggestions = suggestions[series]
    if until == "-":
        possible_suggestions = [s.text for s in all_suggestions]
    else:
        try:
            max_episode = int(until)
        except (TypeError, ValueError):
            # TypeError: parameter missing (None); ValueError: not a number.
            abort(400)
        possible_suggestions = [s.text for s in all_suggestions if s.episode <= max_episode]
    if not possible_suggestions:
        # random.choice() raises IndexError on an empty sequence.
        abort(404)
    chosen_suggestion = random.choice(possible_suggestions)
    return Response(chosen_suggestion, mimetype='text/plain')
2021-10-26 18:10:00 +02:00
@app.route("/api/transcript")
# query_string=True makes the series/episode parameters part of the cache key;
# without it the cache key is derived from the request path only, so every
# episode would be served the first transcript that happened to be cached.
@cache.cached(timeout=60 * 60 * 24, query_string=True)
def transcript():
    """Return one episode and all of its transcript lines as JSON.

    Query parameters:
        series:  slug of the series.
        episode: episode number within that series.

    The response object holds ``episode`` (the serialised episode) and
    ``lines`` (the serialised lines in ``Line.order`` order, without their
    ``episode`` field). Aborts with 404 when no such episode exists.
    The response is cached for one day per parameter combination.
    """
    series = request.args.get('series')
    episode_number = request.args.get('episode')
    try:
        episode = Episode.select(Episode, Series).where(
            (Episode.episode_number == episode_number)
            &
            (Episode.series.slug == series)
        ).join(Series).get()
    except DoesNotExist:
        abort(404)
    lines: list[Line] = Line.select(Line, Person).where(
        (Episode.episode_number == episode_number)
        &
        (Episode.series.slug == series)
    ).order_by(Line.order) \
        .join(Person, join_type=JOIN.FULL).switch(Line) \
        .join(Episode).join(Series)
    line_excludes = global_excludes + [Line.episode]
    line_data = [model_to_dict(line, exclude=line_excludes) for line in lines]
    return jsonify({
        "episode": model_to_dict(episode, exclude=global_excludes),
        "lines": line_data
    })
2021-10-26 21:03:45 +02:00
@app.route("/api/stats")
@cache.cached(timeout=60 * 60 * 24)
def stats():
    """Return the result of ``aggregate_stats(plaintext=False)`` as JSON.

    The response is cached for one day.
    """
    aggregated = aggregate_stats(plaintext=False)
    return jsonify(aggregated)
2021-10-26 21:03:45 +02:00
@app.route("/api/stats/text")
@cache.cached(timeout=60 * 60 * 24)
def stats_text():
    """Return the result of ``aggregate_stats(plaintext=True)`` as ``text/plain``.

    The response is cached for one day.
    """
    report = aggregate_stats(plaintext=True)
    return Response(report, mimetype='text/plain')
2021-10-26 21:03:45 +02:00
2020-03-07 10:45:39 +01:00
if __name__ == "__main__":
    # Development entry point: only runs when the module is executed directly.
    import logging

    # Send the 'peewee' logger's output at DEBUG level and above to a stream
    # handler (stderr by default).
    logger = logging.getLogger('peewee')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(logging.StreamHandler())

    # The permissive CORS header is only registered for this local server.
    app.after_request(add_cors)
    app.debug = True
    app.run()