2021-07-13 22:21:26 +02:00
|
|
|
import random
|
2020-08-07 22:29:54 +02:00
|
|
|
from typing import List
|
2020-03-08 18:48:14 +01:00
|
|
|
|
2021-07-18 11:18:09 +02:00
|
|
|
from flask import request, jsonify, Response, abort
|
2021-07-16 11:50:56 +02:00
|
|
|
from peewee import fn, Alias, SQL, DoesNotExist, Expression, ModelSelect, JOIN
|
2020-08-07 18:09:46 +02:00
|
|
|
from playhouse.postgres_ext import TS_MATCH
|
2020-03-07 10:45:39 +01:00
|
|
|
from playhouse.shortcuts import model_to_dict
|
|
|
|
from psycopg2._psycopg import cursor
|
|
|
|
|
2021-07-07 17:27:21 +02:00
|
|
|
from app import app, db, cache
|
2020-03-07 10:45:39 +01:00
|
|
|
from models import *
|
2020-08-15 12:27:16 +02:00
|
|
|
# logger = logging.getLogger('peewee')
|
|
|
|
# logger.addHandler(logging.StreamHandler())
|
|
|
|
# logger.setLevel(logging.DEBUG)
|
2021-10-26 21:03:45 +02:00
|
|
|
from stats import TotalWords, MostCommonNounChunks, LongestNounChunks, LinesPerPerson
|
2021-07-13 22:21:26 +02:00
|
|
|
from suggestions import suggestions
|
2020-03-08 14:48:04 +01:00
|
|
|
|
|
|
|
|
2020-08-30 22:11:28 +02:00
|
|
|
def add_cors(response: Response) -> Response:
    """Attach a permissive CORS header to an outgoing response.

    Registered via app.after_request in the __main__ block below.
    """
    # NOTE(review): '*' allows any origin — confirm this is intended beyond dev.
    response.headers['Access-Control-Allow-Origin'] = '*'
    return response
|
|
|
|
|
|
|
|
|
2021-07-04 22:24:51 +02:00
|
|
|
def suggest(query: str, until: int, series: str, limit: int = 10) -> ModelSelect:
    """Build a query for the most frequent phrases containing *query*.

    Restricted to the series identified by slug *series* and to episodes up to
    *until*; phrase counts are summed across episodes and sorted descending.
    """
    total = Alias(fn.SUM(Phrase.count), "total_count")
    condition = (
        (Episode.series.slug == series)
        & (Episode.episode_number <= until)
        & (Phrase.text.contains(query))
    )
    return (Phrase.select(Phrase.text, total)
            .join(Episode)
            .join(Series)
            .where(condition)
            .group_by(Phrase.text)
            .order_by(SQL("total_count DESC"))
            .limit(limit))
|
|
|
|
|
|
|
|
|
2021-07-04 22:24:51 +02:00
|
|
|
def search(query: str, until: int, series: str, limit: int = 50) -> ModelSelect:
    """Build a ranked full-text search query over lines.

    Uses Postgres websearch_to_tsquery against Line.search_text and orders by
    ts_rank_cd (normalization flags 1 + 4). The Person join is FULL so lines
    without a speaker are still returned.
    """
    ts_query = fn.websearch_to_tsquery('english', query)
    rank = Alias(fn.ts_rank_cd(Line.search_text, ts_query, 1 + 4), "rank")
    matches = Expression(Line.search_text, TS_MATCH, ts_query)
    in_scope = (Episode.episode_number <= until) & (Episode.series.slug == series)
    return (Line.select(Line, Person, Episode, Series, rank)
            .where(matches & in_scope)
            .order_by(SQL("rank DESC"))
            .join(Person, join_type=JOIN.FULL).switch(Line)
            .join(Episode).join(Series)
            .limit(limit))
|
|
|
|
|
|
|
|
|
2021-07-06 17:39:25 +02:00
|
|
|
def exact_search(query: str, until: int, series: str, limit: int = 50) -> ModelSelect:
    """Build a plain substring search over line text (no ranking).

    Results come back in episode/line order rather than by relevance.
    """
    condition = (
        (Episode.episode_number <= until)
        & (Episode.series.slug == series)
        & (Line.text.contains(query))
    )
    return (Line.select(Line, Person, Episode, Series)
            .where(condition)
            .order_by(Episode.video_number, Line.order)
            .join(Person).switch(Line)
            .join(Episode).join(Series)
            .limit(limit))
|
|
|
|
|
|
|
|
|
2020-08-08 15:18:14 +02:00
|
|
|
# Fields stripped from every model_to_dict serialization across the API:
# search_text backs full-text search (never sent to clients), the import
# flags and titles are internal bookkeeping.
global_excludes = [Line.search_text, Episode.phrases_imported, Episode.text_imported, Person.series, Episode.title]
|
2020-04-07 11:08:52 +02:00
|
|
|
|
|
|
|
|
2020-03-08 14:48:04 +01:00
|
|
|
@app.route("/api/suggest")
|
2020-08-15 12:27:16 +02:00
|
|
|
def api_question():
|
2020-03-08 14:48:04 +01:00
|
|
|
query: str = request.args.get('query')
|
2020-03-07 10:45:39 +01:00
|
|
|
until = request.args.get('until')
|
2021-07-06 17:39:25 +02:00
|
|
|
if until == "-":
|
|
|
|
until = 1000
|
2020-04-15 18:11:45 +02:00
|
|
|
series = request.args.get('series')
|
|
|
|
if not query or not until or not series:
|
2020-03-07 10:45:39 +01:00
|
|
|
return "no suggest query", 400
|
2021-07-15 22:24:56 +02:00
|
|
|
if len(query) > 500:
|
2020-03-08 18:48:14 +01:00
|
|
|
return "too long query", 400
|
2021-07-07 18:30:28 +02:00
|
|
|
cache_key = f"suggest_{until}_{series}_{query}"
|
|
|
|
if len(query) < 3:
|
|
|
|
result = cache.get(cache_key)
|
|
|
|
if result:
|
|
|
|
return jsonify(result)
|
2020-08-15 12:27:16 +02:00
|
|
|
phrases = suggest(query, until, series)
|
2021-07-07 18:30:28 +02:00
|
|
|
result = [p.text for p in phrases]
|
|
|
|
if len(query) < 3:
|
|
|
|
cache.set(cache_key, result, timeout=60 * 60 * 24 * 7)
|
|
|
|
return jsonify(result)
|
2020-03-07 10:45:39 +01:00
|
|
|
|
|
|
|
|
2020-03-08 14:48:04 +01:00
|
|
|
@app.route("/api/search")
|
2020-08-15 12:27:16 +02:00
|
|
|
def api_search():
|
2020-03-07 10:45:39 +01:00
|
|
|
query = request.args.get('query')
|
|
|
|
until = request.args.get('until')
|
2021-07-06 17:39:25 +02:00
|
|
|
if until == "-":
|
|
|
|
until = 1000
|
2020-04-15 18:11:45 +02:00
|
|
|
series = request.args.get('series')
|
2021-07-06 17:39:25 +02:00
|
|
|
exact = request.args.get('exact', False)
|
2021-07-07 17:27:21 +02:00
|
|
|
exact = False # don't allow exact searches
|
2020-04-15 18:11:45 +02:00
|
|
|
if not query or not until or not series:
|
2021-07-07 20:08:23 +02:00
|
|
|
return "no search query", 400
|
2021-07-15 22:24:56 +02:00
|
|
|
if len(query) > 500:
|
2020-03-08 18:48:14 +01:00
|
|
|
return "too long query", 400
|
2020-03-07 10:45:39 +01:00
|
|
|
|
2021-07-06 17:39:25 +02:00
|
|
|
if exact:
|
|
|
|
results = exact_search(query, until, series)
|
|
|
|
else:
|
|
|
|
results = search(query, until, series)
|
|
|
|
|
|
|
|
if len(results) == 0:
|
|
|
|
result: cursor = db.execute_sql("select websearch_to_tsquery('english',%s)", [query])
|
|
|
|
parsed = result.fetchone()[0]
|
|
|
|
if not parsed:
|
|
|
|
return jsonify({
|
|
|
|
"status": "warning",
|
|
|
|
"message": "Only stop words were used. Please try to add a less common word to the search."
|
|
|
|
})
|
|
|
|
else:
|
|
|
|
resp: Response = jsonify({"status": "warning", "message": f"No results were found for {parsed}"})
|
|
|
|
resp.status_code = 404
|
|
|
|
return resp
|
2020-03-07 10:45:39 +01:00
|
|
|
|
|
|
|
data = []
|
|
|
|
d: Line
|
|
|
|
ri = 0
|
|
|
|
for d in results:
|
2021-07-07 17:27:21 +02:00
|
|
|
entry = model_to_dict(d, extra_attrs=[] if exact else ["rank"],
|
|
|
|
exclude=global_excludes + [Episode.subtitle_hash])
|
2021-07-06 17:39:25 +02:00
|
|
|
if not exact:
|
|
|
|
entry["rank"] = float(entry["rank"])
|
2020-03-07 10:45:39 +01:00
|
|
|
data.append({"centerID": d.id, "resultID": ri, "offset": 1, "lines": [entry]})
|
|
|
|
ri += 1
|
|
|
|
|
|
|
|
return jsonify(data)
|
|
|
|
|
|
|
|
|
2020-03-08 14:48:04 +01:00
|
|
|
@app.route("/api/expand")
|
2020-08-15 12:27:16 +02:00
|
|
|
def api_expand():
|
2020-03-07 10:45:39 +01:00
|
|
|
center_id = request.args.get('centerID')
|
|
|
|
offset = int(request.args.get('offset', 1))
|
|
|
|
if not center_id:
|
|
|
|
return "no central line ID", 400
|
|
|
|
|
|
|
|
try:
|
|
|
|
center: Line = Line.select().where(Line.id == center_id).get()
|
|
|
|
|
|
|
|
except DoesNotExist:
|
|
|
|
return "not found", 404
|
|
|
|
|
|
|
|
lines = Line.select().where(
|
|
|
|
(Line.episode == center.episode) & (Line.order << [center.order - offset, center.order + offset])
|
|
|
|
)
|
|
|
|
l: Line
|
|
|
|
data = []
|
|
|
|
for l in lines:
|
2020-04-07 11:08:52 +02:00
|
|
|
entry = model_to_dict(l, exclude=global_excludes)
|
2020-03-07 10:45:39 +01:00
|
|
|
data.append(entry)
|
|
|
|
|
|
|
|
return jsonify(data)
|
|
|
|
|
|
|
|
|
2021-05-25 20:53:09 +02:00
|
|
|
@app.route("/api/series")
|
2021-07-07 17:27:21 +02:00
|
|
|
@cache.cached(timeout=60 * 60 * 24)
|
2021-05-25 20:53:09 +02:00
|
|
|
def series():
|
|
|
|
series_list = []
|
2021-10-26 15:30:14 +02:00
|
|
|
for series in Series.select().order_by(Series.order):
|
2021-07-04 22:24:51 +02:00
|
|
|
last_episode: Episode = Episode.select().where(Episode.series == series).order_by(
|
|
|
|
Episode.upload_date.desc()).limit(
|
|
|
|
1).get()
|
2021-10-26 15:30:14 +02:00
|
|
|
series_data = model_to_dict(series, exclude=[Series.order])
|
2021-07-04 22:24:51 +02:00
|
|
|
series_data["last_upload"] = last_episode.upload_date.strftime("%Y-%m-%d")
|
|
|
|
series_data["length"] = Episode.select().where(Episode.series == series).count()
|
|
|
|
series_list.append(series_data)
|
2021-05-25 20:53:09 +02:00
|
|
|
return jsonify({
|
|
|
|
"series": series_list
|
|
|
|
})
|
|
|
|
|
|
|
|
|
2020-08-07 22:29:54 +02:00
|
|
|
@app.route("/api/episodes")
|
2021-07-07 17:27:21 +02:00
|
|
|
@cache.cached(timeout=60 * 60 * 24)
|
2020-08-15 12:27:16 +02:00
|
|
|
def api_episodes():
|
2021-10-26 15:30:14 +02:00
|
|
|
all_series: List[Series] = Series.select().order_by(Series.order)
|
2020-08-07 22:29:54 +02:00
|
|
|
data = []
|
|
|
|
for series in all_series:
|
|
|
|
|
|
|
|
episodes: List[Episode] = Episode.select().where(Episode.series == series).order_by(Episode.video_number)
|
|
|
|
|
|
|
|
series_data = []
|
|
|
|
for episode in episodes:
|
2020-08-08 15:18:14 +02:00
|
|
|
entry = model_to_dict(episode, exclude=[Episode.series, Episode.title])
|
2021-07-04 22:24:51 +02:00
|
|
|
if entry["upload_date"]:
|
|
|
|
entry["upload_date"] = entry["upload_date"].strftime("%Y-%m-%d")
|
2020-08-07 22:29:54 +02:00
|
|
|
series_data.append(entry)
|
|
|
|
data.append({
|
|
|
|
"meta": model_to_dict(series),
|
|
|
|
"episodes": series_data
|
|
|
|
})
|
|
|
|
|
|
|
|
return jsonify(data)
|
|
|
|
|
|
|
|
|
2021-07-13 22:21:26 +02:00
|
|
|
@app.route("/api/suggestion")
|
|
|
|
def api_suggestion():
|
|
|
|
until = request.args.get('until')
|
|
|
|
series = request.args.get('series')
|
|
|
|
if series not in suggestions:
|
2021-07-18 11:18:09 +02:00
|
|
|
abort(404)
|
2021-07-13 22:21:26 +02:00
|
|
|
all_suggestions = suggestions[series]
|
|
|
|
if until == "-":
|
|
|
|
possible_suggestions = [s.text for s in all_suggestions]
|
|
|
|
else:
|
|
|
|
possible_suggestions = [s.text for s in all_suggestions if s.episode <= int(until)]
|
|
|
|
chosen_suggestion = random.choice(possible_suggestions)
|
|
|
|
return Response(chosen_suggestion, mimetype='text/plain')
|
|
|
|
|
|
|
|
|
2021-10-26 18:10:00 +02:00
|
|
|
@app.route("/api/transcript")
|
|
|
|
@cache.cached(timeout=60 * 60 * 24)
|
|
|
|
def transcript():
|
|
|
|
series = request.args.get('series')
|
|
|
|
episode_number = request.args.get('episode')
|
|
|
|
|
|
|
|
episode = Episode.select(Episode, Series).where(
|
|
|
|
(Episode.episode_number == episode_number)
|
|
|
|
&
|
|
|
|
(Episode.series.slug == series)
|
|
|
|
).join(Series).get()
|
|
|
|
|
|
|
|
lines: List[Line] = Line.select(Line, Person).where(
|
|
|
|
(Episode.episode_number == episode_number)
|
|
|
|
&
|
|
|
|
(Episode.series.slug == series)
|
|
|
|
).order_by(Line.order) \
|
|
|
|
.join(Person, join_type=JOIN.FULL).switch(Line) \
|
|
|
|
.join(Episode).join(Series)
|
|
|
|
|
|
|
|
line_data = []
|
|
|
|
for line in lines:
|
|
|
|
entry = model_to_dict(line, exclude=global_excludes + [Line.episode])
|
|
|
|
|
|
|
|
line_data.append(entry)
|
|
|
|
|
|
|
|
return jsonify({
|
|
|
|
"episode": model_to_dict(episode, exclude=global_excludes),
|
|
|
|
"lines": line_data
|
|
|
|
})
|
|
|
|
|
|
|
|
|
2021-10-26 21:03:45 +02:00
|
|
|
@app.route("/api/stats")
|
|
|
|
@cache.cached(timeout=60 * 60 * 24)
|
|
|
|
def stats():
|
|
|
|
return jsonify({
|
|
|
|
"TotalWords": TotalWords().as_data(),
|
|
|
|
"MostCommonNounChunks": MostCommonNounChunks().as_data(),
|
|
|
|
"LongestNounChunks": LongestNounChunks().as_data(),
|
|
|
|
"LinesPerPerson": LinesPerPerson().as_data()
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/api/stats/text")
|
|
|
|
@cache.cached(timeout=60 * 60 * 24)
|
|
|
|
def stats_text():
|
|
|
|
text = ""
|
|
|
|
|
|
|
|
for stats_class in [TotalWords, MostCommonNounChunks, LongestNounChunks, LinesPerPerson]:
|
|
|
|
text += type(stats_class()).__name__.center(100, "#") + "\n"
|
|
|
|
text += stats_class().as_plaintext() + "\n\n"
|
|
|
|
|
|
|
|
return Response(text, mimetype='text/plain')
|
|
|
|
|
|
|
|
|
2020-03-07 10:45:39 +01:00
|
|
|
if __name__ == "__main__":
|
2021-07-16 11:50:56 +02:00
|
|
|
import logging
|
2021-10-26 15:30:14 +02:00
|
|
|
|
2021-07-16 11:50:56 +02:00
|
|
|
logger = logging.getLogger('peewee')
|
|
|
|
logger.addHandler(logging.StreamHandler())
|
|
|
|
logger.setLevel(logging.DEBUG)
|
2020-03-07 10:45:39 +01:00
|
|
|
app.debug = True
|
|
|
|
app.after_request(add_cors)
|
|
|
|
app.run()
|