1
0
Fork 0
mirror of https://github.com/Findus23/cr-search.git synced 2024-09-19 15:23:44 +02:00

better stats

This commit is contained in:
Lukas Winkler 2021-10-26 21:30:46 +02:00
parent f53f03263d
commit 166b8fbe11
Signed by: lukas
GPG key ID: 54DE4D798D244853
3 changed files with 35 additions and 16 deletions

View file

@ -12,7 +12,7 @@ from models import *
# logger = logging.getLogger('peewee')
# logger.addHandler(logging.StreamHandler())
# logger.setLevel(logging.DEBUG)
from stats import TotalWords, MostCommonNounChunks, LongestNounChunks, LinesPerPerson
from stats import TotalWords, MostCommonNounChunks, LongestNounChunks, LinesPerPerson, aggregate_stats
from suggestions import suggestions
@ -245,24 +245,13 @@ def transcript():
@app.route("/api/stats")
@cache.cached(timeout=60 * 60 * 24)
def stats():
    """Serve every aggregated statistic as one JSON document.

    The payload comes from ``aggregate_stats(plaintext=False)``, so the set of
    statistics is defined in a single place instead of being repeated here.
    The view is cached with a timeout of ``60 * 60 * 24`` (one day, assuming
    the cache backend counts in seconds).
    """
    return jsonify(aggregate_stats(plaintext=False))
@app.route("/api/stats/text")
@cache.cached(timeout=60 * 60 * 24)
def stats_text():
    """Serve every aggregated statistic as one ``text/plain`` report.

    The report body comes from ``aggregate_stats(plaintext=True)``, which
    shares its list of statistics with the JSON endpoint. The view is cached
    with a timeout of ``60 * 60 * 24`` (one day, assuming the cache backend
    counts in seconds).
    """
    return Response(aggregate_stats(plaintext=True), mimetype='text/plain')
if __name__ == "__main__":

View file

@ -82,5 +82,32 @@ select sum(array_length(regexp_split_to_array(text,'\\s'),1)) from line
"""
if __name__ == '__main__':
    # Manual check when the module is run directly: print the TotalWords data.
    total_words = TotalWords()
    print(total_words.as_data())
class PhraseTableSize(SingleValueStats):
    """Pretty-printed ``pg_relation_size`` of the ``phrase`` relation.

    The query is presumably executed by the ``SingleValueStats`` base class,
    which is defined outside this view.
    """

    query = "SELECT pg_size_pretty(pg_relation_size('phrase'));"
class LineTableSize(SingleValueStats):
    """Pretty-printed ``pg_relation_size`` of the ``line`` relation.

    The query is presumably executed by the ``SingleValueStats`` base class,
    which is defined outside this view.
    """

    query = "SELECT pg_size_pretty(pg_relation_size('line'));"
class TotalVideoTime(SingleValueStats):
    """Summed ``endtime`` of the last line of every episode, in hours.

    The subquery keeps one row per ``episode_id`` (the one with the highest
    ``"order"``); the outer query sums those ``endtime`` values and divides by
    1000, 60 and 60, so ``endtime`` is presumably stored in milliseconds.
    """

    query = """select (sum(endtime)::float / 1000/60/60) as hours
from (select distinct on (episode_id) endtime from line order by episode_id, "order" desc) as subquery
"""
def aggregate_stats(plaintext: bool, *, stats_classes=None):
    """Combine all statistics into one plain-text report or one dict.

    Args:
        plaintext: When true, return a text report; otherwise return a dict.
        stats_classes: Optional iterable of statistic classes to aggregate, in
            output order. Each class must be instantiable without arguments
            and provide ``as_plaintext()`` and ``as_data()``. Defaults to the
            module's built-in list of statistics.

    Returns:
        If ``plaintext`` is true, a string holding one section per statistic:
        the class name centred in an 80-column ``#`` banner, then the
        statistic's ``as_plaintext()`` output and a blank line. Otherwise a
        dict mapping each class name to that statistic's ``as_data()`` result.
    """
    if stats_classes is None:
        stats_classes = [
            TotalWords,
            PhraseTableSize,
            LineTableSize,
            TotalVideoTime,
            MostCommonNounChunks,
            LongestNounChunks,
            LinesPerPerson,
        ]

    if plaintext:
        sections = []
        for stats_class in stats_classes:
            # The class already knows its name; no throwaway instance needed.
            banner = f" {stats_class.__name__} ".center(80, "#")
            sections.append(f"{banner}\n{stats_class().as_plaintext()}\n\n")
        return "".join(sections)

    return {
        stats_class.__name__: stats_class().as_data()
        for stats_class in stats_classes
    }

View file

@ -1,5 +1,8 @@
-- Every line with its episode title, longest text first.
select e.pretty_title,
       text,
       char_length(line.text) as len
from line
         join episode e on e.id = line.episode_id
order by len desc;

-- Human-readable size of the phrase relation as reported by pg_relation_size.
SELECT pg_size_pretty(pg_relation_size('phrase'));

-- CAUTION: destructive, removes every row from phrase.
delete
from phrase;