better stats

2024-09-19 15:23:44 +02:00 · 2021-10-26 21:30:46 +02:00 · 2021-10-26 21:30:46 +02:00 · 166b8fbe11
commit 166b8fbe11
parent f53f03263d
3 changed files with 35 additions and 16 deletions
--- a/server.py
+++ b/server.py
@@ -12,7 +12,7 @@ from models import *
 # logger = logging.getLogger('peewee')
 # logger.addHandler(logging.StreamHandler())
 # logger.setLevel(logging.DEBUG)
-from stats import TotalWords, MostCommonNounChunks, LongestNounChunks, LinesPerPerson
+from stats import TotalWords, MostCommonNounChunks, LongestNounChunks, LinesPerPerson, aggregate_stats
 from suggestions import suggestions


@@ -245,24 +245,13 @@ def transcript():
 @app.route("/api/stats")
 @cache.cached(timeout=60 * 60 * 24)
 def stats():
-    return jsonify({
-        "TotalWords": TotalWords().as_data(),
-        "MostCommonNounChunks": MostCommonNounChunks().as_data(),
-        "LongestNounChunks": LongestNounChunks().as_data(),
-        "LinesPerPerson": LinesPerPerson().as_data()
-    })
+    return jsonify(aggregate_stats(plaintext=False))


 @app.route("/api/stats/text")
 @cache.cached(timeout=60 * 60 * 24)
 def stats_text():
-    text = ""
-
-    for stats_class in [TotalWords, MostCommonNounChunks, LongestNounChunks, LinesPerPerson]:
-        text += type(stats_class()).__name__.center(100, "#") + "\n"
-        text += stats_class().as_plaintext() + "\n\n"
-
-    return Response(text, mimetype='text/plain')
+    return Response(aggregate_stats(plaintext=True), mimetype='text/plain')


 if __name__ == "__main__":
--- a/stats.py
+++ b/stats.py
@@ -82,5 +82,32 @@ select sum(array_length(regexp_split_to_array(text,'\\s'),1)) from line
 """


-if __name__ == '__main__':
-    print(TotalWords().as_data())
+class PhraseTableSize(SingleValueStats):  # pg_size_pretty(pg_relation_size(...)) of the 'phrase' relation
+    query = "SELECT pg_size_pretty(pg_relation_size('phrase'));"
+
+
+class LineTableSize(SingleValueStats):  # pg_size_pretty(pg_relation_size(...)) of the 'line' relation
+    query = "SELECT pg_size_pretty(pg_relation_size('line'));"
+
+
+class TotalVideoTime(SingleValueStats):  # sums each episode's highest-"order" line endtime; "hours" assumes endtime is in ms (TODO confirm)
+    query = """select (sum(endtime)::float / 1000/60/60) as hours
+from (select distinct on (episode_id) endtime from line order by episode_id, "order" desc) as subquery  
+"""
+
+
+def aggregate_stats(plaintext: bool):
+    """Run every stats class and bundle the results: one text report (an
+    80-column "#" banner per stat, then its plaintext) when `plaintext` is
+    true, otherwise a dict mapping each stats class name to its data.
+    """
+    stats_classes = [TotalWords, PhraseTableSize, LineTableSize, TotalVideoTime, MostCommonNounChunks,
+                     LongestNounChunks, LinesPerPerson]
+    # The class itself carries the name; no throwaway instance is needed to read it.
+    if not plaintext:
+        return {cls.__name__: cls().as_data() for cls in stats_classes}
+    # Same layout as before: banner line, the stat's plaintext, then a blank line.
+    return "".join(
+        f" {cls.__name__} ".center(80, "#") + "\n" + cls().as_plaintext() + "\n\n"
+        for cls in stats_classes
+    )
--- a/tests.sql
+++ b/tests.sql
@@ -1,5 +1,8 @@
 select e.pretty_title, text,char_length(line.text) as len from line join episode e on e.id = line.episode_id order by len desc;

+
+SELECT pg_size_pretty(pg_relation_size('phrase'));
+
 delete
 from phrase;