-- Ad-hoc analysis and EXPLAIN queries (PostgreSQL) against the line, person,
-- phrase and episode tables. Each statement is self-contained.

-- number of subtitle lines per person
-- (chars = total length of all of that person's line texts)
SELECT
    p.name,
    count(p.name) AS count,
    sum(length(line.text)) AS chars
FROM line
INNER JOIN person AS p
    ON line.person_id = p.id
GROUP BY p.name
ORDER BY chars DESC;

-- most common noun chunks
-- NOTE: a bare name in ORDER BY resolves to the output column first, so
-- "count" here is the summed total, not the per-row phrase.count column.
SELECT
    text,
    sum(count) AS count
FROM phrase
GROUP BY text
ORDER BY count DESC;

-- longest noun chunks
SELECT
    text,
    char_length(phrase.text) AS len
FROM phrase
ORDER BY len DESC;

-- intentionally left disabled: wipes every row of phrase
-- delete
-- from phrase;

-- plan + real timings for a substring search over phrases
-- (EXPLAIN ANALYZE actually executes the query)
EXPLAIN ANALYZE
SELECT
    text,
    sum(count) AS total_count
FROM phrase
WHERE text ILIKE '%head%'
GROUP BY text
ORDER BY total_count DESC;

-- query made by suggestion
-- debug with https://dalibo.github.io/pev2/
-- NOTE(review): this query filters episode by "series_id" while the full text
-- search query below filters by "season" -- confirm against the current schema.
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON)
SELECT
    "t1"."text",
    SUM("t1"."count") AS "total_count"
FROM "phrase" AS "t1"
INNER JOIN "episode" AS "t2"
    ON ("t1"."episode_id" = "t2"."id")
WHERE ((("t2"."series_id" = 1)
    AND ("t2"."episode_number" <= 30))
    AND ("t1"."text" ILIKE '%head%'))
GROUP BY "t1"."text"
ORDER BY total_count DESC
LIMIT 10;

-- Trigram GIN index: lets the leading-wildcard ILIKE '%...%' filters above use
-- an index instead of scanning the whole phrase table.
-- IF NOT EXISTS / IF EXISTS keep these statements safe to re-run.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

CREATE INDEX IF NOT EXISTS phrases_text_index
    ON phrase
    USING gin (text gin_trgm_ops);

-- presumably used to compare plans with and without the index; run only
-- when the index should be removed again
DROP INDEX IF EXISTS phrases_text_index;

-- query made by full text search
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON)
SELECT
    "t1"."id",
    "t1"."text",
    "t1"."search_text",
    "t1"."person_id",
    "t1"."isnote",
    "t1"."ismeta",
    "t1"."starttime",
    "t1"."endtime",
    "t1"."episode_id",
    "t1"."order",
    "t2"."id",
    "t2"."name",
    "t2"."color",
    "t2"."season",
    "t3"."id",
    "t3"."season",
    "t3"."episode_number",
    "t3"."video_number",
    "t3"."youtube_id",
    "t3"."text_imported",
    "t3"."phrases_imported",
    ts_rank("t1"."search_text", websearch_to_tsquery('english', 'house')) AS "rank"
FROM "line" AS "t1"
INNER JOIN "person" AS "t2"
    ON ("t1"."person_id" = "t2"."id")
INNER JOIN "episode" AS "t3"
    ON ("t1"."episode_id" = "t3"."id")
WHERE ((("t1"."search_text" @@ websearch_to_tsquery('english', 'house'))
    AND ("t3"."episode_number" <= 1000))
    AND ("t3"."season" = 1))
ORDER BY rank DESC
LIMIT 20;