1
0
Fork 0
mirror of https://github.com/Findus23/cr-search.git synced 2024-09-19 15:23:44 +02:00

remove unused prototypes

This commit is contained in:
Lukas Winkler 2020-03-07 10:47:19 +01:00
parent 78bb5da2bf
commit 473dfa3b4d
Signed by: lukas
GPG key ID: 54DE4D798D244853
7 changed files with 1 additions and 209 deletions

View file

@ -1,48 +0,0 @@
import readline
from models import Phrase
class SimpleCompleter:
    """Readline completer that suggests stored phrases containing the typed text.

    The ``complete`` method follows the ``readline.set_completer`` calling
    convention: it is called with ``state`` 0, 1, 2, ... for the same text
    and returns one candidate per call until it returns ``None``.
    """

    def __init__(self, until_episode=9):
        """Create a completer.

        Args:
            until_episode: value that ``Phrase.until_episode`` must equal for
                a phrase to be suggested. Defaults to 9, the value that used
                to be hard-coded in the query.
        """
        self.until_episode = until_episode
        # Always defined, so complete() can index it even if no lookup ran yet.
        self.matches = []

    def complete(self, text, state):
        """Return the ``state``-th phrase matching ``text``, or ``None``.

        Args:
            text: the fragment typed so far.
            state: zero-based index of the candidate to return; ``0`` means
                a new completion request, which rebuilds the match list.

        Returns:
            The phrase text at position ``state`` of the current match list,
            or ``None`` when there are not that many matches.
        """
        if state == 0:
            # This is the first call for this text, so rebuild the match list.
            if text:
                # peewee's ``%`` operator on a field builds a LIKE expression;
                # the surrounding "%" wildcards make it a substring match.
                phrases = Phrase.select().where(
                    (Phrase.until_episode == self.until_episode)
                    & (Phrase.text % ("%" + text + "%"))
                )
                self.matches = [p.text for p in phrases]
            else:
                # Nothing typed: offer nothing instead of reusing the matches
                # left over from a previous request.
                self.matches = []
        try:
            return self.matches[state]
        except IndexError:
            return None
def input_loop():
    """Prompt for lines and echo each one until the user enters ``stop``.

    The terminating ``stop`` line is echoed as well before the loop ends.
    """
    while True:
        entered = input('Prompt ("stop" to quit): ')
        print(f'Dispatch {entered}')
        if entered == 'stop':
            break
# Ad-hoc check: print the text of every phrase with until_episode == 9 that
# matches the pattern "%test%".
needle = "test"
sample_filter = (Phrase.until_episode == 9) & (Phrase.text % ("%" + needle + "%"))
phrases = Phrase.select().where(sample_filter)
print([p.text for p in phrases])

# Install the phrase completer and bind it to the tab key.
completer = SimpleCompleter()
readline.set_completer(completer.complete)
readline.parse_and_bind('tab: complete')

# Hand control to the interactive prompt.
input_loop()

View file

@ -1,16 +0,0 @@
from sonic import IngestClient, ControlClient
from models import Line
# Push the text of every Line row into the "crsearch" sonic collection.
with IngestClient("127.0.0.1", 1491, "SecretPassword") as ingestcl:
    # Flush the collection first (presumably so the index is rebuilt from
    # scratch -- confirm against the sonic client documentation).
    ingestcl.flush_collection("crsearch")
    total = Line.select().count()
    for i, line in enumerate(Line.select()):
        ingestcl.push("crsearch", "crsearch", str(line.id), line.text, lang="eng")
        # Progress output every 100 rows.
        if i % 100 == 0:
            print(i, total)
    print(ingestcl.count("crsearch", "crsearch"))

# Ask the server to consolidate once everything has been pushed.
with ControlClient("127.0.0.1", 1491, "SecretPassword") as controlcl:
    controlcl.trigger("consolidate")

17
poetry.lock generated
View file

@ -237,20 +237,6 @@ sanic = ["sanic (>=0.8)"]
sqlalchemy = ["sqlalchemy (>=1.2)"]
tornado = ["tornado (>=5)"]
[[package]]
category = "main"
description = "python client for sonic search backend"
develop = true
name = "sonic-client"
optional = false
python-versions = "*"
version = "0.0.5"
[package.source]
reference = ""
type = "directory"
url = "python-sonic-client"
[[package]]
category = "main"
description = "Industrial-strength Natural Language Processing (NLP) in Python"
@ -382,7 +368,7 @@ python-versions = "*"
version = "2020.2.16"
[metadata]
content-hash = "99f0ade4553c864959a79089176ecda0a9358dcaee08902649efbd0a22e105f0"
content-hash = "7edbb99b94dd505828ad532bda828ca45a607a87b7719042332c0257f5d0a03e"
python-versions = "^3.8"
[metadata.files]
@ -577,7 +563,6 @@ sentry-sdk = [
{file = "sentry-sdk-0.14.2.tar.gz", hash = "sha256:480eee754e60bcae983787a9a13bc8f155a111aef199afaa4f289d6a76aa622a"},
{file = "sentry_sdk-0.14.2-py2.py3-none-any.whl", hash = "sha256:a920387dc3ee252a66679d0afecd34479fb6fc52c2bc20763793ed69e5b0dcc0"},
]
sonic-client = []
spacy = [
{file = "spacy-2.2.3-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:ce7fad73de7aed7ca2ee7c2404c77c72005f67ca95edae6f19f08947fb0f8ab3"},
{file = "spacy-2.2.3-cp35-cp35m-win_amd64.whl", hash = "sha256:3c83c061597b5dc94c939c511d3b72c2971257204f21976afc117a350e8fa92b"},

View file

@ -1,36 +0,0 @@
import logging
from datetime import datetime
from peewee import SQL, fn, Alias
from psycopg2._psycopg import cursor
from models import Line, Person, Episode, db
from utils import milliseconds_to_td
# Attach a stream handler to the 'peewee' logger and enable DEBUG output.
logger = logging.getLogger('peewee')
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.DEBUG)

terms = "where a bunch of us nerdy-ass actors"

start_time = datetime.now()
# Rank expression, exposed under the column name "rank" so it can be sorted on.
a = Alias(fn.ts_rank(Line.search_text, fn.plainto_tsquery('english', terms)), "rank")
matches_terms = Line.search_text.match(terms, language="english", plain=True)
up_to_episode = Episode.episode_number <= 100
in_season = Episode.season == 2
results = (
    Line.select(Line, Person, Episode, a)
    .where(matches_terms & up_to_episode & in_season)
    .order_by(SQL("rank DESC"))
    .join(Person)
    .switch(Line)
    .join(Episode)
    .limit(100)
)
end_time = datetime.now()
# NOTE(review): the query is presumably only executed by len()/iteration
# below, so this likely measures query construction only -- confirm.
print(end_time - start_time)

if len(results) == 0:
    # No hits: ask the database how it parsed the terms to tell "nothing
    # matched" apart from "the parsed query is empty".
    result: cursor = db.execute_sql("select plainto_tsquery('english',%s)", [terms])
    parsed = result.fetchone()[0]
    if not parsed:
        raise ValueError("only stop words were used")
    print(parsed)

for line in results:
    print(line.episode.name, milliseconds_to_td(line.starttime), line.rank, line.person.name + ": " + line.text)

View file

@ -9,7 +9,6 @@ python = "^3.8"
peewee = "^3.13.1"
PyMySQL = "^0.9.3"
srt = "^3.0.0"
sonic-client = {path = "python-sonic-client"}
psycopg2 = "^2.8.4"
youtube-dl = "^2020.2.16"
spacy = "^2.2.3"

View file

@ -1,23 +0,0 @@
import logging
from sonic import SearchClient
from models import Line, Episode, Person
from utils import milliseconds_to_td
# Attach a stream handler to the 'peewee' logger and enable DEBUG output.
logger = logging.getLogger('peewee')
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.DEBUG)

with SearchClient("127.0.0.1", 1491, "SecretPassword") as querycl:
    results = querycl.query("crsearch", "crsearch", "gnome", lang="eng")
    # For every hit id also include the ids directly before and after it.
    real_results = [
        hit + offset
        for hit in map(int, results)
        for offset in (-1, 0, 1)
    ]
    # ``<<`` on a peewee field builds an IN expression.
    lines = (
        Line.select(Line, Person, Episode)
        .where(Line.id << real_results)
        .join(Person)
        .switch(Line)
        .join(Episode)
    )
    for line in lines:
        print(line.episode.name, milliseconds_to_td(line.starttime), line.person.name + ": " + line.text)
    results = querycl.suggest("crsearch", "crsearch", "regular")
    print(results)

View file

@ -1,69 +0,0 @@
from datetime import timedelta
from pyvtt import WebVTTItem, from_string, WebVTTTime
from srt import Subtitle as SrtSubtitle
from srt import parse
class SubtitleList:
    """Iterable collection of ``Subtitle`` objects parsed from subtitle text.

    Attributes:
        srt: the ``srt`` flag passed to the constructor.
        list: the wrapped ``Subtitle`` objects, in the order the parser
            yielded them.
    """

    def __init__(self, text: str, srt: bool):
        """Parse ``text`` and wrap every entry in a ``Subtitle``.

        Args:
            text: the raw subtitle file content.
            srt: when true, ``text`` is parsed with ``srt.parse``; otherwise
                with ``pyvtt.from_string``.
        """
        self.srt = srt
        if srt:
            self.list = [Subtitle(srt=entry) for entry in parse(text)]
        else:
            # WebVTT entries are numbered here, starting at 1, in parse order.
            self.list = [
                Subtitle(vtt=cue, i=position)
                for position, cue in enumerate(from_string(text), start=1)
            ]

    def __iter__(self):
        """Yield the wrapped subtitles in order."""
        for subtitle in self.list:
            yield subtitle
class Subtitle:
    """Uniform read-only view over one subtitle entry from SRT or WebVTT.

    Exactly one backing object is stored: ``self.srt`` for SRT entries, or
    ``self.vtt`` together with the externally assigned index ``self.i`` for
    WebVTT entries. ``self.is_srt`` records which one is in use.
    """

    def __init__(self, srt: SrtSubtitle = None, vtt: WebVTTItem = None, i: int = None):
        """Wrap either an SRT or a WebVTT entry.

        Args:
            srt: the SRT entry to wrap; takes precedence when both are given.
            vtt: the WebVTT entry to wrap.
            i: index to report for a WebVTT entry.

        Raises:
            ValueError: if neither ``srt`` nor ``vtt`` is given.
        """
        # Compare against None explicitly so the choice does not depend on
        # the truthiness of the wrapped object.
        if srt is not None:
            self.is_srt = True
            self.srt = srt
        elif vtt is not None:
            self.is_srt = False
            self.vtt = vtt
            self.i = i
        else:
            # Fail at construction instead of with an AttributeError on the
            # first property access.
            raise ValueError("either srt or vtt must be given")

    @staticmethod
    def vtttime_to_td(vt: WebVTTTime) -> timedelta:
        """Convert a time with hours/minutes/seconds/milliseconds fields to a timedelta."""
        return timedelta(
            hours=vt.hours,
            minutes=vt.minutes,
            seconds=vt.seconds,
            milliseconds=vt.milliseconds,
        )

    @property
    def content(self) -> str:
        """Text of the entry (``srt.content`` or ``vtt.text``)."""
        return self.srt.content if self.is_srt else self.vtt.text

    @property
    def index(self) -> int:
        """Index of the entry (``srt.index``, or the ``i`` given for WebVTT)."""
        return self.srt.index if self.is_srt else self.i

    @property
    def start(self) -> timedelta:
        """Start time of the entry as a timedelta."""
        if self.is_srt:
            return self.srt.start
        return self.vtttime_to_td(self.vtt.start)

    @property
    def end(self) -> timedelta:
        """End time of the entry as a timedelta."""
        if self.is_srt:
            return self.srt.end
        return self.vtttime_to_td(self.vtt.end)