mirror of
https://github.com/Findus23/cr-search.git
synced 2024-09-19 15:23:44 +02:00
remove unused prototypes
This commit is contained in:
parent
78bb5da2bf
commit
473dfa3b4d
7 changed files with 1 additions and 209 deletions
48
complete.py
48
complete.py
|
@ -1,48 +0,0 @@
|
|||
import readline
|
||||
|
||||
from models import Phrase
|
||||
|
||||
|
||||
class SimpleCompleter:
    """Readline completer that offers stored phrases matching the typed text.

    ``complete`` is meant to be registered with ``readline.set_completer``:
    readline calls it with ``state`` 0, 1, 2, ... for the same ``text`` and
    expects one candidate per call, then ``None`` when there are no more.
    """

    def __init__(self, until_episode=9):
        """Create a completer.

        Args:
            until_episode: Value that ``Phrase.until_episode`` must equal for
                a phrase to be offered. Defaults to 9, the value that was
                previously hard-coded.
        """
        self.until_episode = until_episode
        # Candidates for the text currently being completed. Created here so
        # that ``complete`` can never hit a missing attribute.
        self.matches = []

    def complete(self, text, state):
        """Return the ``state``-th candidate for ``text``, or ``None``.

        Args:
            text: The text typed so far.
            state: Zero-based index of the candidate to return.

        Returns:
            The phrase text at position ``state`` in the match list, or
            ``None`` if there are fewer matches than that.
        """
        if state == 0:
            # First call for this text: rebuild the match list.
            if text:
                phrases = Phrase.select().where(
                    (Phrase.until_episode == self.until_episode)
                    & (Phrase.text % ("%" + text + "%"))
                )
                self.matches = [p.text for p in phrases]
            else:
                # Nothing typed: offer nothing, and drop candidates left over
                # from the previous completion instead of replaying them.
                self.matches = []

        # Hand back the state'th item of the match list, if there is one.
        try:
            return self.matches[state]
        except IndexError:
            return None
|
||||
|
||||
|
||||
def input_loop():
    """Prompt for lines and echo each one until ``stop`` is entered.

    Every line read, including the final ``stop``, is printed back prefixed
    with ``Dispatch``. The loop also ends when the input stream is exhausted.
    """
    line = ''
    while line != 'stop':
        try:
            line = input('Prompt ("stop" to quit): ')
        except EOFError:
            # End of input (Ctrl-D, or a pipe that ran dry): stop prompting
            # instead of dying with a traceback.
            break
        print(f'Dispatch {line}')
|
||||
|
||||
|
||||
# Quick check of the phrase lookup: print the text of every phrase with
# until_episode 9 that matches the fixed pattern below.
test_pattern = "%" + "test" + "%"
phrases = Phrase.select().where((Phrase.until_episode == 9) & (Phrase.text % test_pattern))
print([p.text for p in phrases])

# Hook the completer into readline and bind it to the tab key.
completer = SimpleCompleter()
readline.set_completer(completer.complete)
readline.parse_and_bind('tab: complete')

# Start prompting the user for text.
input_loop()
|
16
index.py
16
index.py
|
@ -1,16 +0,0 @@
|
|||
from sonic import IngestClient, ControlClient
|
||||
|
||||
from models import Line
|
||||
|
||||
# Push the text of every Line row into the "crsearch" collection/bucket,
# keyed by the row id, then ask the server to consolidate.
# NOTE(review): host, port and password are literals in this script; consider
# loading them from configuration instead of committing them.
with IngestClient("127.0.0.1", 1491, "SecretPassword") as ingestcl:
    # Presumably empties the collection so the index is rebuilt from scratch
    # (confirm against the sonic client documentation).
    ingestcl.flush_collection("crsearch")
    total = Line.select().count()
    for i, line in enumerate(Line.select()):
        ingestcl.push("crsearch", "crsearch", str(line.id), line.text, lang="eng")
        # Progress output every 100 rows: rows pushed so far vs. total rows.
        if i % 100 == 0:
            print(i, total)
    print(ingestcl.count("crsearch", "crsearch"))

with ControlClient("127.0.0.1", 1491, "SecretPassword") as controlcl:
    controlcl.trigger("consolidate")
|
17
poetry.lock
generated
17
poetry.lock
generated
|
@ -237,20 +237,6 @@ sanic = ["sanic (>=0.8)"]
|
|||
sqlalchemy = ["sqlalchemy (>=1.2)"]
|
||||
tornado = ["tornado (>=5)"]
|
||||
|
||||
[[package]]
|
||||
category = "main"
|
||||
description = "python client for sonic search backend"
|
||||
develop = true
|
||||
name = "sonic-client"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
version = "0.0.5"
|
||||
|
||||
[package.source]
|
||||
reference = ""
|
||||
type = "directory"
|
||||
url = "python-sonic-client"
|
||||
|
||||
[[package]]
|
||||
category = "main"
|
||||
description = "Industrial-strength Natural Language Processing (NLP) in Python"
|
||||
|
@ -382,7 +368,7 @@ python-versions = "*"
|
|||
version = "2020.2.16"
|
||||
|
||||
[metadata]
|
||||
content-hash = "99f0ade4553c864959a79089176ecda0a9358dcaee08902649efbd0a22e105f0"
|
||||
content-hash = "7edbb99b94dd505828ad532bda828ca45a607a87b7719042332c0257f5d0a03e"
|
||||
python-versions = "^3.8"
|
||||
|
||||
[metadata.files]
|
||||
|
@ -577,7 +563,6 @@ sentry-sdk = [
|
|||
{file = "sentry-sdk-0.14.2.tar.gz", hash = "sha256:480eee754e60bcae983787a9a13bc8f155a111aef199afaa4f289d6a76aa622a"},
|
||||
{file = "sentry_sdk-0.14.2-py2.py3-none-any.whl", hash = "sha256:a920387dc3ee252a66679d0afecd34479fb6fc52c2bc20763793ed69e5b0dcc0"},
|
||||
]
|
||||
sonic-client = []
|
||||
spacy = [
|
||||
{file = "spacy-2.2.3-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:ce7fad73de7aed7ca2ee7c2404c77c72005f67ca95edae6f19f08947fb0f8ab3"},
|
||||
{file = "spacy-2.2.3-cp35-cp35m-win_amd64.whl", hash = "sha256:3c83c061597b5dc94c939c511d3b72c2971257204f21976afc117a350e8fa92b"},
|
||||
|
|
|
@ -1,36 +0,0 @@
|
|||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
from peewee import SQL, fn, Alias
|
||||
from psycopg2._psycopg import cursor
|
||||
|
||||
from models import Line, Person, Episode, db
|
||||
from utils import milliseconds_to_td
|
||||
|
||||
# Attach a stream handler to the 'peewee' logger at DEBUG level so its log
# output is printed while the script runs.
logger = logging.getLogger('peewee')
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.DEBUG)

terms = "where a bunch of us nerdy-ass actors"
start_time = datetime.now()
# Expose the ts_rank() score of each row under the name "rank" so it can be
# used for ordering and read back as ``line.rank``.
a = Alias(fn.ts_rank(Line.search_text, fn.plainto_tsquery('english', terms)), "rank")

query = Line.select(Line, Person, Episode, a).where(
    (Line.search_text.match(terms, language="english", plain=True))
    &
    (Episode.episode_number <= 100)
    &
    (Episode.season == 2)
).order_by(SQL("rank DESC")).join(Person).switch(Line).join(Episode).limit(100)
# peewee only runs a query when its rows are requested. Fetch them before
# taking end_time; otherwise the printed duration covers nothing but building
# the query object.
results = list(query)
end_time = datetime.now()
print(end_time - start_time)

if not results:
    # No hits: ask the database what the search terms parse to, to tell
    # "nothing matched" apart from "the query was empty after parsing".
    result: cursor = db.execute_sql("select plainto_tsquery('english',%s)", [terms])
    parsed = result.fetchone()[0]
    if not parsed:
        raise ValueError("only stop words were used")
    print(parsed)

for line in results:
    print(line.episode.name, milliseconds_to_td(line.starttime), line.rank, line.person.name + ": " + line.text)
|
|
@ -9,7 +9,6 @@ python = "^3.8"
|
|||
peewee = "^3.13.1"
|
||||
PyMySQL = "^0.9.3"
|
||||
srt = "^3.0.0"
|
||||
sonic-client = {path = "python-sonic-client"}
|
||||
psycopg2 = "^2.8.4"
|
||||
youtube-dl = "^2020.2.16"
|
||||
spacy = "^2.2.3"
|
||||
|
|
23
search.py
23
search.py
|
@ -1,23 +0,0 @@
|
|||
import logging
|
||||
|
||||
from sonic import SearchClient
|
||||
|
||||
from models import Line, Episode, Person
|
||||
from utils import milliseconds_to_td
|
||||
|
||||
# Attach a stream handler to the 'peewee' logger and enable DEBUG output.
logger = logging.getLogger('peewee')
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())

with SearchClient("127.0.0.1", 1491, "SecretPassword") as querycl:
    results = querycl.query("crsearch", "crsearch", "gnome", lang="eng")

    # Every hit is used as a Line id; the ids directly before and after it
    # are looked up as well.
    real_results = []
    for rs in results:
        r = int(rs)
        real_results += [r - 1, r, r + 1]

    lines = (
        Line.select(Line, Person, Episode)
        .where(Line.id << real_results)
        .join(Person)
        .switch(Line)
        .join(Episode)
    )
    for line in lines:
        spoken = line.person.name + ": " + line.text
        print(line.episode.name, milliseconds_to_td(line.starttime), spoken)

    # Second request on the same connection: suggestions for "regular".
    results = querycl.suggest("crsearch", "crsearch", "regular")
    print(results)
|
|
@ -1,69 +0,0 @@
|
|||
from datetime import timedelta
|
||||
|
||||
from pyvtt import WebVTTItem, from_string, WebVTTTime
|
||||
from srt import Subtitle as SrtSubtitle
|
||||
from srt import parse
|
||||
|
||||
|
||||
class SubtitleList:
    """Iterable collection of ``Subtitle`` objects built from subtitle text."""

    def __init__(self, text: str, srt: bool):
        """Parse ``text`` and wrap every parsed item in a ``Subtitle``.

        Args:
            text: Raw subtitle file content.
            srt: If true, ``text`` is parsed with ``srt.parse``; otherwise
                with ``pyvtt.from_string``.
        """
        self.srt = srt
        if srt:
            entries = [Subtitle(srt=item) for item in parse(text)]
        else:
            # Items parsed by pyvtt are numbered here, starting at 1, and
            # that number is handed to Subtitle as its index.
            entries = [
                Subtitle(vtt=item, i=position)
                for position, item in enumerate(from_string(text), start=1)
            ]
        self.list = entries

    def __iter__(self):
        """Yield the wrapped subtitles in parse order."""
        for entry in self.list:
            yield entry
|
||||
|
||||
|
||||
class Subtitle:
    """One subtitle entry with a common interface over SRT and WebVTT items.

    Exactly one backing object is stored: ``self.srt`` when an SRT subtitle
    was given, otherwise ``self.vtt`` together with the index ``self.i``.
    """

    def __init__(self, srt: SrtSubtitle = None, vtt: WebVTTItem = None, i: int = None):
        """Wrap ``srt`` if it is truthy, else wrap ``vtt`` with index ``i``."""
        self.is_srt = bool(srt)
        if self.is_srt:
            self.srt = srt
        else:
            self.vtt = vtt
            self.i = i

    @staticmethod
    def vtttime_to_td(vt: WebVTTTime) -> timedelta:
        """Convert a ``WebVTTTime`` into the equivalent ``timedelta``."""
        hours, minutes = vt.hours, vt.minutes
        seconds, milliseconds = vt.seconds, vt.milliseconds
        return timedelta(hours=hours, minutes=minutes, seconds=seconds, milliseconds=milliseconds)

    @property
    def content(self) -> str:
        """Text of the subtitle."""
        return self.srt.content if self.is_srt else self.vtt.text

    @property
    def index(self) -> int:
        """Index of the subtitle: the SRT index, or the ``i`` given for VTT."""
        return self.srt.index if self.is_srt else self.i

    @property
    def start(self) -> timedelta:
        """Start time of the subtitle as a ``timedelta``."""
        if not self.is_srt:
            return self.vtttime_to_td(self.vtt.start)
        return self.srt.start

    @property
    def end(self) -> timedelta:
        """End time of the subtitle as a ``timedelta``."""
        if not self.is_srt:
            return self.vtttime_to_td(self.vtt.end)
        return self.srt.end
|
Loading…
Reference in a new issue