mirror of
https://github.com/Findus23/nonsense.git
synced 2024-09-19 16:03:50 +02:00
add data of all crawls together
This commit is contained in:
parent
c2bed3a527
commit
bba1865186
5 changed files with 33 additions and 21 deletions
|
@ -14,7 +14,7 @@ class NonsenseSpider(Spider):
|
||||||
]
|
]
|
||||||
custom_settings = {
|
custom_settings = {
|
||||||
'FEED_FORMAT': 'json',
|
'FEED_FORMAT': 'json',
|
||||||
'FEED_URI': "../crawl.json"
|
'FEED_URI': "../crawlData/crawl.json"
|
||||||
}
|
}
|
||||||
|
|
||||||
def parse(self, response):
|
def parse(self, response):
|
||||||
|
|
|
@ -1,17 +1,16 @@
|
||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
import json
|
|
||||||
|
|
||||||
import pickle
|
import pickle
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
|
import utils
|
||||||
|
|
||||||
|
|
||||||
def gen():
|
def gen():
|
||||||
table = [[[0 for i in range(221)] for j in range(221)] for k in range(221)]
|
table = [[[0 for i in range(221)] for j in range(221)] for k in range(221)]
|
||||||
with open('crawl.json') as inputfile:
|
crawldata = utils.crawl_data()
|
||||||
crawldata = json.load(inputfile)
|
|
||||||
names = {result["name"] for result in crawldata}
|
names = {result["name"] for result in crawldata}
|
||||||
count = 0
|
count = 0
|
||||||
for name in names:
|
for name in names:
|
||||||
|
@ -88,7 +87,6 @@ def generate():
|
||||||
a = b = 32
|
a = b = 32
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
for _ in range(100):
|
for _ in range(100):
|
||||||
print(generate())
|
print(generate())
|
||||||
|
|
16
prepare.py
16
prepare.py
|
@ -1,11 +1,11 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import json
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
with open('crawl.json', "r") as inputfile:
|
import utils
|
||||||
crawldata = json.load(inputfile)
|
|
||||||
|
crawldata = utils.crawl_data()
|
||||||
|
|
||||||
descriptions = {result["description"] for result in crawldata}
|
descriptions = {result["description"] for result in crawldata}
|
||||||
print(len(descriptions))
|
print(len(descriptions))
|
||||||
|
@ -24,11 +24,11 @@ for d in descriptions:
|
||||||
suffix.update(re.findall("(-[\w.-]+)", d))
|
suffix.update(re.findall("(-[\w.-]+)", d))
|
||||||
|
|
||||||
words = {
|
words = {
|
||||||
"nouns": list(nouns),
|
"nouns": sorted(nouns),
|
||||||
"adj": list(adj),
|
"adj": sorted(adj),
|
||||||
"digit": list(digit),
|
"digit": sorted(digit),
|
||||||
"prefix": list(prefix),
|
"prefix": sorted(prefix),
|
||||||
"suffix": list(suffix)
|
"suffix": sorted(suffix)
|
||||||
}
|
}
|
||||||
with open('words.yaml', 'w') as outfile:
|
with open('words.yaml', 'w') as outfile:
|
||||||
yaml.dump(words, outfile, default_flow_style=False)
|
yaml.dump(words, outfile, default_flow_style=False)
|
||||||
|
|
|
@ -45,7 +45,7 @@ def subscribe(bot, update, job_queue):
|
||||||
chat_id = update.message.chat_id
|
chat_id = update.message.chat_id
|
||||||
# Add job to queue
|
# Add job to queue
|
||||||
if chat_id in subscriptions:
|
if chat_id in subscriptions:
|
||||||
update.message.reply_text('You are already subscribed')
|
update.message.reply_text('Du bist bereits angemeldet')
|
||||||
return
|
return
|
||||||
|
|
||||||
job = job_queue.run_daily(subscribe_notification,
|
job = job_queue.run_daily(subscribe_notification,
|
||||||
|
@ -53,7 +53,7 @@ def subscribe(bot, update, job_queue):
|
||||||
time=datetime.datetime.now().replace(minute=0, hour=8, second=0)
|
time=datetime.datetime.now().replace(minute=0, hour=8, second=0)
|
||||||
+ datetime.timedelta(days=1))
|
+ datetime.timedelta(days=1))
|
||||||
subscriptions[chat_id] = job
|
subscriptions[chat_id] = job
|
||||||
update.message.reply_text('Successfully subscribed')
|
update.message.reply_text('erfolgreich angemeldet')
|
||||||
|
|
||||||
|
|
||||||
def unsubscribe(bot, update):
|
def unsubscribe(bot, update):
|
||||||
|
@ -61,14 +61,14 @@ def unsubscribe(bot, update):
|
||||||
chat_id = update.message.chat_id
|
chat_id = update.message.chat_id
|
||||||
|
|
||||||
if chat_id not in subscriptions:
|
if chat_id not in subscriptions:
|
||||||
update.message.reply_text('You have no subscription')
|
update.message.reply_text('Du nicht angemeldet')
|
||||||
return
|
return
|
||||||
|
|
||||||
# Add job to queue
|
# Add job to queue
|
||||||
job = subscriptions[chat_id]
|
job = subscriptions[chat_id]
|
||||||
job.schedule_removal()
|
job.schedule_removal()
|
||||||
del subscriptions[chat_id]
|
del subscriptions[chat_id]
|
||||||
update.message.reply_text('Successfully unsubscribed')
|
update.message.reply_text('erfolgreich abgemeldet')
|
||||||
|
|
||||||
|
|
||||||
def multiple(bot, update, args):
|
def multiple(bot, update, args):
|
||||||
|
@ -83,7 +83,7 @@ def multiple(bot, update, args):
|
||||||
descriptions.append("+++ " + generate.get_description() + " +++")
|
descriptions.append("+++ " + generate.get_description() + " +++")
|
||||||
update.message.reply_text("\n".join(descriptions))
|
update.message.reply_text("\n".join(descriptions))
|
||||||
except (IndexError, ValueError):
|
except (IndexError, ValueError):
|
||||||
update.message.reply_text('Usage: /multiple <count>')
|
update.message.reply_text('Verwendung: /mehrere <anzahl>')
|
||||||
|
|
||||||
|
|
||||||
def error(bot, update, error):
|
def error(bot, update, error):
|
||||||
|
|
14
utils.py
Normal file
14
utils.py
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
import glob
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
def crawl_data():
|
||||||
|
all_data = []
|
||||||
|
for file in glob.glob("crawlData/*.json"):
|
||||||
|
with open(file, "r") as inputfile:
|
||||||
|
all_data.extend(json.load(inputfile))
|
||||||
|
return all_data
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
print(crawl_data().__len__())
|
Loading…
Reference in a new issue