
add data of all crawls together

Lukas Winkler 2018-01-20 21:26:30 +01:00
parent c2bed3a527
commit bba1865186
GPG key ID: 94AFBE7C2656A5B5
5 changed files with 33 additions and 21 deletions

@@ -14,7 +14,7 @@ class NonsenseSpider(Spider):
]
custom_settings = {
'FEED_FORMAT': 'json',
'FEED_URI': "../crawl.json"
'FEED_URI': "../crawlData/crawl.json"
}
def parse(self, response):

@@ -1,17 +1,16 @@
#!/usr/bin/python3
- import json
import pickle
import os
import random
from PIL import Image
+ import utils
def gen():
table = [[[0 for i in range(221)] for j in range(221)] for k in range(221)]
- with open('crawl.json') as inputfile:
- crawldata = json.load(inputfile)
+ crawldata = utils.crawl_data()
names = {result["name"] for result in crawldata}
count = 0
for name in names:
@@ -30,11 +29,11 @@ def gen():
def save(data):
with open('ikeaname.pickle', 'wb') as outfile:
- pickle.dump(data, outfile,pickle.HIGHEST_PROTOCOL)
+ pickle.dump(data, outfile, pickle.HIGHEST_PROTOCOL)
def load():
with open('ikeaname.pickle',"rb") as inputfile:
with open('ikeaname.pickle', "rb") as inputfile:
table = pickle.load(inputfile)
return table
@@ -88,7 +87,6 @@ def generate():
a = b = 32
if __name__ == "__main__":
for _ in range(100):
print(generate())

@@ -1,11 +1,11 @@
#!/usr/bin/env python3
import json
import re
import yaml
with open('crawl.json', "r") as inputfile:
crawldata = json.load(inputfile)
import utils
crawldata = utils.crawl_data()
descriptions = {result["description"] for result in crawldata}
print(len(descriptions))
@@ -24,11 +24,11 @@ for d in descriptions:
suffix.update(re.findall("(-[\w.-]+)", d))
words = {
"nouns": list(nouns),
"adj": list(adj),
"digit": list(digit),
"prefix": list(prefix),
"suffix": list(suffix)
"nouns": sorted(nouns),
"adj": sorted(adj),
"digit": sorted(digit),
"prefix": sorted(prefix),
"suffix": sorted(suffix)
}
with open('words.yaml', 'w') as outfile:
yaml.dump(words, outfile, default_flow_style=False)

@@ -45,7 +45,7 @@ def subscribe(bot, update, job_queue):
chat_id = update.message.chat_id
# Add job to queue
if chat_id in subscriptions:
- update.message.reply_text('You are already subscribed')
+ update.message.reply_text('Du bist bereits angemeldet')
return
job = job_queue.run_daily(subscribe_notification,
@@ -53,7 +53,7 @@ def subscribe(bot, update, job_queue):
time=datetime.datetime.now().replace(minute=0, hour=8, second=0)
+ datetime.timedelta(days=1))
subscriptions[chat_id] = job
- update.message.reply_text('Successfully subscribed')
+ update.message.reply_text('erfolgreich angemeldet')
def unsubscribe(bot, update):
@@ -61,14 +61,14 @@ def unsubscribe(bot, update):
chat_id = update.message.chat_id
if chat_id not in subscriptions:
- update.message.reply_text('You have no subscription')
+ update.message.reply_text('Du nicht angemeldet')
return
# Add job to queue
job = subscriptions[chat_id]
job.schedule_removal()
del subscriptions[chat_id]
- update.message.reply_text('Successfully unsubscribed')
+ update.message.reply_text('erfolgreich abgemeldet')
def multiple(bot, update, args):
@@ -83,7 +83,7 @@ def multiple(bot, update, args):
descriptions.append("+++ " + generate.get_description() + " +++")
update.message.reply_text("\n".join(descriptions))
except (IndexError, ValueError):
- update.message.reply_text('Usage: /multiple <count>')
+ update.message.reply_text('Verwendung: /mehrere <anzahl>')
def error(bot, update, error):

utils.py (new file)

@@ -0,0 +1,14 @@
import glob
import json


def crawl_data():
    all_data = []
    for file in glob.glob("crawlData/*.json"):
        with open(file, "r") as inputfile:
            all_data.extend(json.load(inputfile))
    return all_data


if __name__ == "__main__":
    print(crawl_data().__len__())
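
With utils.py in place, both generator scripts load the merged crawl data through a single helper call instead of reading crawl.json directly. A minimal usage sketch, mirroring the call sites shown in the diffs above:

import utils

# crawl_data() concatenates every JSON file found in crawlData/
crawldata = utils.crawl_data()

# the generators then build their word sets from the combined results
names = {result["name"] for result in crawldata}
descriptions = {result["description"] for result in crawldata}

Because the helper globs crawlData/*.json, any additional crawl output dropped into that directory is picked up automatically on the next run.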