mirror of
https://github.com/Findus23/nonsense.git
synced 2024-09-19 16:03:50 +02:00
add data of all crawls together
This commit is contained in:
parent
c2bed3a527
commit
bba1865186
5 changed files with 33 additions and 21 deletions
|
@ -14,7 +14,7 @@ class NonsenseSpider(Spider):
|
|||
]
|
||||
custom_settings = {
|
||||
'FEED_FORMAT': 'json',
|
||||
'FEED_URI': "../crawl.json"
|
||||
'FEED_URI': "../crawlData/crawl.json"
|
||||
}
|
||||
|
||||
def parse(self, response):
|
||||
|
|
12
ikeagen.py
12
ikeagen.py
|
@ -1,17 +1,16 @@
|
|||
#!/usr/bin/python3
|
||||
import json
|
||||
|
||||
import pickle
|
||||
|
||||
import os
|
||||
import random
|
||||
from PIL import Image
|
||||
|
||||
import utils
|
||||
|
||||
|
||||
def gen():
|
||||
table = [[[0 for i in range(221)] for j in range(221)] for k in range(221)]
|
||||
with open('crawl.json') as inputfile:
|
||||
crawldata = json.load(inputfile)
|
||||
crawldata = utils.crawl_data()
|
||||
names = {result["name"] for result in crawldata}
|
||||
count = 0
|
||||
for name in names:
|
||||
|
@ -30,11 +29,11 @@ def gen():
|
|||
|
||||
def save(data):
    """Serialize ``data`` with pickle into the file ``ikeaname.pickle``.

    The file is created (or overwritten) relative to the current working
    directory, using the highest pickle protocol the running interpreter
    supports.

    :param data: any picklable object.
    """
    with open('ikeaname.pickle', 'wb') as sink:
        pickle.dump(data, sink, protocol=pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
|
||||
def load():
    """Read back the object stored in ``ikeaname.pickle``.

    The file is looked up relative to the current working directory.

    :return: the unpickled object.
    """
    # NOTE(review): unpickling can execute arbitrary code, so this file must
    # only ever come from a trusted source.
    with open('ikeaname.pickle', "rb") as source:
        return pickle.load(source)
|
||||
|
||||
|
@ -88,7 +87,6 @@ def generate():
|
|||
a = b = 32
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
for _ in range(100):
|
||||
print(generate())
|
||||
|
|
16
prepare.py
16
prepare.py
|
@ -1,11 +1,11 @@
|
|||
#!/usr/bin/env python3
|
||||
import json
|
||||
|
||||
import re
|
||||
import yaml
|
||||
|
||||
with open('crawl.json', "r") as inputfile:
|
||||
crawldata = json.load(inputfile)
|
||||
import utils
|
||||
|
||||
crawldata = utils.crawl_data()
|
||||
|
||||
descriptions = {result["description"] for result in crawldata}
|
||||
print(len(descriptions))
|
||||
|
@ -24,11 +24,11 @@ for d in descriptions:
|
|||
suffix.update(re.findall("(-[\w.-]+)", d))
|
||||
|
||||
words = {
|
||||
"nouns": list(nouns),
|
||||
"adj": list(adj),
|
||||
"digit": list(digit),
|
||||
"prefix": list(prefix),
|
||||
"suffix": list(suffix)
|
||||
"nouns": sorted(nouns),
|
||||
"adj": sorted(adj),
|
||||
"digit": sorted(digit),
|
||||
"prefix": sorted(prefix),
|
||||
"suffix": sorted(suffix)
|
||||
}
|
||||
with open('words.yaml', 'w') as outfile:
|
||||
yaml.dump(words, outfile, default_flow_style=False)
|
||||
|
|
|
@ -45,7 +45,7 @@ def subscribe(bot, update, job_queue):
|
|||
chat_id = update.message.chat_id
|
||||
# Add job to queue
|
||||
if chat_id in subscriptions:
|
||||
update.message.reply_text('You are already subscribed')
|
||||
update.message.reply_text('Du bist bereits angemeldet')
|
||||
return
|
||||
|
||||
job = job_queue.run_daily(subscribe_notification,
|
||||
|
@ -53,7 +53,7 @@ def subscribe(bot, update, job_queue):
|
|||
time=datetime.datetime.now().replace(minute=0, hour=8, second=0)
|
||||
+ datetime.timedelta(days=1))
|
||||
subscriptions[chat_id] = job
|
||||
update.message.reply_text('Successfully subscribed')
|
||||
update.message.reply_text('erfolgreich angemeldet')
|
||||
|
||||
|
||||
def unsubscribe(bot, update):
|
||||
|
@ -61,14 +61,14 @@ def unsubscribe(bot, update):
|
|||
chat_id = update.message.chat_id
|
||||
|
||||
if chat_id not in subscriptions:
|
||||
update.message.reply_text('You have no subscription')
|
||||
update.message.reply_text('Du nicht angemeldet')
|
||||
return
|
||||
|
||||
# Add job to queue
|
||||
job = subscriptions[chat_id]
|
||||
job.schedule_removal()
|
||||
del subscriptions[chat_id]
|
||||
update.message.reply_text('Successfully unsubscribed')
|
||||
update.message.reply_text('erfolgreich abgemeldet')
|
||||
|
||||
|
||||
def multiple(bot, update, args):
|
||||
|
@ -83,7 +83,7 @@ def multiple(bot, update, args):
|
|||
descriptions.append("+++ " + generate.get_description() + " +++")
|
||||
update.message.reply_text("\n".join(descriptions))
|
||||
except (IndexError, ValueError):
|
||||
update.message.reply_text('Usage: /multiple <count>')
|
||||
update.message.reply_text('Verwendung: /mehrere <anzahl>')
|
||||
|
||||
|
||||
def error(bot, update, error):
|
||||
|
|
14
utils.py
Normal file
14
utils.py
Normal file
|
@ -0,0 +1,14 @@
|
|||
import glob
|
||||
import json
|
||||
|
||||
|
||||
def crawl_data(pattern="crawlData/*.json"):
    """Load the records of all crawl result files and return them as one list.

    Every file matching ``pattern`` is parsed as JSON and its content is
    appended to the result (each file is assumed to hold a top-level JSON
    list, since the contents are merged with ``list.extend``).

    Files are processed in sorted path order. ``glob.glob`` itself returns
    matches in arbitrary order, which would make the order of the combined
    records differ between runs and machines.

    :param pattern: glob pattern selecting the JSON files; a relative pattern
        is resolved against the current working directory.
    :return: list with the records of all matching files, empty if no file
        matches.
    """
    all_data = []
    for path in sorted(glob.glob(pattern)):
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(path, "r", encoding="utf-8") as inputfile:
            all_data.extend(json.load(inputfile))
    return all_data
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # When run as a script, print how many records all crawl files hold
    # together.
    print(len(crawl_data()))
|
Loading…
Reference in a new issue