1
0
Fork 0
mirror of https://github.com/Findus23/se-simulator.git synced 2024-09-19 15:53:45 +02:00

database storage

This commit is contained in:
Lukas Winkler 2018-03-19 22:03:32 +01:00
parent 8edc3420cc
commit ad0a671cba
No known key found for this signature in database
GPG key ID: 94AFBE7C2656A5B5
5 changed files with 108 additions and 16 deletions

View file

@ -1,12 +1,15 @@
import html
from urllib.parse import urlparse
import requests
from urllib.parse import urlparse
from models import *
# for i in [Alias, Site]:
# mdls = [Question, Answer, Title, User, Alias, Site]
# for i in mdls:
# i.drop_table()
# for i in [Site, Alias]:
# for i in reversed(mdls):
# print(i)
# i.create_table()
r = requests.get("https://api.stackexchange.com/2.2/sites?pagesize=500")

View file

@ -12,9 +12,37 @@ class Site(BaseModel):
tag_background_color = CharField(max_length=7)
tag_foreground_color = CharField(max_length=7)
link_color = CharField(max_length=7)
enabled=BooleanField(default=True)
enabled = BooleanField(default=True)
class Alias(BaseModel):
    """A URL entry that points at one Site."""
    # Site this URL is attached to.
    site = ForeignKeyField(Site)
    # Declared unique, so each URL can appear only once; at most 50 characters.
    url = CharField(unique=True, max_length=50)
class Title(BaseModel):
    """A title text stored for one Site."""
    # The title text itself.
    text = CharField()
    # Presumably a URL-safe form of `text` -- confirm against the code that fills it.
    slug = CharField()
    # Site the title belongs to.
    site = ForeignKeyField(Site)
class User(BaseModel):
    """A user name stored for one Site."""
    # Name of the user.
    username = CharField()
    # Site the user belongs to.
    site = ForeignKeyField(Site)
class Question(BaseModel):
    """A question body with vote counters, linked to a Title, a User and a Site."""
    # Body text of the question.
    text = TextField()
    # Vote counters; both start at 0 when no value is given.
    upvotes = IntegerField(default=0)
    downvotes = IntegerField(default=0)
    # Title row used for this question.
    title = ForeignKeyField(Title)
    # User row recorded as the author.
    user = ForeignKeyField(User)
    # Site the question belongs to.
    site = ForeignKeyField(Site)
class Answer(BaseModel):
    """An answer body with vote counters, linked to a User and a Site.

    NOTE(review): no foreign key to Question is declared here -- confirm how
    answers are meant to be associated with questions.
    """
    # Body text of the answer.
    text = TextField()
    # Vote counters start at 0, matching the defaults declared on Question,
    # so an Answer can be created without passing explicit vote counts.
    upvotes = IntegerField(default=0)
    downvotes = IntegerField(default=0)
    # User row recorded as the author.
    user = ForeignKeyField(User)
    # Site the answer belongs to.
    site = ForeignKeyField(Site)

View file

@ -1,7 +1,7 @@
import os
import jsonlines
import markovify
import os
from nltk.tokenize.moses import MosesDetokenizer, MosesTokenizer
from markov import MarkovText, MarkovUserName
from utils import *
@ -26,12 +26,12 @@ def load_chain(chainfile, mode):
return markov.from_json(data)
def generate_chain(basedir, mode):
def generate_chain(sourcedir, chainfile, mode):
combined_cains = None
chainlist = []
markov = get_markov(mode)
i = 0
with jsonlines.open(basedir + "/{type}.jsonl".format(type=mode), mode="r") as content:
with jsonlines.open(sourcedir + "/{type}.jsonl".format(type=mode), mode="r") as content:
for text in content:
text = text.strip()
try:
@ -57,17 +57,28 @@ def generate_chain(basedir, mode):
return chain
def get_chain(url, mode):
    """
    Return the chain for the site directory ``url`` and content type ``mode``.

    The chain file is looked up at ``sites/<url>/<mode>.chain.json``. When that
    file exists it is loaded with ``load_chain``; otherwise ``generate_chain``
    is called with the site directory, the chain file path and the mode.

    :param url: directory name of the site below ``sites/``
    :param mode: content type used in the chain file name
    :return: whatever ``load_chain`` / ``generate_chain`` return
    """
    site_dir = 'sites/{url}'.format(url=url)
    chain_path = 'sites/{url}/{type}.chain.json'.format(url=url, type=mode)
    if not os.path.exists(chain_path):
        # No stored chain file for this site/mode yet.
        return generate_chain(site_dir, chain_path, mode)
    return load_chain(chain_path, mode)
def generate_text(chain: markovify.Text, model):
    """
    Produce one piece of text from ``chain``.

    For the model ``"Titles"`` a short sentence limited to 70 characters is
    requested; every other model gets a regular sentence.

    NOTE(review): markovify documents that sentence generation returns None
    when no sentence could be built -- callers should be checked for that.

    :param chain: chain object offering make_sentence / make_short_sentence
    :param model: content type name, e.g. "Titles"
    :return: the result of the chain's sentence method
    """
    if model != "Titles":
        return chain.make_sentence()
    return chain.make_short_sentence(70)
if __name__ == "__main__":
basedir, mode = get_settings(2)
if mode not in ["Questions", "Answers", "Titles", "Usernames"]:
print("error")
exit()
chainfile = basedir + '/{type}.chain.json'.format(type=mode)
if os.path.exists(chainfile):
chain = load_chain(chainfile, mode)
else:
chain = generate_chain(basedir, mode)
chain = get_chain("sites/astronomy.stackexchange.com", mode)
for _ in range(10):
# walk = []
# for text in chain.gen():

45
todb.py Normal file
View file

@ -0,0 +1,45 @@
from slugify import slugify
from models import *
from text_generator import get_chain, generate_text
def add_username(site, count=100):
    """
    Generate user names for ``site`` and store them as User rows.

    A generation attempt that yields no text is skipped and retried instead of
    being written to the database. Attempts are capped at ``count * 10`` so a
    chain that cannot produce text does not loop forever; in that case fewer
    than ``count`` rows are created.

    :type site: Site
    :param count: number of User rows to create
    """
    chain = get_chain(site.url, "Usernames")
    max_attempts = count * 10
    created = 0
    for _ in range(max_attempts):
        if created >= count:
            break
        username = generate_text(chain, "Usernames")
        if not username:
            # The chain produced nothing usable; do not store an empty name.
            continue
        User.create(username=username, site=site)
        created += 1
def add_title(site, count=100):
    """
    Generate titles for ``site`` and store them, with a slug, as Title rows.

    A generation attempt that yields no text is skipped and retried, because
    an empty result can neither be slugified nor stored. Attempts are capped
    at ``count * 10`` so a chain that cannot produce text does not loop
    forever; in that case fewer than ``count`` rows are created.

    :type site: Site
    :param count: number of Title rows to create
    """
    chain = get_chain(site.url, "Titles")
    max_attempts = count * 10
    created = 0
    for _ in range(max_attempts):
        if created >= count:
            break
        title = generate_text(chain, "Titles")
        if not title:
            # The chain produced nothing usable; try again.
            continue
        # Slug is cut at a word boundary and limited to 70 characters.
        slug = slugify(title, max_length=70, word_boundary=True)
        Title.create(text=title, slug=slug, site=site)
        created += 1
def add_question(site, count=100):
    """
    Generate question bodies for ``site`` and store them as Question rows.

    Up to ``count`` stored titles and up to ``count`` stored users of the site
    are paired in the order the queries return them, and one Question is
    created per pair. The title text and user name of each pair are printed.
    Pairs for which no text could be generated are skipped.

    :type site: Site
    :param count: maximum number of Question rows to create
    """
    users = User.select().where(User.site == site).limit(count)
    titles = Title.select().where(Title.site == site).limit(count)
    chain = get_chain(site.url, "Questions")
    # zip stops at the shorter result set, so a site with fewer than `count`
    # users or titles yields fewer questions instead of raising IndexError.
    for title, user in zip(titles, users):
        text = generate_text(chain, "Questions")
        if not text:
            # The chain produced nothing usable; do not store an empty body.
            continue
        print(title.text)
        print(user.username)
        # Foreign keys are passed by field name, as in add_username/add_title.
        Question.create(text=text, title=title, user=user, site=site)
if __name__ == "__main__":
    # Pick one site whose last_download is set and fill it with generated
    # users, titles and questions, in that order.
    has_download = Site.last_download.is_null(False)
    site = Site.select().where(has_download).limit(1).get()
    for populate in (add_username, add_title, add_question):
        populate(site)

View file

@ -1,7 +1,8 @@
import hashlib
import sys
import random
import resource
import string
import sys
from bs4 import BeautifulSoup
from internetarchive import get_item
@ -51,3 +52,7 @@ def get_settings(count):
return sys.argv[1]
elif count == 2:
return sys.argv[1], sys.argv[2]
def get_random_string(length):
    """
    Return a random string of ``length`` characters.

    Characters are drawn, with replacement, from the uppercase ASCII letters
    and the digits 0-9 using the ``random`` module.

    :param length: number of characters in the result
    :return: the generated string
    """
    alphabet = string.ascii_uppercase + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)