1
0
Fork 0
mirror of https://github.com/Findus23/se-simulator.git synced 2024-09-19 15:53:45 +02:00

minor changes

This commit is contained in:
Lukas Winkler 2018-03-30 15:07:18 +02:00
parent 0718fdb35f
commit 33aad7b132
6 changed files with 19 additions and 13 deletions

1
.gitignore vendored
View file

@ -1,6 +1,7 @@
sites
raw
downloads
chains
word-rnn-tensorflow/
.idea/
__pycache__/

View file

@ -12,7 +12,7 @@ files = get_files()
# TODO: name sites/id after real url
for file in glob.glob("downloads/**/*.7z"):
for file in glob.glob("downloads/**/*.7z", recursive=True):
if "meta" in file:
continue
filename = os.path.basename(file)
@ -46,8 +46,8 @@ for file in glob.glob("downloads/**/*.7z"):
print(code)
currentdir = os.getcwd()
rawdir = "raw/" + code
sitesdir = "sites/" + code
for dir in [rawdir, sitesdir]:
chainsdir = "chains/" + code
for dir in [rawdir, chainsdir]:
if not os.path.exists(dir):
os.mkdir(dir)
@ -57,7 +57,7 @@ for file in glob.glob("downloads/**/*.7z"):
subprocess.check_output(["7z", "x", "-aoa", code + ".7z"])
os.chdir(currentdir)
print("Start parsing")
parse_posts(rawdir, sitesdir)
parse_comments(rawdir, sitesdir)
parse_usernames(rawdir, sitesdir)
parse_posts(rawdir, rawdir)
parse_comments(rawdir, rawdir)
parse_usernames(rawdir, rawdir)
print("DONE")

View file

@ -27,7 +27,7 @@
<h2 class="answerheader">{{ answers|length }} Answers</h2>
{% for answer in answers %}
{% set vote=voted[("answer", answer.id)] %}
<div class="content answer">
<div class="content answer" id="{{ answer.id }}">
<div class="vote" data-id="{{ answer.id }}" data-type="answer" data-ranking="{{ answer.ci_lower_bound }}">
<a class="up {{ "active" if vote == True }}"></a>
<div>{{ answer.upvotes - answer.downvotes }}</div>
@ -38,11 +38,11 @@
<p>{{ paragraph }}</p>
{% endfor %}
<div class="contentfooter">
<div class="authorbox">
<a href="{{ request.url }}#{{ answer.id }}" class="authorbox">
answered {{ prettydate(answer.datetime) }}
<br>
{{ answer.user.username }}
</div>
</a>
</div>
</div>
</div>

View file

@ -58,8 +58,8 @@ def generate_chain(sourcedir, chainfile, mode):
def get_chain(url, mode):
sourcedir = 'sites/{url}'.format(url=url, type=mode)
chainfile = 'sites/{url}/{type}.chain.json'.format(url=url, type=mode)
sourcedir = 'raw/{url}'.format(url=url, type=mode)
chainfile = 'chains/{url}/{type}.chain.json'.format(url=url, type=mode)
if os.path.exists(chainfile):
return load_chain(chainfile, mode)
else:

View file

@ -1,6 +1,7 @@
import random
from datetime import datetime
import sys
from slugify import slugify
import utils
@ -62,7 +63,11 @@ def add_question(site, count=100):
if __name__ == "__main__":
query = Site.select().where(Site.last_download.is_null(False))
if len(sys.argv) > 1:
sites = sys.argv[1:]
query = Site.select().where((Site.last_download.is_null(False)) & (Site.url.in_(sites)))
else:
query = Site.select().where(Site.last_download.is_null(False))
for s in query:
add_username(s)
add_title(s)

View file

@ -51,7 +51,7 @@ pre > code {
background: #E1ECF9;
font-size: 12px;
padding: 5px;
color: #111;
}
}