1
0
Fork 0
mirror of https://github.com/Findus23/se-simulator.git synced 2024-09-19 15:53:45 +02:00
se-simulator/utils.py

54 lines
1.3 KiB
Python
Raw Normal View History

2018-03-16 22:52:34 +01:00
import hashlib
2018-03-10 19:28:02 +01:00
import sys
2018-03-16 18:48:54 +01:00
import resource
2018-03-16 20:31:43 +01:00
from bs4 import BeautifulSoup
2018-03-16 22:52:34 +01:00
from internetarchive import get_item
2018-03-16 20:31:43 +01:00
2018-03-16 18:48:54 +01:00
def print_stats(i, skipped=None):
    """Print a short progress report followed by the memory usage.

    Args:
        i: Number of entries handled so far; printed as the total.
        skipped: Optional number of skipped entries. The "skipped" line
            is left out when this is falsy (``None`` or ``0``).
    """
    report = ["{number} total entries".format(number=i)]
    if skipped:
        report.append("{number} skipped".format(number=skipped))
    for line in report:
        print(line)
    # The memory line is always printed, whether or not anything was skipped.
    print_ram()
def print_ram():
    """Print the peak resident set size of this process in megabytes.

    The value comes from ``resource.getrusage(RUSAGE_SELF).ru_maxrss`` and
    is rounded down to whole megabytes.
    """
    peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    # ru_maxrss is reported in kilobytes on Linux but in bytes on macOS,
    # so the divisor needed to reach megabytes depends on the platform.
    divisor = 1024 * 1024 if sys.platform == "darwin" else 1024
    print("used {mb}MB".format(mb=peak // divisor))
2018-03-10 19:28:02 +01:00
2018-03-16 20:31:43 +01:00
def html2text(body):
    """Return the plain text of an HTML fragment without its code samples.

    Args:
        body: HTML markup to convert.

    Returns:
        The text content of ``body`` after every ``<code>`` element
        (including its contents) has been removed.
    """
    document = BeautifulSoup(body, "lxml")
    code_tags = document.find_all("code")
    for tag in code_tags:
        # decompose() removes the tag and everything inside it from the tree.
        tag.decompose()
    return document.get_text()
2018-03-16 22:52:34 +01:00
def get_files():
    """Return the files of the "stackexchange" archive item, keyed by name.

    Returns:
        A dict mapping each file entry's ``"name"`` value to the complete
        entry as listed in the item's ``files`` attribute.
    """
    # NOTE(review): get_item presumably contacts archive.org - confirm.
    item = get_item("stackexchange")
    files_by_name = {}
    for entry in item.files:
        files_by_name[entry["name"]] = entry
    return files_by_name
def file_hash(filename, chunk_size=128 * 1024):
    """Return the hexadecimal SHA-1 digest of the file at *filename*.

    The file is opened unbuffered and read in blocks of *chunk_size* bytes,
    so the whole file is never held in memory at once.

    Adapted from https://stackoverflow.com/a/44873382/4398037

    Args:
        filename: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration (default 128 KiB).

    Returns:
        The SHA-1 digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is smaller than 1. A size of 0 would
            end the read loop immediately and yield the digest of an
            empty file regardless of the file's contents.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive number of bytes")
    digest = hashlib.sha1()
    with open(filename, 'rb', buffering=0) as stream:
        # iter() with a sentinel keeps calling read() until it returns b'',
        # which marks the end of the file.
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
2018-03-10 19:28:02 +01:00
def get_settings(count):
    """Return *count* command-line parameters, or defaults if they are absent.

    When ``sys.argv`` holds exactly *count* parameters after the program
    name, those parameters are returned. Otherwise the built-in defaults
    are used for the two supported counts; for any other count a hint is
    printed and ``None`` is returned.

    Args:
        count: Number of positional parameters the caller expects.

    Returns:
        * ``count == 1``: the single parameter as a string
          (default ``"sites/workplace"``).
        * ``count == 2``: a 2-tuple of the parameters
          (default ``("sites/workplace", "Title")``).
        * any other count: a tuple of the supplied parameters, or ``None``
          when the number of supplied parameters does not match.
    """
    if len(sys.argv) != count + 1:
        # Wrong number of arguments: fall back to defaults where they exist.
        if count == 1:
            return "sites/workplace"
        if count == 2:
            return "sites/workplace", "Title"
        print("Please specify {x} parameters".format(x=count))
        return None
    if count == 1:
        return sys.argv[1]
    # The length check above guarantees exactly `count` items remain.
    return tuple(sys.argv[1:])