2018-03-15 21:38:18 +01:00
|
|
|
from internetarchive import get_item, download
|
2018-03-11 21:30:46 +01:00
|
|
|
|
|
|
|
from models import *
|
2018-03-16 22:52:34 +01:00
|
|
|
from utils import *
|
2018-03-11 21:30:46 +01:00
|
|
|
|
|
|
|
# Site URLs that the per-site loop below skips without looking for a dump.
ignored_se_sites = ["cs50.stackexchange.com"]
|
|
|
|
|
2018-03-16 22:52:34 +01:00
|
|
|
# Looked up below by "<url>.7z" key; a matching entry is read for its "size" value.
# NOTE(review): get_files is not defined in the visible part of this file --
# presumably it comes from one of the star imports; confirm what it returns.
files = get_files()
|
2018-03-11 21:30:46 +01:00
|
|
|
# Threshold compared against ``size / 1024 / 1024`` of a dump entry.
# NOTE(review): assumes "size" is a byte count, making this a MiB limit -- confirm.
SMALL_DUMP_LIMIT_MB = 50


def _candidate_urls(site):
    """Yield the site's own URL first, then the URL of each of its Alias rows.

    The Alias query is only executed if the consumer keeps iterating past the
    first value, so a site whose own URL matches costs no extra query.
    """
    yield site.url
    # A single query for all aliases: paging row by row with LIMIT/OFFSET and
    # no ORDER BY would not guarantee that every alias is seen exactly once.
    for alias in Alias.select(Alias.url).where(Alias.site == site):
        yield alias.url


def _find_dump(site):
    """Return the ``files`` entry for *site*, or ``{}`` when none is found.

    Each candidate URL is tried as the key ``<url>.7z``; the first key present
    in ``files`` wins. When no candidate matches, a message naming the site is
    printed before the empty dict is returned.
    """
    for url in _candidate_urls(site):
        key = url + ".7z"
        if key in files:
            return files[key]
    print("{site} ({url}) doesn't have a dump".format(site=site.name, url=site.url))
    return {}


# The first row returned by Site.select() is deliberately left out ([1:]).
for site in Site.select()[1:]:
    if site.url in ignored_se_sites:
        continue

    dump = _find_dump(site)
    if not dump:
        # No dump (or an empty entry): nothing to measure for this site.
        continue

    size_mb = int(dump["size"]) / 1024 / 1024
    if size_mb < SMALL_DUMP_LIMIT_MB:
        # Report the first small dump and stop the script.
        # NOTE(review): nesting was reconstructed from a source whose
        # indentation was lost -- confirm the exit belongs under this check.
        print(dump)
        print(size_mb)
        raise SystemExit
|