2020-07-18 21:59:42 +02:00
|
|
|
from typing import Tuple, Optional
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

from acros.utils.html import clean_html, string_to_bool
|
2020-06-01 20:26:00 +02:00
|
|
|
|
2020-07-18 21:59:42 +02:00
|
|
|
|
|
|
|
class WikipediaAPISummary:
    """Summary of an English Wikipedia page, fetched from the REST API.

    The constructor performs a single HTTP GET against the ``page/summary``
    endpoint and keeps the decoded JSON document in ``self.data``.  The
    properties are thin accessors over that dictionary.
    """

    urlbase = "https://en.wikipedia.org/api/rest_v1/page/summary/"

    def __init__(self, title: str, timeout: float = 10.0):
        """Download the summary document for *title*.

        Args:
            title: Page title, either raw (``"AC/DC"``) or already
                percent-encoded (``"AC%2FDC"``).
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If no answer arrives within *timeout* seconds.
        """
        r = requests.get(self._summary_url(title), timeout=timeout)
        r.raise_for_status()
        self.data = r.json()

    @classmethod
    def _summary_url(cls, title: str) -> str:
        """Build the request URL for *title* as a single path segment."""
        # The title must not leak path/query/fragment delimiters into the
        # URL.  "%" is kept as-is so titles that callers already
        # percent-encoded are not encoded a second time.
        return cls.urlbase + quote(title, safe="%")

    @property
    def title(self) -> str:
        """The ``title`` field of the summary document."""
        return self.data["title"]

    @property
    def extract(self) -> str:
        """The ``extract`` field of the summary document."""
        return self.data["extract"]

    @property
    def extract_html(self) -> str:
        """The ``extract_html`` field of the summary document."""
        return self.data["extract_html"]

    @property
    def timestamp(self) -> str:
        """The ``timestamp`` field of the summary document."""
        return self.data["timestamp"]

    @property
    def image(self) -> Optional[str]:
        """Source URL of the original image, or ``None`` if the document has none."""
        if "originalimage" in self.data:
            return self.data["originalimage"]["source"]
        return None
|
2020-06-01 20:26:00 +02:00
|
|
|
|
|
|
|
|
|
|
|
def get_website_title(url: str, timeout: float = 10.0) -> str:
    """Return the text of the ``<title>`` element of the page at *url*.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text content of the first ``<title>`` element found.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no answer arrives within *timeout* seconds.
        ValueError: If the downloaded document has no ``<title>`` element.
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, features="html.parser")
    title = soup.find("title")
    # find() yields None when nothing matches; fail with a clear message
    # instead of an AttributeError on ``None.text``.
    if title is None:
        raise ValueError(f"No <title> element found at {url!r}")
    return title.text
|
2020-07-18 21:59:42 +02:00
|
|
|
|
|
|
|
|
|
|
|
class WikipediaImageAPIObject:
    """Image metadata for one Wikimedia Commons file, fetched via ``api.php``.

    The constructor queries ``prop=imageinfo`` for ``File:<filename>`` and
    stores the decoded JSON in ``self.data``.  ``self.image_obj`` is the
    first entry of ``query.pages`` in that response; the properties read
    from it.
    """

    def __init__(self, filename: str, timeout: float = 10.0):
        """Download the image information for *filename*.

        Args:
            filename: File name without the ``File:`` prefix, either raw or
                already percent-encoded.
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If no answer arrives within *timeout* seconds.
        """
        self.filename = filename
        r = requests.get(self.api_url, timeout=timeout)
        r.raise_for_status()
        self.data = r.json()
        # A single title is queried, so only the first page entry is used.
        self.image_obj = list(self.data["query"]["pages"].values())[0]

    @classmethod
    def from_url(cls, url: str, timeout: float = 10.0):
        """Create an instance from a URL, using its last ``/``-separated part as file name."""
        return cls(url.split("/")[-1], timeout=timeout)

    @property
    def api_url(self):
        """The ``api.php`` query URL for ``self.filename``."""
        # Escape characters such as "&", "#" and "+" that would otherwise
        # corrupt the query string.  "%" is kept as-is so file names that
        # are already percent-encoded are not encoded a second time.
        filename = quote(self.filename, safe="%")
        return (
            "https://commons.wikimedia.org/w/api.php"
            "?action=query"
            "&format=json"
            f"&titles=File:{filename}"
            "&prop=imageinfo"
            "&iiprop=extmetadata|size|url|timestamp"
            "&iiurlwidth=500"
        )

    @property
    def pageid(self) -> int:
        """The ``pageid`` of the page entry."""
        return self.image_obj["pageid"]

    @property
    def imageinfo(self):
        """The first element of the page entry's ``imageinfo`` list."""
        return self.image_obj["imageinfo"][0]

    @property
    def timestamp(self) -> str:
        """The ``timestamp`` field of the image info."""
        return self.imageinfo["timestamp"]

    @property
    def thumb_size(self) -> Tuple[int, int]:
        """``(thumbwidth, thumbheight)`` as reported in the image info."""
        return self.imageinfo["thumbwidth"], self.imageinfo["thumbheight"]

    @property
    def url(self) -> str:
        """The ``url`` field of the image info."""
        return self.imageinfo["url"]

    @property
    def thumburl(self) -> str:
        """The ``thumburl`` field of the image info."""
        return self.imageinfo["thumburl"]

    @property
    def extmetadata(self):
        """The ``extmetadata`` mapping of the image info."""
        return self.imageinfo["extmetadata"]

    @property
    def image_description(self) -> str:
        """``ImageDescription`` value, passed through ``clean_html``."""
        return clean_html(self.extmetadata["ImageDescription"]["value"])

    @property
    def credit(self) -> Optional[str]:
        """``Credit`` value passed through ``clean_html``, or ``None`` if absent."""
        if "Credit" in self.extmetadata:
            return clean_html(self.extmetadata["Credit"]["value"])
        return None

    @property
    def artist(self) -> str:
        """``Artist`` value, passed through ``clean_html``."""
        return clean_html(self.extmetadata["Artist"]["value"])

    @property
    def license_short_name(self) -> str:
        """The ``LicenseShortName`` value."""
        return self.extmetadata["LicenseShortName"]["value"]

    @property
    def license_url(self) -> Optional[str]:
        """The ``LicenseUrl`` value, or ``None`` if absent."""
        if "LicenseUrl" in self.extmetadata:
            return self.extmetadata["LicenseUrl"]["value"]
        return None

    @property
    def attribution_required(self) -> bool:
        """``AttributionRequired`` value, converted with ``string_to_bool``."""
        return string_to_bool(self.extmetadata["AttributionRequired"]["value"])

    @property
    def copyrighted(self) -> bool:
        """``Copyrighted`` value, converted with ``string_to_bool``."""
        return string_to_bool(self.extmetadata["Copyrighted"]["value"])

    @property
    def attribution(self) -> Optional[str]:
        """The ``Attribution`` value, or ``None`` if absent."""
        if "Attribution" in self.extmetadata:
            return self.extmetadata["Attribution"]["value"]
        return None
|