1
0
Fork 0
mirror of https://github.com/Findus23/acronomy.git synced 2024-09-19 15:33:45 +02:00
acronomy/acros/utils/apis.py

168 lines
4.6 KiB
Python

import logging
from typing import Tuple, Optional
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from django.core.cache import cache

from acros.utils.html import clean_html, string_to_bool
# Single HTTP session used by every helper in this module, so all outgoing
# requests share the headers configured below.
requests_session = requests.Session()

# First six characters of the value cached under "commit"; empty string when
# the cache holds nothing (or a falsy value) for that key.
commit = cache.get("commit")
commit_version = commit[:6] if commit else ""

# Identify this application to the remote services it queries.
requests_session.headers["User-Agent"] = f"Acronomy {commit_version} (https://acronomy.lw1.at)"
class NotFoundError(FileNotFoundError):
    """Raised when a remote API has no usable result for a request.

    Derives from ``FileNotFoundError``, so handlers written for that
    built-in (or for ``OSError``) catch this error as well.
    """
class WikipediaAPISummary:
    """Page summary fetched from the English Wikipedia REST API.

    The constructor performs the HTTP request; the properties expose
    individual fields of the decoded JSON response kept in ``self.data``.
    """

    urlbase = "https://en.wikipedia.org/api/rest_v1/page/summary/"
    # Seconds to wait for the API, so a stalled connection cannot block the
    # caller indefinitely.
    timeout = 10

    def __init__(self, title: str):
        """Download the summary of the page called *title*.

        Raises:
            NotFoundError: if the API answers with an HTTP error status.
                Other ``requests`` exceptions (connection errors, timeouts)
                propagate unchanged.
        """
        url = self._build_url(title)
        logging.getLogger(__name__).debug("Requesting %s", url)
        r = requests_session.get(url, timeout=self.timeout)
        try:
            r.raise_for_status()
        except requests.HTTPError as err:
            # Keep the HTTP error as the cause so the status code stays
            # visible in tracebacks.
            raise NotFoundError("Wikipedia API returns error") from err
        self.data = r.json()

    @classmethod
    def _build_url(cls, title: str) -> str:
        """Return the summary endpoint URL for *title*.

        The title is a single path segment, so every reserved character is
        percent-encoded: "/" would otherwise split the path, while "?" and
        "#" would start a query string or a fragment.
        """
        return cls.urlbase + quote(title, safe="")

    @property
    def title(self) -> str:
        """The ``title`` field of the response."""
        return self.data["title"]

    @property
    def extract(self) -> str:
        """The ``extract`` field of the response."""
        return self.data["extract"]

    @property
    def extract_html(self) -> str:
        """The ``extract_html`` field of the response."""
        return self.data["extract_html"]

    @property
    def description(self) -> Optional[str]:
        """The ``description`` field, or ``None`` when the response has none."""
        if "description" in self.data:
            return self.data["description"]
        return None

    @property
    def description_source(self) -> Optional[str]:
        """The ``description_source`` field, or ``None`` when absent."""
        if "description_source" in self.data:
            return self.data["description_source"]
        return None

    @property
    def wikibase_item(self) -> Optional[str]:
        """The ``wikibase_item`` field, or ``None`` when absent."""
        if "wikibase_item" in self.data:
            return self.data["wikibase_item"]
        return None

    @property
    def timestamp(self) -> str:
        """The ``timestamp`` field of the response."""
        return self.data["timestamp"]

    @property
    def image(self) -> Optional[str]:
        """The ``source`` of ``originalimage``, or ``None`` without an image."""
        if "originalimage" in self.data:
            return self.data["originalimage"]["source"]
        return None
def get_website_title(url: str, timeout: float = 10) -> str:
    """Return the text of the ``<title>`` element of the page at *url*.

    Args:
        url: address of the page to download.
        timeout: seconds to wait for the server before giving up, so an
            unresponsive host cannot block the caller indefinitely.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        NotFoundError: if the downloaded document has no ``<title>`` element.
    """
    r = requests_session.get(url, timeout=timeout)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, features="html.parser")
    title = soup.find("title")
    if title is None:
        # find() yields None when nothing matches; report that with the
        # module's own error instead of an AttributeError on ``.text``.
        raise NotFoundError(f"No <title> element found at {url}")
    return title.text
class WikipediaImageAPIObject:
    """Metadata of one Wikimedia Commons file, loaded from the MediaWiki API.

    The constructor performs the HTTP request. ``self.data`` holds the whole
    decoded JSON response and ``self.image_obj`` the first entry of its
    ``query.pages`` mapping; the properties read individual fields from it.
    """

    # Seconds to wait for the API, so a stalled connection cannot block the
    # caller indefinitely.
    timeout = 10

    def __init__(self, filename: str):
        """Query the API for the file called *filename* (without ``File:``).

        Raises:
            requests.HTTPError: if the API answers with an error status.
            NotFoundError: if the response contains no page, no
                ``imageinfo``, or no ``AttributionRequired`` metadata.
        """
        self.filename = filename
        logging.getLogger(__name__).debug("Requesting %s", self.api_url)
        r = requests_session.get(self.api_url, timeout=self.timeout)
        r.raise_for_status()
        self.data = r.json()
        # A response may lack "query"/"pages" entirely (or list no page);
        # treat that like any other unusable answer.
        pages = self.data.get("query", {}).get("pages", {})
        if not pages:
            raise NotFoundError(f"Commons API returned no page for {filename!r}")
        self.image_obj = next(iter(pages.values()))
        if "imageinfo" not in self.image_obj:
            raise NotFoundError(f"Commons API returned no imageinfo for {filename!r}")
        if "AttributionRequired" not in self.imageinfo.get("extmetadata", {}):
            raise NotFoundError(f"Commons API returned no attribution metadata for {filename!r}")

    @classmethod
    def from_url(cls, url: str):
        """Build an instance from *url*, using its last ``/``-separated part as filename."""
        return cls(url.split("/")[-1])

    @property
    def api_url(self):
        """URL of the API query for this file.

        The filename is inserted verbatim, so a name that is already
        percent-encoded (e.g. one taken from a URL) is passed on unchanged.
        """
        return "https://commons.wikimedia.org/w/api.php" \
               "?action=query" \
               "&format=json" \
               f"&titles=File:{self.filename}" \
               "&prop=imageinfo" \
               "&iiprop=extmetadata|size|url|timestamp" \
               "&iiurlwidth=500"

    @property
    def pageid(self) -> int:
        """The ``pageid`` of the page entry."""
        return self.image_obj["pageid"]

    @property
    def imageinfo(self):
        """The first element of the page's ``imageinfo`` list."""
        return self.image_obj["imageinfo"][0]

    @property
    def timestamp(self) -> str:
        """The ``timestamp`` field of the image info."""
        return self.imageinfo["timestamp"]

    @property
    def thumb_size(self) -> Tuple[int, int]:
        """``(thumbwidth, thumbheight)`` of the image info."""
        return self.imageinfo["thumbwidth"], self.imageinfo["thumbheight"]

    @property
    def url(self) -> str:
        """The ``url`` field of the image info."""
        return self.imageinfo["url"]

    @property
    def thumburl(self) -> str:
        """The ``thumburl`` field of the image info."""
        return self.imageinfo["thumburl"]

    @property
    def extmetadata(self):
        """The ``extmetadata`` mapping of the image info."""
        return self.imageinfo["extmetadata"]

    @property
    def image_description(self) -> str:
        """``ImageDescription`` metadata value, passed through ``clean_html``."""
        return clean_html(self.extmetadata["ImageDescription"]["value"])

    @property
    def credit(self) -> Optional[str]:
        """``Credit`` metadata value passed through ``clean_html``, or ``None`` when absent."""
        if "Credit" in self.extmetadata:
            return clean_html(self.extmetadata["Credit"]["value"])
        return None

    @property
    def artist(self) -> Optional[str]:
        """``Artist`` metadata value passed through ``clean_html``, or ``None`` when absent."""
        if "Artist" in self.extmetadata:
            return clean_html(self.extmetadata["Artist"]["value"])
        return None

    @property
    def license_short_name(self) -> str:
        """``LicenseShortName`` metadata value."""
        return self.extmetadata["LicenseShortName"]["value"]

    @property
    def license_url(self) -> Optional[str]:
        """``LicenseUrl`` metadata value, or ``None`` when absent."""
        if "LicenseUrl" in self.extmetadata:
            return self.extmetadata["LicenseUrl"]["value"]
        return None

    @property
    def attribution_required(self) -> bool:
        """``AttributionRequired`` metadata value converted with ``string_to_bool``."""
        return string_to_bool(self.extmetadata["AttributionRequired"]["value"])

    @property
    def copyrighted(self) -> bool:
        """``Copyrighted`` metadata value converted with ``string_to_bool``."""
        return string_to_bool(self.extmetadata["Copyrighted"]["value"])

    @property
    def attribution(self) -> Optional[str]:
        """``Attribution`` metadata value, or ``None`` when absent."""
        if "Attribution" in self.extmetadata:
            return self.extmetadata["Attribution"]["value"]
        return None