diff --git a/acros/forms.py b/acros/forms.py
index 14e4cbf..e32f516 100644
--- a/acros/forms.py
+++ b/acros/forms.py
@@ -2,7 +2,7 @@ from django.core.exceptions import ValidationError
 from django.forms import ModelForm, TextInput, CharField
 
 from acros.models import Acronym, Tag
-from acros.utils import parse_tags, edit_string_for_tags
+from acros.utils.tags import parse_tags, edit_string_for_tags
 
 
 class TagWidget(TextInput):
diff --git a/acros/models/Acronym.py b/acros/models/Acronym.py
index 0b5d6d5..60ce08d 100644
--- a/acros/models/Acronym.py
+++ b/acros/models/Acronym.py
@@ -4,7 +4,7 @@ from django.utils.text import slugify
 from simple_history.models import HistoricalRecords
 
 from acros.models import Tag
-from acros.utils import md_to_html
+from acros.utils.conversion import md_to_html
 
 
 class Acronym(models.Model):
diff --git a/acros/models/Host.py b/acros/models/Host.py
index 250ff36..705aa60 100644
--- a/acros/models/Host.py
+++ b/acros/models/Host.py
@@ -22,7 +22,7 @@ class Host(models.Model):
         if not self.fetched or True:
             with TemporaryFile("rb+") as fd:
                 r = requests.get(f"https://external-content.duckduckgo.com/ip3/{self.host}.ico")
-                if r.status_code == 200:
+                if r.ok:
                     filename = self.host + ".png"
                     for chunk in r.iter_content(chunk_size=128):
                         fd.write(chunk)
diff --git a/acros/models/Weblink.py b/acros/models/Weblink.py
index d74c8d9..fb8a5bf 100644
--- a/acros/models/Weblink.py
+++ b/acros/models/Weblink.py
@@ -4,20 +4,21 @@ from django.db import models
 from simple_history.models import HistoricalRecords
 
 from acros.models import Acronym, Host
+from acros.utils.apis import get_website_title
 
 
 class Weblink(models.Model):
     acronym = models.ForeignKey(Acronym, on_delete=models.CASCADE, related_name="links")
     url = models.URLField()
     host = models.ForeignKey(Host, on_delete=models.CASCADE, editable=False)
+    title = models.CharField(max_length=500, blank=True)
     history = HistoricalRecords()
 
     def __str__(self):
         return self.url
-
-
     def save(self, *args, **kwargs):
         uri = urlparse(self.url)
         self.host, created = Host.objects.get_or_create(host=uri.hostname)
+        self.title = get_website_title(self.url)
         super(Weblink, self).save(*args, **kwargs)
 
diff --git a/acros/models/WikipediaLink.py b/acros/models/WikipediaLink.py
index 1a39801..0798110 100644
--- a/acros/models/WikipediaLink.py
+++ b/acros/models/WikipediaLink.py
@@ -6,7 +6,7 @@ from django.db import models
 from simple_history.models import HistoricalRecords
 
 from acros.models import Acronym
-from acros.utils import fetch_wikipedia_summary
+from acros.utils.apis import fetch_wikipedia_summary
 
 
 class WikipediaLink(models.Model):
@@ -24,10 +24,10 @@ class WikipediaLink(models.Model):
 
     def save(self, *args, **kwargs):
         if not self.fetched:
-            self.extract, self.extract_html, self.timestamp, thumbnail = fetch_wikipedia_summary(self.title)
+            self.extract, self.extract_html, self.timestamp, thumbnail_url = fetch_wikipedia_summary(self.title)
             with TemporaryFile("rb+") as fd:
-                r = requests.get(thumbnail["source"])
-                filename = thumbnail["source"].split("/")[-1]
+                r = requests.get(thumbnail_url)
+                filename = thumbnail_url.split("/")[-1]
                 for chunk in r.iter_content(chunk_size=128):
                     fd.write(chunk)
                 image_file = File(fd)
diff --git a/acros/templates/acros/detail.html b/acros/templates/acros/detail.html
index bbb277d..5628663 100644
--- a/acros/templates/acros/detail.html
+++ b/acros/templates/acros/detail.html
@@ -91,13 +91,13 @@
-                {{ link.host.host }}
+                {{ link.title }}
                 {% if link.host.icon %}
                 {% endif %}
-                test
+                {{ link.host.host }}
diff --git a/acros/utils/__init__.py b/acros/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/acros/utils/apis.py b/acros/utils/apis.py
new file mode 100644
index 0000000..c203b2c
--- /dev/null
+++ b/acros/utils/apis.py
@@ -0,0 +1,18 @@
+import requests
+from bs4 import BeautifulSoup
+
+
+def fetch_wikipedia_summary(title: str):
+    r = requests.get("https://en.wikipedia.org/api/rest_v1/page/summary/" + title)
+    r.raise_for_status()
+    data = r.json()
+    print(data)
+    return data["extract"], data["extract_html"], data["timestamp"], data["thumbnail"]["source"]
+
+
+def get_website_title(url: str) -> str:
+    r = requests.get(url)
+    r.raise_for_status()
+    soup = BeautifulSoup(r.text, features="html.parser")
+    title = soup.find("title")
+    return title.text
diff --git a/acros/utils/conversion.py b/acros/utils/conversion.py
new file mode 100644
index 0000000..4306b83
--- /dev/null
+++ b/acros/utils/conversion.py
@@ -0,0 +1,12 @@
+import markdown
+
+
+def md_to_html(md: str) -> str:
+    html = markdown.markdown(
+        md,
+        output_format="html5",
+        extensions=[
+            "nl2br"
+        ]
+    )
+    return html
diff --git a/acros/utils.py b/acros/utils/tags.py
similarity index 88%
rename from acros/utils.py
rename to acros/utils/tags.py
index 023e43b..8c072fb 100644
--- a/acros/utils.py
+++ b/acros/utils/tags.py
@@ -1,18 +1,3 @@
-import markdown
-import requests
-
-
-def md_to_html(md: str) -> str:
-    html = markdown.markdown(
-        md,
-        output_format="html5",
-        extensions=[
-            "nl2br"
-        ]
-    )
-    return html
-
-
 def parse_tags(tagstring):
     """
     from https://github.com/jazzband/django-taggit/blob/master/taggit/utils.py
@@ -84,7 +69,6 @@
     words.sort()
     return words
 
-
 def split_strip(string, delimiter=","):
     """
     from https://github.com/jazzband/django-taggit/blob/master/taggit/utils.py
@@ -100,7 +84,6 @@
     words = [w.strip() for w in string.split(delimiter)]
     return [w for w in words if w]
 
-
 def edit_string_for_tags(tags):
     """
     from https://github.com/jazzband/django-taggit/blob/master/taggit/utils.py
@@ -125,13 +108,3 @@
         # else:
        names.append(name)
     return ", ".join(sorted(names))
-
-
-def fetch_wikipedia_summary(title: str):
-    r = requests.get("https://en.wikipedia.org/api/rest_v1/page/summary/" + title)
-    if r.status_code != 200:
-        raise FileNotFoundError
-    data = r.json()
-    print(data)
-    return data["extract"], data["extract_html"], data["timestamp"], data["thumbnail"]
-
diff --git a/poetry.lock b/poetry.lock
index 4fd9b5b..d64f30a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -41,6 +41,21 @@ version = "3.2.7"
 [package.extras]
 tests = ["pytest (>=4.3.0,<4.4.0)", "pytest-asyncio (>=0.10.0,<0.11.0)"]
 
+[[package]]
+category = "main"
+description = "Screen-scraping library"
+name = "beautifulsoup4"
+optional = false
+python-versions = "*"
+version = "4.9.1"
+
+[package.dependencies]
+soupsieve = [">1.2", "<2.0"]
+
+[package.extras]
+html5lib = ["html5lib"]
+lxml = ["lxml"]
+
 [[package]]
 category = "dev"
 description = "A thin, practical wrapper around terminal coloring, styling, and positioning"
@@ -318,6 +333,14 @@ optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
 version = "1.15.0"
 
+[[package]]
+category = "main"
+description = "A modern CSS selector implementation for Beautiful Soup."
+name = "soupsieve"
+optional = false
+python-versions = "*"
+version = "1.9.6"
+
 [[package]]
 category = "main"
 description = "Non-validating SQL parser"
@@ -373,7 +396,7 @@ docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"]
 testing = ["jaraco.itertools", "func-timeout"]
 
 [metadata]
-content-hash = "9eae2cb8f2154c86a10640bc55fdd0920724dbcb718a2f11146550997d21e734"
+content-hash = "58d0f8a0b59df62efefb3ee5688f52976722eef1851d152e03f28dcde9cc4686"
 python-versions = ">=3.7,<4.0"
 
 [metadata.files]
@@ -402,6 +425,11 @@ asgiref = [
     {file = "asgiref-3.2.7-py2.py3-none-any.whl", hash = "sha256:9ca8b952a0a9afa61d30aa6d3d9b570bb3fd6bafcf7ec9e6bed43b936133db1c"},
     {file = "asgiref-3.2.7.tar.gz", hash = "sha256:8036f90603c54e93521e5777b2b9a39ba1bad05773fcf2d208f0299d1df58ce5"},
 ]
+beautifulsoup4 = [
+    {file = "beautifulsoup4-4.9.1-py2-none-any.whl", hash = "sha256:e718f2342e2e099b640a34ab782407b7b676f47ee272d6739e60b8ea23829f2c"},
+    {file = "beautifulsoup4-4.9.1-py3-none-any.whl", hash = "sha256:a6237df3c32ccfaee4fd201c8f5f9d9df619b93121d01353a64a73ce8c6ef9a8"},
+    {file = "beautifulsoup4-4.9.1.tar.gz", hash = "sha256:73cc4d115b96f79c7d77c1c7f7a0a8d4c57860d1041df407dd1aae7f07a77fd7"},
+]
 blessings = [
     {file = "blessings-1.7-py2-none-any.whl", hash = "sha256:caad5211e7ba5afe04367cdd4cfc68fa886e2e08f6f35e76b7387d2109ccea6e"},
     {file = "blessings-1.7-py3-none-any.whl", hash = "sha256:b1fdd7e7a675295630f9ae71527a8ebc10bfefa236b3d6aa4932ee4462c17ba3"},
@@ -577,6 +605,10 @@ six = [
     {file = "six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"},
     {file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"},
 ]
+soupsieve = [
+    {file = "soupsieve-1.9.6-py2.py3-none-any.whl", hash = "sha256:feb1e937fa26a69e08436aad4a9037cd7e1d4c7212909502ba30701247ff8abd"},
+    {file = "soupsieve-1.9.6.tar.gz", hash = "sha256:7985bacc98c34923a439967c1a602dc4f1e15f923b6fcf02344184f86cc7efaa"},
+]
 sqlparse = [
     {file = "sqlparse-0.3.1-py2.py3-none-any.whl", hash = "sha256:022fb9c87b524d1f7862b3037e541f68597a730a8843245c349fc93e1643dc4e"},
     {file = "sqlparse-0.3.1.tar.gz", hash = "sha256:e162203737712307dfe78860cc56c8da8a852ab2ee33750e33aeadf38d12c548"},
diff --git a/pyproject.toml b/pyproject.toml
index c143e20..ea6b12f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,7 @@ requests = "^2.23.0"
 ads = "^0.12.3"
 Pillow = "^7.1.2"
 gunicorn = "^20.0.4"
+beautifulsoup4 = "^4.9.1"
 
 [tool.poetry.dev-dependencies]
 bpython = "^0.19"