From 188c566eae5664bc2adcba9589c5d9dee0c8dec6 Mon Sep 17 00:00:00 2001 From: Lukas Winkler Date: Tue, 21 Mar 2023 23:24:52 +0100 Subject: [PATCH] improve wikipedia fetching --- acros/admin.py | 1 + .../management/commands/refetch_wikipedia.py | 15 ++++++++----- ...calwikipedialink_wikibase_item_and_more.py | 22 +++++++++++++++++++ ...kipedialink_description_source_and_more.py | 22 +++++++++++++++++++ acros/models/WikipediaImage.py | 1 - acros/models/WikipediaLink.py | 4 ++++ acros/utils/apis.py | 10 +++++++++ 7 files changed, 68 insertions(+), 7 deletions(-) create mode 100644 acros/migrations/0052_historicalwikipedialink_wikibase_item_and_more.py create mode 100644 acros/migrations/0053_historicalwikipedialink_description_source_and_more.py diff --git a/acros/admin.py b/acros/admin.py index 7945cde..8c91ae8 100644 --- a/acros/admin.py +++ b/acros/admin.py @@ -57,6 +57,7 @@ class LinkAdmin(SimpleHistoryAdmin): class WikipediaAdmin(SimpleHistoryAdmin): list_display = ["title", "acronym", "thumbnail"] + list_filter = ["description_source"] date_hierarchy = "timestamp" ... diff --git a/acros/management/commands/refetch_wikipedia.py b/acros/management/commands/refetch_wikipedia.py index 224897d..42434dc 100644 --- a/acros/management/commands/refetch_wikipedia.py +++ b/acros/management/commands/refetch_wikipedia.py @@ -1,5 +1,6 @@ +import time + from django.core.management.base import BaseCommand -from simple_history.utils import update_change_reason from acros.models import WikipediaLink @@ -10,8 +11,10 @@ class Command(BaseCommand): def handle(self, *args, **options): links = WikipediaLink.objects.all() for link in links: - if link.fetched: - self.stdout.write(link.title) - link.fetched = False - # update_change_reason(link, "refetch_wikipedia command") - link.save() + print(link) + self.stdout.write(link.title) + link.fetched = False + # update_change_reason(link, "refetch_wikipedia command") + link.clean() + link.save() + time.sleep(1) diff --git a/acros/migrations/0052_historicalwikipedialink_wikibase_item_and_more.py b/acros/migrations/0052_historicalwikipedialink_wikibase_item_and_more.py new file mode 100644 index 0000000..7407ffc --- /dev/null +++ b/acros/migrations/0052_historicalwikipedialink_wikibase_item_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 4.1.7 on 2023-03-21 22:09 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("acros", "0051_alter_historicalacronym_options_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="historicalwikipedialink", + name="wikibase_item", + field=models.CharField(blank=True, max_length=20, null=True), + ), + migrations.AddField( + model_name="wikipedialink", + name="wikibase_item", + field=models.CharField(blank=True, max_length=20, null=True), + ), + ] diff --git a/acros/migrations/0053_historicalwikipedialink_description_source_and_more.py b/acros/migrations/0053_historicalwikipedialink_description_source_and_more.py new file mode 100644 index 0000000..c65d1a0 --- /dev/null +++ b/acros/migrations/0053_historicalwikipedialink_description_source_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 4.1.7 on 2023-03-21 22:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("acros", "0052_historicalwikipedialink_wikibase_item_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="historicalwikipedialink", + name="description_source", + field=models.CharField(blank=True, max_length=20, null=True), + ), + migrations.AddField( + model_name="wikipedialink", + name="description_source", + field=models.CharField(blank=True, max_length=20, null=True), + ), + ] diff --git a/acros/models/WikipediaImage.py b/acros/models/WikipediaImage.py index ac3784f..dd5867f 100644 --- a/acros/models/WikipediaImage.py +++ b/acros/models/WikipediaImage.py @@ -1,6 +1,5 @@ from tempfile import TemporaryFile -import requests from django.core.files import File from django.db import models diff --git a/acros/models/WikipediaLink.py b/acros/models/WikipediaLink.py index 1c452cc..022de5e 100644 --- a/acros/models/WikipediaLink.py +++ b/acros/models/WikipediaLink.py @@ -14,6 +14,8 @@ class WikipediaLink(models.Model): extract = models.TextField(blank=True) extract_html = models.TextField(blank=True) description = models.TextField(blank=True, null=True) + description_source = models.CharField(blank=True, null=True, max_length=20) + wikibase_item = models.CharField(blank=True, null=True, max_length=20) thumbnail = models.ForeignKey(WikipediaImage, on_delete=models.CASCADE, related_name="wiki_articles", blank=True, null=True) timestamp = models.DateTimeField(blank=True) @@ -29,8 +31,10 @@ class WikipediaLink(models.Model): self.extract = summary.extract self.extract_html = summary.extract_html self.description = summary.description + self.description_source = summary.description_source self.timestamp = summary.timestamp self.title = summary.title + self.wikibase_item = summary.wikibase_item if summary.image: filename = unquote(summary.image.split("/")[-1]) if filename.endswith(".svg.png"): diff --git a/acros/utils/apis.py b/acros/utils/apis.py index 6a6fe1d..9366a58 100644 --- a/acros/utils/apis.py +++ b/acros/utils/apis.py @@ -26,6 +26,7 @@ class WikipediaAPISummary: urlbase = "https://en.wikipedia.org/api/rest_v1/page/summary/" def __init__(self, title: str): + print(self.urlbase + title.replace("/", "%2F")) r = requests_session.get(self.urlbase + title.replace("/", "%2F")) try: r.raise_for_status() @@ -49,6 +50,15 @@ class WikipediaAPISummary: def description(self) -> str: if "description" in self.data: return self.data["description"] + @property + def description_source(self) -> str: + if "description_source" in self.data: + return self.data["description_source"] + + @property + def wikibase_item(self) -> Optional[str]: + if "wikibase_item" in self.data: + return self.data["wikibase_item"] @property def timestamp(self) -> str: