1
0
Fork 0
mirror of https://github.com/Findus23/acronomy.git synced 2024-09-19 15:33:45 +02:00

improve wikipedia fetching

This commit is contained in:
Lukas Winkler 2023-03-21 23:24:52 +01:00
parent 1bdc9e73cd
commit 188c566eae
Signed by: lukas
GPG key ID: 54DE4D798D244853
7 changed files with 68 additions and 7 deletions

View file

@ -57,6 +57,7 @@ class LinkAdmin(SimpleHistoryAdmin):
class WikipediaAdmin(SimpleHistoryAdmin): class WikipediaAdmin(SimpleHistoryAdmin):
list_display = ["title", "acronym", "thumbnail"] list_display = ["title", "acronym", "thumbnail"]
list_filter = ["description_source"]
date_hierarchy = "timestamp" date_hierarchy = "timestamp"
... ...

View file

@ -1,5 +1,6 @@
import time
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from simple_history.utils import update_change_reason
from acros.models import WikipediaLink from acros.models import WikipediaLink
@ -10,8 +11,10 @@ class Command(BaseCommand):
def handle(self, *args, **options): def handle(self, *args, **options):
links = WikipediaLink.objects.all() links = WikipediaLink.objects.all()
for link in links: for link in links:
if link.fetched: print(link)
self.stdout.write(link.title) self.stdout.write(link.title)
link.fetched = False link.fetched = False
# update_change_reason(link, "refetch_wikipedia command") # update_change_reason(link, "refetch_wikipedia command")
link.save() link.clean()
link.save()
time.sleep(1)

View file

@ -0,0 +1,22 @@
# Generated by Django 4.1.7 on 2023-03-21 22:09
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the optional ``wikibase_item`` column.

    The same nullable, blank-able 20-character ``CharField`` is added to
    ``historicalwikipedialink`` first and then to ``wikipedialink``.
    """

    dependencies = [("acros", "0051_alter_historicalacronym_options_and_more")]

    # One AddField per model; a fresh field instance is built for each,
    # in the same order as the generated migration listed them.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name="wikibase_item",
            field=models.CharField(blank=True, max_length=20, null=True),
        )
        for target_model in ("historicalwikipedialink", "wikipedialink")
    ]

View file

@ -0,0 +1,22 @@
# Generated by Django 4.1.7 on 2023-03-21 22:10
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the optional ``description_source`` column.

    The same nullable, blank-able 20-character ``CharField`` is added to
    ``historicalwikipedialink`` first and then to ``wikipedialink``.
    """

    dependencies = [("acros", "0052_historicalwikipedialink_wikibase_item_and_more")]

    # One AddField per model; a fresh field instance is built for each,
    # in the same order as the generated migration listed them.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name="description_source",
            field=models.CharField(blank=True, max_length=20, null=True),
        )
        for target_model in ("historicalwikipedialink", "wikipedialink")
    ]

View file

@ -1,6 +1,5 @@
from tempfile import TemporaryFile from tempfile import TemporaryFile
import requests
from django.core.files import File from django.core.files import File
from django.db import models from django.db import models

View file

@ -14,6 +14,8 @@ class WikipediaLink(models.Model):
extract = models.TextField(blank=True) extract = models.TextField(blank=True)
extract_html = models.TextField(blank=True) extract_html = models.TextField(blank=True)
description = models.TextField(blank=True, null=True) description = models.TextField(blank=True, null=True)
description_source = models.CharField(blank=True, null=True, max_length=20)
wikibase_item = models.CharField(blank=True, null=True, max_length=20)
thumbnail = models.ForeignKey(WikipediaImage, on_delete=models.CASCADE, related_name="wiki_articles", thumbnail = models.ForeignKey(WikipediaImage, on_delete=models.CASCADE, related_name="wiki_articles",
blank=True, null=True) blank=True, null=True)
timestamp = models.DateTimeField(blank=True) timestamp = models.DateTimeField(blank=True)
@ -29,8 +31,10 @@ class WikipediaLink(models.Model):
self.extract = summary.extract self.extract = summary.extract
self.extract_html = summary.extract_html self.extract_html = summary.extract_html
self.description = summary.description self.description = summary.description
self.description_source = summary.description_source
self.timestamp = summary.timestamp self.timestamp = summary.timestamp
self.title = summary.title self.title = summary.title
self.wikibase_item = summary.wikibase_item
if summary.image: if summary.image:
filename = unquote(summary.image.split("/")[-1]) filename = unquote(summary.image.split("/")[-1])
if filename.endswith(".svg.png"): if filename.endswith(".svg.png"):

View file

@ -26,6 +26,7 @@ class WikipediaAPISummary:
urlbase = "https://en.wikipedia.org/api/rest_v1/page/summary/" urlbase = "https://en.wikipedia.org/api/rest_v1/page/summary/"
def __init__(self, title: str): def __init__(self, title: str):
print(self.urlbase + title.replace("/", "%2F"))
r = requests_session.get(self.urlbase + title.replace("/", "%2F")) r = requests_session.get(self.urlbase + title.replace("/", "%2F"))
try: try:
r.raise_for_status() r.raise_for_status()
@ -49,6 +50,15 @@ class WikipediaAPISummary:
def description(self) -> str: def description(self) -> str:
if "description" in self.data: if "description" in self.data:
return self.data["description"] return self.data["description"]
@property
def description_source(self) -> Optional[str]:
    """Return the ``description_source`` entry of the summary payload.

    Returns:
        The value stored under ``"description_source"`` in ``self.data``,
        or ``None`` when the payload has no such key.
    """
    # The key may be absent, so the return type is Optional, matching
    # the sibling ``wikibase_item`` property.
    return self.data.get("description_source")
@property
def wikibase_item(self) -> Optional[str]:
    """Return the ``wikibase_item`` entry of the summary payload.

    Returns:
        The value stored under ``"wikibase_item"`` in ``self.data``,
        or ``None`` when the payload has no such key.
    """
    return self.data.get("wikibase_item")
@property @property
def timestamp(self) -> str: def timestamp(self) -> str: