mirror of
https://github.com/Findus23/acronomy.git
synced 2024-09-18 14:33:43 +02:00
Improve Wikipedia fetching
This commit is contained in:
parent
1bdc9e73cd
commit
188c566eae
7 changed files with 68 additions and 7 deletions
|
@ -57,6 +57,7 @@ class LinkAdmin(SimpleHistoryAdmin):
|
|||
|
||||
class WikipediaAdmin(SimpleHistoryAdmin):
|
||||
list_display = ["title", "acronym", "thumbnail"]
|
||||
list_filter = ["description_source"]
|
||||
date_hierarchy = "timestamp"
|
||||
...
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import time
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from simple_history.utils import update_change_reason
|
||||
|
||||
from acros.models import WikipediaLink
|
||||
|
||||
|
@ -10,8 +11,10 @@ class Command(BaseCommand):
|
|||
def handle(self, *args, **options):
|
||||
links = WikipediaLink.objects.all()
|
||||
for link in links:
|
||||
if link.fetched:
|
||||
self.stdout.write(link.title)
|
||||
link.fetched = False
|
||||
# update_change_reason(link, "refetch_wikipedia command")
|
||||
link.save()
|
||||
print(link)
|
||||
self.stdout.write(link.title)
|
||||
link.fetched = False
|
||||
# update_change_reason(link, "refetch_wikipedia command")
|
||||
link.clean()
|
||||
link.save()
|
||||
time.sleep(1)
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
# Generated by Django 4.1.7 on 2023-03-21 22:09
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add a nullable ``wikibase_item`` CharField (max_length=20).

    The field is added to both the ``historicalwikipedialink`` and the
    ``wikipedialink`` models of the ``acros`` app.
    """

    dependencies = [("acros", "0051_alter_historicalacronym_options_and_more")]

    operations = [
        migrations.AddField(
            model_name="historicalwikipedialink",
            name="wikibase_item",
            field=models.CharField(blank=True, max_length=20, null=True),
        ),
        migrations.AddField(
            model_name="wikipedialink",
            name="wikibase_item",
            field=models.CharField(blank=True, max_length=20, null=True),
        ),
    ]
|
|
@ -0,0 +1,22 @@
|
|||
# Generated by Django 4.1.7 on 2023-03-21 22:10
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add a nullable ``description_source`` CharField (max_length=20).

    The field is added to both the ``historicalwikipedialink`` and the
    ``wikipedialink`` models of the ``acros`` app.
    """

    dependencies = [("acros", "0052_historicalwikipedialink_wikibase_item_and_more")]

    operations = [
        migrations.AddField(
            model_name="historicalwikipedialink",
            name="description_source",
            field=models.CharField(blank=True, max_length=20, null=True),
        ),
        migrations.AddField(
            model_name="wikipedialink",
            name="description_source",
            field=models.CharField(blank=True, max_length=20, null=True),
        ),
    ]
|
|
@ -1,6 +1,5 @@
|
|||
from tempfile import TemporaryFile
|
||||
|
||||
import requests
|
||||
from django.core.files import File
|
||||
from django.db import models
|
||||
|
||||
|
|
|
@ -14,6 +14,8 @@ class WikipediaLink(models.Model):
|
|||
extract = models.TextField(blank=True)
|
||||
extract_html = models.TextField(blank=True)
|
||||
description = models.TextField(blank=True, null=True)
|
||||
description_source = models.CharField(blank=True, null=True, max_length=20)
|
||||
wikibase_item = models.CharField(blank=True, null=True, max_length=20)
|
||||
thumbnail = models.ForeignKey(WikipediaImage, on_delete=models.CASCADE, related_name="wiki_articles",
|
||||
blank=True, null=True)
|
||||
timestamp = models.DateTimeField(blank=True)
|
||||
|
@ -29,8 +31,10 @@ class WikipediaLink(models.Model):
|
|||
self.extract = summary.extract
|
||||
self.extract_html = summary.extract_html
|
||||
self.description = summary.description
|
||||
self.description_source = summary.description_source
|
||||
self.timestamp = summary.timestamp
|
||||
self.title = summary.title
|
||||
self.wikibase_item = summary.wikibase_item
|
||||
if summary.image:
|
||||
filename = unquote(summary.image.split("/")[-1])
|
||||
if filename.endswith(".svg.png"):
|
||||
|
|
|
@ -26,6 +26,7 @@ class WikipediaAPISummary:
|
|||
urlbase = "https://en.wikipedia.org/api/rest_v1/page/summary/"
|
||||
|
||||
def __init__(self, title: str):
|
||||
print(self.urlbase + title.replace("/", "%2F"))
|
||||
r = requests_session.get(self.urlbase + title.replace("/", "%2F"))
|
||||
try:
|
||||
r.raise_for_status()
|
||||
|
@ -49,6 +50,15 @@ class WikipediaAPISummary:
|
|||
def description(self) -> str:
|
||||
if "description" in self.data:
|
||||
return self.data["description"]
|
||||
@property
def description_source(self) -> Optional[str]:
    """Return the ``description_source`` entry of the summary payload.

    Returns:
        The value stored under ``"description_source"`` in ``self.data``,
        or ``None`` when the key is absent (hence ``Optional[str]``,
        matching the sibling ``wikibase_item`` property).
    """
    # NOTE(review): assumes self.data is a dict-like mapping — confirm.
    # A single .get() lookup replaces the "in" check plus indexing and
    # makes the None fallback explicit instead of an implicit fall-through.
    return self.data.get("description_source")
|
||||
|
||||
@property
def wikibase_item(self) -> Optional[str]:
    """Return the ``wikibase_item`` entry of the summary payload.

    Returns:
        The value stored under ``"wikibase_item"`` in ``self.data``, or
        ``None`` when the key is absent.
    """
    # NOTE(review): assumes self.data is a dict-like mapping — confirm.
    # One .get() lookup instead of a membership test followed by indexing;
    # the missing-key result (None) is now explicit.
    return self.data.get("wikibase_item")
|
||||
|
||||
@property
|
||||
def timestamp(self) -> str:
|
||||
|
|
Loading…
Reference in a new issue