mirror of
https://github.com/Findus23/acronomy.git
synced 2024-09-19 15:33:45 +02:00
improve wikipedia fetching
This commit is contained in:
parent
1bdc9e73cd
commit
188c566eae
7 changed files with 68 additions and 7 deletions
|
@ -57,6 +57,7 @@ class LinkAdmin(SimpleHistoryAdmin):
|
||||||
|
|
||||||
class WikipediaAdmin(SimpleHistoryAdmin):
|
class WikipediaAdmin(SimpleHistoryAdmin):
|
||||||
list_display = ["title", "acronym", "thumbnail"]
|
list_display = ["title", "acronym", "thumbnail"]
|
||||||
|
list_filter = ["description_source"]
|
||||||
date_hierarchy = "timestamp"
|
date_hierarchy = "timestamp"
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
|
import time
|
||||||
|
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
from simple_history.utils import update_change_reason
|
|
||||||
|
|
||||||
from acros.models import WikipediaLink
|
from acros.models import WikipediaLink
|
||||||
|
|
||||||
|
@ -10,8 +11,10 @@ class Command(BaseCommand):
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
links = WikipediaLink.objects.all()
|
links = WikipediaLink.objects.all()
|
||||||
for link in links:
|
for link in links:
|
||||||
if link.fetched:
|
print(link)
|
||||||
self.stdout.write(link.title)
|
self.stdout.write(link.title)
|
||||||
link.fetched = False
|
link.fetched = False
|
||||||
# update_change_reason(link, "refetch_wikipedia command")
|
# update_change_reason(link, "refetch_wikipedia command")
|
||||||
|
link.clean()
|
||||||
link.save()
|
link.save()
|
||||||
|
time.sleep(1)
|
||||||
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
# Generated by Django 4.1.7 on 2023-03-21 22:09
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
dependencies = [
|
||||||
|
("acros", "0051_alter_historicalacronym_options_and_more"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="historicalwikipedialink",
|
||||||
|
name="wikibase_item",
|
||||||
|
field=models.CharField(blank=True, max_length=20, null=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="wikipedialink",
|
||||||
|
name="wikibase_item",
|
||||||
|
field=models.CharField(blank=True, max_length=20, null=True),
|
||||||
|
),
|
||||||
|
]
|
|
@ -0,0 +1,22 @@
|
||||||
|
# Generated by Django 4.1.7 on 2023-03-21 22:10
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
dependencies = [
|
||||||
|
("acros", "0052_historicalwikipedialink_wikibase_item_and_more"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="historicalwikipedialink",
|
||||||
|
name="description_source",
|
||||||
|
field=models.CharField(blank=True, max_length=20, null=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="wikipedialink",
|
||||||
|
name="description_source",
|
||||||
|
field=models.CharField(blank=True, max_length=20, null=True),
|
||||||
|
),
|
||||||
|
]
|
|
@ -1,6 +1,5 @@
|
||||||
from tempfile import TemporaryFile
|
from tempfile import TemporaryFile
|
||||||
|
|
||||||
import requests
|
|
||||||
from django.core.files import File
|
from django.core.files import File
|
||||||
from django.db import models
|
from django.db import models
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,8 @@ class WikipediaLink(models.Model):
|
||||||
extract = models.TextField(blank=True)
|
extract = models.TextField(blank=True)
|
||||||
extract_html = models.TextField(blank=True)
|
extract_html = models.TextField(blank=True)
|
||||||
description = models.TextField(blank=True, null=True)
|
description = models.TextField(blank=True, null=True)
|
||||||
|
description_source = models.CharField(blank=True, null=True, max_length=20)
|
||||||
|
wikibase_item = models.CharField(blank=True, null=True, max_length=20)
|
||||||
thumbnail = models.ForeignKey(WikipediaImage, on_delete=models.CASCADE, related_name="wiki_articles",
|
thumbnail = models.ForeignKey(WikipediaImage, on_delete=models.CASCADE, related_name="wiki_articles",
|
||||||
blank=True, null=True)
|
blank=True, null=True)
|
||||||
timestamp = models.DateTimeField(blank=True)
|
timestamp = models.DateTimeField(blank=True)
|
||||||
|
@ -29,8 +31,10 @@ class WikipediaLink(models.Model):
|
||||||
self.extract = summary.extract
|
self.extract = summary.extract
|
||||||
self.extract_html = summary.extract_html
|
self.extract_html = summary.extract_html
|
||||||
self.description = summary.description
|
self.description = summary.description
|
||||||
|
self.description_source = summary.description_source
|
||||||
self.timestamp = summary.timestamp
|
self.timestamp = summary.timestamp
|
||||||
self.title = summary.title
|
self.title = summary.title
|
||||||
|
self.wikibase_item = summary.wikibase_item
|
||||||
if summary.image:
|
if summary.image:
|
||||||
filename = unquote(summary.image.split("/")[-1])
|
filename = unquote(summary.image.split("/")[-1])
|
||||||
if filename.endswith(".svg.png"):
|
if filename.endswith(".svg.png"):
|
||||||
|
|
|
@ -26,6 +26,7 @@ class WikipediaAPISummary:
|
||||||
urlbase = "https://en.wikipedia.org/api/rest_v1/page/summary/"
|
urlbase = "https://en.wikipedia.org/api/rest_v1/page/summary/"
|
||||||
|
|
||||||
def __init__(self, title: str):
|
def __init__(self, title: str):
|
||||||
|
print(self.urlbase + title.replace("/", "%2F"))
|
||||||
r = requests_session.get(self.urlbase + title.replace("/", "%2F"))
|
r = requests_session.get(self.urlbase + title.replace("/", "%2F"))
|
||||||
try:
|
try:
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
|
@ -49,6 +50,15 @@ class WikipediaAPISummary:
|
||||||
def description(self) -> str:
|
def description(self) -> str:
|
||||||
if "description" in self.data:
|
if "description" in self.data:
|
||||||
return self.data["description"]
|
return self.data["description"]
|
||||||
|
@property
|
||||||
|
def description_source(self) -> str:
|
||||||
|
if "description_source" in self.data:
|
||||||
|
return self.data["description_source"]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def wikibase_item(self) -> Optional[str]:
|
||||||
|
if "wikibase_item" in self.data:
|
||||||
|
return self.data["wikibase_item"]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def timestamp(self) -> str:
|
def timestamp(self) -> str:
|
||||||
|
|
Loading…
Reference in a new issue