Archived
1
0
Fork 0
This repository has been archived on 2024-06-28. You can view files and clone it, but cannot push or open issues or pull requests.
hunger/parser/fladerei.py

50 lines
1.3 KiB
Python
Raw Normal View History

2017-06-01 20:50:11 +02:00
import re
from datetime import datetime
import requests
from bs4 import BeautifulSoup
2017-06-09 08:26:03 +02:00
import config
2017-06-01 20:50:11 +02:00
# Name of the source this parser handles.
# NOTE(review): presumably read by whatever code loads the parser modules — verify against caller.
name = "Fladerei"
# URL that fetch_recourse() downloads when config.DEBUG is false.
fetch_url = "https://www.fladerei.com/dyn_inhalte/berggasse/tagesfladen_berggasse.html"
def fetch_recourse(timeout=30):
    """Return the HTML markup of the daily menu page.

    When ``config.DEBUG`` is truthy the markup is imported from the local
    ``website`` module (``fladerei_html``) instead of being downloaded.
    Otherwise ``fetch_url`` is requested over HTTP.

    Args:
        timeout: Seconds to wait for the server before giving up. Only used
            for the HTTP download.

    Returns:
        The page markup (``r.text`` of the response, or the debug fixture).

    Raises:
        requests.exceptions.RequestException: If the download fails or the
            server does not answer within ``timeout`` seconds.
    """
    if config.DEBUG:
        from website import fladerei_html as html
    else:
        # Without an explicit timeout requests waits indefinitely on a
        # stalled server, which would hang the caller.
        r = requests.get(fetch_url, timeout=timeout)
        r.encoding = "utf-8"  # force UTF-8 as the meta tag is invalid
        html = r.text
    return html
def get_menus():
    """Parse the daily menu table into a list of menu entries.

    The markup returned by ``fetch_recourse()`` is searched for the table
    titled "Tagesfladen". Its rows are consumed in pairs: the first row of a
    pair carries the date (``dd.mm.``) in the span of its first cell and the
    description in its last cell; the last cell of the second row holds an
    optional continuation of the description.

    Returns:
        A list of dicts, one per row pair, with the keys ``"date"`` (ISO 8601
        string for midnight of that day in the current year) and ``"name"``
        (the description text).

    Raises:
        AttributeError: If the table, a date cell, or a date in the expected
            format is missing from the markup.
        ValueError: If the day/month found do not form a valid date in the
            current year.
    """
    soup = BeautifulSoup(fetch_recourse(), 'html.parser')
    table = soup.find("table", {"title": "Tagesfladen"})

    # Remove the <smallblue> elements so their text does not end up in the
    # descriptions read from the cells below.
    for smallblue in table.find_all("smallblue"):
        smallblue.extract()

    # Compiled once; the groups capture day and month separately.
    date_pattern = re.compile(r"(\d{2})\.(\d{2})\.")
    current_year = datetime.today().year

    rows = table.find_all("tr")
    tagesfladen = []
    for index in range(0, len(rows), 2):
        row = rows[index]
        day, month = date_pattern.search(row.td.span.text).groups()
        descr = row('td')[-1].text.strip()

        # A trailing row without a partner simply has no extra description.
        if index + 1 < len(rows):
            extradescr = rows[index + 1]('td')[-1].text.strip()
            if extradescr:
                descr += " " + extradescr

        # Build the date with the real year right away: parsing "29.02."
        # via strptime without a year validates against 1900 (not a leap
        # year) and fails even when the current year is a leap year.
        date = datetime(current_year, int(month), int(day))
        tagesfladen.append({
            "date": date.isoformat(),
            "name": descr,
        })
    return tagesfladen