52 lines
1.4 KiB
Python
52 lines
1.4 KiB
Python
import re
|
|
from datetime import datetime
|
|
from pprint import pprint
|
|
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
import config
|
|
|
|
# Display name of the restaurant this scraper module handles.
name = "Zuppa"

# Page the menu is scraped from.
fetch_url = "http://www.zuppa.at/essen/"

# Matches dotted dates such as "12.03.2018" (digits.digits.four digits).
# A raw string keeps "\d" and "\." from being read as (invalid) string
# escapes, which newer Python versions warn about.
dateregex = re.compile(r"(\d+\.\d+\.\d{4})")
|
|
|
|
|
|
def fetch_recourse():
    """Return the HTML source that the menu is parsed from.

    When ``config.DEBUG`` is truthy, ``zuppa_html`` is imported from the
    ``website`` module and returned instead of touching the network
    (presumably a stored copy of the page -- confirm against ``website``).
    Otherwise ``fetch_url`` is downloaded and the response body returned
    as text.

    Raises:
        requests.exceptions.RequestException: if the download fails or
            does not complete within the timeout.
    """
    if config.DEBUG:
        # Imported lazily so the fixture is only needed in debug mode.
        from website import zuppa_html as html
    else:
        # Without an explicit timeout requests waits forever on a stalled
        # server; 30 seconds is generous for a single small page.
        r = requests.get(fetch_url, timeout=30)
        html = r.text
    return html
|
|
|
|
|
|
def get_menus():
    """Scrape the daily main dishes from the fetched page.

    The page is parsed with BeautifulSoup, the main-dishes box is located
    via ``get_hauptsachen`` and every ``<p>`` in it whose text matches
    ``dateregex`` is treated as the start of a day. The two ``<p>``
    elements that follow a date paragraph are read as that day's two
    meals; a meal's name is built from its ``<strong>`` fragments that
    contain no digit.

    Returns:
        list[dict]: one dict per meal with the keys ``"date"`` (ISO 8601
        string of the parsed day) and ``"name"`` (meal title on a single
        line). The list is empty when the main-dishes box is not found.
    """
    tagesmenus = []
    soup = BeautifulSoup(fetch_recourse(), 'html.parser')
    hs_div = get_hauptsachen(soup)
    if hs_div is None:
        # No main-dishes box on the page: nothing to report.
        return tagesmenus

    for day_p in hs_div.find_all("p", text=dateregex):
        datestring = dateregex.search(day_p.text).group(0)
        date = datetime.strptime(datestring, "%d.%m.%Y")

        # Walk forward from the date paragraph with a separate cursor so
        # the outer loop variable is never overwritten.
        cursor = day_p
        for _ in range(2):
            meal_p = cursor.find_next("p")
            if meal_p is None:
                # Fewer than two paragraphs follow the date.
                break
            # <strong> fragments containing a digit are skipped
            # (NOTE(review): presumably prices or numbering -- confirm
            # against the live markup).
            title = [
                strong.text.strip()
                for strong in meal_p.find_all('strong')
                if not any(c.isdigit() for c in strong.text)
            ]
            tagesmenus.append({
                "date": date.isoformat(),
                "name": " ".join(title).replace("\n", " "),
            })
            # The next find_next("p") should then find the second meal.
            cursor = meal_p
    return tagesmenus
|
|
|
|
|
|
def get_hauptsachen(soup):
    """Return the ``menue_box`` div that holds the main dishes.

    Args:
        soup: parsed document (or any element) to search in.

    Returns:
        The first ``<div class="menue_box">`` whose ``<h2>`` text is
        exactly ``"HAUPT SACHEN"`` or ``"HAUPTSACHEN"``, or ``None`` when
        no such box exists.
    """
    for div in soup.find_all("div", {"class": "menue_box"}):
        heading = div.h2
        if heading is None:
            # A box without a heading cannot be the one we want; skipping
            # it avoids an AttributeError on ``None.text``.
            continue
        # Both spellings of the heading are accepted.
        if heading.text in ("HAUPT SACHEN", "HAUPTSACHEN"):
            return div
    return None
|