1
0
Fork 0
mirror of https://github.com/Findus23/nonsense.git synced 2024-09-19 16:03:50 +02:00

update dependencies

This commit is contained in:
Lukas Winkler 2019-12-20 18:31:42 +01:00
parent 56a0f87e80
commit b3929fb40d
Signed by: lukas
GPG key ID: 54DE4D798D244853
2 changed files with 26 additions and 30 deletions

View file

@@ -8,7 +8,7 @@ logger = logging.getLogger(__name__)
class NonsenseSpider(Spider): class NonsenseSpider(Spider):
name = "nonsense" name = "nonsense"
start_urls = [ start_urls = [
"https://www.ikea.com/at/de/cat/produkte-functional/", "https://www.ikea.com/at/de/cat/produkte-products/",
] ]
custom_settings = { custom_settings = {
'FEED_FORMAT': 'json', 'FEED_FORMAT': 'json',
@@ -16,13 +16,15 @@ class NonsenseSpider(Spider):
} }
def parse(self, response): def parse(self, response):
products = response.css(".product-compact") products = response.css("."
"")
for product in products: for product in products:
name = product.css(".product-compact__name::text").extract_first()
description = product.css(".product-compact__type::text").extract_first().strip().strip(",") description = product.css(".product-compact__type::text").extract_first().strip().strip(",")
yield { yield {
'name': product.css(".product-compact__name::text").extract_first(), 'name': name,
'description': description, 'description': description,
} }
for url in response.css("a.catalog-list__link::attr(href)"): for url in response.css("a.range-catalog-list__link::attr(href)"):
if url is not None: if url is not None:
yield response.follow(url, callback=self.parse) yield response.follow(url, callback=self.parse)

View file

@@ -1,38 +1,32 @@
asn1crypto==0.24.0 attrs==19.3.0
attrs==18.2.0 Automat==0.8.0
Automat==0.7.0 cffi==1.13.2
beautifulsoup4==4.7.1
certifi==2019.6.16
cffi==1.12.2
chardet==3.0.4
Click==7.0 Click==7.0
constantly==15.1.0 constantly==15.1.0
cryptography==2.7 cryptography==2.8
cssselect==1.0.3 cssselect==1.1.0
Flask==1.1.1 Flask==1.1.1
hyperlink==18.0.0 hyperlink==19.0.0
idna==2.8 idna==2.8
incremental==17.5.0 incremental==17.5.0
itsdangerous==1.1.0 itsdangerous==1.1.0
Jinja2==2.10.1 Jinja2==2.10.3
lxml==4.3.4 lxml==4.4.2
MarkupSafe==1.1.1 MarkupSafe==1.1.1
parsel==1.5.1 parsel==1.5.2
pyasn1==0.4.4 Protego==0.1.16
pyasn1-modules==0.2.2 pyasn1==0.4.8
pyasn1-modules==0.2.7
pycparser==2.19 pycparser==2.19
PyDispatcher==2.0.5 PyDispatcher==2.0.5
PyHamcrest==1.9.0 PyHamcrest==1.9.0
pyOpenSSL==19.0.0 pyOpenSSL==19.1.0
PyYAML==5.1.1 PyYAML==5.2
queuelib==1.5.0 queuelib==1.5.0
requests==2.22.0 Scrapy==1.8.0
Scrapy==1.6.0
service-identity==18.1.0 service-identity==18.1.0
six==1.11.0 six==1.13.0
soupsieve==1.9 Twisted==19.10.0
Twisted==18.9.0 w3lib==1.21.0
urllib3==1.25.3 Werkzeug==0.16.0
w3lib==1.19.0 zope.interface==4.7.1
Werkzeug==0.15.4
zope.interface==4.6.0