1
0
Fork 0
mirror of https://github.com/Findus23/nonsense.git synced 2024-09-19 16:03:50 +02:00
nonsense/fetch.py

54 lines
1.4 KiB
Python
Raw Normal View History

2021-05-12 18:41:01 +02:00
import json
from time import sleep
import requests
# Endpoint fetched once at start-up; its JSON response is read for a
# "categories" list whose entries carry "categoryId" and, optionally,
# "subcategories".
# NOTE(review): the "at/de" path segment presumably selects the Austrian
# store in German -- confirm against the API before changing it.
categories_url = "https://shop.api.ingka.ikea.com/range/v2/at/de/category-browse"
# Prefix for the per-category request: a category id is appended directly to
# this string, and the JSON response is read for a "products" list.
products_url = "https://shop.api.ingka.ikea.com/range/v1/at/de/category-details?category="
class OutputWriter:
    """JSON Lines writer for scraped name/description pairs.

    Every record passed to :meth:`log` is echoed to stdout and written to the
    output file as one JSON object per line. The instance can be used as a
    context manager so the file is closed even when an error occurs.
    """

    def __init__(self, path: str = "crawlData/out.jsonl"):
        """Open *path* for writing, truncating any existing content.

        Args:
            path: Destination file. The default is the location this script
                has always written to; the parent directory must already
                exist.

        Raises:
            OSError: If the file cannot be opened (e.g. missing directory).
        """
        # log() serialises with ensure_ascii=False, so non-ASCII characters
        # reach the file verbatim. Pin UTF-8 rather than relying on the
        # platform's locale encoding, which may be unable to encode them.
        self.file = open(path, "w", encoding="utf-8")

    def __enter__(self) -> "OutputWriter":
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the output file; exceptions are not suppressed."""
        self.close()

    def log(self, name: str, description: str) -> None:
        """Print one record and append it to the file as a JSON line.

        Args:
            name: Value stored under the ``"name"`` key.
            description: Value stored under the ``"description"`` key.
        """
        print(name, description)
        text = json.dumps({"name": name, "description": description}, ensure_ascii=False)
        self.file.write(text + "\n")

    def close(self) -> None:
        """Close the underlying file, flushing any buffered records."""
        self.file.close()
# Seconds to wait on each HTTP request. Without an explicit timeout the
# requests library waits indefinitely, so one stalled connection would hang
# the whole crawl.
REQUEST_TIMEOUT = 30

output = OutputWriter()
try:
    s = requests.Session()
    s.headers.update({"User-Agent": "IKEA App/2.26.0-4156 (iOS) NonsenseBot"})

    # Step 1: collect the ids of every top-level category and its direct
    # subcategories. A set is used so that repeated ids are fetched once.
    r = s.get(categories_url, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    categories = set()
    data = r.json()["categories"]
    for category in data:
        print(category["categoryId"])
        categories.add(category["categoryId"])
        # Not every category entry carries a "subcategories" key.
        if "subcategories" not in category:
            continue
        for subcategory in category["subcategories"]:
            categories.add(subcategory["categoryId"])

    # Step 2: fetch the product list of each category and record the title
    # and description of every product.
    for category in categories:
        print(category)
        sleep(1)  # make requests slowly
        r = s.get(products_url + category, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
        products = r.json()["products"]
        for product in products:
            output.log(
                name=product["title"],
                description=product["description"],
            )
finally:
    # Close (and thereby flush) the output file even when a request fails
    # part-way through, so the records gathered so far are not lost.
    output.close()