1
0
Fork 0
mirror of https://github.com/Findus23/nonsense.git synced 2024-09-19 16:03:50 +02:00
nonsense/utils.py

24 lines
556 B
Python
Raw Normal View History

2018-01-20 21:26:30 +01:00
import json
2021-05-12 18:41:01 +02:00
from pathlib import Path
datadir = Path("crawlData")
2018-01-20 21:26:30 +01:00
def crawl_data(directory=None) -> list:
    """Load every crawled record found in a data directory into one list.

    Two file layouts are read, in this order:

    * ``*.json``  -- the whole file is parsed as a single JSON document and
      its items are added with ``list.extend`` (so the document is
      presumably a JSON array of records -- verify against the crawler).
    * ``*.jsonl`` -- every non-blank line is parsed as its own JSON document
      and appended as one record.

    Args:
        directory: Directory to scan (``str`` or ``Path``).  Defaults to the
            module-level ``datadir`` when omitted or ``None``.

    Returns:
        A new list holding the records from all matching files.  The list is
        empty when no matching file exists.

    Raises:
        json.JSONDecodeError: If a file (or a non-blank ``.jsonl`` line)
            does not contain valid JSON.
    """
    root = datadir if directory is None else Path(directory)
    all_data = []

    # glob() yields entries in an arbitrary, filesystem-dependent order;
    # sorting makes the resulting record order reproducible across machines.
    for file in sorted(root.glob("*.json")):
        # Read as UTF-8 explicitly instead of relying on the platform's
        # default locale encoding.
        with file.open(encoding="utf-8") as inputfile:
            all_data.extend(json.load(inputfile))

    for file in sorted(root.glob("*.jsonl")):
        with file.open(encoding="utf-8") as inputfile:
            # strip() also filters whitespace-only lines, which json.loads
            # would otherwise reject.
            all_data.extend(
                json.loads(line) for line in inputfile if line.strip()
            )

    return all_data
if __name__ == "__main__":
    # When run as a script, print how many records crawl_data() returned.
    # Use the len() builtin rather than calling the __len__ dunder directly.
    print(len(crawl_data()))