"""Sum visit counts per label from a tab-separated export.

Usage::

    python script.py INPUT OUTPUT

INPUT is a text file whose first line is a header and whose remaining
lines each hold ``labels<TAB>count``.  ``labels`` is a ``|``-separated
list of names; the integer ``count`` is added to the running total of
every name on that line.  OUTPUT receives one ``name;total`` line per
name, in the order the names were first seen.

Example (sample data from the initial version, commit ecc793d)::

    input.csv (columns separated by a tab)     out.csv
    --------------------------------------     -----------------
    label                         nb_visits
    somelabel|anotherlabel        1423         somelabel;2610
    somelabel|anotherlabel|test   1187         anotherlabel;2610
    test|word                     1131         test;2318
                                               word;1131
"""
from pathlib import Path
from sys import argv


def count_labels(lines):
    """Return a dict mapping each name to the sum of its rows' counts.

    Args:
        lines: iterable of data rows (header already removed), each of
            the form ``"name1|name2|...\\t<int>"``.  A trailing newline
            is fine because ``int()`` ignores surrounding whitespace.

    Returns:
        A plain ``dict`` of ``name -> total``, keyed in first-seen order.

    Raises:
        IndexError: a non-blank row has no tab-separated second column.
        ValueError: the second column is not an integer.
    """
    counter = {}
    for line in lines:
        # Exports often end with an empty line; without this guard the
        # column lookup below would fail with IndexError.
        if not line.strip():
            continue
        columns = line.split("\t")
        value = int(columns[1])
        for name in columns[0].split("|"):
            # First occurrence starts from 0, later ones accumulate.
            counter[name] = counter.get(name, 0) + value
    return counter


def main(args=None):
    """Read INPUT, print the totals, and write them to OUTPUT.

    Args:
        args: ``[input_path, output_path]``; defaults to the command-line
            arguments.  Extra arguments are ignored.

    Raises:
        SystemExit: fewer than two paths were supplied.
    """
    if args is None:
        args = argv[1:]
    if len(args) < 2:
        raise SystemExit("usage: script.py INPUT OUTPUT")
    inputfile = Path(args[0])
    outputfile = Path(args[1])

    with inputfile.open() as ip:
        # Skip the header line; the default keeps an empty file from
        # raising StopIteration.
        next(ip, None)
        counter = count_labels(ip)
    print(counter)

    with outputfile.open("w") as wf:
        for name, count in counter.items():
            wf.write(f"{name};{count}\n")  # e.g. "Cinema;4"


if __name__ == "__main__":
    main()