mirror of
https://github.com/Findus23/column-transformation.git
synced 2024-09-19 14:43:46 +02:00
initial version
This commit is contained in:
commit
ecc793da07
3 changed files with 35 additions and 0 deletions
4
input.csv
Normal file
4
input.csv
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
label nb_visits
|
||||||
|
somelabel|anotherlabel 1423
|
||||||
|
somelabel|anotherlabel|test 1187
|
||||||
|
test|word 1131
|
|
4
out.csv
Normal file
4
out.csv
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
somelabel;2610
|
||||||
|
anotherlabel;2610
|
||||||
|
test;2318
|
||||||
|
word;1131
|
|
27
script.py
Normal file
27
script.py
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
"""Sum a per-row value over every "|"-separated label of a tab-separated file.

Usage: script.py INPUT OUTPUT

INPUT is a tab-separated file whose first line is a header. On every
following line the first column holds one or more labels joined by "|" and
the second column holds an integer. OUTPUT receives one "label;total" line
per distinct label, in order of first appearance.
"""
from pathlib import Path
from sys import argv


def aggregate(lines):
    """Return a dict mapping each label to the sum of the values of its rows.

    *lines* is an iterable of tab-separated data lines (header already
    removed). Lines containing only whitespace are skipped, so a trailing
    empty line in the input no longer raises IndexError. A line that has no
    second column, or a non-integer second column, still raises
    (IndexError / ValueError) rather than being silently dropped.
    """
    counter = {}
    for line in lines:  # go through all lines
        if not line.strip():
            # blank line (typically at the end of the file): nothing to count
            continue
        columns = line.split("\t")
        # int() tolerates the trailing newline left on the last column
        value = int(columns[1])
        for name in columns[0].split("|"):
            # first occurrence starts from 0, later ones accumulate
            counter[name] = counter.get(name, 0) + value
    return counter


def main(inputfile, outputfile):
    """Read *inputfile*, aggregate its labels and write them to *outputfile*."""
    with inputfile.open() as ip:
        # skip the header line; the default keeps an empty file from
        # raising StopIteration
        next(ip, None)
        counter = aggregate(ip)
    print(counter)

    with outputfile.open("w") as wf:
        for name, count in counter.items():
            wf.write(f"{name};{count}\n")  # Cinema;4


if __name__ == "__main__":
    main(Path(argv[1]), Path(argv[2]))
|
Loading…
Reference in a new issue