2015-02-08 12:13:45 +01:00
|
|
|
#!/bin/bash
#
# Scrape the IKEA (AT/de) product A-Z index pages 0..25 and build two
# word lists in the current directory:
#
#   ikeaname.txt  - the upper-case product names   (sorted, unique)
#   descr.txt     - the text following each name   (sorted, unique)
#
# Both files are only overwritten at the very end, so an interrupted run
# leaves the results of the previous run untouched.
#
# Requires: wget, GNU grep (for -P and -o), mktemp, sort.

# All scratch files live in a private directory that is removed on any
# exit path; nothing else in the working directory is touched.
tmpdir=$(mktemp -d) || {
  printf 'error: could not create a temporary directory\n' >&2
  exit 1
}
trap 'rm -rf -- "$tmpdir"' EXIT

page="$tmpdir/download"
names_raw="$tmpdir/ikeaname"
descr_raw="$tmpdir/descr"

# Make sure both raw lists exist even if no entry is ever extracted.
: >"$names_raw"
: >"$descr_raw"

# Print $1 with leading/trailing whitespace removed and every run of
# whitespace (including newlines) collapsed to a single space.
# Unlike an unquoted `echo $var`, this never performs glob expansion and
# never interprets the value as an echo option.
squeeze_ws() {
  local -a words=()
  # read returns non-zero at end of input; the array is still filled.
  read -r -d '' -a words <<<"$1" || true
  printf '%s\n' "${words[*]-}"
}

for i in {0..25}; do
  url="http://www.ikea.com/at/de/catalog/productsaz/$i/"

  if ! wget -O "$page" "$url"; then
    printf 'warning: could not download %s; skipping\n' "$url" >&2
    continue
  fi

  # One candidate entry per HTML line that mentions the link class.
  while IFS= read -r line || [[ -n "$line" ]]; do
    line=$(squeeze_ws "$line")

    # Link text: everything between the closing '/">' of the href and
    # the last '</a>' on the line.
    name=$(grep -oP '(?<=/">).*(?=</a>)' <<<"$line")
    [[ -n "$name" ]] || continue

    # Product name: runs of two or more capitals (or '&'), each
    # optionally followed by spaces. [[:upper:]] is used instead of
    # [A-Z] so that, in a UTF-8 locale, capitals such as Å, Ä and Ö
    # do not cut the name short; in the C locale it equals [A-Z].
    ikeaname=$(grep -oE '(([[:upper:]]{2,}|&) *)+' <<<"$name")

    # Description: the link text with the product name stripped from
    # the front. Quoting makes the prefix literal, not a glob pattern.
    descr=${name#"$ikeaname"}

    ikeaname=$(squeeze_ws "$ikeaname")
    descr=$(squeeze_ws "$descr")

    if [[ -n "$ikeaname" ]]; then
      printf '%s\n' "$ikeaname" >>"$names_raw"
    fi
    if [[ -n "$descr" ]]; then
      printf '%s\n' "$descr" >>"$descr_raw"
    fi
  done < <(grep -- 'productsAzLink' "$page")
done

# Publish the de-duplicated, sorted lists.
sort -u -- "$names_raw" >ikeaname.txt
sort -u -- "$descr_raw" >descr.txt
|
|
|
|
|