-
Notifications
You must be signed in to change notification settings - Fork 0
/
lavora.sh
executable file
·41 lines (32 loc) · 974 Bytes
/
lavora.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/bash
# Inclusive range of years to process; the loop at the bottom of this script
# iterates from FROM to TO. Both bounds may be overridden from the
# environment (e.g. `FROM=2010 TO=2012 ./lavora.sh`) to re-run a subset of
# years; when unset or empty they fall back to the defaults below.
FROM="${FROM:-2001}"
TO="${TO:-2020}"
#######################################
# Processes one year of the itwiki mediawiki_history dump (2020-10 snapshot):
# downloads the yearly archive, decompresses it, runs main.py for that year,
# imports revisions.json, users.json and pages.json into the
# wikimedia_history_it MongoDB database, then deletes the intermediate files.
# main.py is presumably what writes the three JSON files -- confirm there.
# The function stops at the first failing step so that later steps never
# operate on missing, partial or stale data.
# Arguments:
#   $1 - year to process (e.g. 2001)
# Outputs:
#   Progress messages on stdout; error messages on stderr.
# Returns:
#   0 on success, 1 as soon as any step fails.
#######################################
function lavora {
  local year=$1
  local archive="${year}.tsv.bz2"
  local url="https://dumps.wikimedia.org/other/mediawiki_history/2020-10/itwiki/2020-10.itwiki.${year}.tsv.bz2"
  local collection

  if [[ -z "$year" ]]; then
    echo "lavora: missing year argument" >&2
    return 1
  fi

  echo "FACENDO TUTTO PER L'ANNO $year"

  echo "Downloading..."
  # --fail: exit non-zero on an HTTP error instead of saving the server's
  # error page under the archive name.
  if ! curl --fail "$url" --output "$archive"; then
    echo "Download failed for $year" >&2
    return 1
  fi
  echo -e "Downloaded $year\n"

  echo "Extracting..."
  # bzip2 -d replaces the archive with ${year}.tsv on success.
  if ! bzip2 -d "$archive"; then
    echo "Extraction failed for $year" >&2
    return 1
  fi
  echo -e "Extracted $year\n"

  echo "Jsonizing..."
  if ! python3 main.py "$year"; then
    echo "Jsonizing failed for $year" >&2
    return 1
  fi
  echo -e "Jsonized $year\n"

  echo "Importing..."
  # Each collection is loaded from the JSON file of the same name.
  for collection in revisions users pages; do
    if ! mongoimport --db=wikimedia_history_it --collection="$collection" --file="${collection}.json"; then
      echo "Import of ${collection}.json failed for $year" >&2
      return 1
    fi
  done
  echo -e "Imported $year\n"

  echo "Removing..."
  # The JSON file names are not year-specific, so they must be gone before
  # the next year is processed.
  if ! rm -- "${year}.tsv" pages.json revisions.json users.json; then
    echo "Cleanup failed for $year" >&2
    return 1
  fi
  echo -e "Removed $year\n"
}
# Process every year from FROM to TO (inclusive), one after the other.
# The bounds are evaluated arithmetically by the shell, so no external
# command or word-splitting is involved.
for ((year = FROM; year <= TO; year++)); do
  lavora "$year"
  # Blank lines visually separate the output of consecutive years.
  echo -e "\n\n\n"
done