-
Notifications
You must be signed in to change notification settings - Fork 1
/
runmap.sh
executable file
·30 lines (24 loc) · 971 Bytes
/
runmap.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/bin/sh
# runmap.sh - launch the Hadoop streaming n-gram jobs for one language.
#
# Usage: runmap.sh <language>
#
# For each of mapper1.py, mapper2.py and mapper3.py (unigram, bigram and
# trigram, per the usage text below) one Hadoop streaming job is started
# with reducer.py as the reducer. Input is read from
#   $S3/$LANGUAGES/<language>$WIKI/<language>.txt
# and each job writes to
#   $S3/$RESULTS/<lower-cased language>$WIKI/<n>/
#
# The three jobs are started in the background and the script does not wait
# for them, so it returns before the jobs finish and its exit status does not
# reflect their success or failure.
#
# Exit status: 1 if no language argument is given.

LANGUAGE="$1"

# Fixed S3 layout shared by the input and output paths.
S3=s3://uisbucket/group-4
RESULTS=results
WIKI=wiki
LANGUAGES=languages

if [ -z "$LANGUAGE" ]; then
  # The unquoted here-document delimiter lets $0, $S3, etc. expand.
  cat <<EOF
Usage:
$0 <language>

Language <language> has to be stored in $S3/$LANGUAGES/<language>$WIKI/<language>.txt
Results will be after job finishes in $S3/$RESULTS/<language>$WIKI/<ngram>/

Mapper and reducer are python files stored in /mnt/DAT500
Three mappers must be provided with names as mapper1.py for unigram,
mapper2.py for bigram and mapper3.py for trigram
EOF
  exit 1
fi

# The output directory uses the lower-cased language name; the input path
# keeps the name exactly as given on the command line.
LANGUAGE_LOWER=$(printf '%s\n' "$LANGUAGE" | tr "A-Z" "a-z")

for i in 1 2 3; do
  # One streaming job per n-gram size, each started in the background.
  /home/hadoop/bin/hadoop jar /home/hadoop/contrib/streaming/hadoop-streaming.jar \
    -Dmapred.reduce.tasks=5 \
    -mapper "mapper$i.py" \
    -reducer reducer.py \
    -input "$S3/$LANGUAGES/$LANGUAGE$WIKI/$LANGUAGE.txt" \
    -output "$S3/$RESULTS/$LANGUAGE_LOWER$WIKI/$i/" \
    -file "/mnt/DAT500/mapper$i.py" \
    -file /mnt/DAT500/reducer.py &
done