-
Notifications
You must be signed in to change notification settings - Fork 22
/
linguakit-old
executable file
·89 lines (69 loc) · 3.9 KB
/
linguakit-old
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/bin/bash
# help: print the command-line usage text on stdout and terminate the script.
# Arguments: none.
# Outputs:   the usage text followed by one blank line.
# Exits:     with the status of the command that printed the text
#            (0 when the write succeeds); this function never returns.
help()
{
# Quoted delimiter: the text is emitted literally, with no expansion.
# The 'module =' summary line lists every module described below it,
# including 'seg' and 'lem'.
cat <<'EOF'
Syntax: linguakit <lang> <module> <input> [options]
language = gl, es, en, pt
module = dep, tagger, mwe, key, recog, sent, rel, tok, seg, lem, kwic, link, sum, conj, coref
input = path of the input (by default a txt file or gz/zip)
'dep' dependency syntactic analysis
'tagger' part-of-speech tagging
'mwe' multiword extraction
'key' keyword extraction
'recog' language recognition
'sent' sentiment analysis
'rel' relation extraction
'tok' tokenizer
'seg' sentence segmentation
'lem' lemmatization
'kwic' keyword in context (concordances)
'link' entity linking and semantic annotation
'sum' text summarizer
'conj' verb conjugator
'coref' named entity coreference solver
Available command-line options:
-a 'dep' option: simple dependency analysis (by default syntactic output)
-fa 'dep' option: full dependency analysis
-c 'dep' option: tagged text with syntactic information (for correction rules)
-conll 'dep' option: CoNLL output style
-noner 'tagger' option: no NER or NEC is processed (by default PoS tagger output)
-ner 'tagger' option: PoS tagger with Named Entity Recognition - NER (only with 'tagger' module)
-nec 'tagger' option: PoS tagger with Named Entity Classification - NEC (only with 'tagger' module)
-crnec 'coref' option: NEC correction with NE Coreference Resolution
-chi 'mwe' option: chi-square co-occurrence measure (by default)
-log 'mwe' option: loglikelihood
-scp 'mwe' option: symmetrical conditional probability
-mi 'mwe' option: mutual information
-cooc 'mwe' option: co-occurrence counting
-split 'tok' option: tokenization with splitting
-sort 'tok' option: tokenization with tokens sorted by frequency
-tokens 'kwic' option: contexts are tokens
The kwic option is mandatory and also requires another argument: the keyword to be searched
-json 'link' option: json output format of entity linking (by default)
-xml 'link' option: xml output format of entity linking
1-100 'sum' option: percentage of the input text that will be summarized (by default 10%)
-pe 'conj' option: the verb conjugator uses European Portuguese (by default)
-pb 'conj' option: the verb conjugator uses Brasilian Portuguese
-pen 'conj' option: the verb conjugator uses European Portuguese before the spell agreement
-pbn 'conj' option: the verb conjugator uses Brasilian Portuguese before the spell agreement

EOF
exit
}
# ---------------------------------------------------------------------------
# Argument validation and dispatch to linguakit.perl.
#
# Positional layout (matches the Syntax line printed by help):
#   $1 = language   $2 = module   $3 = input   $4 = first option (optional)
# Only the first option ($4) is validated here; every argument from $3
# onwards is forwarded untouched to linguakit.perl.
# ---------------------------------------------------------------------------

# usage_error: show the usage text and stop with a non-zero status.
# help calls exit itself, so it is run in a subshell; that lets this script
# report status 2 (invalid usage) instead of help's own exit status.
usage_error() {
(help)
exit 2
}

if (( $# < 3 )); then
usage_error
fi

# These were previously tested without ever being assigned, which made every
# invocation fall through to the help text.
LING=$1
MOD=$2
OPT=${4-}

case "$MOD" in
dep|tagger|sent|mwe|recog|tok|kwic|key|rel|link|sum|seg|lem|conj|coref) ;;
*) usage_error ;;
esac

# Anchored so that only a whole number from 1 to 100 is accepted as the
# 'sum' percentage; the regex lives in a variable so bash treats it as a
# regex rather than parsing the parentheses itself.
pct_re='^(100|[1-9][0-9]?)$'

case "$OPT" in
'') ;;                          # no option given
-a|-fa|-c|-conll) ;;            # dep
-noner|-ner|-nec|-crnec) ;;     # tagger / coref
-chi|-log|-scp|-mi|-cooc) ;;    # mwe
-split|-sort|-tokens) ;;        # tok / kwic
-json|-xml) ;;                  # link
-pe|-pb|-pen|-pbn) ;;           # conj
*)
[[ $OPT =~ $pct_re ]] || usage_error
;;
esac

case "$LING" in
es|en|gl|pt|none) ;;
# fr) ;;
*) usage_error ;;
esac

# linguakit.perl takes the module first, then the language, then the input
# and any options. "${@:3}" forwards all remaining arguments.
perl "$(dirname "$0")/linguakit.perl" "$MOD" "$LING" "${@:3}"