-
Notifications
You must be signed in to change notification settings - Fork 4
/
create_NSC_numbers_file
executable file
·113 lines (96 loc) · 3.21 KB
/
create_NSC_numbers_file
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
source $vini_dir/sourceme
#######################################
# Map a COSMIC cell-line name to the spelling used in the NCI-60 data.
# Arguments: $1 - cell-line name (may be empty or contain spaces)
# Outputs:   the NCI-60 spelling on stdout; a name without a known alias
#            is printed unchanged.
#######################################
_cosmic_to_nci60() {
  case "$1" in
    SF268)   printf '%s\n' "SF-268" ;;
    SF295)   printf '%s\n' "SF-295" ;;
    SF539)   printf '%s\n' "SF-539" ;;
    SNB75)   printf '%s\n' "SNB-75" ;;
    T47D)    printf '%s\n' "T-47D" ;;
    TK10)    printf '%s\n' "TK-10" ;;
    MCF-7)   printf '%s\n' "MCF7" ;;
    HT-29)   printf '%s\n' "HT29" ;;
    LOXIMVI) printf '%s\n' "LOX" ;;
    *)       printf '%s\n' "$1" ;;
  esac
}

# Normalise the selected cell line in place. The quoted expansion keeps an
# empty or multi-word value from breaking the comparison, which the former
# unquoted `[ $cell_line == ... ]` tests did.
cell_line=$(_cosmic_to_nci60 "${cell_line-}")
# Data sources:
#   NCI-60 screening data:
#     https://dtp.cancer.gov/dtpstandard/cancerscreeningdata/index.jsp
#     (search: "Chemical name that starts with" AND "5 dose data")
#   GI50.zip (to be placed in $vini_dir/database):
#     https://wiki.nci.nih.gov/display/NCIDTPdata/NCI-60+Growth+Inhibition+Data
#   AgentInfo:
#     https://ctep.cancer.gov/protocoldevelopment/codes_values.htm#agent
#
# Both files are looked up relative to the current directory and are only
# fetched when missing.
# NOTE(review): later steps read $vini_dir/database/GI50.csv, so this
# presumably runs with $vini_dir/database as the working directory - confirm.
if [ ! -e AgentInfo ]; then
  echo -n "Downloading AgentInfo file..."
  # -f makes curl fail on an HTTP error instead of saving the error page.
  # The converted text goes to a scratch file first so that a failed download
  # never leaves behind an AgentInfo that would suppress the next attempt.
  if curl -o tmp -fLs https://ctepcore.nci.nih.gov/webobjs/CTEPWebsite/AgentInfo.html &&
    python "$HOME/.local/bin/html2text" tmp > AgentInfo.part; then
    mv AgentInfo.part AgentInfo
    echo "done."
  else
    rm -f AgentInfo.part
    echo "failed." >&2
  fi
fi
if [ ! -e GI50.csv ]; then
  echo -n "Downloading GI50.zip, please wait."
  # The URL is quoted because an unquoted '?' is a shell glob character.
  if wget -O GI50.zip "https://wiki.nci.nih.gov/download/attachments/147193864/GI50.zip?version=5"; then
    unzip -o GI50.zip
  else
    echo "GI50.zip download failed" >&2
  fi
  # wget -O creates the output file even on failure, so always clean it up.
  rm -f GI50.zip
fi
# Build NSC_numbers: one "<drug name>, <NSC number>" line for every entry of
# $vini_dir/ligands/ligands_list whose name occurs in AgentInfo.
# The NSC number is taken as the last whitespace-separated field of the
# matching AgentInfo line.
# $NULL and $ONES are not defined in this script; they are expected to come
# from the sourced environment (presumably 0 and 1 - confirm).
echo -n "Creating NSC_numbers file..."
: > tmp
# `|| [ -n "$line" ]` keeps a final entry that lacks a trailing newline.
while read -r line || [ -n "$line" ]; do
  # The drug name is the first comma-separated field; printf with a quoted
  # argument avoids the glob expansion an unquoted `echo $line` is subject to.
  drugname=$(printf '%s\n' "$line" | awk -F',' '{print $1}')
  # A blank entry has nothing to look up; an empty pattern would match
  # arbitrary AgentInfo lines.
  [ -n "$drugname" ] || continue
  # -F: treat the name literally, not as a regex; --: a name starting with
  # '-' must not be parsed as a grep option.
  grep -wF -- "$drugname" AgentInfo > lines
  # Arithmetic expansion strips the padding some wc implementations emit.
  nolines=$(( $(wc -l < lines) ))
  if [ "$nolines" -eq "$NULL" ]; then
    echo "drug $drugname not found in AgentInfo file"
  else
    if [ "$nolines" -eq "$ONES" ]; then
      NSC_NUMBER=$(awk 'NF>1{print $NF}' lines)
      echo "NSC number for drug $drugname found: $NSC_NUMBER"
    else
      # Several candidates: show them all, then settle on the first one.
      echo "Found several NSC numbers for drug $drugname in AgentInfo file:" ; echo
      cat lines ; echo
      NSC_NUMBER=$(head -n 1 lines | awk 'NF>1{print $NF}')
      echo "NSC number $NSC_NUMBER will be used." ; echo ; sleep 4
    fi
    printf '%s, %s\n' "$drugname" "$NSC_NUMBER" >> tmp
  fi
done < "$vini_dir/ligands/ligands_list"
# Strip carriage returns (^M); tr is used because `\r` in a sed pattern is a
# GNU extension.
tr -d '\r' < tmp > NSC_numbers
# Extract the GI50.csv rows that mention the selected cell line and store
# them, with carriage returns (^M) removed, as ${cell_line}_GI50.
# The pattern is quoted: with an empty $cell_line the former unquoted grep
# took the file path as its pattern and waited on stdin.
grep -F -- "$cell_line" "$vini_dir/database/GI50.csv" \
  | tr -d '\r' > "$vini_dir/database/${cell_line}_GI50"

# Keep only the NSC_numbers entries whose NSC number occurs in the
# cell-line extract.
# NOTE(review): NSC_numbers was written to the current directory above but is
# read from $vini_dir/database here - presumably the same directory; confirm.
: > tmp
while read -r line || [ -n "$line" ]; do
  # Despite its name, drugname holds the second comma-separated field of the
  # entry, i.e. the NSC number, with surrounding blanks trimmed.
  drugname=$(printf '%s\n' "$line" \
    | awk -F',' '{gsub(/^[ \t]+|[ \t]+$/, "", $2); print $2}')
  # An entry without an NSC number cannot match anything. Unquoted, the empty
  # value made grep treat the file path as its pattern and consume the rest
  # of this loop's input from stdin.
  [ -n "$drugname" ] || continue
  # NOTE(review): -w matches the number as a whole word anywhere in a row,
  # not only in the NSC column - confirm that this is acceptable.
  if grep -qwF -- "$drugname" "$vini_dir/database/${cell_line}_GI50"; then
    printf '%s\n' "$line" >> tmp
  fi
done < "$vini_dir/database/NSC_numbers"

if [ -s tmp ]; then
  mv tmp "$vini_dir/database/NSC_numbers"
else
  echo "no drugs in your ligands_list have been evaluated with NCI-60 GI protocol!"
  echo "SLEM values will be computed but the correlation analysis with GI50 data will be not performed!"
fi
# -f: the scratch file does not exist when ligands_list was empty.
rm -f lines