-
Notifications
You must be signed in to change notification settings - Fork 0
/
ava.py
executable file
·329 lines (249 loc) · 11.7 KB
/
ava.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
#!/usr/bin/env python3
import os
import re
from numpy import array
import pandas as pd
# Get working directory for data
def get_working_directory():
dir_path = input('Please input working directory: ')
if dir_path in os.listdir():
return dir_path
else:
print(f'Error: Directory {dir_path} not found...')
retry = get_working_directory()
return retry
# Collect receptor file name for log files
def get_receptor():
receptor_file = input('Receptor file: ')
# Check that the file exists and returns filename if True
if os.path.isfile(receptor_file):
print('Receptor file accepted...')
return receptor_file
# If file not found - re-request file name
else:
print(f'Error: {receptor_file} not found')
print('Please check filenames and re-enter')
retry = get_receptor()
return retry
def get_flex_receptor():
enable_flex = input('Do you want to use flexible docking?(y/n) ')
if enable_flex == 'y':
# Check that the file exists and returns filename if True
flex_receptor_file = input('Receptor file: ')
if os.path.isfile(flex_receptor_file):
print('Flexible Receptor file accepted...')
return flex_receptor_file
# If file not found - re-request file name
else:
print(f'Error: {flex_receptor_file} not found')
print('Please check filenames and re-enter')
retry = get_flex_receptor()
return retry
else:
return None
# Collect ligand file names for log files
def get_ligands():
# Check that a file exists with the working directory
def file_check(file):
if os.path.isfile(file):
pass
else:
print(f'Error: {file} not found...')
ligand = input('Ligands: ') # Receive files names for ligands
ligands = ligand.split() # Split filenames into list
ligand_map = map(os.path.isfile, ligands) # Map ligands to check if all files exist
# Check if all files can be found, if they can't be found: flag non-existent files and ask for input again
if not all(ligand_map):
[file_check(file) for file in ligands]
print('Please check filenames and re-enter')
retry = get_ligands()
return retry
# If all files found accept input and return list of files
else:
print('File(s) accepted...')
return ligands
# Collect coordinates for log files
def get_coords():
# Function to determine if a value is a float, returns True or False accordingly
def is_float(value):
try:
float(value)
return True
except ValueError:
return False
coordinates_input = input('Grid co-ordinates: ') # Receive coordinates in format x y z
coord = coordinates_input.split() # Generate list of coordinates
coord_map = map(is_float, coord) # Check coordinates are float
# Check all 3 coordinates have been provided
if len(coord) != 3:
print('Error: Please enter exactly 3 coordinates separated by a space')
retry = get_coords()
return retry
# Check all coordinates are numeric values
if not all(coord_map):
print('Error not all values are not numbers')
retry = get_coords()
return retry
# accept and return input if all values are present and correct
else:
print('Coordinates accepted...')
return coord
# Collect box size parameters for log files
def get_box_size():
# Check if a give value is a positive integer
def check_pos(value):
n = int(value)
if n > 0:
return True
else:
return False
box_size_input = input('Box size: ')
box = box_size_input.split()
box_map = map(int, box)
# Check all 3 coordinates have been provided
if len(box) != 3:
print('Error: Please enter exactly 3 coordinates separated by a space')
retry = get_box_size()
return retry
# Check all 3 coordinates are integers
try:
all(box_map)
except ValueError:
print('Error: Box values must be positive integers greater than 0')
retry = get_box_size()
return retry
# Check all integers are positive
if not all([check_pos(n) for n in box]):
print('Error: Box values must be positive integers greater than 0')
retry = get_box_size()
return retry
# accept and return input if all values are present and correct
else:
print('Box parameters accepted...')
return box
# Collect seeds if any for log files
def get_seeds():
seed = input('Seed (optional): ') # Receive seeds in format 1 2 3
seed_list_input = seed.split() # separate seeds into list
# Check seeds are integers (can be positive or negative)
try:
seed_map = map(int, seed_list_input) # generate a list of seeds as integers
# If no input give - default seed is 0
if len(seed_list_input) < 1:
return [0]
# accept and return input if all values are present and correct
else:
print('Seed(s) accepted...')
return list(seed_map)
# If not integer value is received ask for input again
except ValueError:
print('Error: seed(s) must be an integer')
retry = get_seeds()
return retry
# Write a basic config file for AutoDock Vina - ligands, receptors, outputs, seed (0), grid size and coordinates.
def config_writer(ligands, receptor_input, flex_receptor, coord, box, seeding=0):
conformations_directory = f'conformations-{seeding}'
logs_directory = f'logs-{seeding}'
file_name = f'{ligands[:-6]}-config-{seeding}.txt'
output_file = f'{conformations_directory}/{ligands[:-6]}-{seeding}.pdbqt'
log_file = f'{logs_directory}/{ligands[:-6]}-log-{seeding}.txt'
# Generate the config file using inputs
with open(file_name, 'w') as config:
if flex_receptor is not None:
config.write(f'flex = {flex_receptor}\n')
config.write('exhaustiveness = 32\n')
config.write(f'receptor = {receptor_input}\n' # receptor file
f'ligand = {ligands}\n\n' # ligand file
f'center_x = {coord[0]}\n' # grid coordinates for box
f'center_y = {coord[1]}\n'
f'center_z = {coord[2]}\n\n'
f'size_x = {box[0]}\n' # box size parameters
f'size_y = {box[1]}\n'
f'size_z = {box[2]}\n\n'
f'out = {output_file}\n' # Docking confirmation output
f'log = {log_file}\n\n' # Log file output
f'seed = {seeding}' # Seed for experiment
)
file_names.append(file_name) # append file name to list for automatic docking
if conformations_directory not in os.listdir():
os.mkdir(conformations_directory)
if logs_directory not in os.listdir():
os.mkdir(logs_directory)
def get_binding_data_csv():
# Extract data from log files
def extract_data(file, i):
file_name = file.split('-') # extract ligand name and seed number from file name as a list
ligand = file_name[0] # store ligand name
seed = file_name[-1].replace('.txt', '') # store seed number
# Open log file to collect binding data
with open(file, 'r') as log:
try:
logs = log.readlines() # convert each line into a string
raw_data = logs[i].split() # split values into list
map_data = map(float, raw_data) # convert strings to floats
list_data = list(map_data) # convert map to list
list_data.insert(0, ligand) # Add ligand name as 1st element
list_data.insert(1, seed) # Add seed number as 2nd element
log_list.append(list_data) # Append data to overall data list
except IndexError:
print(f'Error could not read file: {file}')
pass
except UnicodeDecodeError:
print(f'Error could not read file: {file}')
pass
except ValueError:
print(f'Error could not read file: {file}')
pass
if "results" not in os.listdir():
os.mkdir("results")
print('results directory created')
current_working_directory = os.getcwd()
directory_list = ' '.join(str(ele) for ele in os.listdir()) # Generate string to search for logs directories
result = re.finditer(r"logs-[0-9]*[0-9]*[0-9]", directory_list) # Identify logs directories within pwd
log_dirs = []
[log_dirs.append(val.group(0)) for val in result] # store names of log directories in a list
log_list = [] # store rows of data extracted from log files
# Access and collect log data from log files in each directory
for directory in log_dirs:
os.chdir(directory)
print(f'Extracting Log data from {directory}...')
[extract_data(file, i) for file in os.listdir() for i in range(25, 35, 1)]
print(f'Log data extracted from {directory}...')
os.chdir(current_working_directory)
log_array = array(log_list) # convert collected data into a 2D NumPy array
# convert 2D NumPy array to DataFrame
log_df = pd.DataFrame(log_array, columns=['ligand', 'seed', 'binding mode', 'affinity (kcal/mol)',
'distance from best mode rmsd l.b', 'distance from best mode rmsd u.b'])
path = f'{current_working_directory}/results/binding-affinity-data.csv' # generate file name for csv file
log_df.to_csv(path, index=False, header=True) # Save DataFrame to CSV file
print(f'Log data retrieved and saved as {path}') # Tell user location of CSV file
def visualise_structures():
directory_list = ' '.join(
str(ele) for ele in os.listdir()) # Generate string to search for conformation directories
result = re.finditer(r"conformations-[0-9]*[0-9]*[0-9]*[0-9]*[0-9]*[0-9]*[0-9]*[0-9]*[0-9]",
directory_list) # Search for directories
conformation_dirs = []
[conformation_dirs.append(val.group(0)) for val in result] # Store identified directories
conformations_list = [receptor] # list of all .pdbqt files to be visualised
[conformations_list.append(f'{directory}/{file}') for directory in conformation_dirs for file in
os.listdir(directory)] #
conformation_files = " ".join(conformations_list)
pymol_command = f'/Applications/PyMOL.app/Contents/MacOS/PyMOL -cq {conformation_files} -d "save results/analysis.pse"'
os.system(pymol_command)
# Run Script
if __name__ == '__main__':
file_names = [] # Empty list for names of config files
working_directory = get_working_directory() # Ask for working directory for ava
os.chdir(working_directory) # change directory to input
receptor = get_receptor() # Receive file name of receptor
flex_receptor = get_flex_receptor()
ligand_list = get_ligands() # Generate a list of ligand files
coordinates = get_coords() # Receive co-ordinates of box in format x y z
box_size = get_box_size() # Receive box size in format x y z
seed_list = get_seeds() # Receive list of seeds for docking experiments
# Generate config files for AutoDock Vina
[config_writer(x, receptor, flex_receptor, coordinates, box_size, seeding=y) for y in seed_list for x in ligand_list]
# Run AutoDock Vina using generated config files list
[os.system(f'vina --config {config}') for config in file_names]
get_binding_data_csv() # Collect binding affinity data and save as a CSV file
visualise_structures() # Visualise the results of AutoDock Vina predictions for all experiments