-
Notifications
You must be signed in to change notification settings - Fork 4
/
surface_cont_res.py
211 lines (178 loc) · 7.59 KB
/
surface_cont_res.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# Developed by Natália Teruel
# Najmanovich Research Group
# Cite this work as Surfaces: A software to quantify and visualize interactions within and between proteins and ligands - Teruel, N. F. B., Borges, V. M., & Najmanovich, R. (2023)
#Imports
import argparse
import sys
import os
import re
import pandas as pd
#Useful dicts
aa = {'C':'CYS', 'D':'ASP', 'S':'SER', 'Q':'GLN', 'K':'LYS', 'I':'ILE', 'P':'PRO', 'T':'THR', 'F':'PHE', 'N':'ASN', 'G':'GLY', 'H':'HIS', 'L':'LEU', 'R':'ARG', 'W':'TRP', 'A':'ALA', 'V':'VAL', 'E':'GLU', 'Y':'TYR', 'M':'MET'}
#Input of pdb file and list of residues for us to evaluate the interactions
def get_residue_name(pdb_line):
res_num = re.findall('\d',pdb_line[22:27])
res_num = int(''.join(res_num))
res_name = pdb_line[17:20]
string = res_name + str(res_num) + pdb_line[21]
return (string)
def read_chains(pdb_file):
chains = []
atoms = []
residues = []
f = open(pdb_file, 'r')
Lines = f.readlines()
for line in Lines:
if line[:4] == 'ATOM' or line[:4] == 'HETE':
# correspondent chain for each atom
chain = line[21]
if chain not in chains:
chains.append(chain)
atoms.append([int(line[6:12])])
else:
index = chains.index(chain)
atoms[index].append(int(line[6:12]))
# correct names for each residue
string = get_residue_name(line)
if string not in residues:
residues.append(string)
return (residues, chains, atoms)
def read_residues(pdb_file, list_residues):
sele_atom_numbers = []
for k in range(len(list_residues)):
sele_atom_numbers.append([])
f = open(pdb_file, 'r')
Lines = f.readlines()
for line in Lines:
for i in range(len(list_residues)):
if (line[:4] == 'ATOM' or line[:4] == 'HETA') and (get_residue_name(line)==list_residues[i]):
sele_atom_numbers[i].append(int(line[6:12]))
return (sele_atom_numbers)
#Function to generate the file with the output of vcon
def vcon(pdb_name):
string = f'{os.path.join(".", "vcon")} {pdb_name} > vcon_file.txt'
os.system(string)
#Functions to fix the names of the chains
def get_chain(atom, og_chain, chains, atom_numbers):
if og_chain in chains:
index = chains.index(og_chain)
else:
return (og_chain)
if atom in atom_numbers[index]:
return (og_chain)
else:
for i in range(len(chains)):
if i != index:
if atom in atom_numbers[i]:
return (chains[i])
return (0)
#Functions to open vcon results, .def and .dat for atom types interactions
def read_atom(line):
atnum = int(line[:6])
attype = line[8:11]
resnum = int(line[12:17])
res = line[19:22]
chain = line[23:24]
return (atnum,attype,resnum,res,chain)
def read_surface(line):
surf = (float(line[-12:-6]))
return (surf)
def read_interactions(file, matrix, chains, sele_res, all_res, def_file, dat_file, atom_numbers, scale_factor):
f = open(file, 'r')
Lines = f.readlines()
for line in Lines:
if line[:1] != '#' and line != '\n':
if line[31:34] == 'Sol':
main_line = line
main_atnum,main_attype,main_resnum,main_res,main_chain = read_atom(main_line)
fixed_main_chain = get_chain(main_atnum,main_chain,chains,atom_numbers)
if fixed_main_chain != 0:
main_residue = main_res+str(main_resnum)+fixed_main_chain
else:
atnum,attype,resnum,res,chain = read_atom(line[22:])
fixed_other_chain = get_chain(atnum,chain,chains,atom_numbers)
if fixed_other_chain != 0:
other_residue = res+str(resnum)+fixed_other_chain
surf = read_surface(line)
if main_residue in sele_res:
matrix.loc[main_residue, other_residue] += (surf * score(main_attype, main_res, attype, res, def_file, dat_file) * scale_factor)/2
if other_residue in sele_res:
matrix.loc[other_residue, main_residue] += (surf * score(main_attype, main_res, attype, res, def_file, dat_file) * scale_factor)/2
return(matrix)
#get the atom type number from def file of choice
def atomtype_num(def_file, res, attyp):
attyp = attyp.replace(" ", "")
f = open(def_file, 'r')
Lines = f.readlines()
for i in range(len(Lines)):
if Lines[i][:3] == res:
ind = Lines[i].index(attyp+':')
ind_end = Lines[i][ind:].index(',')
attype_num = int(Lines[i][ind+len(attyp)+1:ind+ind_end])
f.close()
return(attype_num)
#get the interaction between atom type 1 and atom type 2 from dat file of choice
def interactions(dat_file, at1, at2):
if len(str(at1)) == 1:
at1 = ' ' + str(at1)
if len(str(at2)) == 1:
at2 = ' ' + str(at2)
f = open(dat_file, 'r')
Lines = f.readlines()
for line in Lines:
if str(at1)+'-'+str(at2) == line[5:10] or str(at2)+'-'+str(at1) == line[5:10]:
interact = float(line[13:])
f.close()
return(interact)
#get the final score based on atom type num and interactions
def score(attype1, res1, attype2, res2, def_file, dat_file):
at1 = atomtype_num(def_file, res1, attype1)
at2 = atomtype_num(def_file, res2, attype2)
value = interactions(dat_file, at1, at2)
return (value)
#create file of list of interactions
def list_file(matrix,output_name):
residues1 = []
residues2 = []
values = []
abs_values = []
for i in range(len(matrix.index)):
for j in range(len(matrix.columns)):
num = matrix.loc[matrix.index[i], matrix.columns[j]]
if num != 0:
residues1.append(matrix.index[i])
residues2.append(matrix.columns[j])
values.append(num)
abs_values.append(abs(num))
sorted_residues1 = [x for _,x in sorted(zip(abs_values,residues1),reverse=True)]
sorted_residues2 = [x for _,x in sorted(zip(abs_values,residues2),reverse=True)]
sorted_values = [x for _,x in sorted(zip(abs_values,values),reverse=True)]
f = open("List_" + output_name[:-4] + ".txt", "w")
for k in range(len(values)):
f.write(sorted_residues1[k] + "," + sorted_residues2[k] + "," + str(sorted_values[k]) + "\n")
f.close()
return
def main():
parser= argparse.ArgumentParser(description="the arguments.", add_help=False)
parser.add_argument("-f","--pdb_file", action="store")
parser.add_argument("-res","--list_residues", action="store")
parser.add_argument("-o","--output_name", action="store")
parser.add_argument("-def","--atomtypes_definition", action="store")
parser.add_argument("-dat","--atomtypes_interactions", action="store")
args=parser.parse_args()
sele_res = list(args.list_residues.split(","))
#print (sele_res)
all_res, chains, atom_numbers = read_chains(args.pdb_file)
vcon(args.pdb_file)
matrix = [ [ 0 for i in range(len(all_res)) ] for j in range(len(sele_res)) ]
matrix = pd.DataFrame(matrix)
matrix.columns = all_res
matrix.index = sele_res
# Determined according to the AB-Bind dataset results
scale_factor = 0.00024329
matrix = read_interactions('vcon_file.txt', matrix, chains, sele_res, all_res, args.atomtypes_definition, args.atomtypes_interactions, atom_numbers, scale_factor)
matrix.to_csv(args.output_name)
list_file(matrix,args.output_name)
os.remove('vcon_file.txt')
return
main()