This repository has been archived by the owner on Mar 22, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 32
onehot encoding for residue type and polarity #21
Merged
Changes from 6 commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
9a4976a
onehot encoding for residue type and polarity
102e514
Merge branch 'master' into onehot
manonreau ffe279e
Update Graph.py
manonreau 0b8c5d5
Update GraphGen.py
manonreau 5861001
Update GraphGen.py
manonreau d0ff5db
Update NeuralNet.py
manonreau 81c1447
Update GraphGen.py
manonreau 051337a
Update Graph.py
manonreau File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,15 +52,15 @@ def __init__(self, pdb_path, ref_path, graph_type='residue', pssm_path=None, | |
@staticmethod | ||
def _get_pssm(pssm_path, mol_name, base_name): | ||
|
||
pssmA = os.path.join(pssm_path, mol_name+'.A.pdb.pssm') | ||
pssmB = os.path.join(pssm_path, mol_name+'.B.pdb.pssm') | ||
pssmA = os.path.join(pssm_path, mol_name+'.A.pssm') | ||
pssmB = os.path.join(pssm_path, mol_name+'.B.pssm') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should rename the files we give in the example then, or at least check that the example still works. |
||
|
||
# check if the pssms exists | ||
if os.path.isfile(pssmA) and os.path.isfile(pssmB): | ||
pssm = {'A': pssmA, 'B': pssmB} | ||
else: | ||
pssmA = os.path.join(pssm_path, base_name+'.A.pdb.pssm') | ||
pssmB = os.path.join(pssm_path, base_name+'.B.pdb.pssm') | ||
pssmA = os.path.join(pssm_path, base_name+'.A.pssm') | ||
pssmB = os.path.join(pssm_path, base_name+'.B.pssm') | ||
if os.path.isfile(pssmA) and os.path.isfile(pssmB): | ||
pssm = {'A': pssmA, 'B': pssmB} | ||
else: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
import os | ||
import numpy as np | ||
import shutil | ||
|
||
import torch | ||
from time import time | ||
import networkx as nx | ||
|
||
|
@@ -38,25 +38,25 @@ def __init__(self, pdb=None, pssm=None, | |
|
||
self.residue_names = {'CYS': 0, 'HIS': 1, 'ASN': 2, 'GLN': 3, 'SER': 4, 'THR': 5, 'TYR': 6, 'TRP': 7, | ||
'ALA': 8, 'PHE': 9, 'GLY': 10, 'ILE': 11, 'VAL': 12, 'MET': 13, 'PRO': 14, 'LEU': 15, | ||
'GLU': 16, 'ASP': 17, 'LYS': 18, 'ARG': 20} | ||
'GLU': 16, 'ASP': 17, 'LYS': 18, 'ARG': 19} | ||
|
||
self.residue_polarity = {'CYS': 'polar', 'HIS': 'polar', 'ASN': 'polar', 'GLN': 'polar', 'SER': 'polar', 'THR': 'polar', 'TYR': 'polar', 'TRP': 'polar', | ||
'ALA': 'apolar', 'PHE': 'apolar', 'GLY': 'apolar', 'ILE': 'apolar', 'VAL': 'apolar', 'MET': 'apolar', 'PRO': 'apolar', 'LEU': 'apolar', | ||
'GLU': 'charged', 'ASP': 'charged', 'LYS': 'charged', 'ARG': 'charged'} | ||
'GLU': 'neg_charged', 'ASP': 'neg_charged', 'LYS': 'neg_charged', 'ARG': 'pos_charged'} | ||
|
||
self.pssm_pos = {'CYS': 4, 'HIS': 8, 'ASN': 2, 'GLN': 5, 'SER': 15, 'THR': 16, 'TYR': 18, 'TRP': 17, | ||
'ALA': 0, 'PHE': 13, 'GLY': 7, 'ILE': 9, 'VAL': 19, 'MET': 12, 'PRO': 14, 'LEU': 10, | ||
'GLU': 6, 'ASP': 3, 'LYS': 11, 'ARG': 1} | ||
|
||
self.polarity_encoding = { | ||
'apolar': 0, 'polar': -1, 'charged': 1} | ||
self.edge_polarity_encoding, iencod = {}, 0 | ||
for k1, v1 in self.polarity_encoding.items(): | ||
for k2, v2 in self.polarity_encoding.items(): | ||
key = tuple(np.sort([v1, v2])) | ||
if key not in self.edge_polarity_encoding: | ||
self.edge_polarity_encoding[key] = iencod | ||
iencod += 1 | ||
'apolar': 0, 'polar': 1, 'neg_charged': 2, 'pos_charged': 3} | ||
#self.edge_polarity_encoding, iencod = {}, 0 | ||
##for k1, v1 in self.polarity_encoding.items(): | ||
##for k2, v2 in self.polarity_encoding.items(): | ||
##key = tuple(np.sort([v1, v2])) | ||
##if key not in self.edge_polarity_encoding: | ||
##self.edge_polarity_encoding[key] = iencod | ||
#iencod += 1 | ||
|
||
# check if external execs are installed | ||
self.check_execs() | ||
|
@@ -226,12 +226,14 @@ def get_node_features(self, db): | |
|
||
self.nx.nodes[node_key]['chain'] = { | ||
'A': 0, 'B': 1}[chainID] | ||
self.nx.nodes[node_key]['type'] = self.residue_names[resName] | ||
self.nx.nodes[node_key]['pos'] = np.mean( | ||
db.get('x,y,z', chainID=chainID, resSeq=resSeq), 0) | ||
self.nx.nodes[node_key]['type'] = self.onehot( | ||
self.residue_names[resName], len(self.residue_names)) | ||
|
||
self.nx.nodes[node_key]['charge'] = self.residue_charge[resName] | ||
self.nx.nodes[node_key]['polarity'] = self.polarity_encoding[self.residue_polarity[resName]] | ||
self.nx.nodes[node_key]['polarity'] = self.onehot( | ||
self.polarity_encoding[self.residue_polarity[resName]], len(self.polarity_encoding)) | ||
|
||
self.nx.nodes[node_key]['bsa'] = bsa_data[node_key] | ||
|
||
|
@@ -253,8 +255,8 @@ def get_edge_features(self): | |
|
||
for e in self.nx.edges: | ||
node1, node2 = e | ||
self.nx.edges[node1, node2]['polarity'] = self._get_edge_polarity( | ||
node1, node2) | ||
#self.nx.edges[node1, node2]['polarity'] = self._get_edge_polarity( | ||
# node1, node2) | ||
Comment on lines +258 to +259
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So we don't have any edge features? No polarity and no distance? (Sorry, I forgot a bit about the inner workings of the code.) |
||
self.nx.edge_index.append( | ||
[node_keys.index(node1), node_keys.index(node2)]) | ||
|
||
|
@@ -336,3 +338,10 @@ def _get_edge_distance(self, node1, node2, db): | |
d2 = -2*np.dot(xyz1, xyz2.T) + np.sum(xyz1**2, | ||
axis=1)[:, None] + np.sum(xyz2**2, axis=1) | ||
return np.sqrt(np.min(d2)) | ||
|
||
|
||
def onehot(self, idx, size):
    """Return a one-hot encoded vector as a NumPy array.

    Args:
        idx (int): position in the vector that is set to 1.
        size (int): length of the encoded vector.

    Returns:
        np.ndarray: float32 array of length ``size`` filled with zeros
        except for a 1 at position ``idx``.

    Raises:
        IndexError: if ``idx`` is out of bounds for a vector of ``size``.
    """
    # Build the vector directly in NumPy rather than allocating a torch
    # tensor only to convert it; float32 keeps the dtype that the
    # torch.zeros-based construction yields by default.
    encoding = np.zeros(size, dtype=np.float32)
    # Fill the one-hot encoded sequence with 1 at the corresponding idx
    encoding[idx] = 1
    return encoding
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you need the `_class` variable? Why not … (the suggested code snippet is missing from this capture)? I also find `'class'` a bit generic. Maybe `'capri_class'`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could also shorten the code (the suggested snippet is missing from this capture),
but of course it's a bit less readable... so up to you :) (Plus, I don't know if what I propose here works :D)