1+ const  fs  =  require ( 'fs' ) ; 
12const  metadataParser  =  require ( './metadataParserJson' ) ; 
23const  _  =  require ( 'lodash' ) ; 
3- var  biopaxFile  =  null ; 
4+ 
5+ var  biopaxMap  =  null ; 
6+ 
47/** 
5-  *   
8+  * 
69 * @param  {* } biopaxElement Metadata for an entity from BioPAX 
710 * @param  {* } nodeType Is this a xref or entity? 
811 */ 
912function  collectEntityMetadata ( biopaxElement ,  nodeType  =  'default' ) { 
10-    
13+ 
1114  let  result  =  [ ] ; 
1215  //Collect relevant data on this node, and push it into output 
1316  if  ( nodeType  !==  'Reference' )  { 
@@ -49,11 +52,11 @@ function collectEntityMetadata(biopaxElement, nodeType = 'default'){
4952} 
5053
5154/** 
52-  *   
55+  * 
5356 * @param  {* } entity Metadata for an entity in the network 
5457 * @returns  Tree array containing all metadata about the given entity 
5558 */ 
56- function  buildBioPaxTree ( entity )  {    
59+ function  buildBiopaxTree ( entity )  { 
5760  let  result  =  [ ] ; 
5861
5962  //make sure the metadata exists 
@@ -81,28 +84,30 @@ function buildBioPaxTree(entity) {
8184    xrefList  =  _ . concat ( xrefList , [ eref ] ) ; 
8285
8386  processXrefs ( xrefList , eref , collectedData ) ; 
84-    
87+ 
8588  result . push ( [ id , collectedData ] ) ; 
8689  return  result ; 
8790} 
8891
8992function  processXrefs ( xrefList , eref , collectedData ) { 
90- 
9193  let  erefXrefList  =  [ ] ; 
9294
9395  for  ( let  i  =  0 ;  i  <  xrefList . length ;  i ++ )  { 
9496    //Check if this is a cross-reference or entity reference 
97+     //(- the last item (if eref is defined) is treated in a special way that 
98+     //TODO: saves a couple of lines but sacrifices clarity...) 
9599    let  keyName  =  'Reference' ; 
96100    if  ( i  ==  ( xrefList . length  -  1 )  &&  eref ) 
97101      keyName  =  'EntityReference' ; 
98102
99103    //Get Referenced element and make sure it's valid 
100-     let  refElement  =  getElementFromBioPax ( xrefList [ i ] ) ;   
101-     if  ( ! ( refElement ) ) 
104+     let  refElement  =  getByNodeId ( xrefList [ i ] ,   biopaxMap ) ; 
105+     if  ( ! ( refElement ) )   { 
102106      continue ; 
107+     } 
103108
104109    //create list of xrefs for eref 
105-     if ( keyName  ===  "EntityReference" ) { 
110+     if ( keyName  ===  "EntityReference" )   { 
106111      let  erefXref  =  refElement [ 'xref' ] ; 
107112      if ( erefXref ) 
108113        if ( typeof  erefXref  ===  'string' ) 
@@ -119,134 +124,109 @@ function processXrefs(xrefList,eref,collectedData){
119124  //almost identically to above 
120125  for ( let  i = 0 ; i < erefXrefList . length ; i ++ ) { 
121126    let  keyName  =  'Reference' ; 
122-     let  refElement  =  getElementFromBioPax ( erefXrefList [ i ] ) ;  
123- 
124-     if  ( ! ( refElement ) ) 
127+     let  refElement  =  getByNodeId ( erefXrefList [ i ] ,  biopaxMap ) ; 
128+     if  ( ! ( refElement ) )  { 
125129      continue ; 
130+     } 
126131
127132    collectedData . push ( [ keyName , collectEntityMetadata ( refElement , keyName ) ] ) ; 
128133  } 
129134
130135} 
131136
132137/** 
133-  *  
134-  * @param  {* } id String representing potential BioPAX ID 
135-  * @returns  BioPAX metadata for that ID (if it exists) or null (if it doesn't) 
138+  * Gets an object from the given map by biopax URI or Cy node id; 
139+  * node id is mapped onto the biopax URI using several rules 
140+  * (knowing how the SBGN xml ids and CyJson node ids were generated). 
141+  * 
142+  * @param  id String representing a BioPAX absolute URI or Cy node id 
143+  * @param   map Map/Object - biopax URI (string) to some objects map (e.g., to elements or metadata) 
144+  * @returns  matching value from the Map (if it exists) or null 
136145 */ 
137- function  getElementFromBioPax ( id )  { 
146+ function  getByNodeId ( id ,  map )  { 
147+   // shortcut 
148+   if ( ! id  ||  ! map ) 
149+     return  null ; 
138150
139-   //Remove any URL stuff from the ID, just like getProcessedBioPax 
140-   if  ( id . indexOf ( 'http' )  !==  - 1  ||  id . indexOf ( '/' )  !==  - 1 )  { 
141-     var  lastIndex  =  id . lastIndexOf ( '/' ) ; 
142-     id  =  id . substring ( lastIndex  +  1 ) ; 
151+   let  bpe  =  ( map  instanceof  Map )  ? map . get ( id )  : map [ id ] ; 
152+   if  ( ! bpe )  { 
153+     //remove the last underscore and everything after, and search again (as generics/complex component ids 
154+     //are sometimes like ..Protein_4c4358ebaabf0f39e1e5325a4178d931_830703244edf74707f84842e080f965d) 
155+     id  =  id . substring ( 0 ,  id . lastIndexOf ( "_" ) ) ; 
156+     bpe  =  ( map  instanceof  Map )  ? map . get ( id )  : map [ id ] ; 
143157  } 
144158
145-   //Get element matching the ID 
146-   //make sure the ID isn't "" or null 
147-   if ( id ) 
148-     return  biopaxFile . get ( id ) ; 
149-   else  return  null ; 
159+   return  ( bpe )  ? bpe  : null ; 
150160} 
151161
/**
 * Builds a URI-to-biopaxElement map from a BioPAX JSON-LD model.
 *
 * Every '@id' property (at any nesting depth) is renamed to 'pathid' while
 * parsing. Only property KEYS are renamed; string values that merely contain
 * the text '@id' are left untouched.
 *
 * The input is expected to carry a top-level '@graph' array; iterating a
 * missing '@graph' throws a TypeError.
 *
 * @param {string} biopaxJsonText BioPAX sub-network in JSON-LD format
 * @returns {Map<string, Object>} map from 'pathid' (the element's absolute URI) to the element
 */
function getBiopaxMap(biopaxJsonText) {
  // JSON.parse reviver: runs for every parsed value, children first.
  // Renames the '@id' key of each object to 'pathid'.
  const renameIdKey = (key, value) => {
    if (
      value !== null &&
      typeof value === 'object' &&
      !Array.isArray(value) &&
      Object.prototype.hasOwnProperty.call(value, '@id')
    ) {
      value.pathid = value['@id'];
      delete value['@id'];
    }
    return value;
  };

  const graph = JSON.parse(biopaxJsonText, renameIdKey)['@graph'];

  // named differently from the module-level `biopaxMap` to avoid shadowing it
  const uriToElement = new Map();

  //'pathid' is the absolute URI (URL, by design) of a biopax object, and so we use it as map key:
  for (const element of graph) {
    uriToElement.set(element.pathid, element);
  }

  return uriToElement;
}
226182
/**
 * Reads and parses 'generic-physical-entity-map.json' located next to this
 * module. Wrapped in `_.memoize`, so repeated calls reuse the first result.
 *
 * @returns {Object} the parsed JSON content of the file
 */
const getGenericPhysicalEntityMap = _.memoize(() => {
  const mapFilePath = __dirname + '/generic-physical-entity-map.json';
  const rawJson = fs.readFileSync(mapFilePath, 'utf-8');
  return JSON.parse(rawJson);
});
186+ 
187+ 
/**
 * Extracts additional BioPAX properties for cy nodes from the corresponding
 * JSON-LD representation (these were missing from the initial SBGN-ML result,
 * converted to CyJson).
 *
 * Side effect: replaces the module-level `biopaxMap`, which `processXrefs`
 * reads when resolving cross-references.
 *
 * @param {string} biopaxJsonText String containing BioPAX metadata for the network
 * @param {*} nodes Nodes of the network; each is read via `node.data.id`
 * @returns {Object} Object keyed by node id holding the processed metadata of each node
 */
function getBiopaxMetadata(biopaxJsonText, nodes) {
  // The BioPAX metadata arrives as text; index it by URI first
  biopaxMap = getBiopaxMap(biopaxJsonText);

  const metadataByNodeId = {};

  // For every Cy node: find its BioPAX element, build the metadata tree
  // and store the parsed result under the node id
  nodes.forEach((node) => {
    const element = getByNodeId(node.data.id, biopaxMap);
    const tree = buildBiopaxTree(element);
    metadataByNodeId[node.data.id] = metadataParser(tree);
  });

  return metadataByNodeId;
}
251213
252- module . exports  =  {  getProcessedBioPax,  getBioPaxMetadata} ; 
/**
 * Collects, for every given node, the 'synonyms' of the matching entry in the
 * generic physical entity map; nodes without a match (or without 'synonyms')
 * get an empty array.
 *
 * @param {*} nodes Nodes of the network; each is read via `node.data.id`
 * @returns {Object} Object keyed by node id holding that node's synonyms
 */
const getGeneSymbolsForGenericNodes = (nodes) => {
  const genericEntities = getGenericPhysicalEntityMap();
  const synonymsByNodeId = {};

  nodes.forEach((node) => {
    const entity = getByNodeId(node.data.id, genericEntities);
    synonymsByNodeId[node.data.id] = _.get(entity, 'synonyms', []);
  });

  return synonymsByNodeId;
};
230+ 
231+ 
232+ module . exports  =  { getBiopaxMetadata,  getGeneSymbolsForGenericNodes} ; 
0 commit comments