Skip to content

Commit c60db6f

Browse files
Merge pull request #885 from PathwayCommons/issue_876
Issue 876 and more.
2 parents 6e3d1a0 + 01a1b7a commit c60db6f

File tree

11 files changed

+105
-146
lines changed

11 files changed

+105
-146
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ npm run start
3333
The following environment variables can be used to configure the server:
3434

3535
- `NODE_ENV` : the environment mode, either `production` or `development` (default)
36-
- `PC_URL` : Pathway Commons web service endpoint URL (default: http://www.pathwaycommons.org/pc2/)
36+
- `PC_URL` : actual Pathway Commons web service endpoint URL (default: 'http://www.pathwaycommons.org/pc2/')
3737
- `PORT` : the port on which the server runs (default 3000)
3838

3939
### Configure RethinkDB
@@ -45,8 +45,8 @@ The following environment variables can be used to configure the server:
4545
### Switching Pathway Commons Versions (release/other)
4646

4747
If Pathway Commons data and files have been updated since this app was last built and run,
48-
or you simply want to connect to a different PC2 instance (and set PC_URL environment variable),
49-
then the file `src/server/graph-generation/generic-physical-entities/generic-physical-entity-map.json`
48+
or you simply want to connect to a different PC2 instance (don't forget to set PC_URL),
49+
then the file `src/server/graph-generation/biopax-metadata/generic-physical-entity-map.json`
5050
needs to be updated. Also, purge the RethinkDB database tables or simply switch the database.
5151

5252
The following script downloads and processes physical_entities.json.gz file from Pathway Commons:

package-lock.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/client/common/config.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ const databases = [
2323
{database:'CAS', url:'http://identifiers.org/cas/', search:''},
2424
{database:'HPRD',url:'http://identifiers.org/hprd/',search:''},
2525
{database:'RefSeq',url:'http://identifiers.org/refseq/',search:''},
26-
{database:'Pathway Commons',url:'http://pathwaycommons.org/pc2/',search:''},
26+
{database:'Pathway Commons',url:null,search:''}, //TODO: was this PC_URL or xml:base (both should be configurable)?
2727
{database:'NCBI Gene',url:'http://identifiers.org/ncbigene/',search:''},
2828
{database:'Gene Cards',url:'http://identifiers.org/genecards/',search:''}
2929
];

src/client/features/enrichment/index.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ class Enrichment extends React.Component {
7171
}
7272

7373
handleGenes( genes ) {
74-
this.setState( { genes } );console.log(genes);
74+
this.setState( { genes } );
75+
// console.log(genes);
7576
}
7677

7778
render() {

src/scripts/generic-entity-mapping/update.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,6 @@ echo "Processing $DATA"
4040

4141
curl -s "$DATA" | gunzip -c | jq -cS 'map(select(.generic)) | reduce .[] as $o ({}; . + {($o.uri): {name: $o.name, label:$o.label, synonyms:$o."HGNC Symbol"}})' > generic-physical-entity-map.json
4242

43-
mv generic-physical-entity-map.json ../../server/graph-generation/generic-physical-entities/
43+
mv generic-physical-entity-map.json ../../server/graph-generation/biopax-metadata/
4444

4545
echo "\nUpdated generic-physical-entity-map.json"

src/server/database/query.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,11 @@ function getGraph(pcID, releaseID, connection, callback) {
8080
return db.handleResult(newerGraph, callback);
8181
}
8282

83-
function getGraphFromPC(pcID, releaseID, connection) {
84-
return getPathwayJson(pcID)
83+
function getGraphFromPC(pcURI, releaseID, connection) {
84+
return getPathwayJson(pcURI)
8585
.then(result => {
8686
if (connection && result.pathwayMetadata) {
87-
update.updateGraph(pcID, releaseID, result, connection);
87+
update.updateGraph(pcURI, releaseID, result, connection);
8888
}
8989
return result;
9090
});
Lines changed: 78 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
1+
const fs = require('fs');
12
const metadataParser = require('./metadataParserJson');
23
const _ = require('lodash');
3-
var biopaxFile = null;
4+
5+
var biopaxMap = null;
6+
47
/**
5-
*
8+
*
69
* @param {*} biopaxElement Metadata for an entity from BioPAX
710
* @param {*} nodeType Is this a xref or entity?
811
*/
912
function collectEntityMetadata(biopaxElement, nodeType = 'default'){
10-
13+
1114
let result = [];
1215
//Collect relevant data on this node, and push it into output
1316
if (nodeType !== 'Reference') {
@@ -49,11 +52,11 @@ function collectEntityMetadata(biopaxElement, nodeType = 'default'){
4952
}
5053

5154
/**
52-
*
55+
*
5356
* @param {*} entity Metadata for an entity in the network
5457
* @returns Tree array containing all metadata about the given entity
5558
*/
56-
function buildBioPaxTree(entity) {
59+
function buildBiopaxTree(entity) {
5760
let result = [];
5861

5962
//make sure the metadata exists
@@ -81,28 +84,30 @@ function buildBioPaxTree(entity) {
8184
xrefList = _.concat(xrefList,[eref]);
8285

8386
processXrefs(xrefList,eref,collectedData);
84-
87+
8588
result.push([id,collectedData]);
8689
return result;
8790
}
8891

8992
function processXrefs(xrefList,eref,collectedData){
90-
9193
let erefXrefList = [];
9294

9395
for (let i = 0; i < xrefList.length; i++) {
9496
//Check if this is a cross-reference or entity reference
97+
//(Note: when eref is defined, it was appended as the last item of xrefList,
98+
//so it is recognised here by its position; TODO: this saves a couple of lines but sacrifices clarity.)
9599
let keyName = 'Reference';
96100
if (i == (xrefList.length - 1) && eref)
97101
keyName = 'EntityReference';
98102

99103
//Get Referenced element and make sure it's valid
100-
let refElement = getElementFromBioPax(xrefList[i]);
101-
if (!(refElement))
104+
let refElement = getByNodeId(xrefList[i], biopaxMap);
105+
if (!(refElement)) {
102106
continue;
107+
}
103108

104109
//create list of xrefs for eref
105-
if(keyName === "EntityReference"){
110+
if(keyName === "EntityReference") {
106111
let erefXref = refElement['xref'];
107112
if(erefXref)
108113
if(typeof erefXref === 'string')
@@ -119,134 +124,109 @@ function processXrefs(xrefList,eref,collectedData){
119124
//almost identically to above
120125
for(let i=0;i<erefXrefList.length;i++){
121126
let keyName = 'Reference';
122-
let refElement = getElementFromBioPax(erefXrefList[i]);
123-
124-
if (!(refElement))
127+
let refElement = getByNodeId(erefXrefList[i], biopaxMap);
128+
if (!(refElement)) {
125129
continue;
130+
}
126131

127132
collectedData.push([keyName,collectEntityMetadata(refElement,keyName)]);
128133
}
129134

130135
}
131136

132137
/**
133-
*
134-
* @param {*} id String representing potential BioPAX ID
135-
* @returns BioPAX metadata for that ID (if it exists) or null (if it doesn't)
138+
* Gets an object from the given map by biopax URI or Cy node id;
139+
* node id is mapped onto the biopax URI using several rules
140+
* (knowing how the SBGN xml ids and CyJson node ids were generated).
141+
*
142+
* @param id String representing a BioPAX absolute URI or Cy node id
143+
* @param map Map/Object - biopax URI (string) to some objects map (e.g., to elements or metadata)
144+
* @returns matching value from the Map (if it exists) or null
136145
*/
137-
function getElementFromBioPax(id) {
146+
function getByNodeId(id, map) {
147+
// shortcut
148+
if(!id || !map)
149+
return null;
138150

139-
//Remove any URL stuff from the ID, just like getProcessedBioPax
140-
if (id.indexOf('http') !== -1 || id.indexOf('/') !== -1) {
141-
var lastIndex = id.lastIndexOf('/');
142-
id = id.substring(lastIndex + 1);
151+
let bpe = (map instanceof Map) ? map.get(id) : map[id];
152+
if (!bpe) {
153+
//remove the last underscore and everything after, and search again (as generics/complex component ids
154+
//are sometimes like ..Protein_4c4358ebaabf0f39e1e5325a4178d931_830703244edf74707f84842e080f965d)
155+
id = id.substring(0, id.lastIndexOf("_"));
156+
bpe = (map instanceof Map) ? map.get(id) : map[id];
143157
}
144158

145-
//Get element matching the ID
146-
//make sure the ID isn't "" or null
147-
if(id)
148-
return biopaxFile.get(id);
149-
else return null;
159+
return (bpe) ? bpe : null;
150160
}
151161

152162
/**
153-
*
154-
* @param {*} nodeId node ID from Cytoscape network JSON
155-
* @returns Subtree containing BioPax metadata for the node
163+
* Builds a URI-to-biopaxElement map from a biopax JSON-LD model.
164+
* @param {*} biopaxJsonText String - BioPAX sub-network in JSON-LD format
165+
* @returns `Map` containing BioPAX JSON objects
156166
*/
157-
function matchCyIdToBiopax(nodeId) {
158-
159-
// The original entity IDs have been converted in the cytoscape network.
160-
// Need to first find the original BioPAX entity ID, then collect metadata.
161-
162-
//Alot of this stuff is strange but it is ALL NECESSARY to correctly map the IDs
163-
164-
//Search for ID exactly as it appears
165-
let searchTerm = getElementFromBioPax(nodeId);
166-
if (searchTerm)
167-
return searchTerm;
168-
169-
//Search for ID after last underscore
170-
searchTerm = getElementFromBioPax(nodeId.substring(nodeId.lastIndexOf("_") +1));
171-
if (searchTerm)
172-
return searchTerm;
173-
174-
175-
//Find index of second underscore
176-
let i=0,index=null;
177-
while(i<2 && index !==-1){
178-
index = nodeId.indexOf("_", index +1);
179-
i++;
180-
}
181-
182-
//Remove extra identifiers appended by Cytoscape
183-
//i.e. everything after the second underscore
184-
let fixedNodeId = nodeId.substring(0,index);
185-
186-
//The last two methods won't work if there are no underscores in the ID
187-
if (nodeId.indexOf('_') <= -1)
188-
return null;
189-
190-
//Search for ID in the first 2 underscores
191-
searchTerm = getElementFromBioPax(fixedNodeId);
192-
if (searchTerm)
193-
return searchTerm;
167+
function getBiopaxMap(biopaxJsonText) {
194168

195-
//Search for ID in between first and second underscore
196-
searchTerm = getElementFromBioPax(fixedNodeId.substring(fixedNodeId.lastIndexOf("_") +1));
197-
if (searchTerm)
198-
return searchTerm;
199-
200-
//Search Failed, return null
201-
return null;
202-
}
203-
204-
/**
205-
*
206-
* @param {*} biopaxJsonText String containing BioPAX network metadata
207-
* @returns `Map` containing BioPAX network metadata
208-
*/
209-
function getProcessedBioPax(biopaxJsonText) {
210169
//parse String into JSON object, rename '@id' property to 'pathid'
211170
const graph = JSON.parse(biopaxJsonText.replace(new RegExp('@id', 'g'), 'pathid'))['@graph'];
212-
const biopaxElementMap = new Map();
213171

214-
//The 'pathid' property has a format like: http://pathwaycommons.org/pc2/someID
215-
//Convert it to this format: someID, and make it the key for the map
216-
for (const element of graph) {
217-
const fullId = element['pathid'];
218-
const lastIndex = fullId.lastIndexOf('/');
219-
const subId = fullId.substring(lastIndex + 1);
172+
const biopaxMap = new Map();
220173

221-
biopaxElementMap.set(subId, element);
174+
//'pathid' is the absolute URI (URL, by design) of a biopax object, and so we use it as map key:
175+
for (const element of graph) {
176+
const uri = element['pathid'];
177+
biopaxMap.set(uri, element);
222178
}
223179

224-
return biopaxElementMap;
180+
return biopaxMap;
225181
}
226182

183+
const getGenericPhysicalEntityMap = _.memoize(() => JSON.parse(
184+
fs.readFileSync(__dirname + '/generic-physical-entity-map.json', 'utf-8')
185+
));
186+
187+
227188
/**
228-
*
189+
* Extracts additional BioPAX properties, for cy nodes,
190+
* from the corresponding JSON-LD representation
191+
* (these were missing from the initial SBGN-ML result, converted to CyJson.)
192+
*
229193
* @param {*} biopaxJsonText String containing BioPAX metadata for the network
230194
* @param {*} nodes JSON object containing all the nodes in the network
231195
* @returns Processed metadata for each node in the network
232196
*/
233-
function getBioPaxMetadata(biopaxJsonText, nodes) {
197+
function getBiopaxMetadata(biopaxJsonText, nodes) {
234198

235199
//BioPAX metadata comes as a string, turn it into a Map
236-
biopaxFile = getProcessedBioPax(biopaxJsonText);
200+
biopaxMap = getBiopaxMap(biopaxJsonText);
237201

238202
const nodeMetadataMap = {};
239203

240204
//Iterate through nodes in Cy network, adding metadata to each
241205
nodes.forEach(node => {
242-
const CyId = node.data.id;
243-
//metadata for the Cytoscape node
244-
const BiopaxData = matchCyIdToBiopax(CyId);
206+
const bpe = getByNodeId(node.data.id, biopaxMap);
245207
//build the tree for metadata and add it to the map
246-
nodeMetadataMap[CyId] = metadataParser(buildBioPaxTree(BiopaxData));
208+
nodeMetadataMap[node.data.id] = metadataParser(buildBiopaxTree(bpe));
247209
});
248210

249211
return nodeMetadataMap;
250212
}
251213

252-
module.exports = { getProcessedBioPax, getBioPaxMetadata};
214+
/**
215+
*
216+
* @param nodes
217+
*/
218+
const getGeneSymbolsForGenericNodes = nodes => {
219+
const nodeGeneSynonyms = {};
220+
const genericPhysicalEntityMap = getGenericPhysicalEntityMap();
221+
222+
nodes.forEach(node => {
223+
const genericPE = getByNodeId(node.data.id, genericPhysicalEntityMap);
224+
const syns = _.get(genericPE, 'synonyms', []);
225+
nodeGeneSynonyms[node.data.id] = syns;
226+
});
227+
228+
return nodeGeneSynonyms;
229+
};
230+
231+
232+
module.exports = {getBiopaxMetadata, getGeneSymbolsForGenericNodes};

src/server/graph-generation/generic-physical-entities/index.js

Lines changed: 0 additions & 22 deletions
This file was deleted.

0 commit comments

Comments
 (0)