diff --git a/src/monarch_ingest/ingests/biogrid/biogrid.py b/src/monarch_ingest/ingests/biogrid/biogrid.py
index ec525e16..836da686 100644
--- a/src/monarch_ingest/ingests/biogrid/biogrid.py
+++ b/src/monarch_ingest/ingests/biogrid/biogrid.py
@@ -14,15 +14,18 @@
 
 publications = get_publication_ids(row['Publication Identifiers'])
 
-association = PairwiseGeneToGeneInteraction(
-    id="uuid:" + str(uuid.uuid1()),
-    subject=gid_a,
-    predicate="biolink:interacts_with",
-    object=gid_b,
-    has_evidence=evidence,
-    publications=publications,
-    primary_knowledge_source="infores:biogrid",
-    aggregator_knowledge_source=["infores:monarchinitiative"]
-)
+# Only keep interactions where BOTH interactors use NCBIGene or UniProtKB identifiers; could also filter on taxid
+if gid_a.startswith(("NCBIGene:", "UniProtKB:")) \
+        and gid_b.startswith(("NCBIGene:", "UniProtKB:")):
+    association = PairwiseGeneToGeneInteraction(
+        id="uuid:" + str(uuid.uuid1()),
+        subject=gid_a,
+        predicate="biolink:interacts_with",
+        object=gid_b,
+        has_evidence=evidence,
+        publications=publications,
+        primary_knowledge_source="infores:biogrid",
+        aggregator_knowledge_source=["infores:monarchinitiative"]
+    )
 
-koza_app.write(association)
+    koza_app.write(association)
diff --git a/src/monarch_ingest/ingests/biogrid/biogrid_util.py b/src/monarch_ingest/ingests/biogrid/biogrid_util.py
index d9dd81e7..a938e822 100644
--- a/src/monarch_ingest/ingests/biogrid/biogrid_util.py
+++ b/src/monarch_ingest/ingests/biogrid/biogrid_util.py
@@ -14,7 +14,10 @@ def get_gene_id(raw_id: str) -> str:
     :param raw_id: str, raw BioGRID input string (a pseudo-CURIE)
     :return:
     """
-    gid = raw_id.replace("entrez gene/locuslink", "NCBIGene")
+    gid = (raw_id
+           .replace("entrez gene/locuslink:", "NCBIGene:")
+           .replace("uniprot/swiss-prot:", "UniProtKB:"))
+
     return gid
 
 