Skip to content

Commit

Permalink
feat!: update models to vrs 2.0.0 community review ballot (#449)
Browse files Browse the repository at this point in the history
close #448 

* Update modules to vrs
[2.0.0-ballot.2024-11.3](https://github.com/ga4gh/vrs/tree/2.0.0-ballot.2024-11.3)
tag
  * Use preferred formats for `MappableConcept.mappings`
    * `system` will use OBO Foundry persistent URL (PURL), source homepage,
      or namespace prefix, in that order of preference, if available.
    * `code` will use the `concept_id` as the CURIE
  * Remove `normalized_id` and leverage `MappableConcept.primaryCode`
* Also updates mappings to include exactMatch relation for merged
concept identifier
* Therapeutic Agent --> Therapy
  • Loading branch information
korikuzma authored Jan 2, 2025
1 parent 0bde977 commit 481ca68
Show file tree
Hide file tree
Showing 7 changed files with 1,013 additions and 785 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ dependencies = [
"uvicorn",
"click",
"boto3",
"ga4gh.vrs~=2.0.0a8",
"ga4gh.vrs==2.0.0a13",
]
dynamic = ["version"]

[project.optional-dependencies]
etl = [
"disease-normalizer[etl]~=0.5.0",
"disease-normalizer[etl]~=0.7.0",
"owlready2",
"rdflib",
"wikibaseintegrator>=0.12.0",
Expand Down
2 changes: 1 addition & 1 deletion src/therapy/etl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def _normalize_disease(self, query: str) -> str | None:
if term in self._disease_cache:
return self._disease_cache[term]
response = self.disease_normalizer.normalize(term)
normalized_id = response.normalized_id
normalized_id = response.disease.primaryCode.root if response.disease else None
self._disease_cache[term] = normalized_id
if normalized_id is None:
_logger.warning("Failed to normalize disease term: %s", query)
Expand Down
4 changes: 2 additions & 2 deletions src/therapy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def custom_openapi() -> dict:
"Return merged strongest-match concept for query string " "provided by user."
)
merged_matches_summary = (
"Given query, provide merged normalized record as a " "Therapeutic Agent."
"Given query, provide merged normalized record as a Therapy Mappable Concept."
)
merged_response_descr = "A response to a validly-formed query."
normalize_q_descr = "Therapy to normalize."
Expand Down Expand Up @@ -148,7 +148,7 @@ def normalize(
:param q: therapy search term
:param bool infer_namespace: if True, try to infer namespace from query term.
:returns: JSON response with matching normalized record provided as a
Therapeutic Agent, and source metadata
Therapy Mappable Concept, and source metadata
"""
try:
response = query_handler.normalize(html.unescape(q), infer_namespace)
Expand Down
145 changes: 97 additions & 48 deletions src/therapy/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,23 @@
from typing import Any, TypeVar

from botocore.exceptions import ClientError
from ga4gh.core import domain_models, entity_models
from disease.schemas import NAMESPACE_TO_SYSTEM_URI as DISEASE_NAMESPACE_TO_SYSTEM_URI
from disease.schemas import NamespacePrefix as DiseaseNamespacePrefix
from ga4gh.core.models import (
Coding,
ConceptMapping,
Extension,
MappableConcept,
Relation,
code,
)
from uvicorn.config import logger

from therapy import NAMESPACE_LUIS, PREFIX_LOOKUP, SOURCES
from therapy.database import AbstractDatabase
from therapy.schemas import (
NAMESPACE_TO_SYSTEM_URI,
SYSTEM_URI_TO_NAMESPACE,
BaseNormalizationService,
HasIndication,
MatchesNormalized,
Expand Down Expand Up @@ -350,20 +361,17 @@ def _add_merged_meta(self, response: NormalizationService) -> NormalizationServi
:return: completed response object.
"""
sources_meta = {}
therapeutic_agent = response.therapeutic_agent
sources = [response.normalized_id.split(":")[0]] # type: ignore[union-attr]
if therapeutic_agent.mappings: # type: ignore[union-attr]
sources += [m.coding.system for m in therapeutic_agent.mappings] # type: ignore[union-attr]
therapy = response.therapy

sources = []
for m in therapy.mappings or []:
ns = SYSTEM_URI_TO_NAMESPACE.get(m.coding.system)
if ns in PREFIX_LOOKUP:
sources.append(PREFIX_LOOKUP[ns])

for src in sources:
try:
src_name = SourceName(PREFIX_LOOKUP[src])
except KeyError:
# not an imported source
continue
else:
if src_name not in sources_meta:
sources_meta[src_name] = self.db.get_source_metadata(src_name)
if src not in sources_meta:
sources_meta[src] = self.db.get_source_metadata(src)
response.source_meta_ = sources_meta # type: ignore[assignment]
return response

Expand All @@ -377,42 +385,88 @@ def _record_order(self, record: dict) -> tuple[int, str]:
source_rank = SourcePriority[src]
return source_rank, record["concept_id"]

def _add_therapeutic_agent(
def _add_therapy(
self,
response: NormalizationService,
record: dict,
match_type: MatchType,
) -> NormalizationService:
"""Format received DB record as therapeutic agent and update response object.
"""Format received DB record as Mappable Concept and update response object.
:param NormalizationService response: in-progress response object
:param Dict record: record as stored in DB
:param str query: query string from user request
:param MatchType match_type: type of match achieved
:return: completed response object ready to return to user
"""
therapeutic_agent_obj = domain_models.TherapeuticAgent(
id=f"normalize.therapy.{record['concept_id']}", label=record.get("label")

def _create_concept_mapping(
    concept_id: str,
    relation: Relation,
    ns_to_system_uri: dict[str, str],
    ns_prefix: type[NamespacePrefix] | type[DiseaseNamespacePrefix],
) -> ConceptMapping:
    """Create concept mapping for therapy or disease identifier

    ``system`` will use OBO Foundry persistent URL (PURL), source homepage, or
    namespace prefix, in that order of preference, if available.

    :param concept_id: Concept identifier represented as a curie
    :param relation: SKOS mapping relationship (required; there is no default)
    :param ns_to_system_uri: Dictionary containing mapping from namespace to
        system URI. It is looked up with the resolved ``ns_prefix`` member.
    :param ns_prefix: Namespace prefix enum class used to validate the prefix
        of ``concept_id``
    :raise ValueError: if the prefix of ``concept_id`` is not a value of
        ``ns_prefix``, either as given or upper-cased
    :return: Concept mapping for therapy or disease identifier
    """
    prefix = concept_id.split(":")[0]

    try:
        namespace = ns_prefix(prefix)
    except ValueError:
        # Some namespaces are registered upper-cased, so retry before failing
        try:
            namespace = ns_prefix(prefix.upper())
        except ValueError as e:
            err_msg = f"Namespace prefix not supported: {prefix}"
            raise ValueError(err_msg) from e

    # Fall back to the namespace prefix *string* when no URI is registered;
    # passing the enum member itself would not be a valid ``system`` value.
    system = ns_to_system_uri.get(namespace, namespace.value)

    return ConceptMapping(
        coding=Coding(code=code(concept_id), system=system), relation=relation
    )

therapy_obj = MappableConcept(
id=f"normalize.therapy.{record['concept_id']}",
primaryCode=code(root=record["concept_id"]),
conceptType="Therapy",
label=record.get("label"),
)

# mappings
mappings = [
_create_concept_mapping(
concept_id=record["concept_id"],
relation=Relation.EXACT_MATCH,
ns_to_system_uri=NAMESPACE_TO_SYSTEM_URI,
ns_prefix=NamespacePrefix,
)
]
source_ids = record.get("xrefs", []) + record.get("associated_with", [])
mappings = []
for source_id in source_ids:
system, code = source_id.split(":")
mappings.append(
entity_models.ConceptMapping(
coding=entity_models.Coding(
code=entity_models.Code(code), system=system.lower()
),
relation=entity_models.Relation.RELATED_MATCH,
)
mappings.extend(
_create_concept_mapping(
concept_id=source_id,
relation=Relation.RELATED_MATCH,
ns_to_system_uri=NAMESPACE_TO_SYSTEM_URI,
ns_prefix=NamespacePrefix,
)
for source_id in source_ids
)
if mappings:
therapeutic_agent_obj.mappings = mappings
therapy_obj.mappings = mappings

extensions = []
if "aliases" in record:
therapeutic_agent_obj.alternativeLabels = record["aliases"]
extensions.append(Extension(name="aliases", value=record["aliases"]))

extensions = []
if any(
filter(
lambda f: f in record,
Expand All @@ -435,49 +489,44 @@ def _add_therapeutic_agent(
indication = self._get_indication(ind_db)

if indication.normalized_disease_id:
system, code = indication.normalized_disease_id.split(":")
mappings = [
entity_models.ConceptMapping(
coding=entity_models.Coding(
code=entity_models.Code(code), system=system.lower()
),
relation=entity_models.Relation.RELATED_MATCH,
_create_concept_mapping(
concept_id=indication.normalized_disease_id,
relation=Relation.RELATED_MATCH,
ns_to_system_uri=DISEASE_NAMESPACE_TO_SYSTEM_URI,
ns_prefix=DiseaseNamespacePrefix,
)
]
else:
mappings = []
ind_disease_obj = domain_models.Disease(
ind_disease_obj = MappableConcept(
id=indication.disease_id,
conceptType="Disease",
label=indication.disease_label,
mappings=mappings or None,
)

if indication.supplemental_info:
ind_disease_obj.extensions = [
entity_models.Extension(name=k, value=v)
Extension(name=k, value=v)
for k, v in indication.supplemental_info.items()
]
inds_list.append(ind_disease_obj.model_dump(exclude_none=True))
if inds_list:
approv_value["has_indication"] = inds_list

approv = entity_models.Extension(
name="regulatory_approval", value=approv_value
)
approv = Extension(name="regulatory_approval", value=approv_value)
extensions.append(approv)

trade_names = record.get("trade_names")
if trade_names:
extensions.append(
entity_models.Extension(name="trade_names", value=trade_names)
)
extensions.append(Extension(name="trade_names", value=trade_names))

if extensions:
therapeutic_agent_obj.extensions = extensions
therapy_obj.extensions = extensions

response.match_type = match_type
response.normalized_id = record["concept_id"]
response.therapeutic_agent = therapeutic_agent_obj
response.therapy = therapy_obj
return self._add_merged_meta(response)

def _resolve_merge(
Expand Down Expand Up @@ -537,7 +586,7 @@ def normalize(self, query: str, infer: bool = True) -> NormalizationService:
response = NormalizationService(**self._prepare_normalized_response(query))

return self._perform_normalized_lookup(
response, query, infer, self._add_therapeutic_agent
response, query, infer, self._add_therapy
)

def _construct_drug_match(self, record: dict) -> Therapy:
Expand Down
66 changes: 58 additions & 8 deletions src/therapy/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from enum import Enum, IntEnum
from typing import Any, Literal

from ga4gh.core import domain_models
from ga4gh.core.models import MappableConcept
from pydantic import BaseModel, ConfigDict, StrictBool, constr

from therapy import __version__
Expand Down Expand Up @@ -258,6 +258,44 @@ class NamespacePrefix(Enum):
WIKIDATA = "wikidata"


# Map each namespace prefix to the ``system`` URI reported for it: the OBO Foundry
# persistent URL (PURL) where one is listed below, otherwise a source homepage or
# documentation page.
NAMESPACE_TO_SYSTEM_URI: dict[NamespacePrefix, str] = {
NamespacePrefix.ATC: "https://www.who.int/tools/atc-ddd-toolkit/atc-classification/",
NamespacePrefix.CHEBI: "http://purl.obolibrary.org/obo/chebi.owl",
NamespacePrefix.CHEMBL: "https://www.ebi.ac.uk/chembl/",
NamespacePrefix.CHEMIDPLUS: "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
NamespacePrefix.CASREGISTRY: "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
NamespacePrefix.CVX: "https://www2a.cdc.gov/vaccines/iis/iisstandards/vaccines.asp?rpt=cvx",
NamespacePrefix.DRUGBANK: "https://go.drugbank.com",
NamespacePrefix.DRUGCENTRAL: "https://drugcentral.org",
NamespacePrefix.DRUGSATFDA_ANDA: "https://www.fda.gov/drugs/types-applications/abbreviated-new-drug-application-anda",
NamespacePrefix.DRUGSATFDA_NDA: "https://www.fda.gov/drugs/types-applications/new-drug-application-nda",
NamespacePrefix.HEMONC: "https://hemonc.org",
NamespacePrefix.INCHIKEY: "https://www.chemspider.com",
NamespacePrefix.IUPHAR_LIGAND: "https://www.guidetopharmacology.org/GRAC/LigandListForward",
NamespacePrefix.GUIDETOPHARMACOLOGY: "https://www.guidetopharmacology.org/GRAC/LigandListForward",
NamespacePrefix.MMSL: "https://www.nlm.nih.gov/research/umls/rxnorm/sourcereleasedocs/mmsl.html",
NamespacePrefix.MSH: "https://id.nlm.nih.gov/mesh/",
NamespacePrefix.NCIT: "http://purl.obolibrary.org/obo/ncit.owl",
NamespacePrefix.NDC: "https://dps.fda.gov/ndc",
NamespacePrefix.PUBCHEMCOMPOUND: "https://pubchem.ncbi.nlm.nih.gov/docs/compounds",
NamespacePrefix.PUBCHEMSUBSTANCE: "https://pubchem.ncbi.nlm.nih.gov/docs/substances",
NamespacePrefix.RXNORM: "https://www.nlm.nih.gov/research/umls/rxnorm/index.html",
NamespacePrefix.SPL: "https://www.fda.gov/industry/fda-data-standards-advisory-board/structured-product-labeling-resources",
NamespacePrefix.UMLS: "https://www.nlm.nih.gov/research/umls/index.html",
NamespacePrefix.UNII: "https://precision.fda.gov/uniisearch",
NamespacePrefix.UNIPROT: "https://www.uniprot.org",
NamespacePrefix.USP: "https://www.usp.org/health-quality-safety/compendial-nomenclature",
NamespacePrefix.VANDF: "https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/VANDF",
NamespacePrefix.WIKIDATA: "https://www.wikidata.org",
}

# Reverse lookup: system URI -> namespace prefix value (``ns.value``).
# NOTE: some namespaces above share one URI (CHEMIDPLUS / CASREGISTRY and
# IUPHAR_LIGAND / GUIDETOPHARMACOLOGY), so each shared URI resolves to a single
# namespace value here.
SYSTEM_URI_TO_NAMESPACE = {
system_uri: ns.value for ns, system_uri in NAMESPACE_TO_SYSTEM_URI.items()
}


class DataLicenseAttributes(BaseModel):
"""Define constraints for data license attributes."""

Expand Down Expand Up @@ -484,8 +522,7 @@ class UnmergedNormalizationService(BaseNormalizationService):
class NormalizationService(BaseNormalizationService):
"""Response containing one or more merged records and source data."""

normalized_id: str | None = None
therapeutic_agent: domain_models.TherapeuticAgent | None = None
therapy: MappableConcept | None = None
source_meta_: dict[SourceName, SourceMeta] | None = None

model_config = ConfigDict(
Expand All @@ -494,18 +531,31 @@ class NormalizationService(BaseNormalizationService):
"query": "cisplatin",
"warnings": None,
"match_type": 80,
"normalized_id": "rxcui:2555",
"therapeutic_agent": {
"type": "TherapeuticAgent",
"therapy": {
"conceptType": "Therapy",
"primaryCode": "rxcui:2555",
"id": "normalize.therapy.rxcui:2555",
"label": "cisplatin",
"mappings": [
{
"coding": {"code": "C376", "system": "ncit"},
"coding": {
"code": "2555",
"system": "https://www.nlm.nih.gov/research/umls/rxnorm/index.html",
},
"relation": "exactMatch",
},
{
"coding": {
"code": "C376",
"system": "http://purl.obolibrary.org/obo/ncit.owl",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "15663-27-1", "system": "chemidplus"},
"coding": {
"code": "15663-27-1",
"system": "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
},
"relation": "relatedMatch",
},
{
Expand Down
Loading

0 comments on commit 481ca68

Please sign in to comment.