Skip to content

Commit

Permalink
perf: improve disease caching (#391)
Browse files: browse the repository at this point in the history
  • Loading branch information
jsstevenson authored Dec 12, 2023
1 parent c7fab69 commit 354a243
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions src/therapy/etl/base.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,7 +2,6 @@
import json
import logging
from abc import ABC, abstractmethod
from functools import lru_cache
from pathlib import Path
from typing import Dict, List, Optional, Union

Expand Down Expand Up @@ -215,6 +214,8 @@ def _load_therapy(self, therapy: Dict) -> None:
class DiseaseIndicationBase(Base):
"""Base class for sources that require disease normalization capabilities."""

_disease_cache: Dict[str, Optional[str]] = {}

def __init__(
self,
database: AbstractDatabase,
Expand All @@ -230,18 +231,22 @@ def __init__(
super().__init__(database, data_path, silent)
self.disease_normalizer = DiseaseNormalizer(create_disease_db())

@lru_cache(maxsize=64)
def _normalize_disease(self, query: str) -> Optional[str]:
"""Attempt normalization of disease term.
:param str query: term to normalize
:return: ID if successful, None otherwise
"""
response = self.disease_normalizer.normalize(query)
if response.match_type > 0:
return response.normalized_id
term = query.lower()
if term in self._disease_cache:
return self._disease_cache[term]
else:
logger.warning(f"Failed to normalize disease term: {query}")
return None
response = self.disease_normalizer.normalize(term)
normalized_id = response.normalized_id
self._disease_cache[term] = normalized_id
if normalized_id is None:
logger.warning(f"Failed to normalize disease term: {query}")
return normalized_id


class SourceFormatException(Exception): # noqa: N818
Expand Down

0 comments on commit 354a243

Please sign in to comment.