Skip to content

Commit 49be74c

Browse files
authored
perf(codes): cache terminologies (#108)
* refactor(cli): only load one matcher in search-codes
* perf(codes): cache code matchers by source file
* fix(codes): create cache dir if missing
* chore(docker): pin python to 3.12 in runtime layer
1 parent eef83b0 commit 49be74c

File tree

3 files changed

+34
-23
lines changed

3 files changed

+34
-23
lines changed

Dockerfile

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,15 @@
11
ARG VERSION_BUILD
22

3-
FROM python:3.11-slim
43

5-
ENV PYTHONUNBUFFERED=true
6-
WORKDIR /app
74

85
##################################################
96
# Poetry setup
107
##################################################
11-
FROM python AS poetry
8+
FROM python:3.12-slim AS poetry
129

1310
WORKDIR /app
1411
# Install poetry
12+
ENV PYTHONUNBUFFERED=true
1513
ENV POETRY_HOME=/opt/poetry
1614
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
1715
ENV PATH="$POETRY_HOME/bin:$PATH"
@@ -32,7 +30,7 @@ RUN poetry install --no-interaction --no-ansi -vvv
3230
##################################################
3331
# modos setup
3432
##################################################
35-
FROM python AS runtime
33+
FROM python:3.12-slim AS runtime
3634
ARG VERSION_BUILD
3735
ENV PATH="/app/.venv/bin:$PATH"
3836
COPY --from=poetry /app /app

modos/cli.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from modos.genomics.htsget import HtsgetConnection
2525
from modos.genomics.region import Region
2626
from modos.io import parse_instance
27-
from modos.prompt import SlotPrompter
27+
from modos.prompt import SlotPrompter, fuzzy_complete
2828
from modos.remote import EndpointManager, list_remote_items
2929
from modos.storage import connect_s3
3030

@@ -308,17 +308,19 @@ def search_codes(
308308
] = 50,
309309
):
310310
"""Search for terminology codes using free text."""
311-
prompter = SlotPrompter(
312-
EndpointManager(ctx.obj.endpoint),
313-
prompt=f'Browsing terms for slot "{slot}". Use tab to cycle suggestions.\n> ',
311+
matcher = get_slot_matcher(
312+
slot,
313+
EndpointManager(ctx.obj.endpoint).fuzon,
314314
)
315-
for matcher in prompter.slot_matchers.values():
316-
matcher.top = top
315+
matcher.top = top
317316
if query:
318-
matches = prompter.slot_matchers[slot].find_codes(query)
317+
matches = matcher.find_codes(query)
319318
out = "\n".join([f"{m.uri} | {m.label}" for m in matches])
320319
else:
321-
out = prompter.prompt_for_slot(slot)
320+
out = fuzzy_complete(
321+
prompt_txt=f'Browsing terms for slot "{slot}". Use tab to cycle suggestions.\n> ',
322+
matcher=matcher,
323+
)
322324
print(out)
323325

324326

modos/codes.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
"""Utilities to automatically find / recommend terminology codes from text."""
22
from dataclasses import dataclass
3-
import requests
43
from typing import Optional, Protocol
54

5+
from pathlib import Path
6+
import requests
7+
68

79
SLOT_TERMINOLOGIES = {
8-
"cell_type": "https://purl.obolibrary.org/obo/cl.owl",
9-
"source_material": "https://purl.obolibrary.org/obo/uberon.owl",
10-
"taxon_id": "https://purl.obolibrary.org/obo/ncbitaxon/subsets/taxslim.owl",
10+
"cell_type": ["https://purl.obolibrary.org/obo/cl.owl"],
11+
"source_material": ["https://purl.obolibrary.org/obo/uberon.owl"],
12+
"taxon_id": [
13+
"https://purl.obolibrary.org/obo/ncbitaxon/subsets/taxslim.owl"
14+
],
1115
}
1216

1317

@@ -35,14 +39,21 @@ def __init__(self, slot: str, top: int = 50):
3539
self.top = top
3640

3741
try:
38-
from pyfuzon import TermMatcher
39-
40-
self.matcher = TermMatcher.from_files([SLOT_TERMINOLOGIES[slot]])
42+
from pyfuzon import cache
4143
except ImportError:
42-
raise ValueError(
43-
"""No endpoint provided and pyfuzon not installed,
44-
cannot do code matching."""
44+
raise ModuleNotFoundError(
45+
"pyfuzon must be installed to perform local code matching."
46+
)
47+
48+
sources = SLOT_TERMINOLOGIES[slot]
49+
try:
50+
self.matcher = cache.load_by_source(sources)
51+
except RuntimeError:
52+
Path(cache.get_cache_path(sources)).parent.mkdir(
53+
parents=True, exist_ok=True
4554
)
55+
cache.cache_by_source(sources)
56+
self.matcher = cache.load_by_source(sources)
4657

4758
def find_codes(self, query: str) -> list[Code]:
4859
return self.matcher.top(query, self.top)

0 commit comments

Comments
 (0)