CogStack · mart-r · Apr 8, 2025 · Apr 8, 2025 · Apr 8, 2025 · Apr 8, 2025
diff --git a/medcat2/cat.py b/medcat2/cat.py
@@ -124,10 +124,12 @@ def _get_entity(self, ent: MutableEntity,
             right_context = []
             center_context = []
 
+        # NOTE: in case the CUI is not in the CDB, we don't want to fail here
+        def_ci: dict[str, list[str]] = {'type_ids': []}
         out_dict: Entity = {
             'pretty_name': self.cdb.get_name(cui),
             'cui': cui,
-            'type_ids': list(self.cdb.cui2info[cui]['type_ids']),
+            'type_ids': list(self.cdb.cui2info.get(cui, def_ci)['type_ids']),
             'source_value': ent.base.text,
             'detected_name': str(ent.detected_name),
             'acc': ent.context_similarity,

diff --git a/medcat2/components/ner/trf/model.py b/medcat2/components/ner/trf/model.py
@@ -43,6 +43,20 @@ def train(self, json_path: Union[str, list, None],
         """
         return self.trf_ner._component.train(json_path, *args, **kwargs)
 
+    def eval(self, json_path: Union[str, list, None],
+             *args, **kwargs) -> tuple[Any, Any, Any]:
+        """Evaluate the underlying transformers NER model.
+        All the extra arguments are passed to the TransformersNER eval method.
+        Args:
+            json_path (Union[str, list, None]):
+                The JSON file path to read the evaluation data from.
+            *args: Additional arguments for TransformersNER.eval.
+            **kwargs: Additional keyword arguments for TransformersNER.eval.
+        Returns:
+            tuple[Any, Any, Any]: df, examples, dataset
+        """
+        return self.trf_ner._component.eval(json_path, *args, **kwargs)
+
     def __call__(self, text: Optional[str], *args, **kwargs
                  ) -> Optional[MutableDocument]:
         """Get the annotated document for text.

diff --git a/medcat2/components/ner/trf/transformers_ner.py b/medcat2/components/ner/trf/transformers_ner.py
@@ -4,7 +4,7 @@
 import datasets
 import torch
 from datetime import datetime
-from typing import Iterable, Iterator, Optional, Union, Callable, Type, Any
+from typing import Iterable, Iterator, Optional, Union, Callable, Any
 from typing import cast
 import inspect
 from functools import partial
@@ -310,8 +310,6 @@ def create_eval_pipeline(self):
                 '_special_tokens_map'] = special_tokens_map
 
         self.ner_pipe.device = self.model.device
-        self._consecutive_identical_failures = 0
-        self._last_exception: Optional[tuple[str, Type[Exception]]] = None
 
     def get_hash(self) -> str:
         """A partial hash trying to catch differences between models.
@@ -555,7 +553,7 @@ def eval(self, json_path: Union[str, list, None] = None, dataset=None,
         df, examples = metrics(p, return_df=True, tokenizer=self.tokenizer,
                                dataset=encoded_dataset)
 
-        return df, examples
+        return df, examples, dataset
 
     def expand_model_with_concepts(self, cui2preferred_name: dict[str, str],
                                    use_avg_init: bool = True) -> None:
@@ -695,8 +693,6 @@ def _process_doc(self, doc: MutableDocument):
 
             doc.all_ents.append(entity)
         create_main_ann(doc)
-        self._consecutive_identical_failures = 0  # success
-        self._last_exception = None
 
     def _process(self,
                  stream: Iterable[Union[MutableDocument, None]],

diff --git a/medcat2/utils/legacy/convert_cdb.py b/medcat2/utils/legacy/convert_cdb.py
@@ -120,7 +120,7 @@ def _add_cui_info(cdb: CDB, data: dict) -> CDB:
         vecs = cui2cv.get(cui, None)
         count_train = cui2ct.get(cui, 0)
         tags = cui2tags.get(cui, None)
-        type_ids = cui2type_ids.get(cui, None)
+        type_ids = cui2type_ids.get(cui, set())
         prefname = cui2prefname.get(cui, None)
         av_conf = cui2av_conf.get(cui, 0.0)
         info = get_new_cui_info(