Merge branch 'v2' into test-types

tlambert03 · Jul 4, 2023 · da33693 · da33693
2 parents 33f0aff + f29ac5d
commit da33693
Show file tree

Hide file tree

Showing 8 changed files with 201 additions and 47 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -13,7 +13,7 @@ repos:
         exclude: ^tests|^docs
 
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.275
+    rev: v0.0.276
     hooks:
       - id: ruff
         args: [--fix]

diff --git a/src/ome_autogen/_generator.py b/src/ome_autogen/_generator.py
@@ -109,6 +109,7 @@ def build_import_patterns(cls) -> dict[str, dict]:
                 if o.module_name
             }
         )
+        patterns["ome_types._mixins._util"] = {"new_uuid": ["default_factory=new_uuid"]}
         return {key: patterns[key] for key in sorted(patterns)}
 
     def field_default_value(self, attr: Attr, ns_map: dict | None = None) -> str:
@@ -125,6 +126,11 @@ def format_arguments(self, kwargs: dict, indent: int = 0) -> str:
         if kwargs.get("default") in factorize:
             kwargs = {"default_factory": kwargs.pop("default"), **kwargs}
 
+        # uncomment this to use new_uuid as the default_factory for all UUIDs
+        # but then we have an equality checking problem in the tests
+        # if kwargs.get("metadata", {}).get("pattern", "").startswith("(urn:uuid:"):
+        #     kwargs.pop("default", None)
+        #     kwargs = {"default_factory": "new_uuid", **kwargs}
         return super().format_arguments(kwargs, indent)
 
     def constant_name(self, name: str, class_name: str) -> str:

diff --git a/src/ome_types/_mixins/_base_type.py b/src/ome_types/_mixins/_base_type.py
@@ -1,13 +1,12 @@
-import contextlib
-import re
 import warnings
 from datetime import datetime
 from enum import Enum
 from textwrap import indent
-from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Sequence, Set, cast
+from typing import TYPE_CHECKING, Any, ClassVar, Optional, Sequence, Set, cast
 
 from pydantic import BaseModel, validator
 
+from ome_types._mixins._ids import validate_id
 from ome_types.units import ureg
 
 if TYPE_CHECKING:
@@ -17,7 +16,7 @@
 # Default value to support automatic numbering for id field values.
 AUTO_SEQUENCE = "__auto_sequence__"
 
-_COUNTERS: Dict[str, int] = {}
+
 _UNIT_FIELD = "{}_unit"
 _QUANTITY_FIELD = "{}_quantity"
 DEPRECATED_NAMES = {
@@ -105,47 +104,7 @@ def __repr__(self) -> str:
             body = ""
         return f"{name}({body})"
 
-    @validator("id", pre=True, always=True, check_fields=False)
-    @classmethod
-    def _validate_id(cls, value: Any) -> Any:
-        """Pydantic validator for ID fields in OME models.
-
-        If no value is provided, this validator provides and integer ID, and stores the
-        maximum previously-seen value on the class.
-        """
-        # FIXME: clean this up
-        id_field = cls.__fields__["id"]
-        id_regex = cast(str, id_field.field_info.regex)
-        id_name = id_regex.split(":")[-3]
-
-        current_count = _COUNTERS.setdefault(id_name, -1)
-        if isinstance(value, str) and value != AUTO_SEQUENCE:
-            # parse the id and update the counter
-            *name, v_id = value.rsplit(":", 1)
-            if not re.match(id_regex, value):
-                newname = cls._validate_id(
-                    int(v_id) if v_id.isnumeric() else AUTO_SEQUENCE
-                )
-                warnings.warn(
-                    f"Casting invalid {id_name}ID {value!r} to {newname!r}",
-                    stacklevel=2,
-                )
-                return newname
-
-            with contextlib.suppress(ValueError):
-                _COUNTERS[id_name] = max(current_count, int(v_id))
-            return value
-
-        if isinstance(value, int):
-            _COUNTERS[id_name] = max(current_count, value)
-        elif value == AUTO_SEQUENCE:
-            # just increment the counter
-            _COUNTERS[id_name] += 1
-            value = _COUNTERS[id_name]
-        else:
-            raise ValueError(f"Invalid ID value: {value!r}, {type(value)}")
-
-        return f"{id_name}:{value}"
+    _v = validator("id", pre=True, always=True, check_fields=False)(validate_id)
 
     def __getattr__(self, key: str) -> Any:
         cls_name = self.__class__.__name__

diff --git a/src/ome_types/_mixins/_ids.py b/src/ome_types/_mixins/_ids.py
@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+import re
+import warnings
+from contextlib import suppress
+from typing import TYPE_CHECKING, Any, Final, cast
+
+if TYPE_CHECKING:
+    from pydantic import BaseModel
+
+# Default value to support automatic numbering for id field values.
+AUTO_SEQUENCE: Final = "__auto_sequence__"
+# map of id_name -> max id value
+ID_COUNTER: dict[str, int] = {}
+
+# map of (id_name, id_value) -> converted id
+# NOTE: this is cleared in OMEMixin.__init__, so that the set of converted IDs
+# is unique to each OME instance
+CONVERTED_IDS: dict[tuple[str, str], str] = {}
+
+
+def _get_id_name_and_pattern(cls: type[BaseModel]) -> tuple[str, str]:
+    # let this raise if it doesn't exist...
+    # this should only be used on classes that have an id field
+    id_field = cls.__fields__["id"]
+    id_pattern = cast(str, id_field.field_info.regex)
+    id_name = id_pattern.split(":")[-3]
+
+    return id_name, id_pattern
+
+
+def validate_id(cls: type[BaseModel], value: int | str) -> Any:
+    """Pydantic validator for ID fields in OME models.
+
+    This validator does the following:
+    1. if it's a valid string ID, just use it, updating the counter if necessary.
+    2. if it's an invalid string ID, try to extract the integer part from it, and use
+       that to create a new ID, or use the next value in the sequence if not.
+    3. if it's an integer, grab the appropriate ID name from the pattern and prepend it.
+    4. if it's the special `AUTO_SEQUENCE` sentinel, use the next value in the sequence.
+
+    ID_COUNTER stores the maximum previously-seen value for each ID name.
+    """
+    id_name, id_pattern = _get_id_name_and_pattern(cls)
+    current_count = ID_COUNTER.setdefault(id_name, -1)
+
+    if value == AUTO_SEQUENCE:
+        # if it's the special sentinel, use the next value
+        value = ID_COUNTER[id_name] + 1
+    elif isinstance(value, str):
+        if (id_name, value) in CONVERTED_IDS:
+            # XXX: possible bug
+            # if the same invalid value is used across multiple documents
+            # we'll be replacing it with the same converted id here
+            return CONVERTED_IDS[(id_name, value)]
+
+        # if the value is a string, extract the number from it if possible
+        value_id: str = value.rsplit(":", 1)[-1]
+
+        # if the value matches the pattern, just return it
+        # but update the counter if it's higher than the current value
+        if re.match(id_pattern, value):
+            with suppress(ValueError):
+                # (not all IDs have integers after the colon)
+                ID_COUNTER[id_name] = max(current_count, int(value_id))
+            return value
+
+        # if the value doesn't match the pattern, create a proper ID
+        # (using the value_id as the integer part if possible)
+        id_int = int(value_id) if value_id.isdecimal() else current_count + 1
+        newname = validate_id(cls, id_int)
+        # store the converted ID so we can use it elsewhere
+        CONVERTED_IDS[(id_name, value)] = newname
+
+        # warn the user
+        msg = f"Casting invalid {id_name}ID {value!r} to {newname!r}"
+        warnings.warn(msg, stacklevel=2)
+        return newname
+    elif not isinstance(value, int):
+        raise ValueError(f"Invalid ID value: {value!r}, {type(value)}")
+
+    # update the counter to be at least this value
+    ID_COUNTER[id_name] = max(current_count, value)
+    return f"{id_name}:{value}"
diff --git a/src/ome_types/_mixins/_ome.py b/src/ome_types/_mixins/_ome.py
@@ -4,6 +4,7 @@
 from typing import TYPE_CHECKING, Any, cast
 
 from ome_types._mixins._base_type import OMEType
+from ome_types._mixins._ids import CONVERTED_IDS
 
 if TYPE_CHECKING:
     from pathlib import Path
@@ -13,6 +14,8 @@
 
 class OMEMixin:
     def __init__(self, **data: Any) -> None:
+        # Clear the cache of converted IDs, so that they are unique to each OME instance
+        CONVERTED_IDS.clear()
         super().__init__(**data)
         self._link_refs()
 

diff --git a/src/ome_types/_mixins/_util.py b/src/ome_types/_mixins/_util.py
@@ -0,0 +1,6 @@
+import uuid
+
+
+def new_uuid() -> str:
+    """Generate a new random (version 4) UUID as a ``urn:uuid:`` string."""
+    return f"urn:uuid:{uuid.uuid4()}"
diff --git a/tests/data/transfer.ome.xml b/tests/data/transfer.ome.xml
@@ -0,0 +1,70 @@
+<OME xmlns="http://www.openmicroscopy.org/Schemas/OME/2016-06" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openmicroscopy.org/Schemas/OME/2016-06 http://www.openmicroscopy.org/Schemas/OME/2016-06/ome.xsd">
+    <Image ID="Image:1678" Name="combined_result.tiff">
+        <Description />
+        <Pixels DimensionOrder="XYZCT" ID="Pixels:1678" SizeC="1" SizeT="1" SizeX="1024" SizeY="1024" SizeZ="1" Type="uint16">
+            <MetadataOnly />
+        </Pixels>
+        <ROIRef ID="ROI:866" />
+        <ROIRef ID="ROI:867" />
+        <ROIRef ID="ROI:868" />
+        <AnnotationRef ID="Annotation:2209" />
+        <AnnotationRef ID="Annotation:2210" />
+        <AnnotationRef ID="Annotation:2551" />
+        <AnnotationRef ID="Annotation:-242965625845933864455559976333404928513" />
+    </Image>
+    <Image ID="Image:1679" Name="combined_result.tiff">
+        <Description />
+        <Pixels DimensionOrder="XYZCT" ID="Pixels:1679" SizeC="1" SizeT="1" SizeX="1024" SizeY="1024" SizeZ="1" Type="uint16">
+            <MetadataOnly />
+        </Pixels>
+        <ROIRef ID="ROI:866" />
+        <ROIRef ID="ROI:867" />
+        <ROIRef ID="ROI:868" />
+        <AnnotationRef ID="Annotation:2209" />
+        <AnnotationRef ID="Annotation:2210" />
+        <AnnotationRef ID="Annotation:2551" />
+        <AnnotationRef ID="Annotation:-242965625845933864455559976333404928514" />
+    </Image>
+    <StructuredAnnotations>
+        <MapAnnotation ID="Annotation:2209" Namespace="openmicroscopy.org/omero/client/mapAnnotation">
+            <Value>
+                <M K="species"> </M>
+                <M K="occupation"> </M>
+                <M K="first name"> </M>
+                <M K="surname">this is a test to see if the kv pairs in omero have any length limits. I don't think they do, but I will write something relatively long just so I can double-check whether that is the case or not.</M>
+            </Value>
+        </MapAnnotation>
+        <TagAnnotation ID="Annotation:2551">
+            <Value>simple_tag</Value>
+        </TagAnnotation>
+        <CommentAnnotation ID="Annotation:-242965625845933864455559976333404928513" Namespace="Image:1678">
+            <Value>root_0/2022-01/14/18-30-55.264/combined_result.tiff</Value>
+        </CommentAnnotation>
+        <CommentAnnotation ID="Annotation:-242965625845933864455559976333404928514" Namespace="Image:1679">
+            <Value>root_0/2022-01/14/18-30-55.264/combined_result.tiff</Value>
+        </CommentAnnotation>
+        <MapAnnotation ID="Annotation:2210" Namespace="jax.org/jax/example/namespace">
+            <Value>
+                <M K="species"> </M>
+                <M K="occupation"> </M>
+                <M K="first name"> </M>
+                <M K="surname"> </M>
+            </Value>
+        </MapAnnotation>
+    </StructuredAnnotations>
+    <ROI ID="ROI:866">
+        <Union>
+            <Line ID="Shape:1766" FillColor="-256" StrokeColor="-65283" Text="" TheC="0" TheT="0" TheZ="0" X1="321.7500000000001" X2="715.6500000000001" Y1="546.4875000000001" Y2="667.3875" />
+        </Union>
+    </ROI>
+    <ROI ID="ROI:867">
+        <Union>
+            <Line ID="Shape:1767" FillColor="-256" StrokeColor="-65283" Text="" TheC="0" TheT="0" TheZ="0" X1="453.3750000000001" X2="862.8750000000001" Y1="242.2875000000001" Y2="481.1625000000001" />
+        </Union>
+    </ROI>
+    <ROI ID="ROI:868">
+        <Union>
+            <Rectangle ID="Shape:1768" FillColor="-256" StrokeColor="-65283" Text="" TheC="0" TheT="0" TheZ="0" Height="154.69825576030746" Width="177.56789185846986" X="136.35047580394115" Y="644.0620035942259" />
+        </Union>
+    </ROI>
+</OME>
diff --git a/tests/test_ids.py b/tests/test_ids.py
@@ -1,8 +1,34 @@
+import pytest
+
+from ome_types import from_xml
 from ome_types.model import Line, Rectangle
 
 
-def test_shape_ids():
+def test_shape_ids() -> None:
     rect = Rectangle(x=0, y=0, width=1, height=1)
     line = Line(x1=0, y1=0, x2=1, y2=1)
     assert rect.id == "Shape:0"
     assert line.id == "Shape:1"
+
+
+def test_id_conversion() -> None:
+    """When converting ids, we should still be preserving references."""
+    XML_WITH_BAD_REFS = """<?xml version="1.0" ?>
+    <OME xmlns="http://www.openmicroscopy.org/Schemas/OME/2016-06">
+        <Instrument ID="Microscope">
+        </Instrument>
+        <Image ID="Image:0">
+            <InstrumentRef ID="Microscope"/>
+            <Pixels BigEndian="false" DimensionOrder="XYCZT" SizeC="3" SizeT="50"
+                SizeX="256" SizeY="256" SizeZ="5" ID="Pixels:0" Type="uint16">
+            </Pixels>
+        </Image>
+    </OME>
+    """
+    with pytest.warns(match="Casting invalid InstrumentID"):
+        ome = from_xml(XML_WITH_BAD_REFS)
+
+    assert ome.instruments[0].id == "Instrument:0"
+    assert ome.images[0].instrument_ref is not None
+    assert ome.images[0].instrument_ref.id == "Instrument:0"
+    assert ome.images[0].instrument_ref.ref is ome.instruments[0]