Skip to content

Commit

Permalink
Merge branch 'v2' into test-types
Browse files Browse the repository at this point in the history
  • Loading branch information
tlambert03 authored Jul 4, 2023
2 parents 33f0aff + f29ac5d commit da33693
Show file tree
Hide file tree
Showing 8 changed files with 201 additions and 47 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ repos:
exclude: ^tests|^docs

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.275
rev: v0.0.276
hooks:
- id: ruff
args: [--fix]
Expand Down
6 changes: 6 additions & 0 deletions src/ome_autogen/_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def build_import_patterns(cls) -> dict[str, dict]:
if o.module_name
}
)
patterns["ome_types._mixins._util"] = {"new_uuid": ["default_factory=new_uuid"]}
return {key: patterns[key] for key in sorted(patterns)}

def field_default_value(self, attr: Attr, ns_map: dict | None = None) -> str:
Expand All @@ -125,6 +126,11 @@ def format_arguments(self, kwargs: dict, indent: int = 0) -> str:
if kwargs.get("default") in factorize:
kwargs = {"default_factory": kwargs.pop("default"), **kwargs}

# uncomment this to use new_uuid as the default_factory for all UUIDs
# but then we have an equality checking problem in the tests
# if kwargs.get("metadata", {}).get("pattern", "").startswith("(urn:uuid:"):
# kwargs.pop("default", None)
# kwargs = {"default_factory": "new_uuid", **kwargs}
return super().format_arguments(kwargs, indent)

def constant_name(self, name: str, class_name: str) -> str:
Expand Down
49 changes: 4 additions & 45 deletions src/ome_types/_mixins/_base_type.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import contextlib
import re
import warnings
from datetime import datetime
from enum import Enum
from textwrap import indent
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Sequence, Set, cast
from typing import TYPE_CHECKING, Any, ClassVar, Optional, Sequence, Set, cast

from pydantic import BaseModel, validator

from ome_types._mixins._ids import validate_id
from ome_types.units import ureg

if TYPE_CHECKING:
Expand All @@ -17,7 +16,7 @@
# Default value to support automatic numbering for id field values.
AUTO_SEQUENCE = "__auto_sequence__"

_COUNTERS: Dict[str, int] = {}

_UNIT_FIELD = "{}_unit"
_QUANTITY_FIELD = "{}_quantity"
DEPRECATED_NAMES = {
Expand Down Expand Up @@ -105,47 +104,7 @@ def __repr__(self) -> str:
body = ""
return f"{name}({body})"

@validator("id", pre=True, always=True, check_fields=False)
@classmethod
def _validate_id(cls, value: Any) -> Any:
    """Pydantic validator for ID fields in OME models.

    Accepted inputs and what happens to them:

    * a string matching the field's regex is returned unchanged; if the part
      after the last colon is an integer, the per-name counter is raised to at
      least that value.
    * a string that does not match the regex is replaced by a generated ID
      (re-using its trailing integer when it has one) and a warning is issued.
    * an integer is formatted as ``"<id_name>:<value>"`` and the counter is
      raised to at least that value.
    * the ``AUTO_SEQUENCE`` sentinel yields the next value of the counter.

    Raises
    ------
    ValueError
        If ``value`` is neither a string, an integer, nor ``AUTO_SEQUENCE``.
    """
    # FIXME: clean this up
    id_field = cls.__fields__["id"]
    id_regex = cast(str, id_field.field_info.regex)
    # the ID name is taken as the third-from-last ":"-separated regex segment
    id_name = id_regex.split(":")[-3]

    current_count = _COUNTERS.setdefault(id_name, -1)
    if isinstance(value, str) and value != AUTO_SEQUENCE:
        # only the trailing segment (after the last colon) is of interest
        v_id = value.rsplit(":", 1)[-1]
        if not re.match(id_regex, value):
            # ``isdecimal`` rather than ``isnumeric``: the latter is also true
            # for characters such as "²" or "½" which ``int()`` cannot parse,
            # so it would raise instead of falling back to AUTO_SEQUENCE.
            newname = cls._validate_id(
                int(v_id) if v_id.isdecimal() else AUTO_SEQUENCE
            )
            warnings.warn(
                f"Casting invalid {id_name}ID {value!r} to {newname!r}",
                stacklevel=2,
            )
            return newname

        # valid ID: keep it, and bump the counter when the suffix is an integer
        # (not all IDs have integers after the colon)
        with contextlib.suppress(ValueError):
            _COUNTERS[id_name] = max(current_count, int(v_id))
        return value

    if isinstance(value, int):
        _COUNTERS[id_name] = max(current_count, value)
    elif value == AUTO_SEQUENCE:
        # just increment the counter
        _COUNTERS[id_name] += 1
        value = _COUNTERS[id_name]
    else:
        raise ValueError(f"Invalid ID value: {value!r}, {type(value)}")

    return f"{id_name}:{value}"
_v = validator("id", pre=True, always=True, check_fields=False)(validate_id)

def __getattr__(self, key: str) -> Any:
cls_name = self.__class__.__name__
Expand Down
84 changes: 84 additions & 0 deletions src/ome_types/_mixins/_ids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from __future__ import annotations

import re
import warnings
from contextlib import suppress
from typing import TYPE_CHECKING, Any, Final, cast

if TYPE_CHECKING:
from pydantic import BaseModel

# Sentinel default for ``id`` fields: when the validator receives this value it
# assigns the next number in the per-name sequence instead of using it as-is.
AUTO_SEQUENCE: Final = "__auto_sequence__"
# Map of id_name -> highest integer ID value seen so far for that name.
# Entries are created lazily by ``validate_id`` with a starting value of -1.
ID_COUNTER: dict[str, int] = {}

# Map of (id_name, original invalid id string) -> the replacement id that
# ``validate_id`` generated for it, so later occurrences of the same invalid
# string receive the same replacement.
# NOTE: this is cleared in OMEMixin.__init__, so that the set of converted IDs
# is unique to each OME instance
CONVERTED_IDS: dict[tuple[str, str], str] = {}


def _get_id_name_and_pattern(cls: type[BaseModel]) -> tuple[str, str]:
    """Return ``(id_name, id_pattern)`` for a model class that has an ``id`` field.

    ``id_pattern`` is the regex attached to the ``id`` field; ``id_name`` is the
    third-from-last segment of that regex when it is split on ``":"``.

    No error handling is done on purpose: a class without an ``id`` field
    raises ``KeyError``, since this helper is only meant for classes that
    have one.
    """
    pattern = cast(str, cls.__fields__["id"].field_info.regex)
    segments = pattern.split(":")
    return segments[-3], pattern


def validate_id(cls: type[BaseModel], value: int | str) -> Any:
    """Pydantic validator for ID fields in OME models.

    Depending on ``value``:

    1. The ``AUTO_SEQUENCE`` sentinel: use the next value in the sequence.
    2. A string that was already converted earlier: return the stored
       replacement.
    3. A string matching the ID pattern: return it unchanged, raising the
       counter if the trailing integer is higher than the current maximum.
    4. Any other string: build a proper ID from its trailing integer if it has
       one (otherwise from the next value in the sequence), remember the
       conversion, warn, and return the new ID.
    5. An integer: prefix it with the ID name taken from the pattern.

    ``ID_COUNTER`` stores the maximum previously-seen value per ID name.

    Raises
    ------
    ValueError
        If ``value`` is neither a string nor an integer.
    """
    id_name, id_pattern = _get_id_name_and_pattern(cls)
    highest_seen = ID_COUNTER.setdefault(id_name, -1)

    if value == AUTO_SEQUENCE:
        # special sentinel: hand out the next number and record it
        next_value = ID_COUNTER[id_name] + 1
        ID_COUNTER[id_name] = max(highest_seen, next_value)
        return f"{id_name}:{next_value}"

    if isinstance(value, str):
        cache_key = (id_name, value)
        if cache_key in CONVERTED_IDS:
            # XXX: possible bug
            # if the same invalid value is used across multiple documents
            # we'll be replacing it with the same converted id here
            return CONVERTED_IDS[cache_key]

        # whatever follows the last colon (the whole string if there is none)
        suffix: str = value.rpartition(":")[2]

        if re.match(id_pattern, value):
            # already valid: keep as-is, but track the highest integer seen
            # (not all IDs have integers after the colon)
            with suppress(ValueError):
                ID_COUNTER[id_name] = max(highest_seen, int(suffix))
            return value

        # invalid: re-use the integer part when there is one, otherwise take
        # the next number in the sequence
        if suffix.isdecimal():
            replacement_number = int(suffix)
        else:
            replacement_number = highest_seen + 1
        converted = validate_id(cls, replacement_number)
        # remember the conversion so we can use it elsewhere
        CONVERTED_IDS[cache_key] = converted

        warnings.warn(
            f"Casting invalid {id_name}ID {value!r} to {converted!r}",
            stacklevel=2,
        )
        return converted

    if not isinstance(value, int):
        raise ValueError(f"Invalid ID value: {value!r}, {type(value)}")

    # plain integer: make sure the counter is at least this value
    ID_COUNTER[id_name] = max(highest_seen, value)
    return f"{id_name}:{value}"
3 changes: 3 additions & 0 deletions src/ome_types/_mixins/_ome.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import TYPE_CHECKING, Any, cast

from ome_types._mixins._base_type import OMEType
from ome_types._mixins._ids import CONVERTED_IDS

if TYPE_CHECKING:
from pathlib import Path
Expand All @@ -13,6 +14,8 @@

class OMEMixin:
def __init__(self, **data: Any) -> None:
    """Initialize the instance, resetting the converted-ID cache beforehand.

    All keyword arguments are forwarded unchanged to the next ``__init__`` in
    the MRO; afterwards ``self._link_refs()`` is called.
    """
    # Clear the cache of converted IDs, so that they are unique to each OME instance
    # (must happen before super().__init__, which runs the field validators)
    CONVERTED_IDS.clear()
    super().__init__(**data)
    # NOTE(review): _link_refs is defined outside this view; presumably it
    # connects reference objects to their targets -- confirm.
    self._link_refs()

Expand Down
6 changes: 6 additions & 0 deletions src/ome_types/_mixins/_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import uuid


def new_uuid() -> str:
    """Return a freshly generated random (version 4) UUID in URN form.

    The result looks like ``"urn:uuid:xxxxxxxx-xxxx-4xxx-xxxx-xxxxxxxxxxxx"``.
    """
    return uuid.uuid4().urn
70 changes: 70 additions & 0 deletions tests/data/transfer.ome.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<OME xmlns="http://www.openmicroscopy.org/Schemas/OME/2016-06" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openmicroscopy.org/Schemas/OME/2016-06 http://www.openmicroscopy.org/Schemas/OME/2016-06/ome.xsd">
<Image ID="Image:1678" Name="combined_result.tiff">
<Description />
<Pixels DimensionOrder="XYZCT" ID="Pixels:1678" SizeC="1" SizeT="1" SizeX="1024" SizeY="1024" SizeZ="1" Type="uint16">
<MetadataOnly />
</Pixels>
<ROIRef ID="ROI:866" />
<ROIRef ID="ROI:867" />
<ROIRef ID="ROI:868" />
<AnnotationRef ID="Annotation:2209" />
<AnnotationRef ID="Annotation:2210" />
<AnnotationRef ID="Annotation:2551" />
<AnnotationRef ID="Annotation:-242965625845933864455559976333404928513" />
</Image>
<Image ID="Image:1679" Name="combined_result.tiff">
<Description />
<Pixels DimensionOrder="XYZCT" ID="Pixels:1679" SizeC="1" SizeT="1" SizeX="1024" SizeY="1024" SizeZ="1" Type="uint16">
<MetadataOnly />
</Pixels>
<ROIRef ID="ROI:866" />
<ROIRef ID="ROI:867" />
<ROIRef ID="ROI:868" />
<AnnotationRef ID="Annotation:2209" />
<AnnotationRef ID="Annotation:2210" />
<AnnotationRef ID="Annotation:2551" />
<AnnotationRef ID="Annotation:-242965625845933864455559976333404928514" />
</Image>
<StructuredAnnotations>
<MapAnnotation ID="Annotation:2209" Namespace="openmicroscopy.org/omero/client/mapAnnotation">
<Value>
<M K="species"> </M>
<M K="occupation"> </M>
<M K="first name"> </M>
<M K="surname">this is a test to see if the kv pairs in omero have any length limits. I don't think they do, but I will write something relatively long just so I can double-check whether that is the case or not.</M>
</Value>
</MapAnnotation>
<TagAnnotation ID="Annotation:2551">
<Value>simple_tag</Value>
</TagAnnotation>
<CommentAnnotation ID="Annotation:-242965625845933864455559976333404928513" Namespace="Image:1678">
<Value>root_0/2022-01/14/18-30-55.264/combined_result.tiff</Value>
</CommentAnnotation>
<CommentAnnotation ID="Annotation:-242965625845933864455559976333404928514" Namespace="Image:1679">
<Value>root_0/2022-01/14/18-30-55.264/combined_result.tiff</Value>
</CommentAnnotation>
<MapAnnotation ID="Annotation:2210" Namespace="jax.org/jax/example/namespace">
<Value>
<M K="species"> </M>
<M K="occupation"> </M>
<M K="first name"> </M>
<M K="surname"> </M>
</Value>
</MapAnnotation>
</StructuredAnnotations>
<ROI ID="ROI:866">
<Union>
<Line ID="Shape:1766" FillColor="-256" StrokeColor="-65283" Text="" TheC="0" TheT="0" TheZ="0" X1="321.7500000000001" X2="715.6500000000001" Y1="546.4875000000001" Y2="667.3875" />
</Union>
</ROI>
<ROI ID="ROI:867">
<Union>
<Line ID="Shape:1767" FillColor="-256" StrokeColor="-65283" Text="" TheC="0" TheT="0" TheZ="0" X1="453.3750000000001" X2="862.8750000000001" Y1="242.2875000000001" Y2="481.1625000000001" />
</Union>
</ROI>
<ROI ID="ROI:868">
<Union>
<Rectangle ID="Shape:1768" FillColor="-256" StrokeColor="-65283" Text="" TheC="0" TheT="0" TheZ="0" Height="154.69825576030746" Width="177.56789185846986" X="136.35047580394115" Y="644.0620035942259" />
</Union>
</ROI>
</OME>
28 changes: 27 additions & 1 deletion tests/test_ids.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,34 @@
import pytest

from ome_types import from_xml
from ome_types.model import Line, Rectangle


def test_shape_ids():
def test_shape_ids() -> None:
    """Auto-assigned IDs of different shape types come from one ``Shape`` sequence."""
    # construction order matters: each new shape takes the next number
    shapes = [
        Rectangle(x=0, y=0, width=1, height=1),
        Line(x1=0, y1=0, x2=1, y2=1),
    ]
    rect_id, line_id = (shape.id for shape in shapes)
    assert rect_id == "Shape:0"
    assert line_id == "Shape:1"


def test_id_conversion() -> None:
    """Converting an invalid ID must not break references to it.

    The document below uses the invalid instrument ID ``"Microscope"`` both on
    the ``Instrument`` itself and on the ``InstrumentRef`` inside the image;
    after parsing, both must carry the same converted ID and the reference
    must resolve to the instrument object.
    """
    xml_with_bad_refs = """<?xml version="1.0" ?>
<OME xmlns="http://www.openmicroscopy.org/Schemas/OME/2016-06">
<Instrument ID="Microscope">
</Instrument>
<Image ID="Image:0">
<InstrumentRef ID="Microscope"/>
<Pixels BigEndian="false" DimensionOrder="XYCZT" SizeC="3" SizeT="50"
SizeX="256" SizeY="256" SizeZ="5" ID="Pixels:0" Type="uint16">
</Pixels>
</Image>
</OME>
"""
    # parsing is expected to warn about the invalid instrument ID
    with pytest.warns(match="Casting invalid InstrumentID"):
        ome = from_xml(xml_with_bad_refs)

    instrument = ome.instruments[0]
    assert instrument.id == "Instrument:0"

    instrument_ref = ome.images[0].instrument_ref
    assert instrument_ref is not None
    assert instrument_ref.id == "Instrument:0"
    assert instrument_ref.ref is instrument

0 comments on commit da33693

Please sign in to comment.