a better way to do this

wildlife-dynamics · atmorling · Sep 8, 2024 · Aug 20, 2024 · Aug 20, 2024 · Aug 20, 2024
commit 1c10a3396a9dfdd03899b1df3303491fab4bbae1
diff --git a/ecoscope/analysis/classifier.py b/ecoscope/analysis/classifier.py
@@ -1,5 +1,7 @@
 import pandas as pd
 import matplotlib as mpl
+from ecoscope.base.utils import hex_to_rgba
+from ecoscope.base._dataclasses import ColorStyleLookup
 
 try:
     import mapclassify
@@ -60,7 +62,7 @@ def apply_classification(
     Returns:
     result: an array of corresponding labels of the input data.
     """
-    assert input_column_name in dataframe.columns
+    assert input_column_name in dataframe.columns, "input column must exist on dataframe"
     if not output_column_name:
         output_column_name = f"{input_column_name}_classified"
 
@@ -77,17 +79,33 @@ def apply_classification(
     return dataframe
 
 
-def create_color_dict(dataframe, column_name, cmap, labels=None):
-    assert column_name in dataframe.columns
+def create_color_lookup(dataframe, column_name, cmap):
+    """
+    Creates a color lookup from the values in the provided dataframe column and colormap
+
+    Args:
+    dataframe (pd.DataFrame): The data.
+    column_name (str): The dataframe column whose unique values will be keys in the lookup.
+    cmap (str, list): Either a named mpl.colormap or a list of string hex values.
+
+    Returns:
+    The generated ColorStyleLookup.
+    """
+    assert column_name in dataframe.columns, "input column must exist on dataframe"
 
     if isinstance(cmap, list):
-        assert len(cmap) == dataframe[column_name].nunique()
-        cmap = pd.Series(cmap, index=dataframe[column_name].unique())
+        nunique = dataframe[column_name].nunique()
+        assert len(cmap) >= nunique, f"cmap list must contain at least as many values as unique in {column_name}"
+        cmap = [hex_to_rgba(x) for x in cmap]
+        cmap = pd.Series(cmap[:nunique], index=dataframe[column_name].unique())
     if isinstance(cmap, str):
         cmap = mpl.colormaps[cmap]
         cmap = cmap.resampled(dataframe[column_name].nunique())
-        cmap = pd.Series([color for color in cmap.colors], index=dataframe[column_name].unique())
+        # convert to hex first to put each channel in the 0-255 integer range, then to an RGBA tuple
+        cmap = pd.Series(
+            [hex_to_rgba(mpl.colors.to_hex(color)) for color in cmap.colors], index=dataframe[column_name].unique()
+        )
 
     vals = dict([(classification, cmap[classification]) for classification in dataframe[column_name].values])
 
-    return vals
+    return ColorStyleLookup(column_name=column_name, lookup=vals)
diff --git a/ecoscope/base/__init__.py b/ecoscope/base/__init__.py
@@ -10,6 +10,7 @@
     cachedproperty,
     create_meshgrid,
     groupby_intervals,
+    hex_to_rgba,
 )
 
 __all__ = [
@@ -24,4 +25,5 @@
     "cachedproperty",
     "create_meshgrid",
     "groupby_intervals",
+    "hex_to_rgba",
 ]
diff --git a/ecoscope/base/_dataclasses.py b/ecoscope/base/_dataclasses.py
@@ -69,3 +69,15 @@ class TrajSegFilter:
     max_time_secs: float = float("inf")
     min_speed_kmhr: float = 0.0
     max_speed_kmhr: float = float("inf")
+
+
+@dataclass
+class NumericStyleLookup:
+    column_name: str
+    lookup: typing.Dict[str, float]
+
+
+@dataclass
+class ColorStyleLookup:
+    column_name: str
+    lookup: typing.Dict[str, typing.Tuple[int, int, int, int]]
diff --git a/ecoscope/base/utils.py b/ecoscope/base/utils.py
@@ -262,3 +262,21 @@ def add_temporal_index(df, index_name, time_col, directive):
         return df.set_index(index_name, append=True)
     else:
         return df
+
+
+def hex_to_rgba(input: str) -> tuple:
+    if not input:
+        raise ValueError("Input cannot be empty")
+    hex = input.strip("#")
+
+    if len(hex) != 6 and len(hex) != 8:
+        raise ValueError("Invalid hex length, must be 6 or 8")
+
+    # Max alpha if none provided
+    if len(hex) == 6:
+        hex = f"{hex}FF"
+
+    try:
+        return tuple(int(hex[i : i + 2], 16) for i in (0, 2, 4, 6))
+    except ValueError:
+        raise ValueError(f"Invalid hex string, {input}")
diff --git a/tests/test_classifier.py b/tests/test_classifier.py
@@ -1,6 +1,6 @@
 import pytest
 import pandas as pd
-from ecoscope.analysis.classifier import apply_classification, create_color_dict
+from ecoscope.analysis.classifier import apply_classification, create_color_lookup
 
 
 @pytest.fixture
@@ -46,40 +46,57 @@ def test_classify_with_invalid_scheme(sample_df):
         apply_classification(sample_df, input_column_name="value", scheme="InvalidScheme")
 
 
-def test_color_dict(sample_df):
+def test_color_lookup(sample_df):
 
     classified = apply_classification(sample_df, input_column_name="value", scheme="equal_interval")
     cmap = "viridis"
 
-    color_dict = create_color_dict(classified, "value_classified", cmap)
-    # check that our classification bins are the keys of the color_dict
-    assert classified["value_classified"].values.tolist() == list(color_dict.keys())
+    color_lookup = create_color_lookup(classified, "value_classified", cmap)
+    assert color_lookup.column_name == "value_classified"
+    # check that our classification bins are the keys of the color_lookup
+    assert classified["value_classified"].values.tolist() == list(color_lookup.lookup.keys())
 
 
-def test_color_dict_k2(sample_df):
+def test_color_lookup_k2(sample_df):
 
     classified = apply_classification(sample_df, input_column_name="value", scheme="equal_interval", k=2)
     cmap = "viridis"
 
-    color_dict = create_color_dict(classified, "value_classified", cmap)
-    # check that our classification bins are the keys of the color_dict
-    assert classified["value_classified"].unique().tolist() == list(color_dict.keys())
+    color_lookup = create_color_lookup(classified, "value_classified", cmap)
+    assert color_lookup.column_name == "value_classified"
+    # check that our classification bins are the keys of the color_lookup
+    assert classified["value_classified"].unique().tolist() == list(color_lookup.lookup.keys())
 
 
-def test_speed_parity(movebank_relocations):
+def test_color_lookup_cmap_list(movebank_relocations):
     trajectory = movebank_relocations.trajectories.from_relocations()
     classified = apply_classification(
         trajectory, "speed_kmhr", output_column_name="speed_bins", k=6, scheme="equal_interval"
     )
 
+    # With len(cmap)==7 we're also testing that the input cmap can be larger than the number of categories
     cmap = [
         "#1a9850",
         "#91cf60",
         "#d9ef8b",
         "#fee08b",
         "#fc8d59",
         "#d73027",
+        "#FFFFFF",
     ]
 
-    color_dict = create_color_dict(classified, "speed_bins", cmap)
-    assert classified["speed_bins"].unique().tolist() == list(color_dict.keys())
+    color_lookup = create_color_lookup(classified, "speed_bins", cmap)
+    assert color_lookup.column_name == "speed_bins"
+    assert classified["speed_bins"].unique().tolist() == list(color_lookup.lookup.keys())
+
+
+def test_color_lookup_cmap_bad_list(movebank_relocations):
+    trajectory = movebank_relocations.trajectories.from_relocations()
+    classified = apply_classification(
+        trajectory, "speed_kmhr", output_column_name="speed_bins", k=6, scheme="equal_interval"
+    )
+
+    cmap = ["#1a9850"]
+
+    with pytest.raises(AssertionError):
+        create_color_lookup(classified, "speed_bins", cmap)
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -1,5 +1,6 @@
+import pytest
 import pandas as pd
-
+from ecoscope.base.utils import hex_to_rgba
 from ecoscope.base.utils import (
     create_meshgrid,
     groupby_intervals,
@@ -129,3 +130,25 @@ def test_modis_offset():
     modis = ModisBegin()
     assert modis.apply(ts1) == pd.Timestamp("2022-01-17 00:00:00+0")
     assert modis.apply(ts2) == pd.Timestamp("2023-01-01 00:00:00+0")
+
+
+@pytest.mark.parametrize(
+    "hex_str,expected",
+    [
+        ("#000000", (0, 0, 0, 255)),
+        ("FFFFFF00", (255, 255, 255, 0)),
+        ("#4444AABB", (68, 68, 170, 187)),
+        ("#123456", (18, 52, 86, 255)),
+    ],
+)
+def test_hex_to_rgba(hex_str, expected):
+    assert hex_to_rgba(hex_str) == expected
+
+
+@pytest.mark.parametrize(
+    "hex_str",
+    ["hello", "", "#FF00FNFF", None],
+)
+def test_hex_to_rgba_invalid(hex_str):
+    with pytest.raises(ValueError):
+        hex_to_rgba(hex_str)