Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dataprofiler/profilers/json_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .datetime_column_profile import DateTimeColumn
from .float_column_profile import FloatColumn
from .int_column_profile import IntColumn
from .order_column_profile import OrderColumn


def get_column_profiler_class(class_name: str) -> Type[BaseColumnProfiler]:
Expand All @@ -25,6 +26,7 @@ def get_column_profiler_class(class_name: str) -> Type[BaseColumnProfiler]:
FloatColumn.__name__: FloatColumn,
IntColumn.__name__: IntColumn,
DateTimeColumn.__name__: DateTimeColumn,
OrderColumn.__name__: OrderColumn,
}

profile_class: Optional[Type[BaseColumnProfiler]] = profiles.get(class_name)
Expand Down
14 changes: 14 additions & 0 deletions dataprofiler/profilers/order_column_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,20 @@ def report(self, remove_disabled_flag: bool = False) -> dict:
"""
return self.profile

@classmethod
def load_from_dict(cls, data):
"""
Build a profiler from its JSON-decoded dictionary representation.

This is an alternate constructor: it delegates entirely to the parent
class's ``load_from_dict`` and returns whatever that call produces.

:param data: dictionary with attributes and values.
:type data: dict[string, Any]

:return: Profiler with attributes populated.
:rtype: OrderColumn
"""
# This is an ambiguous call to super classes.
# NOTE(review): presumably "ambiguous" refers to which base class ends up
# supplying load_from_dict -- the class's bases are not visible here; confirm.
return super().load_from_dict(data)

@property
def profile(self) -> dict:
"""
Expand Down
39 changes: 39 additions & 0 deletions dataprofiler/tests/profilers/test_order_column_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
import pandas as pd

from dataprofiler.profilers import OrderColumn
from dataprofiler.profilers.json_decoder import load_column_profile
from dataprofiler.profilers.json_encoder import ProfileEncoder

from .. import test_utils
from . import utils

# This is taken from: https://github.com/rlworkgroup/dowel/pull/36/files
# undo when cpython#4800 is merged.
Expand Down Expand Up @@ -413,3 +415,40 @@ def test_json_encode_after_update(self):
)

self.assertEqual(serialized, expected)

def test_json_decode(self):
# Round-trip a freshly constructed OrderColumn (no update() call, name None)
# through ProfileEncoder and load_column_profile, then check that the
# decoded profile matches the original.
fake_profile_name = None
expected_profile = OrderColumn(fake_profile_name)

serialized = json.dumps(expected_profile, cls=ProfileEncoder)
deserialized = load_column_profile(json.loads(serialized))

utils.assert_profiles_equal(deserialized, expected_profile)

def test_json_decode_after_update(self):
fake_profile_name = "Fake profile name"

# Build expected orderColumn
df_order = pd.Series(["za", "z", "c", "a"])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might have been interesting to have this ordered, then have the update below keep it ordered, and validate the expected output order. Maybe that can be added quickly.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't that, to some extent, what line 454 does with `assert deserialized._last_value == "zza"`?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see what he's saying there: the order attribute is not consistent after updating.

# Profile the initial series, then round-trip the populated profile through
# the JSON encoder/decoder.
expected_profile = OrderColumn(fake_profile_name)

# NOTE(review): mock_timeit presumably stubs out timing so the recorded
# times are deterministic across both profiles -- confirm in utils.
with utils.mock_timeit():
expected_profile.update(df_order)

serialized = json.dumps(expected_profile, cls=ProfileEncoder)
deserialized = load_column_profile(json.loads(serialized))

utils.assert_profiles_equal(deserialized, expected_profile)

df_order = pd.Series(
[
"c",  # add existing
"zza",  # add new
]
)

# validating update after deserialization
deserialized.update(df_order)

# 4 values from the first series plus the 2 values added after decoding.
assert deserialized.sample_size == 6
# "zza" is the final element of the second series.
assert deserialized._last_value == "zza"