Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion dataprofiler/profilers/json_encoder.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
"""Contains ProfilerEncoder class."""

import json

import numpy as np
import pandas as pd

from ..labelers.base_data_labeler import BaseDataLabeler
from . import base_column_profilers, column_profile_compilers, numerical_column_stats
from . import (
base_column_profilers,
column_profile_compilers,
numerical_column_stats,
profiler_options,
)


class ProfileEncoder(json.JSONEncoder):
Expand All @@ -25,9 +31,12 @@ def default(self, to_serialize):
base_column_profilers.BaseColumnProfiler,
numerical_column_stats.NumericStatsMixin,
column_profile_compilers.BaseCompiler,
profiler_options.BaseOption,
),
):
return {"class": type(to_serialize).__name__, "data": to_serialize.__dict__}
elif isinstance(to_serialize, set):
return list(to_serialize)
elif isinstance(to_serialize, np.integer):
return int(to_serialize)
elif isinstance(to_serialize, np.ndarray):
Expand All @@ -47,4 +56,5 @@ def default(self, to_serialize):

elif callable(to_serialize):
return to_serialize.__name__

return json.JSONEncoder.default(self, to_serialize)
4 changes: 2 additions & 2 deletions dataprofiler/profilers/profiler_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,8 @@ def __init__(self, is_enabled: bool = True, max_k_modes: int = 5) -> None:

:ivar is_enabled: boolean option to enable/disable the option.
:vartype is_enabled: bool
:ivar top_k_modes: the max number of modes to return, if applicable
:vartype top_k_modes: int
:ivar max_k_modes: the max number of modes to return, if applicable
:vartype max_k_modes: int
"""
self.top_k_modes = max_k_modes
super().__init__(is_enabled=is_enabled)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import json

from dataprofiler.profilers.json_encoder import ProfileEncoder
from dataprofiler.profilers.profiler_options import BaseInspectorOptions
from dataprofiler.tests.profilers.profiler_options.test_boolean_option import (
TestBooleanOption,
Expand Down Expand Up @@ -47,3 +50,15 @@ def test_is_prop_enabled(self):

def test_eq(self):
    """Delegate to the parent test case's ``test_eq`` implementation."""
    super().test_eq()

def test_json_encode(self):
    """Check the JSON form of a disabled ``BaseInspectorOptions``.

    The option is dumped with ``ProfileEncoder`` and parsed back; the result
    must be a dict holding the class name under ``"class"`` and the
    instance attributes under ``"data"``.
    """
    inspector_options = BaseInspectorOptions(is_enabled=False)
    encoded = json.dumps(inspector_options, cls=ProfileEncoder)
    decoded = json.loads(encoded)

    self.assertDictEqual(
        {"class": "BaseInspectorOptions", "data": {"is_enabled": False}},
        decoded,
    )
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import json

from dataprofiler.profilers.json_encoder import ProfileEncoder
from dataprofiler.profilers.profiler_options import BooleanOption
from dataprofiler.tests.profilers.profiler_options.test_base_option import (
TestBaseOption,
Expand Down Expand Up @@ -87,3 +90,15 @@ def test_eq(self):
self.assertNotEqual(options, options2)
options2.is_enabled = False
self.assertEqual(options, options2)

def test_json_encode(self):
    """Check the JSON form of a disabled ``BooleanOption``.

    Round-trips the option through ``json.dumps`` (using ``ProfileEncoder``)
    and ``json.loads``, then compares against the expected class/data dict.
    """
    expected_payload = {
        "class": "BooleanOption",
        "data": {"is_enabled": False},
    }

    round_tripped = json.loads(
        json.dumps(BooleanOption(is_enabled=False), cls=ProfileEncoder)
    )

    self.assertDictEqual(expected_payload, round_tripped)
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import json

from dataprofiler.profilers.json_encoder import ProfileEncoder
from dataprofiler.profilers.profiler_options import CategoricalOptions
from dataprofiler.tests.profilers.profiler_options.test_base_inspector_options import (
TestBaseInspectorOptions,
Expand Down Expand Up @@ -81,3 +84,15 @@ def test_is_prop_enabled(self):

def test_eq(self):
    """Delegate to the parent test case's ``test_eq`` implementation."""
    super().test_eq()

def test_json_encode(self):
    """Check the JSON form of ``CategoricalOptions``.

    The option is built disabled with ``top_k_categories=5``; both
    attributes must appear in the ``"data"`` section of the encoded dict.
    """
    categorical_options = CategoricalOptions(is_enabled=False, top_k_categories=5)
    encoded = json.dumps(categorical_options, cls=ProfileEncoder)

    expected_data = {"is_enabled": False, "top_k_categories": 5}
    self.assertDictEqual(
        {"class": "CategoricalOptions", "data": expected_data},
        json.loads(encoded),
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import json

from dataprofiler.profilers.json_encoder import ProfileEncoder
from dataprofiler.profilers.profiler_options import CorrelationOptions
from dataprofiler.tests.profilers.profiler_options.test_base_inspector_options import (
TestBaseInspectorOptions,
)


class TestCorrelationOptions(TestBaseInspectorOptions):
    """JSON-encoding test for ``CorrelationOptions``."""

    def test_json_encode(self):
        """Check the JSON form of a disabled ``CorrelationOptions``.

        The configured column names must survive the dump/load round trip
        as a list under ``"data"``.
        """
        column_names = ["name", "age", "location"]
        correlation_options = CorrelationOptions(
            is_enabled=False, columns=["name", "age", "location"]
        )

        encoded = json.dumps(correlation_options, cls=ProfileEncoder)
        decoded = json.loads(encoded)

        self.assertDictEqual(
            {
                "class": "CorrelationOptions",
                "data": {"is_enabled": False, "columns": column_names},
            },
            decoded,
        )
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
pass
import json
from unittest import mock

from dataprofiler.labelers.base_data_labeler import BaseDataLabeler
from dataprofiler.profilers.json_encoder import ProfileEncoder
from dataprofiler.profilers.profiler_options import DataLabelerOptions
from dataprofiler.tests.profilers.profiler_options.test_base_inspector_options import (
TestBaseInspectorOptions,
Expand Down Expand Up @@ -168,3 +171,27 @@ def test_eq(self, *mocks):
self.assertNotEqual(options, options2)
options2.data_labeler_object._model = 7
self.assertEqual(options, options2)

def test_json_encode(self):
    """Check the JSON form of ``DataLabelerOptions`` holding a labeler.

    A mock specced on ``BaseDataLabeler`` stands in for a real labeler; its
    ``_default_model_loc`` is set to ``"test_loc"`` and the encoded output
    is expected to report that value under ``"from_library"``.
    """
    labeler_options = DataLabelerOptions()

    patcher = mock.patch(
        "dataprofiler.labelers.base_data_labeler.BaseDataLabeler",
        spec=BaseDataLabeler,
    )
    with patcher as labeler_mock:
        labeler_mock._default_model_loc = "test_loc"
        labeler_options.data_labeler_object = labeler_mock

    encoded = json.dumps(labeler_options, cls=ProfileEncoder)

    expected_data = {
        "is_enabled": True,
        "data_labeler_dirpath": None,
        "max_sample_size": None,
        "data_labeler_object": {"from_library": "test_loc"},
    }

    self.assertDictEqual(
        {"class": "DataLabelerOptions", "data": expected_data},
        json.loads(encoded),
    )
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import json

from dataprofiler.profilers.json_encoder import ProfileEncoder
from dataprofiler.profilers.profiler_options import DateTimeOptions
from dataprofiler.tests.profilers.profiler_options.test_base_inspector_options import (
TestBaseInspectorOptions,
Expand Down Expand Up @@ -28,3 +31,15 @@ def test_is_prop_enabled(self):

def test_eq(self):
    """Delegate to the parent test case's ``test_eq`` implementation."""
    super().test_eq()

def test_json_encode(self):
    """Check the JSON form of a default-constructed ``DateTimeOptions``.

    The expected payload shows ``is_enabled`` as ``True`` for an option
    created without arguments.
    """
    encoded = json.dumps(DateTimeOptions(), cls=ProfileEncoder)
    decoded = json.loads(encoded)

    self.assertDictEqual(
        {"class": "DateTimeOptions", "data": {"is_enabled": True}},
        decoded,
    )
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import json
from unittest import mock

from dataprofiler.profilers.json_encoder import ProfileEncoder
from dataprofiler.profilers.profiler_options import FloatOptions
from dataprofiler.tests.profilers.profiler_options.test_numerical_options import (
TestNumericalOptions,
Expand Down Expand Up @@ -36,3 +40,73 @@ def test_eq(self):
self.assertNotEqual(options, options2)
options2.precision.is_enabled = False
self.assertEqual(options, options2)

def test_json_encode(self):
    """Check the JSON form of a default-constructed ``FloatOptions``.

    Most attributes are expected to encode as enabled ``BooleanOption``
    entries. The ``mode``, ``histogram_and_quantiles`` and ``precision``
    entries are only checked for their class name; their ``"data"`` is
    matched with ``mock.ANY``.
    """
    boolean_attributes = (
        "min",
        "max",
        "median",
        "sum",
        "variance",
        "skewness",
        "kurtosis",
        "median_abs_deviation",
        "num_zeros",
        "num_negatives",
        "bias_correction",
    )

    # Every plain on/off statistic serializes to the same nested structure.
    expected_data = {
        attribute: {"class": "BooleanOption", "data": {"is_enabled": True}}
        for attribute in boolean_attributes
    }
    expected_data["mode"] = {"class": "ModeOption", "data": mock.ANY}
    expected_data["histogram_and_quantiles"] = {
        "class": "HistogramOption",
        "data": mock.ANY,
    }
    expected_data["precision"] = {"class": "PrecisionOptions", "data": mock.ANY}
    expected_data["is_enabled"] = True

    decoded = json.loads(json.dumps(FloatOptions(), cls=ProfileEncoder))

    self.assertDictEqual({"class": "FloatOptions", "data": expected_data}, decoded)
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import json

from dataprofiler.profilers.json_encoder import ProfileEncoder
from dataprofiler.profilers.profiler_options import HistogramOption

from .test_boolean_option import TestBooleanOption
Expand Down Expand Up @@ -170,3 +173,18 @@ def test_eq(self):
self.assertNotEqual(options, options2)
options2.bin_count_or_method = "sturges"
self.assertEqual(options, options2)

def test_json_encode(self):
    """Check the JSON form of a ``HistogramOption``.

    The option is built disabled with ``bin_count_or_method="doane"``; both
    values must appear under ``"data"`` after the dump/load round trip.
    """
    histogram_option = HistogramOption(is_enabled=False, bin_count_or_method="doane")
    decoded = json.loads(json.dumps(histogram_option, cls=ProfileEncoder))

    expected_data = {"bin_count_or_method": "doane", "is_enabled": False}
    self.assertDictEqual(
        {"class": "HistogramOption", "data": expected_data},
        decoded,
    )
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import json
from unittest import mock

from dataprofiler.profilers.json_encoder import ProfileEncoder
from dataprofiler.profilers.profiler_options import IntOptions
from dataprofiler.tests.profilers.profiler_options.test_numerical_options import (
TestNumericalOptions,
Expand Down Expand Up @@ -28,3 +32,69 @@ def test_is_numeric_stats_enabled(self):

def test_eq(self):
    """Delegate to the parent test case's ``test_eq`` implementation."""
    super().test_eq()

def test_json_encode(self):
    """Check the JSON form of a default-constructed ``IntOptions``.

    Most attributes are expected to encode as enabled ``BooleanOption``
    entries. The ``mode`` and ``histogram_and_quantiles`` entries are only
    checked for their class name; their ``"data"`` is matched with
    ``mock.ANY``.
    """
    boolean_attributes = (
        "min",
        "max",
        "median",
        "sum",
        "variance",
        "skewness",
        "kurtosis",
        "median_abs_deviation",
        "num_zeros",
        "num_negatives",
        "bias_correction",
    )

    # Every plain on/off statistic serializes to the same nested structure.
    expected_data = {
        attribute: {"class": "BooleanOption", "data": {"is_enabled": True}}
        for attribute in boolean_attributes
    }
    expected_data["mode"] = {"class": "ModeOption", "data": mock.ANY}
    expected_data["histogram_and_quantiles"] = {
        "class": "HistogramOption",
        "data": mock.ANY,
    }
    expected_data["is_enabled"] = True

    decoded = json.loads(json.dumps(IntOptions(), cls=ProfileEncoder))

    self.assertDictEqual({"class": "IntOptions", "data": expected_data}, decoded)
Loading