Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename PIIAnonymizer to AnonymizedFaker #486

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rdt/performance/performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
'ClusterBasedNormalizer': {
'model_missing_values': True
},
'PIIAnonymizer': {
'AnonymizedFaker': {
'model_missing_values': True
},
}
Expand Down
4 changes: 2 additions & 2 deletions rdt/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from rdt.transformers.datetime import OptimizedTimestampEncoder, UnixTimestampEncoder
from rdt.transformers.null import NullTransformer
from rdt.transformers.numerical import ClusterBasedNormalizer, FloatFormatter, GaussianNormalizer
from rdt.transformers.pii.anonymizer import PIIAnonymizer
from rdt.transformers.pii.anonymizer import AnonymizedFaker

__all__ = [
'BaseTransformer',
Expand All @@ -29,7 +29,7 @@
'OneHotEncoder',
'OptimizedTimestampEncoder',
'UnixTimestampEncoder',
'PIIAnonymizer',
'AnonymizedFaker',
'get_transformer_name',
'get_transformer_class',
'get_transformer_instance',
Expand Down
4 changes: 2 additions & 2 deletions rdt/transformers/pii/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Personal Identifiable Information Transformers module."""

from rdt.transformers.pii.anonymizer import PIIAnonymizer
from rdt.transformers.pii.anonymizer import AnonymizedFaker

__all__ = [
'PIIAnonymizer',
'AnonymizedFaker',
]
2 changes: 1 addition & 1 deletion rdt/transformers/pii/anonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from rdt.transformers.null import NullTransformer


class PIIAnonymizer(BaseTransformer):
class AnonymizedFaker(BaseTransformer):
"""Personal Identifiable Information Anonymizer using Faker.

This transformer will drop a column and regenerate it with the previously specified
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
import numpy as np
import pandas as pd

from rdt.transformers.pii import PIIAnonymizer
from rdt.transformers.pii import AnonymizedFaker


def test_piianonymizer():
"""End to end test with the default settings of the ``PIIAnonymizer``."""
def test_anonymizedfaker():
"""End to end test with the default settings of the ``AnonymizedFaker``."""
data = pd.DataFrame({
'id': [1, 2, 3, 4, 5],
'username': ['a', 'b', 'c', 'd', 'e']
})

instance = PIIAnonymizer()
instance = AnonymizedFaker()
transformed = instance.fit_transform(data, 'username')
reverse_transform = instance.reverse_transform(transformed)

Expand All @@ -24,8 +24,8 @@ def test_piianonymizer():
assert len(reverse_transform['username']) == 5


def test_piianonymizer_custom_provider():
"""End to end test with a custom provider and function for the ``PIIAnonymizer``."""
def test_anonymizedfaker_custom_provider():
"""End to end test with a custom provider and function for the ``AnonymizedFaker``."""
data = pd.DataFrame({
'id': [1, 2, 3, 4, 5],
'username': ['a', 'b', 'c', 'd', 'e'],
Expand All @@ -38,7 +38,7 @@ def test_piianonymizer_custom_provider():
]
})

instance = PIIAnonymizer('credit_card', 'credit_card_number')
instance = AnonymizedFaker('credit_card', 'credit_card_number')
transformed = instance.fit_transform(data, 'cc')
reverse_transform = instance.reverse_transform(transformed)

Expand All @@ -51,14 +51,14 @@ def test_piianonymizer_custom_provider():
assert len(reverse_transform['cc']) == 5


def test_piianonymizer_with_nans():
"""End to end test with the default settings of the ``PIIAnonymizer`` with ``nan`` values."""
def test_anonymizedfaker_with_nans():
"""End to end test with the default settings of the ``AnonymizedFaker`` with ``nan`` values."""
data = pd.DataFrame({
'id': [1, 2, 3, 4, 5],
'username': ['a', np.nan, 'c', 'd', 'e']
})

instance = PIIAnonymizer(model_missing_values=True)
instance = AnonymizedFaker(model_missing_values=True)
transformed = instance.fit_transform(data, 'username')
reverse_transform = instance.reverse_transform(transformed)

Expand All @@ -72,8 +72,8 @@ def test_piianonymizer_with_nans():
assert reverse_transform['username'].isna().sum() == 1


def test_piianonymizer_custom_provider_with_nans():
"""End to end test with a custom provider for the ``PIIAnonymizer`` with `` nans``."""
def test_anonymizedfaker_custom_provider_with_nans():
"""End to end test with a custom provider for the ``AnonymizedFaker`` with `` nans``."""
data = pd.DataFrame({
'id': [1, 2, 3, 4, 5],
'username': ['a', 'b', 'c', 'd', 'e'],
Expand All @@ -86,7 +86,7 @@ def test_piianonymizer_custom_provider_with_nans():
]
})

instance = PIIAnonymizer(
instance = AnonymizedFaker(
'credit_card',
'credit_card_number',
model_missing_values=True
Expand Down
66 changes: 33 additions & 33 deletions tests/unit/transformers/pii/test_anonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@

from rdt.errors import Error
from rdt.transformers.null import NullTransformer
from rdt.transformers.pii.anonymizer import PIIAnonymizer
from rdt.transformers.pii.anonymizer import AnonymizedFaker


class TestPIIAnonymizer:
"""Test class for ``PIIAnonymizer``."""
class TestAnonymizedFaker:
"""Test class for ``AnonymizedFaker``."""

@patch('rdt.transformers.pii.anonymizer.faker')
@patch('rdt.transformers.pii.anonymizer.getattr')
Expand All @@ -27,7 +27,7 @@ def test_check_provider_function_baseprovider(self, mock_getattr, mock_faker):
mock_getattr.side_effect = ['module', 'provider', None]

# Run
PIIAnonymizer.check_provider_function('BaseProvider', 'function_name')
AnonymizedFaker.check_provider_function('BaseProvider', 'function_name')

# Assert
assert mock_getattr.call_args_list[0] == call(mock_faker.providers, 'BaseProvider')
Expand All @@ -46,7 +46,7 @@ def test_check_provider_function_other_providers(self, mock_getattr, mock_faker)
mock_getattr.side_effect = ['module', 'provider_class', None]

# Run
PIIAnonymizer.check_provider_function('provider_name', 'function_name')
AnonymizedFaker.check_provider_function('provider_name', 'function_name')

# Assert
assert mock_getattr.call_args_list[0] == call(mock_faker.providers, 'provider_name')
Expand All @@ -68,15 +68,15 @@ def test_check_provider_function_raise_attribute_error(self):

# Run
with pytest.raises(Error, match=expected_message):
PIIAnonymizer.check_provider_function('TestProvider', 'TestFunction')
AnonymizedFaker.check_provider_function('TestProvider', 'TestFunction')

def test__function(self):
"""Test that `_function`.

The method `_function` should return a call from the `instance.faker.provider.<function>`.

Mock:
- Instance of 'PIIAnonymizer'.
- Instance of 'AnonymizedFaker'.
- Faker instance.
- A function for the faker instance.

Expand All @@ -96,7 +96,7 @@ def test__function(self):
instance.function_kwargs = {'type': 'int'}

# Run
result = PIIAnonymizer._function(instance)
result = AnonymizedFaker._function(instance)

# Assert
function.assert_called_once_with(type='int')
Expand All @@ -121,7 +121,7 @@ def test__check_locales(self, mock_warnings, mock_importlib):
mock_importlib.util.find_spec.side_effect = [None, 'en_US']

# Run
PIIAnonymizer._check_locales(instance)
AnonymizedFaker._check_locales(instance)

# Assert
expected_message = (
Expand All @@ -133,7 +133,7 @@ def test__check_locales(self, mock_warnings, mock_importlib):
mock_warnings.warn.assert_called_once_with(expected_message)

@patch('rdt.transformers.pii.anonymizer.faker')
@patch('rdt.transformers.pii.anonymizer.PIIAnonymizer.check_provider_function')
@patch('rdt.transformers.pii.anonymizer.AnonymizedFaker.check_provider_function')
def test___init__default(self, mock_check_provider_function, mock_faker):
"""Test the default instantiation of the transformer.

Expand All @@ -158,7 +158,7 @@ def test___init__default(self, mock_check_provider_function, mock_faker):
- the ``instance._function`` is ``instance.faker.lexify``.
"""
# Run
instance = PIIAnonymizer()
instance = AnonymizedFaker()

# Assert
mock_check_provider_function.assert_called_once_with('BaseProvider', 'lexify')
Expand All @@ -171,7 +171,7 @@ def test___init__default(self, mock_check_provider_function, mock_faker):
assert mock_faker.Faker.called_once_with(None)

@patch('rdt.transformers.pii.anonymizer.faker')
@patch('rdt.transformers.pii.anonymizer.PIIAnonymizer.check_provider_function')
@patch('rdt.transformers.pii.anonymizer.AnonymizedFaker.check_provider_function')
def test___init__custom(self, mock_check_provider_function, mock_faker):
"""Test the instantiation of the transformer with custom parameters.

Expand All @@ -197,7 +197,7 @@ def test___init__custom(self, mock_check_provider_function, mock_faker):
- the ``instance._function`` is ``instance.faker.credit_card_full``.
"""
# Run
instance = PIIAnonymizer(
instance = AnonymizedFaker(
provider_name='credit_card',
function_name='credit_card_full',
function_kwargs={
Expand Down Expand Up @@ -230,19 +230,19 @@ def test___init__no_function_name(self):
"'credit_card' provider."
)
with pytest.raises(Error, match=expected_message):
PIIAnonymizer(provider_name='credit_card', locales=['en_US', 'fr_FR'])
AnonymizedFaker(provider_name='credit_card', locales=['en_US', 'fr_FR'])

def test_get_output_sdtypes(self):
"""Test the ``get_output_sdtypes``.

Setup:
- initialize a ``PIIAnonymizer`` transformer which:
- initialize an ``AnonymizedFaker`` transformer which:

Output:
- the ``output_sdtypes`` returns an empty dictionary.
"""
# Setup
transformer = PIIAnonymizer()
transformer = AnonymizedFaker()
transformer.column_prefix = 'a#b'

# Run
Expand All @@ -256,7 +256,7 @@ def test_get_output_sdtypes_model_missing_values(self):
"""Test the ``get_output_sdtypes`` method when a null column is created.

Setup:
- initialize a ``PIIAnonymizer`` transformer which:
- initialize an ``AnonymizedFaker`` transformer which:
- sets ``self.null_transformer`` to a ``NullTransformer`` where
``self.model_missing_values`` is True.
- sets ``self.column_prefix`` to a string.
Expand All @@ -266,7 +266,7 @@ def test_get_output_sdtypes_model_missing_values(self):
added to the beginning of the keys.
"""
# Setup
transformer = PIIAnonymizer()
transformer = AnonymizedFaker()
transformer.null_transformer = NullTransformer(missing_value_replacement='fill')
transformer.null_transformer._model_missing_values = True
transformer.column_prefix = 'a#b'
Expand All @@ -288,7 +288,7 @@ def test__fit(self, mock_null_transformer):
and learn the length of it.

Setup:
- Initialize a ``PIIAnonymizer`` transformer.
- Initialize an ``AnonymizedFaker`` transformer.
- Mock the ``NullTransformer``.

Input:
Expand All @@ -301,7 +301,7 @@ def test__fit(self, mock_null_transformer):
- ``instance.data_length`` equals to the length of the input data.
"""
# Setup
transformer = PIIAnonymizer()
transformer = AnonymizedFaker()

columns_data = pd.Series(['1', '2', '3'])

Expand All @@ -320,7 +320,7 @@ def test__transform(self):
does not model the missing values.

Setup:
- Initialize a ``PIIAnonymizer`` transformer.
- Initialize an ``AnonymizedFaker`` transformer.

Input:
- ``pd.Series`` with three values.
Expand All @@ -330,7 +330,7 @@ def test__transform(self):
"""
# Setup
columns_data = pd.Series([1, 2, 3])
instance = PIIAnonymizer()
instance = AnonymizedFaker()

# Run
result = instance._transform(columns_data)
Expand All @@ -345,7 +345,7 @@ def test__transform_model_missing_values(self):
transform the data.

Setup:
- Initialize a ``PIIAnonymizer`` transformer.
- Initialize an ``AnonymizedFaker`` transformer.
- Mock the ``null_transformer`` of the instance.
- Mock the return value of the ``null_transformer.transform``.

Expand All @@ -358,7 +358,7 @@ def test__transform_model_missing_values(self):
"""
# Setup
columns_data = pd.Series([1, 2, 3])
instance = PIIAnonymizer()
instance = AnonymizedFaker()
instance.null_transformer = Mock()

instance.null_transformer.transform.return_value = np.array([
Expand All @@ -381,7 +381,7 @@ def test__reverse_transform(self):
the ``instance.function_kwargs`` the ``instance.data_length`` amount of times.

Setup:
- Initialize a ``PIIAnonymizer`` transformer.
- Initialize an ``AnonymizedFaker`` transformer.
- Mock the ``null_transformer`` of the instance.
- Mock the return value of the ``null_transformer.reverse_transform``.

Expand All @@ -393,7 +393,7 @@ def test__reverse_transform(self):
"""
# Setup
columns_data = pd.Series([1, 2, 3])
instance = PIIAnonymizer()
instance = AnonymizedFaker()
instance.null_transformer = Mock()
instance.null_transformer.models_missing_values.return_value = False
instance.data_length = 3
Expand Down Expand Up @@ -424,7 +424,7 @@ def test__reverse_transform_models_missing_values(self):
to generate values within the range of the ``instance.data_length``.

Setup:
- Mock the instance of ``PIIAnonymizer``.
- Mock the instance of ``AnonymizedFaker``.
- Mock the ``instance.null_transformer.reverse_transform`` return value.

Input:
Expand Down Expand Up @@ -457,7 +457,7 @@ def test__reverse_transform_models_missing_values(self):
}

# Run
output = PIIAnonymizer._reverse_transform(instance, columns_data)
output = AnonymizedFaker._reverse_transform(instance, columns_data)

# Assert
expected_output = pd.Series([
Expand All @@ -482,16 +482,16 @@ def test___repr__default(self):
"""Test the ``__repr__`` method.

With the default parameters should return only the ``function_name='lexify'`` as a
starting argument for the ``PIIAnonymizer``.
starting argument for the ``AnonymizedFaker``.
"""
# Setup
instance = PIIAnonymizer()
instance = AnonymizedFaker()

# Run
res = repr(instance)

# Assert
expected_res = "PIIAnonymizer(function_name='lexify')"
expected_res = "AnonymizedFaker(function_name='lexify')"
assert res == expected_res

def test___repr__custom_provider(self):
Expand All @@ -501,14 +501,14 @@ def test___repr__custom_provider(self):
arguments.
"""
# Setup
instance = PIIAnonymizer('credit_card', 'credit_card_full', model_missing_values=True)
instance = AnonymizedFaker('credit_card', 'credit_card_full', model_missing_values=True)

# Run
res = repr(instance)

# Assert
expected_res = (
"PIIAnonymizer(provider_name='credit_card', function_name='credit_card_full', "
"AnonymizedFaker(provider_name='credit_card', function_name='credit_card_full', "
'model_missing_values=True)'
)

Expand Down