Skip to content

Commit

Permalink
Add update_predictor_catg_realistic function
Browse files Browse the repository at this point in the history
  • Loading branch information
eliwangj committed Jul 25, 2023
1 parent 300742f commit 5979263
Showing 1 changed file with 30 additions and 0 deletions.
30 changes: 30 additions & 0 deletions src/analyticsdf/analyticsdataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,36 @@ def update_predictor_multicollinear(self, target_predictor_name = None, dependen
self.predictor_matrix[target_predictor_name] = safe_sparse_dot(self.predictor_matrix[dependent_predictors_list],
beta[1:].T, dense_output=True) + beta[0] + eps

@check_columns_exist
def update_predictor_catg_realistic(self, predictor_name: str = None,
                                    type: str = None):
    """Update the predictor with realistic but fake categorical values.

    The values come from the ``faker`` package: ``type`` is resolved to the
    Faker provider method of the same name (e.g. ``"name"`` -> ``fake.name``)
    and that method is called once per row of the dataframe.

    Args:
        predictor_name:
            String, a target predictor name in the initial AnalyticsDataframe.
        type:
            String, the type of category desired, currently support the following:
                name,
                address,
                city,
                country,
                company,
                color (in HEX color code),
                color_name (in plain English).

    Raises:
        KeyError: If the column does not exist (presumably raised by the
            ``check_columns_exist`` decorator -- confirm against its definition).
        TypeError: If ``type`` is not a string.
        AttributeError: If Faker has no provider method named ``type``.
    """
    # ``type`` shadows the builtin here (the name is part of the public
    # signature), so only isinstance() is used to inspect it.
    if not isinstance(type, str):
        raise TypeError(
            f"type must be a string naming a Faker provider, got {type!r}"
        )

    with set_random_state(validate_random_state(self.seed)):
        from numbers import Integral

        from faker import Faker

        fake = Faker()
        # Faker draws from its own random generator, so the surrounding
        # random-state context alone does not make its output repeatable.
        # Seed this instance explicitly when an integer seed is available.
        # NOTE(review): non-integer seeds (e.g. a RandomState object) are
        # left unseeded, which matches the previous behaviour.
        if isinstance(self.seed, Integral):
            fake.seed_instance(int(self.seed))

        # Resolve the provider once: getattr(fake, 'name') is the bound
        # method fake.name, and calling it produces one fake value.
        provider = getattr(fake, type)
        self.predictor_matrix[predictor_name] = [
            provider() for _ in range(self.n)
        ]

@check_columns_exist
def generate_response_vector_linear(self, predictor_name_list: list = None,
Expand Down

0 comments on commit 5979263

Please sign in to comment.