-
Notifications
You must be signed in to change notification settings - Fork 185
New preset implementation and test #867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
559b69f
cb75ef0
f3f921b
3d04acf
601f35a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,7 @@ | |
| import copy | ||
| import re | ||
| import warnings | ||
| from typing import Any | ||
|
|
||
| from ..labelers.base_data_labeler import BaseDataLabeler | ||
|
|
||
|
|
@@ -1557,7 +1558,8 @@ def __init__(self, presets: str = None) -> None: | |
| :ivar unstructured_options: option set for unstructured dataset profiling. | ||
| :vartype unstructured_options: UnstructuredOptions | ||
| :ivar presets: A pre-configured mapping of a string name to group of options: | ||
| "complete", "data_types", and "numeric_stats_disabled". Default: None | ||
| "complete", "data_types", "numeric_stats_disabled", | ||
| and "memory_optimization". Default: None | ||
| :vartype presets: Optional[str] | ||
| """ | ||
| self.structured_options = StructuredOptions() | ||
|
|
@@ -1570,6 +1572,10 @@ def __init__(self, presets: str = None) -> None: | |
| self._data_types_presets() | ||
| elif self.presets == "numeric_stats_disabled": | ||
| self._numeric_stats_disabled_presets() | ||
| elif self.presets == "memory_optimization": | ||
| self._memory_optimization_presets() | ||
| else: | ||
| raise ValueError("The preset entered is not a valid preset.") | ||
|
|
||
| def _complete_presets(self) -> None: | ||
| self.set({"*.is_enabled": True}) | ||
|
|
@@ -1583,6 +1589,25 @@ def _numeric_stats_disabled_presets(self) -> None: | |
| self.set({"*.float.is_numeric_stats_enabled": False}) | ||
| self.set({"structured_options.text.is_numeric_stats_enabled": False}) | ||
|
|
||
| def _memory_optimization_presets(self) -> None: | ||
| self.set({"structured_options.row_statistics.is_enabled": False}) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. should
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. that's more TBD IMO |
||
| self.set({"structured_options.multiprocess.is_enabled": False}) | ||
| self.set({"structured_options.data_labeler.is_enabled": False}) | ||
| self.set({"structured_options.datetime.is_enabled": False}) | ||
| self.set({"structured_options.order.is_enabled": False}) | ||
|
Comment on lines +1596 to +1597
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Curious why we are specifically disabling these two. Are they memory intensive? |
||
| self.set({"structured_options.chi2_homogeneity.is_enabled": False}) | ||
| self.set({"structured_options.null_replication_metrics.is_enabled": False}) | ||
| self.set({"unstructured_options.data_labeler.is_enabled": False}) | ||
| self.set( | ||
| { | ||
| ( | ||
| "structured_options.category." | ||
| "max_sample_size_to_check_stop_condition" | ||
| ): 5000 | ||
| } | ||
| ) | ||
| self.set({"structured_options.category.stop_condition_unique_value_ratio": 0.5}) | ||
|
|
||
| def _validate_helper(self, variable_path: str = "ProfilerOptions") -> list[str]: | ||
| """ | ||
| Validate the options do not conflict and cause errors. | ||
|
|
@@ -1620,7 +1645,7 @@ def _validate_helper(self, variable_path: str = "ProfilerOptions") -> list[str]: | |
|
|
||
| return errors | ||
|
|
||
| def set(self, options: dict[str, bool]) -> None: | ||
| def set(self, options: dict[str, Any]) -> None: | ||
lizlouise1335 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| """ | ||
| Overwrite BaseOption.set. | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.