JacksonBurns · JacksonBurns · Oct 11, 2023 · Aug 31, 2023 · Sep 22, 2023 · Sep 18, 2023
diff --git a/README.md b/README.md
@@ -86,18 +86,19 @@ X_train, X_val, X_test = train_val_test_split(X, sampler = 'sphere_exclusion')
 ```
 You can now train your model with `X_train`, optimize your model with `X_val`, and measure its performance with `X_test`.
 
-### Evaluate the Impact of Splitting Algorithms
+### Evaluate the Impact of Splitting Algorithms on Regression Models
 For data with many features it can be difficult to visualize how different sampling algorithms change the distribution of data into training, validation, and testing like we do in some of the demo notebooks.
 To aid in analyzing the impact of the algorithms, `astartes` provides `generate_regression_results_dict`.
-This function allows users to quickly evaluate the impact of different splitting techniques on any model supported by `sklearn`. All results are stored in a dictionary format and can be displayed in a neatly formatted table using the optional `print_results` argument.
+This function allows users to quickly evaluate the impact of different splitting techniques on any model supported by `sklearn`.
+All results are stored in a dictionary format and can be displayed in a neatly formatted table using the optional `print_results` argument.
 
 ```python
 from sklearn.svm import LinearSVR
 
-from astartes.utils import generate_regression_results_dict
+from astartes.utils import generate_regression_results_dict as grrd
 
 sklearn_model = LinearSVR()
-results_dict = generate_regression_results_dict(
+results_dict = grrd(
     sklearn_model,
     X,
     y,
@@ -112,6 +113,16 @@ R2    0.90745   0.80787   0.78412
 
 ```
 
+Additional metrics can be passed to `generate_regression_results_dict` via the `additional_metrics` argument, which should be a dictionary mapping the name of the metric (as a `string`) to the function itself, like this:
+
+```python
+from sklearn.metrics import mean_absolute_percentage_error
+
+add_met = {"mape": mean_absolute_percentage_error}
+
+grrd(sklearn_model, X, y, additional_metrics=add_met)
+```
+
 ### Access Sampling Algorithms Directly
 The sampling algorithms implemented in `astartes` can also be directly accessed and run if it is more useful for your applications.
 In the below example, we import the Kennard Stone sampler, use it to partition a simple array, and then retrieve a sample.

diff --git a/astartes/__init__.py b/astartes/__init__.py
@@ -1,7 +1,6 @@
 # convenience import to enable 'from astartes import train_test_split'
 from .main import train_test_split, train_val_test_split
 
-# update this in pyproject.toml, too
 __version__ = "1.1.2"
 
 # DO NOT do this:

diff --git a/astartes/samplers/__init__.py b/astartes/samplers/__init__.py
@@ -3,12 +3,11 @@
 
 # implementations
 from .extrapolation import DBSCAN, KMeans, OptiSim, Scaffold, SphereExclusion, TimeBased
-from .interpolation import MTSD, SPXY, KennardStone, Random
+from .interpolation import SPXY, KennardStone, Random
 
 IMPLEMENTED_INTERPOLATION_SAMPLERS = (
     "random",
     "kennard_stone",
-    # "mtsd",
     "spxy",
 )
 

diff --git a/astartes/samplers/extrapolation/dbscan.py b/astartes/samplers/extrapolation/dbscan.py
@@ -4,9 +4,6 @@
 
 
 class DBSCAN(AbstractSampler):
-    def __init__(self, *args):
-        super().__init__(*args)
-
     def _sample(self):
         """
         Implements the DBSCAN sampler to identify clusters.

diff --git a/astartes/samplers/extrapolation/kmeans.py b/astartes/samplers/extrapolation/kmeans.py
@@ -6,9 +6,6 @@
 
 
 class KMeans(AbstractSampler):
-    def __init__(self, *args):
-        super().__init__(*args)
-
     def _sample(self):
         """Implements the K-Means sampler to identify clusters."""
         # use the sklearn kmeans model

diff --git a/astartes/samplers/extrapolation/optisim.py b/astartes/samplers/extrapolation/optisim.py
@@ -59,9 +59,6 @@
 
 
 class OptiSim(AbstractSampler):
-    def __init__(self, *args):
-        super().__init__(*args)
-
     def _sample(self):
         """Implementes the OptiSim sampler"""
         self._init_random(self.get_config("random_state", 42))

diff --git a/astartes/samplers/extrapolation/scaffold.py b/astartes/samplers/extrapolation/scaffold.py
@@ -27,9 +27,6 @@
 
 
 class Scaffold(AbstractSampler):
-    def __init__(self, *args):
-        super().__init__(*args)
-
     def _before_sample(self):
         # ensure that X contains entries that are either a SMILES string or an RDKit Molecule
         if not all(isinstance(i, str) for i in self.X) and not all(isinstance(i, Chem.rdchem.Mol) for i in self.X):

diff --git a/astartes/samplers/extrapolation/sphere_exclusion.py b/astartes/samplers/extrapolation/sphere_exclusion.py
@@ -20,9 +20,6 @@
 
 
 class SphereExclusion(AbstractSampler):
-    def __init__(self, *args):
-        super().__init__(*args)
-
     def _sample(self):
         """Cluster X according to a Sphere Exclusion-like algorithm with arbitrary distance metrics."""
         # euclidian, cosine, or city block from get_configs

diff --git a/astartes/samplers/extrapolation/time_based.py b/astartes/samplers/extrapolation/time_based.py
@@ -6,9 +6,6 @@
 
 
 class TimeBased(AbstractSampler):
-    def __init__(self, *args):
-        super().__init__(*args)
-
     def _before_sample(self):
         # verify that the user provided time as the labels (i.e. args[2])
         if self.labels is None:

diff --git a/astartes/samplers/interpolation/__init__.py b/astartes/samplers/interpolation/__init__.py
@@ -1,4 +1,3 @@
 from .kennardstone import KennardStone
-from .mtsd import MTSD
 from .random_split import Random
 from .spxy import SPXY
diff --git a/astartes/samplers/interpolation/kennardstone.py b/astartes/samplers/interpolation/kennardstone.py
@@ -5,9 +5,6 @@
 
 
 class KennardStone(AbstractSampler):
-    def __init__(self, *args):
-        super().__init__(*args)
-
     def _sample(self):
         """
         Implements the Kennard-Stone algorithm

diff --git a/astartes/samplers/interpolation/mtsd.py b/astartes/samplers/interpolation/mtsd.py
diff --git a/astartes/samplers/interpolation/random_split.py b/astartes/samplers/interpolation/random_split.py
@@ -5,9 +5,6 @@
 
 
 class Random(AbstractSampler):
-    def __init__(self, *args):
-        super().__init__(*args)
-
     def _sample(self):
         """Passthrough to sklearn train_test_split"""
         idx_list = list(range(len(self.X)))

diff --git a/astartes/samplers/interpolation/spxy.py b/astartes/samplers/interpolation/spxy.py
@@ -20,9 +20,6 @@
 
 
 class SPXY(AbstractSampler):
-    def __init__(self, *args):
-        super().__init__(*args)
-
     def _before_sample(self):
         if self.y is None:
             raise InvalidConfigurationError("SPXY sampler requires both X and y arrays. Provide y or switch to kennard_stone.")

diff --git a/astartes/utils/user_utils.py b/astartes/utils/user_utils.py
@@ -16,6 +16,7 @@ def generate_regression_results_dict(
     val_size=0.1,
     test_size=0.1,
     print_results=False,
+    additional_metrics={},
 ):
     """
     Helper function to train a sklearn model using the provided data
@@ -32,6 +33,8 @@ def generate_regression_results_dict(
                                          the sampler and the values being another dictionary with the
                                          corresponding hyperparameters. Defaults to {}.
         print_results (bool, optional): whether to print the resulting dictionary as a neat table
+        additional_metrics (dict, optional): mapping of name (str) to metric (func) for additional metrics
+                                             such as those in sklearn.metrics or user-provided functions
 
     Returns:
         dict: nested dictionary with the format of
@@ -148,6 +151,11 @@ def generate_regression_results_dict(
 
         final_dict[sampler] = error_dict
 
+        for metric_name, metric_function in additional_metrics.items():
+            error_dict[metric_name]["train"] = metric_function(y_train, y_pred_train)
+            error_dict[metric_name]["val"] = metric_function(y_val, y_pred_val)
+            error_dict[metric_name]["test"] = metric_function(y_test, y_pred_test)
+
         if print_results:
             print(f"\nDisplaying results for {sampler} sampler")
             display_results_as_table(error_dict)

diff --git a/docs/_sources/astartes.samplers.rst.txt b/docs/_sources/astartes.samplers.rst.txt
@@ -36,14 +36,6 @@ astartes.samplers.kennard\_stone module
    :undoc-members:
    :show-inheritance:
 
-astartes.samplers.mtsd module
------------------------------
-
-.. automodule:: astartes.samplers.mtsd
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
 astartes.samplers.optisim module
 --------------------------------
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,8 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "astartes"
-# update this in astartes/__init__.py, too
-version = "1.1.2"
+dynamic = ["version"]
 authors = [
     { name = "Jackson Burns", email = "jwburns@mit.edu" },
     { name = "Himaghna Bhattacharjee", email = "himaghna@udel.edu" },
@@ -43,3 +42,6 @@ include-package-data = true
 where = ["."]
 include = ["astartes*"]
 exclude = ["docs*", "examples*", "test*"]
+
+[tool.setuptools.dynamic]
+version = {attr = "astartes.__version__"}
diff --git a/test/unit/samplers/interpolative/test_MTSD.py b/test/unit/samplers/interpolative/test_MTSD.py