validmind
diff --git a/‎tests/unit_tests/data_validation/test_ClassImbalance.py
Lines changed: 2 additions & 1 deletion b/‎tests/unit_tests/data_validation/test_ClassImbalance.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎tests/unit_tests/data_validation/test_RollingStatsPlot.py
Lines changed: 51 additions & 0 deletions b/‎tests/unit_tests/data_validation/test_RollingStatsPlot.py
Lines changed: 51 additions & 0 deletions
diff --git a/‎tests/unit_tests/data_validation/test_SeasonalDecompose.py
Lines changed: 75 additions & 0 deletions b/‎tests/unit_tests/data_validation/test_SeasonalDecompose.py
Lines changed: 75 additions & 0 deletions
diff --git a/‎tests/unit_tests/data_validation/test_Skewness.py
Lines changed: 68 additions & 0 deletions b/‎tests/unit_tests/data_validation/test_Skewness.py
Lines changed: 68 additions & 0 deletions
diff --git a/‎tests/unit_tests/data_validation/test_SpreadPlot.py
Lines changed: 54 additions & 0 deletions b/‎tests/unit_tests/data_validation/test_SpreadPlot.py
Lines changed: 54 additions & 0 deletions
diff --git a/‎tests/unit_tests/data_validation/test_TabularCategoricalBarPlots.py
Lines changed: 53 additions & 0 deletions b/‎tests/unit_tests/data_validation/test_TabularCategoricalBarPlots.py
Lines changed: 53 additions & 0 deletions
diff --git a/‎tests/unit_tests/data_validation/test_TabularDateTimeHistograms.py
Lines changed: 40 additions & 0 deletions b/‎tests/unit_tests/data_validation/test_TabularDateTimeHistograms.py
Lines changed: 40 additions & 0 deletions
@@ -1,6 +1,7 @@
 import unittest
 import pandas as pd
 import validmind as vm
+from validmind.errors import SkipTestError
 from validmind.tests.data_validation.ClassImbalance import ClassImbalance
 from plotly.graph_objs import Figure
 
@@ -79,5 +80,5 @@ def test_missing_target(self):
             __log=False,
         )
 
-        with self.assertRaises(Exception):
+        with self.assertRaises(SkipTestError):
             ClassImbalance(dataset_no_target)
@@ -0,0 +1,51 @@
+import unittest
+import pandas as pd
+import validmind as vm
+import matplotlib.pyplot as plt
+from validmind.tests.data_validation.RollingStatsPlot import RollingStatsPlot
+
+
+class TestRollingStatsPlot(unittest.TestCase):
+    """Unit tests for the RollingStatsPlot data validation test."""
+
+    def setUp(self):
+        """Build one dataset with a daily DatetimeIndex and one without."""
+        # Registered first so figures are closed after every test, including
+        # tests that stop early on a failed assertion
+        self.addCleanup(plt.close, "all")
+
+        # Column values shared by both fixtures
+        values = {"A": range(100), "B": [i * 2 for i in range(100)]}
+
+        # Time series dataset indexed by 100 consecutive days
+        dates = pd.date_range(start="2023-01-01", periods=100, freq="D")
+        self.vm_dataset = vm.init_dataset(
+            input_id="test_dataset",
+            dataset=pd.DataFrame(values, index=dates),
+            feature_columns=["A", "B"],
+            __log=False,
+        )
+
+        # Same columns on the default integer index (no datetime index)
+        self.vm_dataset_no_datetime = vm.init_dataset(
+            input_id="test_dataset_no_datetime",
+            dataset=pd.DataFrame(values),
+            feature_columns=["A", "B"],
+            __log=False,
+        )
+
+    def test_rolling_stats_plot(self):
+        """Expect one matplotlib figure per feature column."""
+        figures = RollingStatsPlot(self.vm_dataset, window_size=10)
+
+        self.assertEqual(len(figures), 2)
+        for fig in figures:
+            self.assertIsInstance(fig, plt.Figure)
+
+    def test_no_datetime_index(self):
+        """Expect an error mentioning "datetime" for a non-datetime index."""
+        with self.assertRaises(Exception) as context:
+            RollingStatsPlot(self.vm_dataset_no_datetime)
+
+        # The message should name the datetime requirement
+        self.assertIn("datetime", str(context.exception).lower())
@@ -0,0 +1,75 @@
+import unittest
+import pandas as pd
+import numpy as np
+import validmind as vm
+import plotly.graph_objects as go
+from validmind.tests.data_validation.SeasonalDecompose import SeasonalDecompose
+from validmind.errors import SkipTestError
+
+
+class TestSeasonalDecompose(unittest.TestCase):
+    """Unit tests for the SeasonalDecompose data validation test."""
+
+    def setUp(self):
+        """Build a seasonal daily series, plus a copy with leading NaNs."""
+        dates = pd.date_range(start="2023-01-01", periods=100, freq="D")
+        # Seeded generator: identical noise on every run, global RNG untouched
+        rng = np.random.default_rng(42)
+        seasonal_pattern = np.sin(np.linspace(0, 4 * np.pi, 100))  # 2 complete cycles
+        trend = np.linspace(0, 2, 100)  # upward trend
+        noise = rng.normal(0, 0.1, 100)
+
+        df = pd.DataFrame(
+            {
+                "feature1": seasonal_pattern + trend + noise,
+                "feature2": seasonal_pattern * 2 + trend + noise,
+            },
+            index=dates,
+        )
+
+        self.vm_dataset = vm.init_dataset(
+            input_id="test_dataset",
+            dataset=df,
+            feature_columns=["feature1", "feature2"],
+            __log=False,
+        )
+
+        # Same data with the first 10 rows of feature1 replaced by NaN
+        df_with_nan = df.copy()
+        df_with_nan.iloc[0:10, 0] = np.nan
+        self.vm_dataset_with_nan = vm.init_dataset(
+            input_id="test_dataset_with_nan",
+            dataset=df_with_nan,
+            feature_columns=["feature1", "feature2"],
+            __log=False,
+        )
+
+    def test_seasonal_decompose(self):
+        """Expect a tuple holding one plotly figure per feature."""
+        figures = SeasonalDecompose(self.vm_dataset)
+
+        self.assertIsInstance(figures, tuple)
+        self.assertEqual(len(figures), 2)
+        for fig in figures:
+            self.assertIsInstance(fig, go.Figure)
+            # Observed, Trend, Seasonal, Residuals, Histogram and Q-Q plot,
+            # plus one extra trace for the Q-Q reference line
+            self.assertEqual(len(fig.data), 7)
+
+    def test_seasonal_decompose_with_nan(self):
+        """Expect NaN values in a feature not to prevent decomposition."""
+        figures = SeasonalDecompose(self.vm_dataset_with_nan)
+        self.assertEqual(len(figures), 2)
+
+    def test_seasonal_decompose_models(self):
+        """Expect additive to succeed and multiplicative to raise ValueError."""
+        figures_add = SeasonalDecompose(self.vm_dataset, seasonal_model="additive")
+        self.assertEqual(len(figures_add), 2)
+
+        with self.assertRaises(ValueError) as context:
+            SeasonalDecompose(self.vm_dataset, seasonal_model="multiplicative")
+
+        self.assertIn(
+            "Multiplicative seasonality is not appropriate for zero and negative values",
+            str(context.exception),
+        )
@@ -0,0 +1,68 @@
+import unittest
+import pandas as pd
+import numpy as np
+import validmind as vm
+from validmind.tests.data_validation.Skewness import Skewness
+
+
+class TestSkewness(unittest.TestCase):
+    """Unit tests for the Skewness data validation test."""
+
+    def setUp(self):
+        """Build a dataset with a symmetric, a right-skewed and a text column."""
+        n_samples = 1000
+
+        # Seeded generator: the Pass/Fail expectations below are assertions on
+        # sample statistics, so the sample must not change between runs
+        rng = np.random.default_rng(42)
+
+        # Normal distribution (low skewness)
+        normal_data = rng.normal(0, 1, n_samples)
+
+        # Exponential distribution (high positive skewness)
+        skewed_data = rng.exponential(2, n_samples)
+
+        # Non-numeric column: repeat A/B/C, then pad with "A" when n_samples
+        # is not divisible by 3 (extending by an empty list is a no-op)
+        categorical = ["A", "B", "C"] * (n_samples // 3)
+        categorical.extend(["A"] * (n_samples - len(categorical)))
+
+        df = pd.DataFrame(
+            {"normal": normal_data, "skewed": skewed_data, "categorical": categorical}
+        )
+
+        self.vm_dataset = vm.init_dataset(
+            input_id="test_dataset",
+            dataset=df,
+            feature_columns=["normal", "skewed", "categorical"],
+            __log=False,
+        )
+
+    def test_skewness_threshold(self):
+        """Check the result structure and Pass/Fail with the default threshold."""
+        results, passed = Skewness(self.vm_dataset)
+
+        # Check return types
+        self.assertIsInstance(results, dict)
+        self.assertIn(passed, [True, False])
+
+        # Check results structure
+        results_table = results["Skewness Results for Dataset"]
+        self.assertIsInstance(results_table, list)
+
+        # Verify only numeric columns are included
+        column_names = {row["Column"] for row in results_table}
+        self.assertEqual(column_names, {"normal", "skewed"})
+
+        # Normal distribution should pass, skewed should fail
+        outcomes = {row["Column"]: row["Pass/Fail"] for row in results_table}
+        self.assertEqual(outcomes, {"normal": "Pass", "skewed": "Fail"})
+
+    def test_custom_threshold(self):
+        """Expect every column to pass when max_threshold is very high."""
+        results, passed = Skewness(self.vm_dataset, max_threshold=10)
+        results_table = results["Skewness Results for Dataset"]
+
+        # All columns should pass with high threshold
+        self.assertTrue(passed)
+        self.assertTrue(all(row["Pass/Fail"] == "Pass" for row in results_table))
@@ -0,0 +1,54 @@
+import unittest
+import pandas as pd
+import matplotlib.pyplot as plt
+
+import validmind as vm
+
+from validmind.errors import SkipTestError
+from validmind.tests.data_validation.SpreadPlot import SpreadPlot
+
+
+class TestSpreadPlot(unittest.TestCase):
+    """Unit tests for the SpreadPlot data validation test."""
+
+    def setUp(self):
+        """Build one dataset with a daily DatetimeIndex and one without."""
+        # Registered first so figures are closed after every test, including
+        # tests that stop early on a failed assertion
+        self.addCleanup(plt.close, "all")
+
+        # Column values shared by both fixtures
+        values = {"A": range(100), "B": [i * 2 for i in range(100)]}
+
+        # Time series dataset indexed by 100 consecutive days
+        dates = pd.date_range(start="2023-01-01", periods=100, freq="D")
+        self.vm_dataset = vm.init_dataset(
+            input_id="test_dataset",
+            dataset=pd.DataFrame(values, index=dates),
+            feature_columns=["A", "B"],
+            __log=False,
+        )
+
+        # Same columns on the default integer index (no datetime index)
+        self.vm_dataset_no_datetime = vm.init_dataset(
+            input_id="test_dataset_no_datetime",
+            dataset=pd.DataFrame(values),
+            feature_columns=["A", "B"],
+            __log=False,
+        )
+
+    def test_spread_plot(self):
+        """Expect one matplotlib figure per pair of features."""
+        figures = SpreadPlot(self.vm_dataset)
+
+        # Two features form exactly one pair (A-B)
+        self.assertEqual(len(figures), 1)
+
+        # Every output is a matplotlib figure
+        for fig in figures:
+            self.assertIsInstance(fig, plt.Figure)
+
+    def test_no_datetime_index(self):
+        """Expect SkipTestError when the dataset has no datetime index."""
+        with self.assertRaises(SkipTestError):
+            SpreadPlot(self.vm_dataset_no_datetime)
@@ -0,0 +1,53 @@
+import unittest
+import pandas as pd
+import validmind as vm
+import plotly.graph_objs as go
+from validmind.errors import SkipTestError
+from validmind.tests.data_validation.TabularCategoricalBarPlots import (
+    TabularCategoricalBarPlots,
+)
+
+
+class TestTabularCategoricalBarPlots(unittest.TestCase):
+    """Unit tests for the TabularCategoricalBarPlots data validation test."""
+
+    def setUp(self):
+        """Build a mixed categorical/numeric dataset and a numeric-only one."""
+        # Two string columns and one integer column, 100 rows each
+        mixed_frame = pd.DataFrame(
+            {
+                "cat1": ["A", "B", "C", "A", "B"] * 20,
+                "cat2": ["X", "Y", "X", "Y", "X"] * 20,
+                "numeric": range(100),
+            }
+        )
+        # Integer columns only, i.e. nothing categorical to plot
+        numeric_frame = pd.DataFrame(
+            {"numeric1": range(100), "numeric2": range(100, 200)}
+        )
+
+        self.vm_dataset = vm.init_dataset(
+            input_id="test_dataset",
+            dataset=mixed_frame,
+            feature_columns=["cat1", "cat2", "numeric"],
+            __log=False,
+        )
+        self.vm_dataset_no_cat = vm.init_dataset(
+            input_id="test_dataset_no_cat",
+            dataset=numeric_frame,
+            feature_columns=["numeric1", "numeric2"],
+            __log=False,
+        )
+
+    def test_categorical_bar_plots(self):
+        """Expect a tuple with one plotly figure per categorical column."""
+        figures = TabularCategoricalBarPlots(self.vm_dataset)
+        self.assertIsInstance(figures, tuple)
+        self.assertEqual(len(figures), 2)  # Should have 2 figures for cat1 and cat2
+        for fig in figures:
+            self.assertIsInstance(fig, go.Figure)
+
+    def test_no_categorical_columns(self):
+        """Expect SkipTestError when the dataset has no categorical columns."""
+        with self.assertRaises(SkipTestError):
+            TabularCategoricalBarPlots(self.vm_dataset_no_cat)
@@ -0,0 +1,40 @@
+import unittest
+import pandas as pd
+import validmind as vm
+import plotly.graph_objs as go
+from validmind.errors import SkipTestError
+from validmind.tests.data_validation.TabularDateTimeHistograms import (
+    TabularDateTimeHistograms,
+)
+
+
+class TestTabularDateTimeHistograms(unittest.TestCase):
+    """Unit tests for the TabularDateTimeHistograms data validation test."""
+
+    def setUp(self):
+        """Build one dataset with a daily DatetimeIndex and one without."""
+        columns = {"A": range(100), "B": range(100, 200)}
+        daily_index = pd.date_range(start="2023-01-01", periods=100, freq="D")
+        self.vm_dataset = vm.init_dataset(
+            input_id="test_dataset",
+            dataset=pd.DataFrame(columns, index=daily_index),
+            feature_columns=["A", "B"],
+            __log=False,
+        )
+        # Same columns on the default integer index
+        self.vm_dataset_no_datetime = vm.init_dataset(
+            input_id="test_dataset_no_datetime",
+            dataset=pd.DataFrame(columns),
+            feature_columns=["A", "B"],
+            __log=False,
+        )
+
+    def test_datetime_histograms(self):
+        """Expect a single plotly figure for a datetime-indexed dataset."""
+        result = TabularDateTimeHistograms(self.vm_dataset)
+        self.assertIsInstance(result, go.Figure)
+
+    def test_no_datetime_index(self):
+        """Expect SkipTestError when the dataset has no datetime index."""
+        with self.assertRaises(SkipTestError):
+            TabularDateTimeHistograms(self.vm_dataset_no_datetime)