feat: add restart_stream=False as an option in evaluators

tachyonicClock · hmgomes · commit a965dc45c4d2 · 2024-07-31T09:28:01.000+12:00
diff --git a/src/capymoa/evaluation/__init__.py b/src/capymoa/evaluation/__init__.py
@@ -11,6 +11,7 @@
     PredictionIntervalWindowedEvaluator,
     AnomalyDetectionEvaluator,
 )
+from . import results
 
 __all__ = [
     "prequential_evaluation",
@@ -24,4 +25,5 @@
     "PredictionIntervalEvaluator",
     "PredictionIntervalWindowedEvaluator",
     "AnomalyDetectionEvaluator",
+    "results"
 ]
diff --git a/src/capymoa/evaluation/evaluation.py b/src/capymoa/evaluation/evaluation.py
@@ -1,4 +1,5 @@
 from typing import Optional
+from typing import Any, Dict, Union
 
 import pandas as pd
 import numpy as np
@@ -9,14 +10,11 @@
 import os
 
 from capymoa.stream import Schema, Stream
-from capymoa.base import (
-    AnomalyDetector,
-    ClassifierSSL,
-    MOAPredictionIntervalLearner
-)
+from capymoa.base import AnomalyDetector, ClassifierSSL, MOAPredictionIntervalLearner
 
 from capymoa.evaluation.results import PrequentialResults
 from capymoa._utils import _translate_metric_name
+from capymoa.base import Classifier, Regressor
 
 from com.yahoo.labs.samoa.instances import Instances, Attribute, DenseInstance
 from moa.core import InstanceExample
@@ -820,25 +818,51 @@ def stop_time_measuring(start_wallclock_time, start_cpu_time):
 
 
 def prequential_evaluation(
-        stream,
-        learner,
-        max_instances=None,
-        window_size=1000,
-        store_predictions=False,
-        store_y=False,
-        optimise=True
-):
-    """
-    Calculates the metrics cumulatively (i.e. test-then-train) and in a window-fashion (i.e. windowed prequential
-    evaluation). Returns both evaluators so that the user has access to metrics from both evaluators.
+    stream: Stream,
+    learner: Union[Classifier, Regressor],
+    max_instances: Optional[int] = None,
+    window_size: int = 1000,
+    store_predictions: bool = False,
+    store_y: bool = False,
+    optimise: bool = True,
+    restart_stream: bool = True,
+) -> PrequentialResults:
+    """Run and evaluate a learner on a stream using prequential evaluation.
+
+    Calculates the metrics cumulatively (i.e. test-then-train) and in a
+    window-fashion (i.e. windowed prequential evaluation). Returns both
+    evaluators so that the user has access to metrics from both evaluators.
+
+    :param stream: A data stream to evaluate the learner on. Will be restarted if
+        ``restart_stream`` is True.
+    :param learner: The learner to evaluate.
+    :param max_instances: The number of instances to evaluate before exiting. If
+        None, the evaluation will continue until the stream is empty.
+    :param window_size: The size of the window used for windowed evaluation,
+        defaults to 1000
+    :param store_predictions: Store the learner's prediction in a list, defaults
+        to False
+    :param store_y: Store the ground truth targets in a list, defaults to False
+    :param optimise: If True and the learner is compatible, the evaluator will
+        use a Java native evaluation loop, defaults to True.
+    :param restart_stream: If False, evaluation will continue from the current
+        position in the stream, defaults to True. Not restarting the stream is
+        useful for switching between learners or evaluators, without starting
+        from the beginning of the stream.
+    :return: An object containing the results of the evaluation: windowed metrics,
+        cumulative metrics, ground truth targets, and predictions.
     """
-    stream.restart()
+    if restart_stream:
+        stream.restart()
     if _is_fast_mode_compilable(stream, learner, optimise):
-        return _prequential_evaluation_fast(stream, learner,
-                                            max_instances,
-                                            window_size,
-                                            store_y=store_y,
-                                            store_predictions=store_predictions)
+        return _prequential_evaluation_fast(
+            stream,
+            learner,
+            max_instances,
+            window_size,
+            store_y=store_y,
+            store_predictions=store_predictions,
+        )
 
     predictions = None
     if store_predictions:
@@ -880,7 +904,7 @@ def prequential_evaluation(
                     schema=stream.get_schema(), window_size=window_size
                 )
     while stream.has_more_instances() and (
-            max_instances is None or instancesProcessed <= max_instances
+        max_instances is None or instancesProcessed <= max_instances
     ):
         instance = stream.next_instance()
 
@@ -933,25 +957,55 @@ def prequential_evaluation(
     return results
 
 
-# TODO: Include store_predictions and store_y logic
 def prequential_ssl_evaluation(
-        stream,
-        learner,
-        max_instances=None,
-        window_size=1000,
-        initial_window_size=0,
-        delay_length=0,
-        label_probability=0.01,
-        random_seed=1,
-        store_predictions=False,
-        store_y=False,
-        optimise=True,
+    stream: Stream,
+    learner: Union[ClassifierSSL, Classifier],
+    max_instances: Optional[int] = None,
+    window_size: int = 1000,
+    initial_window_size: int = 0,
+    delay_length: int = 0,
+    label_probability: float = 0.01,
+    random_seed: int = 1,
+    store_predictions: bool = False,
+    store_y: bool = False,
+    optimise: bool = True,
+    restart_stream: bool = True,
 ):
-    """
-    If the learner is not an SSL learner, then it will be trained only on the labeled instances.
+    """Run and evaluate a learner on a semi-supervised stream using prequential evaluation.
+
+    :param stream: A data stream to evaluate the learner on. Will be restarted if
+        ``restart_stream`` is True.
+    :param learner: The learner to evaluate. If the learner is an SSL learner,
+        it will be trained on both labeled and unlabeled instances. If the
+        learner is not an SSL learner, then it will be trained only on the
+        labeled instances.
+    :param max_instances: The number of instances to evaluate before exiting.
+        If None, the evaluation will continue until the stream is empty.
+    :param window_size: The size of the window used for windowed evaluation,
+        defaults to 1000
+    :param initial_window_size: Not implemented yet
+    :param delay_length: If greater than zero, the labeled instances (a
+        ``label_probability`` proportion of the stream) will appear as unlabeled
+        before reappearing as labeled after ``delay_length`` instances, defaults to 0
+    :param label_probability: The proportion of instances that will be labeled,
+        must be in the range [0, 1], defaults to 0.01
+    :param random_seed: A random seed to define the random state that decides
+        which instances are labeled and which are not, defaults to 1.
+    :param store_predictions: Store the learner's prediction in a list, defaults
+        to False
+    :param store_y: Store the ground truth targets in a list, defaults to False
+    :param optimise: If True and the learner is compatible, the evaluator will
+        use a Java native evaluation loop, defaults to True.
+    :param restart_stream: If False, evaluation will continue from the current
+        position in the stream, defaults to True. Not restarting the stream is
+        useful for switching between learners or evaluators, without starting
+        from the beginning of the stream.
+    :return: An object containing the results of the evaluation: windowed metrics,
+        cumulative metrics, ground truth targets, and predictions.
     """
 
-    stream.restart()
+    if restart_stream:
+        stream.restart()
 
     if _is_fast_mode_compilable(stream, learner, optimise):
         return _prequential_ssl_evaluation_fast(stream,
diff --git a/src/capymoa/evaluation/results.py b/src/capymoa/evaluation/results.py
@@ -30,9 +30,13 @@ def __init__(self,
         self._predictions = predictions
         self._other_metrics = other_metrics
         # attributes
-        self.learner = learner
-        self.stream = stream
+        #: The name of the learner
+        self.learner: str = learner
+        #: The stream used to evaluate the learner
+        self.stream: Stream = stream
+        #: The cumulative evaluator
         self.cumulative = cumulative_evaluator
+        #: The windowed evaluator
         self.windowed = windowed_evaluator
 
     def __getitem__(self, key):
diff --git a/src/capymoa/stream/__init__.py b/src/capymoa/stream/__init__.py
@@ -1,4 +1,4 @@
-from ._stream import Stream, Schema, ARFFStream, stream_from_file, CSVStream
+from ._stream import Stream, Schema, ARFFStream, stream_from_file, CSVStream, NumpyStream
 from .PytorchStream import PytorchStream
 from . import drift, generator, preprocessing
 
@@ -12,4 +12,5 @@
     "drift",
     "generator",
     "preprocessing",
+    "NumpyStream"
 ]
diff --git a/src/capymoa/stream/_stream.py b/src/capymoa/stream/_stream.py
@@ -1,6 +1,6 @@
 import typing
 import warnings
-from typing import Dict, Optional, Sequence
+from typing import Dict, Optional, Sequence, Union
 
 import numpy as np
 from numpy.lib import recfunctions as rfn
@@ -380,7 +380,7 @@ def __init__(
     def has_more_instances(self):
         return self.arff_instances_data.numInstances() > self.current_instance_index
 
-    def next_instance(self) -> Instance:
+    def next_instance(self) -> Union[LabeledInstance, RegressionInstance]:
         # Return None if all instances have been read already.
         if not self.has_more_instances():
             return None
diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py
@@ -1,5 +1,8 @@
-from capymoa.evaluation.evaluation import prequential_evaluation_anomaly
-from capymoa.stream.generator import SEA
+from contextlib import nullcontext
+from itertools import product
+from capymoa.evaluation.evaluation import _is_fast_mode_compilable, prequential_evaluation_anomaly
+from capymoa.regressor import KNNRegressor
+from capymoa.stream.generator import SEA, HyperPlaneRegression, RandomTreeGenerator
 from capymoa.classifier import NaiveBayes, HoeffdingTree
 from capymoa.evaluation import (prequential_evaluation,
                                 prequential_evaluation_multiple_learners,
@@ -171,4 +174,77 @@ def test_prequential_evaluation_anomaly():
     assert results_1st_run['windowed'].auc() == pytest.approx(
         results_2nd_run['windowed'].auc(), abs=0.001
     ), f"prequential_evaluation_anomaly same synthetic stream: Expected AUC of " \
-       f"{results_1st_run['windowed'].auc():0.3f} got {results_2nd_run['windowed'].auc(): 0.3f}"
+       f"{results_1st_run['windowed'].auc():0.3f} got {results_2nd_run['windowed'].auc(): 0.3f}"
+    
+
+
+
+@pytest.mark.parametrize(
+    ["restart_stream", "optimise", "regression", "evaluation"],
+    list(
+        product(
+            [True, False],
+            [True, False],
+            [True, False],
+            [
+                prequential_evaluation,
+                prequential_ssl_evaluation,
+            ],
+        )
+    ),
+)
+def test_restart_stream_flag(restart_stream, optimise, regression, evaluation):
+    """Check that ``restart_stream`` decides whether evaluation rewinds the stream."""
+    expect_error = False
+    # Some configurations are not supported by some evaluation methods.
+    # When these are eventually supported, this test will need to be updated.
+
+    # Create a stream and learner
+    stream = (
+        HyperPlaneRegression() if regression else RandomTreeGenerator(num_classes=10)
+    )
+
+    # This evaluation function does not yet support regression
+    if evaluation == prequential_ssl_evaluation and regression:
+        expect_error = True
+
+    if not regression:
+        learner = NaiveBayes(
+            schema=stream.get_schema()
+        )  # The type of model is not important
+    else:
+        learner = KNNRegressor(schema=stream.get_schema())
+    assert _is_fast_mode_compilable(
+        stream, learner, True
+    ), "Fast mode should always be compilable for this test"
+
+    def _take_y(num_instances):
+        if regression:
+            return [stream.next_instance().y_value for _ in range(num_instances)]
+        else:
+            return [stream.next_instance().y_index for _ in range(num_instances)]
+
+    # Store targets from the stream for use in assertions later.
+    y_stream = _take_y(20)
+    stream.restart()  # Must restart the stream to get the same instances again
+
+    # Consume the first 10 instances
+    _take_y(10)
+    with pytest.raises((RuntimeError, ValueError)) if expect_error else nullcontext():
+        # Consume either the next 5 instances or the same 5 instances again
+        # depending on the ``restart_stream`` flag
+        evaluation(
+            stream=stream,
+            learner=learner,
+            max_instances=5,
+            optimise=optimise,
+            restart_stream=restart_stream,
+        )
+
+        # After a restart the evaluator re-reads instances 0-4, so 5-9 come next.
+        # Without one it reads 10-14 (after the 10 taken above), so 15-19 come next.
+        y_remaining = _take_y(5)
+        if restart_stream:
+            assert y_remaining == y_stream[5:10]
+        else:
+            assert y_remaining == y_stream[15:20]

Original file line number	Diff line number	Diff line change
`@@ -11,6 +11,7 @@`
`11`	`11`	`PredictionIntervalWindowedEvaluator,`
`12`	`12`	`AnomalyDetectionEvaluator,`
`13`	`13`	`)`
	`14`	`+from . import results`
`14`	`15`
`15`	`16`	`__all__ = [`
`16`	`17`	`"prequential_evaluation",`
`@@ -24,4 +25,5 @@`
`24`	`25`	`"PredictionIntervalEvaluator",`
`25`	`26`	`"PredictionIntervalWindowedEvaluator",`
`26`	`27`	`"AnomalyDetectionEvaluator",`
	`28`	`+ "results"`
`27`	`29`	`]`