-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #22 from philip-ndikum/feat-sliding-window-models
Refactor: Design for Interoperability Across Forecasting Modes and SHAP Compatibility
- Loading branch information
Showing
23 changed files
with
6,207 additions
and
1,976 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
"""TemporalScope/src/temporalscope/core/exceptions.py | ||
This module defines custom exceptions and warnings used throughout the TemporalScope package, | ||
specifically for handling errors and edge cases in the TimeFrame class. These custom error | ||
types and warnings are designed to provide clear and actionable feedback for developers | ||
when issues are encountered during time-series forecasting workflows. | ||
Use Cases: | ||
---------- | ||
- **TimeColumnError**: Raised when there are validation issues with the `time_col` such as unsupported types. | ||
- **MixedTypesWarning**: Raised when mixed numeric and timestamp types are detected in `time_col`. | ||
- **MixedTimezonesWarning**: Raised when `time_col` contains a mixture of timezone-aware and naive timestamps. | ||
Classes:
--------
- `TimeFrameError`: The base class for TimeFrame-related exceptions in this module.
- `TimeColumnError`: Raised when the time column has invalid values or types.
- `MixedTypesWarning`: Warning issued when the `time_col` contains mixed numeric and timestamp-like types.
- `MixedTimezonesWarning`: Warning issued when the `time_col` contains a mix of timezone-aware and naive timestamps.
- `MixedFrequencyWarning`: Warning issued when the `time_col` contains timestamps of mixed frequencies.
- `UnsupportedBackendError`: Raised when an unsupported backend is requested (derives directly from `Exception`).
Example Usage: | ||
-------------- | ||
.. code-block:: python

    import warnings

    from temporalscope.core.exceptions import TimeColumnError, MixedTypesWarning, MixedTimezonesWarning

    def validate_time_column(df):
        # `contains_mixed_types` stands in for a user-supplied check.
        if df["time"].dtype == object:
            raise TimeColumnError("Invalid time column data type.")
        elif contains_mixed_types(df["time"]):
            warnings.warn("Mixed numeric and timestamp types.", MixedTypesWarning)
""" | ||
|
||
|
||
class TimeFrameError(Exception):
    """Root of the exception hierarchy for the TimeFrame module.

    Every error that relates to the `TimeFrame` class is meant to derive from
    this type, so callers can catch the whole family with a single
    ``except TimeFrameError`` clause. Subclass it to model a more specific
    error condition.
    """
|
||
|
||
class TimeColumnError(TimeFrameError):
    """Error signalling a problem with the `time_col` of a TimeFrame.

    Intended for situations where the time column is missing, holds an
    unsupported type (neither numeric nor timestamp-like), or contains
    invalid data such as null values. The explanation of the failure is
    passed as the exception message, exactly as for any built-in exception.

    Example Usage:
    --------------
    .. code-block:: python

        is_numeric = pd.api.types.is_numeric_dtype(df[time_col])
        is_timestamp = pd.api.types.is_datetime64_any_dtype(df[time_col])
        if not (is_numeric or is_timestamp):
            raise TimeColumnError("`time_col` must be numeric or timestamp-like.")
    """
|
||
|
||
class MixedTypesWarning(UserWarning):
    """Warning category for a `time_col` that mixes numeric and timestamp-like values.

    Emit it when the time column holds both numeric and timestamp-like
    entries, since such a mixture can make time-series processing behave
    unpredictably.

    Example Usage:
    --------------
    .. code-block:: python

        if numeric_mask and timestamp_mask:
            warnings.warn("`time_col` contains mixed numeric and timestamp-like types.", MixedTypesWarning)
    """
|
||
|
||
class MixedTimezonesWarning(UserWarning):
    """Warning category for a `time_col` that mixes timezone-aware and naive timestamps.

    Emit it when some timestamps in the time column carry timezone
    information and others do not; models that expect one consistent
    timestamp format may fail on such data.

    Example Usage:
    --------------
    .. code-block:: python

        # `timestamps` is any iterable of datetime-like values.
        has_aware = any(ts.tzinfo is not None for ts in timestamps)
        has_naive = any(ts.tzinfo is None for ts in timestamps)
        if has_aware and has_naive:
            warnings.warn("`time_col` contains mixed timezone-aware and naive timestamps.", MixedTimezonesWarning)
    """
|
||
|
||
class MixedFrequencyWarning(UserWarning):
    """Warning category for a `time_col` whose timestamps do not share one frequency.

    Emit it when the time column combines timestamps of different
    frequencies (for example daily, monthly, and yearly), because
    time-series operations that assume a uniform frequency can then behave
    inconsistently.

    Example Usage:
    --------------
    .. code-block:: python

        inferred_freq = pd.infer_freq(time_col.dropna())
        if inferred_freq is None:
            warnings.warn("`time_col` contains mixed timestamp frequencies.", MixedFrequencyWarning)
    """
|
||
|
||
class UnsupportedBackendError(Exception):
    """Exception raised when an unsupported backend is encountered.

    This error is raised when a user attempts to use a backend that is not
    supported by TemporalScope. It centralizes backend validation errors
    across the package.

    Instances survive ``copy.copy`` and ``pickle`` round-trips unchanged,
    which matters when the error crosses a process boundary.

    Attributes:
        backend (str): The invalid backend that caused the error.
        message (str): The fully formatted explanation of the error; this is
            also ``args[0]`` and the result of ``str(exc)``.
    """

    def __init__(self, backend: str, message: str = "Unsupported backend") -> None:
        """Initialize the UnsupportedBackendError.

        :param backend: The invalid backend (e.g., 'pl', 'pd', 'mpd') that caused the error.
        :param message: Optional; a custom error message prefix. Defaults to "Unsupported backend".
        """
        self.backend = backend
        # Remember the caller's prefix so __reduce__ can replay the constructor
        # with the original arguments instead of the already-formatted text.
        self._message_prefix = message
        self.message = f"{message}: {backend}. Supported backends are 'pd', 'mpd', 'pl'."
        super().__init__(self.message)

    def __reduce__(self):
        """Support copying and pickling by replaying the original constructor arguments.

        The default ``BaseException.__reduce__`` rebuilds the object from
        ``args``, i.e. from the formatted message alone. That would pass the
        formatted message in as ``backend`` and format it a second time.

        :return: A ``(callable, args, state)`` tuple as expected by ``pickle``/``copy``.
        """
        return (type(self), (self.backend, self._message_prefix), dict(self.__dict__))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
"""TemporalScope/src/temporalscope/core/temporal_core_processing.py | ||
Core Dataset Conversion and Interoperability Layer | ||
This module provides core functionalities for dataset preparation and conversion, primarily | ||
focused on handling multi-step workflows and ensuring interoperability between backends like | ||
Pandas, TensorFlow, Modin, and Polars. It facilitates conversions required for downstream | ||
tasks such as those used by the `temporal_target_shifter.py` module, ensuring multi-step | ||
processing is smooth and integrated with deep learning and machine learning frameworks. | ||
The module is written in a functional style: it exposes plain module-level functions
rather than classes, avoiding object-oriented over-complication for ease of use and extensibility.
Key Features: | ||
------------- | ||
- **Dataset Conversion**: Functions for converting between formats (e.g., Pandas, TensorFlow). | ||
- **Interoperability**: Manages conversions between different backends for multi-step workflows. | ||
- **Support for Future Extensions**: Stubbed for future implementations of key features required | ||
by downstream tasks like multi-step target handling and TensorFlow dataset conversion. | ||
Example Usage: | ||
-------------- | ||
.. code-block:: python

    import pandas as pd

    from temporalscope.core.temporal_core_processing import convert_to_tensorflow, convert_to_pandas

    # Example DataFrame
    df = pd.DataFrame(
        {"time": pd.date_range(start="2023-01-01", periods=100, freq="D"), "feature_1": range(100), "target": range(100)}
    )

    # Convert DataFrame to TensorFlow Dataset
    tf_dataset = convert_to_tensorflow(df)

    # Convert TensorFlow Dataset back to Pandas
    df_back = convert_to_pandas(tf_dataset)
""" | ||
|
||
import pandas as pd | ||
import tensorflow as tf | ||
|
||
from temporalscope.core.core_utils import SupportedBackendDataFrame | ||
|
||
|
||
def convert_to_tensorflow(df: SupportedBackendDataFrame) -> tf.data.Dataset:
    """Stub: Convert a DataFrame to a TensorFlow Dataset.

    This function will convert Pandas, Modin, or Polars DataFrames into a TensorFlow Dataset
    to enable compatibility with deep learning frameworks like TensorFlow.

    The conversion is not implemented yet. The stub raises instead of silently
    returning ``None``, so a caller cannot mistake the missing result for a
    dataset and fail later with an unrelated error.

    :param df: The input DataFrame to convert.
    :return: A TensorFlow `tf.data.Dataset` object (once implemented).
    :raises NotImplementedError: Always, until the conversion is implemented.
    """
    raise NotImplementedError("convert_to_tensorflow is not implemented yet.")
|
||
|
||
def convert_to_pandas(df: SupportedBackendDataFrame) -> pd.DataFrame:
    """Stub: Convert a DataFrame or TensorFlow Dataset to a Pandas DataFrame.

    This function will handle converting Modin, Polars, or TensorFlow Datasets back to Pandas
    DataFrames to ensure interoperability across backends and downstream tasks.

    The conversion is not implemented yet. The stub raises instead of silently
    returning ``None``, so a caller cannot mistake the missing result for a
    DataFrame and fail later with an unrelated error.

    :param df: The input DataFrame or TensorFlow Dataset.
    :return: A Pandas DataFrame (once implemented).
    :raises NotImplementedError: Always, until the conversion is implemented.
    """
    raise NotImplementedError("convert_to_pandas is not implemented yet.")
|
||
|
||
def handle_multi_step_conversion(df: pd.DataFrame, sequence_length: int) -> pd.DataFrame:
    """Stub: Prepare DataFrame for multi-step forecasting.

    This function will handle the preparation of multi-step targets by expanding the target
    column into sequences of the specified length, suitable for sequential models.

    The preparation is not implemented yet. The stub raises instead of silently
    returning ``None``, so a caller cannot mistake the missing result for a
    prepared DataFrame and fail later with an unrelated error.

    :param df: The input DataFrame containing single-step targets.
    :param sequence_length: The length of the target sequence for multi-step forecasting.
    :return: A DataFrame with expanded target sequences (once implemented).
    :raises NotImplementedError: Always, until the preparation is implemented.
    """
    raise NotImplementedError("handle_multi_step_conversion is not implemented yet.")
Oops, something went wrong.