Skip to content

Commit

Permalink
Merge pull request #22 from philip-ndikum/feat-sliding-window-models
Browse files Browse the repository at this point in the history
Refactor: Design for Interoperability Across Forecasting Modes and SHAP Compatibility
  • Loading branch information
philip-ndikum authored Oct 6, 2024
2 parents 766d0a6 + 6041fa6 commit d3273e1
Show file tree
Hide file tree
Showing 23 changed files with 6,207 additions and 1,976 deletions.
18 changes: 16 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ dependencies = ["pre-commit", "ruff", "jupyterlab", "notebook", "commitizen"]
features = ["docs"]

[tool.hatch.envs.test]
extra-dependencies = ["pytest", "pytest-cov", "pytest-custom_exit_code"]
extra-dependencies = ["pytest", "pytest-cov", "pytest-custom_exit_code", "pytest-mock"]

[tool.hatch.envs.docs.scripts]
build = "sphinx-build -WTb html . _build"
Expand Down Expand Up @@ -159,6 +159,7 @@ python_version = "3.10"
ignore_missing_imports = true
warn_unreachable = true
exclude = 'test/*'
warn_return_any = false # Do not warn when a function with a declared return type returns a value typed as Any

[tool.bandit]
exclude_dirs = ["test"]
Expand All @@ -178,7 +179,20 @@ check = "ruff check {args}"
fix = "ruff check --fix"
format = "ruff format {args}"
format-check = "ruff format --check {args}"
quality-assurance = "pre-commit run"
docformat = """
docformatter --check --recursive --wrap-summaries 120 --wrap-descriptions 120 src/temporalscope || \
docformatter --in-place --recursive --wrap-summaries 120 --wrap-descriptions 120 src/temporalscope
"""
clear-coverage = "coverage erase"
# Automated developer quality-assurance (QA) script
quality-assurance = """
pytest &&
docformatter --check --recursive --wrap-summaries 120 --wrap-descriptions 120 src/temporalscope || \
docformatter --in-place --recursive --wrap-summaries 120 --wrap-descriptions 120 src/temporalscope
ruff check src/temporalscope --output-format=full --show-files --show-fixes &&
mypy src/temporalscope --ignore-missing-imports --show-error-codes --warn-unreachable &&
bandit -r src/temporalscope
"""
generate-kernel = """
python -m ipykernel install --user --name temporalscope-kernel --display-name "TemporalScope"
echo "Jupyter kernel 'TemporalScope' created. You can now use it in Jupyter notebooks."
Expand Down
426 changes: 357 additions & 69 deletions src/temporalscope/core/core_utils.py

Large diffs are not rendered by default.

163 changes: 163 additions & 0 deletions src/temporalscope/core/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""TemporalScope/src/temporalscope/core/exceptions.py
This module defines custom exceptions and warnings used throughout the TemporalScope package,
specifically for handling errors and edge cases in the TimeFrame class. These custom error
types and warnings are designed to provide clear and actionable feedback for developers
when issues are encountered during time-series forecasting workflows.
Use Cases:
----------
- **TimeColumnError**: Raised when there are validation issues with the `time_col` such as unsupported types.
- **MixedTypesWarning**: Raised when mixed numeric and timestamp types are detected in `time_col`.
- **MixedTimezonesWarning**: Raised when `time_col` contains a mixture of timezone-aware and naive timestamps.
Classes:
--------
- `TimeFrameError`: The base class for all custom exceptions in the TimeFrame module.
- `TimeColumnError`: Raised when the time column has invalid values or types.
- `MixedTypesWarning`: Warning issued when the `time_col` contains mixed numeric and timestamp-like types.
- `MixedTimezonesWarning`: Warning issued when the `time_col` contains a mix of timezone-aware and naive timestamps.
- `MixedFrequencyWarning`: Warning issued when the `time_col` contains timestamps of mixed frequencies.
- `UnsupportedBackendError`: Raised when an unsupported backend is requested.
Example Usage:
--------------
.. code-block:: python
from temporalscope.core.exceptions import TimeColumnError, MixedTypesWarning, MixedTimezonesWarning
def validate_time_column(df):
if df["time"].dtype == object:
raise TimeColumnError("Invalid time column data type.")
elif contains_mixed_types(df["time"]):
warnings.warn("Mixed numeric and timestamp types.", MixedTypesWarning)
"""


class TimeFrameError(Exception):
    """Root of the exception hierarchy for the TimeFrame module.

    All errors specific to the `TimeFrame` class derive from this type, so
    callers can handle the whole family with a single ``except TimeFrameError``
    clause. Subclass it to model each distinct error condition rather than
    raising it directly.
    """


class TimeColumnError(TimeFrameError):
    """Signal a validation problem with the `time_col` of a TimeFrame.

    Intended for cases where the `time_col` is missing, holds unsupported
    types (neither numeric nor timestamp-like), or contains invalid data such
    as null values. The explanation is passed as the first positional argument
    and is available through ``str(error)``.

    Example Usage:
    --------------
    .. code-block:: python

        is_numeric = pd.api.types.is_numeric_dtype(df[time_col])
        is_timestamp = pd.api.types.is_datetime64_any_dtype(df[time_col])
        if not (is_numeric or is_timestamp):
            raise TimeColumnError("`time_col` must be numeric or timestamp-like.")
    """


class MixedTypesWarning(UserWarning):
    """Warning category for a `time_col` that mixes numeric and timestamp-like values.

    Emit this when the time column holds both numeric and timestamp-like
    entries, since that combination could lead to unpredictable behavior in
    time series processing workflows.

    Example Usage:
    --------------
    .. code-block:: python

        if numeric_mask.any() and timestamp_mask.any():
            warnings.warn("`time_col` contains mixed numeric and timestamp-like types.", MixedTypesWarning)
    """


class MixedTimezonesWarning(UserWarning):
    """Warning category for a `time_col` that mixes timezone-aware and naive timestamps.

    Emit this when the time column combines timezone-aware and timezone-naive
    timestamps, which could cause errors in models that require a consistent
    timestamp format.

    Example Usage:
    --------------
    .. code-block:: python

        # Illustrative check on a column of individual timestamp objects.
        is_aware = df[time_col].map(lambda ts: ts.tzinfo is not None)
        if is_aware.any() and not is_aware.all():
            warnings.warn("`time_col` contains mixed timezone-aware and naive timestamps.", MixedTimezonesWarning)
    """


class MixedFrequencyWarning(UserWarning):
    """Warning category for a `time_col` whose timestamps do not share one frequency.

    Emit this when the time column combines timestamps of different
    frequencies (for example daily, monthly, and yearly), which can lead to
    inconsistent behavior in time series operations that assume a uniform
    frequency.

    Example Usage:
    --------------
    .. code-block:: python

        inferred_freq = pd.infer_freq(time_col.dropna())
        if inferred_freq is None:
            warnings.warn("`time_col` contains mixed timestamp frequencies.", MixedFrequencyWarning)
    """


class UnsupportedBackendError(Exception):
    """Exception raised when an unsupported backend is encountered.

    This error is raised when a user attempts to use a backend that is not
    supported by TemporalScope. It centralizes backend validation errors across
    the package.

    Attributes:
        backend: The invalid backend that caused the error.
        message: The fully formatted error text, i.e. the caller-supplied
            prefix followed by the offending backend and the list of
            supported backends ('pd', 'mpd', 'pl').
    """

    def __init__(self, backend: str, message: str = "Unsupported backend") -> None:
        """Initialize the UnsupportedBackendError.

        :param backend: The invalid backend (e.g., 'pl', 'pd', 'mpd') that caused the error.
        :param message: Optional; a custom error message prefix. Defaults to "Unsupported backend".
        """
        self.backend = backend
        # Keep the raw prefix so the exception can be rebuilt from its original
        # constructor arguments (see ``__reduce__``).
        self._message_prefix = message
        self.message = f"{message}: {backend}. Supported backends are 'pd', 'mpd', 'pl'."
        super().__init__(self.message)

    def __reduce__(self):
        """Rebuild copies/unpickled instances from the original constructor arguments.

        The default ``BaseException.__reduce__`` re-invokes the class with
        ``self.args``, which here holds only the already-formatted message.
        That would pass the formatted text as ``backend`` and format it a
        second time, so the reconstruction arguments are supplied explicitly.

        :return: A ``(callable, args, state)`` tuple as expected by ``copy`` and ``pickle``.
        """
        return (type(self), (self.backend, self._message_prefix), self.__dict__.copy())
96 changes: 96 additions & 0 deletions src/temporalscope/core/temporal_core_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""TemporalScope/src/temporalscope/core/temporal_core_processing.py
Core Dataset Conversion and Interoperability Layer
This module provides core functionalities for dataset preparation and conversion, primarily
focused on handling multi-step workflows and ensuring interoperability between backends like
Pandas, TensorFlow, Modin, and Polars. It facilitates conversions required for downstream
tasks such as those used by the `temporal_target_shifter.py` module, ensuring multi-step
processing is smooth and integrated with deep learning and machine learning frameworks.
The module is written in a purely functional style: it exposes plain functions rather than
classes, avoiding object-oriented over-complication for ease of use and extensibility.
Key Features:
-------------
- **Dataset Conversion**: Functions for converting between formats (e.g., Pandas, TensorFlow).
- **Interoperability**: Manages conversions between different backends for multi-step workflows.
- **Support for Future Extensions**: Stubbed for future implementations of key features required
by downstream tasks like multi-step target handling and TensorFlow dataset conversion.
Example Usage:
--------------
.. code-block:: python
import pandas as pd
from temporalscope.core.temporal_core_processing import convert_to_tensorflow, convert_to_pandas
# Example DataFrame
df = pd.DataFrame(
{"time": pd.date_range(start="2023-01-01", periods=100, freq="D"), "feature_1": range(100), "target": range(100)}
)
# Convert DataFrame to TensorFlow Dataset
tf_dataset = convert_to_tensorflow(df)
# Convert TensorFlow Dataset back to Pandas
df_back = convert_to_pandas(tf_dataset)
"""

import pandas as pd
import tensorflow as tf

from temporalscope.core.core_utils import SupportedBackendDataFrame


def convert_to_tensorflow(df: SupportedBackendDataFrame) -> tf.data.Dataset:
    """Stub: Turn a backend DataFrame into a TensorFlow Dataset.

    Placeholder for a future conversion of Pandas, Modin, or Polars DataFrames
    into a TensorFlow Dataset, enabling compatibility with deep learning
    frameworks like TensorFlow. The function currently has no implementation
    and returns ``None``.

    :param df: The input DataFrame to convert.
    :return: Intended to be a TensorFlow `tf.data.Dataset` object once implemented.
    """


def convert_to_pandas(df: SupportedBackendDataFrame) -> pd.DataFrame:
    """Stub: Turn a backend DataFrame or TensorFlow Dataset into a Pandas DataFrame.

    Placeholder for a future conversion of Modin, Polars, or TensorFlow
    Datasets back to Pandas DataFrames, ensuring interoperability across
    backends and downstream tasks. The function currently has no
    implementation and returns ``None``.

    :param df: The input DataFrame or TensorFlow Dataset.
    :return: Intended to be a Pandas DataFrame once implemented.
    """


def handle_multi_step_conversion(df: pd.DataFrame, sequence_length: int) -> pd.DataFrame:
    """Stub: Expand single-step targets for multi-step forecasting.

    Placeholder for future logic that prepares multi-step targets by expanding
    the target column into sequences of the requested length, suitable for
    sequential models. The function currently has no implementation and
    returns ``None``.

    :param df: The input DataFrame containing single-step targets.
    :param sequence_length: The length of the target sequence for multi-step forecasting.
    :return: Intended to be a DataFrame with expanded target sequences once implemented.
    """
Loading

0 comments on commit d3273e1

Please sign in to comment.