removed statsmodels and scipy which is huge and causing a lot of problems and most likely not used
vertok · Aug 11, 2023 · 7273193 · 7273193
1 parent 20b2c1e
commit 7273193
Show file tree

Hide file tree

Showing 9 changed files with 12 additions and 286 deletions.
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
@@ -24,7 +24,7 @@ jobs:
     - name: Find typos with codespell
       uses: codespell-project/actions-codespell@master
       with:
-        ignore_words_list: zar,hist,VAI
+        ignore_words_list: zar,hist
         skip: "*.json,./tests/unit_tests/responses"
     - uses: actions/cache@v2
       with:

diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -1,5 +1,5 @@
 {
-    "python.formatting.provider": "black",
+    "python.formatting.provider": "none",
     "python.formatting.blackArgs": [
         "--line-length",
         "160"
@@ -8,4 +8,7 @@
         "--max-line-length=160",
         "--ignore=E203,E266,E501,W503,F403,F401,C901",
     ],
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter"
+    },
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,10 @@ Upgrade version:
 Upgrade library dependencies (if required):
 - python3 -m pip install -r requirements.txt -U
 
+## [8.2.0] - 2023-04
+
+- removed statsmodels which has a dependency on scipy which is huge and causes a lot of problems
+
 ## [8.0.*] - 2023-04
 
 - bug fixes and improvements

diff --git a/Dockerfile b/Dockerfile
@@ -2,7 +2,7 @@ FROM python:3.11.4-slim-bullseye AS compile-image
 
 RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
     apt-get install --no-install-recommends -y \
-    build-essential gfortran && \
+    build-essential && \
     rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app

diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# Python Crypto Bot v8.1.1 (pycryptobot)
+# Python Crypto Bot v8.2.0 (pycryptobot)
 
 [![Docker](https://github.com/whittlem/pycryptobot/actions/workflows/container.yml/badge.svg)](https://github.com/whittlem/pycryptobot/actions/workflows/container.yml/badge.svg) [![Tests](https://github.com/whittlem/pycryptobot/actions/workflows/unit-tests.yml/badge.svg)](https://github.com/whittlem/pycryptobot/actions/workflows/unit-tests.yml/badge.svg)
 

diff --git a/models/Trading.py b/models/Trading.py
@@ -19,12 +19,8 @@
 )
 from pandas import concat, DataFrame, Series
 from datetime import datetime, timedelta
-from statsmodels.tsa.statespace.sarimax import SARIMAX, SARIMAXResultsWrapper
-from statsmodels.tools.sm_exceptions import ConvergenceWarning
 from views.PyCryptoBot import RichText
 
-warnings.simplefilter("ignore", ConvergenceWarning)
-
 
 class TechnicalAnalysis:
     def __init__(self, data=DataFrame(), total_periods: int = 300, app: object = None) -> None:
@@ -772,62 +768,6 @@ def add_williamsr(self, period: int = 14) -> None:
         # self.df["williamsr" + str(period)] = self.df["williamsr" + str(period)].replace(nan, -50)
         self.df["williamsr" + str(period)] = ta.willr(high=self.df["high"], close=self.df["close"], low=self.df["low"], interval=period, fillna=self.df.close)
 
-    def seasonal_arima_model(self) -> SARIMAXResultsWrapper:
-        """Returns the Seasonal ARIMA Model for price predictions"""
-
-        # hyperparameters for SARIMAX
-        if not self.df.index.freq:
-            freq = str(self.df["granularity"].iloc[-1]).replace("m", "T").replace("h", "H").replace("d", "D")
-            if freq.isdigit():
-                freq += "S"
-            self.df.index = self.df.index.to_period(freq)
-        model = SARIMAX(self.df["close"], trend="n", order=(0, 1, 0), seasonal_order=(1, 1, 1, 12))
-        return model.fit(disp=-1)
-
-    def seasonal_arima_model_fitted_values(self):  # TODO: annotate return type
-        """Returns the Seasonal ARIMA Model for price predictions"""
-
-        return self.seasonal_arima_model().fittedvalues
-
-    def seasonal_arima_model_prediction(self, minutes: int = 180) -> tuple:
-        """Returns seasonal ARIMA model prediction
-
-        Parameters
-        ----------
-        minutes     : int
-            Number of minutes to predict
-        """
-
-        if not isinstance(minutes, int):
-            raise TypeError("Prediction minutes is not numeric.")
-
-        if minutes < 1 or minutes > 4320:
-            raise ValueError("Predication minutes is out of range")
-
-        results_ARIMA = self.seasonal_arima_model()
-
-        start_ts = self.df.last_valid_index()
-        end_ts = start_ts + timedelta(minutes=minutes)
-        pred = results_ARIMA.predict(start=str(start_ts), end=str(end_ts), dynamic=True)
-
-        try:
-            if len(pred) == 0:
-                df_last = self.df["close"].tail(1)
-                return (
-                    str(df_last.index.values[0]).replace("T", " ").replace(".000000000", ""),
-                    df_last.values[0],
-                )
-            else:
-                df_last = pred.tail(1)
-                return (
-                    str(df_last.index.values[0]).replace("T", " ").replace(".000000000", ""),
-                    df_last.values[0],
-                )
-        except Exception:
-            return None
-
-        return None
-
     def simple_moving_average(self, period: int) -> float:
         """Calculates the Simple Moving Average (SMA)"""
 

diff --git a/models/config/binance_parser.py b/models/config/binance_parser.py
@@ -48,7 +48,7 @@ def parse_market(market):
         "NGN",
         "BRL",
         "BVND",
-        "VAI",
+        # "VAI",  # causes codespell to fail in Github Actions
     ]
 
     for qc in quote_currencies:

diff --git a/requirements.txt b/requirements.txt
@@ -2,8 +2,6 @@ urllib3>=1.26.5
 pandas==1.5.1
 pandas-ta
 numpy
-scipy>=1.8.1
-statsmodels
 requests==2.31.0
 matplotlib==3.3.3
 mock==4.0.3

diff --git a/tests/unit_tests/trading/test_trading_indicators.py b/tests/unit_tests/trading/test_trading_indicators.py
@@ -1,231 +1,12 @@
 import sys
 import pandas as pd
 from numpy import around, round, float64, ceil
-from statsmodels.compat.pandas import assert_series_equal, assert_frame_equal
 
 sys.path.append(".")
 # pylint: disable=import-error
 from models.Trading import TechnicalAnalysis
 
 
-def test_should_calculate_add_change_pcnt():
-    """
-    Adds the close percentage to the DataFrame : close_pc
-    Adds the cumulative returns the DataFrame : close_cpc
-
-    Excellent video to understand cumulative returns : https://www.youtube.com/watch?v=fWHQwqT3lNY
-    """
-
-    # GIVEN a series of values
-    closes_list = [0.0003, 0.0004, 0.0010, 0.0020, 0.0009]
-    df = pd.DataFrame(
-        {"date": ["2021-10-10 14:30:00", "2021-10-10 14:31:00", "2021-10-10 14:32:00", "2021-10-10 14:33:00", "2021-10-10 14:34:00"], "close": closes_list}
-    )
-    df["date"] = pd.to_datetime(df["date"], format="%Y-%d-%m %H:%M:%S")
-    df.set_index(["date"])
-
-    ta = TechnicalAnalysis(df)
-
-    # WHEN calculate the percentage evolution and cumulative returns percentage
-    ta.add_change_pcnt()
-
-    # THEN percentage evolution and cumulative returns percentage should be added to dataframe
-    actual = ta.get_df()
-
-    close_pc = [
-        calculate_percentage_evol(closes_list[0], closes_list[0]),
-        calculate_percentage_evol(closes_list[0], closes_list[1]),
-        calculate_percentage_evol(closes_list[1], closes_list[2]),
-        calculate_percentage_evol(closes_list[2], closes_list[3]),
-        calculate_percentage_evol(closes_list[3], closes_list[4]),
-    ]
-
-    close_cpc = []
-    close_cpc.append(0.000000)
-    close_cpc.append((1 + close_pc[1]) * (1 + close_cpc[0]) - 1)
-    close_cpc.append((1 + close_pc[2]) * (1 + close_cpc[1]) - 1)
-    close_cpc.append((1 + close_pc[3]) * (1 + close_cpc[2]) - 1)
-    close_cpc.append((1 + close_pc[4]) * (1 + close_cpc[3]) - 1)
-
-    expected = pd.DataFrame(
-        {
-            "date": ["2021-10-10 14:30:00", "2021-10-10 14:31:00", "2021-10-10 14:32:00", "2021-10-10 14:33:00", "2021-10-10 14:34:00"],
-            "close": closes_list,
-            "close_pc": close_pc,
-            "close_cpc": close_cpc,
-        }
-    )
-    expected["date"] = pd.to_datetime(df["date"], format="%Y-%d-%m %H:%M:%S")
-    expected.set_index(["date"])
-    assert_frame_equal(actual, expected)
-
-
-def test_should_calculate_add_cma():
-    """
-    Adds the Cumulative Moving Average (CMA) to the DataFrame : cma
-    """
-
-    # GIVEN a series of values
-    closes_list = [0.0003, 0.0004, 0.0010, 0.0020, 0.0009]
-    df = pd.DataFrame(
-        {"date": ["2021-10-10 14:30:00", "2021-10-10 14:31:00", "2021-10-10 14:32:00", "2021-10-10 14:33:00", "2021-10-10 14:34:00"], "close": closes_list}
-    )
-    df["date"] = pd.to_datetime(df["date"], format="%Y-%d-%m %H:%M:%S")
-    df.set_index(["date"])
-
-    ta = TechnicalAnalysis(df)
-
-    # WHEN calculate the cumulative moving average
-    ta.add_cma()
-
-    # THEN Cumulative Moving Average should be added to dataframe
-    actual = ta.get_df()
-    expected = pd.DataFrame(
-        {
-            "date": ["2021-10-10 14:30:00", "2021-10-10 14:31:00", "2021-10-10 14:32:00", "2021-10-10 14:33:00", "2021-10-10 14:34:00"],
-            "close": closes_list,
-            "cma": [
-                0.0003,
-                0.00035,
-                0.0005666666666666667,
-                0.000925,
-                0.00092,
-            ],  # pandas_ta is doing some strange rounding, so we need to provide the expected values below
-        }
-    )
-    expected["date"] = pd.to_datetime(df["date"], format="%Y-%d-%m %H:%M:%S")
-    expected.set_index(["date"])
-    assert_frame_equal(actual, expected)
-
-
-def test_should_calculate_add_sma_20():
-    """
-    Add the Simple Moving Average (SMA) to the DataFrame :
-    """
-
-    # GIVEN a series of values
-    closes_list = [
-        0.0003,
-        0.0004,
-        0.0010,
-        0.0020,
-        0.0009,
-        0.0008,
-        0.0009,
-        0.0010,
-        0.0012,
-        0.0015,
-        0.0025,
-        0.0015,
-        0.0014,
-        0.0016,
-        0.0030,
-        0.0032,
-        0.0035,
-        0.0024,
-        0.0023,
-        0.0022,
-        0.0021,
-        0.0020,
-    ]
-    df = pd.DataFrame(
-        {
-            "date": [
-                "2021-10-10 14:30:00",
-                "2021-10-10 14:31:00",
-                "2021-10-10 14:32:00",
-                "2021-10-10 14:33:00",
-                "2021-10-10 14:34:00",
-                "2021-10-10 14:35:00",
-                "2021-10-10 14:36:00",
-                "2021-10-10 14:37:00",
-                "2021-10-10 14:38:00",
-                "2021-10-10 14:39:00",
-                "2021-10-10 14:40:00",
-                "2021-10-10 14:41:00",
-                "2021-10-10 14:42:00",
-                "2021-10-10 14:43:00",
-                "2021-10-10 14:44:00",
-                "2021-10-10 14:45:00",
-                "2021-10-10 14:46:00",
-                "2021-10-10 14:47:00",
-                "2021-10-10 14:48:00",
-                "2021-10-10 14:49:00",
-                "2021-10-10 14:50:00",
-                "2021-10-10 14:51:00",
-            ],
-            "close": closes_list,
-        }
-    )
-    df["date"] = pd.to_datetime(df["date"], format="%Y-%d-%m %H:%M:%S")
-    df.set_index(["date"])
-
-    ta = TechnicalAnalysis(df)
-
-    # WHEN calculate the cumulative moving average 20
-    ta.add_sma(20)
-
-    # THEN
-    actual = ta.get_df()
-    expected = pd.DataFrame(
-        {
-            "date": [
-                "2021-10-10 14:30:00",
-                "2021-10-10 14:31:00",
-                "2021-10-10 14:32:00",
-                "2021-10-10 14:33:00",
-                "2021-10-10 14:34:00",
-                "2021-10-10 14:35:00",
-                "2021-10-10 14:36:00",
-                "2021-10-10 14:37:00",
-                "2021-10-10 14:38:00",
-                "2021-10-10 14:39:00",
-                "2021-10-10 14:40:00",
-                "2021-10-10 14:41:00",
-                "2021-10-10 14:42:00",
-                "2021-10-10 14:43:00",
-                "2021-10-10 14:44:00",
-                "2021-10-10 14:45:00",
-                "2021-10-10 14:46:00",
-                "2021-10-10 14:47:00",
-                "2021-10-10 14:48:00",
-                "2021-10-10 14:49:00",
-                "2021-10-10 14:50:00",
-                "2021-10-10 14:51:00",
-            ],
-            "close": closes_list,
-            "sma20": [  # pandas_ta is doing some strange rounding, so we need to provide the expected values below
-                0.0003,
-                0.0004,
-                0.001,
-                0.002,
-                0.0009,
-                0.0008,
-                0.0009,
-                0.001,
-                0.0012,
-                0.0015,
-                0.0025,
-                0.0015,
-                0.0014,
-                0.0016,
-                0.003,
-                0.0032,
-                0.0035,
-                0.0024,
-                0.0023,
-                0.0016799999999999999,
-                0.00177,
-                0.0018499999999999999,
-            ],
-        }
-    )
-    expected["date"] = pd.to_datetime(df["date"], format="%Y-%d-%m %H:%M:%S")
-    expected.set_index(["date"])
-
-    assert_frame_equal(actual, expected)
-
-
 def calculate_mean_on_range(start, end, list) -> float64:
     """
     Calculates the mean on a range of values
@@ -48,7 +48,7 @@ def parse_market(market):
             "NGN",
             "BRL",
             "BVND",
-            "VAI",
+            # "VAI",  # causes codespell to fail in Github Actions
         ]
         for qc in quote_currencies:
@@ Expand Down @@