Closed
Changes from all commits
Commits
42 commits
245c241
:bangbang: add own lttbc implementation
jonasvdd Jun 25, 2022
f8b3ca4
:lock:
jonasvdd Jun 27, 2022
cf25961
:sparkles: adding lttb return index method
jonasvdd Jun 27, 2022
fa7cba8
:level_slider: add windows & mac-os to test matrix
jvdd Jun 27, 2022
cc951c1
:fire: :goal_net: :dart: add MIT license
jonasvdd Jun 28, 2022
99da003
:see_no_evil: fix for #89
jonasvdd Jun 29, 2022
4b6661b
:ballot_box_with_check: adding tests
jonasvdd Jun 29, 2022
3d8e3b5
:mag: digging further into the code
jonasvdd Jun 29, 2022
82ed5e9
:broom: extend tests
jvdd Jun 29, 2022
49317d3
Merge pull request #90 from predict-idlab/numeric_object_series
jonasvdd Jun 29, 2022
2ed7423
:dash: new version
jonasvdd Jun 29, 2022
862bc0a
:fire: add support for figure dict input + propagate _grid_str
jvdd Jun 30, 2022
f96a2df
Merge branch 'main' into os_matrix
jvdd Jun 30, 2022
1c2f571
Merge pull request #92 from predict-idlab/figure_dict_input
jvdd Jun 30, 2022
0304972
:package: add serialization (pickle / deepcopy)
jvdd Jul 1, 2022
e0d2fa7
:white_check_mark: add serialization tests
jvdd Jul 1, 2022
2373c63
:pray: update tests
jvdd Jul 1, 2022
f69d356
:pray: fix tests for Mac-OS & Windows
jvdd Jul 1, 2022
71c6e28
:pray:
jvdd Jul 1, 2022
44bd022
Merge pull request #95 from predict-idlab/pray
jvdd Jul 1, 2022
c237069
:white_check_mark: add copy & deepcopy tests
jvdd Jul 3, 2022
c28b7ee
:robot: add python 3.10 to test matrix
jvdd Jul 3, 2022
6741dfd
:pray: convert python versions to string
jvdd Jul 3, 2022
4d1c888
Merge pull request #96 from predict-idlab/add_python3dot10
jvdd Jul 3, 2022
1b6c72b
:art: remove empty space under inline figure_resampler
jvdd Jul 8, 2022
f9929de
:thinking: add inline automatic show dash
jvdd Jul 8, 2022
fd8c2f6
:bike: add show_dash_kwargs to FigureResampler constructor
jvdd Jul 9, 2022
6abf757
:wind_face: formatting + extend tests
jvdd Jul 9, 2022
b80675a
:bread: extend tests
jvdd Jul 11, 2022
6886a61
Merge pull request #97 from predict-idlab/figresampler_display_improv…
jonasvdd Jul 11, 2022
c884c48
:tomato: pass pr_props as property of BaseFigure to super
jvdd Jul 11, 2022
d6ac0f6
Merge branch 'figresampler_display_improvements' into os_matrix
jvdd Jul 11, 2022
74f1d41
:tea: add _grid_str to grid tests
jvdd Jul 11, 2022
126f84a
:pineapple: improving docs + :mag:
jonasvdd Jul 11, 2022
1e4e069
Merge pull request #87 from predict-idlab/os_matrix
jonasvdd Jul 11, 2022
ba889e3
:bangbang: add own lttbc implementation
jonasvdd Jun 25, 2022
b70dc36
:lock:
jonasvdd Jun 27, 2022
d0ce589
:sparkles: adding lttb return index method
jonasvdd Jun 27, 2022
6bb3828
:lock:
jonasvdd Jul 11, 2022
d65f42b
:sparkles:
jonasvdd Jul 11, 2022
e360e88
:mag: :pray:
jonasvdd Jul 12, 2022
1140166
:pray: long -> long long for int64 storage
jonasvdd Jul 13, 2022
5 changes: 3 additions & 2 deletions .github/workflows/test.yml
@@ -12,11 +12,12 @@ on:
jobs:
build:

runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
python-version: [3.7, 3.8, 3.9]
os: ['windows-latest', 'macOS-latest', 'ubuntu-latest']
python-version: ['3.7', '3.8', '3.9', '3.10']

steps:
- uses: actions/checkout@v2
41 changes: 17 additions & 24 deletions LICENSE
@@ -1,28 +1,21 @@
Copyright (c) Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost
2021 Ghent University and IMEC vzw with offices at Technologiepark 122, 9052 Ghent,
Belgium - Contact info: http://predict.idlab.ugent.be
MIT License

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software for non-commercial educational and research use, including without
limitation the rights to use, copy, modify, merge, publish, distribute and/or
sublicense copies of the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following conditions:
Copyright (c) 2022 Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost.

1. The above copyright notice and this permission notice shall be included in
all copies of the Software.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

2. Permission is restricted to non-commercial educational and research use:
the use of the Software is allowed for teaching purposes and academic
research. Usage by non-academic parties is allowed in a strict research
environment only. The use of the results of the research for commercial
purposes or inclusion in commercial activities requires the permission of
Ghent University and IMEC vzw.
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

3. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
84 changes: 84 additions & 0 deletions build.py
@@ -0,0 +1,84 @@
import os
import shutil
import sys

from distutils.command.build_ext import build_ext
from distutils.core import Distribution
from distutils.core import Extension
from distutils.errors import CCompilerError
from distutils.errors import DistutilsExecError
from distutils.errors import DistutilsPlatformError

import numpy as np

# C Extensions
with_extensions = True


def get_script_path():
return os.path.dirname(os.path.realpath(sys.argv[0]))

extensions = []
if with_extensions:
extensions = [
Extension(
name="plotly_resampler.aggregation.algorithms.lttbcv2",
sources=["plotly_resampler/aggregation/algorithms/lttbcv2.c"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
include_dirs=[np.get_include(), get_script_path()],
),
]


class BuildFailed(Exception):

pass


class ExtBuilder(build_ext):
# This class allows C extension building to fail.

built_extensions = []

def run(self):
try:
build_ext.run(self)
except (DistutilsPlatformError, FileNotFoundError) as e:
print(" Unable to build the C extensions.")
raise e

def build_extension(self, ext):
try:
build_ext.build_extension(self, ext)
except (CCompilerError, DistutilsExecError, DistutilsPlatformError, ValueError) as e:
print(' Unable to build the "{}" C extension, '.format(ext.name))
raise e


def build(setup_kwargs):
"""
This function is mandatory in order to build the extensions.
"""
distribution = Distribution({"name": "plotly_resampler", "ext_modules": extensions})
distribution.package_dir = "plotly_resampler"

cmd = ExtBuilder(distribution)
cmd.ensure_finalized()
cmd.run()

# Copy built extensions back to the project
for output in cmd.get_outputs():
relative_extension = os.path.relpath(output, cmd.build_lib)
if not os.path.exists(output):
continue

shutil.copyfile(output, relative_extension)
mode = os.stat(relative_extension).st_mode
mode |= (mode & 0o444) >> 2
os.chmod(relative_extension, mode)

return setup_kwargs


if __name__ == "__main__":
build({})
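
As a quick sanity check on the build step above, a minimal smoke test is sketched below. It imports the compiled module by the package path declared in the Extension and calls downsample_return_index, the function used later in aggregators.py; the array sizes and dtypes here are illustrative assumptions, not part of the PR.

import numpy as np

# hypothetical smoke test: assumes the C extension compiled and was copied
# back into the package tree by build.py
from plotly_resampler.aggregation.algorithms import lttbcv2

x = np.arange(10_000, dtype=np.int64)                  # "time" axis
y = np.random.default_rng(0).standard_normal(10_000)   # values

idx = lttbcv2.downsample_return_index(x, y, 100)
print(len(idx))  # expected: 100 selected sample positions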
2 changes: 1 addition & 1 deletion plotly_resampler/__init__.py
@@ -6,7 +6,7 @@

__docformat__ = "numpy"
__author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost"
__version__ = "0.7.2"
__version__ = "0.7.2.2"

__all__ = [
"__version__",
6 changes: 1 addition & 5 deletions plotly_resampler/aggregation/aggregation_interface.py
@@ -95,7 +95,7 @@ def _insert_gap_none(self, s: pd.Series) -> pd.Series:
df_gap_idx = s.index.values[s_idx_diff > 3 * med_diff]
if len(df_gap_idx):
df_res_gap = pd.Series(
index=df_gap_idx, data=None, name=s.name, copy=False
index=df_gap_idx, data=None, name=s.name, copy=False, dtype=s.dtype
)

if isinstance(df_res_gap.index, pd.DatetimeIndex):
@@ -152,10 +152,6 @@ def aggregate(self, s: pd.Series, n_out: int) -> pd.Series:

self._supports_dtype(s)

# convert the bool values to uint8 (as we will display them on a y-axis)
if str(s.dtype) == "bool":
s = s.astype("uint8")

if len(s) > n_out:
# More samples that n_out -> perform data aggregation
s = self._aggregate(s, n_out=n_out)
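
The dtype argument added above keeps pandas from guessing a dtype for the all-None gap series. A small illustration of the difference is sketched below; the variable names are made up and the exact warning behaviour depends on the installed pandas version.

import numpy as np
import pandas as pd

s = pd.Series(np.arange(5, dtype="float64"), name="demo")

# without an explicit dtype, pandas infers one for the data=None series (and
# newer versions warn about the changing default for empty data); passing
# s.dtype keeps the gap markers in the same dtype as the original series
gaps = pd.Series(index=[10, 20], data=None, name=s.name, dtype=s.dtype)

out = pd.concat((s, gaps)).sort_index()
print(out.dtype)  # float64 -- gap rows are NaN in the series' own dtype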
61 changes: 19 additions & 42 deletions plotly_resampler/aggregation/aggregators.py
@@ -11,11 +11,11 @@

import math

import lttbc
import numpy as np
import pandas as pd

from ..aggregation.aggregation_interface import AbstractSeriesAggregator
from .algorithms import lttbcv2


class LTTB(AbstractSeriesAggregator):
@@ -73,42 +73,19 @@ def __init__(self, interleave_gaps: bool = True, nan_position="end"):
)

def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
# if we have categorical data, LTTB will convert the categorical values into
# their numeric codes, i.e., the index position of the category array
s_v = s.cat.codes.values if str(s.dtype) == "category" else s.values
s_i = s.index.values

if s_i.dtype.type == np.datetime64:
# lttbc does not support this datatype -> convert to int
# (where the time is represented in ns)
# REMARK:
# -> additional logic is needed to mitigate rounding errors
# First, the start offset is subtracted, after which the input series
# is set in the already requested format, i.e. np.float64

# NOTE -> Rounding errors can still persist, but this approach is already
# significantly less prone to it than the previos implementation.
s_i0 = s_i[0].astype(np.int64)
idx, data = lttbc.downsample(
(s_i.astype(np.int64) - s_i0).astype(np.float64), s_v, n_out
)

# add the start-offset and convert back to datetime
idx = pd.to_datetime(
idx.astype(np.int64) + s_i0, unit="ns", utc=True
).tz_convert(s.index.tz)
else:
idx, data = lttbc.downsample(s_i, s_v, n_out)
idx = idx.astype(s_i.dtype)
s_i = s.index.values
s_i = s_i.astype(np.int64) if s_i.dtype.type == np.datetime64 else s_i

if str(s.dtype) == "category":
# reconvert the downsampled numeric codes to the category array
data = np.vectorize(s.dtype.categories.values.item)(data.astype(s_v.dtype))
else:
# default case, use the series it's dtype as return type
data = data.astype(s.dtype)
index = lttbcv2.downsample_return_index(s_i, s_v, n_out)

return pd.Series(index=idx, data=data, name=str(s.name), copy=False)
return pd.Series(
index=s.index[index],
data=s.values[index],
name=str(s.name),
copy=False,
)


class MinMaxOverlapAggregator(AbstractSeriesAggregator):
@@ -166,14 +143,14 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
# Calculate the argmin & argmax on the reshaped view of `s` &
# add the corresponding offset
argmin = (
s.iloc[: block_size * offset.shape[0]]
.values.reshape(-1, block_size)
s.values[: block_size * offset.shape[0]]
.reshape(-1, block_size)
.argmin(axis=1)
+ offset
)
argmax = (
s.iloc[argmax_offset : block_size * offset.shape[0] + argmax_offset]
.values.reshape(-1, block_size)
s.values[argmax_offset : block_size * offset.shape[0] + argmax_offset]
.reshape(-1, block_size)
.argmax(axis=1)
+ offset
+ argmax_offset
@@ -231,14 +208,14 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
# Calculate the argmin & argmax on the reshaped view of `s` &
# add the corresponding offset
argmin = (
s.iloc[: block_size * offset.shape[0]]
.values.reshape(-1, block_size)
s.values[: block_size * offset.shape[0]]
.reshape(-1, block_size)
.argmin(axis=1)
+ offset
)
argmax = (
s.iloc[: block_size * offset.shape[0]]
.values.reshape(-1, block_size)
s.values[: block_size * offset.shape[0]]
.reshape(-1, block_size)
.argmax(axis=1)
+ offset
)
@@ -297,7 +274,7 @@ def __init__(self, interleave_gaps: bool = True, nan_position="end"):
)

def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
if s.shape[0] > n_out * 1_000:
if s.shape[0] > n_out * 2_000:
s = self.minmax._aggregate(s, n_out * 50)
return self.lttb._aggregate(s, n_out)

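
To make the rewritten LTTB path concrete, a short sketch of the index-returning flow is given below. It mirrors the new _aggregate body (int64 view of a datetime index, positional re-indexing), but the sample data, sizes, and timezone are illustrative assumptions, and the lttbcv2 extension is assumed to have been built by build.py.

import numpy as np
import pandas as pd

from plotly_resampler.aggregation.algorithms import lttbcv2

n, n_out = 50_000, 500
s = pd.Series(
    np.sin(np.linspace(0, 30, n)),
    index=pd.date_range("2022-07-01", periods=n, freq="ms", tz="Europe/Brussels"),
)

# datetime indices are viewed as int64 nanoseconds, as in the new _aggregate
x = s.index.values.astype(np.int64)
idx = lttbcv2.downsample_return_index(x, s.values, n_out)

# only positions come back, so the original index (timezone included) and the
# original values/dtype are reused as-is -- no float round-trip, no rounding
s_agg = pd.Series(index=s.index[idx], data=s.values[idx], name=s.name, copy=False)
assert len(s_agg) == n_out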