Skip to content

Commit

Permalink
[add:lib] Add standardization preprocessing method (#63)
Browse files Browse the repository at this point in the history
* Finish tests for standardization

* Add documentation for standardize

* Add standardize to notebooks
  • Loading branch information
eonu authored Jan 5, 2020
1 parent 6c8f588 commit 8eb7c1b
Show file tree
Hide file tree
Showing 12 changed files with 327 additions and 72 deletions.
1 change: 1 addition & 0 deletions docs/_includes/examples/preprocessing/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# Create the Preprocess object
pre = Preprocess()
pre.center()
pre.standardize()
pre.filtrate(n=5, method='median')
pre.downsample(n=5, method='decimate')
pre.fft()
Expand Down
8 changes: 8 additions & 0 deletions docs/_includes/examples/preprocessing/standardize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import numpy as np
from sequentia.preprocessing import standardize

# Create some sample data:
# three observation sequences with 10, 20 and 30 observations, each having 3 features
lengths = [10 * i for i in range(1, 4)]
X = [np.random.random((length, 3)) for length in lengths]

# Standardize each observation sequence
X = standardize(X)
4 changes: 2 additions & 2 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Sequentia offers some appropriate classification algorithms for these kinds of t
:caption: Preprocessing Methods

sections/preprocessing/center.rst
sections/preprocessing/standardize.rst
sections/preprocessing/downsample.rst
sections/preprocessing/filtrate.rst
sections/preprocessing/fft.rst
Expand All @@ -55,5 +56,4 @@ Documentation Search and Index
==============================

* :ref:`search`
* :ref:`genindex`
* :ref:`modindex`
* :ref:`genindex`
2 changes: 1 addition & 1 deletion docs/sections/preprocessing/center.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
.. _center:

Centering (``center``)
=============================
======================

Centers an observation sequence about the mean of its observations – that is, given:

Expand Down
23 changes: 23 additions & 0 deletions docs/sections/preprocessing/standardize.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
.. _standardize:

Standardizing (``standardize``)
===============================

Standardizes an observation sequence by transforming observations
so that they have zero mean and unit variance.

For further information, please see the `preprocessing tutorial notebook <https://nbviewer.jupyter.org/github/eonu/sequentia/blob/master/notebooks/2%20-%20Preprocessing%20%28Tutorial%29.ipynb#Standardizing-(standardize)>`_.

Example
-------

.. literalinclude:: ../../_includes/examples/preprocessing/standardize.py
    :language: python
    :linenos:

API reference
-------------

.. automodule:: sequentia.preprocessing
    :noindex:
.. autofunction:: standardize
4 changes: 2 additions & 2 deletions lib/sequentia/preprocessing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .methods import (
downsample, center, fft, filtrate,
_downsample, _center, _fft, _filtrate
downsample, center, standardize, fft, filtrate,
_downsample, _center, _standardize, _fft, _filtrate
)
from .preprocess import Preprocess
32 changes: 30 additions & 2 deletions lib/sequentia/preprocessing/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,38 @@ def transform(x):
elif isinstance(X, np.ndarray):
return transform(X)

def standardize(X):
    """Validates the given observation sequence(s) and standardizes them, transforming
    observations so that they have zero mean and unit variance.

    Parameters
    ----------
    X: numpy.ndarray or List[numpy.ndarray]
        An individual observation sequence or a list of multiple observation sequences.

    Returns
    -------
    standardized: numpy.ndarray or List[numpy.ndarray]
        The standardized input observation sequence(s).
    """
    # Validate first: a single sequence is accepted as well as a list of sequences
    _Validator().observation_sequences(X, allow_single=True)

    # Delegate the actual transformation to the non-validating implementation
    standardized = _standardize(X)
    return standardized

def _standardize(X):
def transform(x):
return (x - x.mean(axis=0)) / x.std(axis=0)

if isinstance(X, list):
return [transform(x) for x in X]
elif isinstance(X, np.ndarray):
return transform(X)

def downsample(X, n, method='decimate'):
"""Downsamples an observation sequence (or multiple sequences) by either:
- Decimating the next :math:`n-1` observations
- Averaging the current observation with the next :math:`n-1` observations
- Decimating the next :math:`n-1` observations
- Averaging the current observation with the next :math:`n-1` observations
Parameters
----------
Expand Down
11 changes: 9 additions & 2 deletions lib/sequentia/preprocessing/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import numpy as np
from .methods import _center, _downsample, _fft, _filtrate
from .methods import _center, _standardize, _downsample, _fft, _filtrate
from ..internals import _Validator

class Preprocess:
Expand All @@ -13,6 +13,11 @@ def center(self):
"""Centers an observation sequence (or multiple sequences) by centering observations around the mean."""
self._transforms.append((_center, {}))

def standardize(self):
    """Queues a standardization step, which transforms the observations of an observation
    sequence (or multiple sequences) so that they have zero mean and unit variance."""
    # Standardization takes no keyword arguments, hence the empty dict
    step = (_standardize, {})
    self._transforms.append(step)

def downsample(self, n, method='decimate'):
"""Downsamples an observation sequence (or multiple sequences) by either:
Expand Down Expand Up @@ -95,8 +100,10 @@ def summary(self):
idx = i + 1
if transform == _center:
steps.append(('{}. Centering'.format(idx), None))
elif transform == _standardize:
steps.append(('{}. Standardization'.format(idx), None))
elif transform == _downsample:
header = 'Decimating' if kwargs['method'] == 'decimate' else 'Averaging'
header = 'Decimation' if kwargs['method'] == 'decimate' else 'Averaging'
steps.append((
'{}. Downsampling:'.format(idx),
' {} with downsample factor (n={})'.format(header, kwargs['n'])
Expand Down
58 changes: 57 additions & 1 deletion lib/test/lib/preprocessing/test_methods.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest
import numpy as np
from sequentia.preprocessing import downsample, center, fft, filtrate
from sequentia.preprocessing import downsample, center, standardize, fft, filtrate
from ...support import assert_equal, assert_all_equal

# Set seed for reproducible randomness
Expand Down Expand Up @@ -69,6 +69,62 @@ def test_center_multiple():
])
])

# ============= #
# standardize() #
# ============= #

def test_standardize_single_even():
    """Standardizing one even-length observation sequence gives the expected values"""
    expected = np.array([
        [-0.40964472, 0.60551094],
        [-0.13067455, -0.45932478],
        [-1.05682966, 0.17224387],
        [-0.98478635, 1.70959629],
        [ 1.73550526, -1.46873528],
        [ 0.84643002, -0.55929105]
    ])
    actual = standardize(X_even)
    assert_equal(actual, expected)

def test_standardize_single_odd():
    """Standardizing one odd-length observation sequence gives the expected values"""
    expected = np.array([
        [ 0.40527155, 0.83146609],
        [-1.03275681, -2.32879115],
        [-1.17979099, 0.48102837],
        [ 1.01320338, 0.62196325],
        [ 1.59321247, 0.35490986],
        [ 0.09693924, 0.28469405],
        [-0.89607884, -0.24527047]
    ])
    actual = standardize(X_odd)
    assert_equal(actual, expected)

def test_standardize_multiple():
    """Standardizing a list of observation sequences gives the expected values for each one"""
    expected_first = np.array([
        [-1.05545468, 1.05686059],
        [ 1.34290313, -1.34223879],
        [-0.28744845, 0.2853782 ]
    ])
    expected_second = np.array([
        [-0.20256659, 0.34141162],
        [-1.71691396, 0.57661018],
        [ 0.33738952, 0.57325679],
        [ 1.4857256 , 0.88343331],
        [-0.53718803, -0.28673041],
        [ 0.63355347, -2.08798149]
    ])
    expected_third = np.array([
        [ 0.75393018, 2.22884906],
        [-1.0030964 , -0.82147823],
        [-0.59868217, 0.50057122],
        [ 0.38214698, 0.922274 ],
        [ 1.99208067, -0.97284537],
        [-1.00889357, -0.63913134],
        [ 0.70134695, -0.12118881],
        [-0.01780218, -0.17111248],
        [-1.20103046, -0.92593806]
    ])
    actual = standardize(Xs)
    assert_all_equal(actual, [expected_first, expected_second, expected_third])

# ============ #
# downsample() #
# ============ #
Expand Down
67 changes: 52 additions & 15 deletions lib/test/lib/preprocessing/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import numpy as np
from sequentia.preprocessing import (
Preprocess,
downsample, center, fft, filtrate,
_downsample, _center, _fft, _filtrate
downsample, center, standardize, fft, filtrate,
_downsample, _center, _standardize, _fft, _filtrate
)
from ...support import assert_equal, assert_all_equal

Expand All @@ -17,8 +17,12 @@
Xs = [i * rng.random((3 * i, 2)) for i in range(1, 4)]

# Centering preprocessor
norm = Preprocess()
norm.center()
cent = Preprocess()
cent.center()

# Standardizing preprocessor
standard = Preprocess()
standard.standardize()

# Discrete Fourier Transform preprocessor
fourier = Preprocess()
Expand All @@ -37,6 +41,7 @@
# Combined preprocessor
combined = Preprocess()
combined.center()
combined.standardize()
combined.filtrate(**filt_kwargs)
combined.downsample(**down_kwargs)
combined.fft()
Expand All @@ -47,27 +52,54 @@

def test_center_adds_transform():
"""Applying a single centering transformation"""
assert len(norm._transforms) == 1
assert norm._transforms[0] == (_center, {})
assert len(cent._transforms) == 1
assert cent._transforms[0] == (_center, {})

def test_center_single():
"""Applying centering to a single observation sequence"""
assert_equal(norm.transform(X), center(X))
assert_equal(cent.transform(X), center(X))

def test_center_multiple():
"""Applying centering to multiple observation sequences"""
assert_all_equal(norm.transform(Xs), center(Xs))
assert_all_equal(cent.transform(Xs), center(Xs))

def test_center_summary(capsys):
"""Summary of a centering transformation"""
norm.summary()
cent.summary()
assert capsys.readouterr().out == (
'Preprocessing summary:\n'
'======================\n'
'1. Centering\n'
'======================\n'
)

# ======================== #
# Preprocess.standardize() #
# ======================== #

def test_standardize_adds_transform():
    """The standardizing preprocessor holds exactly one transform: ``_standardize`` with no arguments"""
    transforms = standard._transforms
    assert len(transforms) == 1
    first = transforms[0]
    assert first == (_standardize, {})

def test_standardize_single():
    """The standardizing preprocessor matches ``standardize`` on a single observation sequence"""
    expected = standardize(X)
    actual = standard.transform(X)
    assert_equal(actual, expected)

def test_standardize_multiple():
    """The standardizing preprocessor matches ``standardize`` on multiple observation sequences"""
    expected = standardize(Xs)
    actual = standard.transform(Xs)
    assert_all_equal(actual, expected)

def test_standardize_summary(capsys):
    """The summary of a standardizing preprocessor lists a single standardization step"""
    expected = (
        'Preprocessing summary:\n'
        '======================\n'
        '1. Standardization\n'
        '======================\n'
    )
    standard.summary()
    # Compare against whatever summary() wrote to standard output
    printed = capsys.readouterr().out
    assert printed == expected

# ================ #
# Preprocess.fft() #
# ================ #
Expand Down Expand Up @@ -119,7 +151,7 @@ def test_downsample_summary(capsys):
' Preprocessing summary: \n'
'==========================================\n'
'1. Downsampling:\n'
' Decimating with downsample factor (n=3)\n'
' Decimation with downsample factor (n=3)\n'
'==========================================\n'
)

Expand Down Expand Up @@ -157,9 +189,10 @@ def test_filtrate_summary(capsys):

def test_combined_adds_transforms():
"""Applying multiple filtering transformations"""
assert len(combined._transforms) == 4
assert len(combined._transforms) == 5
assert combined._transforms == [
(_center, {}),
(_standardize, {}),
(_filtrate, filt_kwargs),
(_downsample, down_kwargs),
(_fft, {})
Expand All @@ -169,6 +202,7 @@ def test_combined_single():
"""Applying combined transformations to a single observation sequence"""
X_pre = X
X_pre = center(X_pre)
X_pre = standardize(X_pre)
X_pre = filtrate(X_pre, **filt_kwargs)
X_pre = downsample(X_pre, **down_kwargs)
X_pre = fft(X_pre)
Expand All @@ -178,6 +212,7 @@ def test_combined_multiple():
"""Applying combined transformations to multiple observation sequences"""
Xs_pre = Xs
Xs_pre = center(Xs_pre)
Xs_pre = standardize(Xs_pre)
Xs_pre = filtrate(Xs_pre, **filt_kwargs)
Xs_pre = downsample(Xs_pre, **down_kwargs)
Xs_pre = fft(Xs_pre)
Expand All @@ -191,13 +226,15 @@ def test_combined_summary(capsys):
'==========================================\n'
'1. Centering\n'
'------------------------------------------\n'
'2. Filtering:\n'
'2. Standardization\n'
'------------------------------------------\n'
'3. Filtering:\n'
' Median filter with window size (n=3)\n'
'------------------------------------------\n'
'3. Downsampling:\n'
' Decimating with downsample factor (n=3)\n'
'4. Downsampling:\n'
' Decimation with downsample factor (n=3)\n'
'------------------------------------------\n'
'4. Discrete Fourier Transform\n'
'5. Discrete Fourier Transform\n'
'==========================================\n'
)

Expand Down
Loading

0 comments on commit 8eb7c1b

Please sign in to comment.