add module

cccxm · cccxm · commit a8375a75dba3 · 2020-09-22T10:39:14.000+08:00
diff --git a/module/__init__.py b/module/__init__.py
@@ -0,0 +1,9 @@
+from ._lvq import (
+    Model as LVQ
+)
+from ._som import (
+    Model as SOM
+)
+from ._wta import (
+    Model as WTA
+)
diff --git a/module/_lvq.py b/module/_lvq.py
@@ -0,0 +1,92 @@
+import numpy as np
+from numpy import ndarray
+from .util.rand import PARAMETER_RAND_NORM
+from .util.distance import EUCLIDEAN
+from .util.neighborhood import GAUSSIAN
+
+
+class Node(object):
+    def __init__(self, label, weight: ndarray):
+        """
+        A single labelled node (prototype) of the LVQ map.
+
+        :param label: class label currently assigned to the node
+        :param weight: weight vector of the node
+        """
+        self.weight = weight
+        # Grid coordinates start at the origin; Model.__init__ overwrites them
+        # once the node's index in the grid is known.
+        # np.float64 is used because the ``np.float`` alias was removed from
+        # NumPy (AttributeError on current releases).
+        self.position = np.array([0, 0], dtype=np.float64)
+        self.label = label
+
+
+class Model(object):
+    def __init__(self, depth: int, width: int, height: int, labels: list,
+                 param_init=PARAMETER_RAND_NORM,
+                 distance=EUCLIDEAN,
+                 neighborhood=GAUSSIAN):
+        """
+        Learning Vector Quantization (LVQ) model laid out on a
+        ``width`` x ``height`` grid of labelled nodes.
+
+        :param depth: length of every node's weight vector
+        :param width: grid width
+        :param height: grid height
+        :param labels: initial node labels, indexed by node number; must hold
+            at least ``width * height`` entries (a shorter list raises
+            IndexError)
+        :param param_init: callable ``param_init(depth)`` returning the initial
+            weight vector of one node; defaults to normally distributed values
+        :param distance: callable ``distance(weight, x)``; defaults to the
+            Euclidean distance
+        :param neighborhood: callable
+            ``neighborhood(alpha, radius, position, centre_position)`` giving
+            the update factor of a node; defaults to the simplified Gaussian
+        """
+        self.width = width
+        self.height = height
+        self.length = width * height
+        self.nodes = [Node(weight=param_init(depth), label=labels[i])
+                      for i in range(self.length)]
+        # Nodes are stored row by row: index i sits at column i % width,
+        # row i // width.
+        for i, node in enumerate(self.nodes):
+            row, col = divmod(i, width)
+            node.position = np.array([col, row], dtype=np.float64)
+        self.distance = distance
+        self.neighborhood = neighborhood
+
+    def __len__(self):
+        """Return the number of nodes (``width * height``)."""
+        return self.length
+
+    def winner(self, x: ndarray) -> Node:
+        """
+        Find the node whose weight vector is closest to ``x``.
+
+        :param x: a single input vector
+        :return: the node with the smallest ``distance(weight, x)``; on ties
+            the node with the lowest index wins
+        """
+        return min(self.nodes, key=lambda node: self.distance(node.weight, x))
+
+    def train(self, X: list, y, alpha: float, radius: float) -> None:
+        """
+        Train on a batch of inputs that all belong to label ``y``.
+
+        :param X: iterable of two-item entries; the first item is ignored and
+            the second is the input vector (presumably ``(label, vector)``
+            pairs like those passed to ``validate`` - confirm with callers)
+        :param y: label shared by every input in ``X``
+        :param alpha: learning rate in [0, 1]
+        :param radius: neighbourhood radius
+        :return: None
+        """
+        # Number of wins per node, keyed by the node object itself. Dict
+        # insertion order keeps the "first seen wins ties" rule below.
+        wins = {}
+        for _, x in X:
+            # Locate the winning node for this input.
+            centre = self.winner(x)
+            wins[centre] = wins.get(centre, 0) + 1
+            # Pull every node towards x, scaled by the neighbourhood function.
+            for node in self.nodes:
+                factor = self.neighborhood(alpha, radius, node.position, centre.position)
+                node.weight = node.weight + factor * (x - node.weight)
+        if not wins:
+            # Empty batch: nothing was learned, so no node is relabelled.
+            return
+        # The node that won most often takes over the batch label.
+        max(wins, key=wins.get).label = y
+
+    def validate(self, test_sets: list) -> float:
+        """
+        Measure classification accuracy.
+
+        :param test_sets: list of ``(label, vector)`` pairs; labels may differ
+        :return: fraction of pairs whose winning node carries the expected
+            label, in [0, 1]
+        :raises ZeroDivisionError: if ``test_sets`` is empty
+        """
+        hits = sum(1 for label, sample in test_sets if self.winner(sample).label == label)
+        return hits / len(test_sets)
diff --git a/module/_som.py b/module/_som.py
@@ -0,0 +1,73 @@
+import numpy as np
+from numpy import ndarray
+from .util.rand import PARAMETER_RAND_NORM
+from .util.distance import EUCLIDEAN
+from .util.neighborhood import GAUSSIAN
+
+
+class Node(object):
+    def __init__(self, weight: ndarray):
+        """
+        Map node holding a weight vector and a 2-D grid position.
+
+        :param weight: weight vector of the node
+        """
+        # Placeholder coordinates (the origin), stored as float64.
+        self.position = np.zeros(2, dtype=np.float64)
+        self.weight = weight
+
+
+class Model(object):
+    def __init__(self, depth: int, width: int, height: int,
+                 param_init=PARAMETER_RAND_NORM,
+                 distance=EUCLIDEAN,
+                 neighborhood=GAUSSIAN
+                 ):
+        """
+        Self-organizing map (SOM) on a ``width`` x ``height`` grid of nodes.
+
+        :param depth: length of every node's weight vector
+        :param width: grid width
+        :param height: grid height
+        :param param_init: callable ``param_init(depth)`` returning the initial
+            weight vector of one node; defaults to normally distributed values
+        :param distance: callable ``distance(weight, x)``; defaults to the
+            Euclidean distance
+        :param neighborhood: callable
+            ``neighborhood(alpha, radius, position, centre_position)`` giving
+            the update factor of a node; defaults to the simplified Gaussian
+        """
+        self.width = width
+        self.height = height
+        self.length = width * height
+        self.nodes = [Node(param_init(depth)) for _ in range(self.length)]
+        # Nodes are stored row by row: index i sits at column i % width,
+        # row i // width.
+        for i, node in enumerate(self.nodes):
+            row, col = divmod(i, width)
+            node.position = np.array([col, row], dtype=np.float64)
+        self.distance = distance
+        self.neighborhood = neighborhood
+
+    def __len__(self):
+        """Return the number of nodes (``width * height``)."""
+        return self.length
+
+    def winner(self, x) -> Node:
+        """
+        Find the node whose weight vector is closest to ``x``.
+
+        :param x: a single input vector
+        :return: the node with the smallest ``distance(weight, x)``; on ties
+            the node with the lowest index wins
+        """
+        # min() evaluates the distance exactly once per node.
+        return min(self.nodes, key=lambda node: self.distance(node.weight, x))
+
+    def train(self, x: ndarray, alpha: float, radius: float):
+        """
+        Run one training step for a single input.
+
+        :param x: a single input vector
+        :param alpha: learning rate in [0, 1]
+        :param radius: neighbourhood radius
+        :return: None
+        """
+        # Locate the winning node for this input.
+        centre = self.winner(x)
+        # Pull every node towards x, scaled by the neighbourhood function.
+        for node in self.nodes:
+            factor = self.neighborhood(alpha, radius, node.position, centre.position)
+            node.weight = node.weight + factor * (x - node.weight)
diff --git a/module/_wta.py b/module/_wta.py
@@ -0,0 +1,64 @@
+import numpy as np
+from numpy import ndarray
+from .util.rand import PARAMETER_RAND_NORM
+from .util.distance import EUCLIDEAN
+from .util.neighborhood import GAUSSIAN
+
+
+class Node(object):
+    def __init__(self, weight: ndarray):
+        """
+        Competitive-layer node that stores nothing but its weight vector.
+
+        :param weight: weight vector of the node
+        """
+        self.weight = weight
+
+
+class Model(object):
+    def __init__(self, depth: int, length: int,
+                 param_init=PARAMETER_RAND_NORM,
+                 distance=EUCLIDEAN,
+                 neighborhood=GAUSSIAN
+                 ):
+        """
+        Winner-take-all (WTA) competitive learning model.
+
+        :param depth: length of every node's weight vector
+        :param length: number of nodes
+        :param param_init: callable ``param_init(depth)`` returning the initial
+            weight vector of one node; defaults to normally distributed values
+        :param distance: callable ``distance(weight, x)``; defaults to the
+            Euclidean distance
+        :param neighborhood: accepted so the constructor matches the other
+            models; this model never uses it
+        """
+        self.length = length
+        self.nodes = [Node(param_init(depth)) for _ in range(length)]
+        self.distance = distance
+
+    def __len__(self):
+        """Return the number of nodes."""
+        return self.length
+
+    def winner(self, x) -> Node:
+        """
+        Find the node whose weight vector is closest to ``x``.
+
+        :param x: a single input vector
+        :return: the node with the smallest ``distance(weight, x)``; on ties
+            the node with the lowest index wins
+        """
+        # min() evaluates the distance exactly once per node.
+        return min(self.nodes, key=lambda node: self.distance(node.weight, x))
+
+    def train(self, x: ndarray, alpha: float):
+        """
+        Run one training step for a single input.
+
+        Only the winning node is moved towards ``x``. Updating every node
+        with the same rule would drive all weights to the same point and
+        remove the competition between nodes.
+
+        :param x: a single input vector
+        :param alpha: learning rate in [0, 1]
+        :return: None
+        """
+        # Locate the winning node for this input.
+        centre = self.winner(x)
+        # Winner takes all: move just the winner towards the input.
+        centre.weight = centre.weight + alpha * (x - centre.weight)
diff --git a/module/util/__init__.py b/module/util/__init__.py
diff --git a/module/util/distance/__init__.py b/module/util/distance/__init__.py
@@ -0,0 +1,6 @@
+from ._distance import (
+    euclidean as EUCLIDEAN,
+    pearson as PEARSON,
+    fast_dtw as FAST_DTW,
+    dtw as DTW
+)
diff --git a/module/util/distance/_distance.py b/module/util/distance/_distance.py
@@ -0,0 +1,40 @@
+import numpy as np
+
+
+def euclidean(v1: np.ndarray, v2: np.ndarray) -> float:
+    """
+    Euclidean distance between two vectors.
+
+    :param v1: first vector
+    :param v2: second vector
+    :return: ``np.linalg.norm`` of the element-wise difference ``v1 - v2``
+    """
+    difference = v1 - v2
+    return np.linalg.norm(difference)
+
+
+def pearson(v1: np.ndarray, v2: np.ndarray) -> float:
+    """
+    Correlation distance between two vectors based on Pearson's r.
+
+    The value is ``1 - r``: 0 for perfectly correlated vectors, 1 for
+    uncorrelated ones and 2 for perfectly anti-correlated ones. The
+    reciprocal ``1 / r`` is not used because it is negative for
+    anti-correlated vectors (making them look closest of all) and
+    diverges as ``r`` approaches 0.
+
+    :param v1: first vector
+    :param v2: second vector
+    :return: ``1 - r`` where ``r`` is the Pearson correlation coefficient
+    """
+    # Imported lazily so SciPy is only needed when this metric is used.
+    from scipy.stats import pearsonr
+    return 1.0 - pearsonr(v1, v2)[0]
+
+
+def fast_dtw(v1: np.ndarray, v2: np.ndarray) -> float:
+    """
+    Dynamic-time-warping distance as computed by ``fastdtw.fastdtw``.
+
+    SciPy's ``euclidean`` is passed as the point-wise metric.
+
+    :param v1: first sequence
+    :param v2: second sequence
+    :return: first element of the ``fastdtw.fastdtw`` result (the distance)
+    """
+    import fastdtw
+    from scipy.spatial.distance import euclidean
+    # noinspection PyTypeChecker,PyUnresolvedReferences
+    result = fastdtw.fastdtw(v1, v2, dist=euclidean)
+    return result[0]
+
+
+def dtw(v1: np.ndarray, v2: np.ndarray) -> float:
+    """
+    Dynamic-time-warping distance as computed by ``fastdtw.dtw``.
+
+    SciPy's ``euclidean`` is passed as the point-wise metric.
+
+    :param v1: first sequence
+    :param v2: second sequence
+    :return: first element of the ``fastdtw.dtw`` result (the distance)
+    """
+    import fastdtw
+    from scipy.spatial.distance import euclidean
+    # noinspection PyTypeChecker,PyUnresolvedReferences
+    result = fastdtw.dtw(v1, v2, dist=euclidean)
+    return result[0]
diff --git a/module/util/neighborhood/__init__.py b/module/util/neighborhood/__init__.py
@@ -0,0 +1,3 @@
+from ._gaussian import (
+    gaussian as GAUSSIAN
+)
diff --git a/module/util/neighborhood/_gaussian.py b/module/util/neighborhood/_gaussian.py
@@ -0,0 +1,17 @@
+import numpy as np
+from numpy import ndarray
+
+
+def gaussian(alpha: float, radius: float, i: ndarray, c: ndarray) -> float:
+    """
+    Simplified Gaussian neighbourhood: a hard cut-off at ``radius``.
+
+    :param alpha: learning rate
+    :param radius: neighbourhood radius
+    :param i: position of the node being updated
+    :param c: position of the centre node
+    :return: ``alpha`` when ``i`` lies within ``radius`` of ``c``
+        (boundary included), otherwise ``0.0``
+    """
+    offset = np.linalg.norm(i - c)
+    return alpha if offset <= radius else 0.0
diff --git a/module/util/rand/__init__.py b/module/util/rand/__init__.py
@@ -0,0 +1,3 @@
+from ._parameter import (
+    rand as PARAMETER_RAND_NORM
+)
diff --git a/module/util/rand/_parameter.py b/module/util/rand/_parameter.py
@@ -0,0 +1,10 @@
+import numpy as np
+
+
+def rand(depth: int) -> np.ndarray:
+    """
+    Draw a parameter vector from the standard normal distribution.
+
+    :param depth: number of components in the vector
+    :return: 1-D array of ``depth`` samples drawn via ``np.random.randn``
+    """
+    samples = np.random.randn(depth)
+    return samples

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from ._gaussian import (`
	`2`	`+ gaussian as GAUSSIAN`
	`3`	`+)`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from ._parameter import (`
	`2`	`+ rand as PARAMETER_RAND_NORM`
	`3`	`+)`