
Commit f57b71f

Update SVM

1 parent f3f274e

5 files changed: +64 -67 lines

e_SVM/KP.py (+14 -22)
@@ -38,27 +38,19 @@ def _fit(self, sample_weight, lr):
 class GDKP(GDKernelBase):
     GDKPTiming = Timing()
 
-    def __init__(self, **kwargs):
-        super(GDKP, self).__init__(**kwargs)
-        self._fit_args, self._fit_args_names = [1e-3], ["tol"]
-
-    @GDKPTiming.timeit(level=1, prefix="[Core] ")
-    def _loss(self, y, y_pred, sample_weight):
-        return np.sum(
-            np.maximum(0, 1 - y * y_pred) * sample_weight
-        )
-
     @GDKPTiming.timeit(level=1, prefix="[Core] ")
     def _get_grads(self, x_batch, y_batch, y_pred, sample_weight_batch, *args):
-        err = -y_batch * (x_batch.dot(self._alpha) + self._b)
-        if np.max(err) < 0:
-            return [None, None]
-        mask = err >= 0
-        delta = -y_batch[mask]
-        self._model_grads = [
-            np.sum(delta[..., None] * x_batch[mask], axis=0),
-            np.sum(delta)
-        ]
+        err = -y_batch * (x_batch.dot(self._alpha) + self._b) * sample_weight_batch
+        mask = err >= 0  # type: np.ndarray
+        if not np.any(mask):
+            self._model_grads = [None, None]
+        else:
+            delta = -y_batch[mask] * sample_weight_batch[mask]
+            self._model_grads = [
+                np.sum(delta[..., None] * x_batch[mask], axis=0),
+                np.sum(delta)
+            ]
+        return np.sum(err[mask])
 
 if __name__ == '__main__':
     # xs, ys = DataUtil.gen_two_clusters(center=5, dis=1, scale=2, one_hot=False)
@@ -67,17 +59,17 @@ def _get_grads(self, x_batch, y_batch, y_pred, sample_weight_batch, *args):
     ys[ys == 0] = -1
 
     animation_params = {
-        "show": False, "mp4": False, "period": 500,
+        "show": False, "mp4": False, "period": 50,
         "dense": 400, "draw_background": True
     }
 
     kp = KP(animation_params=animation_params)
-    kp.fit(xs, ys, p=12, epoch=10 ** 4)
+    kp.fit(xs, ys, kernel="poly", p=12, epoch=200)
     kp.evaluate(xs, ys)
     kp.visualize2d(xs, ys, dense=400)
 
     kp = GDKP(animation_params=animation_params)
-    kp.fit(xs, ys, p=12, epoch=10 ** 4)
+    kp.fit(xs, ys, kernel="poly", p=12, epoch=10000)
     kp.evaluate(xs, ys)
     kp.visualize2d(xs, ys, dense=400)
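The substance of the KP.py change: GDKP drops its `__init__` and `_loss` overrides, and `_get_grads` now weights the hinge error by `sample_weight_batch`, stores the subgradients, and returns the batch loss in a single pass. A standalone sketch of that fused pattern (the free function and its signature are illustrative, not part of the repo):

```python
import numpy as np

def fused_hinge_grads(gram_batch, y_batch, alpha, b, sample_weight_batch):
    # gram_batch holds rows of the kernel (Gram) matrix, so
    # gram_batch.dot(alpha) + b is the kernelized decision value.
    err = -y_batch * (gram_batch.dot(alpha) + b) * sample_weight_batch
    mask = err >= 0
    if not np.any(mask):
        grads = [None, None]  # no margin violators: nothing to update
    else:
        delta = -y_batch[mask] * sample_weight_batch[mask]
        grads = [
            np.sum(delta[..., None] * gram_batch[mask], axis=0),  # d/d(alpha)
            np.sum(delta),                                        # d/d(b)
        ]
    # Returning the loss alongside the gradients is what lets the
    # separate _loss method (and its extra forward pass) be deleted.
    return grads, np.sum(err[mask])
```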

e_SVM/LinearSVM.py (+12 -17)
@@ -30,24 +30,20 @@ def __init__(self, **kwargs):
         self._params["tol"] = kwargs.get("tol", 1e-3)
         self._params["optimizer"] = kwargs.get("optimizer", "Adam")
 
-    @LinearSVMTiming.timeit(level=1, prefix="[Core] ")
-    def _loss(self, y, y_pred, c):
-        return np.sum(
-            np.maximum(0, 1 - y * y_pred)
-        ) + c * np.linalg.norm(self._w)
-
     @LinearSVMTiming.timeit(level=1, prefix="[Core] ")
     def _get_grads(self, x_batch, y_batch, y_pred, sample_weight_batch, *args):
         c = args[0]
         err = (1 - y_pred * y_batch) * sample_weight_batch
         mask = err > 0  # type: np.ndarray
         if not np.any(mask):
-            return [None, None]
-        delta = -c * y_batch[mask] * sample_weight_batch[mask]
-        self._model_grads = [
-            np.sum(delta[..., None] * x_batch[mask], axis=0),
-            np.sum(delta)
-        ]
+            self._model_grads = [None, None]
+        else:
+            delta = -c * y_batch[mask] * sample_weight_batch[mask]
+            self._model_grads = [
+                np.sum(delta[..., None] * x_batch[mask], axis=0),
+                np.sum(delta)
+            ]
+        return np.sum(err[mask]) + c * np.linalg.norm(self._w)
 
     @LinearSVMTiming.timeit(level=1, prefix="[API] ")
     def fit(self, x, y, sample_weight=None, c=None, lr=None, optimizer=None,
@@ -79,15 +75,14 @@ def fit(self, x, y, sample_weight=None, c=None, lr=None, optimizer=None,
         self._optimizer = OptFactory().get_optimizer_by_name(
             optimizer, self._model_parameters, lr, epoch
         )
-        loss_function = lambda _y, _y_pred: self._loss(_y, _y_pred, c)
 
-        bar = ProgressBar(max_value=epoch, name="TorchLinearSVM")
+        bar = ProgressBar(max_value=epoch, name="LinearSVM")
         ims = []
         train_repeat = self._get_train_repeat(x, batch_size)
         for i in range(epoch):
             self._optimizer.update()
-            l = self.batch_training(
-                x, y, batch_size, train_repeat, loss_function, sample_weight, c
+            l = self._batch_training(
+                x, y, batch_size, train_repeat, sample_weight, c
             )
             if l < tol:
                 bar.terminate()
@@ -152,7 +147,7 @@ def fit(self, x, y, c=None, lr=None, batch_size=None, epoch=None, tol=None,
         ims = []
         train_repeat = self._get_train_repeat(x, batch_size)
         for i in range(epoch):
-            l = self.batch_training(x, y_2d, batch_size, train_repeat, loss, train_step)
+            l = self._batch_training(x, y_2d, batch_size, train_repeat, loss, train_step)
             if l < tol:
                 bar.terminate()
                 break
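The same fusion happens in LinearSVM.py: the `loss_function` lambda and the separate `_loss` method disappear because `_batch_training` (renamed from `batch_training`) now receives the loss straight from `_get_grads`, and the fit loop early-stops on that value. A self-contained toy run of the identical update rule on synthetic data (all names and hyperparameters below are illustrative, not from the repo):

```python
import numpy as np

# Toy reproduction of LinearSVM._get_grads' fused loss/gradient on synthetic data.
rng = np.random.RandomState(0)
x = rng.randn(200, 2)
y = np.where(x[:, 0] + x[:, 1] > 0, 1.0, -1.0)       # linearly separable labels
sample_weight = np.ones(len(y))
w, b, c, lr, tol = np.zeros(2), 0.0, 1.0, 1e-3, 1e-3

for epoch in range(1000):
    y_pred = x.dot(w) + b
    err = (1 - y_pred * y) * sample_weight            # weighted hinge error
    mask = err > 0
    if np.any(mask):                                  # update only on violators
        delta = -c * y[mask] * sample_weight[mask]
        w -= lr * np.sum(delta[..., None] * x[mask], axis=0)
        b -= lr * np.sum(delta)
    loss = np.sum(err[mask]) + c * np.linalg.norm(w)  # the value _get_grads returns
    if loss < tol:                                    # same early stop as fit()
        break
```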

e_SVM/README.md (+11 -5)
@@ -26,14 +26,20 @@ Implemented `Tensorflow` & `PyTorch` backend for `LinearSVM` & `SVM`
 ![TorchLinearSVM on Two Clusters](https://cdn.rawgit.com/carefree0910/Resources/cbd5675e/Backgrounds/TorchLinearSVM.gif)
 
 ### Kernel Perceptron
-![Kernel Perceptron on Spiral](https://cdn.rawgit.com/carefree0910/Resources/d269faeb/Lines/KP.gif)
 
-![Kernel Perceptron on Spiral](https://cdn.rawgit.com/carefree0910/Resources/d269faeb/Backgrounds/KP.gif)
+#### GD
+![Kernel Perceptron on Spiral](https://cdn.rawgit.com/carefree0910/Resources/14dfc108/Backgrounds/GDKP.gif)
+
+#### SMO
+![Kernel Perceptron on Spiral](https://cdn.rawgit.com/carefree0910/Resources/14dfc108/Backgrounds/KP.gif)
 
 ### SVM
-![SVM on Spiral](https://cdn.rawgit.com/carefree0910/Resources/d269faeb/Lines/SVM.gif)
 
-![SVM on Spiral](https://cdn.rawgit.com/carefree0910/Resources/d269faeb/Backgrounds/SVM.gif)
+#### GD
+![SVM on Spiral](https://cdn.rawgit.com/carefree0910/Resources/14dfc108/Backgrounds/GDSVM.gif)
+
+#### SMO
+![SVM on Spiral](https://cdn.rawgit.com/carefree0910/Resources/14dfc108/Backgrounds/SVM.gif)
 
 ## Example
 ```python
@@ -43,7 +49,7 @@ from e_SVM.SVM import SVM
 x, y = DataUtil.gen_spiral(20, 4, 2, 2, one_hot=False)
 y[y == 0] = -1  # Get spiral dataset, Notice that y should be 1 or -1
 
-svm = SVM()
+svm = SVM()  # Build SVM with SMO algorithm
 svm.fit(x, y, kernel="poly", p=12)  # Train SVM (kernel: poly, degree: 12)
 svm.evaluate(x, y)  # Print out accuracy
 svm.visualize2d(x, y, padding=0.1, dense=400, emphasize=svm["alpha"] > 0)
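The README example keeps the SMO-based `SVM`; this commit's counterpart is the gradient-descent variant `GDSVM` from the same module. A parallel usage sketch, assuming the same API surface (the `Util.Util` import path is assumed from the rest of the repo; `epoch=10000` mirrors TestSVM.py):

```python
from Util.Util import DataUtil   # import path assumed, not shown in this diff
from e_SVM.SVM import GDSVM

x, y = DataUtil.gen_spiral(20, 4, 2, 2, one_hot=False)
y[y == 0] = -1                                    # labels must be 1 / -1

svm = GDSVM()                                     # Build SVM trained by gradient descent
svm.fit(x, y, kernel="poly", p=12, epoch=10000)   # GD needs an explicit epoch budget
svm.evaluate(x, y)
svm.visualize2d(x, y, padding=0.1, dense=400, emphasize=svm["alpha"] > 0)
```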

e_SVM/SVM.py (+11 -19)
@@ -108,27 +108,19 @@ def _fit(self, sample_weight, tol):
 class GDSVM(GDKernelBase):
     GDSVMTiming = Timing()
 
-    def __init__(self, **kwargs):
-        super(GDSVM, self).__init__(**kwargs)
-        self._fit_args, self._fit_args_names = [1e-3], ["tol"]
-
-    @GDSVMTiming.timeit(level=1, prefix="[Core] ")
-    def _loss(self, y, y_pred, sample_weight):
-        return np.sum(
-            np.maximum(0, 1 - y * y_pred) * sample_weight
-        ) + 0.5 * (y_pred - self._b).dot(self._alpha)
-
     @GDSVMTiming.timeit(level=1, prefix="[Core] ")
     def _get_grads(self, x_batch, y_batch, y_pred, sample_weight_batch, *args):
         err = -y_batch * (x_batch.dot(self._alpha) + self._b)
-        if np.max(err) < 0:
-            return [None, None]
         mask = err >= 0
-        delta = -y_batch[mask]
-        self._model_grads = [
-            np.sum(delta[..., None] * x_batch[mask], axis=0),
-            np.sum(delta)
-        ]
+        if np.max(err) < 0:
+            self._model_grads = [None, None]
+        else:
+            delta = -y_batch[mask] * sample_weight_batch[mask]
+            self._model_grads = [
+                np.sum(delta[..., None] * x_batch[mask], axis=0),
+                np.sum(delta)
+            ]
+        return np.sum(err[mask]) + 0.5 * (y_pred - self._b).dot(self._alpha)
 
 
 class TFSVM(TFKernelBase):
@@ -165,7 +157,7 @@ def _prepare(self, sample_weight, **kwargs):
     def _fit(self, sample_weight, tol):
         if self._train_repeat is None:
             self._train_repeat = self._get_train_repeat(self._x, self._batch_size)
-        l = self.batch_training(
+        l = self._batch_training(
             self._gram, self._y, self._batch_size, self._train_repeat,
             self._loss, self._train_step
         )
@@ -212,7 +204,7 @@ def _prepare(self, sample_weight, **kwargs):
     def _fit(self, sample_weight, tol):
         if self._train_repeat is None:
             self._train_repeat = self._get_train_repeat(self._x, self._batch_size)
-        l = self.batch_training(
+        l = self._batch_training(
            self._gram, self._y, self._batch_size, self._train_repeat,
            self._loss_function
         )
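One note on the GDSVM loss now returned by `_get_grads`: the extra term `0.5 * (y_pred - self._b).dot(self._alpha)` is the kernel regularizer. Assuming `y_pred` is computed from the Gram matrix $K$ as $K\alpha + b$ (as the kernel base class suggests), the identity is

$$\frac{1}{2}\,(y_{\text{pred}} - b)^\top \alpha \;=\; \frac{1}{2}\,\alpha^\top K \alpha \;=\; \frac{1}{2}\,\lVert f \rVert_{\mathcal{H}}^2,$$

i.e. the squared RKHS norm of the decision function. This term is exactly what separates the GDSVM objective from GDKP in KP.py, whose `_get_grads` returns the bare hinge term.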

e_SVM/TestSVM.py (+16 -4)
@@ -14,17 +14,17 @@ def main():
     y[y == 0] = -1
 
     animation_params = {
-        "show": False, "period": 50, "mp4": False,
-        "dense": 400, "draw_background": False
+        "show": False, "mp4": False, "period": 50,
+        "dense": 400, "draw_background": True
     }
 
     svm = SVM(animation_params=animation_params)
-    svm.fit(x, y, kernel="poly", p=12)
+    svm.fit(x, y, kernel="poly", p=12, epoch=600)
     svm.evaluate(x, y)
     svm.visualize2d(x, y, padding=0.1, dense=400, emphasize=svm["alpha"] > 0)
 
     svm = GDSVM(animation_params=animation_params)
-    svm.fit(x, y, kernel="poly", p=12)
+    svm.fit(x, y, kernel="poly", p=12, epoch=10000)
     svm.evaluate(x, y)
     svm.visualize2d(x, y, padding=0.1, dense=400, emphasize=svm["alpha"] > 0)
 
@@ -90,6 +90,18 @@ def main():
     plt.plot(range(len(logs)), logs)
     plt.show()
 
+    svm = GDSVM()
+    logs = [log[0] for log in svm.fit(
+        x_train, y_train, metrics=["acc"], x_test=x_test, y_test=y_test
+    )]
+    svm.evaluate(x_train, y_train)
+    svm.evaluate(x_test, y_test)
+
+    plt.figure()
+    plt.title(svm.title)
+    plt.plot(range(len(logs)), logs)
+    plt.show()
+
     svm.show_timing_log()
 
 if __name__ == '__main__':
