Update _Dist/NeuralNetworks/d_Traditional2NN

carefree0910 · carefree0910 · commit 0c3aaa6e5950 · 2017-10-03T15:08:21.000+08:00
diff --git a/_Dist/NeuralNetworks/d_Traditional2NN/DTree2NN.ipynb b/_Dist/NeuralNetworks/d_Traditional2NN/DTree2NN.ipynb
@@ -12,7 +12,9 @@
     "sys.path.append(\"../../../\")\n",
     "from Util.Util import DataUtil\n",
     "\n",
-    "(x_train, y_train), (x_test, y_test) = DataUtil.gen_noisy_linear()"
+    "(x_train, y_train), (x_test, y_test) = DataUtil.gen_noisy_linear()\n",
+    "idx = [True] * 100 + [False]\n",
+    "features_lists = [set() for _ in range(100)] + [{0, 1}]"
    ]
   },
   {
@@ -27,9 +29,6 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Shuffling data...\n",
-      "Generating data info...\n",
-      "Transforming data...\n",
       "============================================================\n",
       "Decision tree performance\n",
       "------------------------------------------------------------\n",
@@ -41,7 +40,6 @@
       "------------------------------------------------------------\n",
       "Data     : 10000 training samples, 1500 test samples\n",
       "Features : 0 categorical, 100 numerical\n",
-      "-> No pre-processing methods were applied\n",
       "============================================================\n",
       "Deep model input: Continuous features only\n",
       "------------------------------------------------------------\n",
@@ -55,123 +53,62 @@
       "------------------------------------------------------------\n",
       "Activation   : ['step', 'one_hot']\n",
       "Batch size   : 128\n",
-      "Epoch num    : 20\n",
+      "Epoch num    : 5\n",
       "Optimizer    : Adam\n",
       "Metric       : acc\n",
       "Loss         : cross_entropy\n",
       "lr           : 0.001\n",
       "lb           : 0.0\n",
       "------------------------------------------------------------\n",
-      "Noises       : None\n",
-      "------------------------------------------------------------\n",
       "============================================================\n",
       "Initial performance\n",
       "------------------------------------------------------------\n",
       "Train acc:      1.0\n",
       "Test  acc: 0.815333\n",
       "------------------------------------------------------------\n",
       "\n",
-      "## #     Main     # Progress bar initialized  ##"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "../../../_Dist/NeuralNetworks/c_NeuralNetwork/NNCore.py:1015: UserWarning: Early stopped at n_epoch=1 due to 'Over-fitting'\n",
-      "  warnings.warn(warn_msg)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "## #     Main     # (20 : 0 -> 1) Task Finished. Time Cost:   0 h   0 min  9.004 s; Average:   0 h   0 min  9.004 s  ##\n",
-      "============================================================\n",
-      "History score summary of NoisyLinear (test) - snapshot 0\n",
-      "------------------------------------------------------------\n",
-      "Mean : Train - 0.9318; Test - 0.7562\n",
-      "Std  : Train - 0.0685; Test - 0.5302\n",
-      "------------------------------------------------------------\n",
-      "============================================================\n",
-      "Model performance on test set\n",
-      "------------------------------------------------------------\n",
-      "acc: 0.686667\n",
-      "------------------------------------------------------------\n",
-      "Shuffling data...\n",
-      "Generating data info...\n",
-      "Transforming data...\n",
+      "## #     Main     # (5 : 0 -> 5) Task Finished. Time Cost:   0 h   0 min  29.29 s; Average:   0 h   0 min  5.857 s  ##\n",
+      "Test acc:    0.724\n",
       "============================================================\n",
       "Decision tree performance\n",
       "------------------------------------------------------------\n",
       "Train :  1.0\n",
-      "Test  :  0.814666666667\n",
+      "Test  :  0.811333333333\n",
       "------------------------------------------------------------\n",
       "============================================================\n",
       "This is a 2-classes problem\n",
       "------------------------------------------------------------\n",
       "Data     : 10000 training samples, 1500 test samples\n",
       "Features : 0 categorical, 100 numerical\n",
-      "-> No pre-processing methods were applied\n",
       "============================================================\n",
       "Deep model input: Continuous features only\n",
       "------------------------------------------------------------\n",
       "Using raw values in categorical columns without embedding\n",
       "------------------------------------------------------------\n",
       "Using dropout with keep_prob = 0.5\n",
       "Training without batch norm\n",
-      "Hidden units: [862, 863]\n",
+      "Hidden units: [860, 861]\n",
       "============================================================\n",
       "Hyper parameters\n",
       "------------------------------------------------------------\n",
       "Activation   : ['step', 'softmax']\n",
       "Batch size   : 128\n",
-      "Epoch num    : 20\n",
+      "Epoch num    : 5\n",
       "Optimizer    : Adam\n",
       "Metric       : acc\n",
       "Loss         : cross_entropy\n",
       "lr           : 0.001\n",
       "lb           : 0.0\n",
       "------------------------------------------------------------\n",
-      "Noises       : None\n",
-      "------------------------------------------------------------\n",
       "============================================================\n",
       "Initial performance\n",
       "------------------------------------------------------------\n",
       "Train acc:      1.0\n",
-      "Test  acc: 0.814667\n",
+      "Test  acc: 0.811333\n",
       "------------------------------------------------------------\n",
       "\n",
-      "## #     Main     # [---                           ] : 2 / 20 ##  Time Cost:   0 h   0 min  16.72 s; Average:   0 h   0 min  8.362 s "
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "../../../_Dist/NeuralNetworks/c_NeuralNetwork/NNCore.py:1015: UserWarning: Early stopped at n_epoch=3 due to 'Over-fitting'\n",
-      "  warnings.warn(warn_msg)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "## #     Main     # (20 : 0 -> 2) Task Finished. Time Cost:   0 h   0 min  18.39 s; Average:   0 h   0 min  9.196 s  ##\n",
-      "============================================================\n",
-      "History score summary of NoisyLinear (test) - snapshot 0\n",
-      "------------------------------------------------------------\n",
-      "Mean : Train - 0.8101; Test - 0.8433\n",
-      "Std  : Train - 0.0128; Test - 0.2110\n",
-      "------------------------------------------------------------\n",
-      "============================================================\n",
-      "Model performance on test set\n",
-      "------------------------------------------------------------\n",
-      "acc: 0.810667\n",
-      "------------------------------------------------------------\n",
-      "Shuffling data...\n",
-      "Generating data info...\n",
-      "Transforming data...\n",
+      "## #     Main     # (5 : 0 -> 5) Task Finished. Time Cost:   0 h   0 min  25.96 s; Average:   0 h   0 min  5.192 s  ##\n",
+      "Test acc: 0.887333\n",
       "============================================================\n",
       "Decision tree performance\n",
       "------------------------------------------------------------\n",
@@ -183,69 +120,65 @@
       "------------------------------------------------------------\n",
       "Data     : 10000 training samples, 1500 test samples\n",
       "Features : 0 categorical, 100 numerical\n",
-      "-> No pre-processing methods were applied\n",
       "============================================================\n",
       "Deep model input: Continuous features only\n",
       "------------------------------------------------------------\n",
       "Using raw values in categorical columns without embedding\n",
       "------------------------------------------------------------\n",
       "Using dropout with keep_prob = 0.5\n",
       "Training without batch norm\n",
-      "Hidden units: [873, 874]\n",
+      "Hidden units: [867, 868]\n",
       "============================================================\n",
       "Hyper parameters\n",
       "------------------------------------------------------------\n",
       "Activation   : ['tanh', 'softmax']\n",
       "Batch size   : 128\n",
-      "Epoch num    : 20\n",
+      "Epoch num    : 5\n",
       "Optimizer    : Adam\n",
       "Metric       : acc\n",
       "Loss         : cross_entropy\n",
       "lr           : 0.001\n",
       "lb           : 0.0\n",
       "------------------------------------------------------------\n",
-      "Noises       : None\n",
-      "------------------------------------------------------------\n",
       "============================================================\n",
       "Initial performance\n",
       "------------------------------------------------------------\n",
-      "Train acc:    0.787\n",
-      "Test  acc: 0.820667\n",
+      "Train acc:   0.8104\n",
+      "Test  acc: 0.855333\n",
       "------------------------------------------------------------\n",
       "\n",
-      "## #     Main     # (20 : 0 -> 1) Task Finished. Time Cost:   0 h   0 min  7.939 s; Average:   0 h   0 min  7.939 s  ##\n",
-      "============================================================\n",
-      "History score summary of NoisyLinear (test) - snapshot 0\n",
-      "------------------------------------------------------------\n",
-      "Mean : Train - 0.7920; Test - 0.8280\n",
-      "Std  : Train - 0.0113; Test - 0.2904\n",
-      "------------------------------------------------------------\n",
-      "============================================================\n",
-      "Model performance on test set\n",
-      "------------------------------------------------------------\n",
-      "acc: 0.796667\n",
-      "------------------------------------------------------------\n"
+      "## #     Main     # (5 : 0 -> 5) Task Finished. Time Cost:   0 h   0 min  29.02 s; Average:   0 h   0 min  5.804 s  ##\n",
+      "Test acc:     0.95\n"
      ]
     }
    ],
    "source": [
     "from Toolbox import DT2NNWrapper\n",
     "\n",
     "nn = DT2NNWrapper(\n",
-    "    \"NoisyLinear\",\n",
-    "    model_param_settings={\"activations\": [\"step\", \"one_hot\"]},\n",
+    "    \"NoisyLinear\", idx, features_lists,\n",
+    "    model_param_settings={\n",
+    "        \"activations\": [\"step\", \"one_hot\"],\n",
+    "        \"n_epoch\": 5\n",
+    "    },\n",
     "    verbose_settings={\"metric\": \"acc\"}\n",
     ").fit(x_train, y_train, x_test, y_test)\n",
     "\n",
     "nn = DT2NNWrapper(\n",
-    "    \"NoisyLinear\",\n",
-    "    model_param_settings={\"activations\": [\"step\", \"softmax\"]},\n",
+    "    \"NoisyLinear\", idx, features_lists,\n",
+    "    model_param_settings={\n",
+    "        \"activations\": [\"step\", \"softmax\"],\n",
+    "        \"n_epoch\": 5\n",
+    "    },\n",
     "    verbose_settings={\"metric\": \"acc\"}\n",
     ").fit(x_train, y_train, x_test, y_test)\n",
     "\n",
     "nn = DT2NNWrapper(\n",
-    "    \"NoisyLinear\",\n",
-    "    model_param_settings={\"activations\": [\"tanh\", \"softmax\"]},\n",
+    "    \"NoisyLinear\", idx, features_lists,\n",
+    "    model_param_settings={\n",
+    "        \"activations\": [\"tanh\", \"softmax\"],\n",
+    "        \"n_epoch\": 5\n",
+    "    },\n",
     "    verbose_settings={\"metric\": \"acc\"}\n",
     ").fit(x_train, y_train, x_test, y_test)"
    ]
@@ -272,5 +205,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 1
+ "nbformat_minor": 0
 }
diff --git a/_Dist/NeuralNetworks/d_Traditional2NN/NaiveBayes2NN.ipynb b/_Dist/NeuralNetworks/d_Traditional2NN/NaiveBayes2NN.ipynb
@@ -9,7 +9,6 @@
    "outputs": [],
    "source": [
     "import sys\n",
-    "import numpy as np\n",
     "sys.path.append(\"../../../\")\n",
     "from Util.Util import DataUtil\n",
     "\n",
@@ -35,23 +34,6 @@
     "scrolled": true
    },
    "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Shuffling data...\n",
-      "Generating data info...\n",
-      "Transforming data...\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "../../../_Dist/NeuralNetworks/c_NeuralNetwork/NNWrapper.py:201: UserWarning: One column is redundant: [82]\n",
-      "  warnings.warn(warn_msg)\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -66,10 +48,9 @@
       "This is a 2-classes problem\n",
       "------------------------------------------------------------\n",
       "Data     : 6000 training samples, 2124 test samples\n",
-      "Features : 117 categorical, 0 numerical\n",
-      "-> No pre-processing methods were applied\n",
+      "Features : 0 categorical, 117 numerical\n",
       "============================================================\n",
-      "Deep model input: Continuous features and raw categorical features\n",
+      "Deep model input: Continuous features only\n",
       "------------------------------------------------------------\n",
       "Using raw values in categorical columns without embedding\n",
       "------------------------------------------------------------\n",
@@ -88,43 +69,23 @@
       "lr           : 0.001\n",
       "lb           : 0.0\n",
       "------------------------------------------------------------\n",
-      "Noises       : None\n",
-      "------------------------------------------------------------\n",
       "============================================================\n",
       "Initial performance\n",
       "------------------------------------------------------------\n",
       "Train acc: 0.955167\n",
       "Test  acc:  0.95339\n",
       "------------------------------------------------------------\n",
       "\n",
-      "## #     Main     # (20 : 0 -> 18) Task Finished. Time Cost:   0 h   0 min   3.38 s; Average:   0 h   0 min 0.1878 s  ##\n",
-      "============================================================\n",
-      "History score summary of Mushroom (test) - snapshot 0\n",
-      "------------------------------------------------------------\n",
-      "Mean : Train - 0.9980; Test - 0.9972\n",
-      "Std  : Train - 0.0002; Test - 0.0000\n",
-      "------------------------------------------------------------\n",
-      "============================================================\n",
-      "Model performance on test set\n",
-      "------------------------------------------------------------\n",
-      "acc: 0.997175\n",
-      "------------------------------------------------------------\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "../../../_Dist/NeuralNetworks/c_NeuralNetwork/NNCore.py:1015: UserWarning: Early stopped at n_epoch=19 due to 'Performance not improving'\n",
-      "  warnings.warn(warn_msg)\n"
+      "## #     Main     # (20 : 0 -> 20) Task Finished. Time Cost:   0 h   0 min  1.312 s; Average:   0 h   0 min 0.06561 s  ##\n",
+      "Test acc: 0.997175\n"
      ]
     }
    ],
    "source": [
     "from Toolbox import NB2NNWrapper\n",
     "\n",
     "nn = NB2NNWrapper(\n",
-    "    \"Mushroom\",\n",
+    "    \"Mushroom\", [True] * 117 + [False], [(0, 1) for _ in range(118)],\n",
     "    verbose_settings={\"metric\": \"acc\"}\n",
     ").fit(x_train, y_train, x_test, y_test)"
    ]
@@ -151,5 +112,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 1
+ "nbformat_minor": 0
 }
diff --git a/_Dist/NeuralNetworks/d_Traditional2NN/Toolbox.py b/_Dist/NeuralNetworks/d_Traditional2NN/Toolbox.py