Skip to content

Commit 0c3aaa6

Browse files
committed
Update _Dist/NeuralNetworks/d_Traditional2NN
1 parent c4343d9 commit 0c3aaa6

File tree

3 files changed

+54
-164
lines changed

3 files changed

+54
-164
lines changed

_Dist/NeuralNetworks/d_Traditional2NN/DTree2NN.ipynb

+34-101
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
"sys.path.append(\"../../../\")\n",
1313
"from Util.Util import DataUtil\n",
1414
"\n",
15-
"(x_train, y_train), (x_test, y_test) = DataUtil.gen_noisy_linear()"
15+
"(x_train, y_train), (x_test, y_test) = DataUtil.gen_noisy_linear()\n",
16+
"idx = [True] * 100 + [False]\n",
17+
"features_lists = [set() for _ in range(100)] + [{0, 1}]"
1618
]
1719
},
1820
{
@@ -27,9 +29,6 @@
2729
"name": "stdout",
2830
"output_type": "stream",
2931
"text": [
30-
"Shuffling data...\n",
31-
"Generating data info...\n",
32-
"Transforming data...\n",
3332
"============================================================\n",
3433
"Decision tree performance\n",
3534
"------------------------------------------------------------\n",
@@ -41,7 +40,6 @@
4140
"------------------------------------------------------------\n",
4241
"Data : 10000 training samples, 1500 test samples\n",
4342
"Features : 0 categorical, 100 numerical\n",
44-
"-> No pre-processing methods were applied\n",
4543
"============================================================\n",
4644
"Deep model input: Continuous features only\n",
4745
"------------------------------------------------------------\n",
@@ -55,123 +53,62 @@
5553
"------------------------------------------------------------\n",
5654
"Activation : ['step', 'one_hot']\n",
5755
"Batch size : 128\n",
58-
"Epoch num : 20\n",
56+
"Epoch num : 5\n",
5957
"Optimizer : Adam\n",
6058
"Metric : acc\n",
6159
"Loss : cross_entropy\n",
6260
"lr : 0.001\n",
6361
"lb : 0.0\n",
6462
"------------------------------------------------------------\n",
65-
"Noises : None\n",
66-
"------------------------------------------------------------\n",
6763
"============================================================\n",
6864
"Initial performance\n",
6965
"------------------------------------------------------------\n",
7066
"Train acc: 1.0\n",
7167
"Test acc: 0.815333\n",
7268
"------------------------------------------------------------\n",
7369
"\n",
74-
"## # Main # Progress bar initialized ##"
75-
]
76-
},
77-
{
78-
"name": "stderr",
79-
"output_type": "stream",
80-
"text": [
81-
"../../../_Dist/NeuralNetworks/c_NeuralNetwork/NNCore.py:1015: UserWarning: Early stopped at n_epoch=1 due to 'Over-fitting'\n",
82-
" warnings.warn(warn_msg)\n"
83-
]
84-
},
85-
{
86-
"name": "stdout",
87-
"output_type": "stream",
88-
"text": [
89-
"## # Main # (20 : 0 -> 1) Task Finished. Time Cost: 0 h 0 min 9.004 s; Average: 0 h 0 min 9.004 s ##\n",
90-
"============================================================\n",
91-
"History score summary of NoisyLinear (test) - snapshot 0\n",
92-
"------------------------------------------------------------\n",
93-
"Mean : Train - 0.9318; Test - 0.7562\n",
94-
"Std : Train - 0.0685; Test - 0.5302\n",
95-
"------------------------------------------------------------\n",
96-
"============================================================\n",
97-
"Model performance on test set\n",
98-
"------------------------------------------------------------\n",
99-
"acc: 0.686667\n",
100-
"------------------------------------------------------------\n",
101-
"Shuffling data...\n",
102-
"Generating data info...\n",
103-
"Transforming data...\n",
70+
"## # Main # (5 : 0 -> 5) Task Finished. Time Cost: 0 h 0 min 29.29 s; Average: 0 h 0 min 5.857 s ##\n",
71+
"Test acc: 0.724\n",
10472
"============================================================\n",
10573
"Decision tree performance\n",
10674
"------------------------------------------------------------\n",
10775
"Train : 1.0\n",
108-
"Test : 0.814666666667\n",
76+
"Test : 0.811333333333\n",
10977
"------------------------------------------------------------\n",
11078
"============================================================\n",
11179
"This is a 2-classes problem\n",
11280
"------------------------------------------------------------\n",
11381
"Data : 10000 training samples, 1500 test samples\n",
11482
"Features : 0 categorical, 100 numerical\n",
115-
"-> No pre-processing methods were applied\n",
11683
"============================================================\n",
11784
"Deep model input: Continuous features only\n",
11885
"------------------------------------------------------------\n",
11986
"Using raw values in categorical columns without embedding\n",
12087
"------------------------------------------------------------\n",
12188
"Using dropout with keep_prob = 0.5\n",
12289
"Training without batch norm\n",
123-
"Hidden units: [862, 863]\n",
90+
"Hidden units: [860, 861]\n",
12491
"============================================================\n",
12592
"Hyper parameters\n",
12693
"------------------------------------------------------------\n",
12794
"Activation : ['step', 'softmax']\n",
12895
"Batch size : 128\n",
129-
"Epoch num : 20\n",
96+
"Epoch num : 5\n",
13097
"Optimizer : Adam\n",
13198
"Metric : acc\n",
13299
"Loss : cross_entropy\n",
133100
"lr : 0.001\n",
134101
"lb : 0.0\n",
135102
"------------------------------------------------------------\n",
136-
"Noises : None\n",
137-
"------------------------------------------------------------\n",
138103
"============================================================\n",
139104
"Initial performance\n",
140105
"------------------------------------------------------------\n",
141106
"Train acc: 1.0\n",
142-
"Test acc: 0.814667\n",
107+
"Test acc: 0.811333\n",
143108
"------------------------------------------------------------\n",
144109
"\n",
145-
"## # Main # [--- ] : 2 / 20 ## Time Cost: 0 h 0 min 16.72 s; Average: 0 h 0 min 8.362 s "
146-
]
147-
},
148-
{
149-
"name": "stderr",
150-
"output_type": "stream",
151-
"text": [
152-
"../../../_Dist/NeuralNetworks/c_NeuralNetwork/NNCore.py:1015: UserWarning: Early stopped at n_epoch=3 due to 'Over-fitting'\n",
153-
" warnings.warn(warn_msg)\n"
154-
]
155-
},
156-
{
157-
"name": "stdout",
158-
"output_type": "stream",
159-
"text": [
160-
"## # Main # (20 : 0 -> 2) Task Finished. Time Cost: 0 h 0 min 18.39 s; Average: 0 h 0 min 9.196 s ##\n",
161-
"============================================================\n",
162-
"History score summary of NoisyLinear (test) - snapshot 0\n",
163-
"------------------------------------------------------------\n",
164-
"Mean : Train - 0.8101; Test - 0.8433\n",
165-
"Std : Train - 0.0128; Test - 0.2110\n",
166-
"------------------------------------------------------------\n",
167-
"============================================================\n",
168-
"Model performance on test set\n",
169-
"------------------------------------------------------------\n",
170-
"acc: 0.810667\n",
171-
"------------------------------------------------------------\n",
172-
"Shuffling data...\n",
173-
"Generating data info...\n",
174-
"Transforming data...\n",
110+
"## # Main # (5 : 0 -> 5) Task Finished. Time Cost: 0 h 0 min 25.96 s; Average: 0 h 0 min 5.192 s ##\n",
111+
"Test acc: 0.887333\n",
175112
"============================================================\n",
176113
"Decision tree performance\n",
177114
"------------------------------------------------------------\n",
@@ -183,69 +120,65 @@
183120
"------------------------------------------------------------\n",
184121
"Data : 10000 training samples, 1500 test samples\n",
185122
"Features : 0 categorical, 100 numerical\n",
186-
"-> No pre-processing methods were applied\n",
187123
"============================================================\n",
188124
"Deep model input: Continuous features only\n",
189125
"------------------------------------------------------------\n",
190126
"Using raw values in categorical columns without embedding\n",
191127
"------------------------------------------------------------\n",
192128
"Using dropout with keep_prob = 0.5\n",
193129
"Training without batch norm\n",
194-
"Hidden units: [873, 874]\n",
130+
"Hidden units: [867, 868]\n",
195131
"============================================================\n",
196132
"Hyper parameters\n",
197133
"------------------------------------------------------------\n",
198134
"Activation : ['tanh', 'softmax']\n",
199135
"Batch size : 128\n",
200-
"Epoch num : 20\n",
136+
"Epoch num : 5\n",
201137
"Optimizer : Adam\n",
202138
"Metric : acc\n",
203139
"Loss : cross_entropy\n",
204140
"lr : 0.001\n",
205141
"lb : 0.0\n",
206142
"------------------------------------------------------------\n",
207-
"Noises : None\n",
208-
"------------------------------------------------------------\n",
209143
"============================================================\n",
210144
"Initial performance\n",
211145
"------------------------------------------------------------\n",
212-
"Train acc: 0.787\n",
213-
"Test acc: 0.820667\n",
146+
"Train acc: 0.8104\n",
147+
"Test acc: 0.855333\n",
214148
"------------------------------------------------------------\n",
215149
"\n",
216-
"## # Main # (20 : 0 -> 1) Task Finished. Time Cost: 0 h 0 min 7.939 s; Average: 0 h 0 min 7.939 s ##\n",
217-
"============================================================\n",
218-
"History score summary of NoisyLinear (test) - snapshot 0\n",
219-
"------------------------------------------------------------\n",
220-
"Mean : Train - 0.7920; Test - 0.8280\n",
221-
"Std : Train - 0.0113; Test - 0.2904\n",
222-
"------------------------------------------------------------\n",
223-
"============================================================\n",
224-
"Model performance on test set\n",
225-
"------------------------------------------------------------\n",
226-
"acc: 0.796667\n",
227-
"------------------------------------------------------------\n"
150+
"## # Main # (5 : 0 -> 5) Task Finished. Time Cost: 0 h 0 min 29.02 s; Average: 0 h 0 min 5.804 s ##\n",
151+
"Test acc: 0.95\n"
228152
]
229153
}
230154
],
231155
"source": [
232156
"from Toolbox import DT2NNWrapper\n",
233157
"\n",
234158
"nn = DT2NNWrapper(\n",
235-
" \"NoisyLinear\",\n",
236-
" model_param_settings={\"activations\": [\"step\", \"one_hot\"]},\n",
159+
" \"NoisyLinear\", idx, features_lists,\n",
160+
" model_param_settings={\n",
161+
" \"activations\": [\"step\", \"one_hot\"],\n",
162+
" \"n_epoch\": 5\n",
163+
" },\n",
237164
" verbose_settings={\"metric\": \"acc\"}\n",
238165
").fit(x_train, y_train, x_test, y_test)\n",
239166
"\n",
240167
"nn = DT2NNWrapper(\n",
241-
" \"NoisyLinear\",\n",
242-
" model_param_settings={\"activations\": [\"step\", \"softmax\"]},\n",
168+
" \"NoisyLinear\", idx, features_lists,\n",
169+
" model_param_settings={\n",
170+
" \"activations\": [\"step\", \"softmax\"],\n",
171+
" \"n_epoch\": 5\n",
172+
" },\n",
243173
" verbose_settings={\"metric\": \"acc\"}\n",
244174
").fit(x_train, y_train, x_test, y_test)\n",
245175
"\n",
246176
"nn = DT2NNWrapper(\n",
247-
" \"NoisyLinear\",\n",
248-
" model_param_settings={\"activations\": [\"tanh\", \"softmax\"]},\n",
177+
" \"NoisyLinear\", idx, features_lists,\n",
178+
" model_param_settings={\n",
179+
" \"activations\": [\"tanh\", \"softmax\"],\n",
180+
" \"n_epoch\": 5\n",
181+
" },\n",
249182
" verbose_settings={\"metric\": \"acc\"}\n",
250183
").fit(x_train, y_train, x_test, y_test)"
251184
]
@@ -272,5 +205,5 @@
272205
}
273206
},
274207
"nbformat": 4,
275-
"nbformat_minor": 1
208+
"nbformat_minor": 0
276209
}

_Dist/NeuralNetworks/d_Traditional2NN/NaiveBayes2NN.ipynb

+6-45
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
"outputs": [],
1010
"source": [
1111
"import sys\n",
12-
"import numpy as np\n",
1312
"sys.path.append(\"../../../\")\n",
1413
"from Util.Util import DataUtil\n",
1514
"\n",
@@ -35,23 +34,6 @@
3534
"scrolled": true
3635
},
3736
"outputs": [
38-
{
39-
"name": "stdout",
40-
"output_type": "stream",
41-
"text": [
42-
"Shuffling data...\n",
43-
"Generating data info...\n",
44-
"Transforming data...\n"
45-
]
46-
},
47-
{
48-
"name": "stderr",
49-
"output_type": "stream",
50-
"text": [
51-
"../../../_Dist/NeuralNetworks/c_NeuralNetwork/NNWrapper.py:201: UserWarning: One column is redundant: [82]\n",
52-
" warnings.warn(warn_msg)\n"
53-
]
54-
},
5537
{
5638
"name": "stdout",
5739
"output_type": "stream",
@@ -66,10 +48,9 @@
6648
"This is a 2-classes problem\n",
6749
"------------------------------------------------------------\n",
6850
"Data : 6000 training samples, 2124 test samples\n",
69-
"Features : 117 categorical, 0 numerical\n",
70-
"-> No pre-processing methods were applied\n",
51+
"Features : 0 categorical, 117 numerical\n",
7152
"============================================================\n",
72-
"Deep model input: Continuous features and raw categorical features\n",
53+
"Deep model input: Continuous features only\n",
7354
"------------------------------------------------------------\n",
7455
"Using raw values in categorical columns without embedding\n",
7556
"------------------------------------------------------------\n",
@@ -88,43 +69,23 @@
8869
"lr : 0.001\n",
8970
"lb : 0.0\n",
9071
"------------------------------------------------------------\n",
91-
"Noises : None\n",
92-
"------------------------------------------------------------\n",
9372
"============================================================\n",
9473
"Initial performance\n",
9574
"------------------------------------------------------------\n",
9675
"Train acc: 0.955167\n",
9776
"Test acc: 0.95339\n",
9877
"------------------------------------------------------------\n",
9978
"\n",
100-
"## # Main # (20 : 0 -> 18) Task Finished. Time Cost: 0 h 0 min 3.38 s; Average: 0 h 0 min 0.1878 s ##\n",
101-
"============================================================\n",
102-
"History score summary of Mushroom (test) - snapshot 0\n",
103-
"------------------------------------------------------------\n",
104-
"Mean : Train - 0.9980; Test - 0.9972\n",
105-
"Std : Train - 0.0002; Test - 0.0000\n",
106-
"------------------------------------------------------------\n",
107-
"============================================================\n",
108-
"Model performance on test set\n",
109-
"------------------------------------------------------------\n",
110-
"acc: 0.997175\n",
111-
"------------------------------------------------------------\n"
112-
]
113-
},
114-
{
115-
"name": "stderr",
116-
"output_type": "stream",
117-
"text": [
118-
"../../../_Dist/NeuralNetworks/c_NeuralNetwork/NNCore.py:1015: UserWarning: Early stopped at n_epoch=19 due to 'Performance not improving'\n",
119-
" warnings.warn(warn_msg)\n"
79+
"## # Main # (20 : 0 -> 20) Task Finished. Time Cost: 0 h 0 min 1.312 s; Average: 0 h 0 min 0.06561 s ##\n",
80+
"Test acc: 0.997175\n"
12081
]
12182
}
12283
],
12384
"source": [
12485
"from Toolbox import NB2NNWrapper\n",
12586
"\n",
12687
"nn = NB2NNWrapper(\n",
127-
" \"Mushroom\",\n",
88+
" \"Mushroom\", [True] * 117 + [False], [(0, 1) for _ in range(118)],\n",
12889
" verbose_settings={\"metric\": \"acc\"}\n",
12990
").fit(x_train, y_train, x_test, y_test)"
13091
]
@@ -151,5 +112,5 @@
151112
}
152113
},
153114
"nbformat": 4,
154-
"nbformat_minor": 1
115+
"nbformat_minor": 0
155116
}

0 commit comments

Comments
 (0)