Skip to content

Commit

Permalink
Fixed accuracy inflation and test data leak
Browse files Browse the repository at this point in the history
  • Loading branch information
AnanthVivekanand committed Nov 26, 2020
1 parent d6b0a3d commit e598b3d
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 4 deletions.
25 changes: 23 additions & 2 deletions training/.ipynb_checkpoints/optimized_TCN-checkpoint.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,29 @@
" f.write(data)\n",
" return url\n",
"\n",
"from sklearn.model_selection import train_test_split \n",
"x_train, x_test, y_train, y_test = train_test_split(all_X, all_Y, test_size=0.2, random_state = 42)\n",
"# Old code for a random split. This inflates accuracy, which is undesirable. \n",
"# from sklearn.model_selection import train_test_split \n",
"# x_train, x_test, y_train, y_test = train_test_split(all_X, all_Y, test_size=0.2, random_state = 42)\n",
"\n",
"x_train = []\n",
"x_test = []\n",
"y_train = []\n",
"y_test = []\n",
"\n",
"for x in all_X:\n",
" if x[0:5] != \"fold5\":\n",
" x_train.append(x)\n",
" else:\n",
" x_test.append(x)\n",
"\n",
"\n",
"for y in all_Y:\n",
" if y[0:5] != \"fold5\":\n",
" y_train.append(y)\n",
" else:\n",
" y_test.append(y)\n",
" \n",
"# ^^^ change \"fold5\" to other folds when doing final 10-fold crossvalidation \n",
"\n",
"def file_to_arr(t: tf.Tensor):\n",
" x = []\n",
Expand Down
25 changes: 23 additions & 2 deletions training/optimized_TCN.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,29 @@
" f.write(data)\n",
" return url\n",
"\n",
"from sklearn.model_selection import train_test_split \n",
"x_train, x_test, y_train, y_test = train_test_split(all_X, all_Y, test_size=0.2, random_state = 42)\n",
"# Old code for a random split. This inflates accuracy, which is undesirable. \n",
"# from sklearn.model_selection import train_test_split \n",
"# x_train, x_test, y_train, y_test = train_test_split(all_X, all_Y, test_size=0.2, random_state = 42)\n",
"\n",
"x_train = []\n",
"x_test = []\n",
"y_train = []\n",
"y_test = []\n",
"\n",
"for x in all_X:\n",
" if x[0:5] != \"fold5\":\n",
" x_train.append(x)\n",
" else:\n",
" x_test.append(x)\n",
"\n",
"\n",
"for y in all_Y:\n",
" if y[0:5] != \"fold5\":\n",
" y_train.append(y)\n",
" else:\n",
" y_test.append(y)\n",
" \n",
"# ^^^ change \"fold5\" to other folds when doing final 10-fold crossvalidation \n",
"\n",
"def file_to_arr(t: tf.Tensor):\n",
" x = []\n",
Expand Down

0 comments on commit e598b3d

Please sign in to comment.