-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a2bc2ee
commit f226a7c
Showing
2 changed files
with
282 additions
and
66 deletions.
There are no files selected for viewing
249 changes: 249 additions & 0 deletions
249
...methods/Feature selection methods - wrapper method- with cross validation-backwords.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,249 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 53, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from sklearn.metrics import accuracy_score\n", | ||
"from sklearn.datasets import load_iris\n", | ||
"from itertools import combinations\n", | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"import ast" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 54, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"class logisticregression():\n", | ||
"    \"\"\"Binary logistic regression fitted with full-batch gradient descent.\n", | ||
"\n", | ||
"    A column of ones is prepended to the training data, so params[0] is the\n", | ||
"    intercept and params[1:] are the feature weights.\n", | ||
"    \"\"\"\n", | ||
"    def __init__(self,train_data,train_labels,lr=0.01,batch_size=None,epoch=10,print_every = 10):\n", | ||
"        \"\"\"Store the training set and the hyper-parameters.\n", | ||
"\n", | ||
"        train_data   : 2-D array-like, one row per sample.\n", | ||
"        train_labels : 0/1 labels, one per sample (1-D or a single column).\n", | ||
"        lr           : gradient-descent step size.\n", | ||
"        batch_size   : stored only; train() always uses the full batch.\n", | ||
"        epoch        : number of update steps performed by train().\n", | ||
"        print_every  : print the loss every this many epochs; falsy disables it.\n", | ||
"\n", | ||
"        Raises ValueError when data and labels disagree on the sample count.\n", | ||
"        \"\"\"\n", | ||
"        train_data = np.asarray(train_data,dtype=float)\n", | ||
"        # Force an (n, 1) column: a 1-D label vector would broadcast against the\n", | ||
"        # (n, 1) predictions into an (n, n) matrix inside gradient().\n", | ||
"        train_labels = np.asarray(train_labels,dtype=float).reshape(-1,1)\n", | ||
"        if len(train_data) != len(train_labels):\n", | ||
"            raise ValueError('train_data has {} rows but train_labels has {}'.format(\n", | ||
"                len(train_data),len(train_labels)))\n", | ||
"\n", | ||
"        bias_column = np.ones((len(train_data),1))\n", | ||
"        self.train_data = np.hstack((bias_column,train_data))\n", | ||
"        self.train_labels = train_labels\n", | ||
"\n", | ||
"        # shape[1] (rather than len(row 0)) also works for an empty training set.\n", | ||
"        self.params = np.zeros((self.train_data.shape[1],1))\n", | ||
"\n", | ||
"        self.lr = lr\n", | ||
"        self.epoch = epoch\n", | ||
"        self.batch_size = batch_size\n", | ||
"        self.print_every = print_every\n", | ||
"\n", | ||
"    def sigmoid(self,x):\n", | ||
"        \"\"\"Element-wise logistic function 1 / (1 + exp(-x)).\"\"\"\n", | ||
"        # Clipping keeps np.exp from overflowing on large negative inputs.\n", | ||
"        return 1/(1+np.exp(-np.clip(x,-500,500)))\n", | ||
"\n", | ||
"    def cost(self,y,y_pred):\n", | ||
"        \"\"\"Mean binary cross-entropy between labels y and probabilities y_pred.\"\"\"\n", | ||
"        # Keep probabilities strictly inside (0, 1) so log() never sees 0,\n", | ||
"        # which would turn the loss into inf or nan.\n", | ||
"        eps = 1e-15\n", | ||
"        y_pred = np.clip(y_pred,eps,1-eps)\n", | ||
"        return -np.mean(y*np.log(y_pred)+(1-y)*np.log(1-y_pred))\n", | ||
"\n", | ||
"    def gradient(self,y,y_pred,x):\n", | ||
"        \"\"\"Gradient of the cross-entropy w.r.t. params, summed (not averaged) over samples.\"\"\"\n", | ||
"        return np.dot(x.T,(y_pred-y))\n", | ||
"\n", | ||
"    def train(self):\n", | ||
"        \"\"\"Run self.epoch gradient-descent steps on the stored training set.\"\"\"\n", | ||
"        for i in range(self.epoch):\n", | ||
"            y_pred = self.sigmoid(np.dot(self.train_data,self.params))\n", | ||
"            loss = self.cost(self.train_labels,y_pred)\n", | ||
"\n", | ||
"            gra = self.gradient(self.train_labels,y_pred,self.train_data)\n", | ||
"\n", | ||
"            self.params -= self.lr*gra\n", | ||
"\n", | ||
"            if self.print_every:\n", | ||
"                if i%self.print_every == 0 or i == self.epoch-1:\n", | ||
"                    print('Epoch : {} Loss: {}'.format(i,loss))\n", | ||
"\n", | ||
"    def predict(self,test_data):\n", | ||
"        \"\"\"Return an (n, 1) float array of 0/1 predictions for raw features (no bias column).\"\"\"\n", | ||
"        test_data = np.asarray(test_data,dtype=float)\n", | ||
"        probabilities = self.sigmoid(np.dot(test_data,self.params[1:])+self.params[0])\n", | ||
"        return (probabilities >= 0.5).astype(float)\n", | ||
"\n", | ||
"    def evaluate(self,test_data,labels):\n", | ||
"        \"\"\"Return the accuracy of predict(test_data) against labels.\"\"\"\n", | ||
"        # accuracy_score expects (y_true, y_pred); flatten so 1-D and column labels both work.\n", | ||
"        return accuracy_score(np.asarray(labels).ravel(),self.predict(test_data).ravel())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 55, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def cross_validate(data,k=5,epoch=10):\n", | ||
"    \"\"\"Return the k-fold cross-validated accuracies of logisticregression.\n", | ||
"\n", | ||
"    data  : 2-D array whose last column is the 0/1 label and whose other\n", | ||
"            columns are the features. Rows are used in the order given.\n", | ||
"    k     : number of folds; must satisfy 2 <= k <= len(data).\n", | ||
"    epoch : training epochs for the model fitted on each fold.\n", | ||
"\n", | ||
"    Returns a list with one accuracy per fold.\n", | ||
"    Raises ValueError when the rows cannot be split into k folds.\n", | ||
"    \"\"\"\n", | ||
"    data = np.asarray(data)\n", | ||
"    if k < 2 or k > len(data):\n", | ||
"        raise ValueError('cannot split {} rows into {} folds'.format(len(data),k))\n", | ||
"\n", | ||
"    # array_split (unlike vsplit) accepts row counts that are not a multiple of k;\n", | ||
"    # the first folds simply get one extra row.\n", | ||
"    folds = np.array_split(data,k)\n", | ||
"    fold_accuracies = []\n", | ||
"    for i, test in enumerate(folds):\n", | ||
"        # Train on every fold except the one held out for testing.\n", | ||
"        train = np.concatenate([fold for j, fold in enumerate(folds) if j != i],axis=0)\n", | ||
"\n", | ||
"        logistic = logisticregression(train[:,:-1],train[:,-1:],epoch=epoch,print_every=None)\n", | ||
"        logistic.train()\n", | ||
"\n", | ||
"        fold_accuracies.append(logistic.evaluate(test[:,:-1],test[:,-1:]))\n", | ||
"\n", | ||
"    return fold_accuracies" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 56, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def get_combinations(items,number):\n", | ||
"    \"\"\"Return every `number`-element combination of `items` as a sorted list.\n", | ||
"\n", | ||
"    The combinations come out in itertools.combinations order; only the\n", | ||
"    elements inside each combination are sorted.\n", | ||
"    \"\"\"\n", | ||
"    return [sorted(combo) for combo in combinations(items, number)]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 121, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def wrapper_back_method(data,names,target_name,feature_count=2,cross_val_k = 5,visualize=True):\n", | ||
"    \"\"\"Backward elimination: drop one feature per round until feature_count remain.\n", | ||
"\n", | ||
"    Each round scores every subset that leaves out exactly one of the current\n", | ||
"    features with cross_validate() and keeps the subset with the highest mean\n", | ||
"    accuracy (on ties, the last subset generated wins).\n", | ||
"\n", | ||
"    data          : DataFrame holding the feature columns and the target column.\n", | ||
"    names         : candidate feature column names.\n", | ||
"    target_name   : target column name(s); placed after the features.\n", | ||
"    feature_count : number of features to keep.\n", | ||
"    cross_val_k   : number of folds handed to cross_validate().\n", | ||
"    visualize     : print the scores and the chosen subset of every round.\n", | ||
"\n", | ||
"    Returns the selected feature names as a sorted list.\n", | ||
"    Raises ValueError if feature_count is negative or a subset cannot be scored.\n", | ||
"    \"\"\"\n", | ||
"    if feature_count < 0:\n", | ||
"        raise ValueError('feature_count must be non-negative, got {}'.format(feature_count))\n", | ||
"\n", | ||
"    selected = list(names)\n", | ||
"    # Nothing to eliminate. Without this guard the search kept shrinking past\n", | ||
"    # feature_count and ended in combinations() with a negative size.\n", | ||
"    if len(selected) <= feature_count:\n", | ||
"        return sorted(selected)\n", | ||
"\n", | ||
"    # Iterative form of the original recursion: one pass per removed feature.\n", | ||
"    while len(selected) > feature_count:\n", | ||
"        candidates = get_combinations(selected,len(selected)-1)\n", | ||
"\n", | ||
"        wrapper_output = {}\n", | ||
"        best_subset, best_score = None, None\n", | ||
"        for subset in candidates:\n", | ||
"            cols = np.concatenate((subset,target_name),axis=None)\n", | ||
"            result = cross_validate(data[cols].values,cross_val_k)\n", | ||
"            if not result:\n", | ||
"                raise ValueError('cross_validate produced no scores for {} rows and k={}'.format(\n", | ||
"                    len(data),cross_val_k))\n", | ||
"            avarage = sum(result)/len(result)\n", | ||
"\n", | ||
"            # String keys are kept only so the printed report looks as before.\n", | ||
"            wrapper_output[str(subset)] = avarage\n", | ||
"            # >= so that, among equally good subsets, the last one wins.\n", | ||
"            if best_score is None or avarage >= best_score:\n", | ||
"                best_subset, best_score = subset, avarage\n", | ||
"\n", | ||
"        if visualize:\n", | ||
"            print(\"{} Classess\".format(len(candidates)),candidates[-1],'\\n',wrapper_output)\n", | ||
"            print('\\nSelected -- >\\n',[str(best_subset)],best_score,'\\n')\n", | ||
"\n", | ||
"        selected = best_subset\n", | ||
"\n", | ||
"    return selected" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 122, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"iris = load_iris()\n", | ||
"\n", | ||
"data = pd.concat((pd.DataFrame(iris['data']),pd.DataFrame(iris['target'])),axis=1)\n", | ||
"data.columns= ['sepal length (cm)','sepal width (cm)','petal length (cm)','petal width (cm)','target']\n", | ||
"# Keep only classes 0 and 1 so the problem is binary.\n", | ||
"data = data[(data['target']==0) | (data['target']==1)]\n", | ||
"# Shuffle the DataFrame itself, with a fixed seed: the feature search reads `data`,\n", | ||
"# and the iris rows are ordered by class, so unshuffled folds would each hold a\n", | ||
"# single class. Shuffling only the array from data.values does not reliably\n", | ||
"# reorder the frame.\n", | ||
"data = data.sample(frac=1,random_state=0).reset_index(drop=True)\n", | ||
"data_np = data.values" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 123, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Candidate feature names: every column of the frame except the label.\n", | ||
"columns = data.columns.tolist()\n", | ||
"columns.remove('target')\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 125, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"4 Classess ['petal length (cm)', 'petal width (cm)', 'sepal width (cm)'] \n", | ||
" {\"['petal length (cm)', 'sepal length (cm)', 'sepal width (cm)']\": 0.7, \"['petal width (cm)', 'sepal length (cm)', 'sepal width (cm)']\": 0.5, \"['petal length (cm)', 'petal width (cm)', 'sepal length (cm)']\": 0.6399999999999999, \"['petal length (cm)', 'petal width (cm)', 'sepal width (cm)']\": 1.0}\n", | ||
"\n", | ||
"Selected -- >\n", | ||
" [\"['petal length (cm)', 'petal width (cm)', 'sepal width (cm)']\"] 1.0 \n", | ||
"\n" | ||
] | ||
}, | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"['petal length (cm)', 'petal width (cm)', 'sepal width (cm)']" | ||
] | ||
}, | ||
"execution_count": 125, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"wrapper_back_method(data,columns,target_name=['target'],feature_count=3,cross_val_k=10,visualize=True)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.