Skip to content

Commit

Permalink
wrapper backwords done
Browse files Browse the repository at this point in the history
  • Loading branch information
IsraelAbebe committed Nov 14, 2019
1 parent a2bc2ee commit f226a7c
Show file tree
Hide file tree
Showing 2 changed files with 282 additions and 66 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score\n",
"from sklearn.datasets import load_iris\n",
"from itertools import combinations\n",
"import numpy as np\n",
"import pandas as pd\n",
"import ast"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"class logisticregression():\n",
" def __init__(self,train_data,train_labels,lr=0.01,batch_size=None,epoch=10,print_every = 10):\n",
" dummy_once = np.ones((len(train_data),1))\n",
" self.train_data = np.hstack((dummy_once,train_data))\n",
" self.train_labels = train_labels\n",
" \n",
" self.params = np.zeros((len(self.train_data[0]),1))\n",
" \n",
" self.lr = lr\n",
" self.epoch = epoch\n",
" self.batch_size = batch_size\n",
" self.print_every = print_every\n",
" \n",
" def sigmoid(self,x):\n",
" return 1/(1+np.exp(-x))\n",
" \n",
" def cost(self,y,y_pred):\n",
" return -np.mean(y*np.log(y_pred)+(1-y)*np.log(1-y_pred))\n",
" \n",
" def gradient(self,y,y_pred,x):\n",
" return np.dot(x.T,(y_pred-y))\n",
" \n",
" def train(self):\n",
" for i in range(self.epoch):\n",
" y_pred = self.sigmoid(np.dot(self.train_data,self.params))\n",
" loss = self.cost(self.train_labels,y_pred)\n",
" \n",
" gra = self.gradient(self.train_labels,y_pred,self.train_data)\n",
" \n",
" self.params -= self.lr*gra\n",
" \n",
" if self.print_every:\n",
" if i%self.print_every == 0 or i == self.epoch-1:\n",
" print('Epoch : {} Loss: {}'.format(i,loss))\n",
" def predict(self,test_data):\n",
" result = self.sigmoid(np.dot(test_data,self.params[1:])+self.params[0])\n",
" result[result >= 0.5 ] = 1\n",
" result[result < 0.5 ] = 0\n",
" return result\n",
" \n",
" def evaluate(self,test_data,labels):\n",
" accuracy = accuracy_score(self.predict(test_data),labels)\n",
" return accuracy"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"def cross_validate(data,k=5):\n",
" if len(data)%k != 0:\n",
" print('cant vsplit',len(data),' by ',k)\n",
" return\n",
" \n",
" data_splitted = np.vsplit(data,k)\n",
" aggrigate_result = []\n",
" for i in range(len(data_splitted)):\n",
" train = []\n",
" test = []\n",
" items = [j for j in range(len(data_splitted)) if j !=i ]\n",
" test = data_splitted[i]\n",
" for item in items:\n",
" if len(train) == 0:\n",
" train = data_splitted[item]\n",
" else:\n",
" train = np.concatenate((train,data_splitted[item]), axis=0)\n",
" \n",
" logistic = logisticregression(train[:,:-1],train[:,-1:],epoch=10,print_every=None)\n",
" logistic.train()\n",
" \n",
" result = logistic.evaluate(test[:,:-1],test[:,-1:])\n",
" aggrigate_result.append(result)\n",
" \n",
" return aggrigate_result"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"def get_combinations(items,number):\n",
" comb_list = []\n",
" for c in combinations(items, number):\n",
" c = list(c)\n",
" c.sort()\n",
" comb_list.append(c)\n",
" \n",
" return comb_list"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [],
"source": [
"def wrapper_back_method(data,names,target_name,feature_count=2,cross_val_k = 5,visualize=True):\n",
" num = len(names)-1\n",
" classes = []\n",
" if num == len(names):\n",
" classes = [names]\n",
" else:\n",
" classes = get_combinations(names,num)\n",
" \n",
" \n",
" wrapper_output = {}\n",
" for i in classes: \n",
" cols = np.concatenate((i,target_name),axis=None)\n",
" sub_data = data[cols].values\n",
" result = cross_validate(sub_data,cross_val_k)\n",
" avarage = sum(result)/len(result)\n",
"\n",
" wrapper_output[str(i)]=avarage\n",
" \n",
" if visualize: \n",
" print(\"{} Classess\".format(len(classes)),i,'\\n',wrapper_output)\n",
" \n",
" wrapper_output_final = wrapper_output\n",
" wrapper_output = sorted(wrapper_output,key=wrapper_output.get)\n",
" \n",
" new_list = [wrapper_output[-1]]\n",
" \n",
" if visualize:\n",
" print('\\nSelected -- >\\n',new_list,wrapper_output_final[new_list[0]],'\\n')\n",
" \n",
" new_list = ast.literal_eval(new_list[0])\n",
" \n",
" if len(new_list)==feature_count:\n",
" return new_list\n",
" else:\n",
" new_list = wrapper_back_method(data,new_list,target_name=target_name,feature_count=feature_count,\n",
" cross_val_k=cross_val_k,visualize=visualize)\n",
" return new_list\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [],
"source": [
"data = load_iris()\n",
"\n",
"data = pd.concat((pd.DataFrame(data['data']),pd.DataFrame(data['target'])),axis=1)\n",
"data.columns= ['sepal length (cm)','sepal width (cm)','petal length (cm)','petal width (cm)','target']\n",
"data = data[(data['target']==0) | (data['target']==1)]\n",
"data_np = data.values\n",
"np.random.shuffle(data_np)"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [],
"source": [
"columns = list(data.columns)\n",
"columns.remove('target')\n"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4 Classess ['petal length (cm)', 'petal width (cm)', 'sepal width (cm)'] \n",
" {\"['petal length (cm)', 'sepal length (cm)', 'sepal width (cm)']\": 0.7, \"['petal width (cm)', 'sepal length (cm)', 'sepal width (cm)']\": 0.5, \"['petal length (cm)', 'petal width (cm)', 'sepal length (cm)']\": 0.6399999999999999, \"['petal length (cm)', 'petal width (cm)', 'sepal width (cm)']\": 1.0}\n",
"\n",
"Selected -- >\n",
" [\"['petal length (cm)', 'petal width (cm)', 'sepal width (cm)']\"] 1.0 \n",
"\n"
]
},
{
"data": {
"text/plain": [
"['petal length (cm)', 'petal width (cm)', 'sepal width (cm)']"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wrapper_back_method(data,columns,['target'],3,10,visualize=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit f226a7c

Please sign in to comment.