Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
KirillTushin authored Mar 6, 2018
1 parent f3b6f75 commit df1bb95
Showing 1 changed file with 147 additions and 0 deletions.
147 changes: 147 additions & 0 deletions sport/hw1/benchmarks/Tushin_Kirill_0.259_benchmark.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import re\n",
"from lightgbm import LGBMClassifier\n",
"\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"pd.set_option('display.max_columns', 300)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def predict(model , data , columns):\n",
" data = data.copy()\n",
" predict_answer = {}\n",
" for col in ['work' , 'home']:\n",
" data['predict_' + col] = model[col].predict_proba(data[columns])[:,1]\n",
" tmp = data.groupby(['customer_id' , 'pos_address_lat' , 'pos_address_lon'])[['predict_' + col]].max()\n",
" tmp = tmp.groupby(['customer_id']).idxmax()['predict_' + col].values\n",
" \n",
" predict = [np.array(x) for x in tmp]\n",
" predict = pd.DataFrame(predict , columns=['customer_id' , col + '_predict_lat' , col + '_predict_lon'])\n",
" predict_answer[col] = predict.convert_objects(convert_numeric=True)\n",
" \n",
" return pd.merge(predict_answer['work'] , predict_answer['home'] , on='customer_id')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"train = pd.read_csv(\"data/train_set.csv.gz\", compression=\"gzip\").rename(columns={\"pos_adress_lat\": \"pos_address_lat\",\"pos_adress_lon\": \"pos_address_lon\"})\n",
"test = pd.read_csv(\"data/test_set.csv.gz\", compression=\"gzip\")\n",
"sample = pd.read_csv('data/sample.csv')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def clean_mcc(mcc):\n",
" if type(mcc) == int:\n",
" return mcc\n",
" mcc = mcc.split(',')\n",
" if len(mcc) == 1:\n",
" return int(mcc[0])\n",
" else:\n",
" return 1000*int(mcc[0]) + int(mcc[1])\n",
"test['mcc'] = test['mcc'].apply(clean_mcc)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"### Target\n",
"train['target_home'] = (np.sqrt((train['home_add_lat'] - train['pos_address_lat']) ** 2 + (train['home_add_lon'] - train['pos_address_lon']) ** 2) < 0.02).astype('int8')\n",
"train['target_work'] = (np.sqrt((train['work_add_lat'] - train['pos_address_lat']) ** 2 + (train['work_add_lon'] - train['pos_address_lon']) ** 2) < 0.02).astype('int8')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"good_col = ['amount', 'atm_address_lat', 'atm_address_lon', 'currency', 'mcc', 'pos_address_lat','pos_address_lon']"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 50.2 s, sys: 237 ms, total: 50.4 s\n",
"Wall time: 6.78 s\n"
]
}
],
"source": [
"%%time\n",
"model_home = LGBMClassifier(n_jobs=-1)\n",
"model_work = LGBMClassifier(n_jobs=-1)\n",
"model_home.fit(train[good_col] , train['target_home'])\n",
"model_work.fit(train[good_col] , train['target_work'])\n",
"\n",
"model = {'home':model_home , 'work':model_work}"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"predict_test = sample.merge(predict(model , test , good_col) , how='left').drop(sample.columns[1:] , axis=1)\n",
"predict_test = predict_test.fillna(predict_test.median())\n",
"predict_test.columns = sample.columns\n",
"predict_test.to_csv('first.csv' , index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit df1bb95

Please sign in to comment.