-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5a2e39e
commit 865a6f7
Showing
1 changed file
with
282 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,282 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 25, | ||
"id": "e77e6470", | ||
"metadata": { | ||
"ExecuteTime": { | ||
"end_time": "2024-06-27T14:16:29.004890Z", | ||
"start_time": "2024-06-27T14:16:28.985889Z" | ||
} | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"import os\n", | ||
"from sklearn.model_selection import cross_val_predict\n", | ||
"from sklearn.metrics import accuracy_score\n", | ||
"from sklearn.linear_model import LogisticRegression\n", | ||
"from sklearn.tree import DecisionTreeClassifier\n", | ||
"from lightgbm import LGBMClassifier\n", | ||
"\n", | ||
"def get_fea_ppg(df):\n", | ||
" df=df.groupby('recording_time')['PPG'].mean()\n", | ||
" return pd.DataFrame(df)\n", | ||
"\n", | ||
"def get_fea_acc(df):\n", | ||
" df=df.groupby('recording_time')[['Motion_dataX','Motion_dataY','Motion_dataZ']].mean()\n", | ||
" return pd.DataFrame(df)\n", | ||
"\n", | ||
"def get_fea_gsr(df):\n", | ||
" df=df.groupby('recording_time')['GSR'].mean()\n", | ||
" return pd.DataFrame(df)\n", | ||
"\n", | ||
"def manual_feature(df):\n", | ||
" feat = [\n", | ||
" df_batch['Motion_dataX'].mean(),\n", | ||
" df_batch['Motion_dataX'].max(),\n", | ||
" df_batch['Motion_dataX'].min(),\n", | ||
" df_batch['Motion_dataX'].max() - df_batch['Motion_dataX'].min(),\n", | ||
" df_batch['Motion_dataX'].diff(1).mean(),\n", | ||
" df_batch['Motion_dataX'].diff(1).max(),\n", | ||
" df_batch['Motion_dataX'].diff(1).min(),\n", | ||
"\n", | ||
" df_batch['Motion_dataY'].mean(),\n", | ||
" df_batch['Motion_dataY'].max(),\n", | ||
" df_batch['Motion_dataY'].min(),\n", | ||
" df_batch['Motion_dataY'].max() - df_batch['Motion_dataY'].min(),\n", | ||
" df_batch['Motion_dataY'].diff(1).mean(),\n", | ||
" df_batch['Motion_dataY'].diff(1).max(),\n", | ||
" df_batch['Motion_dataY'].diff(1).min(),\n", | ||
"\n", | ||
" df_batch['Motion_dataZ'].mean(),\n", | ||
" df_batch['Motion_dataZ'].max(),\n", | ||
" df_batch['Motion_dataZ'].min(),\n", | ||
" df_batch['Motion_dataZ'].max() - df_batch['Motion_dataZ'].min(),\n", | ||
" df_batch['Motion_dataZ'].diff(1).mean(),\n", | ||
" df_batch['Motion_dataZ'].diff(1).max(),\n", | ||
" df_batch['Motion_dataZ'].diff(1).min(),\n", | ||
"\n", | ||
" df_batch['GSR'].mean(),\n", | ||
" df_batch['GSR'].max(),\n", | ||
" df_batch['GSR'].min(),\n", | ||
" df_batch['GSR'].max() - df_batch['GSR'].min(),\n", | ||
" df_batch['GSR'].diff(1).mean(),\n", | ||
" df_batch['GSR'].diff(1).max(),\n", | ||
" df_batch['GSR'].diff(1).min(),\n", | ||
" df_batch['GSR'].isnull().mean(),\n", | ||
"\n", | ||
" df_batch['PPG'].mean(),\n", | ||
" df_batch['PPG'].max(),\n", | ||
" df_batch['PPG'].min(),\n", | ||
" df_batch['PPG'].max() - df_batch['PPG'].min(),\n", | ||
" df_batch['PPG'].diff(1).mean(),\n", | ||
" df_batch['PPG'].diff(1).max(),\n", | ||
" df_batch['PPG'].diff(1).min(),\n", | ||
" ]\n", | ||
" return feat" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 27, | ||
"id": "0af0d82c", | ||
"metadata": { | ||
"ExecuteTime": { | ||
"end_time": "2024-06-27T14:19:12.626293Z", | ||
"start_time": "2024-06-27T14:17:25.090204Z" | ||
} | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"data0033 09:41:01 16:59:59\n", | ||
"data0129 06:00:00 16:59:59\n", | ||
"data0157 10:23:05 16:59:59\n", | ||
"data0172 10:43:28 16:59:59\n", | ||
"data0179 06:00:00 16:59:59\n", | ||
"data0191 08:23:27 16:59:59\n", | ||
"data0302 06:00:00 16:59:59\n", | ||
"data0369 09:10:05 16:59:59\n", | ||
"data0377 08:38:51 16:59:59\n", | ||
"data0418 06:00:00 16:59:59\n", | ||
"data0461 06:00:00 16:59:59\n", | ||
"data0541 06:00:00 16:59:59\n", | ||
"data0548 06:00:00 16:59:59\n", | ||
"data0562 09:11:31 16:59:59\n", | ||
"data0645 07:19:29 16:59:59\n", | ||
"data0667 06:00:00 16:59:59\n", | ||
"data0671 10:01:30 16:59:59\n", | ||
"data0699 06:00:00 16:59:59\n", | ||
"data0702 06:00:00 16:59:59\n", | ||
"data0856 06:00:00 16:59:59\n", | ||
"data0882 09:41:24 16:59:59\n", | ||
"data0943 06:00:00 16:24:55\n", | ||
"data0975 06:00:00 16:59:59\n", | ||
"data0982 11:10:27 16:59:59\n", | ||
"data0984 06:00:00 16:59:59\n", | ||
"data0999 06:00:00 16:59:59\n", | ||
"data1428 06:00:00 16:59:59\n", | ||
"data1576 06:00:00 16:59:59\n", | ||
"data2381 06:00:00 16:59:59\n", | ||
"data3862 06:00:00 16:59:59\n", | ||
"data4722 08:58:50 16:18:37\n", | ||
"data4998 09:26:17 16:59:59\n", | ||
"data5166 08:11:03 16:59:59\n", | ||
"data5468 06:00:00 16:59:59\n", | ||
"data7329 07:34:22 16:59:59\n", | ||
"data7431 06:00:00 16:59:59\n", | ||
"data9057 06:00:00 16:59:59\n", | ||
"data9339 06:00:00 16:59:59\n", | ||
"data9597 06:00:00 16:59:59\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"train_features = []\n", | ||
"list_=os.listdir('./training_data/')\n", | ||
"train_label = pd.read_csv('training_data/train_label.csv', encoding='gb2312')\n", | ||
"# 对训练集的个体\n", | ||
"for sid in list_:\n", | ||
" if '.csv' in sid:\n", | ||
" continue\n", | ||
"\n", | ||
" # 三类观测数据\n", | ||
" df_acc = get_fea_acc(pd.read_csv(f'./training_data/{sid}/ACC.csv'))\n", | ||
" df_gsr = get_fea_gsr(pd.read_csv(f'./training_data/{sid}/GSR.csv'))\n", | ||
" df_ppg = get_fea_ppg(pd.read_csv(f'./training_data/{sid}/PPG.csv'))\n", | ||
"\n", | ||
" # 按照时间顺序,拼接三类观测数据\n", | ||
" df = pd.concat([df_acc, df_gsr, df_ppg], axis=1).reset_index()\n", | ||
" \n", | ||
" df['GSR'] = df['GSR'].round(4)\n", | ||
" df['GSR'] = df['GSR'].replace(0.0061, np.nan)\n", | ||
" \n", | ||
" label = train_label.set_index('文件名').loc[sid].values[0]\n", | ||
"\n", | ||
" # 拆分为更小的数据\n", | ||
" for idx in range(df.shape[0] // 3000):\n", | ||
" df_batch = df.iloc[idx*3000: (idx+1)*3000]\n", | ||
" feat = manual_feature(df_batch)\n", | ||
" feat = [sid] + feat + [label]\n", | ||
" train_features.append(feat)\n", | ||
"\n", | ||
"test_features = []\n", | ||
"for sid in os.listdir('./test_data/'):\n", | ||
" if '.csv' in sid:\n", | ||
" continue\n", | ||
"\n", | ||
" df_acc = get_fea_acc(pd.read_csv(f'./test_data/{sid}/ACC.csv'))\n", | ||
" df_gsr = get_fea_gsr(pd.read_csv(f'./test_data/{sid}/GSR.csv'))\n", | ||
" df_ppg = get_fea_ppg(pd.read_csv(f'./test_data/{sid}/PPG.csv'))\n", | ||
" df = pd.concat([df_acc, df_gsr, df_ppg], axis=1).reset_index()\n", | ||
" \n", | ||
" df['GSR'] = df['GSR'].round(4)\n", | ||
" df['GSR'] = df['GSR'].replace(0.0061, np.nan)\n", | ||
" \n", | ||
" print(sid, df['recording_time'].min(), df['recording_time'].max())\n", | ||
" for idx in range(df.shape[0] // 3000):\n", | ||
" df_batch = df.iloc[idx*3000: (idx+1)*3000]\n", | ||
" feat = manual_feature(df_batch)\n", | ||
" feat = [sid] + feat\n", | ||
" test_features.append(feat)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 28, | ||
"id": "51c53aa7", | ||
"metadata": { | ||
"ExecuteTime": { | ||
"end_time": "2024-06-27T14:20:20.151703Z", | ||
"start_time": "2024-06-27T14:20:19.986520Z" | ||
} | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"train_features = pd.DataFrame(train_features)\n", | ||
"test_features = pd.DataFrame(test_features)\n", | ||
"\n", | ||
"model = LGBMClassifier()\n", | ||
"model.fit(train_features.iloc[:, 1:-1], train_features.iloc[:, -1])\n", | ||
"test_pred = model.predict(test_features.iloc[:, 1:])\n", | ||
"\n", | ||
"test_features['label'] = test_pred\n", | ||
"\n", | ||
"pred = test_features.groupby(0)['label'].mean() > 0.5\n", | ||
"\n", | ||
"pred = pred.astype(int).reset_index()\n", | ||
"\n", | ||
"pred.to_csv('lgb1.csv', index=None, header=None)\n", | ||
"# 成绩:0.92308" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.3" | ||
}, | ||
"toc": { | ||
"base_numbering": 1, | ||
"nav_menu": {}, | ||
"number_sections": true, | ||
"sideBar": true, | ||
"skip_h1_title": false, | ||
"title_cell": "Table of Contents", | ||
"title_sidebar": "Contents", | ||
"toc_cell": false, | ||
"toc_position": {}, | ||
"toc_section_display": true, | ||
"toc_window_display": false | ||
}, | ||
"varInspector": { | ||
"cols": { | ||
"lenName": 16, | ||
"lenType": 16, | ||
"lenVar": 40 | ||
}, | ||
"kernels_config": { | ||
"python": { | ||
"delete_cmd_postfix": "", | ||
"delete_cmd_prefix": "del ", | ||
"library": "var_list.py", | ||
"varRefreshCmd": "print(var_dic_list())" | ||
}, | ||
"r": { | ||
"delete_cmd_postfix": ") ", | ||
"delete_cmd_prefix": "rm(", | ||
"library": "var_list.r", | ||
"varRefreshCmd": "cat(var_dic_list()) " | ||
} | ||
}, | ||
"types_to_exclude": [ | ||
"module", | ||
"function", | ||
"builtin_function_or_method", | ||
"instance", | ||
"_Feature" | ||
], | ||
"window_display": false | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |