Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
muxiaoxiong authored Jun 27, 2024
1 parent 5a2e39e commit 865a6f7
Showing 1 changed file with 282 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 25,
"id": "e77e6470",
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-27T14:16:29.004890Z",
"start_time": "2024-06-27T14:16:28.985889Z"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"from sklearn.model_selection import cross_val_predict\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from lightgbm import LGBMClassifier\n",
"\n",
"def get_fea_ppg(df):\n",
" df=df.groupby('recording_time')['PPG'].mean()\n",
" return pd.DataFrame(df)\n",
"\n",
"def get_fea_acc(df):\n",
" df=df.groupby('recording_time')[['Motion_dataX','Motion_dataY','Motion_dataZ']].mean()\n",
" return pd.DataFrame(df)\n",
"\n",
"def get_fea_gsr(df):\n",
" df=df.groupby('recording_time')['GSR'].mean()\n",
" return pd.DataFrame(df)\n",
"\n",
"def manual_feature(df):\n",
" feat = [\n",
" df_batch['Motion_dataX'].mean(),\n",
" df_batch['Motion_dataX'].max(),\n",
" df_batch['Motion_dataX'].min(),\n",
" df_batch['Motion_dataX'].max() - df_batch['Motion_dataX'].min(),\n",
" df_batch['Motion_dataX'].diff(1).mean(),\n",
" df_batch['Motion_dataX'].diff(1).max(),\n",
" df_batch['Motion_dataX'].diff(1).min(),\n",
"\n",
" df_batch['Motion_dataY'].mean(),\n",
" df_batch['Motion_dataY'].max(),\n",
" df_batch['Motion_dataY'].min(),\n",
" df_batch['Motion_dataY'].max() - df_batch['Motion_dataY'].min(),\n",
" df_batch['Motion_dataY'].diff(1).mean(),\n",
" df_batch['Motion_dataY'].diff(1).max(),\n",
" df_batch['Motion_dataY'].diff(1).min(),\n",
"\n",
" df_batch['Motion_dataZ'].mean(),\n",
" df_batch['Motion_dataZ'].max(),\n",
" df_batch['Motion_dataZ'].min(),\n",
" df_batch['Motion_dataZ'].max() - df_batch['Motion_dataZ'].min(),\n",
" df_batch['Motion_dataZ'].diff(1).mean(),\n",
" df_batch['Motion_dataZ'].diff(1).max(),\n",
" df_batch['Motion_dataZ'].diff(1).min(),\n",
"\n",
" df_batch['GSR'].mean(),\n",
" df_batch['GSR'].max(),\n",
" df_batch['GSR'].min(),\n",
" df_batch['GSR'].max() - df_batch['GSR'].min(),\n",
" df_batch['GSR'].diff(1).mean(),\n",
" df_batch['GSR'].diff(1).max(),\n",
" df_batch['GSR'].diff(1).min(),\n",
" df_batch['GSR'].isnull().mean(),\n",
"\n",
" df_batch['PPG'].mean(),\n",
" df_batch['PPG'].max(),\n",
" df_batch['PPG'].min(),\n",
" df_batch['PPG'].max() - df_batch['PPG'].min(),\n",
" df_batch['PPG'].diff(1).mean(),\n",
" df_batch['PPG'].diff(1).max(),\n",
" df_batch['PPG'].diff(1).min(),\n",
" ]\n",
" return feat"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "0af0d82c",
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-27T14:19:12.626293Z",
"start_time": "2024-06-27T14:17:25.090204Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"data0033 09:41:01 16:59:59\n",
"data0129 06:00:00 16:59:59\n",
"data0157 10:23:05 16:59:59\n",
"data0172 10:43:28 16:59:59\n",
"data0179 06:00:00 16:59:59\n",
"data0191 08:23:27 16:59:59\n",
"data0302 06:00:00 16:59:59\n",
"data0369 09:10:05 16:59:59\n",
"data0377 08:38:51 16:59:59\n",
"data0418 06:00:00 16:59:59\n",
"data0461 06:00:00 16:59:59\n",
"data0541 06:00:00 16:59:59\n",
"data0548 06:00:00 16:59:59\n",
"data0562 09:11:31 16:59:59\n",
"data0645 07:19:29 16:59:59\n",
"data0667 06:00:00 16:59:59\n",
"data0671 10:01:30 16:59:59\n",
"data0699 06:00:00 16:59:59\n",
"data0702 06:00:00 16:59:59\n",
"data0856 06:00:00 16:59:59\n",
"data0882 09:41:24 16:59:59\n",
"data0943 06:00:00 16:24:55\n",
"data0975 06:00:00 16:59:59\n",
"data0982 11:10:27 16:59:59\n",
"data0984 06:00:00 16:59:59\n",
"data0999 06:00:00 16:59:59\n",
"data1428 06:00:00 16:59:59\n",
"data1576 06:00:00 16:59:59\n",
"data2381 06:00:00 16:59:59\n",
"data3862 06:00:00 16:59:59\n",
"data4722 08:58:50 16:18:37\n",
"data4998 09:26:17 16:59:59\n",
"data5166 08:11:03 16:59:59\n",
"data5468 06:00:00 16:59:59\n",
"data7329 07:34:22 16:59:59\n",
"data7431 06:00:00 16:59:59\n",
"data9057 06:00:00 16:59:59\n",
"data9339 06:00:00 16:59:59\n",
"data9597 06:00:00 16:59:59\n"
]
}
],
"source": [
"train_features = []\n",
"list_=os.listdir('./training_data/')\n",
"train_label = pd.read_csv('training_data/train_label.csv', encoding='gb2312')\n",
"# 对训练集的个体\n",
"for sid in list_:\n",
" if '.csv' in sid:\n",
" continue\n",
"\n",
" # 三类观测数据\n",
" df_acc = get_fea_acc(pd.read_csv(f'./training_data/{sid}/ACC.csv'))\n",
" df_gsr = get_fea_gsr(pd.read_csv(f'./training_data/{sid}/GSR.csv'))\n",
" df_ppg = get_fea_ppg(pd.read_csv(f'./training_data/{sid}/PPG.csv'))\n",
"\n",
" # 按照时间顺序,拼接三类观测数据\n",
" df = pd.concat([df_acc, df_gsr, df_ppg], axis=1).reset_index()\n",
" \n",
" df['GSR'] = df['GSR'].round(4)\n",
" df['GSR'] = df['GSR'].replace(0.0061, np.nan)\n",
" \n",
" label = train_label.set_index('文件名').loc[sid].values[0]\n",
"\n",
" # 拆分为更小的数据\n",
" for idx in range(df.shape[0] // 3000):\n",
" df_batch = df.iloc[idx*3000: (idx+1)*3000]\n",
" feat = manual_feature(df_batch)\n",
" feat = [sid] + feat + [label]\n",
" train_features.append(feat)\n",
"\n",
"test_features = []\n",
"for sid in os.listdir('./test_data/'):\n",
" if '.csv' in sid:\n",
" continue\n",
"\n",
" df_acc = get_fea_acc(pd.read_csv(f'./test_data/{sid}/ACC.csv'))\n",
" df_gsr = get_fea_gsr(pd.read_csv(f'./test_data/{sid}/GSR.csv'))\n",
" df_ppg = get_fea_ppg(pd.read_csv(f'./test_data/{sid}/PPG.csv'))\n",
" df = pd.concat([df_acc, df_gsr, df_ppg], axis=1).reset_index()\n",
" \n",
" df['GSR'] = df['GSR'].round(4)\n",
" df['GSR'] = df['GSR'].replace(0.0061, np.nan)\n",
" \n",
" print(sid, df['recording_time'].min(), df['recording_time'].max())\n",
" for idx in range(df.shape[0] // 3000):\n",
" df_batch = df.iloc[idx*3000: (idx+1)*3000]\n",
" feat = manual_feature(df_batch)\n",
" feat = [sid] + feat\n",
" test_features.append(feat)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "51c53aa7",
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-27T14:20:20.151703Z",
"start_time": "2024-06-27T14:20:19.986520Z"
}
},
"outputs": [],
"source": [
"train_features = pd.DataFrame(train_features)\n",
"test_features = pd.DataFrame(test_features)\n",
"\n",
"model = LGBMClassifier()\n",
"model.fit(train_features.iloc[:, 1:-1], train_features.iloc[:, -1])\n",
"test_pred = model.predict(test_features.iloc[:, 1:])\n",
"\n",
"test_features['label'] = test_pred\n",
"\n",
"pred = test_features.groupby(0)['label'].mean() > 0.5\n",
"\n",
"pred = pred.astype(int).reset_index()\n",
"\n",
"pred.to_csv('lgb1.csv', index=None, header=None)\n",
"# 成绩:0.92308"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 865a6f7

Please sign in to comment.