-
Notifications
You must be signed in to change notification settings - Fork 6
/
C-MAPSS_data_cleaner.py
55 lines (45 loc) · 2.78 KB
/
C-MAPSS_data_cleaner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import pandas as pd
# Column names: engine id, cycle counter, three operating settings (op1-op3)
# and sensor1 .. sensor23.  Nothing in this file reads `columns`; presumably
# callers pass it as the header when loading the raw data - TODO confirm.
columns = ["id", "cycle", "op1", "op2", "op3"] + [
    "sensor" + str(sensor_no) for sensor_no in range(1, 24)
]

# Upper bound applied to `remaining_cycle` when the *_R_early cleaners build
# the `R_early` column (values at or above it are replaced by it).
# Original author's note: assumed cycle after which the engine starts degrading.
cycle = 125
def train_data_cleaner(train):
    """Add the ``remaining_cycle`` column and drop the unused columns.

    For every row, ``remaining_cycle`` is the largest ``cycle`` value seen
    for that row's ``id`` minus the row's own ``cycle``.

    Args:
        train: DataFrame containing at least ``id``, ``cycle``, ``op1``-``op3``
            and the sensor columns listed in the drop list below.

    Returns:
        A new DataFrame without the id/cycle/op columns and without sensors
        1, 5, 6, 10, 16, 18, 19, 22 and 23.  ``remaining_cycle`` is kept.

    Note:
        As in the original implementation, ``remaining_cycle`` is also written
        onto the caller's ``train`` frame (repeated calls just overwrite it
        with the same values).
    """
    # The string alias "max" replaces the builtin `max`: passing the builtin
    # to transform() is deprecated in pandas >= 2.1 and emits a FutureWarning.
    last_cycle = train.groupby("id")["cycle"].transform("max")
    train["remaining_cycle"] = last_cycle - train["cycle"]
    return train.drop(
        columns=[
            "id", "cycle", "op1", "op2", "op3",
            "sensor1", "sensor5", "sensor6", "sensor10", "sensor16",
            "sensor18", "sensor19", "sensor22", "sensor23",
        ]
    )
def train_data_cleaner_R_early(train, early_rul=None):
    """Like ``train_data_cleaner`` but also adds a capped ``R_early`` column.

    ``R_early`` equals ``remaining_cycle`` wherever that is below the cap and
    equals the cap everywhere else.

    Args:
        train: DataFrame containing at least ``id``, ``cycle``, ``op1``-``op3``
            and the sensor columns listed in the drop list below.
        early_rul: Cap applied to ``remaining_cycle``.  When ``None`` (the
            default) the module-level ``cycle`` constant is used, which is
            what the original code always did.

    Returns:
        A new DataFrame without the id/cycle/op columns and without sensors
        1, 5, 6, 10, 16, 18, 19, 22 and 23; both ``remaining_cycle`` and
        ``R_early`` are kept.

    Note:
        ``remaining_cycle`` is also written onto the caller's ``train`` frame.
        ``R_early`` is only added to the returned frame.  The original wrote
        it to an undefined name ``df_train`` and raised ``NameError``.
    """
    cap = cycle if early_rul is None else early_rul

    # String alias instead of the builtin `max` (deprecated in pandas >= 2.1).
    last_cycle = train.groupby("id")["cycle"].transform("max")
    train["remaining_cycle"] = last_cycle - train["cycle"]

    train_x = train.drop(
        columns=[
            "id", "cycle", "op1", "op2", "op3",
            "sensor1", "sensor5", "sensor6", "sensor10", "sensor16",
            "sensor18", "sensor19", "sensor22", "sensor23",
        ]
    )
    # Vectorised equivalent of `cap if x >= cap else x` for every row.
    train_x["R_early"] = train["remaining_cycle"].clip(upper=cap)
    return train_x
def test_data_cleaner(test_results, test):
    """Return the feature columns of the test set.

    The per-engine values in ``test_results`` are combined with the last
    recorded cycle of each engine to derive ``remaining_cycle``, exactly as
    the original did, but that column is dropped again before returning.

    Args:
        test_results: Two-column DataFrame; the first column is renamed to
            ``rul`` and the second is discarded.  Row ``i`` is treated as
            belonging to engine id ``index + 1``.
        test: DataFrame containing at least ``id``, ``cycle``, ``op1``-``op3``
            and the sensor columns listed in the drop list below.

    Returns:
        A new DataFrame (fresh index from the merge) without the id/cycle/op
        columns, without sensors 1, 5, 6, 10, 16, 18, 19, 22 and 23, and
        without ``remaining_cycle``.

    Raises:
        ValueError: if ``test_results`` does not have exactly two columns.
    """
    # Work on a private copy.  The original renamed and dropped columns on the
    # caller's frame, so a second call with the same object silently
    # relabelled `id`/`rul_failed` as `rul`/`null` and produced wrong numbers.
    rul_table = test_results.copy()
    rul_table.columns = ["rul", "null"]
    rul_table = rul_table.drop(columns=["null"])
    rul_table["id"] = rul_table.index + 1

    # Look the last cycle up by engine id instead of relying on both frames
    # happening to share the same positional index.
    last_cycle = test.groupby("id")["cycle"].max()
    rul_table["rul_failed"] = rul_table["rul"] + rul_table["id"].map(last_cycle)

    merged = test.merge(rul_table[["id", "rul_failed"]], on="id", how="left")
    merged["remaining_cycle"] = merged["rul_failed"] - merged["cycle"]

    # NOTE(review): `remaining_cycle` is computed and then dropped, so it does
    # not influence the returned frame; kept to mirror the original steps.
    return merged.drop(
        columns=[
            "id", "cycle", "op1", "op2", "op3",
            "sensor1", "sensor5", "sensor6", "sensor10", "sensor16",
            "sensor18", "sensor19", "sensor22", "sensor23",
            "rul_failed", "remaining_cycle",
        ]
    )


# The name starts with "test", so pytest would otherwise try to collect this
# helper as a test case whenever it is imported into a test module.
test_data_cleaner.__test__ = False
def test_data_cleaner_R_early(test_results, test, early_rul=None):
    """Return the test-set feature columns plus a capped ``R_early`` column.

    ``remaining_cycle`` is derived per row as
    ``rul + last cycle of the engine - cycle``; ``R_early`` is that value
    capped from above.  ``remaining_cycle`` itself is not returned.

    Args:
        test_results: Two-column DataFrame; the first column is renamed to
            ``rul`` and the second is discarded.  Row ``i`` is treated as
            belonging to engine id ``index + 1``.
        test: DataFrame containing at least ``id``, ``cycle``, ``op1``-``op3``
            and the sensor columns listed in the drop list below.
        early_rul: Cap applied to ``remaining_cycle``.  When ``None`` (the
            default) the module-level ``cycle`` constant is used, which is
            what the original code always did.

    Returns:
        A new DataFrame (fresh index from the merge) without the id/cycle/op
        columns, without sensors 1, 5, 6, 10, 16, 18, 19, 22 and 23 and
        without ``remaining_cycle``, with ``R_early`` as the last column.

    Raises:
        ValueError: if ``test_results`` does not have exactly two columns.
    """
    cap = cycle if early_rul is None else early_rul

    # Work on a private copy.  The original renamed and dropped columns on the
    # caller's frame, so reusing the same object in a second cleaner call
    # silently relabelled `id`/`rul_failed` as `rul`/`null` and corrupted
    # `R_early`.
    rul_table = test_results.copy()
    rul_table.columns = ["rul", "null"]
    rul_table = rul_table.drop(columns=["null"])
    rul_table["id"] = rul_table.index + 1

    # Look the last cycle up by engine id instead of relying on both frames
    # happening to share the same positional index.
    last_cycle = test.groupby("id")["cycle"].max()
    rul_table["rul_failed"] = rul_table["rul"] + rul_table["id"].map(last_cycle)

    merged = test.merge(rul_table[["id", "rul_failed"]], on="id", how="left")
    remaining = merged["rul_failed"] - merged["cycle"]

    test_x = merged.drop(
        columns=[
            "id", "cycle", "op1", "op2", "op3",
            "sensor1", "sensor5", "sensor6", "sensor10", "sensor16",
            "sensor18", "sensor19", "sensor22", "sensor23",
            "rul_failed",
        ]
    )
    # The original assigned to an undefined `df_test` (NameError); the column
    # belongs on the merged test data.  clip() is the vectorised equivalent of
    # `cap if x >= cap else x`.
    test_x["R_early"] = remaining.clip(upper=cap)
    return test_x


# The name starts with "test", so pytest would otherwise try to collect this
# helper as a test case whenever it is imported into a test module.
test_data_cleaner_R_early.__test__ = False