# Report held-out test accuracy alongside the out-of-bag (OOB) estimate.
print('Test set accuracy: {:.3f}'.format(test_accuracy))
print('OOB accuracy: {:.3f}'.format(oob_accuracy))  # fixed: closing paren was missing
# The difference between test and OOB accuracy will be minimal, which shows
# the OOB estimate can stand in for cross-validation when checking accuracy.
# RANDOM FOREST REGRESSOR

# Bring in the regressor, the train/test splitting utility, and the metric.
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE

# Fix the random seed so the split and the forest are reproducible.
SEED = 1

# Hold out 30% of the data for testing; the rest is used for training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=SEED
)

# A 400-tree forest; min_samples_leaf=0.12 means each leaf must hold at
# least 12% of the training samples, which regularizes the trees.
rf = RandomForestRegressor(
    n_estimators=400,
    min_samples_leaf=0.12,
    random_state=SEED,
)

# Train on the training split, then predict the held-out targets.
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

# Root-mean-squared error on the test split (square root of the MSE).
rmse_test = MSE(y_test, y_pred) ** 0.5
print('Test set RMSE of rf: {:.2f}'.format(rmse_test))
# FEATURE IMPORTANCE in sklearn

import pandas as pd
import matplotlib.pyplot as plt

# Wrap the fitted forest's feature importances in a Series keyed by the
# feature names, then draw them (ascending) as a horizontal bar chart.
importances_rf = pd.Series(rf.feature_importances_, index=X.columns)
importances_rf.sort_values().plot(kind='barh', color='lightgreen')
plt.show()
0 commit comments