Skip to content

Commit 9d7bcb8

Browse files
Update Part 3 Bagging and Random Forest
1 parent d0add94 commit 9d7bcb8

File tree

1 file changed

+46
-0
lines changed

1 file changed

+46
-0
lines changed

Part 3 Bagging and Random Forest

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,49 @@ print('Test set accuracy: {:.3f}'.format(test_accuracy))
6565
# Print the OOB accuracy with three decimals
print('OOB accuracy: {:.3f}'.format(oob_accuracy))
6666
# The difference between the test and OOB accuracy should be minimal, which shows that we don't need cross-validation to check the model's accuracy
6767

68+
# RANDOM FOREST REGRESSOR
69+
# Basic imports
70+
from sklearn.ensemble import RandomForestRegressor
71+
from sklearn.model_selection import train_test_split
72+
from sklearn.metrics import mean_squared_error as MSE
73+
74+
# Set seed for reproducibility
75+
SEED = 1
76+
77+
# Split dataset into 70% train and 30% test
78+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)
79+
80+
# Instantiate a random forest regressor 'rf' with 400 estimators
81+
rf = RandomForestRegressor(n_estimators=400, min_samples_leaf=0.12, random_state=SEED)
82+
83+
# Fit 'rf' to the training set
84+
rf.fit(X_train, y_train)
85+
86+
# Predict the test set labels 'y_pred'
87+
y_pred = rf.predict(X_test)
88+
89+
# Evaluate the test set RMSE
90+
rmse_test = MSE(y_test, y_pred) ** 0.5  # RMSE is the square root of the MSE
91+
92+
# Print the test set RMSE
93+
print('Test set RMSE of rf: {:.2f}'.format(rmse_test))
94+
95+
96+
# FEATURE IMPORTANCE in sklearn
97+
98+
import pandas as pd
99+
import matplotlib.pyplot as plt
100+
101+
# Create a pd.Series of feature importances
102+
importances_rf = pd.Series(rf.feature_importances_, index = X.columns)
103+
104+
# Sort importances_rf
105+
sorted_importances_rf = importances_rf.sort_values()
106+
107+
# Make a horizontal bar plot
108+
sorted_importances_rf.plot(kind='barh', color='lightgreen')
109+
plt.show()
110+
111+
112+
113+

0 commit comments

Comments
 (0)