@@ -66,8 +66,7 @@ def fit_transform(self, df):
66
66
67
67
68
68
def get_data ():
69
- # regex allows arbitrary number of spaces in separator
70
- df = pd .read_csv ('../large_files/housing.data' , header = None , sep = r"\s*" , engine = 'python' )
69
+ df = pd .read_csv ('housing.data' , header = None , delim_whitespace = True )
71
70
df .columns = [
72
71
'crim' , # numerical
73
72
'zn' , # numerical
@@ -128,9 +127,9 @@ def get_data():
128
127
# do a quick baseline test
129
128
baseline = LinearRegression ()
130
129
single_tree = DecisionTreeRegressor ()
131
- print ("CV single tree:" , cross_val_score (single_tree , Xtrain , Ytrain ).mean ())
132
- print ("CV baseline:" , cross_val_score (baseline , Xtrain , Ytrain ).mean ())
133
- print ("CV forest:" , cross_val_score (model , Xtrain , Ytrain ).mean ())
130
+ print ("CV single tree:" , cross_val_score (single_tree , Xtrain , Ytrain , cv = 5 ).mean ())
131
+ print ("CV baseline:" , cross_val_score (baseline , Xtrain , Ytrain , cv = 5 ).mean ())
132
+ print ("CV forest:" , cross_val_score (model , Xtrain , Ytrain , cv = 5 ).mean ())
134
133
135
134
# test score
136
135
single_tree .fit (Xtrain , Ytrain )
0 commit comments