22open Bigarray
33open Dataset_utils
44
(* Named [Logs] source used by every message this loader emits.
   NOTE(review): the leading space inside both string literals is reproduced
   verbatim from the text as seen; it looks like an extraction artifact, so
   confirm against the real file before relying on the exact source name. *)
let src =
  Logs.Src.create ~doc:" California Housing loader"
    " nx.datasets.california_housing"

(* Logging functions ([Log.info], [Log.warn], ...) bound to [src]. *)
module Log = (val Logs.src_log src : Logs.LOG)
11+
(* Identifier handed to [get_cache_dir] to obtain this dataset's cache
   directory.
   NOTE(review): the leading space in the two literals below is kept exactly
   as it appears in the text as seen; it is probably an extraction artifact,
   so confirm against the real file. *)
let dataset_name = " california-housing"

(* Name of the CSV data file; presumably resolved relative to [dataset_dir],
   but its use is not visible in this excerpt. *)
let data_filename = " housing.csv"

(* Whatever [get_cache_dir] (from [Dataset_utils]) returns for
   [dataset_name]. *)
let dataset_dir = get_cache_dir dataset_name
@@ -28,7 +35,7 @@ let calculate_mean_non_nan column_data =
2835
2936let load () =
3037 ensure_dataset () ;
31- Printf. printf " Loading California Housing dataset...\n %! " ;
38+ Log. info ( fun m -> m " Loading California Housing dataset..." ) ;
3239
3340 let header, all_data_rows =
3441 try
@@ -85,8 +92,9 @@ let load () =
8592 List. find_index (( = ) " total_bedrooms" ) header
8693 in
8794
88- Printf. printf " Found %d samples. Loading %d features + target '%s'.\n %!"
89- num_samples num_features target_name;
95+ Log. info (fun m ->
96+ m " Found %d samples. Loading %d features + target '%s'." num_samples
97+ num_features target_name);
9098
9199 let parsed_features_temp = Array. make_matrix num_samples num_features nan in
92100 let parsed_labels_temp = Array. make num_samples nan in
@@ -95,8 +103,10 @@ let load () =
95103 List. iteri
96104 (fun i row ->
97105 if List. length row <> List. length header then
98- Printf. eprintf " Warning: Row %d has %d columns, expected %d\n %!" (i + 1 )
99- (List. length row) (List. length header);
106+ Log. warn (fun m ->
107+ m " Row %d has %d columns, expected %d (header: %s)" (i + 1 )
108+ (List. length row) (List. length header)
109+ (String. concat " , " header));
100110
101111 List. iteri
102112 (fun j feature_idx ->
@@ -106,29 +116,30 @@ let load () =
106116 parsed_features_temp.(i).(j) < - v_float;
107117 if Some feature_idx = total_bedrooms_index_opt then
108118 total_bedrooms_col_temp := v_float :: ! total_bedrooms_col_temp)
109- else (
110- Printf. eprintf
111- " Warning: Row %d missing feature column %d ('%s'). Setting NaN. \n \
112- %! "
113- (i + 1 ) feature_idx ( List. nth feature_names j );
119+ else
120+ let feature_name = List. nth feature_names j in
121+ Log. warn ( fun m ->
122+ m " Row %d missing feature column %d ('%s'). Setting NaN. "
123+ (i + 1 ) feature_idx feature_name );
114124 parsed_features_temp.(i).(j) < - nan;
115125 if Some feature_idx = total_bedrooms_index_opt then
116- total_bedrooms_col_temp := nan :: ! total_bedrooms_col_temp))
126+ total_bedrooms_col_temp := nan :: ! total_bedrooms_col_temp)
117127 feature_indices;
118128
119129 if List. length row > target_index then
120130 let label_str = List. nth row target_index in
121131 parsed_labels_temp.(i) < - parse_float_or_nan label_str
122132 else (
123- Printf. eprintf
124- " Warning: Row %d missing target column %d ('%s'). Setting NaN.\n %! "
125- (i + 1 ) target_index target_name;
133+ Log. warn ( fun m ->
134+ m " Row %d missing target column %d ('%s'). Setting NaN." (i + 1 )
135+ target_index target_name) ;
126136 parsed_labels_temp.(i) < - nan))
127137 data_rows_str;
128138
129139 let total_bedrooms_mean = calculate_mean_non_nan ! total_bedrooms_col_temp in
130- Printf. printf " Calculated mean for 'total_bedrooms' (for imputation): %f\n %!"
131- total_bedrooms_mean;
140+ Log. info (fun m ->
141+ m " Calculated mean for 'total_bedrooms' (for imputation): %f"
142+ total_bedrooms_mean);
132143 let total_bedrooms_feature_index =
133144 match List. find_index (( = ) " total_bedrooms" ) feature_names with
134145 | Some idx -> idx
@@ -163,5 +174,5 @@ let load () =
163174 else labels.{i} < - label_v
164175 done ;
165176
166- Printf. printf " California Housing loading complete.\n %! " ;
177+ Log. info ( fun m -> m " California Housing loading complete." ) ;
167178 (features, labels)
0 commit comments