Skip to content

Commit 2d79516

Browse files
feat(nx): Use Logs for logging in nx-datasets (#95)
1 parent 90387f8 commit 2d79516

File tree

7 files changed

+94
-45
lines changed

7 files changed

+94
-45
lines changed

nx-datasets/example/california.ml

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,14 @@ let () =
1515
let labels_1d = Nx.reshape [| n_samples |] labels in
1616
let labels_f32 = astype_f32 labels_1d in
1717

18-
let longitude = Nx.slice [ Nx.R (0, n_samples); Nx.R (0, 1) ] features in
19-
let latitude = Nx.slice [ Nx.R (0, n_samples); Nx.R (1, 2) ] features in
18+
(* slice produces shape [n;1] — reshape to 1-D [n] so Hugin's scatter receives
19+
a vector not a 2-D column *)
20+
let longitude_col = Nx.slice [ Nx.R (0, n_samples); Nx.R (0, 1) ] features in
21+
let latitude_col = Nx.slice [ Nx.R (0, n_samples); Nx.R (1, 2) ] features in
22+
23+
let longitude = Nx.reshape [| n_samples |] longitude_col in
24+
let latitude = Nx.reshape [| n_samples |] latitude_col in
25+
2026
let longitude_f32 = astype_f32 longitude in
2127
let latitude_f32 = astype_f32 latitude in
2228

nx-datasets/lib/datasets/airline_passengers.ml

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -10,11 +10,18 @@ let data_path = dataset_dir ^ data_filename
1010
let url =
1111
"https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv"
1212

13+
(* Logging source for this loader *)
14+
let src =
15+
Logs.Src.create "nx.datasets.airline_passengers"
16+
~doc:"Airline passengers loader"
17+
18+
module Log = (val Logs.src_log src : Logs.LOG)
19+
1320
let ensure_dataset () = ensure_file url data_path
1421

1522
let load () =
1623
ensure_dataset ();
17-
Printf.printf "Loading Airline Passengers dataset...\n%!";
24+
Log.info (fun m -> m "Loading Airline Passengers dataset...");
1825

1926
let header, data_rows_iter =
2027
try
@@ -58,9 +65,11 @@ let load () =
5865
let row_list = Csv.Row.to_list row in
5966
(* Convert Row.t to string list *)
6067
if List.length row_list <> List.length header then
61-
Printf.eprintf "Warning: Row %d has %d columns, expected %d\n%!"
62-
(List.length acc + 1)
63-
(List.length row_list) (List.length header);
68+
Log.warn (fun m ->
69+
m "Row %d has %d columns, expected %d (header: %s)"
70+
(List.length acc + 1)
71+
(List.length row_list) (List.length header)
72+
(String.concat ", " header));
6473

6574
(* Check length before accessing *)
6675
if List.length row_list > passenger_col_index then
@@ -73,12 +82,13 @@ let load () =
7382
let passenger_int = parse_int_cell ~context passenger_str in
7483
passenger_int :: acc
7584
else (
76-
Printf.eprintf
77-
"Warning: Row %d is shorter than expected (%d < %d), skipping \
78-
passenger value.\n\
79-
%!"
80-
(List.length acc + 1)
81-
(List.length row_list) (passenger_col_index + 1);
85+
Log.warn (fun m ->
86+
m
87+
"Row %d is shorter than expected (%d < %d), skipping \
88+
passenger value. Missing column: %s"
89+
(List.length acc + 1)
90+
(List.length row_list) (passenger_col_index + 1)
91+
passenger_col_name);
8292
-1 :: acc (* Placeholder for missing data *)))
8393
~init:[] data_rows_iter
8494
with
@@ -101,7 +111,7 @@ let load () =
101111
let num_samples = List.length data_rows_rev in
102112
if num_samples = 0 then
103113
failwith "No data rows loaded from airline-passengers.csv";
104-
Printf.printf "Found %d samples.\n%!" num_samples;
114+
Log.info (fun m -> m "Found %d samples." num_samples);
105115

106116
(* Create Bigarray and populate (data is reversed from fold_left) *)
107117
let passengers = Array1.create int32 c_layout num_samples in
@@ -110,5 +120,5 @@ let load () =
110120
passengers.{num_samples - 1 - i} <- Int32.of_int passenger_val)
111121
data_rows_rev;
112122

113-
Printf.printf "Airline Passengers loading complete.\n%!";
123+
Log.info (fun m -> m "Airline Passengers loading complete.");
114124
passengers

nx-datasets/lib/datasets/breast_cancer.ml

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,13 @@
22
open Bigarray
33
open Dataset_utils
44

5+
(* Logging source for this loader *)
6+
let src =
7+
Logs.Src.create "nx.datasets.breast_cancer"
8+
~doc:"Breast Cancer dataset loader"
9+
10+
module Log = (val Logs.src_log src : Logs.LOG)
11+
512
let dataset_name = "breast-cancer-wisconsin"
613
let dataset_dir = get_cache_dir dataset_name
714
let data_filename = "wdbc.data"
@@ -24,7 +31,7 @@ let encode_label label row =
2431

2532
let load () =
2633
ensure_dataset ();
27-
Printf.printf "Loading Breast Cancer Wisconsin dataset...\n%!";
34+
Log.info (fun m -> m "Loading Breast Cancer Wisconsin dataset...");
2835

2936
let data_rows =
3037
try
@@ -51,7 +58,7 @@ let load () =
5158
let expected_cols = 32 in
5259
let num_features = 30 in
5360

54-
Printf.printf "Found %d samples.\n%!" num_samples;
61+
Log.info (fun m -> m "Found %d samples." num_samples);
5562

5663
let features = Array2.create float64 c_layout num_samples num_features in
5764
let labels = Array1.create int c_layout num_samples in
@@ -75,5 +82,5 @@ let load () =
7582
done)
7683
data_rows;
7784

78-
Printf.printf "Breast Cancer Wisconsin loading complete.\n%!";
85+
Log.info (fun m -> m "Breast Cancer Wisconsin loading complete.");
7986
(features, labels)

nx-datasets/lib/datasets/california_housing.ml

Lines changed: 28 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,13 @@
22
open Bigarray
33
open Dataset_utils
44

5+
(* Logging source for this loader *)
6+
let src =
7+
Logs.Src.create "nx.datasets.california_housing"
8+
~doc:"California Housing loader"
9+
10+
module Log = (val Logs.src_log src : Logs.LOG)
11+
512
let dataset_name = "california-housing"
613
let dataset_dir = get_cache_dir dataset_name
714
let data_filename = "housing.csv"
@@ -28,7 +35,7 @@ let calculate_mean_non_nan column_data =
2835

2936
let load () =
3037
ensure_dataset ();
31-
Printf.printf "Loading California Housing dataset...\n%!";
38+
Log.info (fun m -> m "Loading California Housing dataset...");
3239

3340
let header, all_data_rows =
3441
try
@@ -85,8 +92,9 @@ let load () =
8592
List.find_index (( = ) "total_bedrooms") header
8693
in
8794

88-
Printf.printf "Found %d samples. Loading %d features + target '%s'.\n%!"
89-
num_samples num_features target_name;
95+
Log.info (fun m ->
96+
m "Found %d samples. Loading %d features + target '%s'." num_samples
97+
num_features target_name);
9098

9199
let parsed_features_temp = Array.make_matrix num_samples num_features nan in
92100
let parsed_labels_temp = Array.make num_samples nan in
@@ -95,8 +103,10 @@ let load () =
95103
List.iteri
96104
(fun i row ->
97105
if List.length row <> List.length header then
98-
Printf.eprintf "Warning: Row %d has %d columns, expected %d\n%!" (i + 1)
99-
(List.length row) (List.length header);
106+
Log.warn (fun m ->
107+
m "Row %d has %d columns, expected %d (header: %s)" (i + 1)
108+
(List.length row) (List.length header)
109+
(String.concat ", " header));
100110

101111
List.iteri
102112
(fun j feature_idx ->
@@ -106,29 +116,30 @@ let load () =
106116
parsed_features_temp.(i).(j) <- v_float;
107117
if Some feature_idx = total_bedrooms_index_opt then
108118
total_bedrooms_col_temp := v_float :: !total_bedrooms_col_temp)
109-
else (
110-
Printf.eprintf
111-
"Warning: Row %d missing feature column %d ('%s'). Setting NaN.\n\
112-
%!"
113-
(i + 1) feature_idx (List.nth feature_names j);
119+
else
120+
let feature_name = List.nth feature_names j in
121+
Log.warn (fun m ->
122+
m "Row %d missing feature column %d ('%s'). Setting NaN."
123+
(i + 1) feature_idx feature_name);
114124
parsed_features_temp.(i).(j) <- nan;
115125
if Some feature_idx = total_bedrooms_index_opt then
116-
total_bedrooms_col_temp := nan :: !total_bedrooms_col_temp))
126+
total_bedrooms_col_temp := nan :: !total_bedrooms_col_temp)
117127
feature_indices;
118128

119129
if List.length row > target_index then
120130
let label_str = List.nth row target_index in
121131
parsed_labels_temp.(i) <- parse_float_or_nan label_str
122132
else (
123-
Printf.eprintf
124-
"Warning: Row %d missing target column %d ('%s'). Setting NaN.\n%!"
125-
(i + 1) target_index target_name;
133+
Log.warn (fun m ->
134+
m "Row %d missing target column %d ('%s'). Setting NaN." (i + 1)
135+
target_index target_name);
126136
parsed_labels_temp.(i) <- nan))
127137
data_rows_str;
128138

129139
let total_bedrooms_mean = calculate_mean_non_nan !total_bedrooms_col_temp in
130-
Printf.printf "Calculated mean for 'total_bedrooms' (for imputation): %f\n%!"
131-
total_bedrooms_mean;
140+
Log.info (fun m ->
141+
m "Calculated mean for 'total_bedrooms' (for imputation): %f"
142+
total_bedrooms_mean);
132143
let total_bedrooms_feature_index =
133144
match List.find_index (( = ) "total_bedrooms") feature_names with
134145
| Some idx -> idx
@@ -163,5 +174,5 @@ let load () =
163174
else labels.{i} <- label_v
164175
done;
165176

166-
Printf.printf "California Housing loading complete.\n%!";
177+
Log.info (fun m -> m "California Housing loading complete.");
167178
(features, labels)

nx-datasets/lib/datasets/cifar10.ml

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,12 +10,17 @@ let url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
1010
let tar_path = base_dir ^ Filename.basename url
1111
let test_batch_rel_path = archive_dir_name ^ "/test_batch"
1212

13+
(* Logging source for this loader *)
14+
let src = Logs.Src.create "nx.datasets.cifar10" ~doc:"CIFAR10 dataset loader"
15+
16+
module Log = (val Logs.src_log src : Logs.LOG)
17+
1318
let ensure_dataset () =
1419
ensure_extracted_archive ~url ~archive_path:tar_path ~extract_dir:base_dir
1520
~check_file:test_batch_rel_path
1621

1722
let read_cifar_batch filename =
18-
Printf.printf "Reading batch file: %s\n%!" filename;
23+
Log.debug (fun m -> m "Reading batch file: %s" filename);
1924
let ic = open_in_bin filename in
2025
let s =
2126
try really_input_string ic (in_channel_length ic)
@@ -32,7 +37,7 @@ let read_cifar_batch filename =
3237
(Printf.sprintf "File %s has unexpected size %d" filename num_bytes);
3338

3439
let num_images = num_bytes / bytes_per_image in
35-
Printf.printf "Found %d images in %s.\n%!" num_images filename;
40+
Log.debug (fun m -> m "Found %d images in %s." num_images filename);
3641

3742
let images =
3843
Genarray.create int8_unsigned c_layout [| num_images; 32; 32; 3 |]
@@ -64,7 +69,7 @@ let read_cifar_batch filename =
6469

6570
let load () =
6671
ensure_dataset ();
67-
Printf.printf "Loading CIFAR-10 dataset...\n%!";
72+
Log.info (fun m -> m "Loading CIFAR-10 dataset...");
6873

6974
let train_batches_files =
7075
List.init 5 (fun i -> dataset_dir ^ Printf.sprintf "data_batch_%d" (i + 1))
@@ -107,5 +112,5 @@ let load () =
107112
let test_batch_file = dataset_dir ^ "test_batch" in
108113
let test_images, test_labels = read_cifar_batch test_batch_file in
109114

110-
Printf.printf "CIFAR-10 loading complete.\n%!";
115+
Log.info (fun m -> m "CIFAR-10 loading complete.");
111116
((train_images, train_labels), (test_images, test_labels))

nx-datasets/lib/datasets/diabetes.ml

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,11 @@
22
open Bigarray
33
open Dataset_utils
44

5+
(* Logging source for this loader *)
6+
let src = Logs.Src.create "nx.datasets.diabetes" ~doc:"Diabetes dataset loader"
7+
8+
module Log = (val Logs.src_log src : Logs.LOG)
9+
510
let dataset_name = "diabetes-sklearn"
611
let dataset_dir = get_cache_dir dataset_name
712
let data_filename = "diabetes.tab.txt"
@@ -11,7 +16,7 @@ let ensure_dataset () = ensure_file url data_path
1116

1217
let load () =
1318
ensure_dataset ();
14-
Printf.printf "Loading Diabetes (sklearn version) dataset...\n%!";
19+
Log.info (fun m -> m "Loading Diabetes (sklearn version) dataset...");
1520

1621
let header, data_rows_iter =
1722
try
@@ -106,8 +111,9 @@ let load () =
106111
let num_samples = List.length labels_rev in
107112

108113
if num_samples = 0 then failwith "No data rows loaded from diabetes.tab.txt";
109-
Printf.printf "Found %d samples with %d features and target '%s'.\n%!"
110-
num_samples num_features target_col_name;
114+
Log.info (fun m ->
115+
m "Found %d samples with %d features and target '%s'." num_samples
116+
num_features target_col_name);
111117

112118
let features_ba = Array2.create float64 c_layout num_samples num_features in
113119
let labels_ba = Array1.create float64 c_layout num_samples in
@@ -124,5 +130,5 @@ let load () =
124130
(fun i label_val -> labels_ba.{num_samples - 1 - i} <- label_val)
125131
labels_rev;
126132

127-
Printf.printf "Diabetes loading complete.\n%!";
133+
Log.info (fun m -> m "Diabetes loading complete.");
128134
(features_ba, labels_ba)

nx-datasets/lib/datasets/iris.ml

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,10 @@
22
open Bigarray
33
open Dataset_utils
44

5+
let src = Logs.Src.create "nx.datasets.iris" ~doc:"Iris dataset loader"
6+
7+
module Log = (val Logs.src_log src : Logs.LOG)
8+
59
let dataset_name = "iris"
610
let dataset_dir = get_cache_dir dataset_name
711
let data_filename = "iris.data"
@@ -24,7 +28,7 @@ let encode_label s =
2428

2529
let load () =
2630
ensure_dataset ();
27-
Printf.printf "Loading Iris dataset...\n%!";
31+
Log.info (fun m -> m "Loading Iris dataset...");
2832

2933
let data_rows =
3034
try
@@ -50,7 +54,7 @@ let load () =
5054
let num_features = 4 in
5155

5256
if num_samples = 0 then failwith "No data loaded from iris.data";
53-
Printf.printf "Found %d samples.\n%!" num_samples;
57+
Log.info (fun m -> m "Found %d samples" num_samples);
5458

5559
let features = Array2.create float64 c_layout num_samples num_features in
5660
let labels = Array1.create int32 c_layout num_samples in
@@ -73,5 +77,5 @@ let load () =
7377
labels.{i} <- encode_label label_str)
7478
data_rows;
7579

76-
Printf.printf "Iris loading complete.\n%!";
80+
Log.info (fun m -> m "Iris loading complete");
7781
(features, labels)

0 commit comments

Comments
 (0)