Skip to content

Commit 736727d

Browse files
authored
Merge pull request #210 from opensource9ja/fix/rm-nans
Fixes #200: remove NaNs before computing mean or sum
2 parents ee78805 + 96a7d03 commit 736727d

File tree

15 files changed

+742
-588
lines changed

15 files changed

+742
-588
lines changed

danfojs-browser/lib/bundle.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

danfojs-browser/lib/bundle.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

danfojs-browser/src/core/frame.js

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -392,16 +392,31 @@ export class DataFrame extends Ndframe {
392392
* @returns {Series}
393393
*/
394394
mean(axis = 1) {
395-
if (this.__frame_is_compactible_for_operation) {
396-
//check if all types are numeric
397-
let operands = this.__get_tensor_and_idx(this, axis);
398-
let tensor_vals = operands[0];
399-
let idx = operands[1];
400-
let result = tensor_vals.mean(operands[2]);
401-
let sf = new Series(result.arraySync(), { index: idx });
395+
if (this.__frame_is_compactible_for_operation()) {
396+
let values;
397+
let val_mean = [];
398+
if (axis == 1) {
399+
values = this.col_data;
400+
} else {
401+
values = this.values;
402+
}
403+
404+
values.map((arr) => {
405+
let temp = utils._remove_nans(arr);
406+
let temp_mean = tf.tensor(temp).mean().arraySync();
407+
val_mean.push(Number(temp_mean.toFixed(5)));
408+
});
409+
410+
let new_index;
411+
if (axis == 1) {
412+
new_index = this.column_names;
413+
} else {
414+
new_index = this.index;
415+
}
416+
let sf = new Series(val_mean, { columns: "sum", index: new_index });
402417
return sf;
403418
} else {
404-
throw Error("TypeError: Dtypes of columns must be Float of Int");
419+
throw Error("Dtype Error: Operation can not be performed on string type");
405420
}
406421
}
407422

@@ -904,7 +919,8 @@ export class DataFrame extends Ndframe {
904919
}
905920

906921
values.map((arr) => {
907-
let temp_sum = tf.tensor(arr).sum().arraySync();
922+
let temp = utils._remove_nans(arr);
923+
let temp_sum = tf.tensor(temp).sum().arraySync();
908924
val_sums.push(Number(temp_sum.toFixed(5)));
909925
});
910926

danfojs-browser/src/core/series.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -302,10 +302,12 @@ export class Series extends NDframe {
302302
sum() {
303303
utils._throw_str_dtype_error(this, 'sum');
304304
if (this.dtypes[0] == "boolean") {
305-
let temp_sum = this.row_data_tensor.sum().arraySync();
305+
let temp = utils._remove_nans(this.values);
306+
let temp_sum = tf.tensor(temp).sum().arraySync();
306307
return Number(temp_sum);
307308
}
308-
let temp_sum = this.row_data_tensor.sum().arraySync();
309+
let temp = utils._remove_nans(this.values);
310+
let temp_sum = tf.tensor(temp).sum().arraySync();
309311
return Number(temp_sum.toFixed(5));
310312
}
311313

danfojs-browser/src/core/utils.js

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ export class Utils {
104104
} else {
105105
var selected = new Set();
106106
// eslint-disable-next-line no-empty
107-
while (selected.add((Math.random() * n) | 0).size < k) {}
107+
while (selected.add((Math.random() * n) | 0).size < k) { }
108108
// eslint-disable-next-line no-undef
109109
return Array.prototype.map.call(selected, (i) => population[i]);
110110
}
@@ -210,15 +210,15 @@ export class Utils {
210210
}
211211
}
212212

213-
__checker(arr_val){
213+
__checker(arr_val) {
214214
let dtypes = [];
215215
let lim;
216216
let int_tracker = [];
217217
let float_tracker = [];
218218
let string_tracker = [];
219219
let bool_tracker = [];
220220

221-
if (arr_val.length == 0){
221+
if (arr_val.length == 0) {
222222
dtypes.push("string");
223223
}
224224

@@ -234,7 +234,7 @@ export class Utils {
234234
int_tracker.push(false);
235235
string_tracker.push(false);
236236
bool_tracker.push(true);
237-
} else if (isNaN(ele) && typeof ele != "string"){
237+
} else if (isNaN(ele) && typeof ele != "string") {
238238
float_tracker.push(true);
239239
int_tracker.push(false);
240240
string_tracker.push(false);
@@ -624,6 +624,21 @@ export class Utils {
624624
return values;
625625
}
626626

627+
/**
628+
* Replace NaN with null before tensor operations
629+
* @param {*} arr
630+
*/
631+
_replace_nan_with_null(arr) {
632+
let values = arr.map((val) => {
633+
if (isNaN(val)) {
634+
return null;
635+
} else {
636+
return val;
637+
}
638+
});
639+
return values;
640+
}
641+
627642
__get_duplicate(arr) {
628643
let temp_obj = {};
629644
let rslt_obj = {};

danfojs-browser/tests/core/frame.js

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,11 @@ describe("DataFrame", function () {
627627
let df = new dfd.DataFrame(data);
628628
assert.deepEqual(df.mean(0).values, [ 2, 300 ]);
629629
});
630+
it("Removes NaN before calculating mean of a DataFrame", function () {
631+
let data = [ [ 11, 20, 3 ], [ NaN, 15, 6 ], [ 2, 30, 40 ], [ 2, 89, 78 ] ];
632+
let df = new dfd.DataFrame(data);
633+
assert.deepEqual(df.mean().values, [ 5, 38.5, 31.75 ]);
634+
});
630635
});
631636

632637
describe("median", function () {
@@ -1927,7 +1932,12 @@ describe("DataFrame", function () {
19271932
let res = [ 1, 2, 1, 1 ];
19281933
assert.deepEqual(df.sum().values, res);
19291934
});
1930-
1935+
it("Sum values of a df with missing values", function () {
1936+
let data1 = [ [ 11, 20, 3 ], [ null, 15, 6 ], [ 2, 30, 40 ], [ 2, 89, 78 ] ];
1937+
let df = new dfd.DataFrame(data1);
1938+
let res = [ 15, 154, 127 ];
1939+
assert.deepEqual(df.sum().values, res);
1940+
});
19311941
});
19321942

19331943
describe("abs", function () {

danfojs-browser/tests/core/series.js

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,11 @@ describe("Series", function () {
289289
let sf = new dfd.Series(data1);
290290
assert.deepEqual(sf.mean(), 19.625);
291291
});
292+
it("Computes the mean of elements in a float series with NaN", function () {
293+
let data1 = [ 30.1, 40.2, 3.1, 5.1, NaN ];
294+
let sf = new dfd.Series(data1);
295+
assert.deepEqual(sf.mean(), 19.625);
296+
});
292297
it("Throws error if dtype is string", function () {
293298
let data1 = [ "boy", "girl", "Man" ];
294299
let sf = new dfd.Series(data1);
@@ -331,6 +336,11 @@ describe("Series", function () {
331336
let sf = new dfd.Series(data1);
332337
assert.deepEqual(sf.sum(), 2);
333338
});
339+
it("Sum values a Series with missing values", function () {
340+
let data1 = [ 11, NaN, 2, 2 ];
341+
let sf = new dfd.Series(data1);
342+
assert.deepEqual(sf.sum(), 15);
343+
});
334344
});
335345

336346
describe("mode", function () {

danfojs-node/dist/core/frame.js

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -330,18 +330,37 @@ class DataFrame extends _generic.default {
330330
}
331331

332332
mean(axis = 1) {
333-
if (this.__frame_is_compactible_for_operation) {
334-
let operands = this.__get_tensor_and_idx(this, axis);
333+
if (this.__frame_is_compactible_for_operation()) {
334+
let values;
335+
let val_mean = [];
335336

336-
let tensor_vals = operands[0];
337-
let idx = operands[1];
338-
let result = tensor_vals.mean(operands[2]);
339-
let sf = new _series.Series(result.arraySync(), {
340-
index: idx
337+
if (axis == 1) {
338+
values = this.col_data;
339+
} else {
340+
values = this.values;
341+
}
342+
343+
values.map(arr => {
344+
let temp = utils._remove_nans(arr);
345+
346+
let temp_mean = tf.tensor(temp).mean().arraySync();
347+
val_mean.push(Number(temp_mean.toFixed(5)));
348+
});
349+
let new_index;
350+
351+
if (axis == 1) {
352+
new_index = this.column_names;
353+
} else {
354+
new_index = this.index;
355+
}
356+
357+
let sf = new _series.Series(val_mean, {
358+
columns: "sum",
359+
index: new_index
341360
});
342361
return sf;
343362
} else {
344-
throw Error("TypeError: Dtypes of columns must be Float of Int");
363+
throw Error("Dtype Error: Operation can not be performed on string type");
345364
}
346365
}
347366

@@ -747,7 +766,9 @@ class DataFrame extends _generic.default {
747766
}
748767

749768
values.map(arr => {
750-
let temp_sum = tf.tensor(arr).sum().arraySync();
769+
let temp = utils._remove_nans(arr);
770+
771+
let temp_sum = tf.tensor(temp).sum().arraySync();
751772
val_sums.push(Number(temp_sum.toFixed(5)));
752773
});
753774
let new_index;
@@ -783,7 +804,12 @@ class DataFrame extends _generic.default {
783804
let tensor_vals, idx, t_axis;
784805

785806
if (axis == 1) {
786-
tensor_vals = df.row_data_tensor;
807+
let temp_tensor_vals = df.row_data_tensor;
808+
let flat_tensor_array = tf.util.flatten(temp_tensor_vals.arraySync());
809+
810+
const flat_tensor_array_without_nans = utils._replace_nan_with_null(flat_tensor_array);
811+
812+
tensor_vals = tf.tensor(flat_tensor_array_without_nans, temp_tensor_vals.shape);
787813
idx = df.column_names;
788814
t_axis = 0;
789815
} else {

danfojs-node/dist/core/series.js

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,11 +241,15 @@ class Series extends _generic.default {
241241
utils._throw_str_dtype_error(this, 'sum');
242242

243243
if (this.dtypes[0] == "boolean") {
244-
let temp_sum = this.row_data_tensor.sum().arraySync();
244+
let temp = utils._remove_nans(this.values);
245+
246+
let temp_sum = tf.tensor(temp).sum().arraySync();
245247
return Number(temp_sum);
246248
}
247249

248-
let temp_sum = this.row_data_tensor.sum().arraySync();
250+
let temp = utils._remove_nans(this.values);
251+
252+
let temp_sum = tf.tensor(temp).sum().arraySync();
249253
return Number(temp_sum.toFixed(5));
250254
}
251255

danfojs-node/dist/core/utils.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,17 @@ class Utils {
552552
return values;
553553
}
554554

555+
_replace_nan_with_null(arr) {
556+
let values = arr.map(val => {
557+
if (isNaN(val)) {
558+
return null;
559+
} else {
560+
return val;
561+
}
562+
});
563+
return values;
564+
}
565+
555566
__get_duplicate(arr) {
556567
let temp_obj = {};
557568
let rslt_obj = {};

0 commit comments

Comments
 (0)