Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
ddf60e1
[groupby][s]: update test to remove duplicate rows when printing grou…
steveoni Apr 23, 2021
edd2e0c
[groupby][s]: fix duplicate rows in groupby dataframe
steveoni Apr 23, 2021
bbe2f1c
[groupby][s]: fix duplicate rows in groupby dataframe
steveoni Apr 23, 2021
d0a1674
[groupby][s]: fix duplicate rows in groupby dataframe
steveoni Apr 23, 2021
78aea1d
[groupby][s]: fix duplicate rows in groupby dataframe
steveoni Apr 23, 2021
9795487
[bundle.js][s]: fix duplicate row in groupby
steveoni Apr 24, 2021
e1c1d0f
[bundle.js.map][s]:fix duplicate row in groupby
steveoni Apr 24, 2021
e9403d3
Merge branch 'master' of https://github.com/opensource9ja/danfojs int…
steveoni Apr 24, 2021
bd05a8f
[reader.js][s]: allow blob url
steveoni Apr 24, 2021
50046f9
[reader.js][s]: allow blob url
steveoni Apr 24, 2021
fe3c526
[bundle.js][s]: allow blob url
steveoni Apr 24, 2021
74f266c
[bundle.js.map][s]: allow blob url
steveoni Apr 24, 2021
e6ef19a
[bundle.js][s]: fix #183
steveoni Apr 24, 2021
18c02da
[bundle.js.map][s]: fix #183
steveoni Apr 24, 2021
4fd0fdc
[frame.js][s]: obtain groupby column dtype to fix #183
steveoni Apr 24, 2021
fec70e4
[groupby.js][s]: obtain groupby column dtype to fix #183
steveoni Apr 24, 2021
8038ee6
[frame.js][s]: obtain groupby column dtype to fix #183
steveoni Apr 24, 2021
34008d6
[groupby.js][s]: obtain groupby column dtype to fix #183
steveoni Apr 24, 2021
2b388e3
[reader.js][s]: add blob url
steveoni Apr 24, 2021
a54f618
[frame.js][s]: obtain groupby column dtype to fix #183
steveoni Apr 24, 2021
432f984
[groupby.js][s]: obtain groupby column dtype to fix #183
steveoni Apr 24, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion danfojs-browser/lib/bundle.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion danfojs-browser/lib/bundle.js.map

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion danfojs-browser/src/core/frame.js
Original file line number Diff line number Diff line change
Expand Up @@ -1112,6 +1112,10 @@ export class DataFrame extends Ndframe {
let column_names = this.column_names;
let col_dict = {};
let key_column = null;
let col_index = col.map((val) => column_names.indexOf(val));
let col_dtype = this.dtypes.filter((val, index) => {
return col_index.includes(index);
});

if (col.length == 2) {
if (column_names.includes(col[0])) {
Expand Down Expand Up @@ -1174,7 +1178,8 @@ export class DataFrame extends Ndframe {
col_dict,
key_column,
this.values,
column_names
column_names,
col_dtype
).group();

return groups;
Expand Down
29 changes: 16 additions & 13 deletions danfojs-browser/src/core/groupby.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@ const utils = new Utils;
* @param {column_name} Array of all column names in the dataframe.
*/
export class GroupBy {
constructor(col_dict, key_col, data, column_name) {
constructor(col_dict, key_col, data, column_name, col_dtype) {

this.key_col = key_col;
this.col_dict = col_dict;
this.data = data;
this.column_name = column_name;
this.data_tensors = {}; //store the tensor version of the groupby data
this.col_dtype = col_dtype;

}

Expand Down Expand Up @@ -150,7 +151,8 @@ export class GroupBy {
null,
this.key_col,
null,
col_names
col_names,
this.col_dtype
);

gp.group_col = group_col;
Expand Down Expand Up @@ -376,8 +378,8 @@ export class GroupBy {

if (typeof key_data[j] === "undefined" ){
key_data[j] = [];
key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
key_data[j][1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
key_data[j][1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
key_data[j].push(col_data[j]);
} else {
key_data[j].push(col_data[j]);
Expand All @@ -387,8 +389,8 @@ export class GroupBy {
df_data.push(...key_data);

} else {
key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
key_data[1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
key_data[1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
key_data.push(...k_data);
df_data.push(key_data);
}
Expand Down Expand Up @@ -422,17 +424,17 @@ export class GroupBy {

if (typeof key_data[j] === "undefined" ){
key_data[j] = [];
key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
key_data[j].push(col_data[j]);
} else {
key_data[j].push(col_data[j]);
}
}
df_data.push(...key_data);
// df_data.push(...key_data);
}

df_data.push(...key_data);
} else {
key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
key_data.push(...key_val);
df_data.push(key_data);
}
Expand Down Expand Up @@ -465,10 +467,11 @@ export class GroupBy {
let data = [];
let count_group = {};
if (this.key_col.length == 2) {

for (let key in this.data_tensors) {
for (let key in df_data) {
count_group[key] = {};
for (let key2 in this.data_tensors[key]) {
for (let key2 in df_data[key]) {
let index;
count_group[key][key2] = [];
for (let i = 0; i < df_data[key][key2].length; i++ ) {
let callable_rslt = callable(df_data[key][key2][i]);
Expand Down
2 changes: 1 addition & 1 deletion danfojs-browser/src/io/reader.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import { DataFrame } from "../core/frame";
*/
export const read_csv = async (source, configs = {}) => {
let { start, end } = configs;
if (!(source.startsWith("https") || source.startsWith("http"))) {
if (!(source.startsWith("https") || source.startsWith("http") || source.startsWith("blob"))) {
throw new Error("Cannot read local file in browser environment");
}
let tfdata = [];
Expand Down
16 changes: 8 additions & 8 deletions danfojs-browser/tests/core/groupby.js
Original file line number Diff line number Diff line change
Expand Up @@ -260,14 +260,14 @@ describe("groupby", function () {
let df = new dfd.DataFrame(data);
let group_df = df.groupby([ "A"]);
let rslt = [
[ 'foo', 5, 3 ], [ 'foo', 6, 4 ],
[ 'foo', 7, 7 ], [ 'foo', 9, 8 ],
[ 'foo', 10, 9 ], [ 'foo', 5, 3 ],
[ 'foo', 6, 4 ], [ 'foo', 7, 7 ],
[ 'foo', 9, 8 ], [ 'foo', 10, 9 ],
[ 'bar', 4, 5 ], [ 'bar', 3, 6 ],
[ 'bar', 8, 4 ], [ 'bar', 4, 5 ],
[ 'bar', 3, 6 ], [ 'bar', 8, 4 ]
[ 'foo', 5, 3 ],
[ 'foo', 6, 4 ],
[ 'foo', 7, 7 ],
[ 'foo', 9, 8 ],
[ 'foo', 10, 9 ],
[ 'bar', 4, 5 ],
[ 'bar', 3, 6 ],
[ 'bar', 8, 4 ]
];
assert.deepEqual(group_df.col(['D', 'C']).apply((x) => x.add(2)).values, rslt);
});
Expand Down
6 changes: 5 additions & 1 deletion danfojs-node/dist/core/frame.js
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,10 @@ class DataFrame extends _generic.default {
let column_names = this.column_names;
let col_dict = {};
let key_column = null;
let col_index = col.map(val => column_names.indexOf(val));
let col_dtype = this.dtypes.filter((val, index) => {
return col_index.includes(index);
});

if (col.length == 2) {
if (column_names.includes(col[0])) {
Expand Down Expand Up @@ -986,7 +990,7 @@ class DataFrame extends _generic.default {
}
}

let groups = new _groupby.GroupBy(col_dict, key_column, this.values, column_names).group();
let groups = new _groupby.GroupBy(col_dict, key_column, this.values, column_names, col_dtype).group();
return groups;
}

Expand Down
26 changes: 14 additions & 12 deletions danfojs-node/dist/core/groupby.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@ var _series = require("./series");
const utils = new _utils.Utils();

class GroupBy {
constructor(col_dict, key_col, data, column_name) {
constructor(col_dict, key_col, data, column_name, col_dtype) {
this.key_col = key_col;
this.col_dict = col_dict;
this.data = data;
this.column_name = column_name;
this.data_tensors = {};
this.col_dtype = col_dtype;
}

group() {
Expand Down Expand Up @@ -118,7 +119,7 @@ class GroupBy {
}
}

const gp = new GroupBy(null, this.key_col, null, col_names);
const gp = new GroupBy(null, this.key_col, null, col_names, this.col_dtype);
gp.group_col = group_col;
gp.group_col_name = col_names;
return gp;
Expand Down Expand Up @@ -315,8 +316,8 @@ class GroupBy {
for (let j = 0; j < col_data.length; j++) {
if (typeof key_data[j] === "undefined") {
key_data[j] = [];
key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
key_data[j][1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
key_data[j][1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
key_data[j].push(col_data[j]);
} else {
key_data[j].push(col_data[j]);
Expand All @@ -326,8 +327,8 @@ class GroupBy {

df_data.push(...key_data);
} else {
key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
key_data[1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
key_data[1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
key_data.push(...k_data);
df_data.push(key_data);
}
Expand Down Expand Up @@ -360,17 +361,17 @@ class GroupBy {
for (let j = 0; j < col_data.length; j++) {
if (typeof key_data[j] === "undefined") {
key_data[j] = [];
key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
key_data[j].push(col_data[j]);
} else {
key_data[j].push(col_data[j]);
}
}

df_data.push(...key_data);
}

df_data.push(...key_data);
} else {
key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
key_data.push(...key_val);
df_data.push(key_data);
}
Expand Down Expand Up @@ -408,10 +409,11 @@ class GroupBy {
let count_group = {};

if (this.key_col.length == 2) {
for (let key in this.data_tensors) {
for (let key in df_data) {
count_group[key] = {};

for (let key2 in this.data_tensors[key]) {
for (let key2 in df_data[key]) {
let index;
count_group[key][key2] = [];

for (let i = 0; i < df_data[key][key2].length; i++) {
Expand Down
2 changes: 1 addition & 1 deletion danfojs-node/dist/io/reader.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ const read_csv = async (source, configs = {}) => {
end
} = configs;

if (!(source.startsWith("file://") || source.startsWith("http"))) {
if (!(source.startsWith("file://") || source.startsWith("http") || source.startsWith("blob"))) {
source = source.startsWith("/") ? `file://${source}` : `file://${process.cwd()}/${source}`;
}

Expand Down
8 changes: 6 additions & 2 deletions danfojs-node/src/core/frame.js
Original file line number Diff line number Diff line change
Expand Up @@ -1110,7 +1110,10 @@ export class DataFrame extends Ndframe {
let column_names = this.column_names;
let col_dict = {};
let key_column = null;

let col_index = col.map((val) => column_names.indexOf(val));
let col_dtype = this.dtypes.filter((val, index) => {
return col_index.includes(index);
});
if (col.length == 2) {
if (column_names.includes(col[0])) {
// eslint-disable-next-line no-unused-vars
Expand Down Expand Up @@ -1172,7 +1175,8 @@ export class DataFrame extends Ndframe {
col_dict,
key_column,
this.values,
column_names
column_names,
col_dtype
).group();

return groups;
Expand Down
27 changes: 15 additions & 12 deletions danfojs-node/src/core/groupby.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@ const utils = new Utils;
* @param {column_name} Array of all column names in the dataframe.
*/
export class GroupBy {
constructor(col_dict, key_col, data, column_name) {
constructor(col_dict, key_col, data, column_name, col_dtype) {

this.key_col = key_col;
this.col_dict = col_dict;
this.data = data;
this.column_name = column_name;
this.data_tensors = {}; //store the tensor version of the groupby data
this.col_dtype = col_dtype;

}

Expand Down Expand Up @@ -150,7 +151,8 @@ export class GroupBy {
null,
this.key_col,
null,
col_names
col_names,
this.col_dtype
);

gp.group_col = group_col;
Expand Down Expand Up @@ -367,8 +369,8 @@ export class GroupBy {

if (typeof key_data[j] === "undefined" ){
key_data[j] = [];
key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
key_data[j][1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
key_data[j][1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
key_data[j].push(col_data[j]);
} else {
key_data[j].push(col_data[j]);
Expand All @@ -378,8 +380,8 @@ export class GroupBy {
df_data.push(...key_data);

} else {
key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
key_data[1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
key_data[1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
key_data.push(...k_data);
df_data.push(key_data);
}
Expand Down Expand Up @@ -413,17 +415,17 @@ export class GroupBy {

if (typeof key_data[j] === "undefined" ){
key_data[j] = [];
key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
key_data[j].push(col_data[j]);
} else {
key_data[j].push(col_data[j]);
}
}
df_data.push(...key_data);
}
df_data.push(...key_data);

} else {
key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
key_data.push(...key_val);
df_data.push(key_data);
}
Expand Down Expand Up @@ -456,10 +458,11 @@ export class GroupBy {
let data = [];
let count_group = {};
if (this.key_col.length == 2) {

for (let key in this.data_tensors) {
for (let key in df_data) {
count_group[key] = {};
for (let key2 in this.data_tensors[key]) {
for (let key2 in df_data[key]) {
let index;
count_group[key][key2] = [];
for (let i = 0; i < df_data[key][key2].length; i++ ) {
let callable_rslt = callable(df_data[key][key2][i]);
Expand Down
2 changes: 1 addition & 1 deletion danfojs-node/src/io/reader.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import { DataFrame } from "../core/frame";
*/
export const read_csv = async (source, configs = {}) => {
let { start, end } = configs;
if (!(source.startsWith("file://") || source.startsWith("http"))) {
if (!(source.startsWith("file://") || source.startsWith("http") || source.startsWith("blob"))) {
//probably a relative path, prepend file:// to it
source = source.startsWith("/") ? `file://${source}` : `file://${process.cwd()}/${source}`;
}
Expand Down
17 changes: 8 additions & 9 deletions danfojs-node/tests/core/groupby.js
Original file line number Diff line number Diff line change
Expand Up @@ -262,14 +262,14 @@ describe("groupby", function () {
let df = new DataFrame(data);
let group_df = df.groupby([ "A"]);
let rslt = [
[ 'foo', 5, 3 ], [ 'foo', 6, 4 ],
[ 'foo', 7, 7 ], [ 'foo', 9, 8 ],
[ 'foo', 10, 9 ], [ 'foo', 5, 3 ],
[ 'foo', 6, 4 ], [ 'foo', 7, 7 ],
[ 'foo', 9, 8 ], [ 'foo', 10, 9 ],
[ 'bar', 4, 5 ], [ 'bar', 3, 6 ],
[ 'bar', 8, 4 ], [ 'bar', 4, 5 ],
[ 'bar', 3, 6 ], [ 'bar', 8, 4 ]
[ 'foo', 5, 3 ],
[ 'foo', 6, 4 ],
[ 'foo', 7, 7 ],
[ 'foo', 9, 8 ],
[ 'foo', 10, 9 ],
[ 'bar', 4, 5 ],
[ 'bar', 3, 6 ],
[ 'bar', 8, 4 ]
];
assert.deepEqual(group_df.col(['D', 'C']).apply((x) => x.add(2)).values, rslt);
});
Expand All @@ -293,5 +293,4 @@ describe("groupby", function () {
];
assert.deepEqual(group_df.apply((x) => x.count()).values, rslt);
});

});