javascriptdata · risenW · Apr 25, 2021 · Apr 23, 2021 · Apr 23, 2021 · Apr 23, 2021
diff --git a/danfojs-browser/lib/bundle.js b/danfojs-browser/lib/bundle.js
diff --git a/danfojs-browser/lib/bundle.js.map b/danfojs-browser/lib/bundle.js.map
diff --git a/danfojs-browser/src/core/frame.js b/danfojs-browser/src/core/frame.js
@@ -1112,6 +1112,10 @@ export class DataFrame extends Ndframe {
     let column_names = this.column_names;
     let col_dict = {};
     let key_column = null;
+    let col_index = col.map((val) => column_names.indexOf(val));
+    let col_dtype = this.dtypes.filter((val, index) => {
+      return col_index.includes(index);
+    });
 
     if (col.length == 2) {
       if (column_names.includes(col[0])) {
@@ -1174,7 +1178,8 @@ export class DataFrame extends Ndframe {
       col_dict,
       key_column,
       this.values,
-      column_names
+      column_names,
+      col_dtype
     ).group();
 
     return groups;

diff --git a/danfojs-browser/src/core/groupby.js b/danfojs-browser/src/core/groupby.js
@@ -12,13 +12,14 @@ const utils = new Utils;
  * @param {column_name} Array of all column name in the dataframe.
  */
 export class GroupBy {
-  constructor(col_dict, key_col, data, column_name) {
+  constructor(col_dict, key_col, data, column_name, col_dtype) {
 
     this.key_col = key_col;
     this.col_dict = col_dict;
     this.data = data;
     this.column_name = column_name;
     this.data_tensors = {}; //store the tensor version of the groupby data
+    this.col_dtype = col_dtype;
 
   }
 
@@ -150,7 +151,8 @@ export class GroupBy {
       null,
       this.key_col,
       null,
-      col_names
+      col_names,
+      this.col_dtype
     );
 
     gp.group_col = group_col;
@@ -376,8 +378,8 @@ export class GroupBy {
 
                 if (typeof key_data[j] === "undefined" ){
                   key_data[j] = [];
-                  key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
-                  key_data[j][1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
+                  key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
+                  key_data[j][1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
                   key_data[j].push(col_data[j]);
                 } else {
                   key_data[j].push(col_data[j]);
@@ -387,8 +389,8 @@ export class GroupBy {
             df_data.push(...key_data);
 
           } else {
-            key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
-            key_data[1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
+            key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
+            key_data[1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
             key_data.push(...k_data);
             df_data.push(key_data);
           }
@@ -422,17 +424,17 @@ export class GroupBy {
 
               if (typeof key_data[j] === "undefined" ){
                 key_data[j] = [];
-                key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
+                key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
                 key_data[j].push(col_data[j]);
               } else {
                 key_data[j].push(col_data[j]);
               }
             }
-            df_data.push(...key_data);
+            // df_data.push(...key_data);
           }
-
+          df_data.push(...key_data);
         } else {
-          key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
+          key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
           key_data.push(...key_val);
           df_data.push(key_data);
         }
@@ -465,10 +467,11 @@ export class GroupBy {
     let data = [];
     let count_group = {};
     if (this.key_col.length == 2) {
-
-      for (let key in this.data_tensors) {
+      
+      for (let key in df_data) {
         count_group[key] = {};
-        for (let key2 in this.data_tensors[key]) {
+        for (let key2 in df_data[key]) {
+          let index;
           count_group[key][key2] = [];
           for (let i = 0; i < df_data[key][key2].length; i++ ) {
             let callable_rslt = callable(df_data[key][key2][i]);

diff --git a/danfojs-browser/src/io/reader.js b/danfojs-browser/src/io/reader.js
@@ -19,7 +19,7 @@ import { DataFrame } from "../core/frame";
  */
 export const read_csv = async (source, configs = {}) => {
   let { start, end } = configs;
-  if (!(source.startsWith("https") || source.startsWith("http"))) {
+  if (!(source.startsWith("https") || source.startsWith("http") || source.startsWith("blob"))) {
     throw new Error("Cannot read local file in browser environment");
   }
   let tfdata = [];

diff --git a/danfojs-browser/tests/core/groupby.js b/danfojs-browser/tests/core/groupby.js
@@ -260,14 +260,14 @@ describe("groupby", function () {
     let df = new dfd.DataFrame(data);
     let group_df = df.groupby([ "A"]);
     let rslt = [
-      [ 'foo', 5, 3 ], [ 'foo', 6, 4 ],
-      [ 'foo', 7, 7 ], [ 'foo', 9, 8 ],
-      [ 'foo', 10, 9 ], [ 'foo', 5, 3 ],
-      [ 'foo', 6, 4 ], [ 'foo', 7, 7 ],
-      [ 'foo', 9, 8 ], [ 'foo', 10, 9 ],
-      [ 'bar', 4, 5 ], [ 'bar', 3, 6 ],
-      [ 'bar', 8, 4 ], [ 'bar', 4, 5 ],
-      [ 'bar', 3, 6 ], [ 'bar', 8, 4 ]
+      [ 'foo', 5, 3 ],
+      [ 'foo', 6, 4 ],
+      [ 'foo', 7, 7 ],
+      [ 'foo', 9, 8 ],
+      [ 'foo', 10, 9 ],
+      [ 'bar', 4, 5 ],
+      [ 'bar', 3, 6 ],
+      [ 'bar', 8, 4 ]
     ];
     assert.deepEqual(group_df.col(['D', 'C']).apply((x) => x.add(2)).values, rslt);
   });

diff --git a/danfojs-node/dist/core/frame.js b/danfojs-node/dist/core/frame.js
@@ -928,6 +928,10 @@ class DataFrame extends _generic.default {
     let column_names = this.column_names;
     let col_dict = {};
     let key_column = null;
+    let col_index = col.map(val => column_names.indexOf(val));
+    let col_dtype = this.dtypes.filter((val, index) => {
+      return col_index.includes(index);
+    });
 
     if (col.length == 2) {
       if (column_names.includes(col[0])) {
@@ -986,7 +990,7 @@ class DataFrame extends _generic.default {
       }
     }
 
-    let groups = new _groupby.GroupBy(col_dict, key_column, this.values, column_names).group();
+    let groups = new _groupby.GroupBy(col_dict, key_column, this.values, column_names, col_dtype).group();
     return groups;
   }
 

diff --git a/danfojs-node/dist/core/groupby.js b/danfojs-node/dist/core/groupby.js
@@ -14,12 +14,13 @@ var _series = require("./series");
 const utils = new _utils.Utils();
 
 class GroupBy {
-  constructor(col_dict, key_col, data, column_name) {
+  constructor(col_dict, key_col, data, column_name, col_dtype) {
     this.key_col = key_col;
     this.col_dict = col_dict;
     this.data = data;
     this.column_name = column_name;
     this.data_tensors = {};
+    this.col_dtype = col_dtype;
   }
 
   group() {
@@ -118,7 +119,7 @@ class GroupBy {
       }
     }
 
-    const gp = new GroupBy(null, this.key_col, null, col_names);
+    const gp = new GroupBy(null, this.key_col, null, col_names, this.col_dtype);
     gp.group_col = group_col;
     gp.group_col_name = col_names;
     return gp;
@@ -315,8 +316,8 @@ class GroupBy {
               for (let j = 0; j < col_data.length; j++) {
                 if (typeof key_data[j] === "undefined") {
                   key_data[j] = [];
-                  key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
-                  key_data[j][1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
+                  key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
+                  key_data[j][1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
                   key_data[j].push(col_data[j]);
                 } else {
                   key_data[j].push(col_data[j]);
@@ -326,8 +327,8 @@ class GroupBy {
 
             df_data.push(...key_data);
           } else {
-            key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
-            key_data[1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
+            key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
+            key_data[1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
             key_data.push(...k_data);
             df_data.push(key_data);
           }
@@ -360,17 +361,17 @@ class GroupBy {
             for (let j = 0; j < col_data.length; j++) {
               if (typeof key_data[j] === "undefined") {
                 key_data[j] = [];
-                key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
+                key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
                 key_data[j].push(col_data[j]);
               } else {
                 key_data[j].push(col_data[j]);
               }
             }
-
-            df_data.push(...key_data);
           }
+
+          df_data.push(...key_data);
         } else {
-          key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
+          key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
           key_data.push(...key_val);
           df_data.push(key_data);
         }
@@ -408,10 +409,11 @@ class GroupBy {
     let count_group = {};
 
     if (this.key_col.length == 2) {
-      for (let key in this.data_tensors) {
+      for (let key in df_data) {
         count_group[key] = {};
 
-        for (let key2 in this.data_tensors[key]) {
+        for (let key2 in df_data[key]) {
+          let index;
           count_group[key][key2] = [];
 
           for (let i = 0; i < df_data[key][key2].length; i++) {

diff --git a/danfojs-node/dist/io/reader.js b/danfojs-node/dist/io/reader.js
@@ -25,7 +25,7 @@ const read_csv = async (source, configs = {}) => {
     end
   } = configs;
 
-  if (!(source.startsWith("file://") || source.startsWith("http"))) {
+  if (!(source.startsWith("file://") || source.startsWith("http") || source.startsWith("blob"))) {
     source = source.startsWith("/") ? `file://${source}` : `file://${process.cwd()}/${source}`;
   }
 

diff --git a/danfojs-node/src/core/frame.js b/danfojs-node/src/core/frame.js
@@ -1110,7 +1110,10 @@ export class DataFrame extends Ndframe {
     let column_names = this.column_names;
     let col_dict = {};
     let key_column = null;
-
+    let col_index = col.map((val) => column_names.indexOf(val));
+    let col_dtype = this.dtypes.filter((val, index) => {
+      return col_index.includes(index);
+    });
     if (col.length == 2) {
       if (column_names.includes(col[0])) {
         // eslint-disable-next-line no-unused-vars
@@ -1172,7 +1175,8 @@ export class DataFrame extends Ndframe {
       col_dict,
       key_column,
       this.values,
-      column_names
+      column_names,
+      col_dtype
     ).group();
 
     return groups;

diff --git a/danfojs-node/src/core/groupby.js b/danfojs-node/src/core/groupby.js
@@ -12,13 +12,14 @@ const utils = new Utils;
  * @param {column_name} Array of all column name in the dataframe.
  */
 export class GroupBy {
-  constructor(col_dict, key_col, data, column_name) {
+  constructor(col_dict, key_col, data, column_name, col_dtype) {
 
     this.key_col = key_col;
     this.col_dict = col_dict;
     this.data = data;
     this.column_name = column_name;
     this.data_tensors = {}; //store the tensor version of the groupby data
+    this.col_dtype = col_dtype;
 
   }
 
@@ -150,7 +151,8 @@ export class GroupBy {
       null,
       this.key_col,
       null,
-      col_names
+      col_names,
+      this.col_dtype
     );
 
     gp.group_col = group_col;
@@ -367,8 +369,8 @@ export class GroupBy {
 
                 if (typeof key_data[j] === "undefined" ){
                   key_data[j] = [];
-                  key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
-                  key_data[j][1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
+                  key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
+                  key_data[j][1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
                   key_data[j].push(col_data[j]);
                 } else {
                   key_data[j].push(col_data[j]);
@@ -378,8 +380,8 @@ export class GroupBy {
             df_data.push(...key_data);
 
           } else {
-            key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
-            key_data[1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
+            key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
+            key_data[1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
             key_data.push(...k_data);
             df_data.push(key_data);
           }
@@ -413,17 +415,17 @@ export class GroupBy {
 
               if (typeof key_data[j] === "undefined" ){
                 key_data[j] = [];
-                key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
+                key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
                 key_data[j].push(col_data[j]);
               } else {
                 key_data[j].push(col_data[j]);
               }
             }
-            df_data.push(...key_data);
           }
+          df_data.push(...key_data);
 
         } else {
-          key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
+          key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
           key_data.push(...key_val);
           df_data.push(key_data);
         }
@@ -456,10 +458,11 @@ export class GroupBy {
     let data = [];
     let count_group = {};
     if (this.key_col.length == 2) {
-
-      for (let key in this.data_tensors) {
+      
+      for (let key in df_data) {
         count_group[key] = {};
-        for (let key2 in this.data_tensors[key]) {
+        for (let key2 in df_data[key]) {
+          let index;
           count_group[key][key2] = [];
           for (let i = 0; i < df_data[key][key2].length; i++ ) {
             let callable_rslt = callable(df_data[key][key2][i]);

diff --git a/danfojs-node/src/io/reader.js b/danfojs-node/src/io/reader.js
@@ -17,7 +17,7 @@ import { DataFrame } from "../core/frame";
  */
 export const read_csv = async (source, configs = {}) => {
   let { start, end } = configs;
-  if (!(source.startsWith("file://") || source.startsWith("http"))) {
+  if (!(source.startsWith("file://") || source.startsWith("http") || source.startsWith("blob"))) {
     //probabily a relative path, append file:// to it
     source = source.startsWith("/") ? `file://${source}` : `file://${process.cwd()}/${source}`;
   }

diff --git a/danfojs-node/tests/core/groupby.js b/danfojs-node/tests/core/groupby.js
@@ -262,14 +262,14 @@ describe("groupby", function () {
     let df = new DataFrame(data);
     let group_df = df.groupby([ "A"]);
     let rslt = [
-      [ 'foo', 5, 3 ], [ 'foo', 6, 4 ],
-      [ 'foo', 7, 7 ], [ 'foo', 9, 8 ],
-      [ 'foo', 10, 9 ], [ 'foo', 5, 3 ],
-      [ 'foo', 6, 4 ], [ 'foo', 7, 7 ],
-      [ 'foo', 9, 8 ], [ 'foo', 10, 9 ],
-      [ 'bar', 4, 5 ], [ 'bar', 3, 6 ],
-      [ 'bar', 8, 4 ], [ 'bar', 4, 5 ],
-      [ 'bar', 3, 6 ], [ 'bar', 8, 4 ]
+      [ 'foo', 5, 3 ],
+      [ 'foo', 6, 4 ],
+      [ 'foo', 7, 7 ],
+      [ 'foo', 9, 8 ],
+      [ 'foo', 10, 9 ],
+      [ 'bar', 4, 5 ],
+      [ 'bar', 3, 6 ],
+      [ 'bar', 8, 4 ]
     ];
     assert.deepEqual(group_df.col(['D', 'C']).apply((x) => x.add(2)).values, rslt);
   });
@@ -293,5 +293,4 @@ describe("groupby", function () {
     ];
     assert.deepEqual(group_df.apply((x) => x.count()).values, rslt);
   });
-
 });