From 3d6efda919b94436a34d55b0e849109eee31b896 Mon Sep 17 00:00:00 2001 From: Christian Mang Date: Sun, 27 Jun 2021 12:47:20 +0200 Subject: [PATCH 1/8] Fixed CHANGELOG.md to conform markdownlint --- CHANGELOG.md | 124 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 90 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aad6416..41fb99d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,24 @@ # Change Log + All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). +This document follows +[markdownlint](https://github.com/markdownlint/markdownlint) formatting rules. + +## [0.12.0] + +### Added in 0.12.0 + +### Changed in 0.12.0 + +### Fixed in 0.12.0 + +### Removed in 0.12.0 ## [0.11.0] - 2021-04-25 -### Added + +### Added in 0.11.0 + - Rolling window Mean and StdDev - GroupBy and Aggregate - Numeric column index @@ -13,40 +28,51 @@ This project adheres to [Semantic Versioning](http://semver.org/). - User-defined filters - Concatination of Dataframes -### Changed +### Changed in 0.11.0 + - Make fixColnames faster - Use Go 1.16 - Update dependencies -### Fixed +### Fixed in 0.11.0 + - Linter issues - Failing tests ## [0.10.1] - 2019-11-08 -### Fixed + +### Fixed in 0.10.1 + - LoadRecords printing type debug information - Missing closing brackets in series.go - Fix gonum import path in dataframe_test ## [0.10.0] - 2019-11-08 -### Changed + +### Changed in 0.10.0 + - Merged dev branch changes from multiple collaborators (Sam Zaydel, Kyle Ellrott, Daniela Petruzalek, Christoph Laaber). ## [0.9.0] - 2016-10-03 -### Added + +### Added in 0.9.0 + - Additional method to load arbitrary struct slices to DataFrames (Juan Álvarez) - New LoadOption Names to set initial column names (Sander van Harmelen). 
- Parser option for csv delimiter (Kyle Ellrott) - New Describe method for reporting summary statistics (Daniela Petruzalek) -### Changed +### Changed in 0.9.0 + - Improve the performance of multiple operations. - Code cleanup for better consistency (Sander van Harmelen) - Renamed 'Deselect' function to 'Drop' (Ben Marshall) ## [0.8.0] - 2016-12-12 -### Added + +### Added in 0.8.0 + - Series.Order method and tests. - Series.IsNaN method and tests. - DataFrame.Arrange method and tests. @@ -61,7 +87,8 @@ This project adheres to [Semantic Versioning](http://semver.org/). interface via `DataFrame.Matrix()` and load a `mat64.Matrix` via `dataframe.LoadMatrix()`. -### Changed +### Changed in 0.8.0 + - elementInterface is now exported as Element. - Split element.go into separate files for the implementations of the Element interface. @@ -76,10 +103,12 @@ This project adheres to [Semantic Versioning](http://semver.org/). interface is much nicer, showing only the first 10 rows and limiting the number of characters that can be shown by line -### Removed +### Removed in 0.8.0 + - Some unused functions from the helpers.go file. -### Fix +### Fix in 0.8.0 + - Linter errors. - stringElement.Float now returns NaN instead of 0 when applicable. - Autorenaming column names when `hasHeaders == false` now is @@ -88,7 +117,9 @@ This project adheres to [Semantic Versioning](http://semver.org/). suffix numbers if the number of duplicates was greater than two. ## [0.7.0] - 2016-11-27 -### Added + +### Added in 0.7.0 + - Many more table tests for both `series` and `dataframe` - Set method for `Series` and `DataFrame` - When loading data from CSV, JSON, or Records, different @@ -96,7 +127,8 @@ This project adheres to [Semantic Versioning](http://semver.org/). a default type, manually specifying the column types and others. - More documentation for previously undocumented functions. 
-### Changed +### Changed in 0.7.0 + - The project has been restructured on separated `dataframe` and `series` packages. - Reviewed entire `Series` codebase for better style and @@ -109,23 +141,30 @@ This project adheres to [Semantic Versioning](http://semver.org/). `io.Reader` and `io.Writer` respectively. - Updated README with the new changes. -### Removed +### Removed in 0.7.0 + - Removed unnecessary abstraction layer on `Series.elements` ## [0.6.0] - 2016-10-29 -### Added + +### Added in 0.6.0 + - InnerJoin, CrossJoin, RightJoin, LeftJoin, OuterJoin functions -### Changed +### Changed in 0.6.0 + - More code refactoring for easier maintenance and management - Add more documentation to the exported functions - Remove unnecessary methods and structures from the exported API -### Removed +### Removed in 0.6.0 + - colnames and coltypes from the DataFrame structure ## [0.5.0] - 2016-08-09 -### Added + +### Added in 0.5.0 + - Read and write DataFrames from CSV, JSON, []map[string]interface{}, [][]string. - New constructor for DataFrame accept Series and NamedSeries as @@ -133,18 +172,22 @@ This project adheres to [Semantic Versioning](http://semver.org/). - Subset, Select, Rename, Mutate, Filter, RBind and CBind methods - Much Better error handling -### Changed +### Changed in 0.5.0 + - Almost complete rewrite of DataFrame code. - Now using Series as first class citizens and building blocks for DataFrames. -### Removed +### Removed in 0.5.0 + - Merge/Join functions have been temporarily removed to be adapted to the new architecture. - Cell interface for allowing custom types into the system. ## [0.4.0] - 2016-02-18 -### Added + +### Added in 0.4.0 + - Getter methods for nrows and ncols. - An InnerJoin function that performs an Inner Merge/Join of two DataFrames by the given keys. @@ -156,38 +199,44 @@ This project adheres to [Semantic Versioning](http://semver.org/). return a empty cell for the given type. - Cell interface now have to implement a Copy method. 
-### Changed +### Changed in 0.4.0 + - The `cell` interface is now exported: `Cell`. - Cell method NA() is now IsNA(). - The function parseColumn is now a method. - A number of fields and methods are now expoted. -### Fixed +### Fixed in 0.4.0 + - Now ensuring that generated subsets are in fact new copies entirely, not copying pointers to the same memory address. ## [0.3.0] - 2016-02-18 -### Added + +### Added in 0.3.0 + - Getter and setter methods for the column names of a DataFrame - Bool column type has been made available - New Bool() interface - A `column` now can now if any of it's elements is NA and a list of said NA elements ([]bool). -### Changed +### Changed in 0.3.0 + - Renamed `cell` interface elements to be more idiomatic: - - ToInteger() is now Int() - - ToFloat() is now Float() + - ToInteger() is now Int() + - ToFloat() is now Float() - The `cell` interface has changed. Int() and Float() now return pointers instead of values to prevent future conflicts when - returning an error. + returning an error. - The `cell` interface has changed. Checksum() [16]byte added. - Using cell.Checksum() for identification of unique elements instead of raw strings. - The `cell` interface has changed, now also requires ToBool() method. - String type now does not contain a string, but a pointer to a string. -### Fixed +### Fixed in 0.3.0 + - Bool type constructor function Bools now parses `bool` and `[]bool` elements correctly. - Int type constructor function Ints now parses `bool` and `[]bool` @@ -198,23 +247,29 @@ This project adheres to [Semantic Versioning](http://semver.org/). elements correctly. ## [0.2.1] - 2016-02-14 -### Fixed + +### Fixed in 0.2.1 + - Fixed a bug when the maximum number of characters on a column was not being updated properly when subsetting. ## [0.2.0] - 2016-02-13 -### Added + +### Added in 0.2.0 + - Added a lot of unit tests -### Changed +### Changed in 0.2.0 + - The base types are now `df.String`, `df.Int`, and `df.Float`. 
- Restructured the project in different files. - Refactored the project so that it will allow columns to be of any type as long as it complies with the necessary interfaces. - ## [0.1.0] - 2016-02-06 -### Added + +### Added in 0.1.0 + - Load csv data to DataFrame. - Parse data to four supported types: `int`, `float64`, `date` & `string`. @@ -234,3 +289,4 @@ This project adheres to [Semantic Versioning](http://semver.org/). [0.9.0]:https://github.com/go-gota/gota/compare/v0.8.0...v0.9.0 [0.10.0]:https://github.com/go-gota/gota/compare/v0.9.0...v0.10.0 [0.10.1]:https://github.com/go-gota/gota/compare/v0.10.0...v0.10.1 +[0.11.0]:https://github.com/go-gota/gota/compare/v0.10.1...v0.11.0 From efe9f1ffcb2c539ac2fe85fc63f555ce21a8e7ce Mon Sep 17 00:00:00 2001 From: Arjun Mahishi Date: Sun, 4 Jul 2021 01:02:22 +0530 Subject: [PATCH 2/8] Add GetGroups method to Groups, minor improvements on AggregationType enum (#151) --- dataframe/aggregationtype_string.go | 30 +++++++++++++++++++++ dataframe/dataframe.go | 41 +++++++++-------------------- dataframe/dataframe_test.go | 16 +++++++++++ 3 files changed, 59 insertions(+), 28 deletions(-) create mode 100644 dataframe/aggregationtype_string.go diff --git a/dataframe/aggregationtype_string.go b/dataframe/aggregationtype_string.go new file mode 100644 index 0000000..09fda60 --- /dev/null +++ b/dataframe/aggregationtype_string.go @@ -0,0 +1,30 @@ +// Code generated by "stringer -type=AggregationType -linecomment"; DO NOT EDIT. + +package dataframe + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[Aggregation_MAX-1] + _ = x[Aggregation_MIN-2] + _ = x[Aggregation_MEAN-3] + _ = x[Aggregation_MEDIAN-4] + _ = x[Aggregation_STD-5] + _ = x[Aggregation_SUM-6] + _ = x[Aggregation_COUNT-7] +} + +const _AggregationType_name = "MAXMINMEANMEDIANSTDSUMCOUNT" + +var _AggregationType_index = [...]uint8{0, 3, 6, 10, 16, 19, 22, 27} + +func (i AggregationType) String() string { + i -= 1 + if i < 0 || i >= AggregationType(len(_AggregationType_index)-1) { + return "AggregationType(" + strconv.FormatInt(int64(i+1), 10) + ")" + } + return _AggregationType_name[_AggregationType_index[i]:_AggregationType_index[i+1]] +} diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index df95d55..7ce181b 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -424,37 +424,17 @@ func (df DataFrame) GroupBy(colnames ...string) *Groups { //AggregationType Aggregation method type type AggregationType int +//go:generate stringer -type=AggregationType -linecomment const ( - Aggregation_MAX AggregationType = 0 - Aggregation_MIN AggregationType = 1 - Aggregation_MEAN AggregationType = 2 - Aggregation_MEDIAN AggregationType = 3 - Aggregation_STD AggregationType = 4 - Aggregation_SUM AggregationType = 5 - Aggregation_COUNT AggregationType = 6 + Aggregation_MAX AggregationType = iota + 1 // MAX + Aggregation_MIN // MIN + Aggregation_MEAN // MEAN + Aggregation_MEDIAN // MEDIAN + Aggregation_STD // STD + Aggregation_SUM // SUM + Aggregation_COUNT // COUNT ) -func (aggregation AggregationType) String() string { - switch aggregation { - case Aggregation_MAX: - return "MAX" - case Aggregation_MIN: - return "MIN" - case Aggregation_MEAN: - return "MEAN" - case Aggregation_MEDIAN: - return "MEDIAN" - case Aggregation_STD: - return "STD" - case Aggregation_SUM: - return "SUM" - case Aggregation_COUNT: - return "COUNT" - default: - return "UNKNOWN" - } -} - //Groups : structure generated by groupby type Groups struct { groups map[string]DataFrame @@ -515,6 +495,11 
@@ func (gps Groups) Aggregation(typs []AggregationType, colnames []string) DataFra return gps.aggregation } +// GetGroups returns the grouped data frames created by GroupBy +func (g Groups) GetGroups() map[string]DataFrame { + return g.groups +} + // Rename changes the name of one of the columns of a DataFrame func (df DataFrame) Rename(newname, oldname string) DataFrame { if df.Err != nil { diff --git a/dataframe/dataframe_test.go b/dataframe/dataframe_test.go index 564d193..b41e6a3 100644 --- a/dataframe/dataframe_test.go +++ b/dataframe/dataframe_test.go @@ -2932,3 +2932,19 @@ func TestDataFrame_Aggregation(t *testing.T) { } } } + +func TestGroups_GetGroups(t *testing.T) { + a := New( + series.New([]string{"b", "a", "b", "a", "b"}, series.String, "key1"), + series.New([]int{1, 2, 1, 2, 2}, series.Int, "key2"), + series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "values"), + ) + groups := a.GroupBy("key1", "key2").GetGroups() + groupNames := []string{} + for name := range groups { + groupNames = append(groupNames, name) + } + if len(groupNames) != 3 { + t.Fatalf("Expected to get 3 groups, got %d", len(groupNames)) + } +} From e761527bc6a13523957f74df7191319a822a4a63 Mon Sep 17 00:00:00 2001 From: prliu <2920250+prliu@users.noreply.github.com> Date: Sun, 4 Jul 2021 03:36:21 +0800 Subject: [PATCH 3/8] Fix dataframe.GroupBy issue (#154) (#155) dataframe.GroupBy function will trim the prefix '0' characters in a number string. 
Co-authored-by: prliu --- dataframe/dataframe.go | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 7ce181b..f77998b 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -414,8 +414,14 @@ func (df DataFrame) GroupBy(colnames ...string) *Groups { groupSeries[key] = append(groupSeries[key], s) } + // Save column types + colTypes := map[string]series.Type{} + for _, c := range df.columns { + colTypes[c.Name] = c.Type() + } + for k, cMaps := range groupSeries { - groupDataFrame[k] = LoadMaps(cMaps) + groupDataFrame[k] = LoadMaps(cMaps, WithTypes(colTypes)) } groups := &Groups{groups: groupDataFrame, colnames: colnames} return groups @@ -491,7 +497,23 @@ func (gps Groups) Aggregation(typs []AggregationType, colnames []string) DataFra dfMaps = append(dfMaps, curMap) } - gps.aggregation = LoadMaps(dfMaps) + + // Save column types + colTypes := map[string]series.Type{} + for k := range dfMaps[0] { + switch dfMaps[0][k].(type) { + case string: + colTypes[k] = series.String + case int, int16, int32, int64: + colTypes[k] = series.Int + case float32, float64: + colTypes[k] = series.Float + default: + continue + } + } + + gps.aggregation = LoadMaps(dfMaps, WithTypes(colTypes)) return gps.aggregation } From 31d5d5dd1948c6b37cfc202d8147cd4333b259c2 Mon Sep 17 00:00:00 2001 From: Christian Mang Date: Sat, 3 Jul 2021 22:10:42 +0200 Subject: [PATCH 4/8] Fixed linter issues --- series/series.go | 17 ++++++----------- series/series_test.go | 12 ++++-------- series/type-bool.go | 21 ++++++++++----------- series/type-float.go | 16 +++++++++------- series/type-int.go | 21 ++++++++++----------- series/type-string.go | 17 ++++++++--------- 6 files changed, 47 insertions(+), 57 deletions(-) diff --git a/series/series.go b/series/series.go index 408773d..2c3b756 100644 --- a/series/series.go +++ b/series/series.go @@ -157,37 +157,32 @@ func New(values interface{}, t Type, name 
string) Series { return ret } - switch values.(type) { + switch v := values.(type) { case []string: - v := values.([]string) l := len(v) preAlloc(l) for i := 0; i < l; i++ { ret.elements.Elem(i).Set(v[i]) } case []float64: - v := values.([]float64) l := len(v) preAlloc(l) for i := 0; i < l; i++ { ret.elements.Elem(i).Set(v[i]) } case []int: - v := values.([]int) l := len(v) preAlloc(l) for i := 0; i < l; i++ { ret.elements.Elem(i).Set(v[i]) } case []bool: - v := values.([]bool) l := len(v) preAlloc(l) for i := 0; i < l; i++ { ret.elements.Elem(i).Set(v[i]) } case Series: - v := values.(Series) l := v.Len() preAlloc(l) for i := 0; i < l; i++ { @@ -596,13 +591,13 @@ func (s Series) Elem(i int) Element { // out of bounds checks is performed. func parseIndexes(l int, indexes Indexes) ([]int, error) { var idx []int - switch indexes.(type) { + switch idxs := indexes.(type) { case []int: - idx = indexes.([]int) + idx = idxs case int: - idx = []int{indexes.(int)} + idx = []int{idxs} case []bool: - bools := indexes.([]bool) + bools := idxs if len(bools) != l { return nil, fmt.Errorf("indexing error: index dimensions mismatch") } @@ -612,7 +607,7 @@ func parseIndexes(l int, indexes Indexes) ([]int, error) { } } case Series: - s := indexes.(Series) + s := idxs if err := s.Err; err != nil { return nil, fmt.Errorf("indexing error: new values has errors: %v", err) } diff --git a/series/series_test.go b/series/series_test.go index d8e24fe..38b90d7 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -1631,16 +1631,14 @@ func TestSeries_Map(t *testing.T) { } doubleFloat64 := func(e Element) Element { - var result Element - result = e.Copy() + result := e.Copy() result.Set(result.Float() * 2) return Element(result) } // and two booleans and := func(e Element) Element { - var result Element - result = e.Copy() + result := e.Copy() b, err := result.Bool() if err != nil { t.Errorf("%v", err) @@ -1652,8 +1650,7 @@ func TestSeries_Map(t *testing.T) { // add constant (+5) to 
value (v) add5Int := func(e Element) Element { - var result Element - result = e.Copy() + result := e.Copy() i, err := result.Int() if err != nil { return Element(&intElement{ @@ -1667,8 +1664,7 @@ func TestSeries_Map(t *testing.T) { // trim (XyZ) prefix from string trimXyZPrefix := func(e Element) Element { - var result Element - result = e.Copy() + result := e.Copy() result.Set(strings.TrimPrefix(result.String(), "XyZ")) return Element(result) } diff --git a/series/type-bool.go b/series/type-bool.go index 034bb3f..b7a32fc 100644 --- a/series/type-bool.go +++ b/series/type-bool.go @@ -11,11 +11,14 @@ type boolElement struct { nan bool } +// force boolElement struct to implement Element interface +var _ Element = (*boolElement)(nil) + func (e *boolElement) Set(value interface{}) { e.nan = false - switch value.(type) { + switch val := value.(type) { case string: - if value.(string) == "NaN" { + if val == "NaN" { e.nan = true return } @@ -29,7 +32,7 @@ func (e *boolElement) Set(value interface{}) { return } case int: - switch value.(int) { + switch val { case 1: e.e = true case 0: @@ -39,7 +42,7 @@ func (e *boolElement) Set(value interface{}) { return } case float64: - switch value.(float64) { + switch val { case 1: e.e = true case 0: @@ -49,7 +52,7 @@ func (e *boolElement) Set(value interface{}) { return } case bool: - e.e = value.(bool) + e.e = val case Element: b, err := value.(Element).Bool() if err != nil { @@ -61,7 +64,6 @@ func (e *boolElement) Set(value interface{}) { e.nan = true return } - return } func (e boolElement) Copy() Element { @@ -72,10 +74,7 @@ func (e boolElement) Copy() Element { } func (e boolElement) IsNA() bool { - if e.nan { - return true - } - return false + return e.nan } func (e boolElement) Type() Type { @@ -103,7 +102,7 @@ func (e boolElement) Int() (int, error) { if e.IsNA() { return 0, fmt.Errorf("can't convert NaN to int") } - if e.e == true { + if e.e { return 1, nil } return 0, nil diff --git a/series/type-float.go 
b/series/type-float.go index a722cbd..25bc6c9 100644 --- a/series/type-float.go +++ b/series/type-float.go @@ -11,11 +11,14 @@ type floatElement struct { nan bool } +// force floatElement struct to implement Element interface +var _ Element = (*floatElement)(nil) + func (e *floatElement) Set(value interface{}) { e.nan = false - switch value.(type) { + switch val := value.(type) { case string: - if value.(string) == "NaN" { + if val == "NaN" { e.nan = true return } @@ -26,23 +29,22 @@ func (e *floatElement) Set(value interface{}) { } e.e = f case int: - e.e = float64(value.(int)) + e.e = float64(val) case float64: - e.e = float64(value.(float64)) + e.e = float64(val) case bool: - b := value.(bool) + b := val if b { e.e = 1 } else { e.e = 0 } case Element: - e.e = value.(Element).Float() + e.e = val.Float() default: e.nan = true return } - return } func (e floatElement) Copy() Element { diff --git a/series/type-int.go b/series/type-int.go index 94082a1..031c642 100644 --- a/series/type-int.go +++ b/series/type-int.go @@ -11,11 +11,14 @@ type intElement struct { nan bool } +// force intElement struct to implement Element interface +var _ Element = (*intElement)(nil) + func (e *intElement) Set(value interface{}) { e.nan = false - switch value.(type) { + switch val := value.(type) { case string: - if value.(string) == "NaN" { + if val == "NaN" { e.nan = true return } @@ -26,9 +29,9 @@ func (e *intElement) Set(value interface{}) { } e.e = i case int: - e.e = int(value.(int)) + e.e = int(val) case float64: - f := value.(float64) + f := val if math.IsNaN(f) || math.IsInf(f, 0) || math.IsInf(f, 1) { @@ -37,14 +40,14 @@ func (e *intElement) Set(value interface{}) { } e.e = int(f) case bool: - b := value.(bool) + b := val if b { e.e = 1 } else { e.e = 0 } case Element: - v, err := value.(Element).Int() + v, err := val.Int() if err != nil { e.nan = true return @@ -54,7 +57,6 @@ func (e *intElement) Set(value interface{}) { e.nan = true return } - return } func (e intElement) 
Copy() Element { @@ -65,10 +67,7 @@ func (e intElement) Copy() Element { } func (e intElement) IsNA() bool { - if e.nan { - return true - } - return false + return e.nan } func (e intElement) Type() Type { diff --git a/series/type-string.go b/series/type-string.go index f50e3db..6b8e126 100644 --- a/series/type-string.go +++ b/series/type-string.go @@ -12,17 +12,20 @@ type stringElement struct { nan bool } +// force stringElement struct to implement Element interface +var _ Element = (*stringElement)(nil) + func (e *stringElement) Set(value interface{}) { e.nan = false - switch value.(type) { + switch val := value.(type) { case string: - e.e = string(value.(string)) + e.e = string(val) if e.e == "NaN" { e.nan = true return } case int: - e.e = strconv.Itoa(value.(int)) + e.e = strconv.Itoa(val) case float64: e.e = strconv.FormatFloat(value.(float64), 'f', 6, 64) case bool: @@ -33,12 +36,11 @@ func (e *stringElement) Set(value interface{}) { e.e = "false" } case Element: - e.e = value.(Element).String() + e.e = val.String() default: e.nan = true return } - return } func (e stringElement) Copy() Element { @@ -49,10 +51,7 @@ func (e stringElement) Copy() Element { } func (e stringElement) IsNA() bool { - if e.nan { - return true - } - return false + return e.nan } func (e stringElement) Type() Type { From 05cad5afa8375ceb03bddce8047ac345d2f7455e Mon Sep 17 00:00:00 2001 From: Joseph Edsel Bonilla Date: Tue, 6 Jul 2021 02:54:44 +0800 Subject: [PATCH 5/8] Add Series.Slice (#118) * Add Series.Slice * simplify error checking * Add test case for j < 0 || k >= s.Len() * use make([]int, k-j) for better performance --- series/series.go | 20 ++++++++++++++ series/series_test.go | 61 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/series/series.go b/series/series.go index 2c3b756..18a8392 100644 --- a/series/series.go +++ b/series/series.go @@ -813,3 +813,23 @@ func (s Series) Sum() float64 { } return sum } + +// Slice slices Series 
from j to k-1 index. +func (s Series) Slice(j, k int) Series { + if s.Err != nil { + return s + } + + if j > k || j < 0 || k >= s.Len() { + empty := s.Empty() + empty.Err = fmt.Errorf("slice index out of bounds") + return empty + } + + idxs := make([]int, k-j) + for i := 0; j+i < k; i++ { + idxs[i] = j + i + } + + return s.Subset(idxs) +} diff --git a/series/series_test.go b/series/series_test.go index 38b90d7..70cb442 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -1774,3 +1774,64 @@ func TestSeries_Sum(t *testing.T) { } } } + +func TestSeries_Slice(t *testing.T) { + seriesWithErr := Ints([]int{}) + seriesWithErr.Err = fmt.Errorf("slice index out of bounds") + + tests := []struct { + j int + k int + series Series + expected Series + }{ + { + 0, + 3, + Ints([]int{1, 2, 3, 4, 5}), + Ints([]int{1, 2, 3}), + }, + { + 1, + 1, + Ints([]int{1, 2, 3, 4, 5}), + Ints([]int{}), + }, + { + -1, + 1, + Ints([]int{1, 2, 3, 4, 5}), + seriesWithErr, + }, + { + 0, + 5, + Ints([]int{1, 2, 3, 4, 5}), + seriesWithErr, + }, + } + + for testnum, test := range tests { + expected := test.expected + received := test.series.Slice(test.j, test.k) + + for i := 0; i < expected.Len(); i++ { + if strings.Compare(expected.Elem(i).String(), + received.Elem(i).String()) != 0 { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } + + if expected.Err != nil { + if received.Err == nil || expected.Err.Error() != received.Err.Error() { + t.Errorf( + "Test:%v\nExpected error:\n%v\nReceived:\n%v", + testnum, expected.Err, received.Err, + ) + } + } + } +} From c3cbefdd3798efd7ee92a25a4b3fa6ed04db6915 Mon Sep 17 00:00:00 2001 From: fredericlemoine Date: Sun, 3 Oct 2021 20:36:10 +0200 Subject: [PATCH 6/8] Added csv lazy quote (#162) --- dataframe/dataframe.go | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index e5da9f2..1b2ff90 100644 --- 
a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -947,6 +947,9 @@ type loadOptions struct { // Defines the csv delimiter delimiter rune + // EnablesLazyQuotes + lazyQuotes bool + // Defines the comment delimiter comment rune @@ -1003,6 +1006,13 @@ func WithDelimiter(b rune) LoadOption { } } +// WithLazyQuotes sets csv parsing option to LazyQuotes +func WithLazyQuotes(b bool) LoadOption { + return func(c *loadOptions) { + c.lazyQuotes = b + } +} + // WithComments sets the csv comment line detect to remove lines func WithComments(b rune) LoadOption { return func(c *loadOptions) { @@ -1324,17 +1334,17 @@ func LoadMatrix(mat Matrix) DataFrame { func ReadCSV(r io.Reader, options ...LoadOption) DataFrame { csvReader := csv.NewReader(r) cfg := loadOptions{ - delimiter: ',', + delimiter: ',', + lazyQuotes: false, + comment: 0, } for _, option := range options { option(&cfg) } - if cfg.delimiter != ',' { - csvReader.Comma = cfg.delimiter - } - if cfg.comment != 0 { - csvReader.Comment = cfg.comment - } + + csvReader.Comma = cfg.delimiter + csvReader.LazyQuotes = cfg.lazyQuotes + csvReader.Comment = cfg.comment records, err := csvReader.ReadAll() if err != nil { From 8f0ee430cfc8edd28cf3a8a28d0b1eed821ee252 Mon Sep 17 00:00:00 2001 From: Marc Colosimo <63918973+mcolosimo-p4@users.noreply.github.com> Date: Sun, 10 Oct 2021 14:27:52 -0400 Subject: [PATCH 7/8] making series Order stable (#164) Co-authored-by: Marc E. 
Colosimo --- dataframe/dataframe_test.go | 52 +++++++++++++++++++++++++++++++++++++ series/series.go | 2 +- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/dataframe/dataframe_test.go b/dataframe/dataframe_test.go index 23f2acd..6cb0c2b 100644 --- a/dataframe/dataframe_test.go +++ b/dataframe/dataframe_test.go @@ -2423,6 +2423,58 @@ func TestDataFrame_Arrange(t *testing.T) { } } +func TestDataFrame_Arrange2(t *testing.T) { + table := []struct { + df DataFrame + colnames []Order + expDf DataFrame + }{ + { + New( + series.New([]string{"A", "C", "B", "D", "C", "A", "D", "B"}, series.String, "A"), + series.New([]string{"103", "103", "103", "103", "100", "100", "100", "100"}, series.Int, "B"), + ), + []Order{Sort("B")}, + New( + series.New([]string{"C", "A", "D", "B", "A", "C", "B", "D"}, series.String, "A"), + series.New([]string{"100", "100", "100", "100", "103", "103", "103", "103"}, series.Int, "B"), + ), + }, + { + New( + series.New([]string{"A", "C", "B", "D", "C", "A", "D", "B"}, series.String, "A"), + series.New([]string{"103", "103", "103", "103", "100", "100", "100", "100"}, series.Int, "B"), + ), + []Order{Sort("A"), Sort("B")}, + New( + series.New([]string{"A", "A", "B", "B", "C", "C", "D", "D"}, series.String, "A"), + series.New([]string{"100", "103", "100", "103", "100", "103", "100", "103"}, series.Int, "B"), + ), + }, + } + for i, tc := range table { + b := tc.df.Arrange(tc.colnames...) 
+ + if b.Err != nil { + t.Errorf("Test: %d\nError:%v", i, b.Err) + } + // Check that the types are the same between both DataFrames + if !reflect.DeepEqual(tc.expDf.Types(), b.Types()) { + t.Errorf("Test: %d\nDifferent types:\nExpected:%v\nRecieved:%v", i, tc.expDf.Types(), b.Types()) + } + // Check that the colnames are the same between both DataFrames + if !reflect.DeepEqual(tc.expDf.Names(), b.Names()) { + t.Errorf("Test: %d\nDifferent colnames:\nExpected:%v\nRecieved:%v", i, tc.expDf.Names(), b.Names()) + } + // Check that the values are the same between both DataFrames + tcr := tc.expDf.Records() + br := b.Records() + if !reflect.DeepEqual(tcr, br) { + t.Errorf("Test: %d\nDifferent values:\nExpected:%v\nRecieved:%v", i, tcr, br) + } + } +} + func TestDataFrame_Capply(t *testing.T) { a := LoadRecords( [][]string{ diff --git a/series/series.go b/series/series.go index 18a8392..13f06f0 100644 --- a/series/series.go +++ b/series/series.go @@ -650,7 +650,7 @@ func (s Series) Order(reverse bool) []int { if reverse { srt = sort.Reverse(srt) } - sort.Sort(srt) + sort.Stable(srt) var ret []int for _, e := range ie { ret = append(ret, e.index) From d3ca268c8be78994a6a674e75ebfa924f90af13d Mon Sep 17 00:00:00 2001 From: Christian Mang Date: Sun, 10 Oct 2021 20:54:40 +0200 Subject: [PATCH 8/8] CHANGELOG; Err deprecation --- CHANGELOG.md | 13 +++++++++++-- dataframe/dataframe.go | 9 ++++++++- series/series.go | 9 ++++++++- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 878cf78..a01e4f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,15 +5,23 @@ This project adheres to [Semantic Versioning](http://semver.org/). This document follows [markdownlint](https://github.com/markdownlint/markdownlint) formatting rules. 
-## [0.12.0] +## [0.12.0] - 2021-10-10 ### Added in 0.12.0 +- Add dataframe.GetGroups (@arjunmahishi) +- Add Series.Slice (@jfussion) +- Add csv lazy quote (@fredericlemoine) + ### Changed in 0.12.0 +- series.Err is deprecated; use Error() instead +- dataframe.Err is deprecated; use Error() instead + ### Fixed in 0.12.0 -### Removed in 0.12.0 +- Fix dataframe.GroupBy issue (@prliu) +- Make Series.Order stable (@mcolosimo-p4) ## [0.11.0] - 2021-06-27 @@ -290,3 +298,4 @@ This document follows [0.10.0]:https://github.com/go-gota/gota/compare/v0.9.0...v0.10.0 [0.10.1]:https://github.com/go-gota/gota/compare/v0.10.0...v0.10.1 [0.11.0]:https://github.com/go-gota/gota/compare/v0.10.1...v0.11.0 +[0.12.0]:https://github.com/go-gota/gota/compare/v0.11.0...v0.12.0 diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 1b2ff90..51d38f6 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -34,7 +34,9 @@ type DataFrame struct { columns []series.Series ncols int nrows int - Err error + + // Deprecated: Use Error() instead + Err error } // New is the generic DataFrame constructor @@ -103,6 +105,11 @@ func (df DataFrame) String() (str string) { return df.print(true, true, true, true, 10, 70, "DataFrame") } +// Returns error or nil if no error occurred +func (df *DataFrame) Error() error { + return df.Err +} + func (df DataFrame) print( shortRows, shortCols, showDims, showTypes bool, maxRows int, diff --git a/series/series.go b/series/series.go index 13f06f0..69de4ca 100644 --- a/series/series.go +++ b/series/series.go @@ -20,7 +20,9 @@ type Series struct { Name string // The name of the series elements Elements // The values of the elements t Type // The type of the series - Err error // If there are errors they are stored here + + // Deprecated: use Error() instead + Err error } // Elements is the interface that represents the array of elements contained on @@ -234,6 +236,11 @@ func (s Series) Empty() Series { return New([]int{}, s.t, s.Name) } +// 
Returns Error or nil if no error occurred +func (s *Series) Error() error { + return s.Err +} + // Append adds new elements to the end of the Series. When using Append, the // Series is modified in place. func (s *Series) Append(values interface{}) {