Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add some functionality #180

Open
wants to merge 62 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 49 commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
d3c9b6a
go mod
mqy527 Jun 10, 2021
37caee1
1、add some functions in Series : Shift、CumProd、Prod、AddConst、MulConst…
mqy527 Jun 11, 2021
e357493
rolling not test
mqy527 Jun 11, 2021
8e217e5
fix rolling.max and rolling.min
mqy527 Jun 11, 2021
adce640
Rolling.Mean、Quantile、Median、StdDev
mqy527 Jun 15, 2021
1768517
series And、Or
mqy527 Jun 16, 2021
aef0878
logic_test
mqy527 Jun 16, 2021
a40de6b
modify MapFunction,add index param:index
mqy527 Jun 16, 2021
be99528
fix test case
mqy527 Jun 16, 2021
957270b
add rolling series name
mqy527 Jun 17, 2021
7a5c902
fix:series name
mqy527 Jun 18, 2021
49bffe2
add method: series.Operation
mqy527 Jun 21, 2021
6be74a6
Number.Sub、Div、Mod
mqy527 Jun 22, 2021
e49182b
modify go.mod
mqy527 Jul 14, 2021
63dd7f3
modify mod path
mqy527 Jul 22, 2021
dcc2401
optimize Series.Elem
mqy527 Sep 26, 2021
e5e5bed
optimize DataFrame
mqy527 Nov 9, 2021
b7bcff4
Rolling, add method: Apply、MeanByWeights
mqy527 Feb 22, 2022
7e733e3
refactor rolling
mqy527 Mar 3, 2022
2847476
optimize rolling
mqy527 Mar 8, 2022
3ad37b0
Merge from 'go-gota/gota'
mqy527 Mar 8, 2022
c1c2ba8
Remove redundant code
mqy527 Mar 8, 2022
b8b1898
optimize:Series.Slice
mqy527 Mar 9, 2022
9c5bef1
rolling: add descriptions
mqy527 Mar 9, 2022
4292dbd
delete Element.NA()
mqy527 Mar 9, 2022
c0fe7cd
modify module name
mqy527 Mar 9, 2022
e3829e0
optimize:series logic
mqy527 Mar 9, 2022
ca5da43
refactor some method
mqy527 Mar 15, 2022
fd76827
optimize series.Shift
mqy527 Mar 15, 2022
e0847fe
optimize series.Shift
mqy527 Mar 16, 2022
5913f0c
Modify comments
mqy527 Mar 16, 2022
d426b3e
optimize
mqy527 Mar 17, 2022
e4b68c6
modify module
mqy527 Mar 18, 2022
7fbfed2
modify module name
mqy527 Mar 20, 2022
9289c82
modify module name
mqy527 Mar 20, 2022
07dca74
rolling cache
mqy527 Mar 28, 2022
090fb6c
Merge branch 'master' of https://github.com/mqy527/gota
mqy527 Mar 28, 2022
789925a
cacheAble
mqy527 Mar 29, 2022
9f25bdb
unstable
mqy527 Mar 30, 2022
fae5106
cacheAbleRollingSeries
mqy527 Mar 30, 2022
5ac7a3d
cacheAbleSeries
mqy527 Mar 31, 2022
1b91cf9
cacheAbleSeries
mqy527 Mar 31, 2022
54fe127
cacheAbleSeries
mqy527 Apr 1, 2022
6a0ce85
when, wrap: for special operations
mqy527 Apr 1, 2022
2ac0c44
add comment
mqy527 Apr 2, 2022
d1a94cd
fix bug:seriesCache.set
mqy527 Apr 4, 2022
d85a663
Series.Filter
mqy527 May 9, 2022
029d698
add:DataQuantile()
mqy527 May 11, 2022
3b9f6be
add: rolling Quantile
mqy527 May 12, 2022
400ca1d
DataFrame.Slice
mqy527 May 16, 2022
09e42af
optimize Series.DataQuantile(s)
mqy527 May 17, 2022
e3b8e38
self
mqy527 May 20, 2022
7d91dc2
DataFrame FromSeries
mqy527 May 22, 2022
26ec178
optimize cache
mqy527 May 24, 2022
4ad05e9
immutable series
mqy527 May 24, 2022
836a329
optimize cacheable
mqy527 May 25, 2022
5fc68cf
delete cache
mqy527 May 25, 2022
b54ea74
delete some AddConst、MulConst、DivConst from cacheAbleSeries
mqy527 Jun 13, 2022
97b5a4d
Sum support Bool
mqy527 Jul 7, 2022
3962767
add method:CapplyWithName、Rename、RemoveCols
mqy527 Dec 24, 2022
7a1c43b
change method name:CapplyWithName-->CapplyByName
mqy527 Dec 25, 2022
a3a8aac
add FloatValuer
mqy527 Jul 19, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dataframe/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import (
"strconv"
"testing"

"github.com/go-gota/gota/dataframe"
"github.com/go-gota/gota/series"
"github.com/mqy527/gota/dataframe"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hope this is a mistake - keep go-gota here.

"github.com/mqy527/gota/series"
)

func generateSeries(n, rep int) (data []series.Series) {
Expand Down
113 changes: 67 additions & 46 deletions dataframe/dataframe.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
"strings"
"unicode/utf8"

"github.com/go-gota/gota/series"
"github.com/mqy527/gota/series"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
Expand Down Expand Up @@ -41,7 +41,7 @@ type DataFrame struct {

// New is the generic DataFrame constructor
func New(se ...series.Series) DataFrame {
if se == nil || len(se) == 0 {
if len(se) == 0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch

return DataFrame{Err: fmt.Errorf("empty DataFrame")}
}

Expand All @@ -63,7 +63,7 @@ func New(se ...series.Series) DataFrame {
colnames := df.Names()
fixColnames(colnames)
for i, colname := range colnames {
df.columns[i].Name = colname
df.columns[i].SetName(colname)
}
return df
}
Expand All @@ -76,8 +76,8 @@ func checkColumnsDimensions(se ...series.Series) (nrows, ncols int, err error) {
return
}
for i, s := range se {
if s.Err != nil {
err = fmt.Errorf("error on series %d: %v", i, s.Err)
if s.Error() != nil {
err = fmt.Errorf("error on series %d: %v", i, s.Error())
return
}
if nrows == -1 {
Expand All @@ -102,7 +102,7 @@ func (df DataFrame) Copy() DataFrame {

// String implements the Stringer interface for DataFrame
func (df DataFrame) String() (str string) {
return df.print(true, true, true, true, 10, 70, "DataFrame")
return df.print(true, false, true, true, 10, 70, "DataFrame")
}

// Returns error or nil if no error occurred
Expand Down Expand Up @@ -273,8 +273,8 @@ func (df DataFrame) Set(indexes series.Indexes, newvalues DataFrame) DataFrame {
columns := make([]series.Series, df.ncols)
for i, s := range df.columns {
columns[i] = s.Set(indexes, newvalues.columns[i])
if columns[i].Err != nil {
df = DataFrame{Err: fmt.Errorf("setting error on column %d: %v", i, columns[i].Err)}
if columns[i].Error() != nil {
df = DataFrame{Err: fmt.Errorf("setting error on column %d: %v", i, columns[i].Error())}
return df
}
}
Expand Down Expand Up @@ -343,7 +343,7 @@ func (df DataFrame) Select(indexes SelectIndexes) DataFrame {
colnames := df.Names()
fixColnames(colnames)
for i, colname := range colnames {
df.columns[i].Name = colname
df.columns[i].SetName(colname)
}
return df
}
Expand Down Expand Up @@ -375,7 +375,7 @@ func (df DataFrame) Drop(indexes SelectIndexes) DataFrame {
colnames := df.Names()
fixColnames(colnames)
for i, colname := range colnames {
df.columns[i].Name = colname
df.columns[i].SetName(colname)
}
return df
}
Expand Down Expand Up @@ -424,7 +424,7 @@ func (df DataFrame) GroupBy(colnames ...string) *Groups {
// Save column types
colTypes := map[string]series.Type{}
for _, c := range df.columns {
colTypes[c.Name] = c.Type()
colTypes[c.Name()] = c.Type()
}

for k, cMaps := range groupSeries {
Expand Down Expand Up @@ -542,7 +542,7 @@ func (df DataFrame) Rename(newname, oldname string) DataFrame {
}

copy := df.Copy()
copy.columns[idx].Name = newname
copy.columns[idx].SetName(newname)
return copy
}

Expand Down Expand Up @@ -577,7 +577,7 @@ func (df DataFrame) RBind(dfb DataFrame) DataFrame {
originalSeries := df.columns[k]
addedSeries := dfb.columns[idx]
newSeries := originalSeries.Concat(addedSeries)
if err := newSeries.Err; err != nil {
if err := newSeries.Error(); err != nil {
return DataFrame{Err: fmt.Errorf("rbind: %v", err)}
}
expandedSeries[k] = newSeries
Expand Down Expand Up @@ -617,15 +617,15 @@ func (df DataFrame) Concat(dfb DataFrame) DataFrame {
a = df.columns[aidx]
} else {
bb := dfb.columns[bidx]
a = series.New(make([]struct{}, df.nrows), bb.Type(), bb.Name)
a = series.New(make([]struct{}, df.nrows), bb.Type(), bb.Name())
}
if bidx != -1 {
b = dfb.columns[bidx]
} else {
b = series.New(make([]struct{}, dfb.nrows), a.Type(), a.Name)
b = series.New(make([]struct{}, dfb.nrows), a.Type(), a.Name())
}
newSeries := a.Concat(b)
if err := newSeries.Err; err != nil {
if err := newSeries.Error(); err != nil {
return DataFrame{Err: fmt.Errorf("concat: %v", err)}
}
expandedSeries[k] = newSeries
Expand All @@ -635,21 +635,32 @@ func (df DataFrame) Concat(dfb DataFrame) DataFrame {

// Mutate replaces each column of the DataFrame whose name matches one of the
// given Series and appends any Series whose name does not exist as a new column.
func (df DataFrame) Mutate(s series.Series) DataFrame {
if df.Err != nil {
func (df DataFrame) Mutate(ss ...series.Series) DataFrame {
if df.Err != nil || len(ss) == 0 {
return df
}
if s.Len() != df.nrows {

slen := ss[0].Len()
for i := 1; i < len(ss); i++ {
if slen != ss[i].Len() {
return DataFrame{Err: fmt.Errorf("mutate: serieses length not equal")}
}
}
if slen != df.nrows {
return DataFrame{Err: fmt.Errorf("mutate: wrong dimensions")}
}
df = df.Copy()
// Check that colname exist on dataframe
columns := df.columns
if idx := findInStringSlice(s.Name, df.Names()); idx != -1 {
columns[idx] = s
} else {
columns = append(columns, s)
dfNames := df.Names()
for i := 0; i < len(ss); i++ {
if idx := findInStringSlice(ss[i].Name(), dfNames); idx != -1 {
columns[idx] = ss[i]
} else {
columns = append(columns, ss[i])
}
}

nrows, ncols, err := checkColumnsDimensions(columns...)
if err != nil {
return DataFrame{Err: err}
Expand All @@ -662,7 +673,7 @@ func (df DataFrame) Mutate(s series.Series) DataFrame {
colnames := df.Names()
fixColnames(colnames)
for i, colname := range colnames {
df.columns[i].Name = colname
df.columns[i].SetName(colname)
}
return df
}
Expand Down Expand Up @@ -723,7 +734,7 @@ func (df DataFrame) FilterAggregation(agg Aggregation, filters ...F) DataFrame {
}
}
res := df.columns[idx].Compare(f.Comparator, f.Comparando)
if err := res.Err; err != nil {
if err := res.Error(); err != nil {
return DataFrame{Err: fmt.Errorf("filter: %v", err)}
}
compResults[i] = res
Expand Down Expand Up @@ -777,7 +788,7 @@ func (df DataFrame) Arrange(order ...Order) DataFrame {
if df.Err != nil {
return df
}
if order == nil || len(order) == 0 {
if len(order) == 0 {
return DataFrame{Err: fmt.Errorf("rename: no arguments")}
}

Expand Down Expand Up @@ -823,7 +834,7 @@ func (df DataFrame) Capply(f func(series.Series) series.Series) DataFrame {
columns := make([]series.Series, df.ncols)
for i, s := range df.columns {
applied := f(s)
applied.Name = s.Name
applied.SetName(s.Name())
columns[i] = applied
}
return New(columns...)
Expand Down Expand Up @@ -879,8 +890,8 @@ func (df DataFrame) Rapply(f func(series.Series) series.Series) DataFrame {
row.Append(col.Elem(i))
}
row = f(row)
if row.Err != nil {
return DataFrame{Err: fmt.Errorf("error applying function on row %d: %v", i, row.Err)}
if row.Error() != nil {
return DataFrame{Err: fmt.Errorf("error applying function on row %d: %v", i, row.Error())}
}

if rowlen != -1 && rowlen != row.Len() {
Expand Down Expand Up @@ -922,7 +933,7 @@ func (df DataFrame) Rapply(f func(series.Series) series.Series) DataFrame {
colnames := df.Names()
fixColnames(colnames)
for i, colname := range colnames {
df.columns[i].Name = colname
df.columns[i].SetName(colname)
}
return df
}
Expand Down Expand Up @@ -1243,8 +1254,8 @@ func LoadRecords(records [][]string, options ...LoadOption) DataFrame {
columns := make([]series.Series, len(headers))
for i, colname := range headers {
col := series.New(rawcols[i], types[i], colname)
if col.Err != nil {
return DataFrame{Err: col.Err}
if col.Error() != nil {
return DataFrame{Err: col.Error()}
}
columns[i] = col
}
Expand All @@ -1261,7 +1272,7 @@ func LoadRecords(records [][]string, options ...LoadOption) DataFrame {
colnames := df.Names()
fixColnames(colnames)
for i, colname := range colnames {
df.columns[i].Name = colname
df.columns[i].SetName(colname)
}
return df
}
Expand Down Expand Up @@ -1331,7 +1342,7 @@ func LoadMatrix(mat Matrix) DataFrame {
colnames := df.Names()
fixColnames(colnames)
for i, colname := range colnames {
df.columns[i].Name = colname
df.columns[i].SetName(colname)
}
return df
}
Expand Down Expand Up @@ -1552,7 +1563,7 @@ func ReadHTML(r io.Reader, options ...LoadOption) []DataFrame {
func (df DataFrame) Names() []string {
colnames := make([]string, df.ncols)
for i, s := range df.columns {
colnames[i] = s.Name
colnames[i] = s.Name()
}
return colnames
}
Expand All @@ -1576,7 +1587,7 @@ func (df DataFrame) SetNames(colnames ...string) error {
return fmt.Errorf("setting names: wrong dimensions")
}
for k, s := range colnames {
df.columns[k].Name = s
df.columns[k].SetName(s)
}
return nil
}
Expand All @@ -1599,12 +1610,12 @@ func (df DataFrame) Ncol() int {
// Col returns a copy of the Series with the given column name contained in the DataFrame.
func (df DataFrame) Col(colname string) series.Series {
if df.Err != nil {
return series.Series{Err: df.Err}
return series.Err(df.Err)
}
// Check that colname exist on dataframe
idx := findInStringSlice(colname, df.Names())
if idx < 0 {
return series.Series{Err: fmt.Errorf("unknown column name")}
return series.Err(fmt.Errorf("unknown column name"))
}
return df.columns[idx].Copy()
}
Expand Down Expand Up @@ -2099,6 +2110,16 @@ func (df DataFrame) Elem(r, c int) series.Element {
return df.columns[c].Elem(r)
}

// ElemByRowAndColName returns the element on row `row` of the column named
// `columnName`. It returns nil when df.colIndex reports a negative index for
// columnName (presumably meaning no column has that name; confirm against
// colIndex). The row lookup itself is delegated to the column's Elem method.
// NOTE(review): the earlier comment stated this panics on an out-of-bounds
// index; that depends on Series.Elem, which is not visible here. Confirm.
func (df DataFrame) ElemByRowAndColName(row int, columnName string) series.Element {
colIndex := df.colIndex(columnName)
if colIndex < 0 {
// Negative index: return a nil Element rather than indexing df.columns.
return nil
}
return df.columns[colIndex].Elem(row)
}

// fixColnames assigns a name to the missing column names and makes it so that the
// column names are unique.
func fixColnames(colnames []string) {
Expand Down Expand Up @@ -2172,13 +2193,13 @@ func findInStringSlice(str string, s []string) int {

func parseSelectIndexes(l int, indexes SelectIndexes, colnames []string) ([]int, error) {
var idx []int
switch indexes.(type) {
switch idt := indexes.(type) {
case []int:
idx = indexes.([]int)
idx = idt
case int:
idx = []int{indexes.(int)}
idx = []int{idt}
case []bool:
bools := indexes.([]bool)
bools := idt
if len(bools) != l {
return nil, fmt.Errorf("indexing error: index dimensions mismatch")
}
Expand All @@ -2205,7 +2226,7 @@ func parseSelectIndexes(l int, indexes SelectIndexes, colnames []string) ([]int,
}
case series.Series:
s := indexes.(series.Series)
if err := s.Err; err != nil {
if err := s.Error(); err != nil {
return nil, fmt.Errorf("indexing error: new values has errors: %v", err)
}
if s.HasNaN() {
Expand Down Expand Up @@ -2311,7 +2332,7 @@ func (df DataFrame) Describe() DataFrame {
"75%",
"max",
})
labels.Name = "column"
labels.SetName("column")

ss := []series.Series{labels}

Expand All @@ -2330,7 +2351,7 @@ func (df DataFrame) Describe() DataFrame {
col.MaxStr(),
},
col.Type(),
col.Name,
col.Name(),
)
case series.Bool:
fallthrough
Expand All @@ -2348,7 +2369,7 @@ func (df DataFrame) Describe() DataFrame {
col.Max(),
},
series.Float,
col.Name,
col.Name(),
)
}
ss = append(ss, newCol)
Expand Down
Loading