-
Notifications
You must be signed in to change notification settings - Fork 281
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add some functionality #180
Open
mqy527
wants to merge
62
commits into
go-gota:master
Choose a base branch
from
mqy527:master
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 49 commits
Commits
Show all changes
62 commits
Select commit
Hold shift + click to select a range
d3c9b6a
go mod
mqy527 37caee1
1、add some functions in Series : Shift、CumProd、Prod、AddConst、MulConst…
mqy527 e357493
rolling not test
mqy527 8e217e5
fix rolling.max and rolling.min
mqy527 adce640
Rolling.Mean、Quantile、Median、StdDev
mqy527 1768517
series And、Or
mqy527 aef0878
logic_test
mqy527 a40de6b
modify MapFunction,add index param:index
mqy527 be99528
fix test case
mqy527 957270b
add rolling series name
mqy527 7a5c902
fix:series name
mqy527 49bffe2
add method: series.Operation
mqy527 6be74a6
Number.Sub、Div、Mod
mqy527 e49182b
modify go.mod
mqy527 63dd7f3
modify mod path
mqy527 dcc2401
optimize Series.Elem
mqy527 e5e5bed
optimize DataFrame
mqy527 b7bcff4
Rolling, add method: Apply、MeanByWeights
mqy527 7e733e3
refactor rolling
mqy527 2847476
optimize rolling
mqy527 3ad37b0
Merge from 'go-gota/gota'
mqy527 c1c2ba8
Remove redundant code
mqy527 b8b1898
optimize:Series.Slice
mqy527 9c5bef1
rolling: add descriptions
mqy527 4292dbd
delete Element.NA()
mqy527 c0fe7cd
modify module name
mqy527 e3829e0
optimize:series logic
mqy527 ca5da43
refactor some method
mqy527 fd76827
optimize series.Shift
mqy527 e0847fe
optimize series.Shift
mqy527 5913f0c
Modify comments
mqy527 d426b3e
optimize
mqy527 e4b68c6
modify module
mqy527 7fbfed2
modify module name
mqy527 9289c82
modify module name
mqy527 07dca74
rolling cache
mqy527 090fb6c
Merge branch 'master' of https://github.com/mqy527/gota
mqy527 789925a
cacheAble
mqy527 9f25bdb
unstable
mqy527 fae5106
cacheAbleRollingSeries
mqy527 5ac7a3d
cacheAbleSeries
mqy527 1b91cf9
cacheAbleSeries
mqy527 54fe127
cacheAbleSeries
mqy527 6a0ce85
when, wrap: for special operations
mqy527 2ac0c44
add comment
mqy527 d1a94cd
fix bug:seriesCache.set
mqy527 d85a663
Series.Filter
mqy527 029d698
add:DataQuantile()
mqy527 3b9f6be
add: rolling Quantile
mqy527 400ca1d
DataFrame.Slice
mqy527 09e42af
optimize Series.DataQuantile(s)
mqy527 e3b8e38
self
mqy527 7d91dc2
DataFrame FromSeries
mqy527 26ec178
optimize cache
mqy527 4ad05e9
immutable series
mqy527 836a329
optimize cacheable
mqy527 5fc68cf
delete cache
mqy527 b54ea74
delete some AddConst、MulConst、DivConst from cacheAbleSeries
mqy527 97b5a4d
Sum support Bool
mqy527 3962767
add method:CapplyWithName、Rename、RemoveCols
mqy527 7a1c43b
change method name:CapplyWithName-->CapplyByName
mqy527 a3a8aac
add FloatValuer
mqy527 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ import ( | |
"strings" | ||
"unicode/utf8" | ||
|
||
"github.com/go-gota/gota/series" | ||
"github.com/mqy527/gota/series" | ||
"golang.org/x/net/html" | ||
"golang.org/x/net/html/atom" | ||
) | ||
|
@@ -41,7 +41,7 @@ type DataFrame struct { | |
|
||
// New is the generic DataFrame constructor | ||
func New(se ...series.Series) DataFrame { | ||
if se == nil || len(se) == 0 { | ||
if len(se) == 0 { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. good catch |
||
return DataFrame{Err: fmt.Errorf("empty DataFrame")} | ||
} | ||
|
||
|
@@ -63,7 +63,7 @@ func New(se ...series.Series) DataFrame { | |
colnames := df.Names() | ||
fixColnames(colnames) | ||
for i, colname := range colnames { | ||
df.columns[i].Name = colname | ||
df.columns[i].SetName(colname) | ||
} | ||
return df | ||
} | ||
|
@@ -76,8 +76,8 @@ func checkColumnsDimensions(se ...series.Series) (nrows, ncols int, err error) { | |
return | ||
} | ||
for i, s := range se { | ||
if s.Err != nil { | ||
err = fmt.Errorf("error on series %d: %v", i, s.Err) | ||
if s.Error() != nil { | ||
err = fmt.Errorf("error on series %d: %v", i, s.Error()) | ||
return | ||
} | ||
if nrows == -1 { | ||
|
@@ -102,7 +102,7 @@ func (df DataFrame) Copy() DataFrame { | |
|
||
// String implements the Stringer interface for DataFrame | ||
func (df DataFrame) String() (str string) { | ||
return df.print(true, true, true, true, 10, 70, "DataFrame") | ||
return df.print(true, false, true, true, 10, 70, "DataFrame") | ||
} | ||
|
||
// Returns error or nil if no error occurred | ||
|
@@ -273,8 +273,8 @@ func (df DataFrame) Set(indexes series.Indexes, newvalues DataFrame) DataFrame { | |
columns := make([]series.Series, df.ncols) | ||
for i, s := range df.columns { | ||
columns[i] = s.Set(indexes, newvalues.columns[i]) | ||
if columns[i].Err != nil { | ||
df = DataFrame{Err: fmt.Errorf("setting error on column %d: %v", i, columns[i].Err)} | ||
if columns[i].Error() != nil { | ||
df = DataFrame{Err: fmt.Errorf("setting error on column %d: %v", i, columns[i].Error())} | ||
return df | ||
} | ||
} | ||
|
@@ -343,7 +343,7 @@ func (df DataFrame) Select(indexes SelectIndexes) DataFrame { | |
colnames := df.Names() | ||
fixColnames(colnames) | ||
for i, colname := range colnames { | ||
df.columns[i].Name = colname | ||
df.columns[i].SetName(colname) | ||
} | ||
return df | ||
} | ||
|
@@ -375,7 +375,7 @@ func (df DataFrame) Drop(indexes SelectIndexes) DataFrame { | |
colnames := df.Names() | ||
fixColnames(colnames) | ||
for i, colname := range colnames { | ||
df.columns[i].Name = colname | ||
df.columns[i].SetName(colname) | ||
} | ||
return df | ||
} | ||
|
@@ -424,7 +424,7 @@ func (df DataFrame) GroupBy(colnames ...string) *Groups { | |
// Save column types | ||
colTypes := map[string]series.Type{} | ||
for _, c := range df.columns { | ||
colTypes[c.Name] = c.Type() | ||
colTypes[c.Name()] = c.Type() | ||
} | ||
|
||
for k, cMaps := range groupSeries { | ||
|
@@ -542,7 +542,7 @@ func (df DataFrame) Rename(newname, oldname string) DataFrame { | |
} | ||
|
||
copy := df.Copy() | ||
copy.columns[idx].Name = newname | ||
copy.columns[idx].SetName(newname) | ||
return copy | ||
} | ||
|
||
|
@@ -577,7 +577,7 @@ func (df DataFrame) RBind(dfb DataFrame) DataFrame { | |
originalSeries := df.columns[k] | ||
addedSeries := dfb.columns[idx] | ||
newSeries := originalSeries.Concat(addedSeries) | ||
if err := newSeries.Err; err != nil { | ||
if err := newSeries.Error(); err != nil { | ||
return DataFrame{Err: fmt.Errorf("rbind: %v", err)} | ||
} | ||
expandedSeries[k] = newSeries | ||
|
@@ -617,15 +617,15 @@ func (df DataFrame) Concat(dfb DataFrame) DataFrame { | |
a = df.columns[aidx] | ||
} else { | ||
bb := dfb.columns[bidx] | ||
a = series.New(make([]struct{}, df.nrows), bb.Type(), bb.Name) | ||
a = series.New(make([]struct{}, df.nrows), bb.Type(), bb.Name()) | ||
} | ||
if bidx != -1 { | ||
b = dfb.columns[bidx] | ||
} else { | ||
b = series.New(make([]struct{}, dfb.nrows), a.Type(), a.Name) | ||
b = series.New(make([]struct{}, dfb.nrows), a.Type(), a.Name()) | ||
} | ||
newSeries := a.Concat(b) | ||
if err := newSeries.Err; err != nil { | ||
if err := newSeries.Error(); err != nil { | ||
return DataFrame{Err: fmt.Errorf("concat: %v", err)} | ||
} | ||
expandedSeries[k] = newSeries | ||
|
@@ -635,21 +635,32 @@ func (df DataFrame) Concat(dfb DataFrame) DataFrame { | |
|
||
// Mutate changes a column of the DataFrame with the given Series or adds it as | ||
// a new column if the column name does not exist. | ||
func (df DataFrame) Mutate(s series.Series) DataFrame { | ||
if df.Err != nil { | ||
func (df DataFrame) Mutate(ss ...series.Series) DataFrame { | ||
if df.Err != nil || len(ss) == 0 { | ||
return df | ||
} | ||
if s.Len() != df.nrows { | ||
|
||
slen := ss[0].Len() | ||
for i := 1; i < len(ss); i++ { | ||
if slen != ss[i].Len() { | ||
return DataFrame{Err: fmt.Errorf("mutate: serieses length not equal")} | ||
} | ||
} | ||
if slen != df.nrows { | ||
return DataFrame{Err: fmt.Errorf("mutate: wrong dimensions")} | ||
} | ||
df = df.Copy() | ||
// Check that colname exist on dataframe | ||
columns := df.columns | ||
if idx := findInStringSlice(s.Name, df.Names()); idx != -1 { | ||
columns[idx] = s | ||
} else { | ||
columns = append(columns, s) | ||
dfNames := df.Names() | ||
for i := 0; i < len(ss); i++ { | ||
if idx := findInStringSlice(ss[i].Name(), dfNames); idx != -1 { | ||
columns[idx] = ss[i] | ||
} else { | ||
columns = append(columns, ss[i]) | ||
} | ||
} | ||
|
||
nrows, ncols, err := checkColumnsDimensions(columns...) | ||
if err != nil { | ||
return DataFrame{Err: err} | ||
|
@@ -662,7 +673,7 @@ func (df DataFrame) Mutate(s series.Series) DataFrame { | |
colnames := df.Names() | ||
fixColnames(colnames) | ||
for i, colname := range colnames { | ||
df.columns[i].Name = colname | ||
df.columns[i].SetName(colname) | ||
} | ||
return df | ||
} | ||
|
@@ -723,7 +734,7 @@ func (df DataFrame) FilterAggregation(agg Aggregation, filters ...F) DataFrame { | |
} | ||
} | ||
res := df.columns[idx].Compare(f.Comparator, f.Comparando) | ||
if err := res.Err; err != nil { | ||
if err := res.Error(); err != nil { | ||
return DataFrame{Err: fmt.Errorf("filter: %v", err)} | ||
} | ||
compResults[i] = res | ||
|
@@ -777,7 +788,7 @@ func (df DataFrame) Arrange(order ...Order) DataFrame { | |
if df.Err != nil { | ||
return df | ||
} | ||
if order == nil || len(order) == 0 { | ||
if len(order) == 0 { | ||
return DataFrame{Err: fmt.Errorf("rename: no arguments")} | ||
} | ||
|
||
|
@@ -823,7 +834,7 @@ func (df DataFrame) Capply(f func(series.Series) series.Series) DataFrame { | |
columns := make([]series.Series, df.ncols) | ||
for i, s := range df.columns { | ||
applied := f(s) | ||
applied.Name = s.Name | ||
applied.SetName(s.Name()) | ||
columns[i] = applied | ||
} | ||
return New(columns...) | ||
|
@@ -879,8 +890,8 @@ func (df DataFrame) Rapply(f func(series.Series) series.Series) DataFrame { | |
row.Append(col.Elem(i)) | ||
} | ||
row = f(row) | ||
if row.Err != nil { | ||
return DataFrame{Err: fmt.Errorf("error applying function on row %d: %v", i, row.Err)} | ||
if row.Error() != nil { | ||
return DataFrame{Err: fmt.Errorf("error applying function on row %d: %v", i, row.Error())} | ||
} | ||
|
||
if rowlen != -1 && rowlen != row.Len() { | ||
|
@@ -922,7 +933,7 @@ func (df DataFrame) Rapply(f func(series.Series) series.Series) DataFrame { | |
colnames := df.Names() | ||
fixColnames(colnames) | ||
for i, colname := range colnames { | ||
df.columns[i].Name = colname | ||
df.columns[i].SetName(colname) | ||
} | ||
return df | ||
} | ||
|
@@ -1243,8 +1254,8 @@ func LoadRecords(records [][]string, options ...LoadOption) DataFrame { | |
columns := make([]series.Series, len(headers)) | ||
for i, colname := range headers { | ||
col := series.New(rawcols[i], types[i], colname) | ||
if col.Err != nil { | ||
return DataFrame{Err: col.Err} | ||
if col.Error() != nil { | ||
return DataFrame{Err: col.Error()} | ||
} | ||
columns[i] = col | ||
} | ||
|
@@ -1261,7 +1272,7 @@ func LoadRecords(records [][]string, options ...LoadOption) DataFrame { | |
colnames := df.Names() | ||
fixColnames(colnames) | ||
for i, colname := range colnames { | ||
df.columns[i].Name = colname | ||
df.columns[i].SetName(colname) | ||
} | ||
return df | ||
} | ||
|
@@ -1331,7 +1342,7 @@ func LoadMatrix(mat Matrix) DataFrame { | |
colnames := df.Names() | ||
fixColnames(colnames) | ||
for i, colname := range colnames { | ||
df.columns[i].Name = colname | ||
df.columns[i].SetName(colname) | ||
} | ||
return df | ||
} | ||
|
@@ -1552,7 +1563,7 @@ func ReadHTML(r io.Reader, options ...LoadOption) []DataFrame { | |
func (df DataFrame) Names() []string { | ||
colnames := make([]string, df.ncols) | ||
for i, s := range df.columns { | ||
colnames[i] = s.Name | ||
colnames[i] = s.Name() | ||
} | ||
return colnames | ||
} | ||
|
@@ -1576,7 +1587,7 @@ func (df DataFrame) SetNames(colnames ...string) error { | |
return fmt.Errorf("setting names: wrong dimensions") | ||
} | ||
for k, s := range colnames { | ||
df.columns[k].Name = s | ||
df.columns[k].SetName(s) | ||
} | ||
return nil | ||
} | ||
|
@@ -1599,12 +1610,12 @@ func (df DataFrame) Ncol() int { | |
// Col returns a copy of the Series with the given column name contained in the DataFrame. | ||
func (df DataFrame) Col(colname string) series.Series { | ||
if df.Err != nil { | ||
return series.Series{Err: df.Err} | ||
return series.Err(df.Err) | ||
} | ||
// Check that colname exist on dataframe | ||
idx := findInStringSlice(colname, df.Names()) | ||
if idx < 0 { | ||
return series.Series{Err: fmt.Errorf("unknown column name")} | ||
return series.Err(fmt.Errorf("unknown column name")) | ||
} | ||
return df.columns[idx].Copy() | ||
} | ||
|
@@ -2099,6 +2110,16 @@ func (df DataFrame) Elem(r, c int) series.Element { | |
return df.columns[c].Elem(r) | ||
} | ||
|
||
// ElemByRowAndColName returns the element on row `row` of the column named | ||
// `columnName`. It returns nil when df.colIndex reports a negative index for | ||
// that name, i.e. the column could not be resolved. | ||
// NOTE(review): `row` is passed to Elem unchecked; presumably this panics when | ||
// the row index is out of bounds, as documented for Elem — confirm. | ||
func (df DataFrame) ElemByRowAndColName(row int, columnName string) series.Element { | ||
colIndex := df.colIndex(columnName) | ||
if colIndex < 0 { | ||
return nil | ||
} | ||
return df.columns[colIndex].Elem(row) | ||
} | ||
|
||
// fixColnames assigns a name to the missing column names and makes it so that the | ||
// column names are unique. | ||
func fixColnames(colnames []string) { | ||
|
@@ -2172,13 +2193,13 @@ func findInStringSlice(str string, s []string) int { | |
|
||
func parseSelectIndexes(l int, indexes SelectIndexes, colnames []string) ([]int, error) { | ||
var idx []int | ||
switch indexes.(type) { | ||
switch idt := indexes.(type) { | ||
case []int: | ||
idx = indexes.([]int) | ||
idx = idt | ||
case int: | ||
idx = []int{indexes.(int)} | ||
idx = []int{idt} | ||
case []bool: | ||
bools := indexes.([]bool) | ||
bools := idt | ||
if len(bools) != l { | ||
return nil, fmt.Errorf("indexing error: index dimensions mismatch") | ||
} | ||
|
@@ -2205,7 +2226,7 @@ func parseSelectIndexes(l int, indexes SelectIndexes, colnames []string) ([]int, | |
} | ||
case series.Series: | ||
s := indexes.(series.Series) | ||
if err := s.Err; err != nil { | ||
if err := s.Error(); err != nil { | ||
return nil, fmt.Errorf("indexing error: new values has errors: %v", err) | ||
} | ||
if s.HasNaN() { | ||
|
@@ -2311,7 +2332,7 @@ func (df DataFrame) Describe() DataFrame { | |
"75%", | ||
"max", | ||
}) | ||
labels.Name = "column" | ||
labels.SetName("column") | ||
|
||
ss := []series.Series{labels} | ||
|
||
|
@@ -2330,7 +2351,7 @@ func (df DataFrame) Describe() DataFrame { | |
col.MaxStr(), | ||
}, | ||
col.Type(), | ||
col.Name, | ||
col.Name(), | ||
) | ||
case series.Bool: | ||
fallthrough | ||
|
@@ -2348,7 +2369,7 @@ func (df DataFrame) Describe() DataFrame { | |
col.Max(), | ||
}, | ||
series.Float, | ||
col.Name, | ||
col.Name(), | ||
) | ||
} | ||
ss = append(ss, newCol) | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I hope this is a mistake - keep go-gota here.