Skip to content

Commit

Permalink
re-added dataset filter
Browse files Browse the repository at this point in the history
  • Loading branch information
Wayne Bills authored and Wayne Bills committed Apr 30, 2024
1 parent 9bd190c commit 56568de
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 34 deletions.
6 changes: 6 additions & 0 deletions speedtest-extract/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ COMMANDS:
GLOBAL OPTIONS:
--all Show all extract files, not just latest available (default: false)
--config value Specify the config file (default: "speedtest-extract.yaml")
--filter-datasets value Limit extracts to this comma-delimited list of datasets
--filter-filenames value Limit extracts to this comma-delimited list of filenames
--filter-groups value Limit extracts to this comma-delimited list of groups
--since value Limit extracts to ones updated since the provided date (YYYY-MM-DD)
Expand All @@ -40,6 +41,11 @@ speedtest-extract list
speedtest-extract --filter-groups web,native list
```

* Filter by specific datasets
```
speedtest-extract --filter-datasets city,state list
```

* Show all extracts updated since a specific date
```
speedtest-extract --since 2022-01-01 list
Expand Down
44 changes: 23 additions & 21 deletions speedtest-extract/extracts.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ func GetExtracts(client *resty.Client, path string, cache *ExtractsCache) ([]*Ex
return extracts, nil
}

func FilterFiles(items []*ExtractItem, groupFilter []string, filenameFilter []string, since *time.Time, latestOnly bool, files []ExtractFile) []ExtractFile {
func FilterFiles(items []*ExtractItem, groupFilter []string, datasetFilter []string, filenameFilter []string, since *time.Time, latestOnly bool, files []ExtractFile) []ExtractFile {
if since != nil {
latestOnly = false
}
Expand All @@ -255,32 +255,34 @@ func FilterFiles(items []*ExtractItem, groupFilter []string, filenameFilter []st
if matchedGroup {
for name, datasets := range i.Datasets {
dataset := name
for _, d := range datasets {
filename := d.Name
if len(filenameFilter) == 0 || contains(filename, filenameFilter) {
if !latestOnly || d == i.Latest[name] {
updated := time.UnixMilli(d.Modified).UTC()
if since == nil || updated.Equal(*since) || updated.After(*since) {
log.WithFields(log.Fields{
"groups": groups,
"dataset": dataset,
"latest": d == i.Latest[name],
"updated": updated,
}).Debug(fmt.Sprintf("found file matching all filters: %s", filename))
files = append(files, ExtractFile{
Dataset: dataset,
Name: filename,
Latest: d == i.Latest[name],
Updated: updated,
Item: d,
})
if len(datasetFilter) == 0 || contains(dataset, datasetFilter) {
for _, d := range datasets {
filename := d.Name
if len(filenameFilter) == 0 || contains(filename, filenameFilter) {
if !latestOnly || d == i.Latest[name] {
updated := time.UnixMilli(d.Modified).UTC()
if since == nil || updated.Equal(*since) || updated.After(*since) {
log.WithFields(log.Fields{
"groups": groups,
"dataset": dataset,
"latest": d == i.Latest[name],
"updated": updated,
}).Debug(fmt.Sprintf("found file matching all filters: %s", filename))
files = append(files, ExtractFile{
Dataset: dataset,
Name: filename,
Latest: d == i.Latest[name],
Updated: updated,
Item: d,
})
}
}
}
}
}
}
}
files = FilterFiles(i.Children, groupFilter, filenameFilter, since, latestOnly, files)
files = FilterFiles(i.Children, groupFilter, datasetFilter, filenameFilter, since, latestOnly, files)
}
}
return files
Expand Down
27 changes: 20 additions & 7 deletions speedtest-extract/extracts_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ func GetTestExtracts() ([]*ExtractItem, error) {
return GetExtracts(client, "", nil)
}

func RunFilters(t *testing.T, extracts []*ExtractItem, groupFilter []string, filenameFilter []string, since *time.Time, latestOnly bool, expected FilterCounts) []ExtractFile {
files := FilterFiles(extracts, groupFilter, filenameFilter, since, latestOnly, nil)
func RunFilters(t *testing.T, extracts []*ExtractItem, groupFilter []string, datasetFilter []string, filenameFilter []string, since *time.Time, latestOnly bool, expected FilterCounts) []ExtractFile {
files := FilterFiles(extracts, groupFilter, datasetFilter, filenameFilter, since, latestOnly, nil)

latestCount := 0
groups := make(map[string]interface{})
Expand Down Expand Up @@ -97,7 +97,7 @@ func TestFilters(t *testing.T) {
extracts, _ := GetTestExtracts()

t.Run("should return all files when no filters are specified", func(t *testing.T) {
_ = RunFilters(t, extracts, []string{}, []string{}, nil, false, FilterCounts{
_ = RunFilters(t, extracts, []string{}, []string{}, []string{}, nil, false, FilterCounts{
Files: 24,
Groups: 4,
Datasets: 5,
Expand All @@ -106,7 +106,7 @@ func TestFilters(t *testing.T) {
})

t.Run("should filter for the latest files", func(t *testing.T) {
files := RunFilters(t, extracts, []string{}, []string{}, nil, true, FilterCounts{
files := RunFilters(t, extracts, []string{}, []string{}, []string{}, nil, true, FilterCounts{
Files: 5,
Groups: 4,
Datasets: 5,
Expand All @@ -119,7 +119,7 @@ func TestFilters(t *testing.T) {

t.Run("should filter for specific groups", func(t *testing.T) {
groups := []string{"android", "web"}
files := RunFilters(t, extracts, groups, []string{}, nil, false, FilterCounts{
files := RunFilters(t, extracts, groups, []string{}, []string{}, nil, false, FilterCounts{
Files: 12,
Groups: 2,
Datasets: 2,
Expand All @@ -136,9 +136,22 @@ func TestFilters(t *testing.T) {
}
})

t.Run("should filter for specific datasets", func(t *testing.T) {
datasets := []string{"desktop"}
files := RunFilters(t, extracts, []string{}, datasets, []string{}, nil, false, FilterCounts{
Files: 5,
Groups: 1,
Datasets: 1,
Latest: 1,
})
for _, f := range files {
assert.Contains(t, datasets, f.Dataset, "all but desktop datasets should be filtered")
}
})

t.Run("should filter for files modified after a given date", func(t *testing.T) {
since, _ := time.Parse("2006-01-02", "2022-05-01")
files := RunFilters(t, extracts, []string{}, []string{}, &since, false, FilterCounts{
files := RunFilters(t, extracts, []string{}, []string{}, []string{}, &since, false, FilterCounts{
Files: 16,
Groups: 4,
Datasets: 5,
Expand All @@ -151,7 +164,7 @@ func TestFilters(t *testing.T) {

t.Run("should filter for latest version of specific filenames", func(t *testing.T) {
filenames := []string{"android_2022-05-01.zip", "desktop_2022-04-01.zip"}
files := RunFilters(t, extracts, []string{}, filenames, nil, true, FilterCounts{
files := RunFilters(t, extracts, []string{}, []string{}, filenames, nil, true, FilterCounts{
Files: 2,
Groups: 2,
Datasets: 2,
Expand Down
21 changes: 15 additions & 6 deletions speedtest-extract/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ var config Config
// GlobalOptions holds the parsed values of the global command-line flags
// that control which extract files are selected.
type GlobalOptions struct {
// ShowAll corresponds to --all; its negation is passed to FilterFiles as
// latestOnly, so false restricts results to the latest file per dataset.
ShowAll bool
// GroupFilter is the --filter-groups value split on commas; it is only
// set when the flag is non-empty.
GroupFilter []string
// DatasetFilter is the --filter-datasets value split on commas; it is only
// set when the flag is non-empty.
DatasetFilter []string
// FilenameFilter is the --filter-filenames value split on commas; it is
// only set when the flag is non-empty.
// NOTE(review): GetGlobalOptions appears to post-process these names to
// tolerate a missing .zip extension — confirm against the full function.
FilenameFilter []string
// Since, when non-nil, keeps only files updated at or after this time;
// nil disables the date filter.
Since *time.Time
}
Expand Down Expand Up @@ -55,6 +56,10 @@ func main() {
Name: "filter-groups",
Usage: "Limit extracts to this comma-delimited list of groups",
},
&cli.StringFlag{
Name: "filter-datasets",
Usage: "Limit extracts to this comma-delimited list of datasets",
},
&cli.StringFlag{
Name: "filter-filenames",
Usage: "Limit extracts to this comma-delimited list of filenames",
Expand Down Expand Up @@ -133,6 +138,7 @@ func GetClient() *resty.Client {
func GetGlobalOptions(context *cli.Context) (*GlobalOptions, error) {
showAll := context.Bool("all")
groupFilter := context.String("filter-groups")
datasetFilter := context.String("filter-datasets")
filenameFilter := context.String("filter-filenames")
since := context.String("since")
verbose := context.Bool("verbose")
Expand All @@ -143,6 +149,9 @@ func GetGlobalOptions(context *cli.Context) (*GlobalOptions, error) {
if len(groupFilter) > 0 {
args.GroupFilter = strings.Split(groupFilter, ",")
}
if len(datasetFilter) > 0 {
args.DatasetFilter = strings.Split(datasetFilter, ",")
}
if len(filenameFilter) > 0 {
args.FilenameFilter = strings.Split(filenameFilter, ",")
//account for possibility that the user ignored the .zip extension for the filenames, so allow either way
Expand All @@ -168,9 +177,9 @@ func GetGlobalOptions(context *cli.Context) (*GlobalOptions, error) {
}

log.WithFields(log.Fields{
"all": showAll,
"groupFilter": args.GroupFilter,
//"datasetFilter": args.DatasetFilter,
"all": showAll,
"groupFilter": args.GroupFilter,
"datasetFilter": args.DatasetFilter,
"filenameFilter": args.FilenameFilter,
"since": args.Since,
}).Debug("global flags")
Expand All @@ -189,7 +198,7 @@ func DownloadExtracts(context *cli.Context) error {
func ListFiles(files []ExtractFile) {
t := table.NewWriter()
t.SetOutputMirror(os.Stdout)
t.AppendHeader(table.Row{"Groups", "File", "Updated", "Latest"})
t.AppendHeader(table.Row{"Groups", "Dataset", "File", "Updated", "Latest"})
t.SetColumnConfigs([]table.ColumnConfig{
{Number: 1, AutoMerge: true},
{Number: 2, AutoMerge: true},
Expand All @@ -201,7 +210,7 @@ func ListFiles(files []ExtractFile) {
}
groups := strings.Join(f.Item.Groups, ", ")
row := table.Row{
groups, f.Name, f.Updated, latest,
groups, f.Dataset, f.Name, f.Updated, latest,
}
t.AppendRow(row)
}
Expand All @@ -228,7 +237,7 @@ func ExtractHandler(context *cli.Context, command string) error {
return err
}
WriteExtractsCache(cache)
files := FilterFiles(extracts, args.GroupFilter, args.FilenameFilter, args.Since, !args.ShowAll, nil)
files := FilterFiles(extracts, args.GroupFilter, args.DatasetFilter, args.FilenameFilter, args.Since, !args.ShowAll, nil)

if len(files) == 0 {
return ErrNoMatchingFiles
Expand Down

0 comments on commit 56568de

Please sign in to comment.