diff --git a/docs/src/manpage.md b/docs/src/manpage.md index aad8a4f50f..1a9ebea12c 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -194,12 +194,13 @@ MILLER(1) MILLER(1) 1mVERB LIST0m altkv bar bootstrap case cat check clean-whitespace count-distinct count count-similar cut decimate fill-down fill-empty filter flatten format-values - fraction gap grep group-by group-like having-fields head histogram json-parse - json-stringify join label latin1-to-utf8 least-frequent merge-fields - most-frequent nest nothing put regularize remove-empty-columns rename reorder - repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records - sort sort-within-records split stats1 stats2 step summary tac tail tee - template top utf8-to-latin1 unflatten uniq unspace unsparsify + fraction gap grep group-by group-like gsub having-fields head histogram + json-parse json-stringify join label latin1-to-utf8 least-frequent + merge-fields most-frequent nest nothing put regularize remove-empty-columns + rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle + skip-trivial-records sort sort-within-records split ssub stats1 stats2 step + sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace + unsparsify 1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent @@ -1245,6 +1246,15 @@ MILLER(1) MILLER(1) Options: -h|--help Show this message. + 1mgsub0m + Usage: mlr gsub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and handling multiple matches, like the `gsub` DSL function. + See also the `sub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mhaving-fields0m Usage: mlr having-fields [options] Conditionally passes through records depending on each record's field names. @@ -1853,6 +1863,14 @@ MILLER(1) MILLER(1) See also the "tee" DSL function which lets you do more ad-hoc customization. + 1mssub0m + Usage: mlr ssub [options] + Replaces old string with new string in specified field(s), without regex support for + the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mstats10m Usage: mlr stats1 [options] Computes univariate statistics for one or more given fields, accumulated across @@ -1990,6 +2008,15 @@ MILLER(1) MILLER(1) https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average for more information on EWMA. + 1msub0m + Usage: mlr sub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and not handling multiple matches, like the `sub` DSL function. + See also the `gsub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1msummary0m Usage: mlr summary [options] Show summary statistics about the input data. diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 1d59128536..7372e3768b 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -173,12 +173,13 @@ MILLER(1) MILLER(1) 1mVERB LIST0m altkv bar bootstrap case cat check clean-whitespace count-distinct count count-similar cut decimate fill-down fill-empty filter flatten format-values - fraction gap grep group-by group-like having-fields head histogram json-parse - json-stringify join label latin1-to-utf8 least-frequent merge-fields - most-frequent nest nothing put regularize remove-empty-columns rename reorder - repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records - sort sort-within-records split stats1 stats2 step summary tac tail tee - template top utf8-to-latin1 unflatten uniq unspace unsparsify + fraction gap grep group-by group-like gsub having-fields head histogram + json-parse json-stringify join label latin1-to-utf8 least-frequent + merge-fields most-frequent nest nothing put regularize remove-empty-columns + rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle + skip-trivial-records sort sort-within-records split ssub stats1 stats2 step + sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace + unsparsify 1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent @@ -1224,6 +1225,15 @@ MILLER(1) MILLER(1) Options: -h|--help Show this message. + 1mgsub0m + Usage: mlr gsub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and handling multiple matches, like the `gsub` DSL function. + See also the `sub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mhaving-fields0m Usage: mlr having-fields [options] Conditionally passes through records depending on each record's field names. @@ -1832,6 +1842,14 @@ MILLER(1) MILLER(1) See also the "tee" DSL function which lets you do more ad-hoc customization. + 1mssub0m + Usage: mlr ssub [options] + Replaces old string with new string in specified field(s), without regex support for + the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mstats10m Usage: mlr stats1 [options] Computes univariate statistics for one or more given fields, accumulated across @@ -1969,6 +1987,15 @@ MILLER(1) MILLER(1) https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average for more information on EWMA. + 1msub0m + Usage: mlr sub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and not handling multiple matches, like the `sub` DSL function. + See also the `gsub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1msummary0m Usage: mlr summary [options] Show summary statistics about the input data. diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index a9abbcfe56..6e9fbb4780 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -1447,6 +1447,55 @@ record_count resource 150 /path/to/second/file +## gsub + +
+mlr gsub -h ++
+Usage: mlr gsub [options] +Replaces old string with new string in specified field(s), with regex support +for the old string and handling multiple matches, like the `gsub` DSL function. +See also the `sub` and `ssub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. ++ +
+mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X ++
+filename color shape flag k index quantity rate +example.csv yeXlow triangXe true 1 11 43.6498 9.8870 +example.csv red square true 2 15 79.2778 0.0130 +example.csv red circXe true 3 16 13.8103 2.9010 +example.csv red square false 4 48 77.5542 7.4670 +example.csv purpXe triangXe false 5 51 81.2290 8.5910 +example.csv red square false 6 64 77.1991 9.5310 +example.csv purpXe triangXe false 7 65 80.1405 5.8240 +example.csv yeXlow circXe true 8 73 63.9785 4.2370 +example.csv yeXlow circXe true 9 87 63.5058 8.3350 +example.csv purpXe square false 10 91 72.3735 8.2430 ++ +
+mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X ++
+filename color shape flag k index quantity rate +example.csv yeXXow triangXe true 1 11 43.6498 9.8870 +example.csv red square true 2 15 79.2778 0.0130 +example.csv red circXe true 3 16 13.8103 2.9010 +example.csv red square false 4 48 77.5542 7.4670 +example.csv purpXe triangXe false 5 51 81.2290 8.5910 +example.csv red square false 6 64 77.1991 9.5310 +example.csv purpXe triangXe false 7 65 80.1405 5.8240 +example.csv yeXXow circXe true 8 73 63.9785 4.2370 +example.csv yeXXow circXe true 9 87 63.5058 8.3350 +example.csv purpXe square false 10 91 72.3735 8.2430 ++ ## having-fields
@@ -3120,6 +3169,54 @@ then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc. See also the "tee" DSL function which lets you do more ad-hoc customization.+## ssub + +
+mlr ssub -h ++
+Usage: mlr ssub [options] +Replaces old string with new string in specified field(s), without regex support for +the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. ++ +
+mlr --icsv --opprint --from example.csv cat --filename then sub -f filename . o ++
+filename color shape flag k index quantity rate +oxample.csv yellow triangle true 1 11 43.6498 9.8870 +oxample.csv red square true 2 15 79.2778 0.0130 +oxample.csv red circle true 3 16 13.8103 2.9010 +oxample.csv red square false 4 48 77.5542 7.4670 +oxample.csv purple triangle false 5 51 81.2290 8.5910 +oxample.csv red square false 6 64 77.1991 9.5310 +oxample.csv purple triangle false 7 65 80.1405 5.8240 +oxample.csv yellow circle true 8 73 63.9785 4.2370 +oxample.csv yellow circle true 9 87 63.5058 8.3350 +oxample.csv purple square false 10 91 72.3735 8.2430 ++ +
+mlr --icsv --opprint --from example.csv cat --filename then ssub -f filename . o ++
+filename color shape flag k index quantity rate +exampleocsv yellow triangle true 1 11 43.6498 9.8870 +exampleocsv red square true 2 15 79.2778 0.0130 +exampleocsv red circle true 3 16 13.8103 2.9010 +exampleocsv red square false 4 48 77.5542 7.4670 +exampleocsv purple triangle false 5 51 81.2290 8.5910 +exampleocsv red square false 6 64 77.1991 9.5310 +exampleocsv purple triangle false 7 65 80.1405 5.8240 +exampleocsv yellow circle true 8 73 63.9785 4.2370 +exampleocsv yellow circle true 9 87 63.5058 8.3350 +exampleocsv purple square false 10 91 72.3735 8.2430 ++ ## stats1
@@ -3574,6 +3671,55 @@ $ each 10 uptime | mlr -p step -a delta -f 11+## sub + +
+mlr sub -h ++
+Usage: mlr sub [options] +Replaces old string with new string in specified field(s), with regex support +for the old string and not handling multiple matches, like the `sub` DSL function. +See also the `gsub` and `ssub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. ++ +
+mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X ++
+filename color shape flag k index quantity rate +example.csv yeXlow triangXe true 1 11 43.6498 9.8870 +example.csv red square true 2 15 79.2778 0.0130 +example.csv red circXe true 3 16 13.8103 2.9010 +example.csv red square false 4 48 77.5542 7.4670 +example.csv purpXe triangXe false 5 51 81.2290 8.5910 +example.csv red square false 6 64 77.1991 9.5310 +example.csv purpXe triangXe false 7 65 80.1405 5.8240 +example.csv yeXlow circXe true 8 73 63.9785 4.2370 +example.csv yeXlow circXe true 9 87 63.5058 8.3350 +example.csv purpXe square false 10 91 72.3735 8.2430 ++ +
+mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X ++
+filename color shape flag k index quantity rate +example.csv yeXXow triangXe true 1 11 43.6498 9.8870 +example.csv red square true 2 15 79.2778 0.0130 +example.csv red circXe true 3 16 13.8103 2.9010 +example.csv red square false 4 48 77.5542 7.4670 +example.csv purpXe triangXe false 5 51 81.2290 8.5910 +example.csv red square false 6 64 77.1991 9.5310 +example.csv purpXe triangXe false 7 65 80.1405 5.8240 +example.csv yeXXow circXe true 8 73 63.9785 4.2370 +example.csv yeXXow circXe true 9 87 63.5058 8.3350 +example.csv purpXe square false 10 91 72.3735 8.2430 ++ ## summary
diff --git a/docs/src/reference-verbs.md.in b/docs/src/reference-verbs.md.in index 0ff0bd15dd..44feda3deb 100644 --- a/docs/src/reference-verbs.md.in +++ b/docs/src/reference-verbs.md.in @@ -487,6 +487,20 @@ GENMD-RUN-COMMAND mlr --opprint group-like data/het.dkvp GENMD-EOF +## gsub + +GENMD-RUN-COMMAND +mlr gsub -h +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X +GENMD-EOF + ## having-fields GENMD-RUN-COMMAND @@ -987,6 +1001,20 @@ GENMD-RUN-COMMAND mlr split --help GENMD-EOF +## ssub + +GENMD-RUN-COMMAND +mlr ssub -h +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --icsv --opprint --from example.csv cat --filename then sub -f filename . o +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --icsv --opprint --from example.csv cat --filename then ssub -f filename . o +GENMD-EOF + ## stats1 GENMD-RUN-COMMAND @@ -1095,6 +1123,20 @@ Example deriving uptime-delta from system uptime: GENMD-INCLUDE-ESCAPED(data/ping-delta-example.txt) +## sub + +GENMD-RUN-COMMAND +mlr sub -h +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X +GENMD-EOF + +GENMD-RUN-COMMAND +mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X +GENMD-EOF + ## summary GENMD-RUN-COMMAND diff --git a/internal/pkg/transformers/aaa_transformer_table.go b/internal/pkg/transformers/aaa_transformer_table.go index 60f490e0d8..ece90a8584 100644 --- a/internal/pkg/transformers/aaa_transformer_table.go +++ b/internal/pkg/transformers/aaa_transformer_table.go @@ -33,6 +33,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{ GrepSetup, GroupBySetup, GroupLikeSetup, + GsubSetup, HavingFieldsSetup, HeadSetup, HistogramSetup, @@ -62,9 +63,11 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{ SortSetup, SortWithinRecordsSetup, SplitSetup, + SsubSetup, Stats1Setup, Stats2Setup, StepSetup, + SubSetup, SummarySetup, TacSetup, TailSetup, diff --git a/internal/pkg/transformers/gsub.go b/internal/pkg/transformers/gsub.go new file mode 100644 index 0000000000..550aeda5af --- /dev/null +++ b/internal/pkg/transformers/gsub.go @@ -0,0 +1,157 @@ +package transformers + +import ( + "container/list" + "fmt" + "os" + "strings" + + "github.com/johnkerl/miller/internal/pkg/bifs" + "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/internal/pkg/types" +) + +// ---------------------------------------------------------------- +const verbNameGsub = "gsub" + +var GsubSetup = TransformerSetup{ + Verb: verbNameGsub, + UsageFunc: transformerGsubUsage, + ParseCLIFunc: transformerGsubParseCLI, + IgnoresInput: false, +} + +func transformerGsubUsage( + o *os.File, +) { + fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameGsub) + fmt.Fprintf(o, "Replaces old string with new string in specified field(s), with regex support\n") + fmt.Fprintf(o, "for the old string and handling multiple matches, like the `gsub` DSL function.\n") + fmt.Fprintf(o, "See also the `sub` and `ssub` verbs.\n") + fmt.Fprintf(o, "Options:\n") + fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") + fmt.Fprintf(o, "-h|--help Show this message.\n") +} + +func transformerGsubParseCLI( + pargi *int, + argc int, + args []string, + _ *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass +) IRecordTransformer { + + // Skip the verb name from the current spot in the mlr command line + argi := *pargi + verb := args[argi] + argi++ + + // Parse local flags + var fieldNames []string = nil + var oldText string + var newText string + + for argi < argc /* variable increment: 1 or 2 depending on flag */ { + opt := args[argi] + if !strings.HasPrefix(opt, "-") { + break // No more flag options to process + } + if args[argi] == "--" { + break // All transformers must do this so main-flags can follow verb-flags + } + argi++ + + if opt == "-h" || opt == "--help" { + transformerGsubUsage(os.Stdout) + os.Exit(0) + + } else if opt == "-f" { + fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + } else { + transformerGsubUsage(os.Stderr) + os.Exit(1) + } + } + + if fieldNames == nil { + transformerGsubUsage(os.Stderr) + os.Exit(1) + } + + // Get the old and new text from the command line + if (argc - argi) < 2 { + transformerGsubUsage(os.Stderr) + os.Exit(1) + } + oldText = args[argi] + newText = args[argi+1] + + argi += 2 + + *pargi = argi + if !doConstruct { // All transformers must do this for main command-line parsing + return nil + } + + transformer, err := NewTransformerGsub( + fieldNames, + oldText, + newText, + ) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + return transformer +} + +// ---------------------------------------------------------------- +type TransformerGsub struct { + fieldNames []string + oldText *mlrval.Mlrval + newText *mlrval.Mlrval +} + +// ---------------------------------------------------------------- +func NewTransformerGsub( + fieldNames []string, + oldText string, + newText string, +) (*TransformerGsub, error) { + tr := &TransformerGsub{ + fieldNames: fieldNames, + oldText: mlrval.FromString(oldText), + newText: mlrval.FromString(newText), + } + return tr, nil +} + +func (tr *TransformerGsub) Transform( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) + + if !inrecAndContext.EndOfStream { + inrec := inrecAndContext.Record + + for _, fieldName := range tr.fieldNames { + oldValue := inrec.Get(fieldName) + if oldValue == nil { + continue + } + + newValue := bifs.BIF_gsub(oldValue, tr.oldText, tr.newText) + + inrec.PutReference(fieldName, newValue) + } + + outputRecordsAndContexts.PushBack(inrecAndContext) + } else { + outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker + } +} diff --git a/internal/pkg/transformers/ssub.go b/internal/pkg/transformers/ssub.go new file mode 100644 index 0000000000..bd8e542473 --- /dev/null +++ b/internal/pkg/transformers/ssub.go @@ -0,0 +1,156 @@ +package transformers + +import ( + "container/list" + "fmt" + "os" + "strings" + + "github.com/johnkerl/miller/internal/pkg/bifs" + "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/internal/pkg/types" +) + +// ---------------------------------------------------------------- +const verbNameSsub = "ssub" + +var SsubSetup = TransformerSetup{ + Verb: verbNameSsub, + UsageFunc: transformerSsubUsage, + ParseCLIFunc: transformerSsubParseCLI, + IgnoresInput: false, +} + +func transformerSsubUsage( + o *os.File, +) { + fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSsub) + fmt.Fprintf(o, "Replaces old string with new string in specified field(s), without regex support for\n") + fmt.Fprintf(o, "the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.\n") + fmt.Fprintf(o, "Options:\n") + fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") + fmt.Fprintf(o, "-h|--help Show this message.\n") +} + +func transformerSsubParseCLI( + pargi *int, + argc int, + args []string, + _ *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass +) IRecordTransformer { + + // Skip the verb name from the current spot in the mlr command line + argi := *pargi + verb := args[argi] + argi++ + + // Parse local flags + var fieldNames []string = nil + var oldText string + var newText string + + for argi < argc /* variable increment: 1 or 2 depending on flag */ { + opt := args[argi] + if !strings.HasPrefix(opt, "-") { + break // No more flag options to process + } + if args[argi] == "--" { + break // All transformers must do this so main-flags can follow verb-flags + } + argi++ + + if opt == "-h" || opt == "--help" { + transformerSsubUsage(os.Stdout) + os.Exit(0) + + } else if opt == "-f" { + fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + } else { + transformerSsubUsage(os.Stderr) + os.Exit(1) + } + } + + if fieldNames == nil { + transformerSsubUsage(os.Stderr) + os.Exit(1) + } + + // Get the old and new text from the command line + if (argc - argi) < 2 { + transformerSsubUsage(os.Stderr) + os.Exit(1) + } + oldText = args[argi] + newText = args[argi+1] + + argi += 2 + + *pargi = argi + if !doConstruct { // All transformers must do this for main command-line parsing + return nil + } + + transformer, err := NewTransformerSsub( + fieldNames, + oldText, + newText, + ) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + return transformer +} + +// ---------------------------------------------------------------- +type TransformerSsub struct { + fieldNames []string + oldText *mlrval.Mlrval + newText *mlrval.Mlrval +} + +// ---------------------------------------------------------------- +func NewTransformerSsub( + fieldNames []string, + oldText string, + newText string, +) (*TransformerSsub, error) { + tr := &TransformerSsub{ + fieldNames: fieldNames, + oldText: mlrval.FromString(oldText), + newText: mlrval.FromString(newText), + } + return tr, nil +} + +func (tr *TransformerSsub) Transform( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) + + if !inrecAndContext.EndOfStream { + inrec := inrecAndContext.Record + + for _, fieldName := range tr.fieldNames { + oldValue := inrec.Get(fieldName) + if oldValue == nil { + continue + } + + newValue := bifs.BIF_ssub(oldValue, tr.oldText, tr.newText) + + inrec.PutReference(fieldName, newValue) + } + + outputRecordsAndContexts.PushBack(inrecAndContext) + } else { + outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker + } +} diff --git a/internal/pkg/transformers/sub.go b/internal/pkg/transformers/sub.go new file mode 100644 index 0000000000..eee7783624 --- /dev/null +++ b/internal/pkg/transformers/sub.go @@ -0,0 +1,157 @@ +package transformers + +import ( + "container/list" + "fmt" + "os" + "strings" + + "github.com/johnkerl/miller/internal/pkg/bifs" + "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/internal/pkg/types" +) + +// ---------------------------------------------------------------- +const verbNameSub = "sub" + +var SubSetup = TransformerSetup{ + Verb: verbNameSub, + UsageFunc: transformerSubUsage, + ParseCLIFunc: transformerSubParseCLI, + IgnoresInput: false, +} + +func transformerSubUsage( + o *os.File, +) { + fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSub) + fmt.Fprintf(o, "Replaces old string with new string in specified field(s), with regex support\n") + fmt.Fprintf(o, "for the old string and not handling multiple matches, like the `sub` DSL function.\n") + fmt.Fprintf(o, "See also the `gsub` and `ssub` verbs.\n") + fmt.Fprintf(o, "Options:\n") + fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") + fmt.Fprintf(o, "-h|--help Show this message.\n") +} + +func transformerSubParseCLI( + pargi *int, + argc int, + args []string, + _ *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass +) IRecordTransformer { + + // Skip the verb name from the current spot in the mlr command line + argi := *pargi + verb := args[argi] + argi++ + + // Parse local flags + var fieldNames []string = nil + var oldText string + var newText string + + for argi < argc /* variable increment: 1 or 2 depending on flag */ { + opt := args[argi] + if !strings.HasPrefix(opt, "-") { + break // No more flag options to process + } + if args[argi] == "--" { + break // All transformers must do this so main-flags can follow verb-flags + } + argi++ + + if opt == "-h" || opt == "--help" { + transformerSubUsage(os.Stdout) + os.Exit(0) + + } else if opt == "-f" { + fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + } else { + transformerSubUsage(os.Stderr) + os.Exit(1) + } + } + + if fieldNames == nil { + transformerSubUsage(os.Stderr) + os.Exit(1) + } + + // Get the old and new text from the command line + if (argc - argi) < 2 { + transformerSubUsage(os.Stderr) + os.Exit(1) + } + oldText = args[argi] + newText = args[argi+1] + + argi += 2 + + *pargi = argi + if !doConstruct { // All transformers must do this for main command-line parsing + return nil + } + + transformer, err := NewTransformerSub( + fieldNames, + oldText, + newText, + ) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + return transformer +} + +// ---------------------------------------------------------------- +type TransformerSub struct { + fieldNames []string + oldText *mlrval.Mlrval + newText *mlrval.Mlrval +} + +// ---------------------------------------------------------------- +func NewTransformerSub( + fieldNames []string, + oldText string, + newText string, +) (*TransformerSub, error) { + tr := &TransformerSub{ + fieldNames: fieldNames, + oldText: mlrval.FromString(oldText), + newText: mlrval.FromString(newText), + } + return tr, nil +} + +func (tr *TransformerSub) Transform( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) + + if !inrecAndContext.EndOfStream { + inrec := inrecAndContext.Record + + for _, fieldName := range tr.fieldNames { + oldValue := inrec.Get(fieldName) + if oldValue == nil { + continue + } + + newValue := bifs.BIF_sub(oldValue, tr.oldText, tr.newText) + + inrec.PutReference(fieldName, newValue) + } + + outputRecordsAndContexts.PushBack(inrecAndContext) + } else { + outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker + } +} diff --git a/man/manpage.txt b/man/manpage.txt index 1d59128536..7372e3768b 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -173,12 +173,13 @@ MILLER(1) MILLER(1) 1mVERB LIST0m altkv bar bootstrap case cat check clean-whitespace count-distinct count count-similar cut decimate fill-down fill-empty filter flatten format-values - fraction gap grep group-by group-like having-fields head histogram json-parse - json-stringify join label latin1-to-utf8 least-frequent merge-fields - most-frequent nest nothing put regularize remove-empty-columns rename reorder - repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records - sort sort-within-records split stats1 stats2 step summary tac tail tee - template top utf8-to-latin1 unflatten uniq unspace unsparsify + fraction gap grep group-by group-like gsub having-fields head histogram + json-parse json-stringify join label latin1-to-utf8 least-frequent + merge-fields most-frequent nest nothing put regularize remove-empty-columns + rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle + skip-trivial-records sort sort-within-records split ssub stats1 stats2 step + sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace + unsparsify 1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent @@ -1224,6 +1225,15 @@ MILLER(1) MILLER(1) Options: -h|--help Show this message. + 1mgsub0m + Usage: mlr gsub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and handling multiple matches, like the `gsub` DSL function. + See also the `sub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mhaving-fields0m Usage: mlr having-fields [options] Conditionally passes through records depending on each record's field names. @@ -1832,6 +1842,14 @@ MILLER(1) MILLER(1) See also the "tee" DSL function which lets you do more ad-hoc customization. + 1mssub0m + Usage: mlr ssub [options] + Replaces old string with new string in specified field(s), without regex support for + the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mstats10m Usage: mlr stats1 [options] Computes univariate statistics for one or more given fields, accumulated across @@ -1969,6 +1987,15 @@ MILLER(1) MILLER(1) https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average for more information on EWMA. + 1msub0m + Usage: mlr sub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and not handling multiple matches, like the `sub` DSL function. + See also the `gsub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1msummary0m Usage: mlr summary [options] Show summary statistics about the input data. diff --git a/man/mlr.1 b/man/mlr.1 index 583b5dc11f..4e1dc9ca3b 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -214,12 +214,13 @@ for all things with "map" in their names. .nf altkv bar bootstrap case cat check clean-whitespace count-distinct count count-similar cut decimate fill-down fill-empty filter flatten format-values -fraction gap grep group-by group-like having-fields head histogram json-parse -json-stringify join label latin1-to-utf8 least-frequent merge-fields -most-frequent nest nothing put regularize remove-empty-columns rename reorder -repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records -sort sort-within-records split stats1 stats2 step summary tac tail tee -template top utf8-to-latin1 unflatten uniq unspace unsparsify +fraction gap grep group-by group-like gsub having-fields head histogram +json-parse json-stringify join label latin1-to-utf8 least-frequent +merge-fields most-frequent nest nothing put regularize remove-empty-columns +rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle +skip-trivial-records sort sort-within-records split ssub stats1 stats2 step +sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace +unsparsify .fi .if n \{\ .RE @@ -1529,6 +1530,21 @@ Options: .fi .if n \{\ .RE +.SS "gsub" +.if n \{\ +.RS 0 +.\} +.nf +Usage: mlr gsub [options] +Replaces old string with new string in specified field(s), with regex support +for the old string and handling multiple matches, like the `gsub` DSL function. +See also the `sub` and `ssub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. +.fi +.if n \{\ +.RE .SS "having-fields" .if n \{\ .RS 0 @@ -2311,6 +2327,20 @@ See also the "tee" DSL function which lets you do more ad-hoc customization. .fi .if n \{\ .RE +.SS "ssub" +.if n \{\ +.RS 0 +.\} +.nf +Usage: mlr ssub [options] +Replaces old string with new string in specified field(s), without regex support for +the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. +.fi +.if n \{\ +.RE .SS "stats1" .if n \{\ .RS 0 @@ -2466,6 +2496,21 @@ for more information on EWMA. .fi .if n \{\ .RE +.SS "sub" +.if n \{\ +.RS 0 +.\} +.nf +Usage: mlr sub [options] +Replaces old string with new string in specified field(s), with regex support +for the old string and not handling multiple matches, like the `sub` DSL function. +See also the `gsub` and `ssub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. +.fi +.if n \{\ +.RE .SS "summary" .if n \{\ .RS 0 diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index d6f70fe41a..55efea8ac7 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -379,6 +379,16 @@ Outputs records in batches having identical field names. Options: -h|--help Show this message. +================================================================ +gsub +Usage: mlr gsub [options] +Replaces old string with new string in specified field(s), with regex support +for the old string and handling multiple matches, like the `gsub` DSL function. +See also the `sub` and `ssub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. + ================================================================ having-fields Usage: mlr having-fields [options] @@ -1016,6 +1026,15 @@ then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc. See also the "tee" DSL function which lets you do more ad-hoc customization. +================================================================ +ssub +Usage: mlr ssub [options] +Replaces old string with new string in specified field(s), without regex support for +the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. + ================================================================ stats1 Usage: mlr stats1 [options] @@ -1156,6 +1175,16 @@ Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#filter o https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average for more information on EWMA. +================================================================ +sub +Usage: mlr sub [options] +Replaces old string with new string in specified field(s), with regex support +for the old string and not handling multiple matches, like the `sub` DSL function. +See also the `gsub` and `ssub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. + ================================================================ summary Usage: mlr summary [options] diff --git a/test/cases/verb-case/x b/test/cases/verb-case/x deleted file mode 100644 index a24cc18bdf..0000000000 --- a/test/cases/verb-case/x +++ /dev/null @@ -1,13 +0,0 @@ -mkdir 0020; echo mlr --from test/input.cases-csv --c2j case -u > 0020/cmd -mkdir 0021; echo mlr --from test/input.cases-csv --c2j case -l > 0021/cmd -mkdir 0022; echo mlr --from test/input.cases-csv --c2j case -s > 0022/cmd -mkdir 0023; echo mlr --from test/input.cases-csv --c2j case -t > 0023/cmd -mkdir 0024; echo mlr --from test/input.cases-csv --c2j case -k -u > 0024/cmd -mkdir 0025; echo mlr --from test/input.cases-csv --c2j case -k -l > 0025/cmd -mkdir 0026; echo mlr --from test/input.cases-csv --c2j case -k -s > 0026/cmd -mkdir 0027; echo mlr --from test/input.cases-csv --c2j case -k -t > 0027/cmd -mkdir 0028; echo mlr --from test/input.cases-csv --c2j case -v -u > 0028/cmd -mkdir 0029; echo mlr --from test/input.cases-csv --c2j case -v -l > 0029/cmd -mkdir 0030; echo mlr --from test/input.cases-csv --c2j case -v -s > 0030/cmd -mkdir 0031; echo mlr --from test/input.cases-csv --c2j case -v -t > 0031/cmd -mkdir 0032; echo mlr --from test/input.cases-csv --c2j case -u apple,ball then case -l cat,dog > 0032/cmd diff --git a/test/cases/verb-sub-gsub-ssub/0001/cmd b/test/cases/verb-sub-gsub-ssub/0001/cmd new file mode 100644 index 0000000000..7d4cec775c --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/0001/cmd @@ -0,0 +1 @@ +mlr --d2p --from test/input/abixy sub -f a,b e X diff --git a/test/cases/verb-sub-gsub-ssub/0001/experr b/test/cases/verb-sub-gsub-ssub/0001/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-sub-gsub-ssub/0001/expout b/test/cases/verb-sub-gsub-ssub/0001/expout new file mode 100644 index 0000000000..917c3f5ed6 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/0001/expout @@ -0,0 +1,11 @@ +a b i x y +pan pan 1 0.34679014 0.72680286 +Xks pan 2 0.75867996 0.52215111 +wyX wyX 3 0.20460331 0.33831853 +Xks wyX 4 0.38139939 0.13418874 +wyX pan 5 0.57328892 0.86362447 +zXe pan 6 0.52712616 0.49322129 +Xks zXe 7 0.61178406 0.18788492 +zXe wyX 8 0.59855401 0.97618139 +hat wyX 9 0.03144188 0.74955076 +pan wyX 10 0.50262601 0.95261836 diff --git a/test/cases/verb-sub-gsub-ssub/0002/cmd b/test/cases/verb-sub-gsub-ssub/0002/cmd new file mode 100644 index 0000000000..f33200891d --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/0002/cmd @@ -0,0 +1 @@ +mlr --d2p --from test/input/abixy gsub -f a,b e X diff --git a/test/cases/verb-sub-gsub-ssub/0002/experr b/test/cases/verb-sub-gsub-ssub/0002/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-sub-gsub-ssub/0002/expout b/test/cases/verb-sub-gsub-ssub/0002/expout new file mode 100644 index 0000000000..49d53727b3 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/0002/expout @@ -0,0 +1,11 @@ +a b i x y +pan pan 1 0.34679014 0.72680286 +Xks pan 2 0.75867996 0.52215111 +wyX wyX 3 0.20460331 0.33831853 +Xks wyX 4 0.38139939 0.13418874 +wyX pan 5 0.57328892 0.86362447 +zXX pan 6 0.52712616 0.49322129 +Xks zXX 7 0.61178406 0.18788492 +zXX wyX 8 0.59855401 0.97618139 +hat wyX 9 0.03144188 0.74955076 +pan wyX 10 0.50262601 0.95261836 diff --git a/test/cases/verb-sub-gsub-ssub/0003/cmd b/test/cases/verb-sub-gsub-ssub/0003/cmd new file mode 100644 index 0000000000..ff6b15c4ac --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/0003/cmd @@ -0,0 +1 @@ +mlr --d2p --from test/input/abixy sub -f a,b . X diff --git a/test/cases/verb-sub-gsub-ssub/0003/experr b/test/cases/verb-sub-gsub-ssub/0003/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-sub-gsub-ssub/0003/expout b/test/cases/verb-sub-gsub-ssub/0003/expout new file mode 100644 index 0000000000..a8b8e86432 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/0003/expout @@ -0,0 +1,11 @@ +a b i x y +Xan Xan 1 0.34679014 0.72680286 +Xks Xan 2 0.75867996 0.52215111 +Xye Xye 3 0.20460331 0.33831853 +Xks Xye 4 0.38139939 0.13418874 +Xye Xan 5 0.57328892 0.86362447 +Xee Xan 6 0.52712616 0.49322129 +Xks Xee 7 0.61178406 0.18788492 +Xee Xye 8 0.59855401 0.97618139 +Xat Xye 9 0.03144188 0.74955076 +Xan Xye 10 0.50262601 0.95261836 diff --git a/test/cases/verb-sub-gsub-ssub/0004/cmd b/test/cases/verb-sub-gsub-ssub/0004/cmd new file mode 100644 index 0000000000..8770d578d5 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/0004/cmd @@ -0,0 +1 @@ +mlr --d2p --from test/input/abixy ssub -f a,b e X diff --git a/test/cases/verb-sub-gsub-ssub/0004/experr b/test/cases/verb-sub-gsub-ssub/0004/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-sub-gsub-ssub/0004/expout b/test/cases/verb-sub-gsub-ssub/0004/expout new file mode 100644 index 0000000000..917c3f5ed6 --- /dev/null +++ b/test/cases/verb-sub-gsub-ssub/0004/expout @@ -0,0 +1,11 @@ +a b i x y +pan pan 1 0.34679014 0.72680286 +Xks pan 2 0.75867996 0.52215111 +wyX wyX 3 0.20460331 0.33831853 +Xks wyX 4 0.38139939 0.13418874 +wyX pan 5 0.57328892 0.86362447 +zXe pan 6 0.52712616 0.49322129 +Xks zXe 7 0.61178406 0.18788492 +zXe wyX 8 0.59855401 0.97618139 +hat wyX 9 0.03144188 0.74955076 +pan wyX 10 0.50262601 0.95261836