From 6997ad4e0dd8983e223e8ae34e505be447d86e46 Mon Sep 17 00:00:00 2001
From: Annette Wilson <33626088+annettejanewilson@users.noreply.github.com>
Date: Mon, 17 Jun 2024 15:05:26 +0100
Subject: [PATCH] fix(foreach): Remove shell invocation from foreach (#136)

* Remove shell invocation from foreach

As discussed in [issue 135], turbolift applies shell escaping
inconsistently on the arguments in foreach. Upon consideration, I think
there is no good reason to apply shell escaping at all. It makes
turbolift more fragile, more confusing and ultimately less flexible.

Instead, do not use a shell at all and all these problems disappear.
None of the examples in the README use this functionality. If users do
need to invoke a shell, for example to do redirection, it is much more
explicit and understandable for them if they do it themselves.

This PR:

* Removes the use of a shell, instead directly executing a subprocess
  based on the arguments provided.
* Removes custom handling of arguments. Users are directed to the
  standard GNU-style `--` to indicate that everything else on the
  command-line is an argument, not an option. This makes the code
  substantially simpler.

[issue 135]: #135

* Format foreach shell arguments nicely

Shell escape the arguments so you can tell when arguments have spaces in
them.

* Require -- before foreach command

Since we are deliberately removing custom handling of arguments, users
will find that if they don't include a double hypen (`--`) separator
before their foreach command, any options they pass to their command
will be interpreted by turbolift and either result in errors, or worse,
being quietly omitted from their command (e.g. `-v`), which could be
surprising and frustrating!

To head this off, refuse immediately if `--` either doesn't appear on
the command-line, or appears after one or more arguments.

* Reword foreach usage for accuracy

Make it clear that the double-hyphen separator `--` is mandatory and not
just a good idea.

* Wrap long usage message

Turns out Cobra doesn't wrap usage messages, so they look pretty ugly if
we don't pick a pretty conservative width and wrap by hand.

* Fix [flags] appearing at end of usage

Turns out Cobra searches your usage line for the specific text "[flags]"
and if it doesn't find it, shoves it on the end. Gross. For that reason,
let's remove explicit callout of any flags and move it back to *before*
the double-hyphen.

* Improve logged command format and add tests

Wrap braces around the logged command to delimit it clearly. Add unit
tests to cover argument formatting.

* Add foreach absolute path example

---------

Co-authored-by: Daniel Ranson <92924979+Dan7-7-7@users.noreply.github.com>
---
 README.md                   |  27 ++++--
 cmd/foreach/foreach.go      |  81 ++++++------------
 cmd/foreach/foreach_test.go | 163 +++++++++++-------------------------
 go.mod                      |   1 +
 go.sum                      |   2 +
 5 files changed, 97 insertions(+), 177 deletions(-)

diff --git a/README.md b/README.md
index 52960ff..afcc3b5 100644
--- a/README.md
+++ b/README.md
@@ -111,8 +111,8 @@ Occasionally you may need to work on different repo files. For instance the repo
 The default repo file is called `repos.txt` but you can override this on any command with the `--repos` flag.
 
 ```console
-turbolift foreach --repos repoFile1.txt sed 's/pattern1/replacement1/g'
-turbolift foreach --repos repoFile2.txt sed 's/pattern2/replacement2/g'
+turbolift foreach --repos repoFile1.txt -- sed 's/pattern1/replacement1/g'
+turbolift foreach --repos repoFile2.txt -- sed 's/pattern2/replacement2/g'
 ```
 
 
@@ -132,16 +132,27 @@ You can do this manually using an editor, using `sed` and similar commands, or u
 
 **You are free to use any tools that help get the job done.**
 
-If you wish to, you can run the same command against every repo using `turbolift foreach ...` (where `...` is the shell command you want to run).
+If you wish to, you can run the same command against every repo using `turbolift foreach -- ...` (where `...` is the command you want to run).
 
 For example, you might choose to:
 
-* `turbolift foreach rm somefile` - to delete a particular file
-* `turbolift foreach sed -i '' 's/foo/bar/g' somefile` - to find/replace in a common file
-* `turbolift foreach make test` - for example, to run tests (using any appropriate command to invoke the tests)
-* `turbolift foreach git add somefile` - to stage a file that you have created
+* `turbolift foreach -- rm somefile` - to delete a particular file
+* `turbolift foreach -- sed -i '' 's/foo/bar/g' somefile` - to find/replace in a common file
+* `turbolift foreach -- make test` - for example, to run tests (using any appropriate command to invoke the tests)
+* `turbolift foreach -- git add somefile` - to stage a file that you have created
+* `turbolift foreach -- sh -c 'grep needle haystack.txt > output.txt'` - use a shell to run a command using redirection
 
-At any time, if you need to update your working copy branches from the upstream, you can run `turbolift foreach git pull upstream master`.
+Remember that when the command runs the working directory will be the
+repository root. If you want to refer to files from elsewhere you need
+to provide an absolute path. You may find the `pwd` command helpful here.
+For example, to run a shell script from the current directory against
+each repository:
+
+```
+turbolift foreach -- sh "$(pwd)/script.sh"
+```
+
+At any time, if you need to update your working copy branches from the upstream, you can run `turbolift foreach -- git pull upstream master`.
 
 It is highly recommended that you run tests against affected repos, if it will help validate the changes you have made.
 
diff --git a/cmd/foreach/foreach.go b/cmd/foreach/foreach.go
index fe55cda..14bf4ca 100644
--- a/cmd/foreach/foreach.go
+++ b/cmd/foreach/foreach.go
@@ -16,9 +16,9 @@
 package foreach
 
 import (
+	"errors"
 	"os"
 	"path"
-	"strconv"
 	"strings"
 
 	"github.com/spf13/cobra"
@@ -27,66 +27,46 @@ import (
 	"github.com/skyscanner/turbolift/internal/colors"
 	"github.com/skyscanner/turbolift/internal/executor"
 	"github.com/skyscanner/turbolift/internal/logging"
+
+	"github.com/alessio/shellescape"
 )
 
 var exec executor.Executor = executor.NewRealExecutor()
 
 var (
 	repoFile string = "repos.txt"
-	helpFlag bool   = false
 )
 
-func parseForeachArgs(args []string) []string {
-	strippedArgs := make([]string, 0)
-MAIN:
-	for i := 0; i < len(args); i++ {
-		switch args[i] {
-		case "--repos":
-			repoFile = args[i+1]
-			i = i + 1
-		case "--help":
-			helpFlag = true
-		default:
-			// we've parsed everything that could be parsed; this is now the command
-			strippedArgs = append(strippedArgs, args[i:]...)
-			break MAIN
-		}
+func formatArguments(arguments []string) string {
+	quotedArgs := make([]string, len(arguments))
+	for i, arg := range arguments {
+		quotedArgs[i] = shellescape.Quote(arg)
 	}
-
-	return strippedArgs
+	return strings.Join(quotedArgs, " ")
 }
 
 func NewForeachCmd() *cobra.Command {
 	cmd := &cobra.Command{
-		Use:                   "foreach [flags] SHELL_COMMAND",
-		Short:                 "Run a shell command against each working copy",
-		Run:                   run,
-		Args:                  cobra.MinimumNArgs(1),
-		DisableFlagsInUseLine: true,
-		DisableFlagParsing:    true,
+		Use:   "foreach [flags] -- COMMAND [ARGUMENT...]",
+		Short: "Run COMMAND against each working copy",
+		Long:
+`Run COMMAND against each working copy. Make sure to include a
+double hyphen -- with space on both sides before COMMAND, as this
+marks that no further options should be interpreted by turbolift.`,
+		RunE: runE,
+		Args: cobra.MinimumNArgs(1),
 	}
 
-	// this flag will not be parsed (DisabledFlagParsing is on) but is here for the help context and auto complete
 	cmd.Flags().StringVar(&repoFile, "repos", "repos.txt", "A file containing a list of repositories to clone.")
 
 	return cmd
 }
 
-func run(c *cobra.Command, args []string) {
+func runE(c *cobra.Command, args []string) error {
 	logger := logging.NewLogger(c)
 
-	/*
-		Parsing is disabled for this command to make sure it doesn't capture flags from the subsequent command.
-		E.g.: turbolift foreach ls -l   <- here, the -l would be captured by foreach, not by ls
-		Because of this, we need a manual parsing of the arguments.
-		Assumption is the foreach arguments will be parsed before the command and its arguments.
-	*/
-	args = parseForeachArgs(args)
-
-	// check if the help flag was toggled
-	if helpFlag {
-		_ = c.Usage()
-		return
+	if c.ArgsLenAtDash() != 0 {
+		return errors.New("Use -- to separate command")
 	}
 
 	readCampaignActivity := logger.StartActivity("Reading campaign data (%s)", repoFile)
@@ -95,22 +75,19 @@ func run(c *cobra.Command, args []string) {
 	dir, err := campaign.OpenCampaign(options)
 	if err != nil {
 		readCampaignActivity.EndWithFailure(err)
-		return
+		return nil
 	}
 	readCampaignActivity.EndWithSuccess()
 
-	for i := range args {
-		if strings.Contains(args[i], " ") {
-			args[i] = strconv.Quote(args[i])
-		}
-	}
-	command := strings.Join(args, " ")
+	// We shell escape these to avoid ambiguity in our logs, and give
+	// the user something they could copy and paste.
+	prettyArgs := formatArguments(args)
 
 	var doneCount, skippedCount, errorCount int
 	for _, repo := range dir.Repos {
 		repoDirPath := path.Join("work", repo.OrgName, repo.RepoName) // i.e. work/org/repo
 
-		execActivity := logger.StartActivity("Executing %s in %s", command, repoDirPath)
+		execActivity := logger.StartActivity("Executing { %s } in %s", prettyArgs, repoDirPath)
 
 		// skip if the working copy does not exist
 		if _, err = os.Stat(repoDirPath); os.IsNotExist(err) {
@@ -119,13 +96,7 @@ func run(c *cobra.Command, args []string) {
 			continue
 		}
 
-		// Execute within a shell so that piping, redirection, etc are possible
-		shellCommand := os.Getenv("SHELL")
-		if shellCommand == "" {
-			shellCommand = "sh"
-		}
-		shellArgs := []string{"-c", command}
-		err := exec.Execute(execActivity.Writer(), repoDirPath, shellCommand, shellArgs...)
+		err := exec.Execute(execActivity.Writer(), repoDirPath, args[0], args[1:]...)
 
 		if err != nil {
 			execActivity.EndWithFailure(err)
@@ -141,4 +112,6 @@ func run(c *cobra.Command, args []string) {
 	} else {
 		logger.Warnf("turbolift foreach completed with %s %s(%s, %s, %s)\n", colors.Red("errors"), colors.Normal(), colors.Green(doneCount, " OK"), colors.Yellow(skippedCount, " skipped"), colors.Red(errorCount, " errored"))
 	}
+
+	return nil
 }
diff --git a/cmd/foreach/foreach_test.go b/cmd/foreach/foreach_test.go
index de3de2b..da97ece 100644
--- a/cmd/foreach/foreach_test.go
+++ b/cmd/foreach/foreach_test.go
@@ -26,101 +26,6 @@ import (
 	"github.com/skyscanner/turbolift/internal/testsupport"
 )
 
-func TestParseForEachArgs(t *testing.T) {
-	testCases := []struct {
-		Name                 string
-		Args                 []string
-		ExpectedCommand      []string
-		ExpectedRepoFileName string
-		ExpectedHelpFlag     bool
-	}{
-		{
-			Name:                 "simple command",
-			Args:                 []string{"ls", "-l"},
-			ExpectedCommand:      []string{"ls", "-l"},
-			ExpectedRepoFileName: "repos.txt",
-			ExpectedHelpFlag:     false,
-		},
-		{
-			Name:                 "advanced command",
-			Args:                 []string{"sed", "-e", "'s/foo/bar/'", "-e", "'s/bar/baz/'"},
-			ExpectedCommand:      []string{"sed", "-e", "'s/foo/bar/'", "-e", "'s/bar/baz/'"},
-			ExpectedRepoFileName: "repos.txt",
-			ExpectedHelpFlag:     false,
-		},
-		{
-			Name:                 "simple command with repo flag",
-			Args:                 []string{"--repos", "test.txt", "ls", "-l"},
-			ExpectedCommand:      []string{"ls", "-l"},
-			ExpectedRepoFileName: "test.txt",
-			ExpectedHelpFlag:     false,
-		},
-		{
-			Name:                 "advanced command with repos flag",
-			Args:                 []string{"--repos", "test2.txt", "sed", "-e", "'s/foo/bar/'", "-e", "'s/bar/baz/'"},
-			ExpectedCommand:      []string{"sed", "-e", "'s/foo/bar/'", "-e", "'s/bar/baz/'"},
-			ExpectedRepoFileName: "test2.txt",
-			ExpectedHelpFlag:     false,
-		},
-		{
-			Name:                 "repos flag should only be caught when at the beginning",
-			Args:                 []string{"ls", "-l", "--repos", "random.txt"},
-			ExpectedCommand:      []string{"ls", "-l", "--repos", "random.txt"},
-			ExpectedRepoFileName: "repos.txt",
-			ExpectedHelpFlag:     false,
-		},
-		{
-			Name:                 "random flag is not caught",
-			Args:                 []string{"--random", "arg", "ls", "-l"},
-			ExpectedCommand:      []string{"--random", "arg", "ls", "-l"},
-			ExpectedRepoFileName: "repos.txt",
-			ExpectedHelpFlag:     false,
-		},
-		{
-			Name:                 "Help flag is triggered",
-			Args:                 []string{"--help"},
-			ExpectedCommand:      []string{},
-			ExpectedRepoFileName: "repos.txt",
-			ExpectedHelpFlag:     true,
-		},
-		{
-			Name:                 "Help flag is triggered after the repo one",
-			Args:                 []string{"--repos", "example.txt", "--help", "thecommand"},
-			ExpectedCommand:      []string{"thecommand"},
-			ExpectedRepoFileName: "example.txt",
-			ExpectedHelpFlag:     true,
-		},
-		{
-			Name:                 "Help flag is triggered before the repo one",
-			Args:                 []string{"--help", "--repos", "example.txt", "newcommand", "anotherarg"},
-			ExpectedCommand:      []string{"newcommand", "anotherarg"},
-			ExpectedRepoFileName: "example.txt",
-			ExpectedHelpFlag:     true,
-		},
-		{
-			Name:                 "Help flag is not triggered from a subsequent command",
-			Args:                 []string{"command", "--help"},
-			ExpectedCommand:      []string{"command", "--help"},
-			ExpectedRepoFileName: "repos.txt",
-			ExpectedHelpFlag:     false,
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.Name, func(t *testing.T) {
-			actual := parseForeachArgs(tc.Args)
-			t.Log(actual)
-			assert.EqualValues(t, tc.ExpectedCommand, actual)
-			assert.Equal(t, repoFile, tc.ExpectedRepoFileName)
-			assert.Equal(t, helpFlag, tc.ExpectedHelpFlag)
-
-			// Cleanup to default repo file name
-			repoFile = "repos.txt"
-			helpFlag = false
-		})
-	}
-}
-
 func TestItRejectsEmptyArgs(t *testing.T) {
 	fakeExecutor := executor.NewAlwaysSucceedsFakeExecutor()
 	exec = fakeExecutor
@@ -128,43 +33,59 @@ func TestItRejectsEmptyArgs(t *testing.T) {
 	testsupport.PrepareTempCampaign(true, "org/repo1", "org/repo2")
 
 	out, err := runCommand([]string{}...)
-	assert.Errorf(t, err, "requires at least 1 arg(s), only received 0")
+	assert.Error(t, err, "Expected an error to be returned")
 	assert.Contains(t, out, "Usage")
 
 	fakeExecutor.AssertCalledWith(t, [][]string{})
 }
 
-func TestItRunsCommandInShellAgainstWorkingCopies(t *testing.T) {
+func TestItRejectsCommandWithoutDashes(t *testing.T) {
 	fakeExecutor := executor.NewAlwaysSucceedsFakeExecutor()
 	exec = fakeExecutor
 
 	testsupport.PrepareTempCampaign(true, "org/repo1", "org/repo2")
 
 	out, err := runCommand("some", "command")
+	assert.Error(t, err, "Expected an error to be returned")
+	assert.Contains(t, out, "Usage")
+
+	fakeExecutor.AssertCalledWith(t, [][]string{})
+}
+
+func TestItRunsCommandWithoutShellAgainstWorkingCopies(t *testing.T) {
+	fakeExecutor := executor.NewAlwaysSucceedsFakeExecutor()
+	exec = fakeExecutor
+
+	testsupport.PrepareTempCampaign(true, "org/repo1", "org/repo2")
+
+	out, err := runCommand("--", "some", "command")
 	assert.NoError(t, err)
 	assert.Contains(t, out, "turbolift foreach completed")
 	assert.Contains(t, out, "2 OK, 0 skipped")
 
 	fakeExecutor.AssertCalledWith(t, [][]string{
-		{"work/org/repo1", userShell(), "-c", "some command"},
-		{"work/org/repo2", userShell(), "-c", "some command"},
+		{"work/org/repo1", "some", "command"},
+		{"work/org/repo2", "some", "command"},
 	})
 }
 
-func TestItRunsCommandQuotedInShellAgainstWorkingCopied(t *testing.T) {
+func TestItRunsCommandWithSpacesAgainstWorkingCopied(t *testing.T) {
 	fakeExecutor := executor.NewAlwaysSucceedsFakeExecutor()
 	exec = fakeExecutor
 
 	testsupport.PrepareTempCampaign(true, "org/repo1", "org/repo2")
 
-	out, err := runCommand("some", "command", "with spaces")
+	out, err := runCommand("--", "some", "command", "with spaces")
 	assert.NoError(t, err)
 	assert.Contains(t, out, "turbolift foreach completed")
 	assert.Contains(t, out, "2 OK, 0 skipped")
+	assert.Contains(t, out,
+		"Executing { some command 'with spaces' } in work/org/repo1",
+		"It should format the executed command accurately")
 
 	fakeExecutor.AssertCalledWith(t, [][]string{
-		{"work/org/repo1", userShell(), "-c", "some command \"with spaces\""},
-		{"work/org/repo2", userShell(), "-c", "some command \"with spaces\""},
+		{"work/org/repo1", "some", "command", "with spaces"},
+		{"work/org/repo2", "some", "command", "with spaces"},
 	})
 }
 
@@ -175,13 +96,13 @@ func TestItSkipsMissingWorkingCopies(t *testing.T) {
 	testsupport.PrepareTempCampaign(true, "org/repo1", "org/repo2")
 	_ = os.Remove("work/org/repo2")
 
-	out, err := runCommand("some", "command")
+	out, err := runCommand("--", "some", "command")
 	assert.NoError(t, err)
 	assert.Contains(t, out, "turbolift foreach completed")
 	assert.Contains(t, out, "1 OK, 1 skipped")
 
 	fakeExecutor.AssertCalledWith(t, [][]string{
-		{"work/org/repo1", userShell(), "-c", "some command"},
+		{"work/org/repo1", "some", "command"},
 	})
 }
 
@@ -191,14 +112,14 @@ func TestItContinuesOnAndRecordsFailures(t *testing.T) {
 
 	testsupport.PrepareTempCampaign(true, "org/repo1", "org/repo2")
 
-	out, err := runCommand("some", "command")
+	out, err := runCommand("--", "some", "command")
 	assert.NoError(t, err)
 	assert.Contains(t, out, "turbolift foreach completed with errors")
 	assert.Contains(t, out, "0 OK, 0 skipped, 2 errored")
 
 	fakeExecutor.AssertCalledWith(t, [][]string{
-		{"work/org/repo1", userShell(), "-c", "some command"},
-		{"work/org/repo2", userShell(), "-c", "some command"},
+		{"work/org/repo1", "some", "command"},
+		{"work/org/repo2", "some", "command"},
 	})
 }
 
@@ -208,12 +129,12 @@ func TestHelpFlagReturnsUsage(t *testing.T) {
 
 	testsupport.PrepareTempCampaign(true, "org/repo1", "org/repo2")
 
-	out, err := runCommand("--help", "command1")
+	out, err := runCommand("--help", "--", "command1")
 	t.Log(out)
 	assert.NoError(t, err)
 	// should return usage
 	assert.Contains(t, out, "Usage:")
-	assert.Contains(t, out, "foreach [flags] SHELL_COMMAND")
+	assert.Contains(t, out, "foreach [flags] -- COMMAND [ARGUMENT...]")
 	assert.Contains(t, out, "Flags:")
 	assert.Contains(t, out, "help for foreach")
 
@@ -221,12 +142,24 @@ func TestHelpFlagReturnsUsage(t *testing.T) {
 	fakeExecutor.AssertCalledWith(t, [][]string{})
 }
 
-func userShell() string {
-	shell := os.Getenv("SHELL")
-	if shell == "" {
-		return "sh"
+func TestFormatArguments(t *testing.T) {
+	// Don't go too heavy here. We are not seeking to exhaustively test
+	// shellescape. We just want to make sure formatArguments works.
+	var tests = []struct {
+		input    []string
+		expected string
+		title    string
+	}{
+		{[]string{""}, `''`, "Empty arg should be quoted"},
+		{[]string{"one two"}, `'one two'`, "Arg with space should be quoted"},
+		{[]string{"one"}, `one`, "Plain arg should not need quotes"},
+		{[]string{}, ``, "Empty arg list should give empty string"},
+		{[]string{"x", "", "y y"}, `x '' 'y y'`, "Args should be separated with spaces"},
+	}
+	for _, test := range tests {
+		actual := formatArguments(test.input)
+		assert.Equal(t, actual, test.expected, test.title)
 	}
-	return shell
 }
 
 func runCommand(args ...string) (string, error) {
diff --git a/go.mod b/go.mod
index 0c462d6..66fd088 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module github.com/skyscanner/turbolift
 go 1.16
 
 require (
+	github.com/alessio/shellescape v1.4.2 // indirect
 	github.com/briandowns/spinner v1.15.0
 	github.com/fatih/color v1.12.0
 	github.com/manifoldco/promptui v0.9.0
diff --git a/go.sum b/go.sum
index 167c699..0009688 100644
--- a/go.sum
+++ b/go.sum
@@ -16,6 +16,8 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym
 github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
 github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
 github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
+github.com/alessio/shellescape v1.4.2 h1:MHPfaU+ddJ0/bYWpgIeUnQUqKrlJ1S7BfEYPM4uEoM0=
+github.com/alessio/shellescape v1.4.2/go.mod h1:PZAiSCk0LJaZkiCSkPv8qIobYglO3FPpyFjDCtHLS30=
 github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
 github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
 github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=