k8sgpt-ai · AlexsJones · Apr 12, 2023 · Apr 9, 2023 · Apr 10, 2023 · Apr 10, 2023
@@ -227,6 +227,40 @@ _Output to JSON_
 k8sgpt analyze --explain --filter=Service --output=json
 ```
 
+_Anonymize during explain_
+
+```
+k8sgpt analyze --explain --filter=Service --output=json --anonymize
+```
+
+### How does anonymization work?
+
+With this option, the data is anonymized before being sent to the AI backend. During the analysis execution, `k8sgpt` retrieves sensitive data (Kubernetes object names, labels, etc.). This data is masked when sent to the AI backend and replaced by a key that can be used to de-anonymize the data when the solution is returned to the user.
+
+<details>
+
+1. Error reported during analysis:
+```bash
+Error: HorizontalPodAutoscaler uses StatefulSet/fake-deployment as ScaleTargetRef which does not exist.
+```
+
+2. Payload sent to the AI backend:
+```bash
+Error: HorizontalPodAutoscaler uses StatefulSet/tGLcCRcHa1Ce5Rs as ScaleTargetRef which does not exist.
+```
+
+3. Payload returned by the AI:
+```bash
+The Kubernetes system is trying to scale a StatefulSet named tGLcCRcHa1Ce5Rs using the HorizontalPodAutoscaler, but it cannot find the StatefulSet. The solution is to verify that the StatefulSet name is spelled correctly and exists in the same namespace as the HorizontalPodAutoscaler.
+```
+
+4. Payload returned to the user:
+```bash
+The Kubernetes system is trying to scale a StatefulSet named fake-deployment using the HorizontalPodAutoscaler, but it cannot find the StatefulSet. The solution is to verify that the StatefulSet name is spelled correctly and exists in the same namespace as the HorizontalPodAutoscaler.
+```
+
+</details>
+
+**Anonymization does not currently apply to events.**
+
 ## Upcoming major milestones
 
 - [ ] Multiple AI backend support

@@ -21,6 +21,7 @@ var (
 	language  string
 	nocache   bool
 	namespace string
+	anonymize bool
 )
 
 // AnalyzeCmd represents the problems command
@@ -85,7 +86,7 @@ var AnalyzeCmd = &cobra.Command{
 		}
 
 		if explain {
-			err := config.GetAIResults(output)
+			err := config.GetAIResults(output, anonymize)
 			if err != nil {
 				color.Red("Error: %v", err)
 				os.Exit(1)
@@ -113,6 +114,8 @@ func init() {
 	AnalyzeCmd.Flags().StringVarP(&namespace, "namespace", "n", "", "Namespace to analyze")
 	// no cache flag
 	AnalyzeCmd.Flags().BoolVarP(&nocache, "no-cache", "c", false, "Do not use cached data")
+	// anonymize flag
+	AnalyzeCmd.Flags().BoolVarP(&anonymize, "anonymize", "a", false, "Anonymize data before sending it to the AI backend. This flag masks sensitive data, such as Kubernetes object names and labels, by replacing it with a key. However, please note that this flag does not currently apply to events.")
 	// array of strings flag
 	AnalyzeCmd.Flags().StringSliceVarP(&filters, "filter", "f", []string{}, "Filter for these analyzers (e.g. Pod, PersistentVolumeClaim, Service, ReplicaSet)")
 	// explain flag

@@ -55,7 +55,6 @@ func (c *OpenAIClient) GetCompletion(ctx context.Context, prompt string) (string
 }
 
 func (a *OpenAIClient) Parse(ctx context.Context, prompt []string, nocache bool) (string, error) {
-	// parse the text with the AI backend
 	inputKey := strings.Join(prompt, " ")
 	// Check for cached data
 	sEnc := base64.StdEncoding.EncodeToString([]byte(inputKey))

@@ -11,6 +11,7 @@ import (
 	"github.com/k8sgpt-ai/k8sgpt/pkg/ai"
 	"github.com/k8sgpt-ai/k8sgpt/pkg/analyzer"
 	"github.com/k8sgpt-ai/k8sgpt/pkg/kubernetes"
+	"github.com/k8sgpt-ai/k8sgpt/pkg/util"
 	"github.com/schollz/progressbar/v3"
 	"github.com/spf13/viper"
 )
@@ -126,13 +127,13 @@ func (a *Analysis) PrintOutput() {
 		fmt.Printf("%s %s(%s)\n", color.CyanString("%d", n),
 			color.YellowString(result.Name), color.CyanString(result.ParentObject))
 		for _, err := range result.Error {
-			fmt.Printf("- %s %s\n", color.RedString("Error:"), color.RedString(err))
+			fmt.Printf("- %s %s\n", color.RedString("Error:"), color.RedString(err.Text))
 		}
 		fmt.Println(color.GreenString(result.Details + "\n"))
 	}
 }
 
-func (a *Analysis) GetAIResults(output string) error {
+func (a *Analysis) GetAIResults(output string, anonymize bool) error {
 	if len(a.Results) == 0 {
 		return nil
 	}
@@ -143,7 +144,17 @@ func (a *Analysis) GetAIResults(output string) error {
 	}
 
 	for index, analysis := range a.Results {
-		parsedText, err := a.AIClient.Parse(a.Context, analysis.Error, a.NoCache)
+		var texts []string
+
+		for _, failure := range analysis.Error {
+			if anonymize {
+				for _, s := range failure.Sensitive {
+					failure.Text = util.ReplaceIfMatch(failure.Text, s.Unmasked, s.Masked)
+				}
+			}
+			texts = append(texts, failure.Text)
+		}
+		parsedText, err := a.AIClient.Parse(a.Context, texts, a.NoCache)
 		if err != nil {
 			// FIXME: can we avoid checking if output is json multiple times?
 			//   maybe implement the progress bar better?
@@ -158,6 +169,15 @@ func (a *Analysis) GetAIResults(output string) error {
 				return fmt.Errorf("failed while calling AI provider %s: %v", a.AIClient.GetName(), err)
 			}
 		}
+
+		if anonymize {
+			for _, failure := range analysis.Error {
+				for _, s := range failure.Sensitive {
+					parsedText = strings.ReplaceAll(parsedText, s.Masked, s.Unmasked)
+				}
+			}
+		}
+
 		analysis.Details = parsedText
 		if output != "json" {
 			bar.Add(1)

@@ -3,9 +3,10 @@ package analysis
 import (
 	"encoding/json"
 	"fmt"
+	"testing"
+
 	"github.com/k8sgpt-ai/k8sgpt/pkg/analyzer"
 	"github.com/stretchr/testify/require"
-	"testing"
 )
 
 func TestAnalysis_NoProblemJsonOutput(t *testing.T) {
@@ -42,11 +43,16 @@ func TestAnalysis_ProblemJsonOutput(t *testing.T) {
 	analysis := Analysis{
 		Results: []analyzer.Result{
 			{
-				"Deployment",
-				"test-deployment",
-				[]string{"test-problem"},
-				"test-solution",
-				"parent-resource"},
+				Kind: "Deployment",
+				Name: "test-deployment",
+				Error: []analyzer.Failure{
+					{
+						Text:      "test-problem",
+						Sensitive: []analyzer.Sensitive{},
+					},
+				},
+				Details:      "test-solution",
+				ParentObject: "parent-resource"},
 		},
 		Namespace: "default",
 	}
@@ -55,11 +61,17 @@ func TestAnalysis_ProblemJsonOutput(t *testing.T) {
 		Status:   StateProblemDetected,
 		Problems: 1,
 		Results: []analyzer.Result{
-			{"Deployment",
-				"test-deployment",
-				[]string{"test-problem"},
-				"test-solution",
-				"parent-resource"},
+			{
+				Kind: "Deployment",
+				Name: "test-deployment",
+				Error: []analyzer.Failure{
+					{
+						Text:      "test-problem",
+						Sensitive: []analyzer.Sensitive{},
+					},
+				},
+				Details:      "test-solution",
+				ParentObject: "parent-resource"},
 		},
 	}
 
@@ -84,11 +96,20 @@ func TestAnalysis_MultipleProblemJsonOutput(t *testing.T) {
 	analysis := Analysis{
 		Results: []analyzer.Result{
 			{
-				"Deployment",
-				"test-deployment",
-				[]string{"test-problem", "another-test-problem"},
-				"test-solution",
-				"parent-resource"},
+				Kind: "Deployment",
+				Name: "test-deployment",
+				Error: []analyzer.Failure{
+					{
+						Text:      "test-problem",
+						Sensitive: []analyzer.Sensitive{},
+					},
+					{
+						Text:      "another-test-problem",
+						Sensitive: []analyzer.Sensitive{},
+					},
+				},
+				Details:      "test-solution",
+				ParentObject: "parent-resource"},
 		},
 		Namespace: "default",
 	}
@@ -97,11 +118,21 @@ func TestAnalysis_MultipleProblemJsonOutput(t *testing.T) {
 		Status:   StateProblemDetected,
 		Problems: 2,
 		Results: []analyzer.Result{
-			{"Deployment",
-				"test-deployment",
-				[]string{"test-problem", "another-test-problem"},
-				"test-solution",
-				"parent-resource"},
+			{
+				Kind: "Deployment",
+				Name: "test-deployment",
+				Error: []analyzer.Failure{
+					{
+						Text:      "test-problem",
+						Sensitive: []analyzer.Sensitive{},
+					},
+					{
+						Text:      "another-test-problem",
+						Sensitive: []analyzer.Sensitive{},
+					},
+				},
+				Details:      "test-solution",
+				ParentObject: "parent-resource"},
 		},
 	}
 

@@ -19,7 +19,7 @@ func (HpaAnalyzer) Analyze(a Analyzer) ([]Result, error) {
 	var preAnalysis = map[string]PreAnalysis{}
 
 	for _, hpa := range list.Items {
-		var failures []string
+		var failures []Failure
 
 		// check ScaleTargetRef exist
 		scaleTargetRef := hpa.Spec.ScaleTargetRef
@@ -47,11 +47,22 @@ func (HpaAnalyzer) Analyze(a Analyzer) ([]Result, error) {
 				scaleTargetRefNotFound = true
 			}
 		default:
-			failures = append(failures, fmt.Sprintf("HorizontalPodAutoscaler uses %s as ScaleTargetRef which does not possible option.", scaleTargetRef.Kind))
+			failures = append(failures, Failure{
+				Text:      fmt.Sprintf("HorizontalPodAutoscaler uses %s as ScaleTargetRef which is not an option.", scaleTargetRef.Kind),
+				Sensitive: []Sensitive{},
+			})
 		}
 
 		if scaleTargetRefNotFound {
-			failures = append(failures, fmt.Sprintf("HorizontalPodAutoscaler uses %s/%s as ScaleTargetRef which does not exist.", scaleTargetRef.Kind, scaleTargetRef.Name))
+			failures = append(failures, Failure{
+				Text: fmt.Sprintf("HorizontalPodAutoscaler uses %s/%s as ScaleTargetRef which does not exist.", scaleTargetRef.Kind, scaleTargetRef.Name),
+				Sensitive: []Sensitive{
+					{
+						Unmasked: scaleTargetRef.Name,
+						Masked:   util.MaskString(scaleTargetRef.Name),
+					},
+				},
+			})
 		}
 
 		if len(failures) > 0 {

@@ -101,7 +101,7 @@ func TestHPAAnalyzerWithUnsuportedScaleTargetRef(t *testing.T) {
 	var errorFound bool
 	for _, analysis := range analysisResults {
 		for _, err := range analysis.Error {
-			if strings.Contains(err, "does not possible option.") {
+			if strings.Contains(err.Text, "which is not an option.") {
 				errorFound = true
 				break
 			}
@@ -148,7 +148,7 @@ func TestHPAAnalyzerWithNonExistentScaleTargetRef(t *testing.T) {
 	var errorFound bool
 	for _, analysis := range analysisResults {
 		for _, err := range analysis.Error {
-			if strings.Contains(err, "does not exist.") {
+			if strings.Contains(err.Text, "does not exist.") {
 				errorFound = true
 				break
 			}

@@ -2,6 +2,7 @@ package analyzer
 
 import (
 	"fmt"
+
 	"github.com/k8sgpt-ai/k8sgpt/pkg/util"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
@@ -18,14 +19,26 @@ func (IngressAnalyzer) Analyze(a Analyzer) ([]Result, error) {
 	var preAnalysis = map[string]PreAnalysis{}
 
 	for _, ing := range list.Items {
-		var failures []string
+		var failures []Failure
 
 		// get ingressClassName
 		ingressClassName := ing.Spec.IngressClassName
 		if ingressClassName == nil {
 			ingClassValue := ing.Annotations["kubernetes.io/ingress.class"]
 			if ingClassValue == "" {
-				failures = append(failures, fmt.Sprintf("Ingress %s/%s does not specify an Ingress class.", ing.Namespace, ing.Name))
+				failures = append(failures, Failure{
+					Text: fmt.Sprintf("Ingress %s/%s does not specify an Ingress class.", ing.Namespace, ing.Name),
+					Sensitive: []Sensitive{
+						{
+							Unmasked: ing.Namespace,
+							Masked:   util.MaskString(ing.Namespace),
+						},
+						{
+							Unmasked: ing.Name,
+							Masked:   util.MaskString(ing.Name),
+						},
+					},
+				})
 			} else {
 				ingressClassName = &ingClassValue
 			}
@@ -35,7 +48,15 @@ func (IngressAnalyzer) Analyze(a Analyzer) ([]Result, error) {
 		if ingressClassName != nil {
 			_, err := a.Client.GetClient().NetworkingV1().IngressClasses().Get(a.Context, *ingressClassName, metav1.GetOptions{})
 			if err != nil {
-				failures = append(failures, fmt.Sprintf("Ingress uses the ingress class %s which does not exist.", *ingressClassName))
+				failures = append(failures, Failure{
+					Text: fmt.Sprintf("Ingress uses the ingress class %s which does not exist.", *ingressClassName),
+					Sensitive: []Sensitive{
+						{
+							Unmasked: *ingressClassName,
+							Masked:   util.MaskString(*ingressClassName),
+						},
+					},
+				})
 			}
 		}
 
@@ -45,15 +66,39 @@ func (IngressAnalyzer) Analyze(a Analyzer) ([]Result, error) {
 			for _, path := range rule.HTTP.Paths {
 				_, err := a.Client.GetClient().CoreV1().Services(ing.Namespace).Get(a.Context, path.Backend.Service.Name, metav1.GetOptions{})
 				if err != nil {
-					failures = append(failures, fmt.Sprintf("Ingress uses the service %s/%s which does not exist.", ing.Namespace, path.Backend.Service.Name))
+					failures = append(failures, Failure{
+						Text: fmt.Sprintf("Ingress uses the service %s/%s which does not exist.", ing.Namespace, path.Backend.Service.Name),
+						Sensitive: []Sensitive{
+							{
+								Unmasked: ing.Namespace,
+								Masked:   util.MaskString(ing.Namespace),
+							},
+							{
+								Unmasked: path.Backend.Service.Name,
+								Masked:   util.MaskString(path.Backend.Service.Name),
+							},
+						},
+					})
 				}
 			}
 		}
 
 		for _, tls := range ing.Spec.TLS {
 			_, err := a.Client.GetClient().CoreV1().Secrets(ing.Namespace).Get(a.Context, tls.SecretName, metav1.GetOptions{})
 			if err != nil {
-				failures = append(failures, fmt.Sprintf("Ingress uses the secret %s/%s as a TLS certificate which does not exist.", ing.Namespace, tls.SecretName))
+				failures = append(failures, Failure{
+					Text: fmt.Sprintf("Ingress uses the secret %s/%s as a TLS certificate which does not exist.", ing.Namespace, tls.SecretName),
+					Sensitive: []Sensitive{
+						{
+							Unmasked: ing.Namespace,
+							Masked:   util.MaskString(ing.Namespace),
+						},
+						{
+							Unmasked: tls.SecretName,
+							Masked:   util.MaskString(tls.SecretName),
+						},
+					},
+				})
 			}
 		}
 		if len(failures) > 0 {