diff --git a/pkg/detectors/databrickstoken/databrickstoken.go b/pkg/detectors/databrickstoken/databrickstoken.go index 7ce0cfc99baa..72e48e6b07c6 100644 --- a/pkg/detectors/databrickstoken/databrickstoken.go +++ b/pkg/detectors/databrickstoken/databrickstoken.go @@ -12,17 +12,19 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) -type Scanner struct{} +type Scanner struct{ + client *http.Client +} // Ensure the Scanner satisfies the interface at compile time. var _ detectors.Detector = (*Scanner)(nil) var ( - client = common.SaneHttpClient() + defaultClient = common.SaneHttpClient() // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. - domain = regexp.MustCompile(`\b(https:\/\/[a-z0-9-]+\.cloud\.databricks\.com)\b`) - keyPat = regexp.MustCompile(`\b(dapi[a-z0-9]{32})\b`) + domain = regexp.MustCompile(`\b([a-z0-9-]+(?:\.[a-z0-9-]+)*\.(cloud\.databricks\.com|gcp\.databricks\.com|azurewebsites\.net))\b`) + keyPat = regexp.MustCompile(`\b(dapi[0-9a-f]{32})(-\d)?\b`) ) // Keywords are used for efficiently pre-filtering chunks. @@ -39,15 +41,9 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result domainMatches := domain.FindAllStringSubmatch(dataStr, -1) for _, match := range matches { - if len(match) != 2 { - continue - } resMatch := strings.TrimSpace(match[1]) for _, domainmatch := range domainMatches { - if len(domainmatch) != 2 { - continue - } resDomainMatch := strings.TrimSpace(domainmatch[1]) s1 := detectors.Result{ @@ -57,7 +53,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result } if verify { - req, err := http.NewRequestWithContext(ctx, "GET", resDomainMatch + "/api/2.0/clusters/list", nil) + client := s.client + if client == nil { + client = defaultClient + } + req, err := http.NewRequestWithContext(ctx, "GET", "https://" + resDomainMatch + "/api/2.0/clusters/list", nil) if err != nil { continue } @@ -67,14 +67,18 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result defer res.Body.Close() if res.StatusCode >= 200 && res.StatusCode < 300 { s1.Verified = true + } else if res.StatusCode == 403 { + // nothing to do here } else { - // This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key. - if detectors.IsKnownFalsePositive(resMatch, detectors.DefaultFalsePositives, true) { - continue - } + s1.VerificationError = fmt.Errorf("unexpected HTTP response status %d", res.StatusCode) } + } else { + s1.VerificationError = err } } + if !s1.Verified && detectors.IsKnownFalsePositive(string(s1.Raw), detectors.DefaultFalsePositives, true) { + continue + } results = append(results, s1) } diff --git a/pkg/detectors/databrickstoken/databrickstoken_test.go b/pkg/detectors/databrickstoken/databrickstoken_test.go index c25f21c8921c..92911f9d0e3e 100644 --- a/pkg/detectors/databrickstoken/databrickstoken_test.go +++ b/pkg/detectors/databrickstoken/databrickstoken_test.go @@ -6,17 +6,18 @@ package databrickstoken import ( "context" "fmt" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "testing" "time" - "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) -func TestDatabrickstoken_FromChunk(t *testing.T) { +func TestDatabricksToken_FromChunk(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) defer cancel() testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") @@ -25,6 +26,7 @@ func TestDatabrickstoken_FromChunk(t *testing.T) { } secret := testSecrets.MustGetField("DATABRICKSTOKEN") inactiveSecret := testSecrets.MustGetField("DATABRICKSTOKEN_INACTIVE") + domain := testSecrets.MustGetField("DATABRICKSTOKEN_DOMAIN") type args struct { ctx context.Context @@ -32,18 +34,19 @@ func TestDatabrickstoken_FromChunk(t *testing.T) { verify bool } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + s Scanner + args args + want []detectors.Result + wantErr bool + wantVerificationErr bool }{ { name: "found, verified", s: Scanner{}, args: args{ ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a databrickstoken secret %s within", secret)), + data: []byte(fmt.Sprintf("You can find a databrickstoken secret %s within %s", secret, domain)), verify: true, }, want: []detectors.Result{ @@ -52,14 +55,15 @@ func TestDatabrickstoken_FromChunk(t *testing.T) { Verified: true, }, }, - wantErr: false, + wantErr: false, + wantVerificationErr: false, }, { name: "found, unverified", s: Scanner{}, args: args{ ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a databrickstoken secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + data: []byte(fmt.Sprintf("You can find a databrickstoken secret %s within %s but not valid", inactiveSecret, domain)), // the secret would satisfy the regex but not pass validation verify: true, }, want: []detectors.Result{ @@ -68,7 +72,8 @@ func TestDatabrickstoken_FromChunk(t *testing.T) { Verified: false, }, }, - wantErr: false, + wantErr: false, + wantVerificationErr: false, }, { name: "not found", @@ -78,14 +83,48 @@ func TestDatabrickstoken_FromChunk(t *testing.T) { data: []byte("You cannot find the secret within"), verify: true, }, - want: nil, - wantErr: false, + want: nil, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "found, would be verified if not for timeout", + s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a databrickstoken secret %s within %s", secret, domain)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_DatabricksToken, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: true, + }, + { + name: "found, verified but unexpected api surface", + s: Scanner{client: common.ConstantResponseHttpClient(404, "")}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a databrickstoken secret %s within %s", secret, domain)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_DatabricksToken, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) if (err != nil) != tt.wantErr { t.Errorf("Databrickstoken.FromData() error = %v, wantErr %v", err, tt.wantErr) return @@ -94,10 +133,13 @@ func TestDatabrickstoken_FromChunk(t *testing.T) { if len(got[i].Raw) == 0 { t.Fatalf("no raw secret present: \n %+v", got[i]) } - got[i].Raw = nil + if (got[i].VerificationError != nil) != tt.wantVerificationErr { + t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError) + } } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Databrickstoken.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "RawV2", "VerificationError") + if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { + t.Errorf("DatabricksToken.FromData() %s diff: (-got +want)\n%s", tt.name, diff) } }) }