Skip to content

Commit fd007c7

Browse files
authored
add ability to run github-experimental against private repos (#4508)
The existing implementation of github-experimental requires a GitHub token for the --object-discovery subcommand (currently the only subcommand); however, it didn't properly use that token to clone private repos. I used the existing logic in the github/connector.go file to enable cloning of private repositories.
1 parent bef5eb6 commit fd007c7

File tree

3 files changed

+55
-63
lines changed

3 files changed

+55
-63
lines changed

pkg/sources/github_experimental/github_experimental.go

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,26 @@ package github_experimental
22

33
import (
44
"fmt"
5-
"net/http"
65
"strings"
76

87
"github.com/go-logr/logr"
9-
"github.com/google/go-github/v67/github"
108
"google.golang.org/protobuf/proto"
119
"google.golang.org/protobuf/types/known/anypb"
1210

13-
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
1411
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
1512
"github.com/trufflesecurity/trufflehog/v3/pkg/giturl"
13+
"github.com/trufflesecurity/trufflehog/v3/pkg/log"
1614
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
1715
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
1816
"github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer"
1917
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
2018
"github.com/trufflesecurity/trufflehog/v3/pkg/sources/git"
19+
githubsource "github.com/trufflesecurity/trufflehog/v3/pkg/sources/github"
2120
)
2221

2322
const (
24-
SourceType = sourcespb.SourceType_SOURCE_TYPE_GITHUB_EXPERIMENTAL
23+
SourceType = sourcespb.SourceType_SOURCE_TYPE_GITHUB_EXPERIMENTAL
24+
cloudV3Endpoint = "https://api.github.com"
2525
)
2626

2727
type Source struct {
@@ -33,10 +33,9 @@ type Source struct {
3333
useCustomContentWriter bool
3434
git *git.Git
3535
scanOptions *git.ScanOptions
36-
httpClient *http.Client
3736
log logr.Logger
3837
conn *sourcespb.GitHubExperimental
39-
apiClient *github.Client
38+
connector githubsource.Connector
4039

4140
sources.Progress
4241
sources.CommonSourceUnitUnmarshaller
@@ -81,9 +80,6 @@ func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, so
8180
s.jobID = jobID
8281
s.verify = verify
8382

84-
s.httpClient = common.RetryableHTTPClientTimeout(60)
85-
s.apiClient = github.NewClient(s.httpClient)
86-
8783
var conn sourcespb.GitHubExperimental
8884
err = anypb.UnmarshalTo(connection, &conn, proto.UnmarshalOptions{})
8985
if err != nil {
@@ -95,6 +91,32 @@ func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, so
9591
return fmt.Errorf("error normalizing repo: %w", err)
9692
}
9793

94+
// Get the token from the connection
95+
token := s.conn.GetToken()
96+
if token == "" {
97+
return fmt.Errorf("token is required for GitHub Experimental source")
98+
}
99+
100+
// Redact token from logs for security
101+
log.RedactGlobally(token)
102+
103+
// Create authenticated connector using the TokenConnector pattern
104+
connector, err := githubsource.NewTokenConnector(
105+
aCtx,
106+
cloudV3Endpoint, // API endpoint
107+
token, // GitHub token
108+
"", // clonePath (empty for default)
109+
true, // authInUrl
110+
func(ctx context.Context, err error) bool {
111+
// Simple rate limit handler - can be enhanced later
112+
return false
113+
},
114+
)
115+
if err != nil {
116+
return fmt.Errorf("could not create GitHub connector: %w", err)
117+
}
118+
s.connector = connector
119+
98120
s.repoInfoCache = newRepoInfoCache()
99121

100122
cfg := &git.Config{

pkg/sources/github_experimental/object_discovery.go

Lines changed: 16 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,9 @@ import (
1414
"github.com/google/go-github/v67/github"
1515
"github.com/k0kubun/go-ansi"
1616
"github.com/schollz/progressbar/v3"
17-
"golang.org/x/oauth2"
1817

1918
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
2019
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
21-
"github.com/trufflesecurity/trufflehog/v3/pkg/sources/git"
2220
)
2321

2422
// Assumption: sleeping for 60 seconds is enough to reset the secondary rate limit
@@ -100,18 +98,7 @@ func (b *backoff) getValue() int {
10098
return int(b.value)
10199
}
102100

103-
// Github token
104-
var ghToken = ""
105-
106-
func getForksCount(owner, repoName string) (int, error) {
107-
ctx := context.Background()
108-
ts := oauth2.StaticTokenSource(
109-
&oauth2.Token{AccessToken: ghToken},
110-
)
111-
tc := oauth2.NewClient(ctx, ts)
112-
113-
client := github.NewClient(tc)
114-
101+
func getForksCount(ctx context.Context, client *github.Client, owner, repoName string) (int, error) {
115102
repo, _, err := client.Repositories.Get(ctx, owner, repoName)
116103
if err != nil {
117104
return 0, err
@@ -120,22 +107,6 @@ func getForksCount(owner, repoName string) (int, error) {
120107
return repo.GetForksCount(), nil
121108
}
122109

123-
func getGitHubUser() (string, error) {
124-
ctx := context.Background()
125-
ts := oauth2.StaticTokenSource(
126-
&oauth2.Token{AccessToken: ghToken},
127-
)
128-
tc := oauth2.NewClient(ctx, ts)
129-
130-
client := github.NewClient(tc)
131-
132-
ghUser, _, err := client.Users.Get(ctx, "")
133-
if err != nil {
134-
return "", err
135-
}
136-
return ghUser.GetLogin(), nil
137-
}
138-
139110
// runGitCommand runs a git command
140111
func runGitCommand(args []string) ([]byte, error) {
141112
cmd := exec.Command("git", args...)
@@ -322,7 +293,7 @@ func removeBySHA(existingCommits, newCommits []string, charLen int) []string {
322293
return filteredCommits
323294
}
324295

325-
func processCommits(ctx context.Context, needsProcessing []string, owner, repo, path string) {
296+
func processCommits(ctx context.Context, apiClient *github.Client, needsProcessing []string, owner, repo, path string) {
326297
repoCtx := context.WithValue(ctx, "repo", repo)
327298

328299
startingSize := float64(len(needsProcessing))
@@ -343,10 +314,12 @@ func processCommits(ctx context.Context, needsProcessing []string, owner, repo,
343314
chunk := needsProcessing[:chunkSize]
344315
needsProcessing = needsProcessing[chunkSize:]
345316

346-
commitData, err := checkHashes(owner, repo, chunk)
317+
commitData, err := checkHashes(repoCtx, apiClient, owner, repo, chunk)
347318
if err != nil {
348319
repoCtx.Logger().V(2).Info("Temporary error occurred in guessing commits", "error", err)
349-
needsProcessing = append(needsProcessing, chunk...)
320+
// Prepend the failed chunk to the FRONT of the queue for immediate retry
321+
// This ensures we retry the same hashes instead of moving to the next batch
322+
needsProcessing = append(chunk, needsProcessing...)
350323
queryChunkSize.errorOccurred()
351324
if strings.Contains(err.Error(), "You have exceeded a secondary rate limit") {
352325
repoCtx.Logger().V(2).Info("Reached secondary GitHub Rate Limit. Sleeping for 60 seconds.")
@@ -391,7 +364,7 @@ type responseData struct {
391364
Message string `json:"message"`
392365
}
393366

394-
func checkHashes(owner, repo string, hashes []string) (map[string][]string, error) {
367+
func checkHashes(ctx context.Context, client *github.Client, owner, repo string, hashes []string) (map[string][]string, error) {
395368
testCases := ""
396369
for _, h := range hashes {
397370
testCase := fmt.Sprintf(`
@@ -413,7 +386,6 @@ func checkHashes(owner, repo string, hashes []string) (map[string][]string, erro
413386
`, owner, repo, testCases)
414387

415388
headers := map[string]string{
416-
"Authorization": "Bearer " + ghToken,
417389
"Content-Type": "application/json",
418390
"Github-Verified-Fetch": "true",
419391
"X-Requested-With": "XMLHttpRequest",
@@ -426,7 +398,7 @@ func checkHashes(owner, repo string, hashes []string) (map[string][]string, erro
426398
return nil, fmt.Errorf("failed to marshal request body: %w", err)
427399
}
428400

429-
req, err := http.NewRequest("POST", "https://api.github.com/graphql", bytes.NewBuffer(requestBody))
401+
req, err := http.NewRequestWithContext(ctx, "POST", "https://api.github.com/graphql", bytes.NewBuffer(requestBody))
430402
if err != nil {
431403
return nil, fmt.Errorf("failed to create request: %w", err)
432404
}
@@ -435,8 +407,9 @@ func checkHashes(owner, repo string, hashes []string) (map[string][]string, erro
435407
req.Header.Set(key, value)
436408
}
437409

438-
client := &http.Client{}
439-
resp, err := client.Do(req)
410+
// Use the authenticated HTTP client from the GitHub API client
411+
// This client already has the Bearer token configured via OAuth2 transport
412+
resp, err := client.Client().Do(req)
440413
if err != nil {
441414
return nil, fmt.Errorf("python request error: %w", err)
442415
}
@@ -546,9 +519,6 @@ func downloadPatches(valid_cfor []string, path string) error {
546519

547520
// scanHiddenData scans hidden data (and non-hidden data) for secrets in a GitHub repository
548521
func (s *Source) EnumerateAndScanAllObjects(ctx context.Context, chunksChan chan *sources.Chunk) error {
549-
// assign github token to global variable
550-
ghToken = s.conn.GetToken()
551-
552522
// set collision threshold to user input
553523
collisionThreshold = float64(s.conn.CollisionThreshold)
554524

@@ -564,7 +534,7 @@ func (s *Source) EnumerateAndScanAllObjects(ctx context.Context, chunksChan chan
564534

565535
// get repo metadata and store in cacheRepoInfo
566536
repoCtx := context.WithValue(ctx, "repo", owner+"/"+repoName)
567-
ghRepo, _, err := s.apiClient.Repositories.Get(repoCtx, owner, repoName)
537+
ghRepo, _, err := s.connector.APIClient().Repositories.Get(repoCtx, owner, repoName)
568538
if err != nil {
569539
return fmt.Errorf("failed to fetch repository: %w", err)
570540
}
@@ -582,20 +552,14 @@ func (s *Source) EnumerateAndScanAllObjects(ctx context.Context, chunksChan chan
582552
return fmt.Errorf("failed to create .trufflehog folder in user's home directory: %w", err)
583553
}
584554

585-
// Get GitHub User tied to token
586-
ghUser, err := getGitHubUser()
587-
if err != nil {
588-
return fmt.Errorf("failed to get GitHub user details: %w", err)
589-
}
590-
591555
// get the number of forks
592-
forksCount, err := getForksCount(owner, repoName)
556+
forksCount, err := getForksCount(repoCtx, s.connector.APIClient(), owner, repoName)
593557
if err != nil {
594558
return fmt.Errorf("failed to get forks count: %w", err)
595559
}
596560

597-
// download the repo
598-
path, repo, err := git.CloneRepoUsingToken(ctx, ghToken, repoURL, "", ghUser, true)
561+
// download the repo using the authenticated connector
562+
path, repo, err := s.connector.Clone(ctx, repoURL)
599563
if err != nil {
600564
return fmt.Errorf("failed to clone the repository: %w", err)
601565
}
@@ -639,7 +603,7 @@ func (s *Source) EnumerateAndScanAllObjects(ctx context.Context, chunksChan chan
639603
possibleCommits = removeByShortSHA(invalidCommits, possibleCommits)
640604

641605
// Guess all possible commit hashes
642-
processCommits(ctx, possibleCommits, owner, repoName, folderPath)
606+
processCommits(ctx, s.connector.APIClient(), possibleCommits, owner, repoName, folderPath)
643607

644608
// Read in the new commits
645609
validHiddenCommits, err = readCommitsFromDisk(validHiddenCommit, folderPath)

pkg/sources/github_experimental/repo.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package github_experimental
22

33
import (
44
"fmt"
5+
"regexp"
56
"strings"
67
"sync"
78

@@ -62,10 +63,15 @@ func (s *Source) cacheRepoInfo(r *github.Repository) {
6263
}
6364

6465
func (s *Source) normalizeRepo(repo string) (string, error) {
65-
// If there's a '/', assume it's a URL and try to normalize it.
66-
if strings.ContainsRune(repo, '/') {
66+
// If it's a full URL (has protocol), normalize it
67+
if regexp.MustCompile(`^[a-z]+://`).MatchString(repo) {
6768
return giturl.NormalizeGithubRepo(repo)
6869
}
70+
// If it's a repository name (contains / but not http), convert to full URL first
71+
if strings.Contains(repo, "/") && !regexp.MustCompile(`^[a-z]+://`).MatchString(repo) {
72+
fullURL := "https://github.com/" + repo
73+
return giturl.NormalizeGithubRepo(fullURL)
74+
}
6975

7076
return "", fmt.Errorf("no repositories found for %s", repo)
7177
}

0 commit comments

Comments
 (0)