feat: surface the duration and error metrics for each source when enumerating domains (#727)

* feat: Surface the time taken in the scrape result

- Add a duration field to the `subscraping.Result` struct
- Populate the `TimeTaken` field before passing to the result channel

Resolves #726
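
For reference, the Statistics struct these changes populate can be sketched from its usage in the diff below; the exact definition lives in the subscraping package and is not shown in this excerpt:

type Statistics struct {
	TimeTaken time.Duration // wall-clock time the source spent enumerating
	Errors    int           // errors the source reported while running
	Results   int           // subdomains the source emitted
	Skipped   bool          // set when the source was skipped, e.g. missing API key
}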

* fix: move the timing into the sources

- sources are now responsible for their own timekeeping
- skip commoncrawl in the no-auth test; it consistently fails with a timeout
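
Each source keeps time the same way: capture the start time when the worker goroutine is spawned and record the elapsed duration in a deferred function, as in the source diffs below:

go func() {
	// Record how long this source ran; the same deferred call closes
	// the results channel once enumeration finishes.
	defer func(startTime time.Time) {
		s.timeTaken = time.Since(startTime)
		close(results)
	}(time.Now())
	// ... query the service and stream results ...
}()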

* feat: Add statistics

- add a stats flag to the debug group
- print the time taken, number of results, and errors for each source that ran
- print stats for each run of the agent
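
In the runner this hangs together as follows (see v2/pkg/runner/enumerate.go below):

if r.options.Statistics {
	gologger.Info().Msgf("Printing source statistics for '%s'", domain)
	printStatistics(r.passiveAgent.GetStatistics())
}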

* chore: fix linter error and use gologger rather than fmt print
owenrumney authored Dec 15, 2022
1 parent 27d4087 commit 8f499e9
Showing 44 changed files with 1,091 additions and 118 deletions.
33 changes: 16 additions & 17 deletions v2/pkg/passive/passive.go
@@ -3,10 +3,10 @@ package passive
import (
"context"
"fmt"
"sort"
"sync"
"time"

"github.com/projectdiscovery/gologger"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping"
)

@@ -18,41 +18,40 @@ func (a *Agent) EnumerateSubdomains(domain string, proxy string, rateLimit, time

session, err := subscraping.NewSession(domain, proxy, rateLimit, timeout)
if err != nil {
-results <- subscraping.Result{Type: subscraping.Error, Error: fmt.Errorf("could not init passive session for %s: %s", domain, err)}
results <- subscraping.Result{
Type: subscraping.Error, Error: fmt.Errorf("could not init passive session for %s: %s", domain, err),
}
return
}

ctx, cancel := context.WithTimeout(context.Background(), maxEnumTime)

-timeTaken := make(map[string]string)
-timeTakenMutex := &sync.Mutex{}

wg := &sync.WaitGroup{}
// Run each source in parallel on the target domain
for _, runner := range a.sources {
wg.Add(1)

-now := time.Now()
go func(source subscraping.Source) {
for resp := range source.Run(ctx, domain, session) {
results <- resp
}

-duration := time.Since(now)
-timeTakenMutex.Lock()
-timeTaken[source.Name()] = fmt.Sprintf("Source took %s for enumeration\n", duration)
-timeTakenMutex.Unlock()

wg.Done()
}(runner)
}
wg.Wait()

-for source, data := range timeTaken {
-gologger.Verbose().Label(source).Msg(data)
-}

cancel()
}()
return results
}

func (a *Agent) GetStatistics() map[string]subscraping.Statistics {
stats := make(map[string]subscraping.Statistics)
sort.Slice(a.sources, func(i, j int) bool {
return a.sources[i].Name() > a.sources[j].Name()
})

for _, source := range a.sources {
stats[source.Name()] = source.Statistics()
}
return stats
}
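
Every source must implement Statistics() for GetStatistics to compile; the subscraping.Source interface (only partially visible in this diff) gains a method along these lines:

type Source interface {
	// Run enumerates the domain and streams results over a channel.
	Run(ctx context.Context, domain string, session *Session) <-chan Result
	// Name returns the source identifier used as the statistics map key.
	Name() string
	// ... existing methods elided ...
	Statistics() Statistics // new: per-run duration plus result and error counts
}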
4 changes: 4 additions & 0 deletions v2/pkg/passive/sources_wo_auth_test.go
@@ -31,6 +31,10 @@ func TestSourcesWithoutKeys(t *testing.T) {
continue
}

if source.Name() == "commoncrawl" {
continue // commoncrawl is under-resourced and will likely time out, so skip it in this test: https://groups.google.com/u/2/g/common-crawl/c/3QmQjFA_3y4/m/vTbhGqIBBQAJ
}

t.Run(source.Name(), func(t *testing.T) {
var results []subscraping.Result

5 changes: 5 additions & 0 deletions v2/pkg/runner/enumerate.go
@@ -148,6 +148,11 @@ func (r *Runner) EnumerateSingleDomain(domain string, writers []io.Writer) error
}
gologger.Info().Msgf("Found %d subdomains for '%s' in %s\n", numberOfSubDomains, domain, duration)

if r.options.Statistics {
gologger.Info().Msgf("Printing source statistics for '%s'", domain)
printStatistics(r.passiveAgent.GetStatistics())
}

return nil
}

2 changes: 2 additions & 0 deletions v2/pkg/runner/options.go
@@ -41,6 +41,7 @@ type Options struct {
Version bool // Version specifies if we should just show version and exit
OnlyRecursive bool // Recursive specifies whether to use only recursive subdomain enumeration sources
All bool // All specifies whether to use all (slow) sources.
Statistics bool // Statistics specifies whether to report source statistics
Threads int // Threads controls the number of threads to use for active enumerations
Timeout int // Timeout is the seconds to wait for sources to respond
MaxEnumerationTime int // MaxEnumerationTime is the maximum amount of time in minutes to wait for enumeration
@@ -137,6 +138,7 @@ func ParseOptions() *Options {
flagSet.BoolVar(&options.Verbose, "v", false, "show verbose output"),
flagSet.BoolVarP(&options.NoColor, "no-color", "nc", false, "disable color in output"),
flagSet.BoolVarP(&options.ListSources, "list-sources", "ls", false, "list all available sources"),
flagSet.BoolVar(&options.Statistics, "stats", false, "report source statistics"),
)

createGroup(flagSet, "optimization", "Optimization",
42 changes: 42 additions & 0 deletions v2/pkg/runner/stats.go
@@ -0,0 +1,42 @@
package runner

import (
"fmt"
"sort"
"strings"
"time"

"github.com/projectdiscovery/gologger"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping"
"golang.org/x/exp/maps"
)

func printStatistics(stats map[string]subscraping.Statistics) {

sources := maps.Keys(stats)
sort.Strings(sources)

var lines []string
var skipped []string

for _, source := range sources {
sourceStats := stats[source]
if sourceStats.Skipped {
skipped = append(skipped, fmt.Sprintf(" %s", source))
} else {
lines = append(lines, fmt.Sprintf(" %-20s %-10s %10d %10d", source, sourceStats.TimeTaken.Round(time.Millisecond).String(), sourceStats.Results, sourceStats.Errors))
}
}

if len(lines) > 0 {
gologger.Print().Msgf("\n Source Duration Results Errors\n%s\n", strings.Repeat("─", 56))
gologger.Print().Msgf(strings.Join(lines, "\n"))
gologger.Print().Msgf("\n")
}

if len(skipped) > 0 {
gologger.Print().Msgf("\n The following sources were included but skipped...\n\n")
gologger.Print().Msgf(strings.Join(skipped, "\n"))
gologger.Print().Msgf("\n\n")
}
}
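
As a rough illustration of the formatting above, a call with invented values would render one aligned row per source that ran and list skipped sources separately:

printStatistics(map[string]subscraping.Statistics{
	"alienvault": {TimeTaken: 1204 * time.Millisecond, Results: 17},
	"bevigil":    {Skipped: true}, // e.g. no API key configured
})
// Prints approximately:
//  Source               Duration      Results     Errors
//  ────────────────────────────────────────────────────
//  alienvault           1.204s             17          0
//
//  The following sources were included but skipped...
//
//  bevigil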
29 changes: 26 additions & 3 deletions v2/pkg/subscraping/sources/alienvault/alienvault.go
@@ -5,6 +5,7 @@ import (
"context"
"encoding/json"
"fmt"
"time"

"github.com/projectdiscovery/subfinder/v2/pkg/subscraping"
)
@@ -18,18 +19,28 @@ type alienvaultResponse struct {
}

// Source is the passive scraping agent
-type Source struct{}
type Source struct {
timeTaken time.Duration
results int
errors int
}

// Run function returns all subdomains found with the service
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
results := make(chan subscraping.Result)
s.errors = 0
s.results = 0

go func() {
-defer close(results)
defer func(startTime time.Time) {
s.timeTaken = time.Since(startTime)
close(results)
}(time.Now())

resp, err := session.SimpleGet(ctx, fmt.Sprintf("https://otx.alienvault.com/api/v1/indicators/domain/%s/passive_dns", domain))
if err != nil && resp == nil {
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
s.errors++
session.DiscardHTTPResponse(resp)
return
}
@@ -39,18 +50,22 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
err = json.NewDecoder(resp.Body).Decode(&response)
if err != nil {
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
s.errors++
resp.Body.Close()
return
}
resp.Body.Close()

if response.Error != "" {
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: fmt.Errorf("%s, %s", response.Detail, response.Error)}
results <- subscraping.Result{
Source: s.Name(), Type: subscraping.Error, Error: fmt.Errorf("%s, %s", response.Detail, response.Error),
}
return
}

for _, record := range response.PassiveDNS {
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: record.Hostname}
s.results++
}
}()

@@ -77,3 +92,11 @@ func (s *Source) NeedsKey() bool {
func (s *Source) AddApiKeys(_ []string) {
// no key needed
}

func (s *Source) Statistics() subscraping.Statistics {
return subscraping.Statistics{
Errors: s.errors,
Results: s.results,
TimeTaken: s.timeTaken,
}
}
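
A minimal way to exercise a single source and read its counters afterwards (ctx and session assumed to be set up as in the package tests; error handling elided):

src := &alienvault.Source{}
for result := range src.Run(ctx, "example.com", session) {
	if result.Type == subscraping.Subdomain {
		fmt.Println(result.Value)
	}
}
// Safe to read once Run's channel has closed: the deferred function
// above has recorded the elapsed time by then.
stats := src.Statistics()
fmt.Printf("took %s with %d results and %d errors\n", stats.TimeTaken, stats.Results, stats.Errors)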
26 changes: 24 additions & 2 deletions v2/pkg/subscraping/sources/anubis/anubis.go
@@ -4,25 +4,36 @@ package anubis
import (
"context"
"fmt"
"time"

jsoniter "github.com/json-iterator/go"

"github.com/projectdiscovery/subfinder/v2/pkg/subscraping"
)

// Source is the passive scraping agent
-type Source struct{}
type Source struct {
timeTaken time.Duration
errors int
results int
}

// Run function returns all subdomains found with the service
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
results := make(chan subscraping.Result)
s.errors = 0
s.results = 0

go func() {
-defer close(results)
defer func(startTime time.Time) {
s.timeTaken = time.Since(startTime)
close(results)
}(time.Now())

resp, err := session.SimpleGet(ctx, fmt.Sprintf("https://jonlu.ca/anubis/subdomains/%s", domain))
if err != nil {
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
s.errors++
session.DiscardHTTPResponse(resp)
return
}
@@ -31,6 +42,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
err = jsoniter.NewDecoder(resp.Body).Decode(&subdomains)
if err != nil {
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
s.errors++
resp.Body.Close()
return
}
@@ -39,7 +51,9 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se

for _, record := range subdomains {
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: record}
s.results++
}

}()

return results
@@ -65,3 +79,11 @@ func (s *Source) NeedsKey() bool {
func (s *Source) AddApiKeys(_ []string) {
// no key needed
}

func (s *Source) Statistics() subscraping.Statistics {
return subscraping.Statistics{
Errors: s.errors,
Results: s.results,
TimeTaken: s.timeTaken,
}
}
31 changes: 27 additions & 4 deletions v2/pkg/subscraping/sources/bevigil/bevigil.go
@@ -4,6 +4,7 @@ package bevigil
import (
"context"
"fmt"
"time"

jsoniter "github.com/json-iterator/go"

@@ -16,22 +17,35 @@ type Response struct {
}

type Source struct {
-apiKeys []string
apiKeys []string
timeTaken time.Duration
errors int
results int
skipped bool
}

func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
results := make(chan subscraping.Result)
s.errors = 0
s.results = 0

go func() {
-defer close(results)
defer func(startTime time.Time) {
s.timeTaken = time.Since(startTime)
close(results)
}(time.Now())

randomApiKey := subscraping.PickRandom(s.apiKeys, s.Name())
if randomApiKey == "" {
s.skipped = true
return
}

getUrl := fmt.Sprintf("https://osint.bevigil.com/api/%s/subdomains/", domain)

-resp, err := session.Get(ctx, getUrl, "", map[string]string{"X-Access-Token": randomApiKey, "User-Agent": "subfinder"})
resp, err := session.Get(ctx, getUrl, "", map[string]string{
"X-Access-Token": randomApiKey, "User-Agent": "subfinder",
})
if err != nil {
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
session.DiscardHTTPResponse(resp)
@@ -56,8 +70,8 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
for _, subdomain := range subdomains {
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
}
-}()

}()
return results
}

@@ -80,3 +94,12 @@ func (s *Source) NeedsKey() bool {
func (s *Source) AddApiKeys(keys []string) {
s.apiKeys = keys
}

func (s *Source) Statistics() subscraping.Statistics {
return subscraping.Statistics{
Errors: s.errors,
Results: s.results,
TimeTaken: s.timeTaken,
Skipped: s.skipped,
}
}
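
For keyed sources like this one, a run without configured keys is recorded as skipped rather than as an error; illustratively:

src := &bevigil.Source{} // no keys supplied via AddApiKeys
for range src.Run(ctx, "example.com", session) {
	// Drains immediately: PickRandom returned "", so the goroutine
	// set skipped and closed the channel without querying the API.
}
if src.Statistics().Skipped {
	fmt.Println("bevigil was skipped: no API key configured")
}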
