forked from nixsolutions/golang-analyze-tool
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
119 lines (104 loc) · 3 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package main
import (
	"crypto/tls"
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"net"
	"net/http"
	"net/url"
	"runtime"
	"strconv"
	"sync"
	"time"

	"github.com/gocolly/colly"

	"crawler/models"
)
// search crawls the site named by the "url" query parameter and writes a JSON
// report listing every requested link and every link that answered 404.
//
// Query parameters:
//   - url: the start page; it must parse to a URL that has a host, which is
//     also used as the only allowed crawl domain.
//   - depth: optional non-negative integer. One is added before it is handed
//     to colly.MaxDepth; when omitted the collector depth is 2.
//   - threads: optional positive integer used as the collector parallelism;
//     defaults to 2.
//
// Malformed parameters are answered with 400 Bad Request instead of being
// silently replaced by zero values.
func search(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Access-Control-Allow-Origin", "*")

	query := r.URL.Query()
	urlParam := query.Get("url")
	// url.Parse returns a nil *URL on failure, so the error must be checked
	// before the host is read.
	target, err := url.Parse(urlParam)
	if err != nil || target.Host == "" {
		http.Error(w, "invalid or missing url parameter", http.StatusBadRequest)
		return
	}

	depth := 2
	if depthParam := query.Get("depth"); depthParam != "" {
		requested, err := strconv.Atoi(depthParam)
		// A negative value would end up as a non-positive MaxDepth, which
		// colly treats as "no depth limit"; reject it rather than start an
		// unbounded crawl.
		if err != nil || requested < 0 {
			http.Error(w, "depth must be a non-negative integer", http.StatusBadRequest)
			return
		}
		depth = requested + 1
	}

	threads := 2
	if threadsParam := query.Get("threads"); threadsParam != "" {
		threads, err = strconv.Atoi(threadsParam)
		if err != nil || threads < 1 {
			http.Error(w, "threads must be a positive integer", http.StatusBadRequest)
			return
		}
	}

	c := colly.NewCollector(
		colly.MaxDepth(depth),
		colly.AllowedDomains(target.Host),
		colly.Async(true),
	)
	if err := c.Limit(&colly.LimitRule{DomainGlob: "*", Parallelism: threads}); err != nil {
		http.Error(w, "configuring crawler: "+err.Error(), http.StatusInternalServerError)
		return
	}

	// The collector is asynchronous, so its callbacks can run on several
	// goroutines at once; mu guards the slices inside response.
	var mu sync.Mutex
	response := &models.Result{ErrorLinks: []models.Link{}, VisitedLinks: []models.Link{}}

	c.OnHTML("a", func(e *colly.HTMLElement) {
		// Visit returns an error for links the collector refuses to follow
		// (already visited, outside the allowed domain, beyond MaxDepth).
		// Those are expected while crawling, so the error is dropped on purpose.
		_ = e.Request.Visit(e.Attr("href"))
	})
	c.OnRequest(func(req *colly.Request) {
		link := models.Link{RealURL: req.URL.String(), Depth: req.Depth - 1}
		mu.Lock()
		response.VisitedLinks = append(response.VisitedLinks, link)
		mu.Unlock()
		fmt.Println("Visiting", req.URL)
	})
	c.OnError(func(resp *colly.Response, err error) {
		fmt.Println("Request URL:", resp.Request.URL, "failed with response:", resp, "\nError:", err)
		if resp.StatusCode == http.StatusNotFound {
			link := models.Link{RealURL: resp.Request.URL.String(), Depth: resp.Request.Depth - 1}
			mu.Lock()
			response.ErrorLinks = append(response.ErrorLinks, link)
			mu.Unlock()
		}
	})

	startTime := time.Now()
	if err := c.Visit(urlParam); err != nil {
		http.Error(w, "starting crawl: "+err.Error(), http.StatusBadRequest)
		return
	}
	c.Wait()
	duration := time.Since(startTime).Seconds()

	var m runtime.MemStats
	runtime.ReadMemStats(&m)

	response.Duration = strconv.FormatFloat(duration, 'f', 6, 64)
	// Convert to float before dividing so the fractional megabytes survive.
	response.MemoryUsage = strconv.FormatFloat(float64(m.HeapAlloc)/(1024*1024), 'f', 2, 64) + "MB"
	response.VisitedLinks = removeDuplicates(response.VisitedLinks)
	response.VisitedLinksCount = len(response.VisitedLinks)

	responseBytes, err := json.Marshal(response)
	if err != nil {
		http.Error(w, "encoding response: "+err.Error(), http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	if _, err := w.Write(responseBytes); err != nil {
		log.Printf("writing search response: %v", err)
	}
	// The request body is not closed here: net/http closes it once the
	// handler returns.
}
// main reads the listening port from the -p flag (default 9090), replaces
// http.DefaultTransport with a custom transport, registers search as the
// handler for "/" on the default mux, and serves until ListenAndServe fails.
func main() {
	listenPort := flag.Int("p", 9090, "A port")
	flag.Parse()

	// Zero values are spelled out to make the "no dial timeout, default
	// keep-alive" choice explicit.
	dialer := &net.Dialer{
		Timeout:   0,
		KeepAlive: 0,
		DualStack: true,
	}
	// NOTE(review): InsecureSkipVerify turns off TLS certificate verification
	// for every client that uses the default transport — confirm this is
	// intended outside of local testing.
	http.DefaultTransport = &http.Transport{
		DialContext:           dialer.DialContext,
		MaxIdleConns:          0,
		IdleConnTimeout:       0,
		TLSHandshakeTimeout:   10 * time.Second,
		ExpectContinueTimeout: 1 * time.Second,
		TLSClientConfig:       &tls.Config{InsecureSkipVerify: true},
	}

	http.HandleFunc("/", search)

	// The write timeout is very long because a single request stays open for
	// the whole duration of a crawl.
	srv := http.Server{
		Addr:         ":" + strconv.Itoa(*listenPort),
		ReadTimeout:  5 * time.Second,
		WriteTimeout: 60 * time.Minute,
	}
	log.Fatal(srv.ListenAndServe())
}
// removeDuplicates returns the links from elements with repeated RealURL
// values dropped. The first link seen for each URL is kept and the input
// order is preserved. The result is always a non-nil slice, even for empty
// input.
func removeDuplicates(elements []models.Link) []models.Link {
	seen := make(map[string]struct{})
	unique := []models.Link{}
	for _, link := range elements {
		if _, dup := seen[link.RealURL]; dup {
			continue
		}
		seen[link.RealURL] = struct{}{}
		unique = append(unique, link)
	}
	return unique
}