-
Notifications
You must be signed in to change notification settings - Fork 45
/
Copy pathutils.go
109 lines (102 loc) · 3.6 KB
/
utils.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
package common
import (
	"fmt"
	"log"
	"net/url"
	"strings"

	"github.com/PuerkitoBio/goquery"
	"github.com/axgle/mahonia"
	"github.com/howie6879/owllook_api/config"
	"github.com/levigross/grequests"
	"github.com/saintfish/chardet"
)
// DetectBody returns body as a string, converting it from GBK when needed.
//
// The charset of body is guessed with chardet. If detection fails, or the
// detected charset name contains "utf" (case-insensitive), the bytes are
// returned as-is. Every other detection result is decoded as GBK.
func DetectBody(body []byte) string {
	raw := string(body)

	best, err := chardet.NewTextDetector().DetectBest(body)
	if err != nil {
		// Detection failed: hand back the bytes untouched.
		return raw
	}
	if strings.Contains(strings.ToLower(best.Charset), "utf") {
		return raw
	}
	// Anything that is not reported as a UTF variant is treated as GBK.
	return mahonia.NewDecoder("gbk").ConvertString(raw)
}
// MakeAbsolute resolves currentUrl against homeUrl and returns the result.
//
// A currentUrl that already names a host is parsed and re-serialized without
// consulting homeUrl. A currentUrl without a host (a relative path, or the
// empty string) is resolved against homeUrl.
//
// Neither argument is assumed to be well formed: if currentUrl cannot be
// parsed it is returned unchanged, and if homeUrl cannot be parsed the parsed
// currentUrl is returned unresolved.
func MakeAbsolute(homeUrl string, currentUrl string) string {
	ref, err := url.Parse(currentUrl)
	if err != nil {
		// url.Parse yields a nil *URL on failure; dereferencing it would
		// panic, so fall back to the raw input.
		return currentUrl
	}
	if ref.Host != "" {
		return ref.String()
	}

	base, err := url.Parse(homeUrl)
	if err != nil {
		// Without a usable base there is nothing to resolve against.
		return ref.String()
	}
	return base.ResolveReference(ref).String()
}
// FetchHtml searches the novel source described by rule for name and returns
// one map per matched result item.
//
// The search URL is rule.SearchUrl followed by the query-escaped keyword; when
// rule.KeywordEncoding is set, the keyword is first converted to that charset.
// Each result map carries the keys source_name, source_url, novel_name,
// novel_url, novel_type, novel_cover, novel_author, novel_abstract,
// novel_latest_chapter_name and novel_latest_chapter_url. Links found in the
// page are resolved against rule.HomeUrl; a missing link stays empty.
//
// An error is returned when the keyword encoding is unsupported, the request
// fails, or the response body cannot be parsed as HTML. A response with a
// status other than 200 yields a nil slice and a nil error.
func FetchHtml(name string, rule config.NovelRule) ([]map[string]string, error) {
	keyword := name
	if rule.KeywordEncoding != "" {
		// mahonia hands back a nil Encoder for charsets it does not know;
		// calling it would panic, so report the bad rule instead.
		encoder := mahonia.NewEncoder(rule.KeywordEncoding)
		if encoder == nil {
			return nil, fmt.Errorf("unsupported keyword encoding %q", rule.KeywordEncoding)
		}
		keyword = encoder.ConvertString(name)
	}
	searchURL := rule.SearchUrl + url.QueryEscape(keyword)
	log.Println(searchURL)

	response, err := RequestURL(searchURL)
	if err != nil {
		log.Println("Request URL error", err)
		return nil, err
	}
	// Make sure the body is drained and closed on every path, including the
	// non-200 one where it is never read.
	defer response.Close()

	if response.StatusCode != 200 {
		log.Printf("unexpected status %d for %s", response.StatusCode, searchURL)
		return nil, nil
	}

	rawHTML := DetectBody(response.Bytes())
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(rawHTML))
	if err != nil {
		return nil, fmt.Errorf("parsing search result of %s: %w", rule.Name, err)
	}

	// resolve makes a link absolute but keeps a missing link empty, so that an
	// absent cover or chapter link is not reported as the home URL.
	resolve := func(ref string) string {
		if ref == "" {
			return ""
		}
		return MakeAbsolute(rule.HomeUrl, ref)
	}

	var resultData []map[string]string
	doc.Find(rule.TargetItem).Each(func(_ int, s *goquery.Selection) {
		// Attr's "exists" flag is ignored on purpose: a missing attribute
		// comes back as "", which resolve already handles.
		novelURL, _ := s.Find(rule.ItemRule.NovelUrl).Attr("href")
		coverURL, _ := s.Find(rule.ItemRule.NovelCover).Attr("src")
		// The latest chapter's name is the text of the chapter link itself.
		chapterLink := s.Find(rule.ItemRule.NovelLatestChapterUrl)
		chapterURL, _ := chapterLink.Attr("href")

		resultData = append(resultData, map[string]string{
			"source_name":               rule.Name,
			"source_url":                rule.HomeUrl,
			"novel_name":                strings.TrimSpace(s.Find(rule.ItemRule.NovelName).Text()),
			"novel_url":                 resolve(novelURL),
			"novel_type":                strings.TrimSpace(s.Find(rule.ItemRule.NovelType).Text()),
			"novel_cover":               resolve(coverURL),
			"novel_author":              strings.TrimSpace(s.Find(rule.ItemRule.NovelAuthor).Text()),
			"novel_abstract":            strings.TrimSpace(s.Find(rule.ItemRule.NovelAbstract).Text()),
			"novel_latest_chapter_name": strings.TrimSpace(chapterLink.Text()),
			"novel_latest_chapter_url":  resolve(chapterURL),
		})
	})
	return resultData, nil
}
// RequestURL performs an HTTP GET on url, sending the User-Agent supplied by
// config.GetUserAgent, and returns the grequests response together with any
// request error. A failed request is logged before the error is returned.
func RequestURL(url string) (*grequests.Response, error) {
	headers := map[string]string{"User-Agent": config.GetUserAgent()}

	resp, err := grequests.Get(url, &grequests.RequestOptions{Headers: headers})
	if err != nil {
		log.Println("Unable to make request: ", err)
	}
	return resp, err
}