Skip to content

Commit

Permalink
采集优化
Browse files (browse the repository at this point in the history)
  • Loading branch information
TruthHun88 committed Aug 3, 2019
1 parent 4725ebb commit ab93633
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 4 deletions.
1 change: 1 addition & 0 deletions API.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ BookStack 配套微信小程序 BookChat API。
- [x] 评论审核管理
- [ ] 微信小程序配置
- [ ] 横幅管理
- [ ] 收录管理
- [x] 增加`作者`角色,用于控制普通用户创建项目权限

> 更多升级内容,请查看源码仓库 commit 记录
Expand Down
4 changes: 3 additions & 1 deletion controllers/DocumentController.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,9 @@ func (this *DocumentController) Read() {
//此处的id是字符串,标识文档标识,根据文档标识和文档所属的书的id作为key去查询
doc, err = doc.FindByBookIdAndDocIdentify(bookResult.BookId, id) //文档标识
if err != nil {
beego.Error(err, docId, id, bookResult)
if err != orm.ErrNoRows {
beego.Error(err, docId, id, bookResult)
}
this.Abort("404")
}
}
Expand Down
9 changes: 6 additions & 3 deletions utils/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ func CrawlHtml2Markdown(urlstr string, contType int, force bool, intelligence in
cont, err = CrawlByChrome(urlstr, project)
} else {
req := util.BuildRequest("get", urlstr, "", "", "", true, false, headers...)
req.SetTimeout(10*time.Second, 10*time.Second)
req.SetTimeout(30*time.Second, 30*time.Second)
cont, err = req.String()
}

Expand Down Expand Up @@ -345,7 +345,7 @@ func CrawlHtml2Markdown(urlstr string, contType int, force bool, intelligence in
})

//遍历替换图片相对链接
doc.Find("img").Each(func(i int, selection *goquery.Selection) {
doc.Filter(diySelector).Find("img").Each(func(i int, selection *goquery.Selection) {
//存在src,且不以http://和https://开头
if src, ok := selection.Attr("src"); ok {
//链接补全
Expand Down Expand Up @@ -737,6 +737,9 @@ func JoinURL(rawURL string, urlPath string) string {
rawURL = strings.TrimSpace(rawURL)

lowerURLPath := strings.ToLower(urlPath)
if strings.HasPrefix(lowerURLPath, "//") {
return "http:" + urlPath
}
if strings.HasPrefix(lowerURLPath, "http://") || strings.HasPrefix(lowerURLPath, "https://") {
return urlPath
}
Expand All @@ -754,7 +757,7 @@ func JoinURL(rawURL string, urlPath string) string {
}

if strings.HasPrefix(urlPath, "/") {
return u.Scheme + "://" + u.Host + urlPath
return u.Scheme + "://" + u.Host + "/" + strings.TrimLeft(urlPath, "/")
}
u.Path = path.Join(strings.TrimRight(u.Path, "/")+"/", urlPath)
// return u.String() // 会对中文进行编码
Expand Down

0 comments on commit ab93633

Please sign in to comment.