Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix URL handling in the whole markdown module, improve test coverage (fix #997) #1027

Merged
merged 1 commit into from
Feb 24, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion models/mail.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ func composeTplData(subject, body, link string) map[string]interface{} {

func composeIssueMessage(issue *Issue, doer *User, tplName base.TplName, tos []string, info string) *mailer.Message {
subject := issue.mailSubject()
body := string(markdown.RenderSpecialLink([]byte(issue.Content), issue.Repo.HTMLURL(), issue.Repo.ComposeMetas()))
body := string(markdown.RenderString(issue.Content, issue.Repo.HTMLURL(), issue.Repo.ComposeMetas()))
data := composeTplData(subject, body, issue.HTMLURL())
data["Doer"] = doer

Expand Down
120 changes: 76 additions & 44 deletions modules/markdown/markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ var (
ShortLinkPattern = regexp.MustCompile(`(\[\[.*\]\]\w*)`)

// AnySHA1Pattern splits a URL containing a SHA into its parts
AnySHA1Pattern = regexp.MustCompile(`http\S+//(\S+)/(\S+)/(\S+)/(\S+)/([0-9a-f]{40})(?:/?([^#\s]+)?(?:#(\S+))?)?`)
AnySHA1Pattern = regexp.MustCompile(`(http\S*)://(\S+)/(\S+)/(\S+)/(\S+)/([0-9a-f]{40})(?:/?([^#\s]+)?(?:#(\S+))?)?`)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The protocol is now captured as its own group, because it is needed to rebuild the full URL.


// IssueFullPattern splits issue (and pull request) URLs into their parts
IssueFullPattern = regexp.MustCompile(`(?:^|\s|\()http\S+//((?:[^\s/]+/)+)((?:\w{1,10}-)?[1-9][0-9]*)([\?|#]\S+.(\S+)?)?\b`)
IssueFullPattern = regexp.MustCompile(`(?:^|\s|\()(http\S*)://((?:[^\s/]+/)+)((?:\w{1,10}-)?[1-9][0-9]*)([\?|#]\S+.(\S+)?)?\b`)

validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
)
Expand Down Expand Up @@ -126,10 +126,11 @@ type Renderer struct {
func (r *Renderer) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
if len(link) > 0 && !isLink(link) {
if link[0] != '#' {
mLink := URLJoin(r.urlPrefix, string(link))
lnk := string(link)
if r.isWikiMarkdown {
mLink = URLJoin(r.urlPrefix, "wiki", string(link))
lnk = URLJoin("wiki", lnk)
}
mLink := URLJoin(r.urlPrefix, lnk)
link = []byte(mLink)
}
}
Expand Down Expand Up @@ -206,12 +207,10 @@ func (r *Renderer) Image(out *bytes.Buffer, link []byte, title []byte, alt []byt
return
}
} else {
if link[0] != '/' {
if !strings.HasSuffix(prefix, "/") {
prefix += "/"
}
}
link = []byte(url.QueryEscape(prefix + string(link)))
lnk := string(link)
lnk = URLJoin(prefix, lnk)
lnk = strings.Replace(lnk, " ", "+", -1)
link = []byte(lnk)
}
}

Expand Down Expand Up @@ -246,10 +245,30 @@ func URLJoin(elem ...string) string {
last := len(elem) - 1
for i, item := range elem {
res += item
if !strings.HasSuffix(res, "/") && i != last {
if i != last && !strings.HasSuffix(res, "/") {
res += "/"
}
}
cwdIndex := strings.Index(res, "/./")
Copy link
Contributor Author

@andrew-boyarshin andrew-boyarshin Feb 23, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

URLJoin now correctly handles `.` and `..` path elements.

for cwdIndex != -1 {
res = strings.Replace(res, "/./", "/", 1)
cwdIndex = strings.Index(res, "/./")
}
upIndex := strings.Index(res, "/..")
for upIndex != -1 {
res = strings.Replace(res, "/..", "", 1)
prevStart := -1
for i := upIndex - 1; i >= 0; i-- {
if res[i] == '/' {
prevStart = i
break
}
}
if prevStart != -1 {
res = res[:prevStart] + res[upIndex:]
}
upIndex = strings.Index(res, "/..")
}
return res
}

Expand Down Expand Up @@ -286,6 +305,9 @@ func RenderIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string

// IsSameDomain checks whether the given URL string has the same host as the current Gitea instance
func IsSameDomain(s string) bool {
if strings.HasPrefix(s, "/") {
return true
}
if uapp, err := url.Parse(setting.AppURL); err == nil {
if u, err := url.Parse(s); err == nil {
return u.Host == uapp.Host
Expand All @@ -300,26 +322,27 @@ func renderFullSha1Pattern(rawBytes []byte, urlPrefix string) []byte {
ms := AnySHA1Pattern.FindAllSubmatch(rawBytes, -1)
for _, m := range ms {
all := m[0]
paths := string(m[1])
var path = "//" + paths
author := string(m[2])
repoName := string(m[3])
protocol := string(m[1])
paths := string(m[2])
path := protocol + "://" + paths
author := string(m[3])
repoName := string(m[4])
path = URLJoin(path, author, repoName)
ltype := "src"
itemType := m[4]
itemType := m[5]
if IsSameDomain(paths) {
ltype = string(itemType)
} else if string(itemType) == "commit" {
ltype = "commit"
}
sha := m[5]
sha := m[6]
var subtree string
if len(m) > 6 && len(m[6]) > 0 {
subtree = string(m[6])
if len(m) > 7 && len(m[7]) > 0 {
subtree = string(m[7])
}
var line []byte
if len(m) > 7 && len(m[7]) > 0 {
line = m[7]
if len(m) > 8 && len(m[8]) > 0 {
line = m[8]
}
urlSuffix := ""
text := base.ShortSha(string(sha))
Expand All @@ -346,23 +369,18 @@ func renderFullIssuePattern(rawBytes []byte, urlPrefix string) []byte {
ms := IssueFullPattern.FindAllSubmatch(rawBytes, -1)
for _, m := range ms {
all := m[0]
paths := bytes.Split(m[1], []byte("/"))
protocol := string(m[1])
paths := bytes.Split(m[2], []byte("/"))
paths = paths[:len(paths)-1]
if bytes.HasPrefix(paths[0], []byte("gist.")) {
continue
}
var path string
if len(paths) > 3 {
// Internal one
path = URLJoin(urlPrefix, "issues")
} else {
path = "//" + string(m[1])
}
id := string(m[2])
path := protocol + "://" + string(m[2])
id := string(m[3])
path = URLJoin(path, id)
var comment []byte
if len(m) > 3 {
comment = m[3]
comment = m[4]
}
urlSuffix := ""
text := "#" + id
Expand Down Expand Up @@ -394,8 +412,13 @@ func lastIndexOfByte(sl []byte, target byte) int {
return -1
}

// renderShortLinks processes [[syntax]]
func renderShortLinks(rawBytes []byte, urlPrefix string, noLink bool) []byte {
// RenderShortLinks processes [[syntax]] short links.
//
// When noLink is true, no link tags are generated:
// each [[...]] is simply replaced with its content text.
//
// isWikiMarkdown chooses which URL prefix relative links are joined with.
func RenderShortLinks(rawBytes []byte, urlPrefix string, noLink bool, isWikiMarkdown bool) []byte {
ms := ShortLinkPattern.FindAll(rawBytes, -1)
for _, m := range ms {
orig := bytes.TrimSpace(m)
Expand Down Expand Up @@ -482,11 +505,17 @@ func renderShortLinks(rawBytes []byte, urlPrefix string, noLink bool) []byte {
}
absoluteLink := isLink([]byte(link))
if !absoluteLink {
link = url.QueryEscape(link)
link = strings.Replace(link, " ", "+", -1)
}
if image {
if !absoluteLink {
link = URLJoin(urlPrefix, "wiki", "raw", link)
if IsSameDomain(urlPrefix) {
urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1)
}
if isWikiMarkdown {
link = URLJoin("wiki", "raw", link)
}
link = URLJoin(urlPrefix, link)
}
title := props["title"]
if title == "" {
Expand All @@ -504,7 +533,10 @@ func renderShortLinks(rawBytes []byte, urlPrefix string, noLink bool) []byte {
}
name = fmt.Sprintf(`<img src="%s" %s title="%s" />`, link, alt, title)
} else if !absoluteLink {
link = URLJoin(urlPrefix, "wiki", link)
if isWikiMarkdown {
link = URLJoin("wiki", link)
}
link = URLJoin(urlPrefix, link)
}
if noLink {
rawBytes = bytes.Replace(rawBytes, orig, []byte(name), -1)
Expand All @@ -527,7 +559,7 @@ func RenderCrossReferenceIssueIndexPattern(rawBytes []byte, urlPrefix string, me
repo := string(bytes.Split(m, []byte("#"))[0])
issue := string(bytes.Split(m, []byte("#"))[1])

link := fmt.Sprintf(`<a href="%s">%s</a>`, URLJoin(urlPrefix, repo, "issues", issue), m)
link := fmt.Sprintf(`<a href="%s">%s</a>`, URLJoin(setting.AppURL, repo, "issues", issue), m)
rawBytes = bytes.Replace(rawBytes, m, []byte(link), 1)
}
return rawBytes
Expand All @@ -548,15 +580,15 @@ func renderSha1CurrentPattern(rawBytes []byte, urlPrefix string) []byte {
}

// RenderSpecialLink renders mentions, indexes and SHA1 strings to corresponding links.
func RenderSpecialLink(rawBytes []byte, urlPrefix string, metas map[string]string) []byte {
func RenderSpecialLink(rawBytes []byte, urlPrefix string, metas map[string]string, isWikiMarkdown bool) []byte {
ms := MentionPattern.FindAll(rawBytes, -1)
for _, m := range ms {
m = m[bytes.Index(m, []byte("@")):]
rawBytes = bytes.Replace(rawBytes, m,
[]byte(fmt.Sprintf(`<a href="%s">%s</a>`, URLJoin(setting.AppURL, string(m[1:])), m)), -1)
}

rawBytes = renderShortLinks(rawBytes, urlPrefix, false)
rawBytes = RenderShortLinks(rawBytes, urlPrefix, false, isWikiMarkdown)
rawBytes = RenderIssueIndexPattern(rawBytes, urlPrefix, metas)
rawBytes = RenderCrossReferenceIssueIndexPattern(rawBytes, urlPrefix, metas)
rawBytes = renderFullSha1Pattern(rawBytes, urlPrefix)
Expand Down Expand Up @@ -601,7 +633,7 @@ var noEndTags = []string{"img", "input", "br", "hr"}

// PostProcess treats different types of HTML differently,
// and only renders special links for plain text blocks.
func PostProcess(rawHTML []byte, urlPrefix string, metas map[string]string) []byte {
func PostProcess(rawHTML []byte, urlPrefix string, metas map[string]string, isWikiMarkdown bool) []byte {
startTags := make([]string, 0, 5)
var buf bytes.Buffer
tokenizer := html.NewTokenizer(bytes.NewReader(rawHTML))
Expand All @@ -611,7 +643,7 @@ OUTER_LOOP:
token := tokenizer.Token()
switch token.Type {
case html.TextToken:
buf.Write(RenderSpecialLink([]byte(token.String()), urlPrefix, metas))
buf.Write(RenderSpecialLink([]byte(token.String()), urlPrefix, metas, isWikiMarkdown))

case html.StartTagToken:
buf.WriteString(token.String())
Expand All @@ -623,7 +655,7 @@ OUTER_LOOP:
token = tokenizer.Token()

// Copy the token to the output verbatim
buf.Write(renderShortLinks([]byte(token.String()), urlPrefix, true))
buf.Write(RenderShortLinks([]byte(token.String()), urlPrefix, true, isWikiMarkdown))

if token.Type == html.StartTagToken {
if !com.IsSliceContainsStr(noEndTags, token.Data) {
Expand Down Expand Up @@ -673,9 +705,9 @@ OUTER_LOOP:

// render renders Markdown to HTML, applying raw rendering, post-processing and sanitizing.
func render(rawBytes []byte, urlPrefix string, metas map[string]string, isWikiMarkdown bool) []byte {
urlPrefix = strings.Replace(urlPrefix, " ", "%20", -1)
urlPrefix = strings.Replace(urlPrefix, " ", "+", -1)
result := RenderRaw(rawBytes, urlPrefix, isWikiMarkdown)
result = PostProcess(result, urlPrefix, metas)
result = PostProcess(result, urlPrefix, metas, isWikiMarkdown)
result = Sanitizer.SanitizeBytes(result)
return result
}
Expand Down
Loading