|
| 1 | +from __future__ import annotations |
| 2 | + |
1 | 3 | import re
|
| 4 | +from typing import Protocol |
2 | 5 |
|
3 |
| -from ..common.utils import arrayReplaceAt |
| 6 | +from ..common.utils import arrayReplaceAt, isLinkClose, isLinkOpen |
4 | 7 | from ..token import Token
|
5 | 8 | from .state_core import StateCore
|
6 | 9 |
|
7 |
| -LINK_OPEN_RE = re.compile(r"^<a[>\s]", flags=re.IGNORECASE) |
8 |
| -LINK_CLOSE_RE = re.compile(r"^</a\s*>", flags=re.IGNORECASE) |
9 |
| - |
10 | 10 | HTTP_RE = re.compile(r"^http://")
|
11 | 11 | MAILTO_RE = re.compile(r"^mailto:")
|
12 | 12 | TEST_MAILTO_RE = re.compile(r"^mailto:", flags=re.IGNORECASE)
|
13 | 13 |
|
14 | 14 |
|
15 |
| -def isLinkOpen(string: str) -> bool: |
16 |
| - return bool(LINK_OPEN_RE.search(string)) |
17 |
| - |
18 |
| - |
19 |
| -def isLinkClose(string: str) -> bool: |
20 |
| - return bool(LINK_CLOSE_RE.search(string)) |
21 |
| - |
22 |
| - |
23 | 15 | def linkify(state: StateCore) -> None:
|
24 |
| - blockTokens = state.tokens |
25 |
| - |
| 16 | + """Rule for identifying plain-text links.""" |
26 | 17 | if not state.md.options.linkify:
|
27 | 18 | return
|
28 | 19 |
|
29 | 20 | if not state.md.linkify:
|
30 | 21 | raise ModuleNotFoundError("Linkify enabled but not installed.")
|
31 | 22 |
|
32 |
| - for j in range(len(blockTokens)): |
33 |
| - if blockTokens[j].type != "inline" or not state.md.linkify.pretest( |
34 |
| - blockTokens[j].content |
| 23 | + for inline_token in state.tokens: |
| 24 | + if inline_token.type != "inline" or not state.md.linkify.pretest( |
| 25 | + inline_token.content |
35 | 26 | ):
|
36 | 27 | continue
|
37 | 28 |
|
38 |
| - tokens = blockTokens[j].children |
| 29 | + tokens = inline_token.children |
39 | 30 |
|
40 | 31 | htmlLinkLevel = 0
|
41 | 32 |
|
@@ -71,38 +62,47 @@ def linkify(state: StateCore) -> None:
|
71 | 62 | currentToken.content
|
72 | 63 | ):
|
73 | 64 | text = currentToken.content
|
74 |
| - links = state.md.linkify.match(text) |
| 65 | + links: list[_LinkType] = state.md.linkify.match(text) or [] |
75 | 66 |
|
76 | 67 | # Now split string to nodes
|
77 | 68 | nodes = []
|
78 | 69 | level = currentToken.level
|
79 | 70 | lastPos = 0
|
80 | 71 |
|
81 |
| - for ln in range(len(links)): |
82 |
| - url = links[ln].url |
| 72 | + # forbid escape sequence at the start of the string, |
| 73 | + # this avoids http\://example.com/ from being linkified as |
| 74 | + # http:<a href="//example.com/">//example.com/</a> |
| 75 | + if ( |
| 76 | + links |
| 77 | + and links[0].index == 0 |
| 78 | + and i > 0 |
| 79 | + and tokens[i - 1].type == "text_special" |
| 80 | + ): |
| 81 | + links = links[1:] |
| 82 | + |
| 83 | + for link in links: |
| 84 | + url = link.url |
83 | 85 | fullUrl = state.md.normalizeLink(url)
|
84 | 86 | if not state.md.validateLink(fullUrl):
|
85 | 87 | continue
|
86 | 88 |
|
87 |
| - urlText = links[ln].text |
| 89 | + urlText = link.text |
88 | 90 |
|
89 | 91 | # Linkifier might send raw hostnames like "example.com", where url
|
90 | 92 | # starts with domain name. So we prepend http:// in those cases,
|
91 | 93 | # and remove it afterwards.
|
92 |
| - if not links[ln].schema: |
| 94 | + if not link.schema: |
93 | 95 | urlText = HTTP_RE.sub(
|
94 | 96 | "", state.md.normalizeLinkText("http://" + urlText)
|
95 | 97 | )
|
96 |
| - elif links[ln].schema == "mailto:" and TEST_MAILTO_RE.search( |
97 |
| - urlText |
98 |
| - ): |
| 98 | + elif link.schema == "mailto:" and TEST_MAILTO_RE.search(urlText): |
99 | 99 | urlText = MAILTO_RE.sub(
|
100 | 100 | "", state.md.normalizeLinkText("mailto:" + urlText)
|
101 | 101 | )
|
102 | 102 | else:
|
103 | 103 | urlText = state.md.normalizeLinkText(urlText)
|
104 | 104 |
|
105 |
| - pos = links[ln].index |
| 105 | + pos = link.index |
106 | 106 |
|
107 | 107 | if pos > lastPos:
|
108 | 108 | token = Token("text", "", 0)
|
@@ -130,12 +130,20 @@ def linkify(state: StateCore) -> None:
|
130 | 130 | token.info = "auto"
|
131 | 131 | nodes.append(token)
|
132 | 132 |
|
133 |
| - lastPos = links[ln].last_index |
| 133 | + lastPos = link.last_index |
134 | 134 |
|
135 | 135 | if lastPos < len(text):
|
136 | 136 | token = Token("text", "", 0)
|
137 | 137 | token.content = text[lastPos:]
|
138 | 138 | token.level = level
|
139 | 139 | nodes.append(token)
|
140 | 140 |
|
141 |
| - blockTokens[j].children = tokens = arrayReplaceAt(tokens, i, nodes) |
| 141 | + inline_token.children = tokens = arrayReplaceAt(tokens, i, nodes) |
| 142 | + |
| 143 | + |
| 144 | +class _LinkType(Protocol): |
| 145 | + url: str |
| 146 | + text: str |
| 147 | + index: int |
| 148 | + last_index: int |
| 149 | + schema: str | None |
0 commit comments