From 3b8645dc2df3b6b674c4c7e8712eaa3b4ed20cce Mon Sep 17 00:00:00 2001 From: CoCo-Japan-pan <115922543+CoCo-Japan-pan@users.noreply.github.com> Date: Sun, 6 Oct 2024 16:59:08 +0900 Subject: [PATCH] remove brackets in embedded urls --- onlinejudge_verify/languages/special_comments.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/onlinejudge_verify/languages/special_comments.py b/onlinejudge_verify/languages/special_comments.py index bfe5f4fe..ae97da81 100644 --- a/onlinejudge_verify/languages/special_comments.py +++ b/onlinejudge_verify/languages/special_comments.py @@ -53,7 +53,7 @@ def list_doxygen_annotations(path: pathlib.Path) -> Dict[str, str]: @functools.lru_cache(maxsize=None) def list_embedded_urls(path: pathlib.Path) -> List[str]: - pattern = re.compile(r"""['"`]?https?://\S*""") # use a broad pattern. There are no needs to make match strict. + pattern = re.compile(r"""['"`<\(]?https?://\S*""") # use a broad pattern. There are no needs to make match strict. with open(path, 'rb') as fh: content = fh.read().decode() urls = [] @@ -70,5 +70,17 @@ def list_embedded_urls(path: pathlib.Path) -> List[str]: # Remove quotes and trailing superfluous chars around the URL url = url[1:end_quote_pos] break + # The URL may be written like `[atcoder](https://atcoder.jp/)` or `<https://atcoder.jp/>` in Markdown syntax. + # In this case, we need to remove brackets around the URL. + for (lbracket, rbracket) in (('<', '>'), ('(', ')')): + if url.startswith(lbracket): + end_bracket_pos = url.rfind(rbracket) + if end_bracket_pos == 0: + # Remove opening bracket from the URL like `