|  | 
| 1 | 1 | import commonmark | 
| 2 | 2 | import re | 
|  | 3 | +import html | 
|  | 4 | +from xml.dom import minidom | 
| 3 | 5 | 
 | 
| 4 | 6 | from commonmark.dump import prepare | 
| 5 | 7 | 
 | 
|  | 
| 69 | 71 |     "\u3000", | 
| 70 | 72 | } | 
| 71 | 73 | 
 | 
| 72 |  | -_NOTION_TO_MARKDOWN_MAPPER = {"i": "☃", "b": "☃☃", "s": "~~", "c": "`"} | 
|  | 74 | +_NOTION_TO_MARKDOWN_MAPPER = {"i": "☃", "b": "☃☃", "s": "~~", "c": "`", "e": "$$"} | 
| 73 | 75 | 
 | 
| 74 |  | -FORMAT_PRECEDENCE = ["s", "b", "i", "a", "c"] | 
|  | 76 | +FORMAT_PRECEDENCE = ["s", "b", "i", "a", "c", "e"] | 
| 75 | 77 | 
 | 
| 76 | 78 | 
 | 
| 77 | 79 | def _extract_text_and_format_from_ast(item): | 
| 78 | 80 | 
 | 
| 79 | 81 |     if item["type"] == "html_inline": | 
| 80 | 82 |         if item.get("literal", "") == "<s>": | 
| 81 | 83 |             return "", ("s",) | 
|  | 84 | +        if item.get("literal", "").startswith('<latex'): | 
|  | 85 | +            elem = minidom.parseString(item.get("literal", "") + '</latex>').documentElement | 
|  | 86 | +            equation = elem.attributes['equation'].value | 
|  | 87 | +            return "", ("e", equation) | 
| 82 | 88 | 
 | 
| 83 | 89 |     if item["type"] == "emph": | 
| 84 | 90 |         return item.get("literal", ""), ("i",) | 
| @@ -118,6 +124,11 @@ def markdown_to_notion(markdown): | 
| 118 | 124 |         markdown = markdown.replace("~~", "<s>", 1) | 
| 119 | 125 |         markdown = markdown.replace("~~", "</s>", 1) | 
| 120 | 126 | 
 | 
|  | 127 | +    # commonmark doesn't support LaTeX blocks, so we need to handle them ourselves | 
|  | 128 | +    def handle_latex(match): | 
|  | 129 | +        return f'<latex equation="{html.escape(match.group(0)[2:-2])}">\u204d</latex>' | 
|  | 130 | +    markdown = re.sub(r'(?<!\\\\|\$\$)(?:\\\\)*((\$\$)+)(?!(\$\$))(.+?)(?<!(\$\$))\1(?!(\$\$))', handle_latex, markdown) | 
|  | 131 | + | 
| 121 | 132 |     # we don't want to touch dashes, so temporarily replace them here | 
| 122 | 133 |     markdown = markdown.replace("-", "⸻") | 
| 123 | 134 | 
 | 
| @@ -148,6 +159,12 @@ def markdown_to_notion(markdown): | 
| 148 | 159 |                 format.remove(("s",)) | 
| 149 | 160 |                 literal = "" | 
| 150 | 161 | 
 | 
|  | 162 | +            if item["type"] == "html_inline" and literal == "</latex>": | 
|  | 163 | +                for f in filter(lambda f: f[0] == 'e', format): | 
|  | 164 | +                    format.remove(f) | 
|  | 165 | +                    break | 
|  | 166 | +                literal = "" | 
|  | 167 | + | 
| 151 | 168 |             if item["type"] == "softbreak": | 
| 152 | 169 |                 literal = "\n" | 
| 153 | 170 | 
 | 
| @@ -227,7 +244,15 @@ def notion_to_markdown(notion): | 
| 227 | 244 |             if f[0] == "a": | 
| 228 | 245 |                 markdown += "[" | 
| 229 | 246 | 
 | 
| 230 |  | -        markdown += stripped | 
|  | 247 | +        # Check whether a format modifies the content | 
|  | 248 | +        content_changed = False | 
|  | 249 | +        for f in sorted_format: | 
|  | 250 | +            if f[0] == 'e': | 
|  | 251 | +                markdown += f[1] | 
|  | 252 | +                content_changed = True | 
|  | 253 | + | 
|  | 254 | +        if not content_changed: | 
|  | 255 | +            markdown += stripped | 
| 231 | 256 | 
 | 
| 232 | 257 |         for f in reversed(sorted_format): | 
| 233 | 258 |             if f[0] in _NOTION_TO_MARKDOWN_MAPPER: | 
|  | 
0 commit comments