ryokat3 · ryokat3 · Feb 21, 2022 · Feb 21, 2022
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2021 wak109
+Copyright (c) 2021 - 2022 ryokat3
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "qiita-sync"
-version = "1.4.0"
+version = "1.4.4"
 description = "Synchronize GitHub with Qiita"
 authors = ["Ryoji Kato <ryokat3@gmail.com>"]
 readme = "README.md"

diff --git a/qiita_sync/qiita_sync.py b/qiita_sync/qiita_sync.py
@@ -618,7 +618,7 @@ def toApi(self) -> Dict[str, Any]:
     def fromApi(cls, item) -> QiitaArticle:
         return cls(
             data=QiitaData.fromApi(item),
-            body=item["body"],
+            body=markdown_normalize(item["body"]),
             timestamp=get_utc(item["updated_at"]),
             aux=QiitaArticleAux.fromApi(item))
 
@@ -649,41 +649,58 @@ def fromFile(cls, filepath: Path) -> GitHubArticle:
             Maybe(m).map(lambda m: m.group(1)).getOrElse(""), qiita_get_temporary_title(body),
             qiita_get_temporary_tags(body))
 
-        return cls(data=data, body=body, timestamp=timestamp, filepath=filepath)
+        return cls(data=data, body=markdown_normalize(body), timestamp=timestamp, filepath=filepath)
 
 
 #######################################################################
 # Markdown
 #######################################################################
 
-CODE_BLOCK_REGEX = re.compile(r"([\r\n]+\s*[\r\n]+(?P<CB>````*).*?[\r\n](?P=CB)\s*[\r\n]+)", re.MULTILINE | re.DOTALL)
+# CODE_BLOCK_REGEX = re.compile(r"([\r\n]+\s*[\r\n]+(?P<CB>````*).*?[\r\n](?P=CB)\s*[\r\n]+)", re.MULTILINE | re.DOTALL)
+CODE_BLOCK_RAW = r"(?P<CB>````*).*?\n.*?\n(?P=CB)"
+CODE_BLOCK_RAW_MATCH = r"(?<=\n\n)(" + CODE_BLOCK_RAW + r")(?=\n\n)"
+
+# CODE_BLOCK_REGEX = re.compile(r"(?<=\n\n)((?P<CB>````*).*?[\r\n](?P=CB)\n)(?=\n)", re.MULTILINE | re.DOTALL)
+CODE_BLOCK_REGEX = re.compile(CODE_BLOCK_RAW_MATCH, re.MULTILINE | re.DOTALL)
+# CODE_BLOCK_REGEX_2 = re.compile(r"(?P<CB>````*).*?[\r\n](?P=CB)\n", re.MULTILINE | re.DOTALL)
+CODE_BLOCK_REGEX_2 = re.compile(CODE_BLOCK_RAW, re.MULTILINE | re.DOTALL)
 CODE_INLINE_REGEX = re.compile(r"((?P<BT>``*)[^\r\n]*?(?P=BT))", re.MULTILINE | re.DOTALL)
 MARKDOWN_LINK_REGEX = re.compile(r"(?<!\!)(\[[^\]]*\]\()([^\ \)]+)(.*?\))", re.MULTILINE | re.DOTALL)
 MARKDOWN_IMAGE_REGEX = re.compile(r"(\!\[[^\]]*\]\()([^\ \)]+)(.*?\))", re.MULTILINE | re.DOTALL)
 
+TAILING_SPACES_REGEX = re.compile(r"\s*$")
 
 def markdown_code_block_split(text: str) -> List[str]:
-    return list(
-        filter(lambda elm: elm is not None and re.match(r"^````*$", elm) is None, re.split(CODE_BLOCK_REGEX, text)))
-
-
-def markdown_code_inline_split(text: str) -> List[str]:
-    return list(
-        filter(
-            None,
-            filter(lambda elm: elm is not None and re.match(r"^``*$", elm) is None, re.split(CODE_INLINE_REGEX,
-                                                                                             text))))
+    #
+    # NOTE 1:
+    # When the regex passed to re.split contains a capturing group (e.g. the named group (?P<CB>...) ), the captured text is included in the result.
+    # In this case, the ``` fence captured by (?P<CB>...) will be included in the re.split result.
+    # The 'filter' call below removes those bare ``` entries from the list.
+    #
+    # NOTE 2:
+    # CODE_BLOCK_REGEX only matches a code block that has \n\n on both sides, so \n\n is added to the head and the tail of the text before calling re.split.
+    # The added \n\n is stripped again from the first and last elements below.
+    #
+    blocks = list(filter(lambda elm: elm is not None and re.match(r"^````*$", elm) is None, re.split(CODE_BLOCK_REGEX, '\n\n' + text + '\n\n')))
+    blocks = blocks[1:] if blocks[0] == '\n\n' else ([blocks[0][2:]] + blocks[1:])
+    blocks = blocks[:-1] if blocks[-1] == '\n\n' else (blocks[:-1] + [blocks[-1][:-2]])
+    return blocks
+
+
+def markdown_code_inline_split(text: str) -> List[str]:    
+    return list(filter(None, filter(lambda elm: elm is not None and re.match(r"^``*$", elm) is None, re.split(CODE_INLINE_REGEX, text))))
 
 
 def markdown_replace_block_text(func: Callable[[str], str], text: str):
-    return "".join(
-        [func(block) if CODE_BLOCK_REGEX.match(block) is None else block for block in markdown_code_block_split(text)])
+    return "".join(        
+        #[func(block) if CODE_BLOCK_REGEX.match(block) is None else block for block in markdown_code_block_split(text)])
+        [func(block) if CODE_BLOCK_REGEX_2.match(block) is None else block for block in markdown_code_block_split(text)])
 
 
 def markdown_replace_text(func: Callable[[str], str], text: str):
     return markdown_replace_block_text(
         lambda block: "".join(
-            [func(x) if CODE_INLINE_REGEX.match(x) is None else x for x in markdown_code_inline_split(block)]), text)
+            [func(x) if CODE_INLINE_REGEX.match(x) is None else x for x in markdown_code_inline_split(block)]), markdown_normalize(text))
 
 
 def markdown_replace_link(conv: Callable[[str], str], text: str):
@@ -694,6 +711,10 @@ def markdown_replace_image(conv: Callable[[str], str], text: str):
     return re.sub(MARKDOWN_IMAGE_REGEX, lambda m: "".join([m.group(1), conv(m.group(2)), m.group(3)]), text)
 
 
+def markdown_normalize(text: str) -> str:
+    return "\n".join(map(lambda line: re.sub(TAILING_SPACES_REGEX, "", line), text.splitlines()))
+
+
 #######################################################################
 # GitHub
 #######################################################################

diff --git a/tests/cassettes/test_qiita_create_caller.yaml b/tests/cassettes/test_qiita_create_caller.yaml
@@ -16,7 +16,7 @@ interactions:
     uri: https://qiita.com/api/v2/authenticated_user
   response:
     body:
-      string: '{"description":"Programmer","facebook_id":"","followees_count":2,"followers_count":1,"github_login_name":"ryokat3","id":"ryokat3","items_count":9,"linkedin_id":"","location":"Japan","name":"Ryoji
+      string: '{"description":"Programmer","facebook_id":"","followees_count":2,"followers_count":4,"github_login_name":"ryokat3","id":"ryokat3","items_count":11,"linkedin_id":"","location":"Japan","name":"Ryoji
         Kato","organization":"","permanent_id":115148,"profile_image_url":"https://qiita-image-store.s3.ap-northeast-1.amazonaws.com/0/115148/profile-images/1641617983","team_only":false,"twitter_screen_name":null,"website_url":"","image_monthly_upload_limit":104857600,"image_monthly_upload_remaining":104857600}'
     headers:
       Cache-Control:
@@ -26,15 +26,15 @@ interactions:
       Content-Type:
       - application/json; charset=utf-8
       Date:
-      - Fri, 21 Jan 2022 04:17:39 GMT
+      - Mon, 21 Feb 2022 06:06:47 GMT
       ETag:
-      - W/"2aaf04820679c58d926153e93c3c317f"
+      - W/"da1a063958bbc463c6f0766c0d01dba6"
       Rate-Limit:
       - '1000'
       Rate-Remaining:
-      - '972'
+      - '924'
       Rate-Reset:
-      - '1642741683'
+      - '1645425273'
       Referrer-Policy:
       - strict-origin-when-cross-origin
       Server:
@@ -54,9 +54,9 @@ interactions:
       X-Permitted-Cross-Domain-Policies:
       - none
       X-Request-Id:
-      - be34b3a6-d047-4bf4-87cd-1fe5613ba812
+      - 14e4f8a3-6a67-4aaa-80a1-4fc3c23038db
       X-Runtime:
-      - '0.175251'
+      - '0.128404'
       X-XSS-Protection:
       - 1; mode=block
     status: