Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2021 wak109
Copyright (c) 2021 - 2022 ryokat3

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "qiita-sync"
version = "1.4.0"
version = "1.4.4"
description = "Synchronize GitHub with Qiita"
authors = ["Ryoji Kato <ryokat3@gmail.com>"]
readme = "README.md"
Expand Down
53 changes: 37 additions & 16 deletions qiita_sync/qiita_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ def toApi(self) -> Dict[str, Any]:
def fromApi(cls, item) -> QiitaArticle:
return cls(
data=QiitaData.fromApi(item),
body=item["body"],
body=markdown_normalize(item["body"]),
timestamp=get_utc(item["updated_at"]),
aux=QiitaArticleAux.fromApi(item))

Expand Down Expand Up @@ -649,41 +649,58 @@ def fromFile(cls, filepath: Path) -> GitHubArticle:
Maybe(m).map(lambda m: m.group(1)).getOrElse(""), qiita_get_temporary_title(body),
qiita_get_temporary_tags(body))

return cls(data=data, body=body, timestamp=timestamp, filepath=filepath)
return cls(data=data, body=markdown_normalize(body), timestamp=timestamp, filepath=filepath)


#######################################################################
# Markdown
#######################################################################

CODE_BLOCK_REGEX = re.compile(r"([\r\n]+\s*[\r\n]+(?P<CB>````*).*?[\r\n](?P=CB)\s*[\r\n]+)", re.MULTILINE | re.DOTALL)
# CODE_BLOCK_REGEX = re.compile(r"([\r\n]+\s*[\r\n]+(?P<CB>````*).*?[\r\n](?P=CB)\s*[\r\n]+)", re.MULTILINE | re.DOTALL)
CODE_BLOCK_RAW = r"(?P<CB>````*).*?\n.*?\n(?P=CB)"
CODE_BLOCK_RAW_MATCH = r"(?<=\n\n)(" + CODE_BLOCK_RAW + r")(?=\n\n)"

# CODE_BLOCK_REGEX = re.compile(r"(?<=\n\n)((?P<CB>````*).*?[\r\n](?P=CB)\n)(?=\n)", re.MULTILINE | re.DOTALL)
CODE_BLOCK_REGEX = re.compile(CODE_BLOCK_RAW_MATCH, re.MULTILINE | re.DOTALL)
# CODE_BLOCK_REGEX_2 = re.compile(r"(?P<CB>````*).*?[\r\n](?P=CB)\n", re.MULTILINE | re.DOTALL)
CODE_BLOCK_REGEX_2 = re.compile(CODE_BLOCK_RAW, re.MULTILINE | re.DOTALL)
CODE_INLINE_REGEX = re.compile(r"((?P<BT>``*)[^\r\n]*?(?P=BT))", re.MULTILINE | re.DOTALL)
MARKDOWN_LINK_REGEX = re.compile(r"(?<!\!)(\[[^\]]*\]\()([^\ \)]+)(.*?\))", re.MULTILINE | re.DOTALL)
MARKDOWN_IMAGE_REGEX = re.compile(r"(\!\[[^\]]*\]\()([^\ \)]+)(.*?\))", re.MULTILINE | re.DOTALL)

TAILING_SPACES_REGEX = re.compile(r"\s*$")

def markdown_code_block_split(text: str) -> List[str]:
return list(
filter(lambda elm: elm is not None and re.match(r"^````*$", elm) is None, re.split(CODE_BLOCK_REGEX, text)))


def markdown_code_inline_split(text: str) -> List[str]:
return list(
filter(
None,
filter(lambda elm: elm is not None and re.match(r"^``*$", elm) is None, re.split(CODE_INLINE_REGEX,
text))))
#
# NOTE 1:
# When the pattern passed to re.split contains capturing groups (e.g. the named group (?P<CB>...)),
# the text matched by each group is also included in the result.
# Here, the code fence (e.g. ```) therefore shows up as a separate element of the re.split result,
# and the 'filter' call removes it from the list.
#
# NOTE 2:
# The code block pattern requires \n\n before and after a block (lookbehind / lookahead), so \n\n is
# added to the head and the tail of the text when calling re.split.
# The added \n\n is removed again afterwards.
#
blocks = list(filter(lambda elm: elm is not None and re.match(r"^````*$", elm) is None, re.split(CODE_BLOCK_REGEX, '\n\n' + text + '\n\n')))
blocks = blocks[1:] if blocks[0] == '\n\n' else ([blocks[0][2:]] + blocks[1:])
blocks = blocks[:-1] if blocks[-1] == '\n\n' else (blocks[:-1] + [blocks[-1][:-2]])
return blocks


def markdown_code_inline_split(text: str) -> List[str]:
    """Split text into inline-code spans and the plain text between them.

    The text is split with CODE_INLINE_REGEX. Because that pattern contains
    capturing groups, re.split also returns the captured backtick fence and
    may return None or empty strings; those entries are dropped here.

    Args:
        text: Text to split (normally a block that is not a fenced code block).

    Returns:
        The non-empty pieces of the text, in their original order.
    """
    pieces = re.split(CODE_INLINE_REGEX, text)
    # Keep only real content: skip None / empty strings and pieces that
    # consist solely of backticks (the captured fence group).
    return [piece for piece in pieces if piece and re.match(r"^``*$", piece) is None]


def markdown_replace_block_text(func: Callable[[str], str], text: str):
return "".join(
[func(block) if CODE_BLOCK_REGEX.match(block) is None else block for block in markdown_code_block_split(text)])
return "".join(
#[func(block) if CODE_BLOCK_REGEX.match(block) is None else block for block in markdown_code_block_split(text)])
[func(block) if CODE_BLOCK_REGEX_2.match(block) is None else block for block in markdown_code_block_split(text)])


def markdown_replace_text(func: Callable[[str], str], text: str):
return markdown_replace_block_text(
lambda block: "".join(
[func(x) if CODE_INLINE_REGEX.match(x) is None else x for x in markdown_code_inline_split(block)]), text)
[func(x) if CODE_INLINE_REGEX.match(x) is None else x for x in markdown_code_inline_split(block)]), markdown_normalize(text))


def markdown_replace_link(conv: Callable[[str], str], text: str):
Expand All @@ -694,6 +711,10 @@ def markdown_replace_image(conv: Callable[[str], str], text: str):
return re.sub(MARKDOWN_IMAGE_REGEX, lambda m: "".join([m.group(1), conv(m.group(2)), m.group(3)]), text)


def markdown_normalize(text: str) -> str:
    """Strip trailing whitespace from every line of the text.

    The text is split with ``str.splitlines()`` and re-joined with a single
    newline character, so every kind of line ending becomes a plain newline
    and a line terminator at the very end of the text is not kept.

    Args:
        text: Markdown text to normalize.

    Returns:
        The text with trailing whitespace removed from each line.
    """
    # str.rstrip() removes the same trailing whitespace as substituting the
    # pattern r"\s*$" on a single line, without needing the regex engine.
    return "\n".join(line.rstrip() for line in text.splitlines())


#######################################################################
# GitHub
#######################################################################
Expand Down
14 changes: 7 additions & 7 deletions tests/cassettes/test_qiita_create_caller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ interactions:
uri: https://qiita.com/api/v2/authenticated_user
response:
body:
string: '{"description":"Programmer","facebook_id":"","followees_count":2,"followers_count":1,"github_login_name":"ryokat3","id":"ryokat3","items_count":9,"linkedin_id":"","location":"Japan","name":"Ryoji
string: '{"description":"Programmer","facebook_id":"","followees_count":2,"followers_count":4,"github_login_name":"ryokat3","id":"ryokat3","items_count":11,"linkedin_id":"","location":"Japan","name":"Ryoji
Kato","organization":"","permanent_id":115148,"profile_image_url":"https://qiita-image-store.s3.ap-northeast-1.amazonaws.com/0/115148/profile-images/1641617983","team_only":false,"twitter_screen_name":null,"website_url":"","image_monthly_upload_limit":104857600,"image_monthly_upload_remaining":104857600}'
headers:
Cache-Control:
Expand All @@ -26,15 +26,15 @@ interactions:
Content-Type:
- application/json; charset=utf-8
Date:
- Fri, 21 Jan 2022 04:17:39 GMT
- Mon, 21 Feb 2022 06:06:47 GMT
ETag:
- W/"2aaf04820679c58d926153e93c3c317f"
- W/"da1a063958bbc463c6f0766c0d01dba6"
Rate-Limit:
- '1000'
Rate-Remaining:
- '972'
- '924'
Rate-Reset:
- '1642741683'
- '1645425273'
Referrer-Policy:
- strict-origin-when-cross-origin
Server:
Expand All @@ -54,9 +54,9 @@ interactions:
X-Permitted-Cross-Domain-Policies:
- none
X-Request-Id:
- be34b3a6-d047-4bf4-87cd-1fe5613ba812
- 14e4f8a3-6a67-4aaa-80a1-4fc3c23038db
X-Runtime:
- '0.175251'
- '0.128404'
X-XSS-Protection:
- 1; mode=block
status:
Expand Down
Loading