Skip to content

Commit

Permalink
Migrate scripts to docs
Browse files Browse the repository at this point in the history
  • Loading branch information
TomShawn committed Aug 30, 2021
1 parent 788ce63 commit f4c89c5
Show file tree
Hide file tree
Showing 9 changed files with 865 additions and 10 deletions.
15 changes: 5 additions & 10 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,12 @@ jobs:
command: |
git remote add upstream https://github.com/pingcap/docs.git
git fetch upstream
wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-file-encoding.py
python3 check-file-encoding.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*')
python3 scripts/check-file-encoding.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*')
- run:
name: "Check git conflicts"
command: |
wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-conflicts.py
python3 check-conflicts.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' '*.yml' '*.yaml')
python3 scripts/check-conflicts.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' '*.yml' '*.yaml')
- run:
name: "Install markdownlint"
Expand All @@ -53,20 +51,17 @@ jobs:
- run:
name: "Check control characters"
command: |
wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-control-char.py
python3 check-control-char.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*')
python3 scripts/check-control-char.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*')
- run:
name: "Check unclosed tags"
command: |
wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-tags.py
python3 check-tags.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*')
python3 scripts/check-tags.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*')
- run:
name: "Check manual line breaks"
command: |
wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-manual-line-breaks.py
python3 check-manual-line-breaks.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*')
python3 scripts/check-manual-line-breaks.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*')
build:
docker:
Expand Down
73 changes: 73 additions & 0 deletions scripts/check-conflicts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Copyright 2021 PingCAP, Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# MIT License

# Copyright (c) 2021 Charlotte Liu

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-conflicts.py.

import re
import sys
import os

def check_conflicts(filename):
    """Return the Git conflict regions found in *filename*.

    A region is reported when a line starting with ``>>>>>>>`` is reached
    and the most recent marker before it in the same file was a ``=======``
    line. Each region is a ``(first_line, last_line)`` tuple of 1-based
    line numbers:

    * ``first_line`` is the most recent unmatched ``<<<<<<<`` line, or the
      ``=======`` line when no start marker precedes it, so that every
      detected region can be reported with concrete line numbers.
    * ``last_line`` is the ``>>>>>>>`` line.

    All scanner state is local to this call, so markers left open in one
    file can never influence the result for another file.
    """
    start_marker = re.compile(r'<{7}.*\n')
    separator = re.compile(r'={7}\n')
    end_marker = re.compile(r'>{7}')

    conflicts = []
    start_line = None      # line of the latest unmatched '<<<<<<<'
    separator_line = None  # line of a '=======' still waiting for '>>>>>>>'

    # The markers are plain ASCII, so undecodable bytes elsewhere in the file
    # are replaced rather than allowed to abort the scan.
    with open(filename, 'r', encoding='utf-8', errors='replace') as file:
        for line_num, line in enumerate(file, start=1):
            if start_marker.match(line):
                # A new start marker cancels any pending separator.
                start_line = line_num
                separator_line = None
            elif separator.match(line):
                separator_line = line_num
            elif end_marker.match(line) and separator_line is not None:
                first = start_line if start_line is not None else separator_line
                conflicts.append((first, line_num))
                start_line = None
                separator_line = None

    return conflicts


def main(argv=None):
    """Scan the given paths and return the process exit status.

    *argv* is the list of paths to check and defaults to ``sys.argv[1:]``.
    Arguments that are not existing files are skipped. Returns 1 when at
    least one file contains a conflict region, otherwise 0.
    """
    paths = sys.argv[1:] if argv is None else argv
    found = False

    for filename in paths:
        if not os.path.isfile(filename):
            continue

        conflicts = check_conflicts(filename)
        if not conflicts:
            continue

        found = True
        print("\n" + filename + ": this file has conflicts in the following lines:\n")
        for first, last in conflicts:
            print("CONFLICTS: line " + str(first) + " to line " + str(last) + "\n")

    if found:
        print("The above conflicts will cause website build failure. Please fix them.")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
69 changes: 69 additions & 0 deletions scripts/check-control-char.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright 2021 PingCAP, Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# MIT License

# Copyright (c) 2021 Charlotte Liu

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-control-char.py.

import re, sys, os

# Check for control characters (currently only the backspace character, U+0008).
def check_control_char(filename):
    """Report the lines of *filename* that contain a backspace character.

    Inside a regex character class ``\\b`` denotes the backspace character
    (U+0008), which is the only control character this check looks for.

    The file is read as UTF-8 regardless of the platform locale.

    Returns 1 when at least one offending line was found (after printing
    the file name and the 1-based line numbers), otherwise 0.
    """
    backspace = re.compile(r'[\b]')

    with open(filename, 'r', encoding='utf-8') as file:
        pos = [
            line_num
            for line_num, line in enumerate(file, start=1)
            if backspace.search(line)
        ]

    if not pos:
        return 0

    print("\n" + filename + ": this file has control characters in the following lines:\n")
    for cc in pos:
        print("CONTROL CHARACTERS: L" + str(cc))
    print("\nPlease delete these control characters.")

    return 1

if __name__ == "__main__":

    # Number of checked files that contain control characters.
    count = 0

    for filename in sys.argv[1:]:
        # Arguments that are not existing files are skipped.
        if os.path.isfile(filename) and check_control_char(filename):
            count += 1

    if count:
        print("\nThe above issues will cause website build failure. Please fix them.")
        # sys.exit() instead of the interactive-only exit() helper, which is
        # injected by the site module and is not guaranteed to exist.
        sys.exit(1)
57 changes: 57 additions & 0 deletions scripts/check-file-encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright 2021 PingCAP, Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# MIT License

# Copyright (c) 2021 Charlotte Liu

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-file-encoding.py.

import sys, os, codecs

# Convert the file encoding to the default UTF-8 without BOM.
def check_BOM(filename):
    """Strip a leading UTF-8 byte order mark from *filename*, if present.

    The file is first opened read-only, so files without a BOM are never
    opened for writing and are left untouched. When a BOM is found, the
    file is rewritten with the same content minus the BOM and a notice is
    printed.
    """
    bom = codecs.BOM_UTF8

    with open(filename, "rb") as fp:
        if fp.read(len(bom)) != bom:
            return
        # Everything after the BOM is the real content.
        content = fp.read()

    with open(filename, "wb") as fp:
        fp.write(content)

    print("\n" + filename + ": this file's encoding has been converted to UTF-8 without BOM to avoid broken metadata display.")

if __name__ == "__main__":

    # Run the BOM check on every command-line argument that is an existing
    # file; any other argument is skipped.
    for candidate in sys.argv[1:]:
        if not os.path.isfile(candidate):
            continue
        check_BOM(candidate)
115 changes: 115 additions & 0 deletions scripts/check-manual-line-breaks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Copyright 2021 PingCAP, Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# MIT License

# Copyright (c) 2021 Charlotte Liu

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-manual-line-breaks.py.

import re, sys, os

# Check for manual line breaks within a paragraph.
def check_manual_break(filename):
    """Report manual line breaks inside paragraphs of a Markdown file.

    Two consecutive examined lines count as a manual line break unless one
    of them is blank or both start with a list marker. These lines are not
    examined at all:

    * everything up to and including the second ``---`` line (the front
      matter); a file with fewer than two such lines is skipped entirely;
    * lines starting with a table pipe or a blockquote marker;
    * lines starting with an HTML tag or an HTML comment delimiter;
    * image links;
    * the contents of fenced code blocks and of ``<pre><code>`` /
      ``<table>`` blocks.

    The file is read as UTF-8 regardless of the platform locale.

    Returns 1 if at least one manual line break was reported (after
    printing the file name and the 1-based line numbers), otherwise 0.
    """
    # All patterns are applied with match(), i.e. anchored at line start.
    metadata_fence = re.compile(r'\s*(-){3}')
    table_or_note = re.compile(r'\s*(\||>)\s*\w*')
    html_tag = re.compile(r'\s*((<\/*(.*)>)|<!--|-->)\s*\w*')
    raw_block_open = re.compile(r'\s*(<pre><code>|<table>)')
    raw_block_close = re.compile(r'\s*(<\/code><\/pre>|<\/table>)')
    # The scheme class is [a-zA-Z]; the former 'A-z' range also accepted
    # the punctuation characters that sit between 'Z' and 'a'.
    image_link = re.compile(r'\s*!\[.+\](\(.+\)|: [a-zA-Z]+://[^\s]*)')
    code_fence = re.compile(r'\s*`{3}')
    blank_line = re.compile(r'\s*\n')
    # One list-marker pattern for both lines of a pair. The pattern formerly
    # used for the second line lacked the grouping around '\d+|\w{1}', so
    # any line starting with digits was treated as a list item.
    list_item = re.compile(r'\s*(-|\+|(\d+|\w{1})\.|\*)\s*\w*')

    metadata = 0            # number of '---' delimiters seen so far
    in_code_fence = False   # inside a ``` fenced block
    in_raw_html = False     # inside <pre><code> ... or <table> ...
    previous = None         # last examined line, None until one is seen
    mark = 0

    with open(filename, 'r', encoding='utf-8') as file:
        for line_num, line in enumerate(file, start=1):

            # Skip the front matter, including both of its delimiters.
            if metadata < 2:
                if metadata_fence.match(line):
                    metadata += 1
                continue

            # Skip tables and notes.
            if table_or_note.match(line):
                continue

            # Skip HTML tags and markdownlint tags. Tags that open or close
            # a raw block only flip the state and then fall through, so the
            # closing tag itself is examined like an ordinary line.
            if html_tag.match(line):
                if raw_block_open.match(line):
                    in_raw_html = True
                elif raw_block_close.match(line):
                    in_raw_html = False
                else:
                    continue

            # Skip image links.
            if image_link.match(line):
                continue

            # A fence line toggles the code-block state.
            if code_fence.match(line):
                in_code_fence = not in_code_fence

            if in_code_fence or in_raw_html:
                continue

            # Pair the current line with the previously examined one.
            if previous is None:
                previous = line
                continue
            former, previous = previous, line

            # A blank line on either side means a paragraph boundary.
            if blank_line.match(former) or blank_line.match(line):
                continue

            # Adjacent list items are legitimate consecutive lines.
            if list_item.match(former) and list_item.match(line):
                continue

            if mark == 0:
                print("\n" + filename + ": this file has manual line breaks in the following lines:\n")
                mark = 1

            print("MANUAL LINE BREAKS: L" + str(line_num))

    return mark


if __name__ == "__main__":

    # Number of checked files that contain manual line breaks.
    count = 0

    for filename in sys.argv[1:]:
        # Arguments that are not existing files are skipped.
        if os.path.isfile(filename) and check_manual_break(filename):
            count += 1

    if count:
        print("\nThe above issues will cause website build failure. Please fix them.")
        # sys.exit() instead of the interactive-only exit() helper, which is
        # injected by the site module and is not guaranteed to exist.
        sys.exit(1)
Loading

0 comments on commit f4c89c5

Please sign in to comment.