Skip to content

Commit

Permalink
update pdf generation for dev version
Browse files Browse the repository at this point in the history
  • Loading branch information
yinixu9506 committed Feb 21, 2020
1 parent e56bd2e commit 4816bdc
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 188 deletions.
14 changes: 2 additions & 12 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,21 +59,12 @@ jobs:
command: |
sudo bash -c 'echo "222.222.95.49 uc.qbox.me" >> /etc/hosts';
if [ "${CIRCLE_BRANCH}" == "master" ]; then
python3 scripts/upload.py dev/output.pdf tidb-manual-cn-dev.pdf;
python3 scripts/upload.py v3.0/output.pdf tidb-manual-cn-v3.0.pdf;
python3 scripts/upload.py v2.1/output.pdf tidb-manual-cn-v2.1.pdf;
python3 scripts/upload.py v3.1/output.pdf tidb-manual-cn-v3.1.pdf;
fi
if [ "${CIRCLE_BRANCH}" == "website-preview" ]; then
python3 scripts/upload.py dev/output.pdf tidb-manual-cn-preview-dev.pdf;
python3 scripts/upload.py v3.0/output.pdf tidb-manual-cn-preview-v3.0.pdf;
python3 scripts/upload.py v2.1/output.pdf tidb-manual-cn-preview-v2.1.pdf;
python3 scripts/upload.py v3.1/output.pdf tidb-manual-cn-preview-v3.1.pdf;
python3 scripts/upload.py output.pdf tidb-manual-cn-dev.pdf;
fi
- run:
name: "Copy Generated PDF"
command: mkdir /tmp/artifacts && cp dev/output.pdf dev/doc.md /tmp/artifacts && cp v3.0/output.pdf v3.0/doc.md /tmp/artifacts && cp v2.1/output.pdf v2.1/doc.md /tmp/artifacts
command: mkdir /tmp/artifacts && cp output.pdf doc.md /tmp/artifacts

- store_artifacts:
path: /tmp/artifacts
Expand All @@ -90,4 +81,3 @@ workflows:
branches:
only:
- master
- website-preview
43 changes: 18 additions & 25 deletions scripts/generate_pdf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,21 @@ _version_tag="$(date '+%Y%m%d')"
# used to debug template setting error

# add docs versions
# generate PDF for each version
docs_versions=(dev v3.0 v2.1 v3.1)

for i in "${docs_versions[@]}"
do
echo "$i"
version="$i/doc.md"
echo "$version"
output_path="$i/output.pdf"

pandoc -N --toc --smart --latex-engine=xelatex \
--template=templates/template.tex \
--listings \
--columns=80 \
-V title="TiDB 中文手册" \
-V author="PingCAP Inc." \
-V date="${_version_tag}" \
-V CJKmainfont="${MAINFONT}" \
-V mainfont="${MAINFONT}" \
-V sansfont="${MAINFONT}" \
-V monofont="${MONOFONT}" \
-V geometry:margin=1in \
-V include-after="\\input{templates/copyright.tex}" \
$version -s -o $output_path
done
# generate PDF for dev version

output_path="output.pdf"

pandoc -N --toc --smart --latex-engine=xelatex \
--template=templates/template.tex \
--listings \
--columns=80 \
-V title="TiDB 中文手册" \
-V author="PingCAP Inc." \
-V date="${_version_tag}" \
-V CJKmainfont="${MAINFONT}" \
-V mainfont="${MAINFONT}" \
-V sansfont="${MAINFONT}" \
-V monofont="${MONOFONT}" \
-V geometry:margin=1in \
-V include-after="\\input{templates/copyright.tex}" \
"doc.md" -s -o "output.pdf"
298 changes: 147 additions & 151 deletions scripts/merge_by_toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
followups = []
in_toc = False
contents = []
docs_versions = ['dev', 'v3.0', 'v2.1', 'v3.1']

hyper_link_pattern = re.compile(r'\[(.*?)\]\((.*?)(#.*?)?\)')
toc_line_pattern = re.compile(r'([\-\+]+)\s\[(.*?)\]\((.*?)(#.*?)?\)')
Expand All @@ -23,159 +22,156 @@
# match all headings
heading_patthern = re.compile(r'(^#+|\n#+)\s')

for version in docs_versions:
entry_file = version + "/TOC.md"
followups = []
in_toc = False
contents = []

# stage 1, parse toc
with open(entry_file) as fp:
level = 0
current_level = ""
for line in fp:
if not in_toc and line.startswith("## "):
in_toc = True
print("in toc")
elif in_toc and line.startswith('## '):
in_toc = False
# yes, toc processing done
# contents.append(line[1:]) # skip 1 level TOC
break
elif in_toc and not line.startswith('#') and line.strip():
## get level from space length
print(line)
level_space_str = level_pattern.findall(line)[0][:-1]
level = len(level_space_str) // 2 + 1 ## python divide get integer

matches = toc_line_pattern.findall(line)
if matches:
for match in matches:
fpath = match[2]
if fpath.endswith('.md'):
# remove the first slash in the relative path
fpath = fpath[1:]
key = ('FILE', level, fpath)
if key not in followups:
print(key)
followups.append(key)
elif fpath.startswith('http'):
## remove list format character `- `, `+ `
followups.append(('TOC', level, line.strip()[2:]))
else:
name = line.strip().split(None, 1)[-1]
key = ('TOC', level, name)
if key not in followups:
print(key)
followups.append(key)

else:
pass

# overview part in README.md
followups.insert(1, ("RAW", 0, fp.read()))

for k in followups:
print(k)

# stage 2, get file heading
file_link_name = {}
title_pattern = re.compile(r'(^#+)\s.*')
for tp, lv, f in followups:
if tp != 'FILE':
continue
try:
for line in open(f).readlines():
if line.startswith("#"):
tag = line.strip()
break
except Exception as e:
print(e)
tag = ""
if tag.startswith('# '):
tag = tag[2:]
elif tag.startswith('## '):
tag = tag[3:]
file_link_name[f] = tag.lower().replace(' ', '-')

print(file_link_name)

def replace_link_wrap(chapter, name):

# Note: 仅仅支持 hash 匹配,如果在多个文档中有同名 heading 会碰撞
# 支持 chapter 文档中的 ./ddd.md, xxx.md, xxx.md#xxx 等
def replace_link(match):
full = match.group(0)
link_name = match.group(1)
link = match.group(2)
frag = match.group(3)
if link.endswith('.md') or '.md#' in link:
if not frag:
relative_path = ''
if not link.startswith('.'):
relative_path = '../'
_rel_path = os.path.normpath(os.path.join(name, relative_path, link))
for fpath in file_link_name:
if _rel_path == fpath:
frag = '#' + file_link_name[fpath]
return '[%s](%s)' % (link_name, frag)
elif link.endswith('.png') or link.endswith('.jpeg') or link.endswith('.svg') or link.endswith('.gif') or link.endswith('.jpg'):
# special handling for pictures
img_link = re.sub(r'[\.\/]*media\/', './media/', link, count=0, flags=0)
# print('****************', img_link)
# print('================', '[%s](%s)' % (link_name, img_link))
# return '[%s](%s/%s)' % (link_name, dirname, fname)
return '[%s](%s)' % (link_name, img_link)
entry_file = "TOC.md"

# stage 1, parse toc
with open(entry_file) as fp:
level = 0
current_level = ""
for line in fp:
if not in_toc and line.startswith("## "):
in_toc = True
print("in toc")
elif in_toc and line.startswith('## '):
in_toc = False
# yes, toc processing done
# contents.append(line[1:]) # skip 1 level TOC
break
elif in_toc and not line.startswith('#') and line.strip():
## get level from space length
print(line)
level_space_str = level_pattern.findall(line)[0][:-1]
level = len(level_space_str) // 2 + 1 ## python divide get integer

matches = toc_line_pattern.findall(line)
if matches:
for match in matches:
fpath = match[2]
if fpath.endswith('.md'):
# remove the first slash in the relative path
fpath = fpath[1:]
key = ('FILE', level, fpath)
if key not in followups:
print(key)
followups.append(key)
elif fpath.startswith('http'):
## remove list format character `- `, `+ `
followups.append(('TOC', level, line.strip()[2:]))
else:
return full

return hyper_link_pattern.sub(replace_link, chapter)

def replace_heading_func(diff_level=0):
    """Build an ``re.sub`` callback that shifts Markdown heading levels.

    Args:
        diff_level: Number of ``#`` characters to add to each matched heading
            marker (a negative value removes them). ``0`` leaves every match
            untouched.

    Returns:
        A function that takes a match of a heading marker (an optional
        leading newline, a run of ``#`` and one whitespace character) and
        returns the replacement marker text.
    """

    def replace_heading(match):
        if diff_level == 0:
            return match.group(0)
        # Never go below one '#': a large negative shift used to produce an
        # empty marker, silently turning the heading into plain text.
        depth = max(1, match.group(0).count('#') + diff_level)
        return '\n' + '#' * depth + ' '

    return replace_heading

def replace_img_link(match):
name = line.strip().split(None, 1)[-1]
key = ('TOC', level, name)
if key not in followups:
print(key)
followups.append(key)

else:
pass

# overview part in README.md
followups.insert(1, ("RAW", 0, fp.read()))

for k in followups:
print(k)

# stage 2, get file heading
# Map every chapter file collected from the TOC to an anchor derived from the
# first heading line of that file; replace_link_wrap() reads this mapping.
file_link_name = {}
title_pattern = re.compile(r'(^#+)\s.*')
for tp, lv, f in followups:
    if tp != 'FILE':
        continue
    # Reset for each file: otherwise a file without any heading would reuse
    # the previous file's heading (or raise NameError for the first file).
    tag = ""
    try:
        # `with` guarantees the handle is closed; iteration stops at the
        # first line that starts with '#'.
        with open(f) as chapter_fp:
            for line in chapter_fp:
                if line.startswith("#"):
                    tag = line.strip()
                    break
    except Exception as e:
        # Best effort: an unreadable file just gets an empty anchor.
        print(e)
    # Strip the marker of a level-1 or level-2 heading.
    if tag.startswith('# '):
        tag = tag[2:]
    elif tag.startswith('## '):
        tag = tag[3:]
    file_link_name[f] = tag.lower().replace(' ', '-')

print(file_link_name)

def replace_link_wrap(chapter, name):
    """Rewrite the Markdown links of one chapter for the merged document.

    Reads the module-level ``hyper_link_pattern`` and ``file_link_name``.

    Args:
        chapter: Full Markdown text of the chapter.
        name: Path of the chapter file; used as the base when resolving
            links that point to other ``.md`` files.

    Returns:
        The chapter text in which links to ``.md`` files are replaced by
        in-document anchors and image links are normalized to
        ``./media/...``. Every other link, and any ``.md`` link for which no
        anchor can be determined, is left unchanged.
    """

    # Note: only anchor (hash) matching is supported, so headings with the
    # same name in different documents will collide.
    # Intended for links inside chapter documents such as ./ddd.md, xxx.md,
    # xxx.md#xxx, etc.
    def replace_link(match):
        full = match.group(0)
        link_name = match.group(1)
        link = match.group(2)
        frag = match.group(3)
        if link.endswith('.md') or '.md#' in link:
            if not frag:
                relative_path = ''
                if not link.startswith('.'):
                    relative_path = '../'
                _rel_path = os.path.normpath(os.path.join(name, relative_path, link))
                if _rel_path in file_link_name:
                    frag = '#' + file_link_name[_rel_path]
            if not frag:
                # The target is not part of the merged document: keep the
                # original link instead of emitting "[text](None)".
                return full
            return '[%s](%s)' % (link_name, frag)
        elif link.endswith(('.png', '.jpeg', '.svg', '.gif', '.jpg')):
            # Special handling for pictures: collapse any leading "./" or
            # "../" run in front of "media/" into "./media/".
            img_link = re.sub(r'[\.\/]*media\/', './media/', link)
            return '[%s](%s)' % (link_name, img_link)
        else:
            return full

    return hyper_link_pattern.sub(replace_link, chapter)

def replace_heading_func(diff_level=0):
    """Build an ``re.sub`` callback that shifts Markdown heading levels.

    Args:
        diff_level: Number of ``#`` characters to add to each matched heading
            marker (a negative value removes them). ``0`` leaves every match
            untouched.

    Returns:
        A function that takes a match of a heading marker (an optional
        leading newline, a run of ``#`` and one whitespace character) and
        returns the replacement marker text.
    """

    def replace_heading(match):
        if diff_level == 0:
            return match.group(0)
        # Never go below one '#': a large negative shift used to produce an
        # empty marker, silently turning the heading into plain text.
        depth = max(1, match.group(0).count('#') + diff_level)
        return '\n' + '#' * depth + ' '

    return replace_heading

def replace_img_link(match):
    """Return the replacement text for one matched Markdown image link.

    Args:
        match: A regex match whose group 1 is the image alt text and whose
            group 2 is the image path.

    Returns:
        For ``.png`` paths, an image link pointing at ``./media/<basename>``;
        for any other path, the matched text unchanged.
    """
    full = match.group(0)
    link_name = match.group(1)
    link = match.group(2)

    if link.endswith('.png'):
        fname = os.path.basename(link)
        return '![%s](./media/%s)' % (link_name, fname)

    # Previously non-PNG links fell through and the callback returned None
    # instead of a string; hand the original text back untouched.
    return full

# stage 3, concat files
for type_, level, name in followups:
if type_ == 'TOC':
contents.append("\n{} {}\n".format('#' * level, name))
elif type_ == 'RAW':
contents.append(name)
elif type_ == 'FILE':
try:
with open(name) as fp:
chapter = fp.read()
chapter = replace_link_wrap(chapter, name)
# chapter = image_link_pattern.sub(replace_img_link, chapter)

# fix heading level
diff_level = level - heading_patthern.findall(chapter)[0].count('#')

print(name, type_, level, diff_level)
chapter = heading_patthern.sub(replace_heading_func(diff_level), chapter)
contents.append(chapter)
contents.append('') # add an empty line
except Exception as e:
print(e)
print("generate file error: ignore!")

if link.endswith('.png'):
fname = os.path.basename(link)
return '![%s](./media/%s)' % (link_name, fname)

# stage 3, concat files
for type_, level, name in followups:
if type_ == 'TOC':
contents.append("\n{} {}\n".format('#' * level, name))
elif type_ == 'RAW':
contents.append(name)
elif type_ == 'FILE':
try:
with open(name) as fp:
chapter = fp.read()
chapter = replace_link_wrap(chapter, name)
# chapter = image_link_pattern.sub(replace_img_link, chapter)

# fix heading level
diff_level = level - heading_patthern.findall(chapter)[0].count('#')

print(name, type_, level, diff_level)
chapter = heading_patthern.sub(replace_heading_func(diff_level), chapter)
contents.append(chapter)
contents.append('') # add an empty line
except Exception as e:
print(e)
print("generate file error: ignore!")

# stage 4, generate final doc.md
target_doc_file = version + '/doc.md'
with open(target_doc_file, 'w') as fp:
fp.write('\n'.join(contents))
# stage 4, generate final doc.md
target_doc_file = 'doc.md'
with open(target_doc_file, 'w') as fp:
fp.write('\n'.join(contents))

0 comments on commit 4816bdc

Please sign in to comment.