update pdf generation for dev version

junlan-zhang · Feb 21, 2020 · 4816bdc · 4816bdc
1 parent e56bd2e
commit 4816bdc
Show file tree

Hide file tree

Showing 3 changed files with 167 additions and 188 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -59,21 +59,12 @@ jobs:
           command: |
             sudo bash -c 'echo "222.222.95.49 uc.qbox.me" >> /etc/hosts';
             if [ "${CIRCLE_BRANCH}" == "master" ]; then
-              python3 scripts/upload.py dev/output.pdf tidb-manual-cn-dev.pdf;
-              python3 scripts/upload.py v3.0/output.pdf tidb-manual-cn-v3.0.pdf;
-              python3 scripts/upload.py v2.1/output.pdf tidb-manual-cn-v2.1.pdf;
-              python3 scripts/upload.py v3.1/output.pdf tidb-manual-cn-v3.1.pdf;
-            fi
-            if [ "${CIRCLE_BRANCH}" == "website-preview" ]; then
-              python3 scripts/upload.py dev/output.pdf tidb-manual-cn-preview-dev.pdf;
-              python3 scripts/upload.py v3.0/output.pdf tidb-manual-cn-preview-v3.0.pdf;
-              python3 scripts/upload.py v2.1/output.pdf tidb-manual-cn-preview-v2.1.pdf;
-              python3 scripts/upload.py v3.1/output.pdf tidb-manual-cn-preview-v3.1.pdf;
+              python3 scripts/upload.py output.pdf tidb-manual-cn-dev.pdf;
             fi
 
       - run:
           name: "Copy Generated PDF"
-          command: mkdir /tmp/artifacts && cp dev/output.pdf dev/doc.md /tmp/artifacts && cp v3.0/output.pdf v3.0/doc.md /tmp/artifacts && cp v2.1/output.pdf v2.1/doc.md /tmp/artifacts
+          command: mkdir /tmp/artifacts && cp output.pdf doc.md /tmp/artifacts
 
       - store_artifacts:
           path: /tmp/artifacts
@@ -90,4 +81,3 @@ workflows:
             branches:
               only:
                 - master
-                - website-preview
diff --git a/scripts/generate_pdf.sh b/scripts/generate_pdf.sh
@@ -16,28 +16,21 @@ _version_tag="$(date '+%Y%m%d')"
 # used to debug template setting error
 
 # add docs versions
-# generate PDF for each version
-docs_versions=(dev v3.0 v2.1 v3.1) 
-
-for i in "${docs_versions[@]}"
-do
-   echo "$i"
-   version="$i/doc.md"
-   echo "$version"
-   output_path="$i/output.pdf"
-
-   pandoc -N --toc --smart --latex-engine=xelatex \
-    --template=templates/template.tex \
-    --listings \
-    --columns=80 \
-    -V title="TiDB 中文手册" \
-    -V author="PingCAP Inc." \
-    -V date="${_version_tag}" \
-    -V CJKmainfont="${MAINFONT}" \
-    -V mainfont="${MAINFONT}" \
-    -V sansfont="${MAINFONT}" \
-    -V monofont="${MONOFONT}" \
-    -V geometry:margin=1in \
-    -V include-after="\\input{templates/copyright.tex}" \
-    $version -s -o $output_path
-done
+# generate PDF for dev version
+
+output_path="output.pdf"
+
+pandoc -N --toc --smart --latex-engine=xelatex \
+--template=templates/template.tex \
+--listings \
+--columns=80 \
+-V title="TiDB 中文手册" \
+-V author="PingCAP Inc." \
+-V date="${_version_tag}" \
+-V CJKmainfont="${MAINFONT}" \
+-V mainfont="${MAINFONT}" \
+-V sansfont="${MAINFONT}" \
+-V monofont="${MONOFONT}" \
+-V geometry:margin=1in \
+-V include-after="\\input{templates/copyright.tex}" \
+"doc.md" -s -o "output.pdf"
diff --git a/scripts/merge_by_toc.py b/scripts/merge_by_toc.py
@@ -14,7 +14,6 @@
 followups = []
 in_toc = False
 contents = []
-docs_versions = ['dev', 'v3.0', 'v2.1', 'v3.1']
 
 hyper_link_pattern = re.compile(r'\[(.*?)\]\((.*?)(#.*?)?\)')
 toc_line_pattern = re.compile(r'([\-\+]+)\s\[(.*?)\]\((.*?)(#.*?)?\)')
@@ -23,159 +22,156 @@
 # match all headings
 heading_patthern = re.compile(r'(^#+|\n#+)\s')
 
-for version in docs_versions:
-    entry_file = version + "/TOC.md"
-    followups = []
-    in_toc = False
-    contents = []
-
-    # stage 1, parse toc
-    with open(entry_file) as fp:
-        level = 0
-        current_level = ""
-        for line in fp:
-            if not in_toc and line.startswith("## "):
-                in_toc = True
-                print("in toc")
-            elif in_toc and line.startswith('## '):
-                in_toc = False
-                # yes, toc processing done
-                # contents.append(line[1:]) # skip 1 level TOC
-                break
-            elif in_toc and not line.startswith('#') and line.strip():
-                ## get level from space length
-                print(line)
-                level_space_str = level_pattern.findall(line)[0][:-1]
-                level = len(level_space_str) // 2 + 1 ## python divide get integer
-
-                matches = toc_line_pattern.findall(line)
-                if matches:
-                    for match in matches:
-                        fpath = match[2]
-                        if fpath.endswith('.md'):
-                            # remove the first slash in the relative path
-                            fpath = fpath[1:]
-                            key = ('FILE', level, fpath)
-                            if key not in followups:
-                                print(key)
-                                followups.append(key)
-                        elif fpath.startswith('http'):
-                            ## remove list format character `- `, `+ `
-                            followups.append(('TOC', level, line.strip()[2:]))
-                else:
-                    name = line.strip().split(None, 1)[-1]
-                    key = ('TOC', level, name)
-                    if key not in followups:
-                        print(key)
-                        followups.append(key)
-
-            else:
-                pass
-
-        # overview part in README.md
-        followups.insert(1, ("RAW", 0, fp.read()))
-
-    for k in followups:
-        print(k)
 
-    # stage 2, get file heading
-    file_link_name = {}
-    title_pattern = re.compile(r'(^#+)\s.*')
-    for tp, lv, f in followups:
-        if tp != 'FILE':
-            continue
-        try:
-            for line in open(f).readlines():
-                if line.startswith("#"):
-                    tag = line.strip()
-                    break
-        except Exception as e:
-            print(e)
-            tag = ""
-        if tag.startswith('# '):
-            tag = tag[2:]
-        elif tag.startswith('## '):
-            tag = tag[3:]
-        file_link_name[f] = tag.lower().replace(' ', '-')
-
-    print(file_link_name)
-
-    def replace_link_wrap(chapter, name):
-
-        # Note: 仅仅支持 hash 匹配，如果在多个文档中有同名 heading 会碰撞
-        # 支持 chapter 文档中的 ./ddd.md, xxx.md, xxx.md#xxx 等
-        def replace_link(match):
-            full = match.group(0)
-            link_name = match.group(1)
-            link = match.group(2)
-            frag = match.group(3)
-            if link.endswith('.md') or '.md#' in link:
-                if not frag:
-                    relative_path = ''
-                    if not link.startswith('.'):
-                        relative_path = '../'
-                    _rel_path = os.path.normpath(os.path.join(name, relative_path, link))
-                    for fpath in file_link_name:
-                        if _rel_path == fpath:
-                            frag = '#' + file_link_name[fpath]
-                return '[%s](%s)' % (link_name, frag)
-            elif link.endswith('.png') or link.endswith('.jpeg') or link.endswith('.svg') or link.endswith('.gif') or link.endswith('.jpg'):
-                # special handing for pic
-                img_link = re.sub(r'[\.\/]*media\/', './media/', link, count=0, flags=0)
-                # print('****************', img_link)
-                # print('================', '[%s](%s)' % (link_name, img_link))
-                # return '[%s](%s/%s)' % (link_name, dirname, fname)
-                return '[%s](%s)' % (link_name, img_link)
+entry_file = "TOC.md"
+
+# stage 1, parse toc
+with open(entry_file) as fp:
+    level = 0
+    current_level = ""
+    for line in fp:
+        if not in_toc and line.startswith("## "):
+            in_toc = True
+            print("in toc")
+        elif in_toc and line.startswith('## '):
+            in_toc = False
+            # yes, toc processing done
+            # contents.append(line[1:]) # skip 1 level TOC
+            break
+        elif in_toc and not line.startswith('#') and line.strip():
+            ## get level from space length
+            print(line)
+            level_space_str = level_pattern.findall(line)[0][:-1]
+            level = len(level_space_str) // 2 + 1 ## python divide get integer
+
+            matches = toc_line_pattern.findall(line)
+            if matches:
+                for match in matches:
+                    fpath = match[2]
+                    if fpath.endswith('.md'):
+                        # remove the first slash in the relative path
+                        fpath = fpath[1:]
+                        key = ('FILE', level, fpath)
+                        if key not in followups:
+                            print(key)
+                            followups.append(key)
+                    elif fpath.startswith('http'):
+                        ## remove list format character `- `, `+ `
+                        followups.append(('TOC', level, line.strip()[2:]))
             else:
-                return full
-
-        return hyper_link_pattern.sub(replace_link, chapter)
-
-    def replace_heading_func(diff_level=0):
-
-        def replace_heading(match):
-            if diff_level == 0:
-                return match.group(0)
-            else:
-                return '\n' + '#' * (match.group(0).count('#') + diff_level) + ' '
-
-
-        return replace_heading
-
-    def replace_img_link(match):
+                name = line.strip().split(None, 1)[-1]
+                key = ('TOC', level, name)
+                if key not in followups:
+                    print(key)
+                    followups.append(key)
+
+        else:
+            pass
+
+    # overview part in README.md
+    followups.insert(1, ("RAW", 0, fp.read()))
+
+for k in followups:
+    print(k)
+
+# stage 2, get file heading
+file_link_name = {}
+title_pattern = re.compile(r'(^#+)\s.*')
+for tp, lv, f in followups:
+    if tp != 'FILE':
+        continue
+    try:
+        for line in open(f).readlines():
+            if line.startswith("#"):
+                tag = line.strip()
+                break
+    except Exception as e:
+        print(e)
+        tag = ""
+    if tag.startswith('# '):
+        tag = tag[2:]
+    elif tag.startswith('## '):
+        tag = tag[3:]
+    file_link_name[f] = tag.lower().replace(' ', '-')
+
+print(file_link_name)
+
+def replace_link_wrap(chapter, name):
+
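+    # Note: only hash (anchor) matching is supported; headings with the same name in different documents will collide
+    # Supports links inside chapter documents such as ./ddd.md, xxx.md, xxx.md#xxx, etc.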
+    def replace_link(match):
         full = match.group(0)
         link_name = match.group(1)
         link = match.group(2)
+        frag = match.group(3)
+        if link.endswith('.md') or '.md#' in link:
+            if not frag:
+                relative_path = ''
+                if not link.startswith('.'):
+                    relative_path = '../'
+                _rel_path = os.path.normpath(os.path.join(name, relative_path, link))
+                for fpath in file_link_name:
+                    if _rel_path == fpath:
+                        frag = '#' + file_link_name[fpath]
+            return '[%s](%s)' % (link_name, frag)
+        elif link.endswith('.png') or link.endswith('.jpeg') or link.endswith('.svg') or link.endswith('.gif') or link.endswith('.jpg'):
+            # special handling for pictures
+            img_link = re.sub(r'[\.\/]*media\/', './media/', link, count=0, flags=0)
+            # print('****************', img_link)
+            # print('================', '[%s](%s)' % (link_name, img_link))
+            # return '[%s](%s/%s)' % (link_name, dirname, fname)
+            return '[%s](%s)' % (link_name, img_link)
+        else:
+            return full
+
+    return hyper_link_pattern.sub(replace_link, chapter)
+
+def replace_heading_func(diff_level=0):
+
+    def replace_heading(match):
+        if diff_level == 0:
+            return match.group(0)
+        else:
+            return '\n' + '#' * (match.group(0).count('#') + diff_level) + ' '
+
+
+    return replace_heading
+
+def replace_img_link(match):
+    full = match.group(0)
+    link_name = match.group(1)
+    link = match.group(2)
+
+    if link.endswith('.png'):
+        fname = os.path.basename(link)
+        return '![%s](./media/%s)' % (link_name, fname)
+
+# stage 3, concat files
+for type_, level, name in followups:
+    if type_ == 'TOC':
+        contents.append("\n{} {}\n".format('#' * level, name))
+    elif type_ == 'RAW':
+        contents.append(name)
+    elif type_ == 'FILE':
+        try:
+            with open(name) as fp:
+                chapter = fp.read()
+                chapter = replace_link_wrap(chapter, name)
+                # chapter = image_link_pattern.sub(replace_img_link, chapter)
+
+                # fix heading level
+                diff_level = level - heading_patthern.findall(chapter)[0].count('#')
+
+                print(name, type_, level, diff_level)
+                chapter = heading_patthern.sub(replace_heading_func(diff_level), chapter)
+                contents.append(chapter)
+                contents.append('') # add an empty line
+        except Exception as e:
+            print(e)
+            print("generate file error: ignore!")
 
-        if link.endswith('.png'):
-            fname = os.path.basename(link)
-            return '![%s](./media/%s)' % (link_name, fname)
-
-    # stage 3, concat files
-    for type_, level, name in followups:
-        if type_ == 'TOC':
-            contents.append("\n{} {}\n".format('#' * level, name))
-        elif type_ == 'RAW':
-            contents.append(name)
-        elif type_ == 'FILE':
-            try:
-                with open(name) as fp:
-                    chapter = fp.read()
-                    chapter = replace_link_wrap(chapter, name)
-                    # chapter = image_link_pattern.sub(replace_img_link, chapter)
-
-                    # fix heading level
-                    diff_level = level - heading_patthern.findall(chapter)[0].count('#')
-
-                    print(name, type_, level, diff_level)
-                    chapter = heading_patthern.sub(replace_heading_func(diff_level), chapter)
-                    contents.append(chapter)
-                    contents.append('') # add an empty line
-            except Exception as e:
-                print(e)
-                print("generate file error: ignore!")
-
-    # stage 4, generage final doc.md
-    target_doc_file = version + '/doc.md'
-    with open(target_doc_file, 'w') as fp:
-        fp.write('\n'.join(contents))
+# stage 4, generate final doc.md
+target_doc_file = 'doc.md'
+with open(target_doc_file, 'w') as fp:
+    fp.write('\n'.join(contents))