|
| 1 | +#!/bin/bash |
| 2 | +set -eu |
| 3 | + |
| 4 | +############################################################################### |
| 5 | +# Convert recursively all .html files to .md (GitHub flavoured Markdown) |
| 6 | +# |
| 7 | +# Dependencies: |
| 8 | +# pandoc |
| 9 | +# wget |
| 10 | +# |
| 11 | +# Author(s): |
| 12 | +# Martin Landa, Markus Neteler |
| 13 | +# |
| 14 | +# Usage: |
| 15 | +# If you have "pandoc" in PATH, execute for HTML file conversion in |
| 16 | +# current directory and subdirectories: |
| 17 | +# ./utils/grass_html2md.sh |
| 18 | +# |
| 19 | +# COPYRIGHT: (C) 2024 by the GRASS Development Team |
| 20 | +# |
| 21 | +# This program is free software under the GNU General Public |
| 22 | +# License (>=v2). Read the file COPYING that comes with GRASS |
| 23 | +# for details. |
| 24 | +# |
| 25 | +############################################################################### |
| 26 | + |
#######################################
# Remove the temporary HTML file that belongs to the input file which is
# currently being converted. Called on user break.
# Globals:
#   f (read) - path of the .html file being processed; it is unset when
#              the break arrives before the conversion loop has started
# Returns:
#   0 (rm -f does not fail on a missing file)
#######################################
cleanup()
{
    # "${f:-}" keeps "set -u" from aborting the handler with an
    # "unbound variable" error when no file has been picked up yet, and
    # the emptiness test avoids deleting an unrelated "_tmp.html".
    if [ -n "${f:-}" ]; then
        rm -f "${f%.html}_tmp.html"
    fi
}
| 32 | + |
#######################################
# Handler for a user break: report the interruption, remove the temporary
# file of the conversion in progress (via cleanup) and terminate.
# Outputs:
#   writes "User break!" to stderr so that it does not mix with the list
#   of processed files printed on stdout
# Returns:
#   does not return; exits the script with status 1
#######################################
exitprocedure()
{
    echo "User break!" >&2
    cleanup
    exit 1
}

# Run the handler on INT (2), QUIT (3) and TERM (15); see "trap -l" for
# the full signal list.
trap exitprocedure INT QUIT TERM
| 42 | + |
# Directory that holds the pandoc Lua filter (./utils/pandoc_codeblock.lua),
# relative to the directory the script is started from.
UTILSPATH="utils"

# Let a pipeline fail when any of its stages fails; without this a pandoc
# error is masked by the exit status of the trailing sed and an empty or
# truncated .md file is left behind unnoticed.
set -o pipefail

#######################################
# Print an HTML file with ".html" replaced by ".md" in the targets of
# relative <a href="..."> links. Links starting with http:// or https://
# keep their ".html" suffix. Anchors ("#...") are preserved.
# Arguments:
#   $1 - path of the HTML file to read
# Outputs:
#   the rewritten HTML on stdout
#######################################
relink_local_html()
{
    sed -E '
    # Step 1: Preserve http/https links with .html (and optional anchors)
    s|(<a href="https?://[^"]+\.html)(#[^"]*)?">|\1_KEEPHTML\2">|g;
    # Step 2: Replace .html with .md for local links (with or without anchors)
    s|(<a href=")([^"]+)\.html(#[^"]*)?">|\1\2.md\3">|g;
    # Step 3: Restore preserved http/https links with .html
    s|_KEEPHTML||g;
' "$1"
}

#######################################
# Convert an HTML file to GitHub flavoured Markdown with pandoc.
# Globals:
#   UTILSPATH (read) - directory containing pandoc_codeblock.lua
# Arguments:
#   $1 - path of the HTML file to read
# Outputs:
#   the Markdown text on stdout
# Returns:
#   non-zero if sed or pandoc fails (relies on "set -o pipefail")
#######################################
html_to_gfm()
{
    # Before pandoc: turn <div class="code"><pre>...</pre></div> into
    # <pre><code>...</code></pre>.
    # pandoc: "-t" is the short form of "--to", so the former
    # "--to=markdown -t gfm" only ever selected gfm.
    # After pandoc: drop the backslash of " \$" and replace "%20" by "-".
    sed -e 's#<div class="code"><pre>#<pre><code>#g' \
        -e 's#</pre></div>#</code></pre>#g' "$1" |
        pandoc --from=html --to=gfm \
            --lua-filter "${UTILSPATH}/pandoc_codeblock.lua" |
        sed -e 's+ \\\$+ \$+g' -e 's+%20+-+g'
}

# Collect the input files before converting anything, so that the
# *_tmp.html files written below are never picked up by find. The pattern
# is quoted to stop the shell from expanding it, and the NUL-delimited
# list keeps paths containing whitespace intact.
html_files=()
while IFS= read -r -d '' html_file; do
    html_files+=("${html_file}")
done < <(find . -name '*.html' -print0)

# run recursively: HTML to MD
# The loop variable must stay "f": the user-break cleanup reads it to find
# the temporary file. The ${var[@]+...} form keeps "set -u" quiet on an
# empty list with older bash versions.
for f in ${html_files[@]+"${html_files[@]}"}; do
    echo "${f}"

    tmp_html="${f%.html}_tmp.html"

    if ! relink_local_html "${f}" > "${tmp_html}" ||
        ! html_to_gfm "${tmp_html}" > "${f%.html}.md"; then
        rm -f "${tmp_html}"
        echo "ERROR: conversion of <${f}> failed" >&2
        exit 1
    fi

    rm -f "${tmp_html}"
done
0 commit comments