forked from sjbotha/moin2confluence
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmoin2markdown.sh
executable file
·97 lines (71 loc) · 3.6 KB
/
moin2markdown.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/bin/bash
#
# Convert the latest revision of a single MoinMoin page to Markdown.
#
# The newest file in <wiki root>/data/pages/<PageName>/revisions is copied to a
# scratch directory, rewritten from MoinMoin markup towards MediaWiki markup
# with sed, converted to Markdown by pandoc, and finally moved into
# <output dir> under a cleaned-up file name.
#
# Usage:    moin2markdown.sh <PageName> <wiki root> <output dir>
# Example:  moin2markdown.sh Certificates /data/webs/DVTech outputdir
#
# Environment:
#   TMP  if set and non-empty, the scratch directory is $TMP/moin2markdown;
#        otherwise it is "working" next to this script.
#
# Requires pandoc and a sed that supports -i (without suffix) and -r (GNU sed).
# Exit status: 0 on success, non-zero (with a message on stderr) when the page
# cannot be found or any conversion step fails.

set -euo pipefail

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Scratch directory lives next to the script unless TMP overrides it.
# (The script never changes directory; CUR is only used to build this path.)
CUR=$(dirname "$0")
WORKING=$CUR/working
if [[ -n "${TMP:-}" ]]; then
  WORKING=$TMP/moin2markdown
fi

PAGENAME=${1:-}
MOINROOT=${2:-}
OUTPUT=${3:-}

# All three arguments are positional, so a missing third one means the call is
# incomplete.
if [[ "$OUTPUT" == "" ]]; then
  echo ""
  echo "Syntax: ./$0 <PageName> <wiki root> <output dir>"
  echo "Example: ./$0 Certificates /data/webs/DVTech outputdir"
  echo "The wiki root directory should be the one that contains the data directory"
  echo ""
  exit 1
fi

REVISIONS_DIR=$MOINROOT/data/pages/$PAGENAME/revisions
[[ -d "$REVISIONS_DIR" ]] || die "No revisions directory for page '$PAGENAME': $REVISIONS_DIR"

# Pick the revision whose file name sorts last. Glob results are already
# sorted, so the last regular file seen is the newest one; this avoids parsing
# the output of ls.
LATEST_REV=
for rev in "$REVISIONS_DIR"/*; do
  [[ -f "$rev" ]] || continue
  LATEST_REV=$rev
done
[[ -n "$LATEST_REV" ]] || die "No revisions found in $REVISIONS_DIR"

mkdir -p -- "$WORKING"
mkdir -p -- "$OUTPUT"

# Remove the scratch directory on every exit path, not only on success.
# ${WORKING:?} refuses to run rm -rf with an empty path.
cleanup() {
  rm -rf -- "${WORKING:?}"
}
trap cleanup EXIT

WIKI_FILE=$WORKING/$PAGENAME.wiki
MD_FILE=$WORKING/$PAGENAME.md

cp -- "$LATEST_REV" "$WIKI_FILE"

# convert moinmoin to mediawiki
# the moinmoin format is very similar to mediawiki
# here we convert a few things that are different
#
# Bullets and numbered items: the number of leading spaces selects the list
# depth (one space -> "*" / "#", two -> "**" / "##", three -> "***").
# Previously every pattern used a single leading space, so only the first
# bullet rule and the first numbered rule could ever match. The dot after "1"
# is escaped so that it matches a literal "." only.
# {{{ ... }}} blocks become <pre> ... </pre>.
sed -i \
  -e 's/^ \*/*/' \
  -e 's/^  \*/**/' \
  -e 's/^   \*/***/' \
  -e 's/^ 1\. /# /' \
  -e 's/^  1\. /## /' \
  -e 's/{{{/<pre>/g' \
  -e 's/}}}/<\/pre>/g' \
  -- "$WIKI_FILE"

# auto convert camel case moinmoin to link in mediawiki
# (only words preceded by a space are wrapped in [[ ]])
sed -i -r 's/ ([/]?[A-Z][A-Za-z0-9]+[A-Z]+[a-z0-9]+)/ [[\1]]/g' -- "$WIKI_FILE"

# NOTE(review): the original comment here read "remove ''''''", but the
# expression below is a WikiWord-style pattern, not a removal of ''''''.
# Because sed runs it as a basic regular expression (no -r), the inner "(",
# "|" and ")" are literal characters, so it practically never matches.
# It is kept unchanged on purpose: turning it into an extended regex would
# wrap the links created by the previous command a second time. Confirm the
# intended behaviour before changing it.
sed -i "s/\([A-Z]([A-Z0-9]*[a-z][a-z0-9]*[A-Z]|[a-z0-9]*[A-Z][A-Z0-9]*[a-z])[A-Za-z0-9]*\)/[[\1]]/g" -- "$WIKI_FILE"

# Start table handling; PAA - omit for Markdown??
# This table handling is a hack because we actually convert straight to confluence format and not mediawiki format
# handle 8 column tables
#sed -i "s/^||\(.*\)||\(.*\)||\(.*\)||\(.*\)||\(.*\)||\(.*\)||\(.*\)||\(.*\)||/<tr><td>\1<\/td><td>\2<\/td><td>\3<\/td><td>\4<\/td><td>\5<\/td><td>\6<\/td><td>\7<\/td><td>\8<\/td><\/tr>/g" "$WORKING/$PAGENAME.wiki"
# handle 7 column tables
#sed -i "s/^||\(.*\)||\(.*\)||\(.*\)||\(.*\)||\(.*\)||\(.*\)||\(.*\)||/<tr><td>\1<\/td><td>\2<\/td><td>\3<\/td><td>\4<\/td><td>\5<\/td><td>\6<\/td><td>\7<\/td><\/tr>/g" "$WORKING/$PAGENAME.wiki"
# handle 6 column tables
#sed -i "s/^||\(.*\)||\(.*\)||\(.*\)||\(.*\)||\(.*\)||\(.*\)||/<tr><td>\1<\/td><td>\2<\/td><td>\3<\/td><td>\4<\/td><td>\5<\/td><td>\6<\/td><\/tr>/g" "$WORKING/$PAGENAME.wiki"
# handle 5 column tables
#sed -i "s/^||\(.*\)||\(.*\)||\(.*\)||\(.*\)||\(.*\)||/<tr><td>\1<\/td><td>\2<\/td><td>\3<\/td><td>\4<\/td><td>\5<\/td><\/tr>/g" "$WORKING/$PAGENAME.wiki"
# handle 4 column tables
#sed -i "s/^||\(.*\)||\(.*\)||\(.*\)||\(.*\)||/<tr><td>\1<\/td><td>\2<\/td><td>\3<\/td><td>\4<\/td><\/tr>/g" "$WORKING/$PAGENAME.wiki"
# handle 3 column tables
#sed -i "s/^||\(.*\)||\(.*\)||\(.*\)||/<tr><td>\1<\/td><td>\2<\/td><td>\3<\/td><\/tr>/g" "$WORKING/$PAGENAME.wiki"
# handle 2 column tables
#sed -i "s/^||\(.*\)||\(.*\)||/<tr><td>\1<\/td><td>\2<\/td><\/tr>/g" "$WORKING/$PAGENAME.wiki"
# add <table> to start of table
#perl -0777 -i -pe 's/[^>]\r\n<tr>/\r\n<table>\r\n<tr>/g' "$WORKING/$PAGENAME.wiki"
# add </table> to end of table
#perl -0777 -i -pe 's/<\/tr>\r\n[^<]/<\/tr>\r\n<\/table>/g' "$WORKING/$PAGENAME.wiki"
# end table handling

# convert from mediawiki to markdown
pandoc -f mediawiki -t markdown -s "$WIKI_FILE" -o "$MD_FILE"

# Here we clean up the file names and replace common special characters used.
# Page directory names carry "(hh)" hex escapes; the ones handled are:
#   (27) -> '     apostrophe
#   (20) -> space
#   (2f) -> __    "/" marks a subpage and cannot appear in a file name
#   (2d) -> -     (this used to be turned into an apostrophe by mistake)
# printf with a quoted argument keeps the name intact: no word splitting,
# no globbing, and decoded spaces are not collapsed.
CLEANFILE=$(
  printf '%s\n' "$PAGENAME" \
    | sed -e "s/(27)/'/g" \
          -e 's/(20)/ /g' \
          -e 's/(2f)/__/g' \
          -e 's/(2d)/-/g'
)

echo "$PAGENAME > $CLEANFILE"
mv -- "$MD_FILE" "$OUTPUT/$CLEANFILE.md"
# The scratch directory is removed by the EXIT trap installed above.