-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconverter.py
95 lines (77 loc) · 3.19 KB
/
converter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import shutil
import re
from utils import bcolors
def toSimpleMarkdown(post, folder):
    """
    Convert to Markdown (without headers)

    Writes ``post['chapo']`` followed by ``post['content']`` to
    ``converted_posts/<name>.md`` (relative to the current working
    directory), where ``<name>`` is the base name of ``post['filename']``
    without its last extension.

    :param post: dict providing at least the ``filename``, ``chapo`` and
        ``content`` keys (the last two must be strings).
    :param folder: unused here; kept so the signature matches ``toGrav``.
    """
    # Create output folder if it doesn't exist (exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test)
    folder_name = 'converted_posts'
    os.makedirs(folder_name, exist_ok=True)

    # Set filename
    file_basename = os.path.basename(post['filename'])
    target_filename = os.path.splitext(file_basename)[0] + '.md'

    # Write converted file. The encoding is explicit so that non-ASCII
    # text is written identically whatever the platform's default locale.
    target_path = os.path.join(folder_name, target_filename)
    with open(target_path, 'w', encoding='utf-8') as target_file:
        target_file.write(post['chapo'] + post['content'])
def _build_grav_header(post):
    """
    Return the YAML front matter (``---`` ... ``---``) for a post.

    Reads the ``title``, ``tags``, ``draft`` and ``date`` keys of *post*
    and does not modify the dict.
    """
    title = post['title']
    # A title containing double quotes is emitted as a YAML single-quoted
    # scalar; inside such a scalar a literal single quote must be doubled.
    if '"' in title:
        title = "'" + title.replace("'", "''") + "'"

    lines = ['---', 'title: ' + title]
    if post['tags'] is not None:
        lines.append('tag: ' + post['tags'])
    if post['draft'] is True:
        lines.append('published: false ')
    # str() is applied to the date alone so non-string dates work too
    lines.append("date: '" + str(post['date']) + "'")
    lines.append('---')
    return '\n'.join(lines) + '\n'


def toGrav(post, folder):
    """
    Convert to Markdown + YAML Front Matter for Grav CMS

    Writes ``converted_posts/<slug>/item.md`` (relative to the current
    working directory) containing the YAML header, the chapo followed by a
    ``===`` separator line (when the chapo is not empty) and the content.
    Every entry of ``post['images']`` is then copied, by base name, from
    ``<folder>/images/`` into the same post folder.

    ``<slug>`` is the last dot-separated component of the base name of
    ``post['filename']`` once its extension is removed.

    The *post* dict itself is left unmodified.

    :param post: dict providing the ``title``, ``tags``, ``draft``,
        ``date``, ``chapo``, ``content``, ``images`` and ``filename`` keys.
    :param folder: folder that contains the ``images`` sub-folder the
        pictures are copied from.
    """
    # Write YAML header for Markdown file
    header = _build_grav_header(post)

    # As html2text library currently inserts newlines when links are too long,
    # it is currently needed to fix them here
    # See: https://github.com/Alir3z4/html2text/issues/127
    def remove_newlines(match):
        return "".join(match.group().strip().split('\n'))

    links_pattern = re.compile(r'\[([\w\s*:/\-\.]*)\]\(([^()]+)\)')
    content = links_pattern.sub(remove_newlines, post['content'])

    # Change all PluXML images sources by local one (images will be copied
    # afterwards): the "data/images/" prefix is dropped from image targets.
    # NOTE(review): the replacement template was reconstructed from the
    # pattern and the comment above - confirm against the upstream file.
    # NOTE(review): images are copied by base name below, so a target that
    # sits in a sub-folder of data/images/ would keep that sub-folder in
    # the Markdown link - verify whether such posts exist.
    images = post['images'] or []
    if images:
        src = re.compile(r'!\[([\w\s*:/\-\.]*)\]\(data\/images\/')
        content = src.sub(r'![\1](', content)

    # Create summary if there is a chapo
    chapo = post['chapo'] or ''
    if chapo:
        chapo += '===\n\n'

    print('Converted markdown :')
    print(header)

    # Create unique folder for the post; makedirs also creates the
    # top-level output folder if it doesn't exist yet
    folder_name = 'converted_posts'
    file_stem = os.path.splitext(os.path.basename(post['filename']))[0]
    post_folder = file_stem.split('.')[-1]
    post_folder_path = os.path.join(folder_name, post_folder)
    os.makedirs(post_folder_path, exist_ok=True)

    # Set item.md as filename which is ideal for a Grav blog
    target_filename = 'item.md'

    # Write converted file (explicit encoding: do not depend on the locale)
    target_path = os.path.join(post_folder_path, target_filename)
    with open(target_path, 'w', encoding='utf-8') as target_file:
        target_file.write(header + chapo + content)

    # If they are images, copy them from PluXml folder into the new folder
    for image in images:
        image_path = folder + '/images/' + os.path.basename(image)
        print('Copying image from: ', image_path)
        try:
            shutil.copy2(image_path, post_folder_path)
        except FileNotFoundError:
            # Best effort: report the missing picture and carry on
            message = ("Error: Image {0} doesn't seem to exist and "
                       "has not been copied").format(image)
            print(bcolors.FAIL + message + bcolors.ENDC)