-
Notifications
You must be signed in to change notification settings - Fork 0
/
gourmet2pdf.py
executable file
·298 lines (258 loc) · 12.9 KB
/
gourmet2pdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
#! /usr/bin/env python3
"""
gourmet2pdf
@author: Christian Wichmann
"""
import re
import io
import json
import base64
import string
import argparse
from pathlib import Path
from bs4 import BeautifulSoup, CData
from reportlab.lib import colors
from reportlab.lib.units import cm, mm
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas
from reportlab.platypus.doctemplate import BaseDocTemplate, PageTemplate
from reportlab.platypus import Image, PageBreak, Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle
from reportlab.platypus.flowables import BalancedColumns, KeepTogether
# Page dimensions, unpacked from ReportLab's A4 page size.
PAGE_WIDTH, PAGE_HEIGHT = A4
# Horizontal inset of the footer text from the left and right page edges.
BORDER_HORIZONTAL = 2.0*cm
# Vertical position of the footer text above the bottom page edge.
BORDER_VERTICAL = 1.5*cm
# When true, a page break follows every recipe in the PDF; otherwise only
# a few blank lines are inserted between recipes.
PAGE_BREAK_AFTER_RECIPE = True
# Document title: used for PDF metadata, the first-page heading and the footer.
TITLE = 'Rezeptsammlung'
# Author name: used for PDF metadata, the first page and the JSON 'author' field.
AUTHOR = 'Markus Wichmann'
class Heading(Paragraph):
    """
    Paragraph flavour used for recipe headings.

    Besides rendering its text, drawing a heading bookmarks the current
    page and registers an entry in the document outline that most PDF
    viewers display.
    """

    def draw(self):
        """Render the paragraph, then bookmark it and add an outline entry."""
        super().draw()
        pdf_canvas = self.canv
        # The heading text doubles as bookmark key and as outline title.
        bookmark_key = self.text
        pdf_canvas.bookmarkPage(bookmark_key)
        pdf_canvas.addOutlineEntry(self.text, bookmark_key, 0, 0)
def starify_rating(rating):
    """
    Create a number of full and half stars according to the given rating.

    The rating is expected as a string such as ``'3/5'`` or ``'2.5/5'``;
    only the part before the first slash is evaluated. Every whole point
    becomes one full-star glyph and a fractional remainder adds a single
    half-star glyph (both taken from the FontAwesome font).

    Ratings that cannot be interpreted (``None``, non-strings, text that is
    not a number, ``nan``/``inf``) are reported on stdout and rendered as
    zero stars instead of aborting the export. Negative ratings are treated
    as zero as well.

    Returns the paragraph markup containing the star glyphs.
    """
    try:
        rate = float(rating.split('/')[0])
        # int() rejects nan (ValueError) and inf (OverflowError), so doing the
        # conversion here keeps those inputs inside the error handling.
        full_count = int(rate)
    except (AttributeError, ValueError, OverflowError):
        print('Could not parse recipe rating: ', rating)
        rate, full_count = 0.0, 0
    if rate < 0:
        # A negative rating must not produce a stray half star.
        rate, full_count = 0.0, 0
    full = '\uf005' * full_count
    half = '\uf089' if rate != full_count else ''
    return '<font face="FontAwesome">{}{}</font>'.format(full, half)
def create_first_page(canvas, doc):
    """
    Page callback for the first page of the document.

    Draws the centred title and author near the top of the page and the
    footer (title on the left, fixed page label on the right) at the bottom.
    The canvas state is saved before and restored after drawing.
    """
    centre_x = PAGE_WIDTH/2.0
    canvas.saveState()
    # Title and author lines: (font size, distance from top edge, text).
    for font_size, top_offset, text in ((16, 98, TITLE), (11, 130, AUTHOR)):
        canvas.setFont('Helvetica', font_size)
        canvas.drawCentredString(centre_x, PAGE_HEIGHT-top_offset, text)
    # Footer line.
    canvas.setFont('Helvetica', 10)
    canvas.drawString(BORDER_HORIZONTAL, BORDER_VERTICAL, TITLE)
    canvas.drawRightString(PAGE_WIDTH-BORDER_HORIZONTAL, BORDER_VERTICAL, "Seite 1")
    canvas.restoreState()
def create_later_pages(canvas, doc):
    """
    Page callback for every page after the first one.

    Draws only the footer: the document title on the left and the current
    page number (read from ``doc.page``) on the right.
    """
    footer_y = BORDER_VERTICAL
    right_edge = PAGE_WIDTH-BORDER_HORIZONTAL
    canvas.saveState()
    canvas.setFont('Helvetica', 10)
    canvas.drawString(BORDER_HORIZONTAL, footer_y, TITLE)
    page_label = "Seite {}".format(doc.page)
    canvas.drawRightString(right_edge, footer_y, page_label)
    canvas.restoreState()
def add_ingredients_for_group(enclosing_tag):
    """
    Build the paragraphs for all ingredients found below ``enclosing_tag``.

    If the tag has a ``groupname`` child, its text becomes a small heading
    paragraph placed first. Every ``ingredient`` descendant then yields one
    paragraph made of its ``amount``, ``unit`` and ``item`` children, with
    an empty string standing in for a missing child. The child tags
    themselves (not their strings) are interpolated into the paragraph text.

    Returns the list of paragraphs, which is empty when nothing was found.
    """
    group_heading_style = ParagraphStyle(name='Normal', fontName='Helvetica', fontSize=10, leading=10, leftIndent=8)
    ingredient_line_style = ParagraphStyle(name='Normal', fontName='Times-Roman', fontSize=10, leading=10, leftIndent=8)

    def or_blank(node):
        # Missing child tags are rendered as an empty string.
        return node if node else ''

    paragraphs = []
    group_name = enclosing_tag.groupname
    if group_name:
        paragraphs.append(Paragraph(group_name.text, group_heading_style))
    paragraphs.extend(
        Paragraph('{} {} {}'.format(or_blank(ingredient.amount),
                                    or_blank(ingredient.unit),
                                    or_blank(ingredient.item)),
                  ingredient_line_style)
        for ingredient in enclosing_tag.find_all('ingredient')
    )
    return paragraphs
def _append_text_section(flowables, tag, title, title_style, body_style):
    """Append a titled text section for ``tag`` unless it is missing or has no text."""
    if tag and tag.string:
        flowables.append(Paragraph(title, title_style))
        # Paragraph markup needs explicit line breaks instead of newlines.
        body = tag.string.replace('\n', '<br/>')
        flowables.append(Paragraph('{}'.format(body), body_style))


def create_pdf_doc(input_file, output_file):
    """
    Convert all recipes of a Gourmet XML export into a single PDF document.

    For every recipe the document contains a heading (also registered in the
    PDF outline), a small info block (source, link, rating, category), a
    table with the ingredients next to the recipe image, and the
    instructions and notes.

    input_file  -- path of the Gourmet recipe file to read
    output_file -- path of the PDF file to write

    The FontAwesome font is loaded from 'font_awesome.ttf', a relative path
    that is resolved against the current working directory.
    """
    pdfmetrics.registerFont(TTFont('FontAwesome', 'font_awesome.ttf'))
    heading_style = ParagraphStyle(name='Normal', fontName='Helvetica',
                                   spaceAfter=0.25*cm, spaceBefore=0.5*cm, fontSize=15, leading=18)
    subheading_style = ParagraphStyle(name='Normal', fontName='Helvetica',
                                      spaceAfter=0.2*cm, spaceBefore=0.4*cm, fontSize=13, leading=18)
    paragraph_style = ParagraphStyle(name='Normal', fontName='Times-Roman', fontSize=11, leading=18)
    small_style = ParagraphStyle(name='Normal', fontName='Times-Roman', fontSize=8)
    doc = SimpleDocTemplate(output_file, author=AUTHOR, title=TITLE)
    # Leave room for the title block drawn on the first page.
    story = [Spacer(1, 3.5*cm)]
    link_template = '<link href="{0}" color="blue">{0}</link>'
    # create necessary building blocks for each recipe
    for recipe in parse_xml_file(input_file):
        substory = [Heading('{}'.format(recipe.title.string), heading_style)]
        # build block with information about the recipe; tags without a
        # plain string are skipped instead of being printed as "None"
        topline = []
        if recipe.source and recipe.source.string:
            topline.append('Quelle: {}'.format(recipe.source.string))
        if recipe.link and recipe.link.string:
            topline.append('Link: {}'.format(link_template.format(recipe.link.string)))
        if recipe.rating and recipe.rating.string:
            topline.append('Bewertung: {}'.format(starify_rating(recipe.rating.string)))
        if recipe.category and recipe.category.string:
            topline.append('Kategorie: {}'.format(recipe.category.string))
        substory.append(Paragraph('<br/>'.join(topline), small_style))
        # extract image if it exists (an empty <image> tag has no data to decode)
        if recipe.image and recipe.image.string:
            im = Image(io.BytesIO(base64.b64decode(recipe.image.string)))
            im._restrictSize(7*cm, 7*cm)
            im.hAlign = 'RIGHT'
        else:
            im = Paragraph('', paragraph_style)
        # extract all ingredient groups with their ingredients
        # TODO: Search only in <ingredient-list> tag.
        igroup_tags = recipe.find_all('inggroup')
        if igroup_tags:
            ingredient_groups = [add_ingredients_for_group(igroup) for igroup in igroup_tags]
        else:
            ingredient_groups = [add_ingredients_for_group(recipe)]
        # build two columns for ingredients and image (covering multiple rows!)
        substory.append(Paragraph('Zutaten', subheading_style))
        # ingredient_groups always holds at least one (possibly empty) group
        first_group = ingredient_groups[0]
        if first_group:
            data = [[first_group[0], im]]
        else:
            data = [[Paragraph('Keine Zutaten für dieses Rezept gegeben!', paragraph_style), im]]
        # add remaining ingredients for first ingredients group
        data.extend([flowable] for flowable in first_group[1:])
        # add ingredients for all remaining ingredient groups to document
        for group in ingredient_groups[1:]:
            data.append([Spacer(1, 2*mm)])
            data.extend([flowable] for flowable in group)
        # build table from list of elements; the image cell spans the rows of
        # the first ingredient group, limited to at most eleven rows
        table = Table(data, splitByRow=True)
        table.setStyle(TableStyle([('VALIGN', (0, 0), (-1, -1), 'TOP'),
                                   ('ALIGN', (0, 0), (0, 0), 'LEFT'),
                                   ('SPAN', (1, 0), (1, min(10, len(first_group)-1))),
                                   ('ALIGN', (-1, 0), (-1, 0), 'RIGHT')]))
        substory.append(table)
        # build text blocks for instructions and notes
        _append_text_section(substory, recipe.instructions, 'Anweisungen', subheading_style, paragraph_style)
        _append_text_section(substory, recipe.modifications, 'Notizen', subheading_style, paragraph_style)
        # break page after each recipe if PAGE_BREAK_AFTER_RECIPE is true
        if PAGE_BREAK_AFTER_RECIPE:
            substory.append(PageBreak())
        else:
            substory.append(Paragraph('<br/><br/><br/>', ParagraphStyle(name='Normal')))
        # extend in place instead of copying the whole story for every recipe
        story.extend(substory)
    doc.build(story, onFirstPage=create_first_page, onLaterPages=create_later_pages)
def parse_time(time_string):
    """
    Parse a German duration text into an ISO 8601 style duration string.

    Accepted input looks like:
     * 1 Stunde
     * 45 Minuten
     * 1/2 Stunden
     * 2 Stunden 30 Minuten
    and is converted to the format: PT0H45M

    The hour part may be a whole number or a simple fraction, the minute
    part a whole number; both may have any number of digits. The text is
    matched from its beginning and case is ignored. Hours and minutes are
    summed and normalised, so the minutes in the result are always below 60.

    ``None``, an empty string or text that does not start with a duration
    yields 'PT0H0M'. A fraction with a zero denominator contributes nothing.
    """
    # capture the two numbers for hours and minutes
    regex = r"(?:(?P<hours>\d+(?:/\d+)?) Stunden?)? ?(?:(?P<minutes>\d+) Minuten?)?"
    # Every part of the pattern is optional, so this always yields a match
    # object (possibly an empty match at the start of the text).
    m = re.match(regex, time_string or '', re.IGNORECASE)
    total_minutes = 0
    if m['hours']:
        numerator, _, denominator = m['hours'].partition('/')
        if not denominator:
            total_minutes += int(numerator) * 60
        elif int(denominator) != 0:
            # integer arithmetic avoids float rounding for fractions like 1/3
            total_minutes += int(numerator) * 60 // int(denominator)
    if m['minutes']:
        total_minutes += int(m['minutes'])
    hours, minutes = divmod(total_minutes, 60)
    return 'PT{}H{}M'.format(hours, minutes)
def _ingredient_line(ingredient):
    """Format one <ingredient> tag as 'amount unit item'; missing parts become empty."""
    parts = (ingredient.amount, ingredient.unit, ingredient.item)
    return '{} {} {}'.format(*(part.string if part else '' for part in parts))


def create_json_doc(input_file, output_dir):
    """
    Export every recipe of a Gourmet XML file as a schema.org Recipe in JSON.

    For each recipe a sub-directory named after the (sanitised) recipe title
    is created below ``output_dir``. It receives a 'recipe.json' file and,
    if the recipe has an image, the decoded image as 'full.jpg'. Recipes
    whose directory already exists are reported and skipped.

    input_file -- path of the Gourmet recipe file to read
    output_dir -- existing directory that receives the recipe directories;
                  if it is not a directory a message is printed and nothing
                  is converted

    Source: https://schema.org/Recipe
    """
    base_path = Path(output_dir)
    # is_dir() is also False for paths that do not exist.
    if not base_path.is_dir():
        print('Output directory ({}) is not a directory!'.format(output_dir))
        return
    # characters that are kept when deriving a directory name from a title
    valid_chars = "-_.() {0}{1}äöüÄÖÜß".format(string.ascii_letters, string.digits)
    # Gourmet tag name -> JSON property for the duration fields
    time_fields = (('preptime', 'prepTime'), ('cooktime', 'cookTime'), ('totalTime', 'performTime'))
    for recipe in parse_xml_file(input_file):
        # filter out all characters not suitable for the filesystem
        valid_dirname = "".join(ch for ch in recipe.title.string if ch in valid_chars)
        recipe_dir = base_path / valid_dirname
        try:
            recipe_dir.mkdir()
        except FileExistsError:
            print('Recipe already converted: {}'.format(recipe.title.string))
            continue
        recipe_data = {'@context': 'https://schema.org', '@type': 'Recipe'}
        recipe_data['name'] = recipe.title.string
        recipe_data['author'] = AUTHOR
        # TODO: Check how to store the source of the recipe correctly.
        if recipe.source:
            recipe_data['publisher'] = {'@type': 'Organization', 'name': recipe.source.string}
        if recipe.link:
            recipe_data['url'] = recipe.link.string
        if recipe.category:
            recipe_data['recipeCategory'] = recipe.category.string
        if recipe.rating:
            rate = 0
            try:
                # rescale the "x/5" rating to a ten-point scale
                rate = float(recipe.rating.string.split('/')[0]) / 5 * 10
            except (AttributeError, TypeError, ValueError):
                # AttributeError covers a rating tag without a plain string
                print('Could not parse recipe rating: ', recipe.rating)
            recipe_data['aggregateRating'] = {"@type": "AggregateRating", "ratingCount": 1, "ratingValue": str(rate)}
        for tag_name, json_key in time_fields:
            time_tag = recipe.find(tag_name)
            # skip empty tags: there is no text to parse a duration from
            if time_tag and time_tag.string:
                recipe_data[json_key] = parse_time(time_tag.string)
        if recipe.yields:
            recipe_data['recipeYield'] = recipe.yields.string
        # the image is stored next to the JSON file and referenced by its path
        if recipe.image and recipe.image.string:
            image_file_name = recipe_dir / 'full.jpg'
            with open(image_file_name, 'wb') as imagefile:
                imagefile.write(base64.b64decode(recipe.image.string))
            recipe_data['image'] = str(image_file_name)
        # TODO: Handle ingredient groups better (for support in Nextcloud see: https://github.com/nextcloud/cookbook/issues/311)
        ingredients = []
        igroup_tags = recipe.find_all('inggroup')
        if igroup_tags:
            for igroup in igroup_tags:
                if igroup.groupname:
                    # use the group's text, not the tag itself, so no XML
                    # markup ends up in the heading line
                    ingredients.append('## {}'.format(igroup.groupname.text))
                ingredients.extend(_ingredient_line(i) for i in igroup.find_all('ingredient'))
        else:
            ingredients.extend(_ingredient_line(i) for i in recipe.find_all('ingredient'))
        recipe_data['recipeIngredient'] = ingredients
        # build text blocks for instructions and notes
        if recipe.instructions and recipe.instructions.string:
            recipe_data['recipeInstructions'] = recipe.instructions.string.split('\n')
        if recipe.modifications and recipe.modifications.string:
            recipe_data['comment'] = recipe.modifications.string
        with open(recipe_dir / 'recipe.json', 'w', encoding='utf-8') as f:
            json.dump(recipe_data, f)
def parse_xml_file(input_file):
    """
    Yield every <recipe> tag found in the given Gourmet XML file.

    input_file -- path of the Gourmet recipe file

    The file is read as raw bytes so that the parser can determine the
    character encoding from the document itself instead of relying on the
    platform's default text encoding. The whole file is parsed before the
    first recipe is yielded, and the file is closed again by then.
    """
    with open(input_file, 'rb') as recipe_file:
        soup = BeautifulSoup(recipe_file.read(), 'lxml-xml')
    yield from soup.find_all('recipe')
if __name__ == '__main__':
    # Command line entry point: convert the given Gourmet file to PDF
    # (default) or to a directory tree of JSON recipes.
    parser = argparse.ArgumentParser(description='Converts recipes in the file format of Gourmet Recipe Manager to other formats.')
    parser.add_argument('input_file', help='Gourmet recipe file')
    parser.add_argument('output_file', help='Output file or directory', nargs='?', default='')
    # No nargs here: the option then always holds a plain string, whether it
    # was given on the command line or taken from the default.
    parser.add_argument('-f', '--export_format', help='File format to convert Gourmet recipe database to', default='pdf', choices=['json', 'pdf'])
    args = parser.parse_args()
    if args.export_format == 'pdf':
        # default output: input file name with '.pdf' appended
        create_pdf_doc(args.input_file, args.output_file or args.input_file + '.pdf')
    else:
        # 'choices' guarantees that the only other value is 'json';
        # default output directory is the current directory
        create_json_doc(args.input_file, args.output_file or '.')