-
Notifications
You must be signed in to change notification settings - Fork 0
/
__init__.py
211 lines (176 loc) · 8.95 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# -*- coding: utf-8 -*-
"""
Based on the TXTOutput plugin for Calibre by John Schember <john@nachtimwald.com>
"""
import os
import shutil
import lxml.etree as etree
import lxml.builder
import re
# XML namespaces used in the generated document:
#   PEFNS - set as the default namespace on the root <pef> element.
#   DCNS  - bound to the "dc" prefix for the metadata entries
#           (format, identifier, title, creator) under <meta>.
PEFNS = "http://www.daisy.org/ns/2008/pef"
DCNS = "http://purl.org/dc/elements/1.1/"
from calibre.ebooks.metadata.meta import get_metadata, set_metadata
from calibre.rpdb import set_trace
from calibre.ebooks.txt.newlines import specified_newlines, TxtNewlines
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ptempfile import TemporaryDirectory, TemporaryFile
# Values accepted by the 'newline' conversion option (used as its `choices`
# and listed, sorted, in its help text).
NEWLINE_TYPES = ['system', 'unix', 'old_mac', 'windows']
class PEFOutput(OutputFormatPlugin):
    """Calibre output plugin that writes a book as a PEF braille document.

    The book is flattened to text by one of calibre's TXT writers, optionally
    translated with the ``louis`` module (``en-GB-g2.ctb`` table) when the
    ``ueb2`` option is set, and then every character is mapped to a Unicode
    braille cell and laid out in PEF ``<page>``/``<row>`` elements.
    """

    name = 'PEF Output'
    author = 'Matt Venn'
    file_type = 'pef'

    # Braille ASCII table: the position of a character in this string is the
    # offset of its cell from BRAILLE_BASE (the blank cell).
    # http://en.wikipedia.org/wiki/Braille_ASCII#Braille_ASCII_values
    BRAILLE_ASCII = " A1B'K2L@CIF/MSP\"E3H9O6R^DJG>NTQ,*5<-U8V.%[$+X!&;:4\\0Z7(_?W]#Y)="
    BRAILLE_BASE = 10240
    # Character -> offset lookup, built once instead of scanning the table
    # for every character of the book.
    _BRAILLE_INDEX = dict((ch, i) for i, ch in enumerate(BRAILLE_ASCII))

    # ``unichr`` only exists on Python 2; fall back to ``chr`` elsewhere so
    # the character mapping itself does not depend on the interpreter version.
    try:
        _to_char = staticmethod(unichr)
    except NameError:
        _to_char = staticmethod(chr)

    options = set([
        OptionRecommendation(name='newline', recommended_value='system',
            level=OptionRecommendation.LOW,
            short_switch='n', choices=NEWLINE_TYPES,
            help=_('Type of newline to use. Options are %s. Default is \'system\'. '
                'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
                'For Mac OS X use \'unix\'. \'system\' will default to the newline '
                'type used by this OS.') % sorted(NEWLINE_TYPES)),
        OptionRecommendation(name='txt_output_encoding', recommended_value='utf-8',
            level=OptionRecommendation.LOW,
            help=_('Specify the character encoding of the output document. '
                'The default is utf-8.')),
        OptionRecommendation(name='ueb2',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Convert to Unified English Braille Grade 2')),
        OptionRecommendation(name='inline_toc',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Add Table of Contents to beginning of the book.')),
        OptionRecommendation(name='num_rows',
            recommended_value=4, level=OptionRecommendation.LOW,
            help=_('The maximum number of rows per page, defaults to 4.')),
        OptionRecommendation(name='max_line_length',
            recommended_value=40, level=OptionRecommendation.LOW,
            help=_('The maximum number of characters per line. This splits on '
                'the first space before the specified value. If no space is found '
                'the line will be broken at the space after and will exceed the '
                'specified value. Also, there is a minimum of 25 characters. '
                'Use 0 to disable line splitting. Default is 40.')),
        OptionRecommendation(name='force_max_line_length',
            recommended_value=True, level=OptionRecommendation.LOW,
            help=_('Force splitting on the max-line-length value when no space '
                'is present. Also allows max-line-length to be below the minimum. '
                'Default true.')),
        OptionRecommendation(name='txt_output_formatting',
            recommended_value='plain',
            choices=['plain', 'markdown', 'textile'],
            help=_('Formatting used within the document.\n'
                '* plain: Produce plain text.\n'
                '* markdown: Produce Markdown formatted text.\n'
                '* textile: Produce Textile formatted text.')),
        OptionRecommendation(name='keep_links',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Do not remove links within the document. This is only '
                'useful when paired with a txt-output-formatting option that '
                'is not none because links are always removed with plain text output.')),
        OptionRecommendation(name='keep_image_references',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Do not remove image references within the document. This is only '
                'useful when paired with a txt-output-formatting option that '
                'is not none because links are always removed with plain text output.')),
        OptionRecommendation(name='keep_color',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Do not remove font color from output. This is only useful when '
                'txt-output-formatting is set to textile. Textile is the only '
                'formatting that supports setting font color. If this option is '
                'not specified font color will not be set and default to the '
                'color displayed by the reader (generally this is black).')),
    ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Convert *oeb_book* to a PEF file written at *output_path*.

        :param oeb_book: the book to convert; its content is extracted by the
            selected TXT writer and its ``metadata`` is passed to
            :meth:`create_pef`.
        :param output_path: path of the file to write; missing parent
            directories are created.
        :param input_plugin: unused here.
        :param opts: conversion options (see ``options``).
        :param log: logger; also stored on ``self.log`` for the helpers.

        NOTE(review): the text is encoded with ``txt_output_encoding`` before
        the per-character braille mapping, so with a multi-byte encoding the
        bytes of a non-ASCII character are presumably mapped one by one --
        confirm this is intended.
        """
        from calibre.ebooks.txt.txtml import TXTMLizer
        from calibre.utils.cleantext import clean_ascii_chars

        self.log = log

        formatting = opts.txt_output_formatting.lower()
        if formatting == 'markdown':
            from calibre.ebooks.txt.markdownml import MarkdownMLizer
            self.writer = MarkdownMLizer(log)
        elif formatting == 'textile':
            from calibre.ebooks.txt.textileml import TextileMLizer
            self.writer = TextileMLizer(log)
        else:
            self.writer = TXTMLizer(log)

        txt = self.writer.extract_content(oeb_book, opts)
        txt = clean_ascii_chars(txt)

        newline_char = TxtNewlines(opts.newline).newline
        log.debug('\tReplacing newlines with selected type...')
        txt = specified_newlines(newline_char, txt)
        txt = txt.encode(opts.txt_output_encoding, 'replace')

        if opts.ueb2:
            import louis
            # Re-join with the *selected* newline: create_pef() splits on it,
            # so a hard-coded "\n" would collapse the whole book into one row
            # whenever the selected newline is not "\n".
            txt = newline_char.join(
                louis.translateString(['en-GB-g2.ctb'], line)
                for line in txt.split(newline_char))
            log.debug(txt)

        log.debug('\tStripping final newline characters')
        # Group and escape the newline so that a multi-character newline
        # such as "\r\n" is repeated as a unit.
        txt = re.sub(r'(?:%s)*\Z' % re.escape(newline_char), '', txt)

        log.debug('\tGenerating PEF...')
        pef = self.create_pef(txt, opts, oeb_book.metadata)

        out_dir = os.path.dirname(output_path)
        if out_dir != '' and not os.path.exists(out_dir):
            os.makedirs(out_dir)

        # create_pef() returns the document already serialized as UTF-8, so
        # write it verbatim in binary mode and make sure the file is closed.
        with open(output_path, 'wb') as fh:
            fh.write(pef)

    def create_pef(self, txt, opts, metadata):
        """Build the PEF document for *txt* and return it serialized.

        Each line of *txt* (split on the newline selected by ``opts.newline``)
        becomes one ``<row>``; a new ``<page>`` is started every
        ``opts.num_rows`` rows. The first title and author found in
        *metadata* are stored as ``dc:title`` / ``dc:creator``.

        :returns: the XML document as produced by ``etree.tostring`` with
            ``encoding='UTF-8'``, an XML declaration and pretty printing.
        """
        newline_char = TxtNewlines(opts.newline).newline
        # Guard against a zero/negative row count, which would otherwise make
        # the paging arithmetic below divide by zero.
        rows_per_page = max(1, int(opts.num_rows))

        # setup PEF doc
        # http://files.pef-format.org/specifications/pef-2008-1/pef-specification.html
        root = etree.Element('pef', nsmap={None: PEFNS})
        tree = etree.ElementTree(root)
        root.set("version", "2008-1")

        head = etree.SubElement(root, 'head')
        meta = etree.SubElement(head, 'meta', nsmap={"dc": DCNS})
        dc_format = etree.SubElement(meta, '{%s}format' % DCNS)
        dc_format.text = "application/x-pef+xml"
        dc_ident = etree.SubElement(meta, "{%s}identifier" % DCNS)
        dc_ident.text = "org.pef-format.00002"
        if metadata.title:
            dc_title = etree.SubElement(meta, '{%s}title' % DCNS)
            dc_title.text = metadata.title[0].value
        if metadata.author:
            dc_creator = etree.SubElement(meta, '{%s}creator' % DCNS)
            dc_creator.text = metadata.author[0].value

        body = etree.SubElement(root, 'body')
        volume = etree.SubElement(body, 'volume')
        volume.set("cols", str(opts.max_line_length))
        volume.set("rows", str(rows_per_page))
        section = etree.SubElement(volume, 'section')

        # Characters already reported as unconvertible (see alpha_to_pef).
        self.bad_chars = {}
        rows = 0
        page = None
        for line in txt.split(newline_char):
            self.log.debug('got new line [%s]' % line)
            if rows % rows_per_page == 0:
                page = etree.SubElement(section, 'page')
            row = etree.SubElement(page, 'row')
            # Count every emitted row, including one left empty by a failure
            # below, so a page never holds more than rows_per_page rows.
            rows += 1
            try:
                row.text = ''.join(self.convert_to_pef(line.strip()))
            except ValueError as e:
                self.log.warn('could not add row for line [%s]: %s' % (line, e))

        return etree.tostring(tree, xml_declaration=True, encoding='UTF-8',
                              pretty_print=True)

    # convert a single alpha, digit or some punctuation to 6 pin braille
    # http://en.wikipedia.org/wiki/Braille_ASCII#Braille_ASCII_values
    def alpha_to_pef(self, alpha):
        """Return the Unicode braille cell for the single character *alpha*.

        The character is upper-cased and looked up in ``BRAILLE_ASCII``.
        Characters not in the table are logged once each (tracked in
        ``self.bad_chars``) and mapped to the blank cell.
        """
        key = alpha.upper()
        offset = self._BRAILLE_INDEX.get(key)
        if offset is None:
            # bad_chars is normally reset by create_pef(); create it on demand
            # so this method also works when called on its own.
            seen = getattr(self, 'bad_chars', None)
            if seen is None:
                seen = self.bad_chars = {}
            if key not in seen:
                self.log.info("can't convert [%s] to braille" % key)
                seen[key] = True
            offset = 0
        return self._to_char(self.BRAILLE_BASE + offset)

    # convert a list of alphas to pef unicode
    def convert_to_pef(self, alphas):
        """Return a list with the braille cell for each character in *alphas*."""
        return [self.alpha_to_pef(alpha) for alpha in alphas]