Skip to content

Commit b2a9878

Browse files
author
Jeroen Ketema
committed
Give the RTF reader an option to skip paragraph cleaning
I need this for another project in which position-dependent replacements are applied to the RTF; the positions depend on the whitespace (though not on the mark-up data).
1 parent c6b94d8 commit b2a9878

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

pyth/plugins/rtf15/reader.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,18 +77,19 @@ class BackslashEscape(Exception):
7777
class Rtf15Reader(PythReader):
7878

7979
@classmethod
80-
def read(self, source, errors='strict'):
80+
def read(self, source, errors='strict', clean_paragraphs=True):
8181
"""
8282
source: A list of P objects.
8383
"""
8484

85-
reader = Rtf15Reader(source, errors)
85+
reader = Rtf15Reader(source, errors, clean_paragraphs)
8686
return reader.go()
8787

8888

89-
def __init__(self, source, errors='strict'):
89+
def __init__(self, source, errors='strict', clean_paragraphs=True):
9090
self.source = source
9191
self.errors = errors
92+
self.clean_paragraphs = clean_paragraphs
9293
self.document = document.Document
9394

9495

@@ -193,7 +194,7 @@ def getControl(self):
193194
def build(self):
194195
doc = document.Document()
195196

196-
ctx = DocBuilder(doc)
197+
ctx = DocBuilder(doc, self.clean_paragraphs)
197198

198199
for bit in self.group.flatten():
199200
typeName = type(bit).__name__
@@ -207,14 +208,16 @@ def build(self):
207208

208209
class DocBuilder(object):
209210

210-
def __init__(self, doc):
211+
def __init__(self, doc, clean_paragraphs=True):
211212
self.run = []
212213
self.propStack = [{}]
213214
self.block = None
214215

215216
self.listLevel = None
216217
self.listStack = [doc]
217218

219+
self.clean_paragraphs = clean_paragraphs
220+
218221

219222
def flushRun(self):
220223
if self.block is None:
@@ -239,6 +242,9 @@ def cleanParagraph(self):
239242
self.block = None
240243
return
241244

245+
if not self.clean_paragraphs:
246+
return
247+
242248
joinedRuns = []
243249
hasContent = False
244250

0 commit comments

Comments
 (0)