Skip to content

Commit

Permalink
Added ParseTreeBuilder to reformat pageview dump into nicer tree
Browse files Browse the repository at this point in the history
  • Loading branch information
jaap-karssenberg committed Aug 23, 2009
1 parent 5b6ef0b commit d336608
Show file tree
Hide file tree
Showing 11 changed files with 282 additions and 41 deletions.
20 changes: 11 additions & 9 deletions HACKING/Tasks.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Content-Type: text/x-zim-wiki
Wiki-Format: zim 0.26
Creation-Date: Sat, 04 Apr 2009 10:54:39 +0200
Modification-Date: Sat, 15 Aug 2009 17:01:42 +0200
Modification-Date: Mon, 17 Aug 2009 21:12:22 +0200

====== Tasks ======

Expand All @@ -15,11 +15,7 @@ Modification-Date: Sat, 15 Aug 2009 17:01:42 +0200


== Packaging ==
[*] Setup: need to figure out how setup.py should look to install properly
[*] Translations: add script to include comments in zim.pot
[ ] Update CHANGELOG.txt
[*] Update "Install" and "Packaging" sections in README.txt
[*] Package test data as a xml file
[ ] Build ubuntu / debian package
[ ] Build windows installer (with or without Python + Gtk included)
[ ] Add website to bazaar + tools/generate_wesite.py
Expand All @@ -30,12 +26,10 @@ Modification-Date: Sat, 15 Aug 2009 17:01:42 +0200


== Defects ==
[ ] Pageview: enforce sane indenting levels for lists
[ ] Pageview: enforce heading can not be indented
[ ] Pageview: Para indenting tags and bullet offset tags need to be split up
[ ] Pageview: Fix issue with Verbatim paragraphs
[*] Pageview: Fix issue with Verbatim paragraphs
[ ] Pageview: Rich copy paste / drag drop
[ ] Pageview: implement recursive checkbox lists
[ ] Pageindex: Drag&Drop in the side pane is not working
[ ] Index: add API for listing and indexing attachments
[ ] Exporter: Exporter needs to support documents & attachments
Expand All @@ -44,14 +38,22 @@ Modification-Date: Sat, 15 Aug 2009 17:01:42 +0200
[ ] WWW: check resolving of files and icons for web server
* Add extra check that no files outside allowed directories are served
[ ] Links to non-existing pages are not indexed
* Keep a table that has dead-links by basename and trigger index of source pages when a page is created with the same basename
[ ] Take cleanup functions out of wiki dumper and into ParseTree.reformat() - copy of elements between trees makes it fast
* Break formatting at lines (in em / strong / head / links / ..)
* Group in para and ensure para end in \n
* Ensure \n around pre and h
* ensure no h in para or indent etc.

== Features ==
== Missing Features ==
[*] Implement Calendar plugin
[*] Implement insert date dialog
[ ] Hook signal for linking - split off suggest-link from end-of-word ?
* use e.g. in InsertDateDialog
[*] Implement search + search dialog
[*] Pageview: Finish the UndoStackManager and hook to the pageview
[ ] Pageview: implement recursive checkbox lists
[ ] Pageview: make child items in list also (un)indent with parent
[ ] Implement TaskList plugin
* Use preferences for keywords, heading keywords, use of checkboxes
[ ] Implement VersionControl plugin(s)
Expand Down
55 changes: 55 additions & 0 deletions tests/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,3 +351,58 @@ def link(self, link): return '%s://%s' % (link_type(link), link)
def img(self, src): return 'img://' + src

def icon(self, name): return 'icon://' + name


class TestPareTreeBuilder(TestCase):
    # NOTE(review): the class name looks like a typo for
    # "TestParseTreeBuilder"; it is kept as-is because renaming would
    # change the public name of the test case.

    def runTest(self):
        '''Test ParseTreeBuilder class'''
        # For some reason this does not work with cElementTree.XMLBuilder ...
        from xml.etree.ElementTree import XMLTreeBuilder

        # What is being checked:
        # - "\n" is enforced before and after h / p / pre
        # - formatting that spans several lines is broken up per line
        raw_xml = '''\
<?xml version='1.0' encoding='utf-8'?>
<zim-tree>
foo<h level="1">bar</h>baz
dus<pre>ja</pre>hmm
<h level="2">foo
</h>bar
dus ja <emphasis>hmm
dus ja
</emphasis>grrr
</zim-tree>'''

        expected = '''\
<?xml version='1.0' encoding='utf-8'?>
<zim-tree>
foo
<h level="1">bar</h>
baz
dus
<pre>ja
</pre>
hmm
<h level="2">foo</h>
bar
dus ja <emphasis>hmm</emphasis>
<emphasis>dus ja</emphasis>
grrr
</zim-tree>'''

        # Feed the raw XML through the builder under test, then wrap the
        # resulting root element so it can be serialized and compared.
        parser = XMLTreeBuilder(target=ParseTreeBuilder())
        parser.feed(raw_xml)
        tree = ParseTree(parser.close())
        self.assertEqualDiff(tree.tostring(), expected)

2 changes: 1 addition & 1 deletion tests/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,8 @@ def runTest(self):
#~ print '>>', page, path
iter = treestore.get_iter(tuple(path))
indexpath = treestore.get_indexpath(iter)
#~ print '>>>', indexpath
self.assertEqual(indexpath, page)
#~ print '>>', path, page
self.assertEqual(
treestore.get_value(iter, 0), page.basename)
self.assertEqual(
Expand Down
16 changes: 12 additions & 4 deletions tests/pageview.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,24 @@

class TestTextBuffer(TestCase):

# TODO test that raw parsetree is really raw - so test that
# buffer.get_parsetree(raw=True) provides roundtrip the other
# way, so formatting with "errors" reproduces exactly the same after
# reloading the raw parsetree

def runTest(self):
'''Test serialization of the page view textbuffer'''
wikitext = get_test_data_page('wiki', 'roundtrip')
tree = wiki.Parser().parse(wikitext)
notebook, page = get_test_page()
notebook.get_store(page).dir = Dir('/foo') # HACK
tree.resolve_images(notebook, page)
#~ print tree.tostring()
buffer = TextBuffer()
buffer.set_parsetree(tree)
tree = buffer.get_parsetree()
#~ print tree.tostring()
result = u''.join(wiki.Dumper().dump(tree))
result = buffer.get_parsetree()
#~ print result.tostring()
#~ self.assertEqualDiff(result.tostring(), tree.tostring())
result = u''.join(wiki.Dumper().dump(result))
self.assertEqualDiff(result, wikitext)


13 changes: 9 additions & 4 deletions zim/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,10 +277,15 @@ def load_plugin(self, name):
'''Load a single plugin by name'''
assert isinstance(name, basestring)
import zim.plugins
klass = zim.plugins.get_plugin(name)
plugin = klass(self)
self.plugins.append(plugin)
logger.debug('Loaded plugin %s (%s)', name, plugin)
try:
klass = zim.plugins.get_plugin(name)
plugin = klass(self)
except:
logger.exception('Failed to load plugin %s', name)
return
else:
self.plugins.append(plugin)
logger.debug('Loaded plugin %s (%s)', name, plugin)

plugin.plugin_key = name
if not name in self.preferences['General']['plugins']:
Expand Down
122 changes: 122 additions & 0 deletions zim/formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,128 @@ def count(self, text, case=True):
return score


# Matches the run of newlines at the very end of a string
count_eol_re = re.compile(r'\n+\Z')
# Matches two or more consecutive lines that hold only spaces / tabs
# (not used yet by the builder below, see the TODO in _flush())
split_para_re = re.compile(r'((?:^[ \t]*\n){2,})', re.M)


class ParseTreeBuilder(object):
    '''Builder that turns start() / data() / end() events into a tree
    of Element objects while normalizing line ends.

    Text is buffered by data() and only written to the tree when the
    next tag is opened or closed. At that point the builder:

      - makes sure "h", "p" and "pre" are preceded by an empty line
      - makes sure the content of "p" and "pre" ends with a newline
      - makes sure "h", "p" and "pre" are followed by a newline
      - splits elements that may not contain a newline (headings and
        inline formatting) into one element per line

    The root element must be "zim-tree".
    '''

    # Block level tags that need line ends around them
    _BLOCK_TAGS = ('h', 'p', 'pre')

    # Block level tags whose own content must end with a newline
    _EOL_TERMINATED_TAGS = ('p', 'pre')

    # Tags that are not allowed to have newlines in their text
    _SINGLE_LINE_TAGS = ('h', 'emphasis', 'strong', 'mark', 'strike', 'code')

    def __init__(self):
        self._stack = []  # stack of elements for open tags
        self._last = None  # last element opened or closed
        self._data = []  # buffer with data
        self._tail = False  # True if we are after an end tag
        self._seen_eol = 2  # track line ends on flushed data
        # starts with "2" so check is ok for first top level element

    def start(self, tag, attrib=None):
        '''Open a new element.

        @param tag: the tag name, the first tag must be "zim-tree"
        @param attrib: optional dict with attributes for the element
        @returns: the new Element
        '''
        if tag in self._BLOCK_TAGS:
            self._flush(need_eol=2)
        else:
            self._flush()

        if attrib:
            self._last = Element(tag, attrib)
        else:
            self._last = Element(tag)

        if self._stack:
            self._stack[-1].append(self._last)
        else:
            assert tag == 'zim-tree', 'root element needs to be "zim-tree"'
        self._stack.append(self._last)

        self._tail = False
        return self._last

    def end(self, tag):
        '''Close the innermost open element.

        @param tag: the tag name, must match the innermost open element
        @returns: the Element that was closed
        '''
        if tag in self._EOL_TERMINATED_TAGS:
            self._flush(need_eol=1)
        else:
            self._flush()

        self._last = self._stack.pop()
        assert self._last.tag == tag, \
            "end tag mismatch (expected %s, got %s)" % (self._last.tag, tag)
        self._tail = True
        return self._last

    def data(self, text):
        '''Buffer text, it is added to the tree on the next start() or end()

        @param text: a string
        '''
        assert isinstance(text, basestring)
        self._data.append(text)

    def _flush(self, need_eol=0):
        '''Write the buffered text to the tree.

        @param need_eol: minimum number of newlines the text flushed so
        far has to end with, missing newlines are added
        '''
        text = ''.join(self._data)

        # Fix trailing newlines
        if text:
            m = count_eol_re.search(text)
            if m:
                self._seen_eol = len(m.group(0))
            else:
                self._seen_eol = 0

        if need_eol > self._seen_eol:
            text += '\n' * (need_eol - self._seen_eol)
            self._seen_eol = need_eol

        # Fix prefix newlines: text following a closed block level tag
        # has to start on a new line
        if (self._tail and self._last.tag in self._BLOCK_TAGS
                and not text.startswith('\n')):
            if text:
                text = '\n' + text
            else:
                text = '\n'
                self._seen_eol = 1

        if not text:
            return

        assert self._last is not None, 'data seen before root element'
        self._data = []

        if not self._tail and self._last.tag in self._SINGLE_LINE_TAGS:
            self._flush_single_line(text)
        elif self._tail:
            # TODO split paragraphs
            assert self._last.tail is None, "internal error (tail)"
            self._last.tail = text
        else:
            # TODO split paragraphs
            assert self._last.text is None, "internal error (text)"
            self._last.text = text

    def _flush_single_line(self, text):
        '''Add text to an open element that may not contain newlines,
        the element is repeated for each line in the text.
        '''
        # assume no nested tags in these types ...
        if self._seen_eol:
            # Trailing newlines may not end up inside the element, put
            # them back in the buffer so a later flush can handle them
            text = text.rstrip('\n')
            self._data.append('\n' * self._seen_eol)
            self._seen_eol = 0
        lines = text.split('\n')

        # The open element is replaced on the stack by the element
        # that is going to hold the last line
        self._stack.pop()
        for line in lines[:-1]:
            assert self._last.text is None, "internal error (text)"
            assert self._last.tail is None, "internal error (tail)"
            self._last.text = line
            self._last.tail = '\n'
            # Copy the attributes, so e.g. a heading that is split in
            # multiple lines keeps its level on every line
            self._last = Element(self._last.tag, dict(self._last.attrib))
            self._stack[-1].append(self._last)

        self._stack.append(self._last)

        assert self._last.text is None, "internal error (text)"
        self._last.text = lines[-1]

    def close(self):
        '''Finish the tree.

        @returns: the root Element
        '''
        assert len(self._stack) == 0, 'missing end tags'
        # Compare with None explicitly: an element without children
        # evaluates to False, so a truth test rejects an empty root
        assert self._last is not None and self._last.tag == 'zim-tree', \
            'missing root element'
        return self._last


class ParserClass(object):
'''Base class for parsers
Expand Down
11 changes: 7 additions & 4 deletions zim/formats/wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,13 @@ def _parse_block(self, builder, block):
'''Parse a block, like a verbatim paragraph'''
if not self.backward:
m = parser_re['pre'].match(block)
assert m, 'Block does not match pre'
builder.start('pre')
builder.data(m.group(1))
builder.end('pre')
if not m:
logger.warn('Block does not match pre >>>\n%s<<<', block)
builder.data(block)
else:
builder.start('pre')
builder.data(m.group(1))
builder.end('pre')
else:
builder.start('pre')
builder.data(block)
Expand Down
2 changes: 1 addition & 1 deletion zim/gui/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ def do_save_page(self, page):
try:
self.notebook.store_page(page)
except Exception, error:
logger.warn('Failed to save page: %s', page.name)
logger.exception('Failed to save page: %s', page.name)
SavePageErrorDialog(self, error, page).run()

def save_copy(self):
Expand Down
Loading

0 comments on commit d336608

Please sign in to comment.