Improved file linking for export

TaurusLegend · Sep 2, 2009 · d839704 · d839704
1 parent 378429a
commit d839704
Show file tree

Hide file tree

Showing 11 changed files with 225 additions and 105 deletions.
diff --git a/HACKING/Ideas/Exporter.txt b/HACKING/Ideas/Exporter.txt
diff --git a/HACKING/Tasks.txt b/HACKING/Tasks.txt
@@ -26,13 +26,8 @@ Creation-Date: Sat, 04 Apr 2009 10:54:39 +0200
 == Defects ==
 [ ] Pageview: enforce heading can not be indented
 [ ] Pageview: Para indenting tags and bullet offset tags need to be split up
-[*] Pageview: Fix issue with Verbatim paragraphs
 [ ] Pageview: Rich copy paste / drag drop
 [ ] Pageindex: Drag&Drop in the side pane is not working
-[ ] Index: add API for listing and indexing attachments
-[ ] Exporter: Exporter needs to support documents & attachments 
-[ ] Exporter: Check if linker for exporter is OK
-[*] Notebook: When moving / renaming links need to be updated
 [ ] WWW: check resolving of files and icons for web server
 	* Add extra check that no files outside allowed directories are served
 [ ] Links to non-exisiting pages are not indexed
@@ -55,6 +50,7 @@ Creation-Date: Sat, 04 Apr 2009 10:54:39 +0200
 [*] Implement EquationEditor and DiagramEditor plugins
 [ ] Implement Fuse filesystem plugins for auto-mounting sshfs / encfs / ..
 	* Preferences dialog should have list of mount points
+[ ] Package icons in target directory when exporting
 
 == New Features ==
 [ ] Pageview: menu controls to indent / unindent / move up move down a list item

diff --git a/tests/data/notebook-wiki.xml b/tests/data/notebook-wiki.xml
@@ -115,7 +115,21 @@ sdffdsfds
 <page name="Test:foo">
 ====== Foo ======
 
+# This page is used among others for testing export
+
 bar baz
+
+[[foo:bar]]
+[[http://foo.org/bar]]
+[[mailto:foo@bar.org]]
+[[foo@bar.org]]
+
+[[/foo/bar.pdf]]
+{{/foo/bar.pdf}}
+
+[[~/foo/bar.pdf]]
+{{~/foo/bar.pdf}}
+
 </page>
 <page name="Test:foo:bar">
 = FOO Bar =

diff --git a/tests/export.py b/tests/export.py
@@ -10,17 +10,22 @@
 from zim.notebook import Path, Notebook
 from zim.exporter import Exporter
 
+# TODO add check that attachments are copied correctly
+
 class TestExport(TestCase):
 
 	slowTest = True
 
+	options = {'format': 'html', 'template': 'Default'}
+
 	def setUp(self):
 		self.dir = Dir(create_tmp_dir('export_ExportedFiles'))
 
 	def export(self):
 		notebook = get_test_notebook()
+		notebook.get_store(Path(':')).dir = Dir('/foo/bar') # fake source dir
 		notebook.index.update()
-		exporter = Exporter(notebook, format='html', template='Default')
+		exporter = Exporter(notebook, **self.options)
 		exporter.export_all(self.dir)
 
 	def runTest(self):
@@ -34,6 +39,19 @@ def runTest(self):
 		self.assertTrue('<h1>Foo</h1>' in text)
 
 
+class TestExportFullOptions(TestExport):
+
+	options = {'format': 'html', 'template': 'Default',
+			'index_page': 'index', 'document_root_url': 'http://foo.org/'}
+
+	def runTest(self):
+		'''Test export notebook to html with all options'''
+		TestExport.runTest(self)
+		file = self.dir.file('index.html')
+		self.assertTrue(file.exists())
+		# print file.read() TODO check content of index
+
+
 class TestExportCommandLine(TestExport):
 
 	def export(self):

diff --git a/tests/printtobrowser.py b/tests/printtobrowser.py
@@ -24,6 +24,7 @@ def runTest(self):
 		content = file.read()
 		self.assertTrue('<h1>Foo</h1>' in content)
 
+
 class StubUI(object):
 
 	ui_type = 'stub'

diff --git a/zim/exporter.py b/zim/exporter.py
@@ -7,12 +7,10 @@
 import logging
 
 from zim.fs import *
-from zim.formats import get_format
+from zim.formats import get_format, BaseLinker
 from zim.templates import get_template
-from zim.notebook import Page, IndexPage
+from zim.notebook import Page, IndexPage, PageNameError
 from zim.stores import encode_filename
-from zim.config import data_file
-from zim.parsing import link_type
 
 logger = logging.getLogger('zim.exporter')
 
@@ -22,14 +20,17 @@ class Exporter(object):
 
 	def __init__(self, notebook, format, template=None,
 					index_page=None, document_root_url=None):
-		'''TODO document eport options'''
+		'''Constructor. The 'notebook' is the source for pages to be exported.
+		(The export target is given as an argument to export_all() or export().)
+		The 'format' and 'template' arguments determine the output format.
+		If 'index_page' is given a page index is generated and
+		'document_root_url' is used to prefix any file links that start with '/'.
+		'''
 		self.notebook = notebook
 		self.index_page = index_page
 		self.document_root_url = document_root_url
-		self.linker = BaseLinker(format, notebook)
-
-		if document_root_url:
-			print 'TODO: implement map document root'
+		self.linker = StaticLinker(format, notebook,
+						document_root_url=document_root_url)
 
 		if isinstance(format, basestring):
 			self.format = get_format(format)
@@ -45,6 +46,11 @@ def __init__(self, notebook, format, template=None,
 			self.template.set_linker(self.linker)
 
 	def export_all(self, dir, callback=None):
+		'''Export all pages in the notebook to 'dir'. Attachments are copied
+		along. The function 'callback' will be called after each page with the
+		page object as single argument. If the callback returns False the
+		export will be cancelled.
+		'''
 		logger.info('Exporting notebook to %s', dir)
 
 		for page in self.notebook.walk():
@@ -63,17 +69,28 @@ def export_all(self, dir, callback=None):
 		return True
 
 	def export_page(self, dir, page):
+		'''Export 'page' to a file below 'dir'. Path below 'dir' will be
+		determined by the namespace of 'page'. Attachments will also be
+		copied along.
+		'''
 		logger.info('Exporting %s', page.name)
-		filename = encode_filename(page.name)
-		filename += '.' + self.format.info['extension']
+		dirname = encode_filename(page.name)
+		filename = dirname + '.' + self.format.info['extension']
 		file = dir.file(filename)
 		fh = file.open('w')
 		self.export_page_to_fh(fh, page)
 		fh.close()
-		# TODO add attachments + copy documents
+		subdir = dir.subdir(dirname)
+		attachments = self.notebook.get_attachments_dir(page)
+		for name in attachments.list():
+			file = attachments.file(name)
+			if file.exists(): # tests os.isfile
+				file.copyto(subdir)
 
 	def export_page_to_fh(self, fh, page):
-		# TODO use document_root_url
+		'''Export 'page' and print the output to open file handle 'fh'.
+		(Does not do anything with attachments.)
+		'''
 		if self.template is None:
 			self.linker.set_path(page)
 			lines = page.dump(self.format, linker=self.linker)
@@ -82,50 +99,49 @@ def export_page_to_fh(self, fh, page):
 		fh.writelines(l.encode('utf-8') for l in lines)
 
 
-class BaseLinker(object):
+class StaticLinker(BaseLinker):
 	'''Linker object for exporting a single page. It links files, images
 	and icons with absolute file paths, but can not link other pages corectly.
 	'''
 
-	def __init__(self, format, notebook, path=None):
+	def __init__(self, format, notebook, path=None, document_root_url=None):
+		BaseLinker.__init__(self)
 		if isinstance(format, basestring):
 			format = get_format(format)
 		self.notebook = notebook
 		self.path = path
+		self.document_root_url = document_root_url
 		self._extension = '.' + format.info['extension']
-		self._icons = {}
-
-	def set_path(self, path):
-		self.path = path
 
-	def link(self, link):
-		'''Returns an url for 'link' '''
-		type = link_type(link)
-		if type == 'page':
-			# even though useless in the typical use-case still resolve pages so they look OK
+	def page(self, link):
+		try:
 			page = self.notebook.resolve_path(link, source=self.path)
-			href = '/' + encode_filename(page.name) + self._extension
-		elif type == 'file':
-			href = self.src(link, path)
-		elif type == 'mailto':
-			if link.startswith('mailto:'):
-				href = link
+		except PageNameError:
+			return ''
+		else:
+			if page == self.path:
+				return ''
+
+			parent = page.commonparent(self.path)
+			if parent == self.path:
+				path = './' + self.path.basename + '/'
+				downpath = page.relname(parent)
+				path += encode_filename(downpath) + self._extension
+			elif parent == page:
+				uppath = self.path.relname(parent)
+				path = '../' * (uppath.count(':') + 1)
+				path += encode_filename(page.basename) + self._extension
 			else:
-				href = 'mailto:' + link
+				uppath = self.path.relname(parent)
+				downpath = page.relname(parent)
+				path = '../' * uppath.count(':') or './'
+				path += encode_filename(downpath) + self._extension
+			#~ print '>>>', path
+			return path
+
+	def file(self, link):
+		if self.document_root_url and link.startswith('/'):
+			return ''.join((self.document_root_url.rstrip('/'), link))
 		else:
-			# I dunno, some url ?
-			href = link
-		return href
-
-	def img(self, src):
-		'''Returns an url for image file 'src' '''
-		file = self.notebook.resolve_file(src, self.path)
+			file = self.notebook.resolve_file(link, self.path)
 		return file.uri
-
-	def icon(self, name):
-		'''Returns an url for an icon'''
-		if not name in self._icons:
-			self._icons[name] = data_file('pixmaps/%s.png' % name).uri
-		return self._icons[name]
-
-# TODO: linker for using a single page
diff --git a/zim/formats/__init__.py b/zim/formats/__init__.py
@@ -55,6 +55,10 @@
 import re
 import logging
 
+from zim.parsing import link_type
+from zim.config import data_file
+
+
 logger = logging.getLogger('zim.formats')
 
 # Needed to determine RTL, but may not be available
@@ -85,6 +89,7 @@
 XCHECKED_BOX = 'xchecked-box'
 BULLET = '*'
 
+
 def list_formats(type):
 	if type == EXPORT_FORMAT:
 		return ['HTML']
@@ -412,3 +417,52 @@ def isrtl(self, element):
 				return dir == pango.DIRECTION_RTL
 
 		return None
+
+
+class BaseLinker(object):
+	'''Base class for linker objects. Linker objects translate links in zim pages
+	to either paths or urls. Paths should be interpreted relative to the
+	document in the way this is done in html.
+	'''
+
+	def __init__(self):
+		self._icons = {}
+
+	def set_path(self, path):
+		self.path = path
+
+	def link(self, link):
+		'''Returns a path or url for 'link' '''
+		# TODO optimize by hashing links seen (reset per page)
+		type = link_type(link)
+		if type == 'page':
+			return self.page(link)
+		elif type == 'file':
+			return self.file(link)
+		elif type == 'mailto':
+			if link.startswith('mailto:'):
+				return link
+			else:
+				return 'mailto:' + link
+		else:
+			# I dunno, some url ?
+			return link
+
+	def img(self, src):
+		'''Returns a path or url for image file 'src' '''
+		return self.file(src)
+
+	def icon(self, name):
+		'''Returns a path or url for an icon'''
+		if not name in self._icons:
+			self._icons[name] = data_file('pixmaps/%s.png' % name).uri
+		return self._icons[name]
+
+	def page(self, link):
+		'''To be overloaded'''
+		raise NotImplementedError
+
+	def file(self, path):
+		'''To be overloaded'''
+		raise NotImplementedError
+