diff --git a/.eslintignore b/.eslintignore new file mode 100644 index 00000000000..7e28631131d --- /dev/null +++ b/.eslintignore @@ -0,0 +1 @@ +qutebrowser/3rdparty/pdfjs/* diff --git a/MANIFEST.in b/MANIFEST.in index 07726eca7a8..e2dd15509e8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,6 +3,7 @@ recursive-include qutebrowser/html *.html recursive-include qutebrowser/img *.svg *.png recursive-include qutebrowser/test *.py recursive-include qutebrowser/javascript *.js +graft qutebrowser/3rdparty graft icons graft doc/img graft misc @@ -27,6 +28,7 @@ exclude qutebrowser.rcc exclude .coveragerc exclude .pylintrc exclude .eslintrc +exclude .eslintignore exclude doc/help exclude .appveyor.yml exclude .travis.yml diff --git a/README.asciidoc b/README.asciidoc index ff903ce99cb..7902360f1c8 100644 --- a/README.asciidoc +++ b/README.asciidoc @@ -272,3 +272,13 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. + +pdf.js +------ + +qutebrowser uses https://github.com/mozilla/pdf.js/[pdf.js] to display +PDF files in the browser. + +pdf.js is distributed under the terms of the Apache License. You can +find a copy of the license in `qutebrowser/3rdparty/pdfjs/LICENSE` or online +http://www.apache.org/licenses/LICENSE-2.0.html[here]. 
diff --git a/qutebrowser/browser/network/qutescheme.py b/qutebrowser/browser/network/qutescheme.py index e74bdd2bca6..876d85e1fcb 100644 --- a/qutebrowser/browser/network/qutescheme.py +++ b/qutebrowser/browser/network/qutescheme.py @@ -31,11 +31,13 @@ import functools import configparser +import mimetypes from PyQt5.QtCore import pyqtSlot, QObject from PyQt5.QtNetwork import QNetworkReply import qutebrowser +from qutebrowser.browser import pdfjs from qutebrowser.browser.network import schemehandler, networkreply from qutebrowser.utils import (version, utils, jinja, log, message, docutils, objreg) @@ -93,8 +95,11 @@ def createRequest(self, _op, request, _outgoing_data): return networkreply.ErrorNetworkReply( request, str(e), QNetworkReply.ContentNotFoundError, self.parent()) + mimetype, _encoding = mimetypes.guess_type(request.url().fileName()) + if mimetype is None: + mimetype = 'text/html' return networkreply.FixedDataNetworkReply( - request, data, 'text/html', self.parent()) + request, data, mimetype, self.parent()) class JSBridge(QObject): @@ -201,3 +206,10 @@ def qute_settings(win_id, _request): win_id=win_id, title='settings', config=configdata, confget=config_getter) return html.encode('UTF-8', errors='xmlcharrefreplace') + + +@add_handler('pdfjs') +def qute_pdfjs(_win_id, request): + """Handler for qute://pdfjs. Return the pdf.js viewer.""" + urlpath = request.url().path() + return pdfjs.get_pdfjs_res(urlpath) diff --git a/qutebrowser/browser/pdfjs.py b/qutebrowser/browser/pdfjs.py new file mode 100644 index 00000000000..83b89330ccb --- /dev/null +++ b/qutebrowser/browser/pdfjs.py @@ -0,0 +1,175 @@ +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 2015 Daniel Schadt +# +# This file is part of qutebrowser. 
+# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>. + +"""pdf.js integration for qutebrowser.""" + +import os + +from PyQt5.QtCore import QUrl + +from qutebrowser.browser import webelem +from qutebrowser.utils import utils + + +class PDFJSNotFound(Exception): + + """Raised when no pdf.js installation is found.""" + + pass + + +def generate_pdfjs_page(url): + """Return the html content of a page that displays url with pdfjs. + + Returns a string. + + Args: + url: The url of the pdf as QUrl. + """ + viewer = get_pdfjs_res('web/viewer.html').decode('utf-8') + script = _generate_pdfjs_script(url) + html_page = viewer.replace( + '</body>', '</body><script>{}</script>'.format(script) + ) + return html_page + + +def _generate_pdfjs_script(url): + """Generate the script that shows the pdf with pdf.js. + + Args: + url: The url of the pdf page as QUrl. + """ + return ( + 'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n' + 'PDFView.open("{url}");\n' + ).format(url=webelem.javascript_escape(url.toString(QUrl.FullyEncoded))) + + +def fix_urls(asset): + """Take a html page and replace each relative URL with an absolute one. + + This is specialized for pdf.js files and not a general purpose function. + + Args: + asset: js file or html page as string. 
+ """ + new_urls = { + 'viewer.css': 'qute://pdfjs/web/viewer.css', + 'compatibility.js': 'qute://pdfjs/web/compatibility.js', + 'locale/locale.properties': + 'qute://pdfjs/web/locale/locale.properties', + 'l10n.js': 'qute://pdfjs/web/l10n.js', + '../build/pdf.js': 'qute://pdfjs/build/pdf.js', + 'debugger.js': 'qute://pdfjs/web/debugger.js', + 'viewer.js': 'qute://pdfjs/web/viewer.js', + 'compressed.tracemonkey-pldi-09.pdf': '', + './images/': 'qute://pdfjs/web/images/', + '../build/pdf.worker.js': 'qute://pdfjs/build/pdf.worker.js', + '../web/cmaps/': 'qute://pdfjs/web/cmaps/', + } + for original, new in new_urls.items(): + asset = asset.replace(original, new) + return asset + + +SYSTEM_PDFJS_PATHS = [ + '/usr/share/pdf.js/', # Debian pdf.js-common + '/usr/share/javascript/pdf/', # Debian libjs-pdf + os.path.expanduser('~/.local/share/qutebrowser/pdfjs/'), # fallback +] + + +def get_pdfjs_res(path): + """Get a pdf.js resource in binary format. + + Args: + path: The path inside the pdfjs directory. + """ + path = path.lstrip('/') + content = None + + # First try a system wide installation + # System installations might strip off the 'build/' or 'web/' prefixes. + # qute expects them, so we need to adjust for it. + names_to_try = [path, _remove_prefix(path)] + for system_path in SYSTEM_PDFJS_PATHS: + content = _read_from_system(system_path, names_to_try) + if content is not None: + break + + # Fallback to bundled pdf.js + if content is None: + res_path = '3rdparty/pdfjs/{}'.format(path) + try: + content = utils.read_file(res_path, binary=True) + except FileNotFoundError: + raise PDFJSNotFound + + try: + # Might be script/html or might be binary + text_content = content.decode('utf-8') + except UnicodeDecodeError: + return content + text_content = fix_urls(text_content) + return text_content.encode('utf-8') + + +def _remove_prefix(path): + """Remove the web/ or build/ prefix of a pdfjs-file-path. 
+ + Args: + path: Path as string where the prefix should be stripped off. + """ + prefixes = {'web/', 'build/'} + if any(path.startswith(prefix) for prefix in prefixes): + return path.split('/', maxsplit=1)[1] + # Return the unchanged path if no prefix is found + return path + + +def _read_from_system(system_path, names): + """Try to read a file with one of the given names in system_path. + + Each file in names is considered equal, the first file that is found + is read and its binary content returned. + + Returns None if no file could be found + + Args: + system_path: The folder where the file should be searched. + names: List of possible file names. + """ + for name in names: + try: + with open(os.path.join(system_path, name), 'rb') as f: + return f.read() + except OSError: + continue + return None + + +def is_available(): + """Return true if a pdfjs installation is available.""" + try: + get_pdfjs_res('build/pdf.js') + except PDFJSNotFound: + return False + else: + return True diff --git a/qutebrowser/browser/webpage.py b/qutebrowser/browser/webpage.py index e375f721d0b..49a77cc8c13 100644 --- a/qutebrowser/browser/webpage.py +++ b/qutebrowser/browser/webpage.py @@ -30,7 +30,7 @@ from PyQt5.QtWebKitWidgets import QWebPage from qutebrowser.config import config -from qutebrowser.browser import http, tabhistory +from qutebrowser.browser import http, tabhistory, pdfjs from qutebrowser.browser.network import networkmanager from qutebrowser.utils import (message, usertypes, log, jinja, qtutils, utils, objreg, debug) @@ -218,6 +218,19 @@ def _ask(self, text, mode, default=None): q.deleteLater() return q.answer + def _show_pdfjs(self, reply): + """Show the reply with pdfjs.""" + try: + page = pdfjs.generate_pdfjs_page(reply.url()).encode('utf-8') + except pdfjs.PDFJSNotFound: + # pylint: disable=no-member + # WORKAROUND for https://bitbucket.org/logilab/pylint/issue/490/ + page = (jinja.env.get_template('no_pdfjs.html') + .render(url=reply.url().toDisplayString()) + 
.encode('utf-8')) + self.mainFrame().setContent(page, 'text/html', reply.url()) + reply.deleteLater() + def shutdown(self): """Prepare the web page for being deleted.""" self._is_shutting_down = True @@ -305,6 +318,10 @@ def on_unsupported_content(self, reply): else: reply.finished.connect(functools.partial( self.display_content, reply, 'image/jpeg')) + elif (mimetype in {'application/pdf', 'application/x-pdf'} and + config.get('content', 'enable-pdfjs')): + # Use pdf.js to display the page + self._show_pdfjs(reply) else: # Unknown mimetype, so download anyways. download_manager.fetch(reply, diff --git a/qutebrowser/config/configdata.py b/qutebrowser/config/configdata.py index 9114d7662fb..a9fbb2669b2 100644 --- a/qutebrowser/config/configdata.py +++ b/qutebrowser/config/configdata.py @@ -841,6 +841,11 @@ def data(readonly=False): "required to exactly match the requested domain.\n\n" "Local domains are always exempt from hostblocking."), + ('enable-pdfjs', SettingValue(typ.Bool(), 'false'), + "Enable pdf.js to view PDF files in the browser.\n\n" + "Note that the files can still be downloaded by clicking" + " the download button in the pdf.js viewer."), + readonly=readonly )), diff --git a/qutebrowser/html/no_pdfjs.html b/qutebrowser/html/no_pdfjs.html new file mode 100644 index 00000000000..694affc4dd6 --- /dev/null +++ b/qutebrowser/html/no_pdfjs.html @@ -0,0 +1,129 @@ +{% extends "base.html" %} +{% block style %} +{{ super() }} +* { + margin: 0px 0px; + padding: 0px 0px; +} + +body { + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + -webkit-text-size-adjust: none; + color: #333333; + background-color: #EEEEEE; + font-size: 1.2em; +} + +#error-container { + margin-left: 20px; + margin-right: 20px; + margin-top: 20px; + border: 1px solid #CCCCCC; + box-shadow: 0px 0px 6px rgba(0, 0, 0, 0.20); + border-radius: 5px; + background-color: #FFFFFF; + padding: 20px 20px; +} + +#header { + border-bottom: 1px solid #CCC; +} + +.qutebrowser-broken { + display: 
block; + width: 100%; +} + +span.warning { + font-weight: bold; + color: red; +} + +td { + margin-top: 20px; + color: #555; +} + +h1, h2 { + font-weight: normal; + color: #1e89c6; + margin-bottom: 10px; +} + +ul { + margin-left: 20px; + margin-top: 20px; + margin-bottom: 20px; +} + +li { + margin-top: 10px; + margin-bottom: 10px; +} +{% endblock %} + +{% block content %} +
+ + + + + +
+ + +

No pdf.js installation found

+

Error while opening {{ url }}:
+

qutebrowser can't find a suitable pdf.js installation

+ +

It looks like you set content -> enable-pdfjs + to true but qutebrowser can't find the required files.

+ +
+ +

Possible fixes

+
    +
  • + Disable content -> enable-pdfjs and reload the page. + You will need to download the pdf-file and open it with an external + tool instead. +
  • + +
  • + If you have installed a packaged version of qutebrowser, make sure + the required packages for pdf.js are also installed. +
  • + +
  • + If you have installed a pdf.js package and qutebrowser still can't + find it, please send us a report with your system and the package + name, so we can add it to the list of supported packages. +
  • + +
  • + If you're running a self-built version or the source version, make + sure you have pdf.js in qutebrowser/3rdparty/pdfjs. + You can use the scripts/dev/update_3rdparty.py script + to download the latest version. +
  • + +
  • + You can manually download the pdf.js archive + here + and extract it to ~/.local/share/qutebrowser/pdfjs +
    + Warning: Using this method you are + responsible for yourself to keep the installation updated! If a + vulnerability is found in pdf.js, neither qutebrowser nor your + system's package manager will update your pdf.js installation. + Use it at your own risk! +
  • +
+ +

+ If none of these fixes work for you, please send us a bug report so + we can fix the issue. +

+
+
+{% endblock %} diff --git a/scripts/dev/freeze.py b/scripts/dev/freeze.py index b651dd19469..e2321d4a852 100755 --- a/scripts/dev/freeze.py +++ b/scripts/dev/freeze.py @@ -35,6 +35,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)) from scripts import setupcommon +from scripts.dev import update_3rdparty BASEDIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), @@ -65,6 +66,7 @@ def get_build_exe_options(skip_html=False): ('qutebrowser/git-commit-id', 'git-commit-id'), ('qutebrowser/utils/testfile', 'utils/testfile'), ('qutebrowser/html', 'html'), + ('qutebrowser/3rdparty/pdfjs', '3rdparty/pdfjs'), ] if not skip_html: @@ -119,6 +121,8 @@ def main(): 'bundle_name': 'qutebrowser', } + update_3rdparty.main() + try: setupcommon.write_git_file() cx.setup( diff --git a/scripts/dev/freeze_tests.py b/scripts/dev/freeze_tests.py index 2e01de5a547..9dd01defe07 100755 --- a/scripts/dev/freeze_tests.py +++ b/scripts/dev/freeze_tests.py @@ -35,7 +35,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)) from scripts import setupcommon -from scripts.dev import freeze +from scripts.dev import freeze, update_3rdparty @contextlib.contextmanager @@ -72,6 +72,7 @@ def get_build_exe_options(): def main(): """Main entry point.""" base = 'Win32GUI' if sys.platform.startswith('win') else None + update_3rdparty.main() with temp_git_commit_file(): cx.setup( executables=[cx.Executable('scripts/dev/run_frozen_tests.py', diff --git a/scripts/dev/misc_checks.py b/scripts/dev/misc_checks.py index ef32be41d10..e629f215222 100644 --- a/scripts/dev/misc_checks.py +++ b/scripts/dev/misc_checks.py @@ -96,12 +96,19 @@ def check_spelling(): '[Ss]tatemachine', '[Mm]etaobject', '[Ll]ogrecord', '[Ff]iletype'} + # Files which should be ignored, e.g. 
because they come from another + # package + ignored = [ + os.path.join('.', 'scripts', 'dev', 'misc_checks.py'), + os.path.join('.', 'qutebrowser', '3rdparty', 'pdfjs'), + ] + seen = collections.defaultdict(list) try: ok = True for fn in _get_files(): with tokenize.open(fn) as f: - if fn == os.path.join('.', 'scripts', 'dev', 'misc_checks.py'): + if any(fn.startswith(i) for i in ignored): continue for line in f: for w in words: diff --git a/scripts/dev/run_vulture.py b/scripts/dev/run_vulture.py index 2ee3a776ac8..db520f9248f 100755 --- a/scripts/dev/run_vulture.py +++ b/scripts/dev/run_vulture.py @@ -89,6 +89,7 @@ def whitelist_generator(): yield 'qutebrowser.utils.log.VDEBUG' yield 'qutebrowser.utils.log.QtWarningFilter.filter' yield 'logging.LogRecord.log_color' + yield 'qutebrowser.browser.pdfjs.is_available' # vulture doesn't notice the hasattr() and thus thinks netrc_used is unused # in NetworkManager.on_authentication_required yield 'PyQt5.QtNetwork.QNetworkReply.netrc_used' diff --git a/scripts/dev/update_3rdparty.py b/scripts/dev/update_3rdparty.py new file mode 100755 index 00000000000..6db01bc1110 --- /dev/null +++ b/scripts/dev/update_3rdparty.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 2015 Daniel Schadt +# +# This file is part of qutebrowser. +# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. If not, see . 
+ +"""Update all third-party-modules.""" + +import urllib.request +import shutil +import json +import os + + +def get_latest_pdfjs_url(): + """Get the URL of the latest pdf.js prebuilt package. + + Returns a (version, url)-tuple.""" + github_api = 'https://api.github.com' + endpoint = 'repos/mozilla/pdf.js/releases/latest' + request_url = '{}/{}'.format(github_api, endpoint) + with urllib.request.urlopen(request_url) as fp: + data = json.loads(fp.read().decode('utf-8')) + + download_url = data['assets'][0]['browser_download_url'] + version_name = data['name'] + return (version_name, download_url) + + +def update_pdfjs(): + """Download and extract the latest pdf.js version.""" + version, url = get_latest_pdfjs_url() + target_path = os.path.join('qutebrowser', '3rdparty', 'pdfjs') + print("=> Downloading pdf.js {}".format(version)) + (archive_path, _headers) = urllib.request.urlretrieve(url) + if os.path.isdir(target_path): + print("Removing old version in {}".format(target_path)) + shutil.rmtree(target_path) + os.makedirs(target_path) + print("Extracting new version") + shutil.unpack_archive(archive_path, target_path, 'zip') + urllib.request.urlcleanup() + + +def main(): + """Update all third-party modules.""" + update_pdfjs() + +if __name__ == '__main__': + main() diff --git a/tests/integration/data/misc/test.pdf b/tests/integration/data/misc/test.pdf new file mode 100644 index 00000000000..6bd5f16bed8 Binary files /dev/null and b/tests/integration/data/misc/test.pdf differ diff --git a/tests/integration/features/misc.feature b/tests/integration/features/misc.feature index 1df5d99a3d6..b31ef0f67a5 100644 --- a/tests/integration/features/misc.feature +++ b/tests/integration/features/misc.feature @@ -245,3 +245,17 @@ Feature: Various utility commands. 
When I set general -> startpage to http://localhost:(port)/data/numbers/1.txt,http://localhost:(port)/data/numbers/2.txt And I run :home Then data/numbers/1.txt should be loaded + + # pdfjs support + + Scenario: pdfjs is used for pdf files + Given pdfjs is available + When I set content -> enable-pdfjs to true + And I open data/misc/test.pdf + Then the javascript message "PDF * [*] (PDF.js: *)" should be logged + + Scenario: pdfjs is not used when disabled + When I set content -> enable-pdfjs to false + And I set storage -> prompt-download-directory to false + And I open data/misc/test.pdf + Then "Download finished" should be logged diff --git a/tests/integration/features/test_misc.py b/tests/integration/features/test_misc.py index dc3cebdfab3..a19d3c908a4 100644 --- a/tests/integration/features/test_misc.py +++ b/tests/integration/features/test_misc.py @@ -23,10 +23,13 @@ import pytest import pytest_bdd as bdd -bdd.scenarios('misc.feature') import qutebrowser from qutebrowser.utils import docutils +from qutebrowser.browser import pdfjs + + +bdd.scenarios('misc.feature') @bdd.when("the documentation is up to date") @@ -51,3 +54,9 @@ def update_documentation(): update_script = os.path.join(script_path, 'asciidoc2html.py') subprocess.call([sys.executable, update_script]) + + +@bdd.given('pdfjs is available') +def pdfjs_available(): + if not pdfjs.is_available(): + pytest.skip("No pdfjs installation found.") diff --git a/tests/unit/browser/test_pdfjs.py b/tests/unit/browser/test_pdfjs.py new file mode 100644 index 00000000000..88c6153c784 --- /dev/null +++ b/tests/unit/browser/test_pdfjs.py @@ -0,0 +1,82 @@ +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 2015 Daniel Schadt +# +# This file is part of qutebrowser. 
+# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. If not, see . + +"""Tests for qutebrowser.browser.pdfjs""" + +import textwrap + +import pytest +from PyQt5.QtCore import QUrl + +from qutebrowser.browser import pdfjs + + +# Note that we got double protection, once because we use QUrl.FullyEncoded and +# because we use qutebrowser.browser.webelem.javascript_escape. Characters +# like " are already replaced by QUrl. +@pytest.mark.parametrize('url, expected', [ + ('http://foo.bar', + 'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n' + 'PDFView.open("http://foo.bar");\n'), + ('http://"', + 'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n' + 'PDFView.open("");\n'), + ('\0', + 'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n' + 'PDFView.open("%00");\n'), + ('http://foobar/");alert("attack!");', + 'PDFJS.verbosity = PDFJS.VERBOSITY_LEVELS.info;\n' + 'PDFView.open("http://foobar/%22);alert(%22attack!%22);");\n'), +]) +def test_generate_pdfjs_script(url, expected): + url = QUrl(url) + actual = pdfjs._generate_pdfjs_script(url) + assert actual == expected + + +def test_fix_urls(): + page = textwrap.dedent(""" + + + + + + """).strip() + + expected = textwrap.dedent(""" + + + + + + """).strip() + + actual = pdfjs.fix_urls(page) + assert actual == expected + + +@pytest.mark.parametrize('path, expected', [ + ('web/viewer.js', 'viewer.js'), + ('build/locale/foo.bar', 'locale/foo.bar'), + ('viewer.js', 'viewer.js'), + ('foo/viewer.css', 
'foo/viewer.css'), +]) +def test_remove_prefix(path, expected): + assert pdfjs._remove_prefix(path) == expected +