Skip to content

Commit

Permalink
Merge branch 'feature/custom_css'
Browse files Browse the repository at this point in the history
  • Loading branch information
GjjvdBurg committed Dec 27, 2020
2 parents 7f6ed43 + 8f4bcee commit a6e50d0
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 9 deletions.
12 changes: 12 additions & 0 deletions docs/man.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,18 @@ reMarkable options:
If the target directory does not exist it will be created. If not
specified, the root directory will be used.

Output customization:

--css=FILENAME
Path to a CSS file with custom styling for the HTML output. This option
is ignored by all other providers. The source code of the HTML provider
contains the default CSS style, which can be used as a starting
point.

--font-urls=FILENAME
Path to a file with font URLs (one per line) for the HTML output. This
will generally be used in combination with the ``--css`` option.

System settings:

You'll only need to specify these options if the programs are not available on
Expand Down
4 changes: 2 additions & 2 deletions paper2remarkable/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
"""

# NOTE: I know about the logging module, but this was easier because one of the
# dependencies was using that and it became complicated. This one is obviously
# not thread-safe and is very simple.
# dependencies was using that and it interfered with our logging. The logger
# class below is obviously not thread-safe and is very simple.

import datetime
import sys
Expand Down
7 changes: 6 additions & 1 deletion paper2remarkable/providers/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def __init__(
pdftk_path="pdftk",
qpdf_path="qpdf",
gs_path="gs",
css_path=None,
font_urls_path=None,
cookiejar=None,
):
self.upload = upload
Expand All @@ -60,9 +62,12 @@ def __init__(
self.pdftk_path = pdftk_path
self.qpdf_path = qpdf_path
self.gs_path = gs_path
self.informer = Informer()
self.css_path = css_path
self.font_urls_path = font_urls_path
self.cookiejar = cookiejar

self.informer = Informer()

self.pdftool = check_pdftool(self.pdftk_path, self.qpdf_path)

# wait time to not hit the server too frequently
Expand Down
42 changes: 36 additions & 6 deletions paper2remarkable/providers/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@

import html2text
import markdown
import os
import re
import readability
import titlecase
import unidecode
import urllib
import weasyprint
import weasyprint.fonts

from ._base import Provider
from ._info import Informer
Expand All @@ -34,7 +34,6 @@
logger = Logger()

CSS = """
@import url('https://fonts.googleapis.com/css?family=EB+Garamond|Noto+Serif|Inconsolata&display=swap');
@page { size: 702px 936px; margin: 1in; }
a { color: black; }
img { display: block; margin: 0 auto; text-align: center; max-width: 70%; max-height: 300px; }
Expand All @@ -48,6 +47,13 @@
code { font-family: 'Inconsolata'; font-size: .7rem; background: #efefef; }
"""

# NOTE: For some reason, Weasyprint no longer accepts the @import statement in
# the CSS to load the fonts. This may have to do with recent changes they've
# introduced. Providing the font urls separately does seem to work.
#
# Default list of font stylesheet URLs. get_font_urls() returns this list when
# no custom font urls file is configured (or when that file doesn't exist), and
# retrieve_pdf() appends the returned urls to the stylesheets handed to
# Weasyprint's write_pdf().
FONT_URLS = [
    "https://fonts.googleapis.com/css2?family=EB+Garamond&family=Noto+Serif&family=Inconsolata"
]


def url_fetcher(url):
if url.startswith("//"):
Expand Down Expand Up @@ -168,6 +174,30 @@ def preprocess_html(self, pdf_url, title, article):
html_article = md.convert(article)
return html_article

def get_css(self):
    """Return the CSS text used to style the generated PDF.

    The contents of the file at ``self.css_path`` are returned when that
    path is set and exists. When the path is unset the default ``CSS`` is
    returned; when it is set but missing, a warning is logged and the
    default ``CSS`` is returned as well.
    """
    custom_path = self.css_path

    # Happy path first: a configured stylesheet that is present on disk.
    if custom_path is not None and os.path.exists(custom_path):
        with open(custom_path, "r") as handle:
            return handle.read()

    # Only warn when the user actually asked for a custom stylesheet.
    if custom_path is not None:
        logger.warning(
            f"CSS file {self.css_path} doesn't exist, using default style."
        )
    return CSS

def get_font_urls(self):
    """Return the list of font stylesheet URLs to load for the PDF.

    When ``self.font_urls_path`` is None the default ``FONT_URLS`` is
    returned. When the path is set but the file doesn't exist, a warning
    is logged and ``FONT_URLS`` is returned. Otherwise the file is read as
    one URL per line: surrounding whitespace is stripped and blank lines
    are skipped.
    """
    if self.font_urls_path is None:
        return FONT_URLS
    if not os.path.exists(self.font_urls_path):
        logger.warning(
            f"Font urls file {self.font_urls_path} doesn't exist, using default."
        )
        return FONT_URLS
    with open(self.font_urls_path, "r") as fp:
        # Blank lines (most commonly the one produced by a trailing newline)
        # must be dropped: the caller passes every entry on as a stylesheet
        # location, and an empty string is not a usable one.
        stripped = (line.strip() for line in fp)
        return [url for url in stripped if url]

def retrieve_pdf(self, pdf_url, filename):
"""Turn the HTML article in a clean pdf file
Expand All @@ -193,11 +223,11 @@ def retrieve_pdf(self, pdf_url, filename):
with open("./paper.html", "w") as fp:
fp.write(html_article)

font_config = weasyprint.fonts.FontConfiguration()
html = weasyprint.HTML(string=html_article, url_fetcher=url_fetcher)
css = weasyprint.CSS(string=CSS, font_config=font_config)

html.write_pdf(filename, stylesheets=[css], font_config=font_config)
css = self.get_css()
font_urls = self.get_font_urls()
style = weasyprint.CSS(string=css)
html.write_pdf(filename, stylesheets=[style] + font_urls)

def validate(src):
# first check if it is a valid url
Expand Down
10 changes: 10 additions & 0 deletions paper2remarkable/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ def parse_args():
help="path to rmapi executable (default: rmapi)",
default="rmapi",
)
parser.add_argument(
"--css", help="path to custom CSS file for HTML output", default=None
)
parser.add_argument(
"--font-urls",
help="path to custom font urls file for HTML output",
default=None,
)
parser.add_argument(
"input",
help="One or more URLs to a paper or paths to local PDF files",
Expand Down Expand Up @@ -229,6 +237,8 @@ def main():
pdftk_path=args.pdftk,
qpdf_path=args.qpdf,
gs_path=args.gs,
css_path=args.css,
font_urls_path=args.font_urls,
cookiejar=cookiejar,
)
prov.run(new_input, filename=filename)
35 changes: 35 additions & 0 deletions tests/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
"""

import os
import pdfplumber
import tempfile
import unittest

from paper2remarkable.providers.html import HTML
Expand All @@ -24,6 +27,38 @@ def test_experimental_fix_lazy_loading(self):
expected_image = "https://www.seriouseats.com/images/2015/01/20150118-tea-max-falkowitz-3.jpg"
self.assertIn(expected_image, html_article)

def test_custom_css(self):
    """Check that the HTML provider accepts a custom CSS and font urls file.

    A stylesheet and a font urls list are written to temporary files, the
    provider is run on an article URL with upload disabled, and the page
    count of the resulting PDF is checked.

    NOTE(review): presumably requires network access to fetch the article
    and the fonts; confirm before running offline.
    """
    test_css = """
    @page { size: 702px 936px; margin: 1in; }
    img { display: block; margin: 0 auto; text-align: center; max-width: 70%; max-height: 300px; }
    h1,h2,h3 { font-family: 'Montserrat'; }
    p, li { font-size: 12pt; line-height: 2; font-family: 'Montserrat'; text-align: left; }
    """

    test_font_urls = [
        "https://fonts.googleapis.com/css2?family=Montserrat&display=swap"
    ]

    # Register each cleanup as soon as the file exists, so the files are
    # removed even when a later step or the assertion fails.
    tmpfd, tempfname_css = tempfile.mkstemp(prefix="p2r_", suffix=".css")
    self.addCleanup(os.unlink, tempfname_css)
    with os.fdopen(tmpfd, "w") as fp:
        fp.write(test_css)

    tmpfd, tempfname_urls = tempfile.mkstemp(prefix="p2r_", suffix=".txt")
    self.addCleanup(os.unlink, tempfname_urls)
    with os.fdopen(tmpfd, "w") as fp:
        fp.write("\n".join(test_font_urls))

    url = "https://hbr.org/2019/11/getting-your-team-to-do-more-than-meet-deadlines"
    prov = HTML(
        upload=False, css_path=tempfname_css, font_urls_path=tempfname_urls
    )
    filename = prov.run(url)
    self.addCleanup(os.unlink, filename)

    with pdfplumber.open(filename) as pdf:
        self.assertEqual(8, len(pdf.pages))


if __name__ == "__main__":
unittest.main()

0 comments on commit a6e50d0

Please sign in to comment.