Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add support for no pre_head_insert and no js_notify callback
  • Loading branch information
benoit74 committed Oct 24, 2024
commit d6a297b98d4182046a71e85d375f562ce25a25d0
13 changes: 8 additions & 5 deletions src/zimscraperlib/rewriting/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,9 @@ class HtmlRewriter(HTMLParser):
def __init__(
self,
url_rewriter: ArticleUrlRewriter,
pre_head_insert: str,
pre_head_insert: str | None,
post_head_insert: str | None,
notify_js_module: Callable[[ZimPath], None],
notify_js_module: Callable[[ZimPath], None] | None,
):
super().__init__(convert_charrefs=False)
self.url_rewriter = url_rewriter
Expand Down Expand Up @@ -430,7 +430,7 @@ def do_attribute_rewrite(
css_rewriter: CssRewriter,
url_rewriter: ArticleUrlRewriter,
base_href: str | None,
notify_js_module: Callable[[ZimPath], None],
notify_js_module: Callable[[ZimPath], None] | None,
) -> AttrNameAndValue:
"""Utility function to process all attribute rewriting rules

Expand Down Expand Up @@ -587,7 +587,7 @@ def rewrite_href_src_attributes(
attrs: AttrsList,
url_rewriter: ArticleUrlRewriter,
base_href: str | None,
notify_js_module: Callable[[ZimPath], None],
notify_js_module: Callable[[ZimPath], None] | None,
):
"""Rewrite href and src attributes

Expand All @@ -596,7 +596,10 @@ def rewrite_href_src_attributes(
"""
if attr_name not in ("href", "src") or not attr_value:
return
if get_html_rewrite_context(tag=tag, attrs=attrs) == "js-module":
if (
notify_js_module
and get_html_rewrite_context(tag=tag, attrs=attrs) == "js-module"
):
notify_js_module(url_rewriter.get_item_path(attr_value, base_href=base_href))
return (
attr_name,
Expand Down
11 changes: 6 additions & 5 deletions src/zimscraperlib/rewriting/js.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def __init__(
self,
url_rewriter: ArticleUrlRewriter,
base_href: str | None,
notify_js_module: Callable[[ZimPath], None],
notify_js_module: Callable[[ZimPath], None] | None,
):
super().__init__(None)
self.first_buff = self._init_local_declaration(GLOBAL_OVERRIDES)
Expand Down Expand Up @@ -298,11 +298,12 @@ def func(
m_object: re.Match[str], _opts: dict[str, Any] | None = None
) -> str:
def sub_funct(match: re.Match[str]) -> str:
self.notify_js_module(
self.url_rewriter.get_item_path(
match.group(2), base_href=self.base_href
if self.notify_js_module:
self.notify_js_module(
self.url_rewriter.get_item_path(
match.group(2), base_href=self.base_href
)
)
)
return (
f"{match.group(1)}{get_rewriten_import_url(match.group(2))}"
f"{match.group(3)}"
Expand Down
10 changes: 0 additions & 10 deletions tests/rewriting/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,6 @@
)


@pytest.fixture(scope="module")
def no_js_notify():
"""Fixture to not care about notification of detection of a JS file"""

def no_js_notify_handler(_: str):
pass

yield no_js_notify_handler


class SimpleUrlRewriter(ArticleUrlRewriter):
"""Basic URL rewriter mocking most calls"""

Expand Down
126 changes: 55 additions & 71 deletions tests/rewriting/test_html_rewriting.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,15 @@ def no_rewrite_content(request: pytest.FixtureRequest):
yield request.param


def test_no_rewrite(
no_rewrite_content: ContentForTests, no_js_notify: Callable[[ZimPath], None]
):
def test_no_rewrite(no_rewrite_content: ContentForTests):
assert (
HtmlRewriter(
ArticleUrlRewriter(
article_url=HttpUrl(f"http://{no_rewrite_content.article_url}"),
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(no_rewrite_content.input_str)
.content
Expand Down Expand Up @@ -116,17 +114,15 @@ def escaped_content(request: pytest.FixtureRequest):
yield request.param


def test_escaped_content(
escaped_content: ContentForTests, no_js_notify: Callable[[ZimPath], None]
):
def test_escaped_content(escaped_content: ContentForTests):
transformed = (
HtmlRewriter(
ArticleUrlRewriter(
article_url=HttpUrl(f"http://{escaped_content.article_url}")
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(escaped_content.input_str)
.content
Expand Down Expand Up @@ -239,17 +235,15 @@ def js_rewrites(request: pytest.FixtureRequest):
yield request.param


def test_js_rewrites(
js_rewrites: ContentForTests, no_js_notify: Callable[[ZimPath], None]
):
def test_js_rewrites(js_rewrites: ContentForTests):
transformed = (
HtmlRewriter(
ArticleUrlRewriter(
article_url=HttpUrl(f"http://{js_rewrites.article_url}")
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(js_rewrites.input_str)
.content
Expand Down Expand Up @@ -334,24 +328,24 @@ def rewrite_url(request: pytest.FixtureRequest):
yield request.param


def test_rewrite(rewrite_url: ContentForTests, no_js_notify: Callable[[ZimPath], None]):
def test_rewrite(rewrite_url: ContentForTests):
assert (
HtmlRewriter(
ArticleUrlRewriter(
article_url=HttpUrl(f"http://{rewrite_url.article_url}"),
existing_zim_paths={ZimPath("exemple.com/a/long/path")},
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(rewrite_url.input_str)
.content
== rewrite_url.expected_str
)


def test_extract_title(no_js_notify: Callable[[ZimPath], None]):
def test_extract_title():
content = """<html>
<head>
<title>Page title</title>
Expand All @@ -367,25 +361,25 @@ def test_extract_title(no_js_notify: Callable[[ZimPath], None]):
article_url=HttpUrl("http://example.com"),
existing_zim_paths={ZimPath("exemple.com/a/long/path")},
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(content)
.title
== "Page title"
)


def test_rewrite_attributes(no_js_notify: Callable[[ZimPath], None]):
def test_rewrite_attributes():
rewriter = HtmlRewriter(
ArticleUrlRewriter(
article_url=HttpUrl("http://kiwix.org/"),
existing_zim_paths={ZimPath("kiwix.org/foo")},
),
"",
"",
no_js_notify,
None,
None,
None,
)

assert (
Expand All @@ -407,13 +401,13 @@ def test_rewrite_attributes(no_js_notify: Callable[[ZimPath], None]):
)


def test_rewrite_css(no_js_notify: Callable[[ZimPath], None]):
def test_rewrite_css():
output = (
HtmlRewriter(
ArticleUrlRewriter(article_url=HttpUrl("http://kiwix.org/")),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(
"<style>p { /* A comment with a http://link.org/ */ "
Expand All @@ -427,7 +421,7 @@ def test_rewrite_css(no_js_notify: Callable[[ZimPath], None]):
)


def test_head_insert(no_js_notify: Callable[[ZimPath], None]):
def test_head_insert():
content = """<html>
<head>
<title>A test content</title>
Expand All @@ -439,18 +433,17 @@ def test_head_insert(no_js_notify: Callable[[ZimPath], None]):

url_rewriter = ArticleUrlRewriter(article_url=HttpUrl("http://kiwix.org/"))
assert (
HtmlRewriter(url_rewriter, "", "", no_js_notify).rewrite(content).content
== content
HtmlRewriter(url_rewriter, None, None, None).rewrite(content).content == content
)

assert HtmlRewriter(url_rewriter, "PRE_HEAD_INSERT", "", no_js_notify).rewrite(
assert HtmlRewriter(url_rewriter, "PRE_HEAD_INSERT", None, None).rewrite(
content
).content == content.replace("<head>", "<head>PRE_HEAD_INSERT")
assert HtmlRewriter(url_rewriter, "", "POST_HEAD_INSERT", no_js_notify).rewrite(
assert HtmlRewriter(url_rewriter, None, "POST_HEAD_INSERT", None).rewrite(
content
).content == content.replace("</head>", "POST_HEAD_INSERT</head>")
assert HtmlRewriter(
url_rewriter, "PRE_HEAD_INSERT", "POST_HEAD_INSERT", no_js_notify
url_rewriter, "PRE_HEAD_INSERT", "POST_HEAD_INSERT", None
).rewrite(content).content == content.replace(
"<head>", "<head>PRE_HEAD_INSERT"
).replace(
Expand Down Expand Up @@ -735,9 +728,7 @@ def rewrite_base_href_content(request):
yield request.param


def test_rewrite_base_href(
rewrite_base_href_content: ContentForTests, no_js_notify: Callable[[ZimPath], None]
):
def test_rewrite_base_href(rewrite_base_href_content: ContentForTests):
assert (
HtmlRewriter(
ArticleUrlRewriter(
Expand All @@ -750,9 +741,9 @@ def test_rewrite_base_href(
ZimPath("kiwix.org/favicon.png"),
},
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(rewrite_base_href_content.input_str)
.content
Expand Down Expand Up @@ -795,15 +786,13 @@ def test_rewrite_base_href(
),
],
)
def test_simple_rewrite(
input_content: str, expected_output: str, no_js_notify: Callable[[ZimPath], None]
):
def test_simple_rewrite(input_content: str, expected_output: str):
assert (
HtmlRewriter(
ArticleUrlRewriter(article_url=HttpUrl("http://example.com")),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(input_content)
.content
Expand Down Expand Up @@ -862,9 +851,7 @@ def rewrite_onxxx_content(request: pytest.FixtureRequest):
yield request.param


def test_rewrite_onxxx_event(
rewrite_onxxx_content: ContentForTests, no_js_notify: Callable[[ZimPath], None]
):
def test_rewrite_onxxx_event(rewrite_onxxx_content: ContentForTests):
assert (
HtmlRewriter(
ArticleUrlRewriter(
Expand All @@ -877,9 +864,9 @@ def test_rewrite_onxxx_event(
ZimPath("kiwix.org/favicon.png"),
},
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(rewrite_onxxx_content.input_str)
.content
Expand Down Expand Up @@ -924,20 +911,17 @@ def rewrite_meta_charset_content(request: pytest.FixtureRequest):
yield request.param


def test_rewrite_meta_charset(
rewrite_meta_charset_content: ContentForTests,
no_js_notify: Callable[[ZimPath], None],
):
def test_rewrite_meta_charset(rewrite_meta_charset_content: ContentForTests):
assert (
HtmlRewriter(
ArticleUrlRewriter(
article_url=HttpUrl(
f"http://{rewrite_meta_charset_content.article_url}"
)
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(rewrite_meta_charset_content.input_str)
.content
Expand All @@ -963,7 +947,6 @@ def rewrite_meta_http_equiv_redirect_full_content(request: pytest.FixtureRequest

def test_rewrite_meta_http_equiv_redirect_full(
rewrite_meta_http_equiv_redirect_full_content: ContentForTests,
no_js_notify: Callable[[ZimPath], None],
):
assert (
HtmlRewriter(
Expand All @@ -973,9 +956,9 @@ def test_rewrite_meta_http_equiv_redirect_full(
),
existing_zim_paths={ZimPath("kiwix.org/somepage")},
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(rewrite_meta_http_equiv_redirect_full_content.input_str)
.content
Expand Down Expand Up @@ -1112,11 +1095,12 @@ def rewrite_tag_name(attr_name: str, attr_value: str | None) -> AttrNameAndValue
@rules.rewrite_attribute()
def rewrite_call_notify(
attr_name: str,
notify_js_module: Callable[[ZimPath], None],
notify_js_module: Callable[[ZimPath], None] | None,
) -> AttrNameAndValue | None:
if attr_name != "call_notify":
return
notify_js_module(ZimPath("foo"))
if notify_js_module:
notify_js_module(ZimPath("foo"))
return


Expand Down
Loading