From 23f12aa5e20f85115cf4423f34d657efbf1c77da Mon Sep 17 00:00:00 2001 From: Kristaps Dzonsons Date: Mon, 3 Apr 2023 15:17:11 -0700 Subject: [PATCH] Don't apply smart typography to escaped text. This passes all escaped text back out as HTEXT_ESCAPED, which is opaque to the smart formatter. --- document.c | 50 ++++++++++++++++++++++++++-------- lowdown.h | 2 ++ man/lowdown.3 | 6 ++++ regress/smarty-no-escape.html | 1 + regress/smarty-no-escape.md | 2 ++ regress/smarty-no-escape2.html | 4 +++ regress/smarty-no-escape2.md | 4 +++ regress/smarty-no-escape3.html | 1 + regress/smarty-no-escape3.md | 2 ++ regress/smarty-no-escape4.html | 1 + regress/smarty-no-escape4.md | 2 ++ smartypants.c | 11 ++++++-- 12 files changed, 72 insertions(+), 14 deletions(-) create mode 100644 regress/smarty-no-escape.html create mode 100644 regress/smarty-no-escape.md create mode 100644 regress/smarty-no-escape2.html create mode 100644 regress/smarty-no-escape2.md create mode 100644 regress/smarty-no-escape3.html create mode 100644 regress/smarty-no-escape3.md create mode 100644 regress/smarty-no-escape4.html create mode 100644 regress/smarty-no-escape4.md diff --git a/document.c b/document.c index 3a86b54..46d3d01 100644 --- a/document.c +++ b/document.c @@ -158,26 +158,29 @@ parse_listitem(struct lowdown_buf *, struct lowdown_doc *, char *, size_t, enum hlist_fl *, size_t); /* - * Add a node to the parse stack, or retrieve a current node if - * requesting multiple LOWDOWN_NORMAL_TEXTs in sequence. Returns the - * node, initialised to the given type, after adjusting the parse + * Add a node to the parse stack or retrieve a current node if + * requesting multiple similar LOWDOWN_NORMAL_TEXT in sequence. Returns + * the node, initialised to the given type, after adjusting the parse * position. Returns NULL on memory allocation failure. 
*/ static struct lowdown_node * -pushnode(struct lowdown_doc *doc, enum lowdown_rndrt t) +pushnode_full(struct lowdown_doc *doc, enum lowdown_rndrt t, int fl) { struct lowdown_node *n; /* * Special case: if we're pushing a NORMAL_TEXT node, see if one - * already exists and return that. This means that each push - * for text nodes should be careful to use hbuf_push() instead - * of hbuf_create() when adding text content. + * already exists with the same flags and return that. This + * means that each push for text nodes should be careful to use + * hbuf_push() instead of hbuf_create() when adding text + * content. */ if (t == LOWDOWN_NORMAL_TEXT && doc->current != NULL) { n = TAILQ_LAST(&doc->current->children, lowdown_nodeq); - if (n != NULL && n->type == t) { + if (n != NULL && + n->type == LOWDOWN_NORMAL_TEXT && + n->rndr_normal_text.flags == fl) { doc->depth++; doc->current = n; return n; @@ -201,6 +204,26 @@ pushnode(struct lowdown_doc *doc, enum lowdown_rndrt t) return n; } +/* + * Push a new node or, if LOWDOWN_NORMAL_TEXT, retrieve the existing one + * if the flags exactly match. + */ +static struct lowdown_node * +pushnode(struct lowdown_doc *doc, enum lowdown_rndrt t) +{ + return pushnode_full(doc, t, 0); +} + +/* + * Push a new LOWDOWN_NORMAL_TEXT or retrieve the existing one if the + * flags exactly match. + */ +static struct lowdown_node * +pushtext(struct lowdown_doc *doc, int flags) +{ + return pushnode_full(doc, LOWDOWN_NORMAL_TEXT, flags); +} + /* * Sets a buffer with the contents of "data" of size "datasz". The * buffer must be empty. Return FALSE on failure, TRUE on success. @@ -1047,7 +1070,10 @@ char_codespan(struct lowdown_doc *doc, } /* - * '\\' backslash escape + * '\\' backslash escaped text. + * Escaped text isn't handled by smart typography, although it must be + * escaped for output. Mark it as HTEXT_ESCAPED to make sure that we + * don't use smart typography on the node. 
*/ static ssize_t char_escape(struct lowdown_doc *doc, @@ -1091,14 +1117,16 @@ char_escape(struct lowdown_doc *doc, if (strchr(escape_chars, data[1]) == NULL) return 0; - if ((n = pushnode(doc, LOWDOWN_NORMAL_TEXT)) == NULL) + if ((n = pushtext(doc, HTEXT_ESCAPED)) == NULL) return -1; + n->rndr_normal_text.flags = HTEXT_ESCAPED; if (!hbuf_push(&n->rndr_normal_text.text, data + 1, 1)) return -1; popnode(doc, n); } else if (size == 1) { - if ((n = pushnode(doc, LOWDOWN_NORMAL_TEXT)) == NULL) + if ((n = pushtext(doc, HTEXT_ESCAPED)) == NULL) return -1; + n->rndr_normal_text.flags = HTEXT_ESCAPED; if (!hbuf_push(&n->rndr_normal_text.text, data, 1)) return -1; popnode(doc, n); diff --git a/lowdown.h b/lowdown.h index e03e15f..b127b90 100644 --- a/lowdown.h +++ b/lowdown.h @@ -155,6 +155,8 @@ struct rndr_paragraph { }; struct rndr_normal_text { + int flags; +#define HTEXT_ESCAPED 0x01 struct lowdown_buf text; }; diff --git a/man/lowdown.3 b/man/lowdown.3 index 5f397af..112d847 100644 --- a/man/lowdown.3 +++ b/man/lowdown.3 @@ -878,6 +878,12 @@ The basic .Va text content for .Dv LOWDOWN_NORMAL_TEXT . +If +.Va flags +is set to +.Dv HTEXT_ESCAPED , +the text may be escaped for output, but may not be altered by any smart +typography or similar (it should be passed as-is). .It Va rndr_paragraph For .Dv LOWDOWN_PARAGRAPH , diff --git a/regress/smarty-no-escape.html b/regress/smarty-no-escape.html new file mode 100644 index 0000000..989a328 --- /dev/null +++ b/regress/smarty-no-escape.html @@ -0,0 +1 @@ +

Hi "there".

diff --git a/regress/smarty-no-escape.md b/regress/smarty-no-escape.md new file mode 100644 index 0000000..6054b68 --- /dev/null +++ b/regress/smarty-no-escape.md @@ -0,0 +1,2 @@ + +Hi \"there\". diff --git a/regress/smarty-no-escape2.html b/regress/smarty-no-escape2.html new file mode 100644 index 0000000..299e257 --- /dev/null +++ b/regress/smarty-no-escape2.html @@ -0,0 +1,4 @@ + diff --git a/regress/smarty-no-escape2.md b/regress/smarty-no-escape2.md new file mode 100644 index 0000000..4eb3d04 --- /dev/null +++ b/regress/smarty-no-escape2.md @@ -0,0 +1,4 @@ +Blah: + + - **--foo-bar** + - **\-\-foo-bar** diff --git a/regress/smarty-no-escape3.html b/regress/smarty-no-escape3.html new file mode 100644 index 0000000..e6c808b --- /dev/null +++ b/regress/smarty-no-escape3.html @@ -0,0 +1 @@ +

Should be esaped: <hi />. <hi>.

diff --git a/regress/smarty-no-escape3.md b/regress/smarty-no-escape3.md new file mode 100644 index 0000000..a430e6f --- /dev/null +++ b/regress/smarty-no-escape3.md @@ -0,0 +1,2 @@ + +Should be esaped: \<hi />. \<hi>. diff --git a/regress/smarty-no-escape4.html b/regress/smarty-no-escape4.html new file mode 100644 index 0000000..f5cec15 --- /dev/null +++ b/regress/smarty-no-escape4.html @@ -0,0 +1 @@ +

a–--b

diff --git a/regress/smarty-no-escape4.md b/regress/smarty-no-escape4.md new file mode 100644 index 0000000..b207097 --- /dev/null +++ b/regress/smarty-no-escape4.md @@ -0,0 +1,2 @@ + +a--\-\-b diff --git a/smartypants.c b/smartypants.c index 94386fa..3db2eaf 100644 --- a/smartypants.c +++ b/smartypants.c @@ -315,7 +315,7 @@ smarty_right_wb(const struct lowdown_node *n, size_t pos) * of the parse tree, <0 on failure, otherwise return zero. */ static int -smarty_hbuf(struct lowdown_node *n, size_t *maxn, +smarty_text(struct lowdown_node *n, size_t *maxn, struct lowdown_buf *b, struct smarty *s) { size_t i = 0, j, sz; @@ -329,6 +329,11 @@ smarty_hbuf(struct lowdown_node *n, size_t *maxn, assert(n->type == LOWDOWN_NORMAL_TEXT); + /* If the text node was escaped, pass it out unchanged. */ + + if (n->rndr_normal_text.flags & HTEXT_ESCAPED) + return 0; + for (i = 0; i < b->size; i++) { switch (b->data[i]) { case '.': @@ -421,7 +426,7 @@ smarty_span(struct lowdown_node *root, size_t *maxn, TAILQ_FOREACH(n, &root->children, entries) switch (types[n->type]) { case TYPE_TEXT: - c = smarty_hbuf(n, maxn, + c = smarty_text(n, maxn, &n->rndr_normal_text.text, s); if (c < 0) return 0; @@ -464,7 +469,7 @@ smarty_block(struct lowdown_node *root, return 0; break; case TYPE_TEXT: - c = smarty_hbuf(n, maxn, + c = smarty_text(n, maxn, &n->rndr_normal_text.text, &s); if (c < 0) return 0;