Skip to content

Commit cd747ac

Browse files
committed
v0.1.10, support empty tableCells, and tableCells containing just hard breaks
1 parent 1aa2a34 commit cd747ac

File tree

10 files changed

+285
-14
lines changed

10 files changed

+285
-14
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "htmltoadf"
3-
version = "0.1.9"
3+
version = "0.1.10"
44
edition = "2021"
55
license = "MIT"
66
description = "An HTML to Atlassian Document Format (ADF) converter"

README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ $ html2adf -h
3838
```
3939

4040
```
41-
htmltoadf 0.1.9
41+
htmltoadf 0.1.10
4242
An HTML to Atlassian Document Format (ADF) converter
4343
4444
USAGE:
@@ -56,20 +56,20 @@ OPTIONS:
5656
### Install Binary from Crates.io with `cargo install`
5757
```
5858
$ cargo install htmltoadf
59-
installing htmltoadf v0.1.9 (/usr/src/html2adf)
59+
installing htmltoadf v0.1.10 (/usr/src/html2adf)
6060
Updating crates.io index
6161
Downloading crates ...
6262
Downloaded lock_api v0.4.6
6363
--snip--
64-
Compiling htmltoadf v0.1.9
64+
Compiling htmltoadf v0.1.10
6565
Finished release [optimized] target(s) in 1m 42s
6666
Installing ~/.cargo/bin/htmltoadf
67-
Installed package `htmltoadf v0.1.9` (executable `html2adf`)
67+
Installed package `htmltoadf v0.1.10` (executable `html2adf`)
6868
```
6969

7070
### Download Binary file from Github
7171
Pre-built binaries can be downloaded from here:
72-
https://github.com/wouterken/htmltoadf/releases/tag/0.1.9
72+
https://github.com/wouterken/htmltoadf/releases/tag/0.1.10
7373

7474
### Docker Image
7575
**Docker Repo:**
@@ -79,10 +79,10 @@ https://hub.docker.com/r/wouterken/html2adf
7979
**Usage**
8080

8181
```bash
82-
$ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf:0.1.9
82+
$ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf:0.1.10
8383
{"version":1,"type":"doc","content":[{"type":"heading","attrs":{"level":1},"content":[{"type":"text","text":"Hello world"},{"type":"text","text":"Test"}]}]}
8484

85-
$ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf:0.1.9 | jq
85+
$ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf:0.1.10 | jq
8686
{
8787
"version": 1,
8888
"type": "doc",
@@ -115,7 +115,7 @@ $ echo "<h1>Hello world<p>Test</p></h1>" | docker run --rm -i wouterken/html2adf
115115

116116
```toml
117117
[dependencies]
118-
htmltoadf = "0.1.9"
118+
htmltoadf = "0.1.10"
119119
```
120120

121121
**Code**

docs/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@
7979

8080
</style>
8181
<script defer type="module">
82-
import init, {convert} from "https://unpkg.com/htmltoadf@0.1.9/htmltoadf.js";
82+
import init, {convert} from "https://unpkg.com/htmltoadf@0.1.10/htmltoadf.js";
8383

8484
let editor;
8585

src/adf_builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use scraper::ElementRef;
1111
use scraper::Html;
1212
use serde_json::{Map, Value};
1313

14-
static VALID_EMPTY_TYPES: [&str; 4] = ["hr", "iframe", "img", "br"];
14+
static VALID_EMPTY_TYPES: [&str; 5] = ["hr", "iframe", "img", "br", "td"];
1515

1616
/**
1717
* The main procedure for our ADF Builder.

src/adf_structure.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ lazy_static! {
5858
),
5959
(
6060
String::from("tableCell"),
61-
AdfPermittedChildren::any(&["codeBlock", "blockCard", "paragraph", "bulletList", "mediaSingle", "orderedList", "heading", "panel", "blockquote", "rule", "mediaGroup", "decisionList", "taskList", "extension", "embedCard", "nestedExpand"])
61+
AdfPermittedChildren::any(&["codeBlock", "blockCard", "paragraph", "bulletList", "mediaSingle", "orderedList", "heading", "panel", "blockquote", "rule", "mediaGroup", "decisionList", "taskList", "extension", "embedCard", "nestedExpand", "hardBreak"])
6262
),
6363
(
6464
String::from("doc"),

src/extractor.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use ego_tree::iter::Edge;
2+
use ego_tree::NodeRef;
23
use regex::Regex;
34
use scraper::Node;
45
use scraper::{ElementRef, Html};
@@ -32,6 +33,18 @@ pub fn squish_surrounding_whitespace(input: &str) -> String {
3233
re.replace_all(input, " ").to_string()
3334
}
3435

36+
pub fn has_text_node(node: NodeRef<Node>) -> bool {
37+
node.children().any(|node| {
38+
if let Some(element) = node.value().as_element() {
39+
element.name() == "br" || has_text_node(node)
40+
}
41+
else if let Some(text_node) = node.value().as_text() {
42+
!text_node.text.trim().is_empty()
43+
} else {
44+
false
45+
}
46+
})
47+
}
3548
/**
3649
* We parse a raw scraper::HTML and return a
3750
* list of leaf doc nodes (each with a linked list pointer to the root)
@@ -63,6 +76,15 @@ pub fn extract_leaves(fragment: &Html) -> Vec<DocNode> {
6376
text: "".trim().to_owned(),
6477
node,
6578
})
79+
} else if element.value().name() == "td" {
80+
let has_text_node = has_text_node(node);
81+
if !has_text_node {
82+
leaf_nodes.push(DocNode {
83+
name: "td",
84+
text: "".trim().to_owned(),
85+
node,
86+
})
87+
}
6688
}
6789
} else if let Node::Text(text_node) = node.value() {
6890
if let Some(parent) = node.parent().and_then(ElementRef::wrap) {

src/tests/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ mod image;
77
mod lists;
88
mod marks;
99
mod paragraphs;
10+
mod tables;
1011
use crate::convert_html_str_to_adf_str;
1112

1213
#[allow(dead_code)]

src/tests/tables.rs

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
#[allow(unused_imports)]
2+
use super::assert_output_json_eq;
3+
4+
#[allow(unused_imports)]
5+
use serde_json::json;
6+
7+
#[cfg(test)]
8+
#[test]
9+
fn test_empty_cell() {
10+
assert_output_json_eq(
11+
r"<div><table ><tbody>
12+
<tr><td >A</td><td >B</td><td >C</td></tr>
13+
<tr><td >value 1</td><td ></td><td >value 2</td></tr>
14+
</tbody></table>
15+
</div>",
16+
json!({
17+
"version": 1,
18+
"type": "doc",
19+
"content": [
20+
{
21+
"type": "table",
22+
"content": [
23+
{
24+
"type": "tableRow",
25+
"content": [
26+
{
27+
"type": "tableCell",
28+
"content": [
29+
{
30+
"type": "paragraph",
31+
"content": [
32+
{
33+
"type": "text",
34+
"text": "A"
35+
}
36+
]
37+
}
38+
]
39+
},
40+
{
41+
"type": "tableCell",
42+
"content": [
43+
{
44+
"type": "paragraph",
45+
"content": [
46+
{
47+
"type": "text",
48+
"text": "B"
49+
}
50+
]
51+
}
52+
]
53+
},
54+
{
55+
"type": "tableCell",
56+
"content": [
57+
{
58+
"type": "paragraph",
59+
"content": [
60+
{
61+
"type": "text",
62+
"text": "C"
63+
}
64+
]
65+
}
66+
]
67+
}
68+
]
69+
},
70+
{
71+
"type": "tableRow",
72+
"content": [
73+
{
74+
"type": "tableCell",
75+
"content": [
76+
{
77+
"type": "paragraph",
78+
"content": [
79+
{
80+
"type": "text",
81+
"text": "value 1"
82+
}
83+
]
84+
}
85+
]
86+
},
87+
{
88+
"type": "tableCell"
89+
},
90+
{
91+
"type": "tableCell",
92+
"content": [
93+
{
94+
"type": "paragraph",
95+
"content": [
96+
{
97+
"type": "text",
98+
"text": "value 2"
99+
}
100+
]
101+
}
102+
]
103+
}
104+
]
105+
}
106+
]
107+
},
108+
{
109+
"type": "paragraph",
110+
"content": [
111+
{
112+
"type": "text",
113+
"text": " "
114+
}
115+
]
116+
}
117+
]
118+
}),
119+
);
120+
}
121+
122+
#[test]
123+
/**
124+
* A table cell containing only a <br/> becomes a tableCell whose paragraph holds a single hardBreak
125+
*/
126+
fn test_hard_break_in_cell() {
127+
assert_output_json_eq(
128+
r"<div><table ><tbody>
129+
<tr><td >A</td><td >B</td><td >C</td></tr>
130+
<tr><td >value 1</td><td ><br/></td><td >value 2</td></tr>
131+
</tbody></table>
132+
</div>",
133+
json!({
134+
"version": 1,
135+
"type": "doc",
136+
"content": [
137+
{
138+
"type": "table",
139+
"content": [
140+
{
141+
"type": "tableRow",
142+
"content": [
143+
{
144+
"type": "tableCell",
145+
"content": [
146+
{
147+
"type": "paragraph",
148+
"content": [
149+
{
150+
"type": "text",
151+
"text": "A"
152+
}
153+
]
154+
}
155+
]
156+
},
157+
{
158+
"type": "tableCell",
159+
"content": [
160+
{
161+
"type": "paragraph",
162+
"content": [
163+
{
164+
"type": "text",
165+
"text": "B"
166+
}
167+
]
168+
}
169+
]
170+
},
171+
{
172+
"type": "tableCell",
173+
"content": [
174+
{
175+
"type": "paragraph",
176+
"content": [
177+
{
178+
"type": "text",
179+
"text": "C"
180+
}
181+
]
182+
}
183+
]
184+
}
185+
]
186+
},
187+
{
188+
"type": "tableRow",
189+
"content": [
190+
{
191+
"type": "tableCell",
192+
"content": [
193+
{
194+
"type": "paragraph",
195+
"content": [
196+
{
197+
"type": "text",
198+
"text": "value 1"
199+
}
200+
]
201+
}
202+
]
203+
},
204+
{
205+
"type": "tableCell",
206+
"content": [
207+
{
208+
"type": "paragraph",
209+
"content": [
210+
{
211+
"type": "hardBreak"
212+
}
213+
]
214+
}
215+
]
216+
},
217+
{
218+
"type": "tableCell",
219+
"content": [
220+
{
221+
"type": "paragraph",
222+
"content": [
223+
{
224+
"type": "text",
225+
"text": "value 2"
226+
}
227+
]
228+
}
229+
]
230+
}
231+
]
232+
}
233+
]
234+
},
235+
{
236+
"type": "paragraph",
237+
"content": [
238+
{
239+
"type": "text",
240+
"text": " "
241+
}
242+
]
243+
}
244+
]
245+
}),
246+
);
247+
}

0 commit comments

Comments
 (0)