-
Notifications
You must be signed in to change notification settings - Fork 138
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* table: first basic version of table plugin * table: abort table if problematic node (e.g. hr) is discovered * table: render caption below table * table: fill up missing cells * table: combine header row with rows array * table: replicate content to spanned cell * table: change header underline format * table: grow table for colspan & rowspan * table: fix order of applying modifications * table: add align to the underline * table: loop through cells directly * table: abort early for nested table & small cleanup * table: add more tests * table: make empty cell wider * table: add option WithSkipEmptyRows * table: add option WithHeaderPromotion * table: escape "|" character inside table * table: improve naming & docs for WithSpanCellBehavior * table: skip render for role="presentation" table * table: skip render when parent is problematic * table: fallback render newlines between rows * collapse: update testcases to use RenderRepresentation
- Loading branch information
1 parent
7e02068
commit bd15218
Showing
20 changed files
with
3,235 additions
and
130 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package table | ||
|
||
import ( | ||
"github.com/JohannesKaufmann/dom" | ||
"golang.org/x/net/html" | ||
) | ||
|
||
func selectHeaderRowNode(node *html.Node) *html.Node { | ||
thead := dom.FindFirstNode(node, func(n *html.Node) bool { | ||
return dom.NodeName(n) == "thead" | ||
}) | ||
if thead != nil { | ||
firstTr := dom.FindFirstNode(thead, func(n *html.Node) bool { | ||
return dom.NodeName(n) == "tr" | ||
}) | ||
if firstTr != nil { | ||
// YEAH we found the "tr" inside the "thead" | ||
return firstTr | ||
} | ||
} | ||
|
||
firstTh := dom.FindFirstNode(node, func(n *html.Node) bool { | ||
return dom.NodeName(n) == "th" | ||
}) | ||
if firstTh != nil { | ||
// YEAH we found the "th" | ||
return firstTh.Parent | ||
} | ||
|
||
return nil | ||
} | ||
func selectNormalRowNodes(tableNode *html.Node, selectedHeaderRowNode *html.Node) []*html.Node { | ||
var collected []*html.Node | ||
|
||
var finder func(node *html.Node) | ||
finder = func(node *html.Node) { | ||
name := dom.NodeName(node) | ||
if name == "tr" && node != selectedHeaderRowNode { | ||
// We want to make sure to not select the header row a *second* time. | ||
collected = append(collected, node) | ||
} | ||
|
||
for child := node.FirstChild; child != nil; child = child.NextSibling { | ||
finder(child) | ||
} | ||
} | ||
finder(tableNode) | ||
|
||
return collected | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,218 @@ | ||
package table | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/JohannesKaufmann/html-to-markdown/v2/collapse" | ||
"github.com/JohannesKaufmann/html-to-markdown/v2/internal/tester" | ||
"golang.org/x/net/html" | ||
) | ||
|
||
// TestSelectRowNodes checks which rows selectHeaderRowNode and
// selectNormalRowNodes pick for a range of table inputs.
//
// Each case is parsed with tester.Parse and passed through collapse.Collapse.
// The selected header row is then tagged with a "__test_header_row__"
// attribute and every selected normal row with "__test_normal_row__", so the
// selection shows up in the tree representation that is compared against
// the expected string.
func TestSelectRowNodes(t *testing.T) { | ||
runs := []struct { | ||
// desc is the subtest name passed to t.Run.
desc string | ||
// input is the HTML handed to tester.Parse.
input string | ||
|
||
// expected is the representation checked by tester.ExpectRepresentation.
expected string | ||
}{ | ||
{ | ||
desc: "invalid table", | ||
input: ` | ||
<table> | ||
<tbody> | ||
<tr>there is no data cell tag</tr> | ||
</tbody> | ||
</table> | ||
`, | ||
|
||
// Note: "golang.org/x/net/html" automatically cleans up the "table" | ||
expected: ` | ||
├─body | ||
│ ├─#text "there is no data cell tag" | ||
│ ├─table | ||
│ │ ├─tbody | ||
│ │ │ ├─tr (__test_normal_row__="true") | ||
`, | ||
}, | ||
{ | ||
desc: "completely empty table", | ||
input: `<table></table>`, | ||
|
||
expected: ` | ||
├─body | ||
│ ├─table | ||
`, | ||
}, | ||
{ | ||
desc: "completely empty tbody", | ||
input: `<table><tbody></tbody></table>`, | ||
|
||
expected: ` | ||
├─body | ||
│ ├─table | ||
│ │ ├─tbody | ||
`, | ||
}, | ||
{ | ||
desc: "basic table", | ||
input: ` | ||
<table> | ||
<tr> | ||
<td>A1</td> | ||
<td>A2</td> | ||
</tr> | ||
<tr> | ||
<td>B1</td> | ||
<td>B2</td> | ||
</tr> | ||
</table> | ||
`, | ||
// Note: "golang.org/x/net/html" automatically adds the "tbody" | ||
expected: ` | ||
├─body | ||
│ ├─table | ||
│ │ ├─tbody | ||
│ │ │ ├─tr (__test_normal_row__="true") | ||
│ │ │ │ ├─td | ||
│ │ │ │ │ ├─#text "A1" | ||
│ │ │ │ ├─td | ||
│ │ │ │ │ ├─#text "A2" | ||
│ │ │ ├─tr (__test_normal_row__="true") | ||
│ │ │ │ ├─td | ||
│ │ │ │ │ ├─#text "B1" | ||
│ │ │ │ ├─td | ||
│ │ │ │ │ ├─#text "B2" | ||
`, | ||
}, | ||
{ | ||
// NOTE(review): the "th" cells in this input are closed with "</td>";
// the expected tree still lists them as "th". Confirm whether the
// mismatched closing tags are intentional.
desc: "basic table with th", | ||
input: ` | ||
<table> | ||
<tr> | ||
<th>Heading 1</td> | ||
<th>Heading 2</td> | ||
</tr> | ||
<tr> | ||
<td>A1</td> | ||
<td>A2</td> | ||
</tr> | ||
</table> | ||
`, | ||
expected: ` | ||
├─body | ||
│ ├─table | ||
│ │ ├─tbody | ||
│ │ │ ├─tr (__test_header_row__="true") | ||
│ │ │ │ ├─th | ||
│ │ │ │ │ ├─#text "Heading 1" | ||
│ │ │ │ ├─th | ||
│ │ │ │ │ ├─#text "Heading 2" | ||
│ │ │ ├─tr (__test_normal_row__="true") | ||
│ │ │ │ ├─td | ||
│ │ │ │ │ ├─#text "A1" | ||
│ │ │ │ ├─td | ||
│ │ │ │ │ ├─#text "A2" | ||
`, | ||
}, | ||
{ | ||
desc: "with caption, thead, tbody, tfoot", | ||
input: ` | ||
<table> | ||
<caption> | ||
A description about the table | ||
</caption> | ||
<thead> | ||
<tr> | ||
<th scope="col">Name</th> | ||
<th scope="col">City</th> | ||
<th scope="col">Age</th> | ||
</tr> | ||
</thead> | ||
<tbody> | ||
<tr> | ||
<th scope="row">Max Mustermann</th> | ||
<td>Berlin</td> | ||
<td>20</td> | ||
</tr> | ||
<tr> | ||
<th scope="row">Max Müller</th> | ||
<td>München</td> | ||
<td>30</td> | ||
</tr> | ||
</tbody> | ||
<tfoot> | ||
<tr> | ||
<th scope="row" colspan="2">Average age</th> | ||
<td>25</td> | ||
</tr> | ||
</tfoot> | ||
</table> | ||
`, | ||
expected: ` | ||
├─body | ||
│ ├─table | ||
│ │ ├─caption | ||
│ │ │ ├─#text "A description about the table" | ||
│ │ ├─thead | ||
│ │ │ ├─tr (__test_header_row__="true") | ||
│ │ │ │ ├─th (scope="col") | ||
│ │ │ │ │ ├─#text "Name" | ||
│ │ │ │ ├─th (scope="col") | ||
│ │ │ │ │ ├─#text "City" | ||
│ │ │ │ ├─th (scope="col") | ||
│ │ │ │ │ ├─#text "Age" | ||
│ │ ├─tbody | ||
│ │ │ ├─tr (__test_normal_row__="true") | ||
│ │ │ │ ├─th (scope="row") | ||
│ │ │ │ │ ├─#text "Max Mustermann" | ||
│ │ │ │ ├─td | ||
│ │ │ │ │ ├─#text "Berlin" | ||
│ │ │ │ ├─td | ||
│ │ │ │ │ ├─#text "20" | ||
│ │ │ ├─tr (__test_normal_row__="true") | ||
│ │ │ │ ├─th (scope="row") | ||
│ │ │ │ │ ├─#text "Max Müller" | ||
│ │ │ │ ├─td | ||
│ │ │ │ │ ├─#text "München" | ||
│ │ │ │ ├─td | ||
│ │ │ │ │ ├─#text "30" | ||
│ │ ├─tfoot | ||
│ │ │ ├─tr (__test_normal_row__="true") | ||
│ │ │ │ ├─th (scope="row" colspan="2") | ||
│ │ │ │ │ ├─#text "Average age" | ||
│ │ │ │ ├─td | ||
│ │ │ │ │ ├─#text "25" | ||
`, | ||
}, | ||
} | ||
for _, run := range runs { | ||
t.Run(run.desc, func(t *testing.T) { | ||
doc := tester.Parse(t, run.input, "") | ||
|
||
// NOTE FOR FUTURE: I discovered that "golang.org/x/net/html" automatically adds the "tbody". | ||
// => So we probably don't need to do that much work beforehand. | ||
collapse.Collapse(doc, nil) | ||
|
||
{ | ||
// We can then see if we correctly *identified* all the necessary table components. | ||
// For that we add an attribute (just for the test). | ||
|
||
// The selectors receive the whole parsed document, not just the table node.
headerRow := selectHeaderRowNode(doc) | ||
if headerRow != nil { | ||
headerRow.Attr = append(headerRow.Attr, html.Attribute{ | ||
Key: "__test_header_row__", | ||
Val: "true", | ||
}) | ||
} | ||
// headerRow may be nil here; it is passed through unchanged as the row to exclude.
for _, n := range selectNormalRowNodes(doc, headerRow) { | ||
n.Attr = append(n.Attr, html.Attribute{ | ||
Key: "__test_normal_row__", | ||
Val: "true", | ||
}) | ||
} | ||
} | ||
|
||
tester.ExpectRepresentation(t, doc, "output", run.expected) | ||
}) | ||
} | ||
} |
Oops, something went wrong.