@@ -6,26 +6,30 @@ import 'package:html/dom.dart' as html;
66class WhitespaceProcessing {
77 /// [processWhitespace] handles the removal of unnecessary whitespace from
88 /// a StyledElement tree.
9+ ///
10+ /// The criteria for determining which whitespace is replaceable are outlined
11+ /// at https://www.w3.org/TR/css-text-3/
12+ /// and summarized at https://medium.com/@patrickbrosset/when-does-white-space-matter-in-html-b90e8a7cdd33
static StyledElement processWhitespace(StyledElement tree) {
  // The passes run in this fixed order; each one receives the tree returned
  // by the previous pass.
  final passes = <StyledElement Function(StyledElement)>[
    _processInternalWhitespace,
    _processInlineWhitespace,
    _processBlockWhitespace,
    _removeEmptyElements,
  ];

  return passes.fold<StyledElement>(tree, (current, pass) => pass(current));
}
1520
1621 /// [_processInternalWhitespace] removes unnecessary whitespace from the StyledElement tree.
17- ///
18- /// The criteria for determining which whitespace is replaceable is outlined
19- /// at https://www.w3.org/TR/css-text-3/
20- /// and summarized at https://medium.com/@patrickbrosset/when-does-white-space-matter-in-html-b90e8a7cdd33
static StyledElement _processInternalWhitespace(StyledElement tree) {
  // `white-space: pre` subtrees are returned untouched, children included.
  final preserveAsIs = tree.style.whiteSpace == WhiteSpace.pre;

  if (!preserveAsIs) {
    if (tree is TextContentElement) {
      // Text node: collapse the whitespace inside its own text.
      tree.text = _removeUnnecessaryWhitespace(tree.text!);
    } else {
      // Container: apply the same pass to every child.
      for (final child in tree.children) {
        _processInternalWhitespace(child);
      }
    }
  }

  return tree;
}
3135
@@ -36,13 +40,95 @@ class WhitespaceProcessing {
3640 return _processInlineWhitespaceRecursive (tree, Context (false ));
3741 }
3842
43+ /// [_processBlockWhitespace] removes unnecessary whitespace from block
44+ /// rendering contexts. Specifically, a space at the beginning and end of
45+ /// each inline rendering context should be removed.
static StyledElement _processBlockWhitespace(StyledElement tree) {
  // Preformatted subtrees are left exactly as they are.
  if (tree.style.whiteSpace == WhiteSpace.pre) {
    return tree;
  }

  // True for a sibling that is a block-level element or a <br>.
  // A missing sibling (null) never counts.
  bool endsLine(StyledElement? node) =>
      node != null &&
      (node.style.display == Display.block || node.name == "br");

  final siblings = tree.children;

  // Recurse into every child, noting along the way whether any of them is a
  // block or a <br>.
  var hasLineEndingChild = false;
  for (final node in siblings) {
    if (endsLine(node)) {
      hasLineEndingChild = true;
    }
    _processBlockWhitespace(node);
  }

  // Without a block or <br> child there is nothing to trim at this level.
  if (!hasLineEndingChild) {
    return tree;
  }

  for (var index = 0; index < siblings.length; index++) {
    final current = siblings[index];
    final previous = index > 0 ? siblings[index - 1] : null;
    final following = index < siblings.length - 1 ? siblings[index + 1] : null;

    if (current.style.whiteSpace == WhiteSpace.pre) {
      continue;
    }

    // A block child is trimmed on both of its own edges.
    if (current.style.display == Display.block) {
      _removeLeadingSpace(current);
      _removeTrailingSpace(current);
    }

    // A child that follows a block or <br> loses its leading space.
    if (endsLine(previous)) {
      _removeLeadingSpace(current);
    }

    // A child that precedes a block or <br> loses its trailing space.
    if (endsLine(following)) {
      _removeTrailingSpace(current);
    }
  }

  return tree;
}
90+
91+ /// [_removeLeadingSpace] removes any leading space
92+ /// from the text of the tree at this level, no matter how deep in the tree
93+ /// it may be.
static void _removeLeadingSpace(StyledElement element) {
  // Preformatted content keeps its whitespace verbatim.
  if (element.style.whiteSpace == WhiteSpace.pre) {
    return;
  }

  if (element is TextContentElement) {
    // Strip only ASCII whitespace (space, tab, LF, CR, FF). String.trimLeft
    // would also delete non-breaking spaces (U+00A0, i.e. &nbsp;) and other
    // Unicode spaces, which CSS does not treat as collapsible.
    element.text = element.text?.replaceFirst(RegExp(r"^[ \t\n\r\f]+"), "");
  } else if (element.children.isNotEmpty) {
    // For a container, descend into its first child.
    _removeLeadingSpace(element.children.first);
  }
}
105+
106+ /// [_removeTrailingSpace] removes any trailing space
107+ /// from the text of the tree at this level, no matter how deep in the tree
108+ /// it may be.
static void _removeTrailingSpace(StyledElement element) {
  // Preformatted content keeps its whitespace verbatim.
  if (element.style.whiteSpace == WhiteSpace.pre) {
    return;
  }

  if (element is TextContentElement) {
    // Strip only ASCII whitespace (space, tab, LF, CR, FF). String.trimRight
    // would also delete non-breaking spaces (U+00A0, i.e. &nbsp;) and other
    // Unicode spaces, which CSS does not treat as collapsible.
    element.text = element.text?.replaceFirst(RegExp(r"[ \t\n\r\f]+$"), "");
  } else if (element.children.isNotEmpty) {
    // For a container, descend into its last child.
    _removeTrailingSpace(element.children.last);
  }
}
120+
39121 /// [_processInlineWhitespaceRecursive] analyzes the whitespace between and among different
40122 /// inline elements, and replaces any instance of two or more spaces with a single space, according
41123 /// to the w3's HTML whitespace processing specification linked to above.
42124 static StyledElement _processInlineWhitespaceRecursive (
43125 StyledElement tree,
44126 Context <bool > keepLeadingSpace,
45127 ) {
128+ if (tree.style.whiteSpace == WhiteSpace .pre) {
129+ return tree;
130+ }
131+
46132 if (tree is TextContentElement ) {
47133 /// initialize indices to negative numbers to make conditionals a little easier
48134 int textIndex = - 1 ;
@@ -62,9 +148,9 @@ class WhitespaceProcessing {
62148 final parentNodes = tree.element? .parent? .nodes;
63149
64150 /// find the index of the tree itself in the parent nodes
65- if (( parentNodes? .length ?? 0 ) >= 1 ) {
151+ if (parentNodes? .isNotEmpty ?? false ) {
66152 elementIndex =
67- parentNodes? .indexWhere ((element) => element == tree.element) ?? - 1 ;
153+ parentNodes! .indexWhere ((element) => element == tree.element);
68154 }
69155
70156 /// if the tree is any node except the last node in the node list and the
@@ -117,9 +203,7 @@ class WhitespaceProcessing {
117203 /// update the [Context] to signify to that next text node whether it should
118204 /// keep its whitespace. This is based on whether the current text ends with a
119205 /// whitespace.
120- if (textIndex ==
121- ((tree.element? .nodes.length ?? 0 ) -
122- 1 ) && //TODO is this the proper ??
206+ if (textIndex == (tree.node.nodes.length - 1 ) &&
123207 tree.element? .localName != "br" &&
124208 parentAfterText.startsWith (' ' )) {
125209 keepLeadingSpace.data = ! tree.text! .endsWith (' ' );
@@ -142,11 +226,11 @@ class WhitespaceProcessing {
142226 /// (4) Replace any instances of two or more spaces with a single space.
static String _removeUnnecessaryWhitespace(String text) {
  // Spaces directly before a newline / directly after a newline.
  final spacesBeforeBreak = RegExp(r" *(?=\n)");
  final spacesAfterBreak = RegExp(r"(?<=\n) *");
  // Any run of two or more consecutive spaces.
  final spaceRun = RegExp(r" {2,}");

  // Drop the spaces that hug a newline on either side.
  var result = text.replaceAll(spacesBeforeBreak, "");
  result = result.replaceAll(spacesAfterBreak, "");

  // Newlines and tabs each become a single space.
  result = result.replaceAll("\n", " ");
  result = result.replaceAll("\t", " ");

  // Collapse whatever runs of spaces remain into one space.
  return result.replaceAll(spaceRun, " ");
}
151235
152236 /// [_removeEmptyElements] recursively removes empty elements.
@@ -155,7 +239,7 @@ class WhitespaceProcessing {
155239 /// or any block-level [TextContentElement] that contains only whitespace and doesn't follow
156240 /// a block element or a line break.
157241 static StyledElement _removeEmptyElements (StyledElement tree) {
158- List <StyledElement > toRemove = < StyledElement > [] ;
242+ Set <StyledElement > toRemove = < StyledElement > {} ;
159243 bool lastChildBlock = true ;
160244 tree.children.forEachIndexed ((index, child) {
161245 if (child is EmptyContentElement ) {
0 commit comments