Skip to content

Commit 1e7b41d

Browse files
committed
Merge branch 'tfedor-feature-options-setters' into dev/3.0.0
2 parents b86c1d3 + e250ec8 commit 1e7b41d

File tree

2 files changed

+212
-0
lines changed

2 files changed

+212
-0
lines changed

src/PHPHtmlParser/Options.php

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,132 @@ public function __construct()
5757
$this->options = $this->defaults;
5858
}
5959

60+
/**
61+
* The whitespaceTextNode option, true by default, tells the parser to save text nodes even if the content of the
62+
* node is empty (only whitespace). Setting it to false will ignore all whitespace-only text nodes found in the document.
63+
* @param bool $value Value stored under the 'whitespaceTextNode' option key.
64+
* @return Options This instance, so that setter calls can be chained.
65+
*/
66+
public function setWhitespaceTextNode(bool $value): self {
67+
$this->options['whitespaceTextNode'] = $value;
68+
return $this;
69+
}
70+
71+
/**
72+
* Strict, false by default, will throw a StrictException if it finds that the HTML is not strictly compliant
73+
* (all tags must have a closing tag, no attribute without a value, etc.).
74+
* @param bool $value Value stored under the 'strict' option key.
75+
* @return Options This instance, so that setter calls can be chained.
76+
*/
77+
public function setStrict(bool $value): self {
78+
$this->options['strict'] = $value;
79+
return $this;
80+
}
81+
82+
/**
83+
* The enforceEncoding option, null by default, will enforce a character set to be used for reading the content
84+
* and returning the content in that encoding. Setting it to null will trigger an attempt to figure out
85+
* the encoding from within the content of the string given instead.
86+
* @param string|null $value Value stored under the 'enforceEncoding' option key; null is accepted.
87+
* @return Options This instance, so that setter calls can be chained.
88+
*/
89+
public function setEnforceEncoding(?string $value): self {
90+
$this->options['enforceEncoding'] = $value;
91+
return $this;
92+
}
93+
94+
/**
95+
* Set this to false to skip the entire clean up phase of the parser. Defaults to true.
96+
* @param bool $value Value stored under the 'cleanupInput' option key.
97+
* @return Options This instance, so that setter calls can be chained.
98+
*/
99+
public function setCleanupInput(bool $value): self {
100+
$this->options['cleanupInput'] = $value;
101+
return $this;
102+
}
103+
104+
/**
105+
* Set this to false to skip removing the script tags from the document body. This might have adverse effects.
106+
* Defaults to true.
107+
*
108+
* NOTE: Only applies when cleanupInput is true; setting cleanupInput to false skips the entire clean up phase.
109+
*
110+
* @param bool $value Value stored under the 'removeScripts' option key.
111+
* @return Options This instance, so that setter calls can be chained.
112+
*/
113+
public function setRemoveScripts(bool $value): self {
114+
$this->options['removeScripts'] = $value;
115+
return $this;
116+
}
117+
118+
/**
119+
* Set this to false to skip removing the style tags from the document body. This might have adverse effects. Defaults to true.
120+
*
121+
* NOTE: Only applies when cleanupInput is true; setting cleanupInput to false skips the entire clean up phase.
122+
* @param bool $value Value stored under the 'removeStyles' option key.
123+
* @return Options This instance, so that setter calls can be chained.
124+
*/
125+
public function setRemoveStyles(bool $value): self {
126+
$this->options['removeStyles'] = $value;
127+
return $this;
128+
}
129+
130+
/**
131+
* Preserves line breaks if set to true. If set to false, line breaks are cleaned up
132+
* as part of the input clean up process. Defaults to false.
133+
*
134+
* NOTE: Only applies when cleanupInput is true; setting cleanupInput to false skips the entire clean up phase.
135+
* @param bool $value Value stored under the 'preserveLineBreaks' option key.
136+
* @return Options This instance, so that setter calls can be chained.
137+
*/
138+
public function setPreserveLineBreaks(bool $value): self {
139+
$this->options['preserveLineBreaks'] = $value;
140+
return $this;
141+
}
142+
143+
/**
144+
* Set this to false if you want to preserve whitespace inside of text nodes. It is set to true by default.
145+
* @param bool $value Value stored under the 'removeDoubleSpace' option key.
146+
* @return Options This instance, so that setter calls can be chained.
147+
*/
148+
public function setRemoveDoubleSpace(bool $value): self {
149+
$this->options['removeDoubleSpace'] = $value;
150+
return $this;
151+
}
152+
153+
/**
154+
* Set this to false if you want to preserve Smarty script found in the HTML content. It is set to true by default.
155+
* @param bool $value Value stored under the 'removeSmartyScripts' option key.
156+
* @return Options This instance, so that setter calls can be chained.
157+
*/
158+
public function setRemoveSmartyScripts(bool $value): self {
159+
$this->options['removeSmartyScripts'] = $value;
160+
return $this;
161+
}
162+
163+
/**
164+
* By default this is set to false for legacy support. Setting this to true will change the behavior of find
165+
* to order elements by depth first. This will properly preserve the order of elements as they were in the HTML.
166+
*
167+
* @param bool $value Value stored under the 'depthFirstSearch' option key.
168+
* @return Options This instance, so that setter calls can be chained.
169+
* @deprecated This option will be removed in version 3.0.0 with the new behavior being as if it was set to true.
170+
*/
171+
public function setDepthFirstSearch(bool $value): self {
172+
$this->options['depthFirstSearch'] = $value;
173+
return $this;
174+
}
175+
176+
/**
177+
* By default this is set to false. Setting this to true will apply the PHP function htmlspecialchars_decode to all attribute values and text nodes.
178+
* @param bool $value Value stored under the 'htmlSpecialCharsDecode' option key.
179+
* @return Options This instance, so that setter calls can be chained.
180+
*/
181+
public function setHtmlSpecialCharsDecode(bool $value): self {
182+
$this->options['htmlSpecialCharsDecode'] = $value;
183+
return $this;
184+
}
185+
60186
/**
61187
* A magic get to call the get() method.
62188
*

tests/OptionsTest.php

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,92 @@ public function testGettingNoOption()
5656
$this->assertEquals(null, $options->get('doesnotexist'));
5757
}
5858

59+
/**
* Checks every dedicated setter on Options: all options are first seeded through setOptions(),
* then each setter is called and the stored value is read back with get() and asserted.
*/
public function testSetters() {
60+
$options = new Options();
61+
62+
$options->setOptions([
63+
'whitespaceTextNode' => false,
64+
'strict' => false,
65+
'enforceEncoding' => null,
66+
'cleanupInput' => false,
67+
'removeScripts' => false,
68+
'removeStyles' => false,
69+
'preserveLineBreaks' => false,
70+
'removeDoubleSpace' => false,
71+
'removeSmartyScripts' => false,
72+
'depthFirstSearch' => false,
73+
'htmlSpecialCharsDecode' => false,
74+
]);
75+
76+
$options->setWhitespaceTextNode(true);
77+
$this->assertTrue($options->get('whitespaceTextNode'));
78+
79+
$options->setStrict(true);
80+
$this->assertTrue($options->get('strict'));
81+
82+
$options->setEnforceEncoding("utf8");
83+
$this->assertEquals("utf8", $options->get('enforceEncoding'));
84+
85+
$options->setCleanupInput(true);
86+
$this->assertTrue($options->get('cleanupInput'));
87+
88+
$options->setRemoveScripts(true);
89+
$this->assertTrue($options->get('removeScripts'));
90+
91+
$options->setRemoveStyles(true);
92+
$this->assertTrue($options->get('removeStyles'));
93+
94+
$options->setPreserveLineBreaks(true);
95+
$this->assertTrue($options->get('preserveLineBreaks'));
96+
97+
$options->setRemoveDoubleSpace(true);
98+
$this->assertTrue($options->get('removeDoubleSpace'));
99+
100+
$options->setRemoveSmartyScripts(true);
101+
$this->assertTrue($options->get('removeSmartyScripts'));
102+
103+
$options->setDepthFirstSearch(true);
104+
$this->assertTrue($options->get('depthFirstSearch'));
105+
106+
$options->setHtmlSpecialCharsDecode(true);
107+
$this->assertTrue($options->get('htmlSpecialCharsDecode'));
108+
109+
// Now reset every option through its setter (enforceEncoding back to null, all others to false) and re-check.
110+
111+
$options->setWhitespaceTextNode(false);
112+
$this->assertFalse($options->get('whitespaceTextNode'));
113+
114+
$options->setStrict(false);
115+
$this->assertFalse($options->get('strict'));
116+
117+
$options->setEnforceEncoding(null);
118+
$this->assertNull($options->get('enforceEncoding'));
119+
120+
$options->setCleanupInput(false);
121+
$this->assertFalse($options->get('cleanupInput'));
122+
123+
$options->setRemoveScripts(false);
124+
$this->assertFalse($options->get('removeScripts'));
125+
126+
$options->setRemoveStyles(false);
127+
$this->assertFalse($options->get('removeStyles'));
128+
129+
$options->setPreserveLineBreaks(false);
130+
$this->assertFalse($options->get('preserveLineBreaks'));
131+
132+
$options->setRemoveDoubleSpace(false);
133+
$this->assertFalse($options->get('removeDoubleSpace'));
134+
135+
$options->setRemoveSmartyScripts(false);
136+
$this->assertFalse($options->get('removeSmartyScripts'));
137+
138+
$options->setDepthFirstSearch(false);
139+
$this->assertFalse($options->get('depthFirstSearch'));
140+
141+
$options->setHtmlSpecialCharsDecode(false);
142+
$this->assertFalse($options->get('htmlSpecialCharsDecode'));
143+
}
144+
59145
public function testUnknownOptionDom() {
60146
$dom = new Dom;
61147
$dom->setOptions([

0 commit comments

Comments
 (0)