Skip to content

Commit 149a333

Browse files
committed
Remove addPreFilter
Pre-filters are applied only in __construct, so adding more pre-filters once the object has been instantiated has no effect.
1 parent 209c404 commit 149a333

File tree

3 files changed

+40
-19
lines changed

3 files changed

+40
-19
lines changed

src/JSLikeHTMLElement.php

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ public function __set($name, $value)
5656
$f = $this->ownerDocument->createDocumentFragment();
5757

5858
// appendXML() expects well-formed markup (XHTML)
59-
$result = @$f->appendXML($value); // @ to suppress PHP warnings
59+
// @ to suppress PHP warnings
60+
$result = @$f->appendXML($value);
6061
if ($result) {
6162
if ($f->hasChildNodes()) {
6263
$this->appendChild($f);
@@ -75,6 +76,7 @@ public function __set($name, $value)
7576

7677
if ($result) {
7778
$import = $f->getElementsByTagName('htmlfragment')->item(0);
79+
7880
foreach ($import->childNodes as $child) {
7981
$importedNode = $this->ownerDocument->importNode($child, true);
8082
$this->appendChild($importedNode);
@@ -102,6 +104,7 @@ public function __get($name)
102104
{
103105
if ($name == 'innerHTML') {
104106
$inner = '';
107+
105108
foreach ($this->childNodes as $child) {
106109
$inner .= $this->ownerDocument->saveXML($child);
107110
}

src/Readability.php

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -250,17 +250,6 @@ public function getContent()
250250
return $this->articleContent;
251251
}
252252

253-
/**
254-
* Add pre filter for raw input HTML processing.
255-
*
256-
* @param string RegExp for replace
257-
* @param string (optional) Replacer
258-
*/
259-
public function addPreFilter($filter, $replacer = '')
260-
{
261-
$this->pre_filters[$filter] = $replacer;
262-
}
263-
264253
/**
265254
* Add post filter for raw output HTML processing.
266255
*
@@ -302,7 +291,7 @@ public function init()
302291
}
303292
}
304293

305-
if ($bodyElems->length > 0 && $this->body == null) {
294+
if ($bodyElems->length > 0 && $this->body === null) {
306295
$this->body = $bodyElems->item(0);
307296
}
308297

@@ -385,12 +374,11 @@ public function postProcessContent(\DOMElement $articleContent)
385374
*/
386375
protected function getArticleTitle()
387376
{
388-
$origTitle = '';
389-
390377
try {
391378
$curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
392379
} catch (\Exception $e) {
393380
$curTitle = '';
381+
$origTitle = '';
394382
}
395383

396384
if (preg_match('/ [\|\-] /', $curTitle)) {
@@ -431,7 +419,7 @@ protected function prepDocument()
431419
* In some cases a body element can't be found (if the HTML is totally hosed for example)
432420
* so we create a new body node and append it to the document.
433421
*/
434-
if ($this->body == null) {
422+
if ($this->body === null) {
435423
$this->body = $this->dom->createElement('body');
436424
$this->dom->documentElement->appendChild($this->body);
437425
}
@@ -571,7 +559,6 @@ public function prepArticle(\DOMElement $articleContent)
571559
$this->cleanConditionally($articleContent, 'form');
572560
$this->cleanConditionally($articleContent, 'table');
573561
$this->cleanConditionally($articleContent, 'ul');
574-
//if (!$this->lightClean)
575562
$this->cleanConditionally($articleContent, 'div');
576563

577564
// Remove extra paragraphs.

tests/ReadabilityTest.php

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,22 @@ public function testWithClasses()
215215
$this->assertNotContains('This text should be removed', $readability->getContent()->innerHTML);
216216
}
217217

218+
public function testWithClassesWithoutLightClean()
219+
{
220+
$readability = new ReadabilityTested('<article>'.str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7).'<div style="display:none">'.str_repeat('<p class="clock">This text should be removed</p>', 10).'</div></article>', 'http://0.0.0.0');
221+
$readability->debug = true;
222+
$readability->lightClean = false;
223+
$res = $readability->init();
224+
225+
$this->assertTrue($res);
226+
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
227+
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
228+
$this->assertContains('alt="article"', $readability->getContent()->innerHTML);
229+
$this->assertEmpty($readability->getTitle()->innerHTML);
230+
$this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
231+
$this->assertNotContains('This text should be removed', $readability->getContent()->innerHTML);
232+
}
233+
218234
public function testWithTd()
219235
{
220236
$readability = new ReadabilityTested('<table><tr>'.str_repeat('<td><p>This is an awesome text with some links, here there are the awesome</td>', 7).'</tr></table>', 'http://0.0.0.0');
@@ -429,7 +445,22 @@ public function testAppendIdAlreadyHere()
429445
$this->assertTrue($res);
430446
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
431447
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
432-
// $this->assertContains('<iframe src="https://www.youtube.com/embed/PUep6xNeKjA" width="560" height="315" frameborder="0" allowfullscreen="allowfullscreen"> </iframe>', $readability->getContent()->innerHTML);
433-
// $this->assertContains('3D Touch', $readability->getTitle()->innerHTML);
448+
}
449+
450+
public function testPostFilters()
451+
{
452+
$readability = new ReadabilityTested('<div>'.str_repeat('<p>This <b>is</b> the awesome content :)</p>', 7).'</div>', 'http://0.0.0.0');
453+
$res = $readability->init();
454+
455+
$this->assertTrue($res);
456+
$this->assertContains('This <strong>is</strong> the awesome content :)', $readability->getContent()->innerHTML);
457+
458+
$readability = new ReadabilityTested('<div>'.str_repeat('<p>This <b>is</b> the awesome content :)</p>', 7).'</div>', 'http://0.0.0.0');
459+
$readability->addPostFilter('!<strong[^>]*>(.*?)</strong>!is', '');
460+
461+
$res = $readability->init();
462+
463+
$this->assertTrue($res);
464+
$this->assertContains('This the awesome content :)', $readability->getContent()->innerHTML);
434465
}
435466
}

0 commit comments

Comments
 (0)