22
33namespace Readability ;
44
5- use DOMElement ;
65use Masterminds \HTML5 ;
76use Psr \Log \LoggerAwareInterface ;
87use Psr \Log \LoggerInterface ;
@@ -115,7 +114,7 @@ class Readability implements LoggerAwareInterface
115114 // HACK: replace linebreaks plus br's with p's
116115 '!(<br[^>]*>[ \r\n\s]*){2,}!i ' => '</p><p> ' ,
117116 // replace noscripts
118- //'!</?noscript>!is' => '',
117+ // '!</?noscript>!is' => '',
119118 // replace fonts to spans
120119 '!<(/?)font[^>]*>!is ' => '< \\1span> ' ,
121120 ];
@@ -126,8 +125,8 @@ class Readability implements LoggerAwareInterface
126125 // replace empty tags that break layouts
127126 '!<(?:a|div|p|figure)[^>]+/>!is ' => '' ,
128127 // remove all attributes on text tags
129- //'!<(\s*/?\s*(?:blockquote|br|hr|code|div|article|span|footer|aside|p|pre|dl|li|ul|ol)) [^>]+>!is' => "<\\1>",
130- //single newlines cleanup
128+ // '!<(\s*/?\s*(?:blockquote|br|hr|code|div|article|span|footer|aside|p|pre|dl|li|ul|ol)) [^>]+>!is' => "<\\1>",
129+ // single newlines cleanup
131130 "/ \n+/ " => "\n" ,
132131 // modern web...
133132 '!<pre[^>]*>\s*<code!is ' => '<pre ' ,
@@ -161,7 +160,7 @@ public function setLogger(LoggerInterface $logger): void
161160 /**
162161 * Get article title element.
163162 *
164- * @return DOMElement
163+ * @return \ DOMElement
165164 */
166165 public function getTitle ()
167166 {
@@ -171,7 +170,7 @@ public function getTitle()
171170 /**
172171 * Get article content element.
173172 *
174- * @return DOMElement
173+ * @return \ DOMElement
175174 */
176175 public function getContent ()
177176 {
@@ -280,7 +279,7 @@ public function init(): bool
280279 /**
281280 * Run any post-process modifications to article content as necessary.
282281 */
283- public function postProcessContent (DOMElement $ articleContent ): void
282+ public function postProcessContent (\ DOMElement $ articleContent ): void
284283 {
285284 if ($ this ->convertLinksToFootnotes && !preg_match ('/\bwiki/ ' , $ this ->url )) {
286285 $ this ->addFootnotes ($ articleContent );
@@ -292,7 +291,7 @@ public function postProcessContent(DOMElement $articleContent): void
292291 *
293292 * @see http://www.roughtype.com/archives/2010/05/experiments_in.php
294293 */
295- public function addFootnotes (DOMElement $ articleContent ): void
294+ public function addFootnotes (\ DOMElement $ articleContent ): void
296295 {
297296 $ footnotesWrapper = $ this ->dom ->createElement ('footer ' );
298297 $ footnotesWrapper ->setAttribute ('class ' , 'readability-footnotes ' );
@@ -335,7 +334,7 @@ public function addFootnotes(DOMElement $articleContent): void
335334 $ articleLink ->setAttribute ('style ' , 'color: inherit; text-decoration: none; ' );
336335 $ articleLink ->setAttribute ('name ' , 'readabilityLink- ' . $ linkCount );
337336 $ footnote ->setInnerHtml ('<small><sup><a href="#readabilityLink- ' . $ linkCount . '" title="Jump to Link in Article">^</a></sup></small> ' );
338- $ footnoteLink ->setInnerHtml (( '' !== $ footnoteLink ->getAttribute ('title ' ) ? $ footnoteLink ->getAttribute ('title ' ) : $ linkText) );
337+ $ footnoteLink ->setInnerHtml ('' !== $ footnoteLink ->getAttribute ('title ' ) ? $ footnoteLink ->getAttribute ('title ' ) : $ linkText );
339338 $ footnoteLink ->setAttribute ('name ' , 'readabilityFootnoteLink- ' . $ linkCount );
340339 $ footnote ->appendChild ($ footnoteLink );
341340
@@ -356,7 +355,7 @@ public function addFootnotes(DOMElement $articleContent): void
356355 */
357356 public function prepArticle (\DOMNode $ articleContent ): void
358357 {
359- if (!$ articleContent instanceof DOMElement) {
358+ if (!$ articleContent instanceof \ DOMElement) {
360359 return ;
361360 }
362361
@@ -456,9 +455,9 @@ public function prepArticle(\DOMNode $articleContent): void
456455 * Get the inner text of a node.
457456 * This also strips out any excess whitespace to be found.
458457 *
459- * @param DOMElement $e
460- * @param bool $normalizeSpaces (default: true)
461- * @param bool $flattenLines (default: false)
458+ * @param \ DOMElement $e
459+ * @param bool $normalizeSpaces (default: true)
460+ * @param bool $flattenLines (default: false)
462461 */
463462 public function getInnerText ($ e , bool $ normalizeSpaces = true , bool $ flattenLines = false ): string
464463 {
@@ -482,7 +481,7 @@ public function getInnerText($e, bool $normalizeSpaces = true, bool $flattenLine
482481 /**
483482 * Remove the style attribute on every $e and under.
484483 */
485- public function cleanStyles (DOMElement $ e ): void
484+ public function cleanStyles (\ DOMElement $ e ): void
486485 {
487486 if (\is_object ($ e )) {
488487 $ elems = $ e ->getElementsByTagName ('* ' );
@@ -515,7 +514,7 @@ public function getWordCount(string $text): int
515514 * This is the amount of text that is inside a link divided by the total text in the node.
516515 * Can exclude external references to differentiate between simple text and menus/infoblocks.
517516 */
518- public function getLinkDensity (DOMElement $ e , bool $ excludeExternal = false ): float
517+ public function getLinkDensity (\ DOMElement $ e , bool $ excludeExternal = false ): float
519518 {
520519 $ links = $ e ->getElementsByTagName ('a ' );
521520 $ textLength = mb_strlen ($ this ->getInnerText ($ e , true , true ));
@@ -538,7 +537,7 @@ public function getLinkDensity(DOMElement $e, bool $excludeExternal = false): fl
538537 /**
539538 * Get an element relative weight.
540539 */
541- public function getWeight (DOMElement $ e ): int
540+ public function getWeight (\ DOMElement $ e ): int
542541 {
543542 if (!$ this ->flagIsActive (self ::FLAG_WEIGHT_ATTRIBUTES )) {
544543 return 0 ;
@@ -556,7 +555,7 @@ public function getWeight(DOMElement $e): int
556555 /**
557556 * Remove extraneous break tags from a node.
558557 */
559- public function killBreaks (DOMElement $ node ): void
558+ public function killBreaks (\ DOMElement $ node ): void
560559 {
561560 $ html = $ node ->getInnerHTML ();
562561 $ html = preg_replace ($ this ->regexps ['killBreaks ' ], '<br /> ' , $ html );
@@ -569,7 +568,7 @@ public function killBreaks(DOMElement $node): void
569568 *
570569 * Updated 2012-09-18 to preserve youtube/vimeo iframes
571570 */
572- public function clean (DOMElement $ e , string $ tag ): void
571+ public function clean (\ DOMElement $ e , string $ tag ): void
573572 {
574573 $ targetList = $ e ->getElementsByTagName ($ tag );
575574 $ isEmbed = ('audio ' === $ tag || 'video ' === $ tag || 'iframe ' === $ tag || 'object ' === $ tag || 'embed ' === $ tag );
@@ -601,7 +600,7 @@ public function clean(DOMElement $e, string $tag): void
601600 * "Fishy" is an algorithm based on content length, classnames,
602601 * link density, number of images & embeds, etc.
603602 */
604- public function cleanConditionally (DOMElement $ e , string $ tag ): void
603+ public function cleanConditionally (\ DOMElement $ e , string $ tag ): void
605604 {
606605 if (!$ this ->flagIsActive (self ::FLAG_CLEAN_CONDITIONALLY )) {
607606 return ;
@@ -714,7 +713,7 @@ public function cleanConditionally(DOMElement $e, string $tag): void
714713 /**
715714 * Clean out spurious headers from an Element. Checks things like classnames and link density.
716715 */
717- public function cleanHeaders (DOMElement $ e ): void
716+ public function cleanHeaders (\ DOMElement $ e ): void
718717 {
719718 for ($ headerIndex = 1 ; $ headerIndex < 3 ; ++$ headerIndex ) {
720719 $ headers = $ e ->getElementsByTagName ('h ' . $ headerIndex );
@@ -754,7 +753,7 @@ public function removeFlag(int $flag): void
754753 /**
755754 * Get the article title as an H1.
756755 *
757- * @return DOMElement
756+ * @return \ DOMElement
758757 */
759758 protected function getArticleTitle ()
760759 {
@@ -826,7 +825,7 @@ protected function prepDocument(): void
826825 * Initialize a node with the readability object. Also checks the
827826 * className/id for special names to add to its score.
828827 */
829- protected function initializeNode (DOMElement $ node ): void
828+ protected function initializeNode (\ DOMElement $ node ): void
830829 {
831830 if (!isset ($ node ->tagName )) {
832831 return ;
@@ -894,11 +893,11 @@ protected function initializeNode(DOMElement $node): void
894893 * Using a variety of metrics (content score, classname, element types), find the content that is
895894 * most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
896895 *
897- * @param DOMElement $page
896+ * @param \ DOMElement $page
898897 *
899- * @return DOMElement|false
898+ * @return \ DOMElement|false
900899 */
901- protected function grabArticle (DOMElement $ page = null )
900+ protected function grabArticle (\ DOMElement $ page = null )
902901 {
903902 if (!$ page ) {
904903 $ page = $ this ->dom ;
@@ -1040,7 +1039,7 @@ protected function grabArticle(DOMElement $page = null)
10401039 // For every SCORE_CHARS_IN_PARAGRAPH (default:100) characters in this paragraph, add another point. Up to 3 points.
10411040 $ contentScore += min (floor (mb_strlen ($ innerText ) / self ::SCORE_CHARS_IN_PARAGRAPH ), 3 );
10421041 // For every SCORE_WORDS_IN_PARAGRAPH (default:20) words in this paragraph, add another point. Up to 3 points.
1043- //$contentScore += min(floor($this->getWordCount($innerText) / self::SCORE_WORDS_IN_PARAGRAPH), 3);
1042+ // $contentScore += min(floor($this->getWordCount($innerText) / self::SCORE_WORDS_IN_PARAGRAPH), 3);
10441043
10451044 foreach ($ ancestors as $ level => $ ancestor ) {
10461045 if (!$ ancestor ->nodeName || !$ ancestor ->parentNode ) {
@@ -1211,7 +1210,7 @@ protected function grabArticle(DOMElement $page = null)
12111210 if (0 === strcasecmp ($ tagName , 'td ' ) || 0 === strcasecmp ($ tagName , 'tr ' )) {
12121211 $ up = $ topCandidate ;
12131212
1214- if ($ up ->parentNode instanceof DOMElement) {
1213+ if ($ up ->parentNode instanceof \ DOMElement) {
12151214 $ up = $ up ->parentNode ;
12161215
12171216 if (0 === strcasecmp ($ up ->tagName , 'table ' )) {
@@ -1292,8 +1291,8 @@ protected function grabArticle(DOMElement $page = null)
12921291
12931292 // To ensure a node does not interfere with readability styles, remove its classnames & ids.
12941293 // Now done via RegExp post_filter.
1295- //$nodeToAppend->removeAttribute('class');
1296- //$nodeToAppend->removeAttribute('id');
1294+ // $nodeToAppend->removeAttribute('class');
1295+ // $nodeToAppend->removeAttribute('id');
12971296 // Append sibling and subtract from our list as appending removes a node.
12981297 $ articleContent ->appendChild ($ nodeToAppend );
12991298 }
@@ -1340,7 +1339,7 @@ protected function grabArticle(DOMElement $page = null)
13401339 * Get an element weight by attribute.
13411340 * Uses regular expressions to tell if this element looks good or bad.
13421341 */
1343- protected function weightAttribute (DOMElement $ element , string $ attribute ): int
1342+ protected function weightAttribute (\ DOMElement $ element , string $ attribute ): int
13441343 {
13451344 if (!$ element ->hasAttribute ($ attribute )) {
13461345 return 0 ;
@@ -1443,14 +1442,14 @@ private function loadHtml(): void
14431442 libxml_use_internal_errors (false );
14441443 }
14451444
1446- $ this ->dom ->registerNodeClass (DOMElement::class, \Readability \JSLikeHTMLElement::class);
1445+ $ this ->dom ->registerNodeClass (\ DOMElement::class, \Readability \JSLikeHTMLElement::class);
14471446 }
14481447
1449- private function getAncestors (DOMElement $ node , int $ maxDepth = 0 ): array
1448+ private function getAncestors (\ DOMElement $ node , int $ maxDepth = 0 ): array
14501449 {
14511450 $ ancestors = [];
14521451 $ i = 0 ;
1453- while ($ node ->parentNode instanceof DOMElement) {
1452+ while ($ node ->parentNode instanceof \ DOMElement) {
14541453 $ ancestors [] = $ node ->parentNode ;
14551454 if (++$ i === $ maxDepth ) {
14561455 break ;
@@ -1470,7 +1469,7 @@ private function isPhrasingContent($node): bool
14701469 }, iterator_to_array ($ node ->childNodes )), true ));
14711470 }
14721471
1473- private function hasSingleTagInsideElement (DOMElement $ node , string $ tag ): bool
1472+ private function hasSingleTagInsideElement (\ DOMElement $ node , string $ tag ): bool
14741473 {
14751474 if (1 !== $ node ->childNodes ->length || $ node ->childNodes ->item (0 )->nodeName !== $ tag ) {
14761475 return false ;
@@ -1490,11 +1489,11 @@ private function hasSingleTagInsideElement(DOMElement $node, string $tag): bool
14901489 * Tidy must be configured to not clean the input for this function to
14911490 * work as expected, see $this->tidy_config['clean']
14921491 */
1493- private function isNodeVisible (DOMElement $ node ): bool
1492+ private function isNodeVisible (\ DOMElement $ node ): bool
14941493 {
14951494 return !($ node ->hasAttribute ('style ' )
14961495 && preg_match ($ this ->regexps ['isNotVisible ' ], $ node ->getAttribute ('style ' ))
1497- )
1496+ )
14981497 && !$ node ->hasAttribute ('hidden ' );
14991498 }
15001499}
0 commit comments