Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 436d81b

Browse files
committed Dec 5, 2023
analysis: do not parse/check empty HTML (it produced an unnecessary warning) - it is valid to have Content-Type: text/html but with Content-Length: 0 (for example, the case for 'gtm.js?id=')
1 parent f42fe18 commit 436d81b

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed
 

‎src/Crawler/Analysis/Manager.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ public function analyzeVisitedUrl(VisitedUrl $visitedUrl, ?string $body, ?array
140140
$result = [];
141141

142142
$dom = null;
143-
if ($visitedUrl->contentType === Crawler::CONTENT_TYPE_ID_HTML) {
143+
if ($visitedUrl->contentType === Crawler::CONTENT_TYPE_ID_HTML && $body !== null && trim($body) !== '') {
144144
$s = microtime(true);
145145
$encodedBody = mb_convert_encoding($body, 'HTML-ENTITIES', 'UTF-8');
146146
if (!$encodedBody) {

‎src/Crawler/Analysis/SecurityAnalyzer.php

+4-1
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,10 @@ public function analyzeVisitedUrl(VisitedUrl $visitedUrl, ?string $body, ?DOMDoc
128128
$result = new UrlAnalysisResult();
129129

130130
$this->checkHeaders($headers, $visitedUrl->isHttps(), $result);
131-
$this->checkHtmlSecurity($body, $visitedUrl->isHttps(), $result);
131+
132+
if ($body !== null && trim($body) !== '') {
133+
$this->checkHtmlSecurity($body, $visitedUrl->isHttps(), $result);
134+
}
132135

133136
return $result;
134137
}

0 commit comments

Comments
 (0)
Please sign in to comment.