Skip to content

Commit b1a677a

Browse files
cloudstudio and github-actions[bot]
authored and committed
Fix styling
1 parent 1b48da0 commit b1a677a

11 files changed

+53
-71
lines changed

src/HtmlCrawler.php

Lines changed: 20 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -10,50 +10,36 @@ class HtmlCrawler
1010
{
1111
/**
1212
* The original HTML content.
13-
*
14-
* @var string
1513
*/
1614
protected string $html;
1715

1816
/**
1917
* Array of allowed HTML tags (e.g. ['p', 'a', 'h1']).
20-
*
21-
* @var array
2218
*/
2319
protected array $allowedTags = [];
2420

2521
/**
2622
* A custom regex pattern to remove parts of the HTML.
27-
*
28-
* @var string|null
2923
*/
3024
protected ?string $customPattern = null;
3125

3226
/**
3327
* Whether to preserve newlines in the output.
34-
*
35-
* @var bool
3628
*/
3729
protected bool $preserveNewlines;
3830

3931
/**
4032
* Whether to convert the cleaned HTML to Markdown.
41-
*
42-
* @var bool
4333
*/
4434
protected bool $convertToMarkdown;
4535

4636
/**
4737
* Whether to remove script blocks by default.
48-
*
49-
* @var bool
5038
*/
5139
protected bool $removeScripts;
5240

5341
/**
5442
* Whether to remove style blocks by default.
55-
*
56-
* @var bool
5743
*/
5844
protected bool $removeStyles;
5945

@@ -64,38 +50,37 @@ class HtmlCrawler
6450
*/
6551
public function __construct()
6652
{
67-
$this->preserveNewlines = config('htmlcrawler.preserve_newlines', true);
68-
$this->allowedTags = config('htmlcrawler.allowed_tags', []);
69-
$this->convertToMarkdown = config('htmlcrawler.convert_to_markdown', false);
70-
$this->removeScripts = config('htmlcrawler.remove_scripts', true);
71-
$this->removeStyles = config('htmlcrawler.remove_styles', true);
53+
$this->preserveNewlines = config('htmlcrawler.preserve_newlines', true);
54+
$this->allowedTags = config('htmlcrawler.allowed_tags', []);
55+
$this->convertToMarkdown = config('htmlcrawler.convert_to_markdown', false);
56+
$this->removeScripts = config('htmlcrawler.remove_scripts', true);
57+
$this->removeStyles = config('htmlcrawler.remove_styles', true);
7258
}
7359

7460
/**
7561
* Initialize the crawler with an HTML string.
7662
*
77-
* @param string $html
7863
* @return static
7964
*/
8065
public static function fromHtml(string $html): self
8166
{
82-
$instance = new self();
67+
$instance = new self;
8368
$instance->html = $html;
69+
8470
return $instance;
8571
}
8672

8773
/**
8874
* Initialize the crawler with HTML loaded from a URL.
8975
*
90-
* @param string $url
9176
* @return static
9277
*
9378
* @throws InvalidUrlException
9479
* @throws \RuntimeException
9580
*/
9681
public static function fromUrl(string $url): self
9782
{
98-
if (!filter_var($url, FILTER_VALIDATE_URL)) {
83+
if (! filter_var($url, FILTER_VALIDATE_URL)) {
9984
throw new InvalidUrlException("Invalid URL provided: {$url}");
10085
}
10186

@@ -110,25 +95,26 @@ public static function fromUrl(string $url): self
11095
/**
11196
* Specify which HTML tags should be preserved.
11297
*
113-
* @param string|array $tags A tag or an array of tags (e.g., 'p' or ['p', 'a']).
98+
* @param string|array $tags A tag or an array of tags (e.g., 'p' or ['p', 'a']).
11499
* @return $this
115100
*/
116101
public function keepTags(string|array $tags): self
117102
{
118103
$tags = is_array($tags) ? $tags : [$tags];
119104
$this->allowedTags = array_unique(array_merge($this->allowedTags, $tags));
105+
120106
return $this;
121107
}
122108

123109
/**
124110
* Replace the allowed tags list with a new set.
125111
*
126-
* @param array $tags
127112
* @return $this
128113
*/
129114
public function setAllowedTags(array $tags): self
130115
{
131116
$this->allowedTags = $tags;
117+
132118
return $this;
133119
}
134120

@@ -201,9 +187,10 @@ public function keepLinks(): self
201187
public function keepScripts(): self
202188
{
203189
$this->removeScripts = false;
204-
if (!in_array('script', $this->allowedTags)) {
190+
if (! in_array('script', $this->allowedTags)) {
205191
$this->allowedTags[] = 'script';
206192
}
193+
207194
return $this;
208195
}
209196

@@ -216,37 +203,38 @@ public function keepScripts(): self
216203
public function keepCss(): self
217204
{
218205
$this->removeStyles = false;
219-
if (!in_array('style', $this->allowedTags)) {
206+
if (! in_array('style', $this->allowedTags)) {
220207
$this->allowedTags[] = 'style';
221208
}
222-
if (!in_array('link', $this->allowedTags)) {
209+
if (! in_array('link', $this->allowedTags)) {
223210
$this->allowedTags[] = 'link';
224211
}
212+
225213
return $this;
226214
}
227215

228216
/**
229217
* Set a custom regex pattern to remove parts of the HTML.
230218
* This pattern takes precedence over allowed tags.
231219
*
232-
* @param string $pattern
233220
* @return $this
234221
*/
235222
public function useCustomPattern(string $pattern): self
236223
{
237224
$this->customPattern = $pattern;
225+
238226
return $this;
239227
}
240228

241229
/**
242230
* Set whether to preserve newlines.
243231
*
244-
* @param bool $preserve
245232
* @return $this
246233
*/
247234
public function preserveNewlines(bool $preserve = true): self
248235
{
249236
$this->preserveNewlines = $preserve;
237+
250238
return $this;
251239
}
252240

@@ -258,6 +246,7 @@ public function preserveNewlines(bool $preserve = true): self
258246
public function withMarkdown(): self
259247
{
260248
$this->convertToMarkdown = true;
249+
261250
return $this;
262251
}
263252

@@ -269,7 +258,7 @@ public function withMarkdown(): self
269258
*/
270259
public function clean(): string
271260
{
272-
$cleaner = new HtmlCleaner();
261+
$cleaner = new HtmlCleaner;
273262
$result = $cleaner->clean(
274263
$this->html,
275264
$this->allowedTags,
@@ -288,8 +277,6 @@ public function clean(): string
288277

289278
/**
290279
* Magic method to return the cleaned content when the object is treated as a string.
291-
*
292-
* @return string
293280
*/
294281
public function __toString(): string
295282
{

src/HtmlCrawlerServiceProvider.php

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,16 @@
88
/**
99
* Class HtmlCrawlerServiceProvider
1010
*
11-
* Service provider for the HtmlCrawler package.
12-
* Responsible for configuring the package, including
11+
* Service provider for the HtmlCrawler package.
12+
* Responsible for configuring the package, including
1313
* setting up configuration files and views.
1414
*/
1515
class HtmlCrawlerServiceProvider extends PackageServiceProvider
1616
{
1717
/**
1818
* Configure the package.
1919
*
20-
* @param Package $package The package instance to configure.
21-
* @return void
20+
* @param Package $package The package instance to configure.
2221
*/
2322
public function configurePackage(Package $package): void
2423
{
@@ -32,13 +31,11 @@ public function configurePackage(Package $package): void
3231
*
3332
* This method binds the HtmlCrawler class to the service container
3433
* and creates an alias for easier access.
35-
*
36-
* @return void
3734
*/
3835
public function packageRegistered(): void
3936
{
4037
$this->app->singleton(HtmlCrawler::class, function ($app) {
41-
return new HtmlCrawler();
38+
return new HtmlCrawler;
4239
});
4340

4441
$this->app->alias(HtmlCrawler::class, 'html-crawler');

src/Services/Cleaner/CleanerInterface.php

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@ interface CleanerInterface
66
{
77
/**
88
* Clean the given HTML and return the modified content.
9-
*
10-
* @param string $html
11-
* @return string
129
*/
1310
public function clean(string $html): string;
1411
}

src/Services/Cleaner/CustomPatternCleaner.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class CustomPatternCleaner implements CleanerInterface
1818
/**
1919
* CustomPatternCleaner constructor.
2020
*
21-
* @param string|null $pattern The regex pattern to apply for cleaning.
21+
* @param string|null $pattern The regex pattern to apply for cleaning.
2222
*/
2323
public function __construct(?string $pattern)
2424
{
@@ -28,14 +28,15 @@ public function __construct(?string $pattern)
2828
/**
2929
* Clean the given HTML content by applying the custom regex pattern.
3030
*
31-
* @param string $html The HTML content to clean.
31+
* @param string $html The HTML content to clean.
3232
* @return string The cleaned HTML content.
3333
*/
3434
public function clean(string $html): string
3535
{
3636
if ($this->pattern) {
3737
return preg_replace($this->pattern, '', $html);
3838
}
39+
3940
return $html;
4041
}
4142
}

src/Services/Cleaner/ScriptRemover.php

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ class ScriptRemover implements CleanerInterface
2323
/**
2424
* ScriptRemover constructor.
2525
*
26-
* @param bool $remove Indicates whether to remove <script> tags.
27-
* @param array $allowedTags List of tags that are allowed in the HTML.
26+
* @param bool $remove Indicates whether to remove <script> tags.
27+
* @param array $allowedTags List of tags that are allowed in the HTML.
2828
*/
2929
public function __construct(bool $remove, array $allowedTags)
3030
{
@@ -35,14 +35,15 @@ public function __construct(bool $remove, array $allowedTags)
3535
/**
3636
* Clean the given HTML content by removing <script> tags if specified.
3737
*
38-
* @param string $html The HTML content to clean.
38+
* @param string $html The HTML content to clean.
3939
* @return string The cleaned HTML content.
4040
*/
4141
public function clean(string $html): string
4242
{
43-
if ($this->remove && !in_array('script', $this->allowedTags)) {
43+
if ($this->remove && ! in_array('script', $this->allowedTags)) {
4444
return preg_replace('#<script.*?</script>#is', '', $html);
4545
}
46+
4647
return $html;
4748
}
4849
}

src/Services/Cleaner/StyleRemover.php

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ class StyleRemover implements CleanerInterface
2323
/**
2424
* StyleRemover constructor.
2525
*
26-
* @param bool $remove Indicates whether to remove <style> tags.
27-
* @param array $allowedTags List of tags that are allowed in the HTML.
26+
* @param bool $remove Indicates whether to remove <style> tags.
27+
* @param array $allowedTags List of tags that are allowed in the HTML.
2828
*/
2929
public function __construct(bool $remove, array $allowedTags)
3030
{
@@ -35,14 +35,15 @@ public function __construct(bool $remove, array $allowedTags)
3535
/**
3636
* Clean the given HTML content by removing <style> tags if specified.
3737
*
38-
* @param string $html The HTML content to clean.
38+
* @param string $html The HTML content to clean.
3939
* @return string The cleaned HTML content.
4040
*/
4141
public function clean(string $html): string
4242
{
43-
if ($this->remove && !in_array('style', $this->allowedTags)) {
43+
if ($this->remove && ! in_array('style', $this->allowedTags)) {
4444
return preg_replace('#<style.*?</style>#is', '', $html);
4545
}
46+
4647
return $html;
4748
}
4849
}

src/Services/Cleaner/TagCleaner.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class TagCleaner implements CleanerInterface
1818
/**
1919
* TagCleaner constructor.
2020
*
21-
* @param array $allowedTags List of tags that are allowed in the HTML.
21+
* @param array $allowedTags List of tags that are allowed in the HTML.
2222
*/
2323
public function __construct(array $allowedTags)
2424
{
@@ -28,7 +28,7 @@ public function __construct(array $allowedTags)
2828
/**
2929
* Clean the given HTML content by stripping disallowed tags.
3030
*
31-
* @param string $html The HTML content to clean.
31+
* @param string $html The HTML content to clean.
3232
* @return string The cleaned HTML content.
3333
*/
3434
public function clean(string $html): string
@@ -41,6 +41,7 @@ public function clean(string $html): string
4141
foreach ($this->allowedTags as $tag) {
4242
$allowed .= "<{$tag}>";
4343
}
44+
4445
return strip_tags($html, $allowed);
4546
}
4647
}

src/Services/Cleaner/WhitespaceNormalizer.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ class WhitespaceNormalizer implements CleanerInterface
1919
/**
2020
* WhitespaceNormalizer constructor.
2121
*
22-
* @param bool $preserveNewlines Indicates whether to preserve newlines.
22+
* @param bool $preserveNewlines Indicates whether to preserve newlines.
2323
*/
2424
public function __construct(bool $preserveNewlines)
2525
{
@@ -32,7 +32,7 @@ public function __construct(bool $preserveNewlines)
3232
* This method collapses multiple spaces into a single space and
3333
* preserves newlines if specified.
3434
*
35-
* @param string $html The HTML content to clean.
35+
* @param string $html The HTML content to clean.
3636
* @return string The cleaned HTML content with normalized whitespace.
3737
*/
3838
public function clean(string $html): string

0 commit comments

Comments
 (0)