Skip to content

Commit 24386fa

Browse files
committed
Add more logging
1 parent 4fdbd92 commit 24386fa

6 files changed

+196
-2
lines changed

app/Crawler.php

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,21 @@ public function sendRequest(RequestInterface $request): ResponseInterface
6363
public function sendRequests(RequestInterface ...$requests): array
6464
{
6565
if ($this->client instanceof CurlClient) {
66+
info('Crawler::sendRequests CurlClient', [
67+
'requests' => array_map(
68+
fn ($request) => $request->getUri(),
69+
$requests
70+
),
71+
]);
6672
return $this->client->sendRequests(...$requests);
6773
}
74+
75+
info('Crawler::sendRequests regular', [
76+
'requests' => array_map(
77+
fn ($request) => $request->getUri(),
78+
$requests
79+
),
80+
]);
6881

6982
return array_map(
7083
fn ($request) => $this->client->sendRequest($request),

app/DomainFilteringAdapter.php

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/*
6+
* This file is part of the league/commonmark package.
7+
*
8+
* (c) Colin O'Dell <colinodell@gmail.com>
9+
*
10+
* For the full copyright and license information, please view the LICENSE
11+
* file that was distributed with this source code.
12+
*/
13+
14+
namespace App;
15+
16+
use League\CommonMark\Extension\Embed\Embed;
17+
use League\CommonMark\Extension\Embed\EmbedAdapterInterface;
18+
19+
class DomainFilteringAdapter implements EmbedAdapterInterface
{
    /** Adapter that receives only the embeds whose URL passed the domain filter. */
    private EmbedAdapterInterface $decorated;

    /**
     * Pattern built by createRegex() that an embed URL must match to be forwarded.
     *
     * @psalm-var non-empty-string
     */
    private string $regex;

    /**
     * @param EmbedAdapterInterface $decorated      Adapter to delegate the filtered embeds to
     * @param string[]              $allowedDomains Domains (e.g. "example.com") whose URLs may be embedded
     */
    public function __construct(EmbedAdapterInterface $decorated, array $allowedDomains)
    {
        $this->decorated = $decorated;
        $this->regex     = self::createRegex($allowedDomains);
    }

    /**
     * {@inheritDoc}
     *
     * Forwards to the decorated adapter only those embeds whose URL matches
     * the allow-list pattern; the forwarded array is re-indexed from zero.
     */
    public function updateEmbeds(array $embeds): void
    {
        info('DomainFilteringAdapter::updateEmbeds');
        $this->decorated->updateEmbeds(\array_values(\array_filter($embeds, function (Embed $embed): bool {
            return \preg_match($this->regex, $embed->getUrl()) === 1;
        })));
    }

    /**
     * Builds the pattern used to decide whether a URL belongs to an allowed domain.
     *
     * The pattern accepts an optional http(s) scheme, any number of subdomain
     * labels, one of the allowed domains, and then requires the host to end
     * (end of string, or a port / path / query / fragment delimiter).
     *
     * @param string[] $allowedDomains
     *
     * @psalm-return non-empty-string
     */
    private static function createRegex(array $allowedDomains): string
    {
        // An empty allow-list used to yield a pattern that matched every URL;
        // keep that behaviour explicit rather than relying on an empty alternation.
        if ($allowedDomains === []) {
            return '/^/';
        }

        // Pass the delimiter so a "/" inside a configured domain is escaped too.
        $quoted = \array_map(
            static fn (string $domain): string => \preg_quote($domain, '/'),
            $allowedDomains
        );

        // Subdomain labels may not contain "/", "?", "#", "@" or ":" so the match
        // cannot wander out of the host part, and the lookahead stops
        // "example.com.evil.net" from passing as "example.com".
        return '/^(?:https?:\/\/)?(?:[^.\/?#@:]+\.)*(?:' . \implode('|', $quoted) . ')(?=[\/?#:]|$)/i';
    }
}

app/EmbedExtension.php

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/*
6+
* This file is part of the league/commonmark package.
7+
*
8+
* (c) Colin O'Dell <colinodell@gmail.com>
9+
*
10+
* For the full copyright and license information, please view the LICENSE
11+
* file that was distributed with this source code.
12+
*/
13+
14+
namespace App;
15+
16+
use League\CommonMark\Environment\EnvironmentBuilderInterface;
17+
use League\CommonMark\Event\DocumentParsedEvent;
18+
use League\CommonMark\Extension\ConfigurableExtensionInterface;
19+
use League\CommonMark\Extension\Embed\Embed;
20+
use League\CommonMark\Extension\Embed\EmbedAdapterInterface;
21+
use League\CommonMark\Extension\Embed\EmbedRenderer;
22+
use League\CommonMark\Extension\Embed\EmbedStartParser;
23+
use League\Config\ConfigurationBuilderInterface;
24+
use Nette\Schema\Expect;
25+
26+
final class EmbedExtension implements ConfigurableExtensionInterface
{
    /**
     * Declares the "embed" configuration section: the adapter instance, an
     * optional list of allowed domains (empty by default) and the fallback
     * mode ("link" or "remove", defaulting to "link").
     */
    public function configureSchema(ConfigurationBuilderInterface $builder): void
    {
        $schema = Expect::structure([
            'adapter' => Expect::type(EmbedAdapterInterface::class),
            'allowed_domains' => Expect::arrayOf('string')->default([]),
            'fallback' => Expect::anyOf('link', 'remove')->default('link'),
        ]);

        $builder->addSchema('embed', $schema);
    }

    /**
     * Registers the embed block parser, the document-parsed listener and the
     * embed renderer on the environment, using the configured adapter.
     */
    public function register(EnvironmentBuilderInterface $environment): void
    {
        $config = $environment->getConfiguration();

        $adapter = $config->get('embed.adapter');
        \assert($adapter instanceof EmbedAdapterInterface);

        // A non-empty allow-list wraps the adapter so only matching URLs reach it.
        $allowedDomains = $config->get('embed.allowed_domains');
        if ($allowedDomains !== []) {
            $adapter = new DomainFilteringAdapter($adapter, $allowedDomains);
        }

        $processor = new EmbedProcessor($adapter, $config->get('embed.fallback'));

        $environment
            ->addBlockStartParser(new EmbedStartParser(), 300)
            ->addEventListener(DocumentParsedEvent::class, $processor, 1010)
            ->addRenderer(Embed::class, new EmbedRenderer());
    }
}

app/EmbedProcessor.php

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/*
6+
* This file is part of the league/commonmark package.
7+
*
8+
* (c) Colin O'Dell <colinodell@gmail.com>
9+
*
10+
* For the full copyright and license information, please view the LICENSE
11+
* file that was distributed with this source code.
12+
*/
13+
14+
namespace App;
15+
16+
use League\CommonMark\Event\DocumentParsedEvent;
17+
use League\CommonMark\Extension\CommonMark\Node\Inline\Link;
18+
use League\CommonMark\Extension\Embed\Embed;
19+
use League\CommonMark\Extension\Embed\EmbedAdapterInterface;
20+
use League\CommonMark\Node\Block\Paragraph;
21+
use League\CommonMark\Node\Inline\Text;
22+
use League\CommonMark\Node\NodeIterator;
23+
24+
final class EmbedProcessor
{
    /** Fallback mode: drop an embed that received no embed code. */
    public const FALLBACK_REMOVE = 'remove';

    /** Fallback mode: replace an embed that received no embed code with a link to its URL. */
    public const FALLBACK_LINK = 'link';

    private EmbedAdapterInterface $adapter;

    private string $fallback;

    /**
     * @param EmbedAdapterInterface $adapter  Adapter asked to fill in the embed code of each embed
     * @param string                $fallback One of the FALLBACK_* constants
     */
    public function __construct(EmbedAdapterInterface $adapter, string $fallback = self::FALLBACK_REMOVE)
    {
        $this->adapter  = $adapter;
        $this->fallback = $fallback;
    }

    /**
     * Collects the embeds that sit directly under the document, hands them to
     * the adapter, then applies the fallback to every embed still lacking
     * embed code. Embeds found anywhere else are replaced with a paragraph
     * containing their URL as text.
     */
    public function __invoke(DocumentParsedEvent $event): void
    {
        $root           = $event->getDocument();
        $topLevelEmbeds = [];

        foreach (new NodeIterator($root) as $candidate) {
            if (! ($candidate instanceof Embed)) {
                continue;
            }

            if ($candidate->parent() === $root) {
                $topLevelEmbeds[] = $candidate;

                continue;
            }

            // Not a direct child of the document: swap in the URL as plain text.
            $plain = new Paragraph();
            $plain->appendChild(new Text($candidate->getUrl()));
            $candidate->replaceWith($plain);
        }

        info('EmbedProcessor::__invoke');
        $this->adapter->updateEmbeds($topLevelEmbeds);

        foreach ($topLevelEmbeds as $embed) {
            if ($embed->getEmbedCode() !== null) {
                continue;
            }

            if ($this->fallback === self::FALLBACK_REMOVE) {
                $embed->detach();

                continue;
            }

            if ($this->fallback === self::FALLBACK_LINK) {
                $link    = new Link($embed->getUrl(), $embed->getUrl());
                $wrapper = new Paragraph();
                $wrapper->appendChild($link);
                $embed->replaceWith($wrapper);
            }
        }
    }
}

app/Markdown/MarkdownServiceProvider.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
namespace App\Markdown;
44

55
use App\Crawler;
6+
use App\EmbedExtension;
67
use App\OscaroteroEmbedAdapter;
78
use Embed\Embed;
89
use Embed\Http\CurlClient;
910
use Illuminate\Support\ServiceProvider;
1011
use League\CommonMark\Environment\Environment;
1112
use League\CommonMark\Extension\CommonMark\CommonMarkCoreExtension;
12-
use League\CommonMark\Extension\Embed\EmbedExtension;
1313
use League\CommonMark\Extension\ExternalLink\ExternalLinkExtension;
1414
use League\CommonMark\Extension\GithubFlavoredMarkdownExtension;
1515
use League\CommonMark\Extension\Mention\MentionExtension;

app/OscaroteroEmbedAdapter.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ public function __construct(?EmbedLib $embed = null)
3030
public function updateEmbeds(array $embeds): void
3131
{
3232
$extractors = $this->embedLib->getMulti(...\array_map(static fn (Embed $embed) => $embed->getUrl(), $embeds));
33+
info(print_r($extractors, true));
3334
foreach ($extractors as $i => $extractor) {
34-
info(print_r($extractor, true));
3535
if ($extractor->code !== null) {
3636
$embeds[$i]->setEmbedCode($extractor->code->html);
3737
}

0 commit comments

Comments
 (0)