Skip to content

Commit

Permalink
Complete rewrite
Browse files Browse the repository at this point in the history
The code has a better structure, it's more readable and more extendable.
It is almost completely tested and fully hardened.
There are more things to come, but for now it's a good improvement.
  • Loading branch information
aledeg committed May 25, 2023
1 parent 64ce9d2 commit 80b8491
Show file tree
Hide file tree
Showing 48 changed files with 2,125 additions and 425 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ static/*
vendor/
var/
.phpunit.cache/
phpunit.xml
54 changes: 54 additions & 0 deletions Client/Client.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
<?php

declare(strict_types=1);

namespace RedditImage\Client;

use RedditImage\Exception\ClientException;

/**
 * Small cURL-based HTTP client.
 *
 * Every request is sent with the user agent given at construction time,
 * follows redirects, and treats HTTP error statuses as transfer failures
 * (CURLOPT_FAILONERROR).
 */
class Client {
    private string $userAgent;

    /**
     * @param string $userAgent value sent as the User-Agent of every request
     */
    public function __construct(string $userAgent) {
        $this->userAgent = $userAgent;
    }

    /**
     * Fetches a URL and decodes its body as a JSON array.
     *
     * @param string   $url     address to fetch
     * @param string[] $headers extra HTTP header lines, passed to CURLOPT_HTTPHEADER
     *
     * @return array the decoded JSON document (objects decoded as associative arrays)
     *
     * @throws ClientException when the transfer fails or the payload is not a JSON array/object
     * @throws \JsonException  when the body is not valid JSON
     */
    public function jsonGet(string $url, array $headers = []): array {
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_HEADER => 0,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_FAILONERROR => true,
            CURLOPT_USERAGENT => $this->userAgent,
            CURLOPT_HTTPHEADER => $headers,
        ]);
        $jsonString = curl_exec($ch);

        // Read the error state BEFORE closing the handle: before PHP 8.0
        // curl_close() destroys the resource, so a later curl_error() call
        // cannot return the failure message any more.
        $errorNumber = curl_errno($ch);
        $errorMessage = curl_error($ch);
        curl_close($ch);

        // curl_exec() yields false on failure; never hand that to json_decode().
        if (0 !== $errorNumber || !is_string($jsonString)) {
            throw new ClientException($errorMessage);
        }

        $decoded = json_decode($jsonString, true, 512, JSON_THROW_ON_ERROR);
        // Valid JSON can still be a scalar or null, which would violate the
        // declared array return type; report it as a client failure instead.
        if (!is_array($decoded)) {
            throw new ClientException('Unexpected JSON payload: expected an object or an array');
        }

        return $decoded;
    }

    /**
     * Tells whether a URL answers a body-less request with HTTP status 200.
     *
     * @param string   $url     address to probe
     * @param string[] $headers extra HTTP header lines, passed to CURLOPT_HTTPHEADER
     *
     * @return bool true only when the final HTTP status code is exactly 200
     */
    public function isAccessible(string $url, array $headers = []): bool {
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_NOBODY => true,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_FAILONERROR => true,
            CURLOPT_USERAGENT => $this->userAgent,
            CURLOPT_HTTPHEADER => $headers,
        ]);
        curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        return 200 === $httpCode;
    }
}
10 changes: 10 additions & 0 deletions Exception/ClientException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?php

declare(strict_types=1);

namespace RedditImage\Exception;

use \Exception;

/**
 * Exception type raised by the HTTP client when a request fails.
 */
class ClientException extends \Exception {
}
4 changes: 0 additions & 4 deletions Media/Image.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@ public function __construct(string $url) {
$this->url = $url;
}

/**
 * Returns the URL held by this object.
 *
 * @return string the value of the url property
 */
public function getUrl(): string {
return $this->url;
}

public function toDomElement(\DomDocument $domDocument): \DomElement {
$image = $domDocument->createElement('img');
$image->setAttribute('src', $this->url);
Expand Down
22 changes: 22 additions & 0 deletions Media/Link.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?php

declare(strict_types=1);

namespace RedditImage\Media;

/**
 * Media item rendered as a paragraph containing a hyperlink whose target
 * and visible text are both the URL given at construction.
 */
class Link implements DomElementInterface {
    private string $url;

    /**
     * @param string $url address used as both the link target and its label
     */
    public function __construct(string $url) {
        $this->url = $url;
    }

    /**
     * Builds the `<p><a href="url">url</a></p>` element for this link.
     *
     * @param \DomDocument $domDocument document used to create the nodes
     *
     * @return \DomElement the paragraph element wrapping the anchor
     */
    public function toDomElement(\DomDocument $domDocument): \DomElement {
        $anchor = $domDocument->createElement('a');
        $anchor->setAttribute('href', $this->url);
        $anchor->appendChild($domDocument->createTextNode($this->url));

        $paragraph = $domDocument->createElement('p');
        $paragraph->appendChild($anchor);

        return $paragraph;
    }
}
31 changes: 31 additions & 0 deletions Processor/AbstractProcessor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<?php

declare(strict_types=1);

namespace RedditImage\Processor;

use RedditImage\Settings;

/**
 * Base class shared by the entry processors.
 *
 * Stores the settings object, registers the concrete processor class on it,
 * and provides the list of transformers that subclasses fill in.
 */
abstract class AbstractProcessor {
    /** Substring searched for in an entry link to recognise Reddit entries. */
    protected const MATCH_REDDIT = 'reddit.com';

    /** @var Settings */
    protected $settings;

    /** @var TransformerInterface[] */
    protected array $transformers = [];

    /**
     * @param Settings $settings settings object; it is told which processor class uses it
     */
    public function __construct(Settings $settings) {
        $this->settings = $settings;
        $this->settings->setProcessor(get_class($this));
    }

    /**
     * @param FreshRSS_Entry $entry
     * @return FreshRSS_Entry
     */
    abstract public function process($entry);

    /**
     * Tells whether the entry link contains the Reddit marker.
     *
     * @param FreshRSS_Entry $entry entry whose link() is inspected
     *
     * @return bool true when MATCH_REDDIT occurs anywhere in the link
     */
    protected function isRedditLink($entry): bool {
        // strpos() returns 0 for a match at the very start of the link; a
        // plain boolean cast would turn that hit into false, so compare
        // strictly against false instead.
        return false !== strpos($entry->link(), static::MATCH_REDDIT);
    }
}
129 changes: 129 additions & 0 deletions Processor/BeforeDisplayProcessor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
<?php

declare(strict_types=1);

namespace RedditImage\Processor;

use \Throwable;
use Minz_Log;
use RedditImage\Content;
use RedditImage\Settings;
use RedditImage\Transformer\Agnostic\ImageTransformer as AgnosticImageTransformer;
use RedditImage\Transformer\Agnostic\LinkTransformer as AgnosticLinkTransformer;
use RedditImage\Transformer\Agnostic\VideoTransformer as AgnosticVideoTransformer;
use RedditImage\Transformer\Imgur\ImageTransformer as ImgurImageTransformer;
use RedditImage\Transformer\Imgur\VideoTransformer as ImgurVideoTransformer;

/**
 * Processor that rewrites the content of Reddit entries before display.
 *
 * The transformers it registers depend on the display settings; the link
 * transformer is always registered last.
 */
class BeforeDisplayProcessor extends AbstractProcessor {
    /**
     * @param Settings $settings settings driving which transformers are registered
     */
    public function __construct(Settings $settings) {
        parent::__construct($settings);

        if ($this->settings->getDisplayImage()) {
            array_push(
                $this->transformers,
                new AgnosticImageTransformer($this->settings),
                new ImgurVideoTransformer($this->settings),
                new ImgurImageTransformer($this->settings)
            );
        }
        if ($this->settings->getDisplayVideo()) {
            $this->transformers[] = new AgnosticVideoTransformer($this->settings);
        }
        $this->transformers[] = new AgnosticLinkTransformer($this->settings);
    }

    /**
     * Rebuilds the entry content as improved + real + original + metadata
     * parts and points the entry link at the content link. Entries whose
     * link is not a Reddit link are returned untouched.
     *
     * @param FreshRSS_Entry $entry
     * @return FreshRSS_Entry
     */
    public function process($entry) {
        if (!$this->isRedditLink($entry)) {
            return $entry;
        }

        $content = new Content($entry->content());
        $improvedPart = $this->getImprovedContent($content);
        $originalPart = $this->getOriginalContent($content);
        $metadataPart = $this->getMetadataContent($content);

        // Clear thumbnail-related attributes when thumbnails are disabled.
        if (!$this->settings->getDisplayThumbnails()) {
            foreach (['thumbnail', 'enclosures'] as $attributeName) {
                $entry->_attributes($attributeName, null);
            }
        }

        $entry->_content($improvedPart . $content->getReal() . $originalPart . $metadataPart);
        $entry->_link($content->getContentLink());

        return $entry;
    }

    /**
     * Returns the improved markup, filtered according to the display settings.
     *
     * Uses the preprocessed content when the content reports having one,
     * otherwise runs the transformers. An empty result is returned as is.
     */
    private function getImprovedContent(Content $content): string {
        if ($content->hasBeenPreprocessed()) {
            $markup = $content->getPreprocessed();
        } else {
            $markup = $this->processContent($content);
        }

        if ('' === $markup) {
            return '';
        }

        $document = new \DomDocument('1.0', 'UTF-8');
        $document->loadHTML($markup, LIBXML_NOERROR);

        if (!$this->settings->getDisplayImage()) {
            $this->removeElements($document, 'img');
        }

        if (!$this->settings->getDisplayVideo()) {
            $this->removeElements($document, 'video');
        }

        if ($this->settings->getMutedVideo()) {
            $this->muteElements($document, 'video');
            $this->muteElements($document, 'audio');
        }

        return $document->saveHTML();
    }

    /**
     * Detaches every element with the given tag name from the document.
     *
     * The node list is walked from its last item to its first.
     * See https://www.php.net/manual/en/class.domnodelist.php#83390
     */
    private function removeElements(\DomDocument $document, string $tagName): void {
        $nodes = $document->getElementsByTagName($tagName);
        $index = $nodes->length - 1;
        while ($index >= 0) {
            $node = $nodes->item($index);
            $node->parentNode->removeChild($node);
            --$index;
        }
    }

    /**
     * Sets the muted attribute on every element with the given tag name.
     */
    private function muteElements(\DomDocument $document, string $tagName): void {
        foreach ($document->getElementsByTagName($tagName) as $element) {
            $element->setAttribute('muted', 'true');
        }
    }

    /**
     * Returns the output of the first transformer that accepts the content
     * and transforms it without throwing; failures are logged and the next
     * transformer is tried. Returns an empty string when none succeeds.
     */
    private function processContent(Content $content): string {
        foreach ($this->transformers as $transformer) {
            if ($transformer->canTransform($content)) {
                try {
                    return $transformer->transform($content);
                } catch (Throwable $e) {
                    Minz_Log::error("{$e->__toString()} - {$content->getContentLink()}");
                }
            }
        }

        return '';
    }

    /**
     * Returns the raw content when the original display setting is on,
     * an empty string otherwise.
     */
    private function getOriginalContent(Content $content): string {
        return $this->settings->getDisplayOriginal() ? $content->getRaw() : '';
    }

    /**
     * Returns the metadata wrapped in a div when the metadata display
     * setting is on, an empty string otherwise.
     */
    private function getMetadataContent(Content $content): string {
        return $this->settings->getDisplayMetadata() ? "<div>{$content->getMetadata()}</div>" : '';
    }
}
68 changes: 68 additions & 0 deletions Processor/BeforeInsertProcessor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
<?php

declare(strict_types=1);

namespace RedditImage\Processor;

use \Throwable;
use Minz_Log;
use RedditImage\Client\Client;
use RedditImage\Content;
use RedditImage\Settings;
use RedditImage\Transformer\Agnostic\ImageTransformer as AgnosticImageTransformer;
use RedditImage\Transformer\Gfycat\VideoTransformer as GfycatVideoTransformer;
use RedditImage\Transformer\Imgur\GalleryWithClientIdTransformer as ImgurGalleryWithClientIdTransformer;
use RedditImage\Transformer\Imgur\ImageTransformer as ImgurImageTransformer;
use RedditImage\Transformer\Imgur\VideoTransformer as ImgurVideoTransformer;
use RedditImage\Transformer\Reddit\GalleryTransformer as RedditGalleryTransformer;
use RedditImage\Transformer\Reddit\VideoTransformer as RedditVideoTransformer;

/**
 * Processor that rewrites the content of Reddit entries before insertion.
 *
 * Every registered transformer receives the HTTP client passed to the
 * constructor.
 */
class BeforeInsertProcessor extends AbstractProcessor {
    /**
     * @param Settings $settings settings handed to every transformer
     * @param Client   $client   HTTP client set on every transformer
     */
    public function __construct(Settings $settings, Client $client) {
        parent::__construct($settings);

        array_push(
            $this->transformers,
            new AgnosticImageTransformer($this->settings),
            new ImgurGalleryWithClientIdTransformer($this->settings),
            new ImgurImageTransformer($this->settings),
            new ImgurVideoTransformer($this->settings),
            new GfycatVideoTransformer($this->settings),
            new RedditVideoTransformer($this->settings),
            new RedditGalleryTransformer($this->settings)
        );

        foreach ($this->transformers as $registered) {
            $registered->setClient($client);
        }
    }

    /**
     * Prepends the output of the first successful transformer to the raw
     * content of a Reddit entry. Entries whose link is not a Reddit link,
     * or for which no transformer produced content, are returned unchanged.
     *
     * @param FreshRSS_Entry $entry
     * @return FreshRSS_Entry
     */
    public function process($entry) {
        if (!$this->isRedditLink($entry)) {
            return $entry;
        }

        $content = new Content($entry->content());
        $newContent = '';

        foreach ($this->transformers as $transformer) {
            if ($transformer->canTransform($content)) {
                try {
                    $newContent = $transformer->transform($content);
                    // First successful transformation wins.
                    break;
                } catch (Throwable $e) {
                    // A failing transformer is logged; the next one is tried.
                    Minz_Log::error("{$e->__toString()} - {$content->getContentLink()}");
                }
            }
        }

        if ('' === $newContent) {
            return $entry;
        }

        $entry->_content("{$newContent}{$content->getRaw()}");

        return $entry;
    }
}
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ At the moment, the following resources are recognized:

&nbsp; |match | type | support
-------|------|------|--------
1 | links finished by jpg, png, gif, bmp | image | full
1 | links finished by jpg, jpeg, png, gif, bmp | image | full
2 | imgur links finished by gifv | video | full
3 | imgur links finished with a token | image | partial
4 | links finished by webm, mp4 | video | full
5 | gfycat links finished with a token | video | full
6 | redgifs links finished with a token | video | none
7 | reddit links finished with a token | video | limited (no audio)
8 | reddit image galleries | image | full
9 | imgur image galleries | image | full with API client id; partial without
9 | imgur image galleries | image | full with API client id; none without

**Note:** support for redgifs links with a token went from full to none after a change in their API.

Expand All @@ -31,6 +31,7 @@ Display images | Choose if images are displayed | **True**
Display videos | Choose if videos are displayed | **True**
Display original content | Choose if original contents are displayed | **True**
Display metadata | Choose if original content metadata are displayed | **False**
Display thumbnails | Choose if feed enclosures are displayed | **False**

**Note:**
When the *display original content* option is set to *true*, text content will be displayed twice. Once from the extracted content and once from the original content. To have a nicer interface, it is recommended to set that option to *false*.
Expand Down
Loading

0 comments on commit 80b8491

Please sign in to comment.