Skip to content

Commit aebf524

Browse files
authored
Merge pull request #1744 from hydephp/even-smarter-sitemap-generation
[2.x] Even smarter sitemap generation
2 parents 5811af9 + b8d30ba commit aebf524

File tree

4 files changed

+224
-53
lines changed

4 files changed

+224
-53
lines changed

RELEASE_NOTES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ This serves two purposes:
2727
- Minor: Data collection files are now validated for syntax errors during discovery in https://github.com/hydephp/develop/pull/1732
2828
- Minor: Methods in the `Includes` facade now return `HtmlString` objects instead of `string` in https://github.com/hydephp/develop/pull/1738. For more information, see below.
2929
- Minor: `Includes::path()` and `Includes::get()` methods now normalize paths to be basenames to match the behaviour of the other include methods in https://github.com/hydephp/develop/pull/1738. This means that nested directories are no longer supported, as you should use a data collection for that.
30+
- Minor: The `processing_time_ms` attribute in the `sitemap.xml` file has now been removed in https://github.com/hydephp/develop/pull/1744
31+
- Improved the sitemap data generation to be smarter and more dynamic in https://github.com/hydephp/develop/pull/1744
3032
- The `hasFeature` method on the Hyde facade and HydeKernel now only accepts a Feature enum value instead of a string for its parameter.
3133
- Changed how the documentation search is generated, to be an `InMemoryPage` instead of a post-build task.
3234
- Media asset files are now copied using the new build task instead of the deprecated `BuildService::transferMediaAssets()` method.

packages/framework/src/Framework/Features/XmlGenerators/SitemapGenerator.php

Lines changed: 42 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -8,29 +8,25 @@
88

99
use Hyde\Hyde;
1010
use SimpleXMLElement;
11-
use Hyde\Facades\Config;
11+
use Hyde\Pages\HtmlPage;
1212
use Hyde\Pages\BladePage;
1313
use Hyde\Pages\MarkdownPage;
1414
use Hyde\Pages\MarkdownPost;
15+
use Hyde\Facades\Filesystem;
16+
use Hyde\Pages\InMemoryPage;
1517
use Hyde\Support\Models\Route;
18+
use Illuminate\Support\Carbon;
1619
use Hyde\Pages\DocumentationPage;
1720
use Hyde\Foundation\Facades\Routes;
18-
use Hyde\Framework\Concerns\TracksExecutionTime;
1921

20-
use function blank;
21-
use function filemtime;
2222
use function in_array;
2323
use function date;
24-
use function time;
25-
use function str_starts_with;
2624

2725
/**
2826
* @see https://www.sitemaps.org/protocol.html
2927
*/
3028
class SitemapGenerator extends BaseXmlGenerator
3129
{
32-
use TracksExecutionTime;
33-
3430
public function generate(): static
3531
{
3632
Routes::all()->each(function (Route $route): void {
@@ -40,17 +36,8 @@ public function generate(): static
4036
return $this;
4137
}
4238

43-
public function getXml(): string
44-
{
45-
$this->xmlElement->addAttribute('processing_time_ms', $this->getFormattedProcessingTime());
46-
47-
return parent::getXml();
48-
}
49-
5039
protected function constructBaseElement(): void
5140
{
52-
$this->startClock();
53-
5441
$this->xmlElement = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="https://www.sitemaps.org/schemas/sitemap/0.9"></urlset>');
5542
$this->xmlElement->addAttribute('generator', 'HydePHP '.Hyde::version());
5643
}
@@ -61,62 +48,69 @@ protected function addRoute(Route $route): void
6148

6249
$this->addChild($urlItem, 'loc', $this->resolveRouteLink($route));
6350
$this->addChild($urlItem, 'lastmod', $this->getLastModDate($route->getSourcePath()));
64-
$this->addChild($urlItem, 'changefreq', 'daily');
51+
$this->addChild($urlItem, 'changefreq', $this->generateChangeFrequency(...$this->getRouteInformation($route)));
52+
$this->addChild($urlItem, 'priority', $this->generatePriority(...$this->getRouteInformation($route)));
53+
}
6554

66-
if (Config::getBool('hyde.sitemap.dynamic_priority', true)) {
67-
$this->addChild($urlItem, 'priority', $this->getPriority(
68-
$route->getPageClass(), $route->getPage()->getIdentifier()
69-
));
70-
}
55+
protected function resolveRouteLink(Route $route): string
56+
{
57+
return Hyde::url($route->getOutputPath());
7158
}
7259

7360
protected function getLastModDate(string $file): string
7461
{
75-
return date('c', @filemtime($file) ?: time());
62+
return date('c', @Filesystem::lastModified($file) ?: Carbon::now()->timestamp);
7663
}
7764

78-
protected function getPriority(string $pageClass, string $slug): string
65+
/**
66+
* @param class-string<\Hyde\Pages\Concerns\HydePage> $pageClass
67+
* @return numeric-string
68+
*/
69+
protected function generatePriority(string $pageClass, string $identifier): string
7970
{
8071
$priority = 0.5;
8172

82-
if (in_array($pageClass, [BladePage::class, MarkdownPage::class])) {
73+
if (in_array($pageClass, [BladePage::class, MarkdownPage::class, DocumentationPage::class])) {
8374
$priority = 0.9;
84-
if ($slug === 'index') {
75+
76+
if ($identifier === 'index') {
8577
$priority = 1;
8678
}
87-
if ($slug === '404') {
88-
$priority = 0.5;
89-
}
9079
}
9180

92-
if ($pageClass === DocumentationPage::class) {
93-
$priority = 0.9;
81+
if (in_array($pageClass, [MarkdownPost::class, InMemoryPage::class, HtmlPage::class])) {
82+
$priority = 0.75;
9483
}
9584

96-
if ($pageClass === MarkdownPost::class) {
97-
$priority = 0.75;
85+
if ($identifier === '404') {
86+
$priority = 0.25;
9887
}
9988

10089
return (string) $priority;
10190
}
10291

103-
/** @return numeric-string */
104-
protected function getFormattedProcessingTime(): string
105-
{
106-
return (string) $this->getExecutionTimeInMs();
107-
}
108-
109-
protected function resolveRouteLink(Route $route): string
92+
/**
93+
* @param class-string<\Hyde\Pages\Concerns\HydePage> $pageClass
94+
* @return 'always'|'hourly'|'daily'|'weekly'|'monthly'|'yearly'|'never'
95+
*/
96+
protected function generateChangeFrequency(string $pageClass, string $identifier): string
11097
{
111-
$baseUrl = Config::getNullableString('hyde.url');
98+
$frequency = 'weekly';
11299

113-
if (blank($baseUrl) || str_starts_with($baseUrl, 'http://localhost')) {
114-
// While the sitemap spec requires a full URL, we rather fall back
115-
// to using relative links instead of using localhost links.
100+
if (in_array($pageClass, [BladePage::class, MarkdownPage::class, DocumentationPage::class])) {
101+
$frequency = 'daily';
102+
}
116103

117-
return $route->getLink();
118-
} else {
119-
return Hyde::url($route->getOutputPath());
104+
if ($identifier === '404') {
105+
$frequency = 'monthly';
120106
}
107+
108+
return $frequency;
109+
}
110+
111+
/** @return array{class-string<\Hyde\Pages\Concerns\HydePage>, string} */
112+
protected function getRouteInformation(Route $route): array
113+
{
114+
return [$route->getPageClass(), $route->getPage()->getIdentifier()];
121115
}
122116
}

packages/framework/tests/Feature/Commands/BuildSitemapCommandTest.php

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
namespace Hyde\Framework\Testing\Feature\Commands;
66

7-
use Hyde\Facades\Filesystem;
87
use Hyde\Hyde;
98
use Hyde\Testing\TestCase;
109

@@ -16,14 +15,33 @@ class BuildSitemapCommandTest extends TestCase
1615
{
1716
public function testSitemapIsGeneratedWhenConditionsAreMet()
1817
{
19-
$this->withSiteUrl();
20-
config(['hyde.generate_sitemap' => true]);
18+
config(['hyde.url' => 'https://example.com']);
19+
20+
$this->cleanUpWhenDone('_site/sitemap.xml');
2121

2222
$this->assertFileDoesNotExist(Hyde::path('_site/sitemap.xml'));
2323

24-
$this->artisan('build:sitemap')->assertExitCode(0);
24+
$this->artisan('build:sitemap')
25+
->expectsOutputToContain('Generating sitemap...')
26+
->doesntExpectOutputToContain('Skipped')
27+
->expectsOutputToContain(' > Created _site/sitemap.xml')
28+
->assertExitCode(0);
29+
2530
$this->assertFileExists(Hyde::path('_site/sitemap.xml'));
31+
}
32+
33+
public function testSitemapIsNotGeneratedWhenConditionsAreNotMet()
34+
{
35+
config(['hyde.url' => '']);
2636

27-
Filesystem::unlink('_site/sitemap.xml');
37+
$this->assertFileDoesNotExist(Hyde::path('_site/sitemap.xml'));
38+
39+
$this->artisan('build:sitemap')
40+
->expectsOutputToContain('Generating sitemap...')
41+
->expectsOutputToContain('Skipped')
42+
->expectsOutput(' > Cannot generate sitemap without a valid base URL')
43+
->assertExitCode(0);
44+
45+
$this->assertFileDoesNotExist(Hyde::path('_site/sitemap.xml'));
2846
}
2947
}
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Hyde\Framework\Testing\Feature;
6+
7+
use Mockery;
8+
use Hyde\Hyde;
9+
use Hyde\Testing\TestCase;
10+
use Illuminate\Support\Carbon;
11+
use Illuminate\Support\Facades\File;
12+
use Illuminate\Filesystem\Filesystem;
13+
14+
/**
15+
* High level test of the sitemap generation feature.
16+
*
17+
* It contains a setup that covers all code paths, proving 100% coverage in actual usage.
18+
*
19+
* @see \Hyde\Framework\Testing\Feature\Services\SitemapServiceTest
20+
* @see \Hyde\Framework\Testing\Feature\Commands\BuildSitemapCommandTest
21+
*
22+
* @covers \Hyde\Framework\Features\XmlGenerators\SitemapGenerator
23+
* @covers \Hyde\Framework\Actions\PostBuildTasks\GenerateSitemap
24+
* @covers \Hyde\Console\Commands\BuildSitemapCommand
25+
*/
26+
class SitemapFeatureTest extends TestCase
27+
{
28+
public function testTheSitemapFeature()
29+
{
30+
Carbon::setTestNow('2024-01-01 12:00:00');
31+
$filesystem = Mockery::mock(Filesystem::class)->makePartial();
32+
$filesystem->shouldReceive('lastModified')->andReturn(Carbon::now()->timestamp);
33+
File::swap($filesystem);
34+
35+
$this->cleanUpWhenDone('_site/sitemap.xml');
36+
$this->setUpBroadSiteStructure();
37+
$this->withSiteUrl();
38+
39+
$this->artisan('build:sitemap')
40+
->expectsOutputToContain('Created _site/sitemap.xml')
41+
->assertExitCode(0);
42+
43+
$this->assertFileExists('_site/sitemap.xml');
44+
45+
$this->assertSameXml(
46+
'<?xml version="1.0" encoding="UTF-8"?>'."\n{$this->stripFormatting($this->expected(Hyde::version()))}\n",
47+
file_get_contents('_site/sitemap.xml')
48+
);
49+
}
50+
51+
protected function setUpBroadSiteStructure(): void
52+
{
53+
$this->file('_pages/about.md', "# About\n\nThis is the about page.");
54+
$this->file('_pages/contact.html', '<h1>Contact</h1><p>This is the contact page.</p>');
55+
$this->file('_posts/hello-world.md', "# Hello, World!\n\nThis is the first post.");
56+
$this->file('_posts/second-post.md', "# Second Post\n\nThis is the second post.");
57+
$this->file('_docs/index.md', "# Documentation\n\nThis is the documentation index.");
58+
$this->file('_docs/installation.md', "# Installation\n\nThis is the installation guide.");
59+
$this->file('_docs/usage.md', "# Usage\n\nThis is the usage guide.");
60+
$this->file('_docs/404.md', "# 404\n\nThis is the 404 page.");
61+
}
62+
63+
protected function expected(string $version): string
64+
{
65+
return <<<XML
66+
<urlset xmlns="https://www.sitemaps.org/schemas/sitemap/0.9" generator="HydePHP $version">
67+
<url>
68+
<loc>https://example.com/contact.html</loc>
69+
<lastmod>2024-01-01T12:00:00+00:00</lastmod>
70+
<changefreq>weekly</changefreq>
71+
<priority>0.75</priority>
72+
</url>
73+
<url>
74+
<loc>https://example.com/404.html</loc>
75+
<lastmod>2024-01-01T12:00:00+00:00</lastmod>
76+
<changefreq>monthly</changefreq>
77+
<priority>0.25</priority>
78+
</url>
79+
<url>
80+
<loc>https://example.com/index.html</loc>
81+
<lastmod>2024-01-01T12:00:00+00:00</lastmod>
82+
<changefreq>daily</changefreq>
83+
<priority>1</priority>
84+
</url>
85+
<url>
86+
<loc>https://example.com/about.html</loc>
87+
<lastmod>2024-01-01T12:00:00+00:00</lastmod>
88+
<changefreq>daily</changefreq>
89+
<priority>0.9</priority>
90+
</url>
91+
<url>
92+
<loc>https://example.com/posts/hello-world.html</loc>
93+
<lastmod>2024-01-01T12:00:00+00:00</lastmod>
94+
<changefreq>weekly</changefreq>
95+
<priority>0.75</priority>
96+
</url>
97+
<url>
98+
<loc>https://example.com/posts/second-post.html</loc>
99+
<lastmod>2024-01-01T12:00:00+00:00</lastmod>
100+
<changefreq>weekly</changefreq>
101+
<priority>0.75</priority>
102+
</url>
103+
<url>
104+
<loc>https://example.com/docs/404.html</loc>
105+
<lastmod>2024-01-01T12:00:00+00:00</lastmod>
106+
<changefreq>monthly</changefreq>
107+
<priority>0.25</priority>
108+
</url>
109+
<url>
110+
<loc>https://example.com/docs/index.html</loc>
111+
<lastmod>2024-01-01T12:00:00+00:00</lastmod>
112+
<changefreq>daily</changefreq>
113+
<priority>1</priority>
114+
</url>
115+
<url>
116+
<loc>https://example.com/docs/installation.html</loc>
117+
<lastmod>2024-01-01T12:00:00+00:00</lastmod>
118+
<changefreq>daily</changefreq>
119+
<priority>0.9</priority>
120+
</url>
121+
<url>
122+
<loc>https://example.com/docs/usage.html</loc>
123+
<lastmod>2024-01-01T12:00:00+00:00</lastmod>
124+
<changefreq>daily</changefreq>
125+
<priority>0.9</priority>
126+
</url>
127+
<url>
128+
<loc>https://example.com/docs/search.json</loc>
129+
<lastmod>2024-01-01T12:00:00+00:00</lastmod>
130+
<changefreq>weekly</changefreq>
131+
<priority>0.5</priority>
132+
</url>
133+
<url>
134+
<loc>https://example.com/docs/search.html</loc>
135+
<lastmod>2024-01-01T12:00:00+00:00</lastmod>
136+
<changefreq>weekly</changefreq>
137+
<priority>0.5</priority>
138+
</url>
139+
</urlset>
140+
XML;
141+
}
142+
143+
protected function stripFormatting(string $xml): string
144+
{
145+
return implode('', array_map('trim', explode("\n", $xml)));
146+
}
147+
148+
protected function expandLines(string $xml): string
149+
{
150+
return str_replace('><', ">\n<", $xml);
151+
}
152+
153+
protected function assertSameXml(string $expected, string $actual): void
154+
{
155+
$this->assertSame($this->expandLines($expected), $this->expandLines($actual));
156+
}
157+
}

0 commit comments

Comments
 (0)