Skip to content

Commit

Permalink
ENH: Provide more control over elemental block indexing.
Browse files Browse the repository at this point in the history
Provides a new configuration variable to exclude specific elemental block classes from being indexed in search.
Provides a new extension point for modifying exactly what gets indexed for each block.
Provides a new configuration variable to define the delimiter used
between blocks in the search index.

A single space delimiter is non-intrusive and will not require any changes within existing projects to avoid changes to the way content is indexed - but providing an option for configuring the delimiter can help avoid false-positive results in phrase queries.
  • Loading branch information
GuySartorelli committed Feb 2, 2022
1 parent 5298524 commit d970378
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 4 deletions.
23 changes: 23 additions & 0 deletions docs/en/searching-blocks.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,27 @@ You can disable it via YAML config in favour of your own index definition:
SilverStripe\FullTextSearch\Search\FullTextSearch:
indexes:
- MyCustomIndex
```
You can define whether each block is included in your search index using the
`search_indexable` configuration variable, which is `true` by default:

```yml
App\Models\MyCustomElementalBlock:
search_indexable: false
```

You can also customise the content that is indexed for your blocks. By default
the block is rendered in full using the templating engine, and the resultant
markup is included in the index. You can override the `getContentForSearchIndex`
method on your elemental blocks to change that. This is useful, for example, if
your templates include hardcoded text or references to other content you don't
want to be indexed.

If you want to use a specific delimiter between each block, that can be configured
as well. The default is a space, but you might for example want to use an ellipsis
to make it clear in search results where one piece of content ends and another begins.
```yml
Page:
search_index_element_delimiter: ' ... '
```
23 changes: 20 additions & 3 deletions src/Extensions/ElementalPageExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,17 @@ class ElementalPageExtension extends ElementalAreasExtension
'ElementalArea',
];

/**
* The delimiter to separate distinct elements in indexed content.
*
* When using the getElementsForSearch() method to index all elements in a single field,
* a custom delimiter can be used to help avoid false positive results for phrase queries.
*
* @config
* @var string
*/
private static $search_index_element_delimiter = ' ';

/**
* Returns the contents of each ElementalArea has_one's markup for use in Solr or Elastic search indexing
*
Expand All @@ -43,8 +54,14 @@ public function getElementsForSearch()
/** @var ElementalArea $area */
$area = $this->owner->$key();
if ($area) {
// Replace HTML tags with spaces
$output[] = strip_tags(str_replace('<', ' <', $area->forTemplate()));
foreach ($area->Elements() as $element) {
if ($element->getSearchIndexable()) {
$content = $element->getContentForSearchIndex();
if ($content) {
$output[] = $content;
}
}
}
}
}
} finally {
Expand All @@ -53,7 +70,7 @@ public function getElementsForSearch()
// CMS layout can break on the response. (SilverStripe 4.1.1)
SSViewer::set_themes($oldThemes);
}
return implode($output);
return implode($this->owner->config()->get('search_index_element_delimiter'), $output);
}

public function MetaTags(&$tags)
Expand Down
36 changes: 36 additions & 0 deletions src/Models/BaseElement.php
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,15 @@ class BaseElement extends DataObject implements CMSPreviewable
*/
private static $displays_title_in_template = true;

/**
* Determines whether a block should be indexable in search.
*
* @config
* @var boolean
* @see ElementalPageExtension::getElementsForSearch()
*/
private static $search_indexable = true;

/**
* Store used anchor names, this is to avoid title clashes
* when calling 'getAnchor'
Expand Down Expand Up @@ -476,6 +485,33 @@ public function Top()
return (Controller::has_curr()) ? Controller::curr() : null;
}

/**
 * Reports whether this elemental block may be included in the search index.
 *
 * The answer is read from the search_indexable configuration variable and cast
 * to a boolean. Override this method when a block needs more complex logic.
 *
 * @return bool
 */
public function getSearchIndexable(): bool
{
    $isIndexable = $this->config()->get('search_indexable');

    return (bool) $isIndexable;
}

/**
 * Provides content to be indexed in search.
 *
 * Renders the block via forTemplate(), inserts a space before every "<" so that
 * words in adjacent tags stay separated, strips the tags and trims the result.
 * Extensions may then alter the text through the updateContentForSearchIndex
 * hook, which is handed the content variable.
 *
 * @return string
 */
public function getContentForSearchIndex(): string
{
    // NOTE(review): forTemplate() is not visible here; it is assumed it may return null or a
    // stringable object. Casting first avoids handing null to str_replace(), which is
    // deprecated as of PHP 8.1, and yields the same string in every other case.
    $markup = (string) $this->forTemplate();
    // Strips tags but be sure there's a space between words.
    $content = trim(strip_tags(str_replace('<', ' <', $markup)));
    // Allow projects to update indexable content of third-party elements.
    $this->extend('updateContentForSearchIndex', $content);
    return $content;
}

/**
* Default way to render element in templates. Note that all blocks should
* be rendered through their {@link ElementController} class as this
Expand Down
18 changes: 18 additions & 0 deletions tests/BaseElementTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
use DNADesign\Elemental\Models\BaseElement;
use DNADesign\Elemental\Models\ElementalArea;
use DNADesign\Elemental\Models\ElementContent;
use DNADesign\Elemental\Tests\Src\TestContentForSearchIndexExtension;
use DNADesign\Elemental\Tests\Src\TestElement;
use DNADesign\Elemental\Tests\Src\TestPage;
use Page;
Expand Down Expand Up @@ -236,4 +237,21 @@ public function testOnBeforeWriteNoParent()

$this->assertEquals(0, (int) $element1->Sort);
}

/**
* Checks that getContentForSearchIndex() returns the fixture element's content with its tags stripped.
*/
public function testGetContentForSearchIndex()
{
$element = $this->objFromFixture(ElementContent::class, 'content4');
// Content should have tags stripped with a space before what were the < characters
// One closing tag plus one opening tag means there should be two spaces between paragraphs
$this->assertEquals('One paragraph And another one', $element->getContentForSearchIndex());
}

/**
 * Checks that an extension's updateContentForSearchIndex hook can replace the indexed content.
 */
public function testUpdateContentForSearchIndex()
{
    ElementContent::add_extension(TestContentForSearchIndexExtension::class);
    try {
        $element = $this->objFromFixture(ElementContent::class, 'content4');
        // Content should be updated by the extension
        $this->assertEquals('This is the updated content.', $element->getContentForSearchIndex());
    } finally {
        // Detach the extension even when the assertion above throws, so the cleanup is never skipped.
        ElementContent::remove_extension(TestContentForSearchIndexExtension::class);
    }
}
}
15 changes: 15 additions & 0 deletions tests/ElementalPageExtensionTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,19 @@ public function testGetElementsForSearch()
$this->assertStringNotContainsString('oneMore', $output);
$this->assertStringNotContainsString('paragraphsAnd', $output);
}

/**
 * Checks that the text joining adjacent elements follows the search_index_element_delimiter config.
 */
public function testSearchIndexElementDelimiter()
{
    /** @var TestPage $testPage */
    $testPage = $this->objFromFixture(TestPage::class, 'page_with_html_elements');

    // Without any override, neighbouring elements are separated by a single space
    $withDefaultDelimiter = $testPage->getElementsForSearch();
    $this->assertStringContainsString('another one More paragraphs', $withDefaultDelimiter);

    // Once a delimiter is configured, it is what separates neighbouring elements
    $customDelimiter = ' ... ';
    Config::modify()->set(TestPage::class, 'search_index_element_delimiter', $customDelimiter);
    $withCustomDelimiter = $testPage->getElementsForSearch();
    $this->assertStringContainsString('another one ... More paragraphs', $withCustomDelimiter);
}
}
2 changes: 1 addition & 1 deletion tests/ElementalPageExtensionTest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ DNADesign\Elemental\Models\ElementContent:
ParentID: =>DNADesign\Elemental\Models\ElementalArea.area53
content5:
Title: More paragraph content
Sort: 1
Sort: 2
HTML: '<p>More paragraphs</p><p>And yet more</p>'
ParentID: =>DNADesign\Elemental\Models\ElementalArea.area53
DNADesign\Elemental\Tests\Src\TestPage:
Expand Down
14 changes: 14 additions & 0 deletions tests/Src/TestContentForSearchIndexExtension.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?php

namespace DNADesign\Elemental\Tests\Src;

use SilverStripe\Core\Extension;
use SilverStripe\Dev\TestOnly;

/**
* Test-only extension that overrides the content an element provides for search indexing.
*/
class TestContentForSearchIndexExtension extends Extension implements TestOnly
{
/**
* Replaces the content, received by reference, with a fixed string.
*
* @param string $content Content to be indexed; overwritten in place.
*/
public function updateContentForSearchIndex(&$content)
{
$content = 'This is the updated content.';
}
}

0 comments on commit d970378

Please sign in to comment.