Skip to content

Commit d42aa8a

Browse files
committed
feature #465 [Examples][Store] Implement indexing pipeline (OskarStark)
This PR was squashed before being merged into the main branch.

Discussion
----------

[Examples][Store] Implement indexing pipeline

| Q             | A
| ------------- | ---
| Bug fix?      | no
| New feature?  | yes
| Docs?         | no
| Issues        | Fixes #429
| License       | MIT

### Needs

* symfony/ai#468

### `./runner indexer`

<img width="1832" height="664" alt="CleanShot 2025-09-08 at 17 11 53@2x" src="https://github.com/user-attachments/assets/19f32f93-f4b5-4b90-86a4-74d1623f78f1" />

### `bin/console app:blog:embed` (removed)

<img width="1780" height="306" alt="CleanShot 2025-09-08 at 17 12 45@2x" src="https://github.com/user-attachments/assets/4c8905be-f7b3-455b-8d20-112fe9f77136" />

### `bin/console app:blog:query`

<img width="1752" height="1168" alt="CleanShot 2025-09-08 at 17 13 34@2x" src="https://github.com/user-attachments/assets/45211bfb-366e-413b-b80f-33a757c63e31" />

### `bin/console ai:store:index blog`

<img width="1930" height="388" alt="CleanShot 2025-09-08 at 22 41 00@2x" src="https://github.com/user-attachments/assets/1d27d5e4-de23-4c3a-8465-a87eba79f2fc" />

Commits
-------

29349c11 [Examples][Store] Implement indexing pipeline
2 parents 184ae3a + c100665 commit d42aa8a

File tree

4 files changed

+270
-2
lines changed

4 files changed

+270
-2
lines changed

config/options.php

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,19 @@
397397
->useAttributeAsKey('name')
398398
->arrayPrototype()
399399
->children()
400+
->scalarNode('loader')
401+
->info('Service name of loader')
402+
->isRequired()
403+
->end()
404+
->variableNode('source')
405+
->info('Source identifier (file path, URL, etc.) or array of sources')
406+
->defaultNull()
407+
->end()
408+
->arrayNode('transformers')
409+
->info('Array of transformer service names')
410+
->scalarPrototype()->end()
411+
->defaultValue([])
412+
->end()
400413
->scalarNode('vectorizer')
401414
->info('Service name of vectorizer')
402415
->defaultValue(VectorizerInterface::class)

config/services.php

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
use Symfony\AI\Platform\Contract\JsonSchema\DescriptionParser;
3939
use Symfony\AI\Platform\Contract\JsonSchema\Factory as SchemaFactory;
4040
use Symfony\AI\Store\Command\DropStoreCommand;
41+
use Symfony\AI\Store\Command\IndexCommand;
4142
use Symfony\AI\Store\Command\SetupStoreCommand;
4243

4344
return static function (ContainerConfigurator $container): void {
@@ -157,5 +158,10 @@
157158
tagged_locator('ai.store', indexAttribute: 'name'),
158159
])
159160
->tag('console.command')
161+
->set('ai.command.index', IndexCommand::class)
162+
->args([
163+
tagged_locator('ai.indexer', indexAttribute: 'name'),
164+
])
165+
->tag('console.command')
160166
;
161167
};

src/AiBundle.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,11 +1068,20 @@ private function processVectorizerConfig(string $name, array $config, ContainerB
10681068
*/
10691069
private function processIndexerConfig(int|string $name, array $config, ContainerBuilder $container): void
{
    // Wrap every configured transformer service id in a Reference; a missing
    // "transformers" key is treated as an empty list.
    $transformerReferences = array_values(array_map(
        static fn ($serviceId) => new Reference($serviceId),
        $config['transformers'] ?? [],
    ));

    // Argument order: loader, vectorizer, store, source, transformers, logger.
    $arguments = [
        new Reference($config['loader']),
        new Reference($config['vectorizer']),
        new Reference($config['store']),
        $config['source'],
        $transformerReferences,
        // The logger reference is flagged IGNORE_ON_INVALID_REFERENCE, so the
        // indexer can still be built when no "logger" service is registered.
        new Reference('logger', ContainerInterface::IGNORE_ON_INVALID_REFERENCE),
    ];

    $indexer = new Definition(Indexer::class, $arguments);
    // The tag carries the indexer name as its "name" attribute.
    $indexer->addTag('ai.indexer', ['name' => $name]);

    $container->setDefinition('ai.indexer.'.$name, $indexer);
}

tests/DependencyInjection/AiBundleTest.php

Lines changed: 242 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
use PHPUnit\Framework\TestCase;
2020
use Symfony\AI\AiBundle\AiBundle;
2121
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
22+
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
23+
use Symfony\AI\Store\Document\Transformer\TextTrimTransformer;
2224
use Symfony\AI\Store\Document\Vectorizer;
2325
use Symfony\Component\Config\Definition\Exception\InvalidConfigurationException;
2426
use Symfony\Component\DependencyInjection\ContainerBuilder;
@@ -678,6 +680,7 @@ public function testIndexerWithConfiguredVectorizer()
678680
],
679681
'indexer' => [
680682
'my_indexer' => [
683+
'loader' => InMemoryLoader::class,
681684
'vectorizer' => 'ai.vectorizer.my_vectorizer',
682685
'store' => 'ai.store.memory.my_store',
683686
],
@@ -691,15 +694,251 @@ public function testIndexerWithConfiguredVectorizer()
691694
$indexerDefinition = $container->getDefinition('ai.indexer.my_indexer');
692695
$arguments = $indexerDefinition->getArguments();
693696

694-
// First argument should be a reference to the vectorizer
695697
$this->assertInstanceOf(Reference::class, $arguments[0]);
696-
$this->assertSame('ai.vectorizer.my_vectorizer', (string) $arguments[0]);
698+
$this->assertSame(InMemoryLoader::class, (string) $arguments[0]);
699+
700+
$this->assertInstanceOf(Reference::class, $arguments[1]);
701+
$this->assertSame('ai.vectorizer.my_vectorizer', (string) $arguments[1]);
697702

698703
// Should not create model-specific vectorizer when using configured one
699704
$this->assertFalse($container->hasDefinition('ai.indexer.my_indexer.vectorizer'));
700705
$this->assertFalse($container->hasDefinition('ai.indexer.my_indexer.model'));
701706
}
702707

708+
public function testIndexerWithStringSource()
{
    // A single string source must reach the indexer definition unchanged.
    $indexerConfig = [
        'loader' => InMemoryLoader::class,
        'source' => 'https://example.com/feed.xml',
        'vectorizer' => 'my_vectorizer_service',
        'store' => 'ai.store.memory.my_store',
    ];

    $builder = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => ['my_indexer' => $indexerConfig],
        ],
    ]);

    $this->assertTrue($builder->hasDefinition('ai.indexer.my_indexer'));

    $args = $builder->getDefinition('ai.indexer.my_indexer')->getArguments();

    // The source is the definition argument at index 3.
    $this->assertSame('https://example.com/feed.xml', $args[3]);
}
734+
735+
public function testIndexerWithArraySource()
{
    // Several sources given as a list must be forwarded as that same list.
    $sources = [
        '/path/to/file1.txt',
        '/path/to/file2.txt',
        'https://example.com/feed.xml',
    ];

    $builder = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => [
                'my_indexer' => [
                    'loader' => InMemoryLoader::class,
                    'source' => $sources,
                    'vectorizer' => 'my_vectorizer_service',
                    'store' => 'ai.store.memory.my_store',
                ],
            ],
        ],
    ]);

    $this->assertTrue($builder->hasDefinition('ai.indexer.my_indexer'));

    $args = $builder->getDefinition('ai.indexer.my_indexer')->getArguments();
    $actualSources = $args[3];

    $this->assertIsArray($actualSources);
    $this->assertCount(3, $actualSources);
    $this->assertSame($sources, $actualSources);
}
771+
772+
public function testIndexerWithNullSource()
{
    // "source" is deliberately left out of the indexer configuration; the
    // definition argument is then expected to be null.
    $indexerConfig = [
        'loader' => InMemoryLoader::class,
        'vectorizer' => 'my_vectorizer_service',
        'store' => 'ai.store.memory.my_store',
    ];

    $builder = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => ['my_indexer' => $indexerConfig],
        ],
    ]);

    $this->assertTrue($builder->hasDefinition('ai.indexer.my_indexer'));

    $args = $builder->getDefinition('ai.indexer.my_indexer')->getArguments();

    $this->assertNull($args[3]);
}
798+
799+
public function testIndexerWithConfiguredTransformers()
{
    // Each configured transformer service name must become a Reference,
    // in the configured order, inside the definition argument at index 4.
    $builder = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => [
                'my_indexer' => [
                    'loader' => InMemoryLoader::class,
                    'transformers' => [TextTrimTransformer::class, 'App\CustomTransformer'],
                    'vectorizer' => 'my_vectorizer_service',
                    'store' => 'ai.store.memory.my_store',
                ],
            ],
        ],
    ]);

    $this->assertTrue($builder->hasDefinition('ai.indexer.my_indexer'));

    $args = $builder->getDefinition('ai.indexer.my_indexer')->getArguments();
    $transformerRefs = $args[4];

    $this->assertIsArray($transformerRefs);
    $this->assertCount(2, $transformerRefs);

    $this->assertInstanceOf(Reference::class, $transformerRefs[0]);
    $this->assertSame(TextTrimTransformer::class, (string) $transformerRefs[0]);

    $this->assertInstanceOf(Reference::class, $transformerRefs[1]);
    $this->assertSame('App\CustomTransformer', (string) $transformerRefs[1]);
}
835+
836+
public function testIndexerWithEmptyTransformers()
{
    // An explicitly empty transformer list must stay an empty array.
    $indexerConfig = [
        'loader' => InMemoryLoader::class,
        'transformers' => [],
        'vectorizer' => 'my_vectorizer_service',
        'store' => 'ai.store.memory.my_store',
    ];

    $builder = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => ['my_indexer' => $indexerConfig],
        ],
    ]);

    $this->assertTrue($builder->hasDefinition('ai.indexer.my_indexer'));

    $args = $builder->getDefinition('ai.indexer.my_indexer')->getArguments();

    $this->assertSame([], $args[4]);
}
862+
863+
public function testIndexerWithoutTransformers()
{
    // "transformers" is deliberately left out of the indexer configuration;
    // the definition argument is then expected to be an empty array.
    $indexerConfig = [
        'loader' => InMemoryLoader::class,
        'vectorizer' => 'my_vectorizer_service',
        'store' => 'ai.store.memory.my_store',
    ];

    $builder = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => ['my_indexer' => $indexerConfig],
        ],
    ]);

    $this->assertTrue($builder->hasDefinition('ai.indexer.my_indexer'));

    $args = $builder->getDefinition('ai.indexer.my_indexer')->getArguments();

    $this->assertSame([], $args[4]);
}
889+
890+
public function testIndexerWithSourceAndTransformers()
{
    // Full wiring check: loader, vectorizer and store are References, the
    // source list is passed through, and transformers become References.
    $sources = [
        '/path/to/file1.txt',
        '/path/to/file2.txt',
    ];

    $builder = $this->buildContainer([
        'ai' => [
            'store' => ['memory' => ['my_store' => []]],
            'indexer' => [
                'my_indexer' => [
                    'loader' => InMemoryLoader::class,
                    'source' => $sources,
                    'transformers' => [TextTrimTransformer::class],
                    'vectorizer' => 'my_vectorizer_service',
                    'store' => 'ai.store.memory.my_store',
                ],
            ],
        ],
    ]);

    $this->assertTrue($builder->hasDefinition('ai.indexer.my_indexer'));

    $args = $builder->getDefinition('ai.indexer.my_indexer')->getArguments();

    // Index 0: loader.
    $this->assertInstanceOf(Reference::class, $args[0]);
    $this->assertSame(InMemoryLoader::class, (string) $args[0]);

    // Index 1: vectorizer.
    $this->assertInstanceOf(Reference::class, $args[1]);
    $this->assertSame('my_vectorizer_service', (string) $args[1]);

    // Index 2: store.
    $this->assertInstanceOf(Reference::class, $args[2]);
    $this->assertSame('ai.store.memory.my_store', (string) $args[2]);

    // Index 3: source list.
    $actualSources = $args[3];
    $this->assertIsArray($actualSources);
    $this->assertCount(2, $actualSources);
    $this->assertSame($sources, $actualSources);

    // Index 4: transformer references.
    $transformerRefs = $args[4];
    $this->assertIsArray($transformerRefs);
    $this->assertCount(1, $transformerRefs);
    $this->assertInstanceOf(Reference::class, $transformerRefs[0]);
    $this->assertSame(TextTrimTransformer::class, (string) $transformerRefs[0]);
}
941+
703942
private function buildContainer(array $configuration): ContainerBuilder
704943
{
705944
$container = new ContainerBuilder();
@@ -959,6 +1198,7 @@ private function getFullConfig(): array
9591198
],
9601199
'indexer' => [
9611200
'my_text_indexer' => [
1201+
'loader' => InMemoryLoader::class,
9621202
'vectorizer' => 'ai.vectorizer.test_vectorizer',
9631203
'store' => 'my_azure_search_store_service_id',
9641204
],

0 commit comments

Comments
 (0)