|
| 1 | +<?php |
| 2 | + |
| 3 | +/* |
| 4 | + * This file is part of the Symfony package. |
| 5 | + * |
| 6 | + * (c) Fabien Potencier <fabien@symfony.com> |
| 7 | + * |
| 8 | + * For the full copyright and license information, please view the LICENSE |
| 9 | + * file that was distributed with this source code. |
| 10 | + */ |
| 11 | + |
| 12 | +namespace Symfony\AI\Store\Document; |
| 13 | + |
| 14 | +use Psr\Log\LoggerInterface; |
| 15 | +use Psr\Log\NullLogger; |
| 16 | +use Symfony\AI\Store\Indexer; |
| 17 | + |
/**
 * Default DocumentProcessorInterface implementation.
 *
 * Runs a fixed sequence for every call to process(): documents are pulled
 * from the loader for each source, passed through the configured transformers
 * in order, and the result is handed to the Indexer.
 *
 * @author Oskar Stark <oskarstark@googlemail.com>
 */
final readonly class DocumentProcessor implements DocumentProcessorInterface
{
    /**
     * @param TransformerInterface[] $transformers Applied one after another, in array order
     */
    public function __construct(
        private LoaderInterface $loader,
        private array $transformers,
        private Indexer $indexer,
        private LoggerInterface $logger = new NullLogger(),
    ) {
    }

    /**
     * Loads, transforms and indexes the documents found at the given source(s).
     *
     * Recognised option keys:
     *  - "loader":      options forwarded to the loader (defaults to an empty array)
     *  - "transformer": options forwarded to every transformer (defaults to an empty array)
     *  - "chunk_size":  second argument passed to Indexer::index() (defaults to 50)
     *
     * @param string|array $source  A single source or a list of sources; a string is treated as a one-element list
     * @param array        $options See the option keys above
     */
    public function process(string|array $source, array $options = []): void
    {
        $this->logger->debug('Starting document processing', ['source' => $source, 'options' => $options]);

        $loaderOptions = $options['loader'] ?? [];
        $transformerOptions = $options['transformer'] ?? [];
        $chunkSize = $options['chunk_size'] ?? 50;

        // Everything is loaded up front, before the first transformer runs.
        $loaded = $this->loadFromSources((array) $source, $loaderOptions);

        // Fold the documents through the transformer chain: each transformer
        // receives the output of the previous one.
        $prepared = array_reduce(
            $this->transformers,
            static fn ($documents, $transformer) => ($transformer)($documents, $transformerOptions),
            $loaded,
        );

        $this->indexer->index($prepared, $chunkSize);

        $this->logger->debug('Document processing completed', ['source' => $source]);
    }

    /**
     * Invokes the loader once per source and gathers every yielded document
     * into a single flat list, preserving source order.
     *
     * @param array $sources       Sources to hand to the loader, one call each
     * @param array $loaderOptions Options passed unchanged to every loader call
     *
     * @return array Flat, zero-indexed list of all loaded documents
     */
    private function loadFromSources(array $sources, array $loaderOptions): array
    {
        $collected = [];

        foreach ($sources as $entry) {
            foreach (($this->loader)($entry, $loaderOptions) as $loadedDocument) {
                $collected[] = $loadedDocument;
            }
        }

        return $collected;
    }
}
0 commit comments