|
13 | 13 |
|
14 | 14 | use Psr\Log\LoggerInterface; |
15 | 15 | use Psr\Log\NullLogger; |
| 16 | +use Symfony\AI\Store\Document\LoaderInterface; |
16 | 17 | use Symfony\AI\Store\Document\TextDocument; |
| 18 | +use Symfony\AI\Store\Document\TransformerInterface; |
17 | 19 | use Symfony\AI\Store\Document\VectorizerInterface; |
18 | 20 |
|
19 | 21 | /** |
20 | 22 | * @author Christopher Hertel <mail@christopher-hertel.de> |
21 | 23 | */ |
22 | 24 | final readonly class Indexer implements IndexerInterface |
23 | 25 | { |
/**
 * Collects the collaborators used by the indexer.
 *
 * @param LoaderInterface|null   $loader       Optional loader; when it is null, __invoke() throws a \LogicException
 * @param TransformerInterface[] $transformers Transformers that __invoke() applies one after another, in array order
 * @param LoggerInterface        $logger       Receives the debug messages; a NullLogger is used when none is given
 */
public function __construct(
    private VectorizerInterface $vectorizer,
    private StoreInterface $store,
    private ?LoaderInterface $loader = null,
    private array $transformers = [],
    private LoggerInterface $logger = new NullLogger(),
) {}
@@ -56,4 +63,46 @@ public function index(TextDocument|iterable $documents, int $chunkSize = 50): vo |
56 | 63 |
|
57 | 64 | $this->logger->debug(0 === $counter ? 'No documents to index' : \sprintf('Indexed %d documents', $counter)); |
58 | 65 | } |
| 66 | + |
/**
 * Runs the whole pipeline for one or several sources: load, transform, then index().
 *
 * Each source is handed to the configured loader and every document it yields is
 * collected into a single array before any transformer runs. The transformers are
 * then chained in array order, and the final result is passed to index().
 *
 * Option keys read by this method:
 *  - "loader":      options array forwarded to the loader (default: [])
 *  - "transformer": options array forwarded to every transformer (default: [])
 *  - "chunk_size":  second argument given to index() (default: 50)
 *
 * @param string|array<string> $source  Source identifier (file path, URL, etc.) or array of sources
 * @param array<string, mixed> $options Processing options
 *
 * @throws \LogicException When no loader was configured in the constructor
 */
public function __invoke(string|array $source, array $options = []): void
{
    // Fail before logging or touching anything when there is nothing to load with.
    $loader = $this->loader ?? throw new \LogicException('Cannot process sources without a loader. Either provide documents directly to index() or configure a loader in the constructor.');

    $this->logger->debug('Starting document processing', ['source' => $source, 'options' => $options]);

    // These lookups do not change between iterations, so resolve them once.
    $loaderOptions = $options['loader'] ?? [];
    $transformerOptions = $options['transformer'] ?? [];

    // Load: a lone string is treated as a one-element list of sources.
    $loaded = [];
    foreach (\is_array($source) ? $source : [$source] as $entry) {
        foreach ($loader($entry, $loaderOptions) as $item) {
            $loaded[] = $item;
        }
    }

    // Transform: the output of one transformer is the input of the next.
    $prepared = array_reduce(
        $this->transformers,
        static fn ($carry, $transform) => $transform($carry, $transformerOptions),
        $loaded,
    );

    // Vectorize and store.
    $this->index($prepared, $options['chunk_size'] ?? 50);

    $this->logger->debug('Document processing completed', ['source' => $source]);
}
59 | 108 | } |
0 commit comments