Skip to content

Commit 05e6b69

Browse files
OskarStark and claude committed
Implement document indexing pipeline with InMemoryLoader
- Add IndexerInterface for document processing pipeline
- Implement Indexer class with complete load → transform → vectorize → store pipeline
- Add InMemoryLoader for loading documents from memory without external sources
- Create comprehensive test suite for InMemoryLoader
- Add example demonstrating InMemoryLoader usage
- Support null source parameter for loaders that don't require external sources
- Streamline interface comments to avoid duplication

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 0ee0c18 commit 05e6b69

File tree

15 files changed

+370
-37
lines changed

15 files changed

+370
-37
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
13+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
14+
use Symfony\AI\Store\Bridge\Local\InMemoryStore;
15+
use Symfony\AI\Store\Document\Loader\TextFileLoader;
16+
use Symfony\AI\Store\Document\Transformer\ReplaceTextTransformer;
17+
use Symfony\AI\Store\Document\Transformer\TextSplitTransformer;
18+
use Symfony\AI\Store\Document\Vectorizer;
19+
use Symfony\AI\Store\Indexer;
20+
21+
// Shared example bootstrap; presumably provides autoloading and the env()/http_client() helpers used below — confirm in bootstrap.php.
require_once dirname(__DIR__).'/bootstrap.php';

$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
$store = new InMemoryStore();

// Pipeline stages handed to the indexer: loader, transformers (in this order), vectorizer, store.
$loader = new TextFileLoader();
$transformers = [
    // Rewrites every "## Plot" heading to "## Synopsis" before the text is split.
    new ReplaceTextTransformer(search: '## Plot', replace: '## Synopsis'),
    new TextSplitTransformer(chunkSize: 500, overlap: 100),
];
$vectorizer = new Vectorizer($platform, new Embeddings('text-embedding-3-small'));

$indexer = new Indexer($loader, $transformers, $vectorizer, $store);

// The movie fixtures live two directory levels above this script.
$projectRoot = dirname(__DIR__, 2);
$movies = [
    $projectRoot.'/fixtures/movies/gladiator.md',
    $projectRoot.'/fixtures/movies/inception.md',
    $projectRoot.'/fixtures/movies/jurassic-park.md',
];

$indexer->index($movies);

// Embed the search phrase with the same model name and ask the store for the two best matches.
$queryVector = $platform->invoke(new Embeddings('text-embedding-3-small'), 'Roman gladiator revenge')->asVectors()[0];

foreach ($store->query($queryVector, ['maxItems' => 2]) as $i => $document) {
    // 1-based position followed by (at most) the first 40 characters of the document id.
    printf("%d. %s\n", $i + 1, substr($document->id, 0, 40).'...');
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
13+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
14+
use Symfony\AI\Store\Bridge\Local\InMemoryStore;
15+
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
16+
use Symfony\AI\Store\Document\Metadata;
17+
use Symfony\AI\Store\Document\TextDocument;
18+
use Symfony\AI\Store\Document\Transformer\TextSplitTransformer;
19+
use Symfony\AI\Store\Document\Vectorizer;
20+
use Symfony\AI\Store\Indexer;
21+
use Symfony\Component\Uid\Uuid;
22+
23+
// Shared example bootstrap; presumably provides autoloading and the env()/http_client() helpers used below — confirm in bootstrap.php.
require_once dirname(__DIR__).'/bootstrap.php';

$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
$store = new InMemoryStore();

// The documents are created directly in code instead of being read from a file or URL.
$aiDocument = new TextDocument(
    Uuid::v4(),
    'Artificial Intelligence is transforming the way we work and live. Machine learning algorithms can now process vast amounts of data and make predictions with remarkable accuracy.',
    new Metadata(['title' => 'AI Revolution'])
);
$climateDocument = new TextDocument(
    Uuid::v4(),
    'Climate change is one of the most pressing challenges of our time. Renewable energy sources like solar and wind power are becoming increasingly important for a sustainable future.',
    new Metadata(['title' => 'Climate Action'])
);
$documents = [$aiDocument, $climateDocument];

// Pipeline stages handed to the indexer: loader, transformers, vectorizer, store.
$loader = new InMemoryLoader($documents);
$transformers = [
    new TextSplitTransformer(chunkSize: 100, overlap: 20),
];
$vectorizer = new Vectorizer($platform, new Embeddings('text-embedding-3-small'));

$indexer = new Indexer($loader, $transformers, $vectorizer, $store);

// InMemoryLoader yields the documents it was constructed with and does not read its source argument, so null is passed.
$indexer->index(null);

// Embed the search phrase with the same model name and ask the store for the two best matches.
$queryVector = $platform->invoke(new Embeddings('text-embedding-3-small'), 'machine learning artificial intelligence')->asVectors()[0];

foreach ($store->query($queryVector, ['maxItems' => 2]) as $i => $document) {
    // 1-based position followed by (at most) the first 40 characters of the document id.
    printf("%d. %s\n", $i + 1, substr($document->id, 0, 40).'...');
}

fixtures/movies/gladiator.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Gladiator (2000)
2+
3+
**IMDB**: https://www.imdb.com/title/tt0172495/
4+
5+
**Director:** Ridley Scott
6+
7+
## Cast
8+
9+
- **Russell Crowe** as Maximus Decimus Meridius
10+
- **Joaquin Phoenix** as Emperor Commodus
11+
- **Connie Nielsen** as Lucilla
12+
- **Oliver Reed** as Proximo
13+
- **Derek Jacobi** as Senator Gracchus
14+
- **Djimon Hounsou** as Juba
15+
- **Richard Harris** as Marcus Aurelius
16+
- **Ralf Möller** as Hagen
17+
- **Tommy Flanagan** as Cicero
18+
- **David Schofield** as Falco
19+
20+
## Plot
21+
22+
A former Roman General sets out to exact vengeance against the corrupt emperor who murdered his family and sent him into slavery.
23+
24+
**Maximus Decimus Meridius** is a powerful Roman general beloved by the people and the aging Emperor **Marcus Aurelius**. As Marcus Aurelius lies dying, he makes known his wish that Maximus should succeed him and return Rome to the former glory of the Republic rather than the corrupt Empire it has become.
25+
26+
However, Marcus Aurelius's son **Commodus** learns of his father's plan and murders him before he can publicly name Maximus as his successor. Commodus then orders the execution of Maximus and his family. Maximus escapes the execution but arrives at his farm too late to save his wife and son.
27+
28+
Wounded and devastated, Maximus is captured by slave traders and forced to become a gladiator. Under the training of **Proximo**, a former gladiator, Maximus becomes a skilled fighter and eventually makes his way to the **Colosseum** in Rome, where he gains fame and the crowd's favor.
29+
30+
Using his newfound popularity with the people, Maximus seeks to avenge the murder of his family and fulfill his promise to Marcus Aurelius to restore Rome to a republic. The film culminates in a final confrontation between Maximus and Commodus in the arena.
31+
32+
The film explores themes of *honor*, *revenge*, *political corruption*, and the struggle between personal desires and duty to the greater good.

fixtures/movies/inception.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Inception (2010)
2+
3+
**IMDB**: https://www.imdb.com/title/tt1375666/
4+
5+
**Director:** Christopher Nolan
6+
7+
## Cast
8+
9+
- **Leonardo DiCaprio** as Dom Cobb
10+
- **Marion Cotillard** as Mal Cobb
11+
- **Tom Hardy** as Eames
12+
- **Elliot Page** as Ariadne
13+
- **Ken Watanabe** as Saito
14+
- **Dileep Rao** as Yusuf
15+
- **Cillian Murphy** as Robert Fischer Jr.
16+
- **Tom Berenger** as Peter Browning
17+
- **Michael Caine** as Professor Stephen Miles
18+
- **Lukas Haas** as Nash
19+
20+
## Plot
21+
22+
A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone's subconscious.
23+
24+
**Dom Cobb** is a skilled thief who specializes in *extraction* - stealing secrets from people's subconscious minds while they dream. This rare ability has made him a valuable and coveted player in the treacherous new world of corporate espionage, but it has also made him an international fugitive and cost him everything he has ever loved.
25+
26+
Now Cobb is being offered a chance at redemption. One last job could give him his life back but only if he can accomplish the impossible - **inception**. Instead of the perfect heist, Cobb and his team of specialists have to pull off the reverse: their task is not to steal an idea but to plant one. If they succeed, it could be the perfect crime.
27+
28+
The film explores themes of *reality*, *dreams*, *memory*, and the nature of consciousness through multiple layers of dream states, creating a complex narrative structure that challenges both characters and audience to question what is real.

fixtures/movies/jurassic-park.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Jurassic Park (1993)
2+
3+
**IMDB**: https://www.imdb.com/title/tt0107290/
4+
5+
**Director:** Steven Spielberg
6+
7+
## Cast
8+
9+
- **Sam Neill** as Dr. Alan Grant
10+
- **Laura Dern** as Dr. Ellie Sattler
11+
- **Jeff Goldblum** as Dr. Ian Malcolm
12+
- **Richard Attenborough** as John Hammond
13+
- **Bob Peck** as Robert Muldoon
14+
- **Martin Ferrero** as Donald Gennaro
15+
- **BD Wong** as Dr. Henry Wu
16+
- **Joseph Mazzello** as Tim Murphy
17+
- **Ariana Richards** as Lex Murphy
18+
- **Wayne Knight** as Dennis Nedry
19+
20+
## Plot
21+
22+
During a preview tour, a theme park suffers a major power breakdown that allows its cloned dinosaur exhibits to run amok.
23+
24+
Billionaire **John Hammond** has created a theme park on a remote island where he has successfully cloned dinosaurs from ancient DNA found in prehistoric mosquitoes preserved in amber. Before opening to the public, Hammond invites a select group of people to tour the park, including paleontologist **Dr. Alan Grant**, paleobotanist **Dr. Ellie Sattler**, and mathematician **Dr. Ian Malcolm**.
25+
26+
The tour begins smoothly, but things quickly go wrong when the park's computer systems are sabotaged by the disgruntled programmer **Dennis Nedry**, who is attempting to steal dinosaur embryos. The security systems fail, and the dinosaurs break free from their enclosures.
27+
28+
As the island descends into chaos, the visitors must survive encounters with various dangerous dinosaurs, including the intelligent and deadly **Velociraptors** and the massive **Tyrannosaurus Rex**. Dr. Grant finds himself responsible for Hammond's grandchildren, Tim and Lex, as they attempt to reach safety.
29+
30+
The film explores themes of *scientific ethics*, the *hubris of trying to control nature*, and the *unintended consequences of genetic engineering*. It questions whether humans have the right to resurrect extinct species and whether scientific advancement should be pursued without considering the potential risks and moral implications.

src/store/src/Document/Loader/InMemoryLoader.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ public function __construct(
3030
) {
3131
}
3232

33-
public function load(string $source, array $options = []): iterable
33+
public function load(null|string $source, array $options = []): iterable
3434
{
3535
yield from $this->documents;
3636
}

src/store/src/Document/Loader/TextFileLoader.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@
2222
*/
2323
final readonly class TextFileLoader implements LoaderInterface
2424
{
25-
public function load(string $source, array $options = []): iterable
25+
public function load(null|string $source, array $options = []): iterable
2626
{
27-
if (!is_file($source)) {
28-
throw new RuntimeException(\sprintf('File "%s" does not exist.', $source));
27+
if (null === $source || !is_file($source)) {
28+
throw new RuntimeException(\sprintf('File "%s" does not exist.', $source ?? 'null'));
2929
}
3030

3131
$content = file_get_contents($source);

src/store/src/Document/LoaderInterface.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717
interface LoaderInterface
1818
{
1919
/**
20-
* @param string $source Identifier for the loader to load the documents from, e.g. file path, folder, or URL.
20+
* @param null|string $source Identifier for the loader to load the documents from, e.g. file path, folder, or URL. May be null for loaders that do not read from an external source (e.g. InMemoryLoader).
2121
* @param array<string, mixed> $options loader specific set of options to control the loading process
2222
*
2323
* @return iterable<TextDocument> iterable of TextDocuments loaded from the source
2424
*/
25-
public function load(string $source, array $options = []): iterable;
25+
public function load(null|string $source, array $options = []): iterable;
2626
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\Document\Transformer;
13+
14+
use Symfony\AI\Store\Document\Metadata;
15+
use Symfony\AI\Store\Document\TextDocument;
16+
use Symfony\AI\Store\Document\TransformerInterface;
17+
use Symfony\AI\Store\Exception\InvalidArgumentException;
18+
use Symfony\Component\Uid\Uuid;
19+
20+
/**
21+
* Replaces specified text within document content.
22+
*
23+
* @author Oskar Stark <oskarstark@googlemail.com>
24+
*/
25+
final readonly class ReplaceTextTransformer implements TransformerInterface
{
    public const OPTION_SEARCH = 'search';
    public const OPTION_REPLACE = 'replace';

    /**
     * @param string $search  Text to look for; can be overridden per call with the "search" option
     * @param string $replace Text inserted for every occurrence; can be overridden per call with the "replace" option
     *
     * @throws InvalidArgumentException When non-default search and replace strings are identical
     */
    public function __construct(
        private string $search = '',
        private string $replace = '',
    ) {
        // Both defaults are '' and therefore equal. Validating them unconditionally made
        // `new ReplaceTextTransformer()` always throw, so the per-call options could never be
        // used with a default instance. The effective values are still validated in transform().
        if ('' !== $search || '' !== $replace) {
            self::validate($search, $replace);
        }
    }

    /**
     * Returns the given documents with every occurrence of the search text replaced.
     *
     * Values passed in $options take precedence over the constructor arguments.
     *
     * @param array{search?: string, replace?: string} $options
     *
     * @throws InvalidArgumentException When the effective search and replace strings are identical
     */
    public function transform(iterable $documents, array $options = []): iterable
    {
        $search = $options[self::OPTION_SEARCH] ?? $this->search;
        $replace = $options[self::OPTION_REPLACE] ?? $this->replace;

        // Validate before handing out the generator: code inside a generator body only runs
        // once iteration starts, which would delay the error until the documents are consumed.
        self::validate($search, $replace);

        return $this->replaceIn($documents, $search, $replace);
    }

    /**
     * Lazily yields a copy of each document whose content has $search replaced by $replace.
     */
    private function replaceIn(iterable $documents, string $search, string $replace): iterable
    {
        foreach ($documents as $document) {
            yield $document->withContent(str_replace($search, $replace, $document->content));
        }
    }

    /**
     * Rejects a configuration in which the replacement is identical to the search text.
     *
     * @throws InvalidArgumentException When both strings are identical
     */
    private static function validate(string $search, string $replace): void
    {
        if ($search === $replace) {
            throw new InvalidArgumentException('Search and replace strings must be different.');
        }
    }
}

src/store/src/Document/Transformer/TextSplitTransformer.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ public function __construct(
3434
private int $overlap = 200,
3535
) {
3636
if ($this->overlap < 0 || $this->overlap >= $this->chunkSize) {
37-
throw new InvalidArgumentException(\sprintf('Overlap must be non-negative and less than chunk size. Got chunk size: %d, overlap: %d', $this->chunkSize, $this->overlap));
37+
throw new InvalidArgumentException(sprintf('Overlap must be non-negative and less than chunk size. Got chunk size: %d, overlap: %d.', $this->chunkSize, $this->overlap));
3838
}
3939
}
4040

0 commit comments

Comments
 (0)