Skip to content
This repository was archived by the owner on Jul 16, 2025. It is now read-only.

Commit be704f7

Browse files
authored
feat: add MariaDB store (#342)
Related to #28
1 parent 076ff69 commit be704f7

File tree

5 files changed

+248
-0
lines changed

5 files changed

+248
-0
lines changed

.env

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,6 @@ RUN_EXPENSIVE_EXAMPLES=false
6464

6565
# For using Gemini
6666
GOOGLE_API_KEY=
67+
68+
# For MariaDB store. Server defined in compose.yaml
69+
MARIADB_URI=pdo-mysql://root@127.0.0.1:3309/my_database

compose.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Local MariaDB server for the MariaDB vector store example.
services:
  mariadb:
    # The MariaDB store in this commit requires MariaDB >= 11.7.
    image: mariadb:11.7
    environment:
      # Root account without a password, matching the MARIADB_URI entry in .env.
      MARIADB_ALLOW_EMPTY_ROOT_PASSWORD: 1
      # Database created on first start.
      MARIADB_DATABASE: my_database
    ports:
      # Host port 3309 -> container port 3306.
      - "3309:3306"

composer.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,11 @@
3737
"webmozart/assert": "^1.11"
3838
},
3939
"require-dev": {
40+
"ext-pdo": "*",
4041
"codewithkyrian/chromadb-php": "^0.2.1 || ^0.3 || ^0.4",
4142
"codewithkyrian/transformers": "^0.5.3",
4243
"async-aws/bedrock-runtime": "^0.1.0",
44+
"doctrine/dbal": "^3.0 || ^4.0",
4345
"mongodb/mongodb": "^1.21 || ^2.0",
4446
"php-cs-fixer/shim": "^3.70",
4547
"phpstan/phpstan": "^2.0",
@@ -58,9 +60,11 @@
5860
"symfony/var-dumper": "^6.4 || ^7.1"
5961
},
6062
"suggest": {
63+
"ext-pdo": "For using MariaDB as retrieval vector store.",
6164
"async-aws/bedrock-runtime": "For using the Bedrock platform.",
6265
"codewithkyrian/chromadb-php": "For using the ChromaDB as retrieval vector store.",
6366
"codewithkyrian/transformers": "For using the TransformersPHP with FFI to run models in PHP.",
67+
"doctrine/dbal": "For using MariaDB via Doctrine as retrieval vector store",
6468
"mongodb/mongodb": "For using MongoDB Atlas as retrieval vector store.",
6569
"probots-io/pinecone-php": "For using the Pinecone as retrieval vector store.",
6670
"symfony/css-selector": "For using the YouTube transcription tool.",
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
<?php
2+
3+
use Doctrine\DBAL\DriverManager;
4+
use Doctrine\DBAL\Tools\DsnParser;
5+
use PhpLlm\LlmChain\Chain\Chain;
6+
use PhpLlm\LlmChain\Chain\Toolbox\ChainProcessor;
7+
use PhpLlm\LlmChain\Chain\Toolbox\Tool\SimilaritySearch;
8+
use PhpLlm\LlmChain\Chain\Toolbox\Toolbox;
9+
use PhpLlm\LlmChain\Platform\Bridge\OpenAI\Embeddings;
10+
use PhpLlm\LlmChain\Platform\Bridge\OpenAI\GPT;
11+
use PhpLlm\LlmChain\Platform\Bridge\OpenAI\PlatformFactory;
12+
use PhpLlm\LlmChain\Platform\Message\Message;
13+
use PhpLlm\LlmChain\Platform\Message\MessageBag;
14+
use PhpLlm\LlmChain\Store\Bridge\MariaDB\Store;
15+
use PhpLlm\LlmChain\Store\Document\Metadata;
16+
use PhpLlm\LlmChain\Store\Document\TextDocument;
17+
use PhpLlm\LlmChain\Store\Indexer;
18+
use Symfony\Component\Dotenv\Dotenv;
19+
use Symfony\Component\Uid\Uuid;
20+
21+
require_once dirname(__DIR__, 2).'/vendor/autoload.php';
(new Dotenv())->loadEnv(dirname(__DIR__, 2).'/.env');

// Both the OpenAI key and the MariaDB DSN are required for this example.
if (empty($_ENV['OPENAI_API_KEY']) || empty($_ENV['MARIADB_URI'])) {
    echo 'Please set OPENAI_API_KEY and MARIADB_URI environment variables.'.\PHP_EOL;
    exit(1);
}

// initialize the store
$store = Store::fromDbal(
    connection: DriverManager::getConnection((new DsnParser())->parse($_ENV['MARIADB_URI'])),
    tableName: 'my_table',
    indexName: 'my_index',
    vectorFieldName: 'embedding',
);

// our data
$movies = [
    ['title' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'director' => 'Christopher Nolan'],
    ['title' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'director' => 'The Wachowskis'],
    ['title' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'director' => 'Francis Ford Coppola'],
];

// create one text document per movie; the list is initialised explicitly so it
// is always defined, even if $movies is emptied while experimenting
$documents = [];
foreach ($movies as $movie) {
    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
        metadata: new Metadata($movie),
    );
}

// initialize the table
$store->initialize();

// create embeddings for documents and store them
$platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']);
$indexer = new Indexer($platform, $embeddings = new Embeddings(), $store);
$indexer->index($documents);

$model = new GPT(GPT::GPT_4O_MINI);

// expose the store to the chain through the SimilaritySearch tool
$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
$toolbox = Toolbox::create($similaritySearch);
$processor = new ChainProcessor($toolbox);
$chain = new Chain($platform, $model, [$processor], [$processor]);

$messages = new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of the mafia?')
);
$response = $chain->call($messages);

echo $response->getContent().\PHP_EOL;

src/Store/Bridge/MariaDB/Store.php

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpLlm\LlmChain\Store\Bridge\MariaDB;
6+
7+
use Doctrine\DBAL\Connection;
8+
use Doctrine\DBAL\Exception as DBALException;
9+
use PhpLlm\LlmChain\Platform\Vector\Vector;
10+
use PhpLlm\LlmChain\Store\Document\Metadata;
11+
use PhpLlm\LlmChain\Store\Document\VectorDocument;
12+
use PhpLlm\LlmChain\Store\Exception\InvalidArgumentException;
13+
use PhpLlm\LlmChain\Store\InitializableStoreInterface;
14+
use PhpLlm\LlmChain\Store\VectorStoreInterface;
15+
use Symfony\Component\Uid\Uuid;
16+
17+
/**
 * Vector store that keeps documents in a MariaDB table with a VECTOR column
 * and a vector index on that column.
 *
 * Requires MariaDB >=11.7.
 *
 * @see https://mariadb.org/rag-with-mariadb-vector/
 *
 * @author Valtteri R <valtzu@gmail.com>
 */
final readonly class Store implements VectorStoreInterface, InitializableStoreInterface
{
    /**
     * @param \PDO   $connection      PDO connection every statement is run on
     * @param string $tableName       The name of the table
     * @param string $indexName       The name of the vector search index
     * @param string $vectorFieldName The name of the field in the index that contains the vector
     */
    public function __construct(
        private \PDO $connection,
        private string $tableName,
        private string $indexName,
        private string $vectorFieldName,
    ) {
    }

    /**
     * Creates a store from an existing PDO connection.
     */
    public static function fromPdo(\PDO $connection, string $tableName, string $indexName = 'embedding', string $vectorFieldName = 'embedding'): self
    {
        return new self($connection, $tableName, $indexName, $vectorFieldName);
    }

    /**
     * Creates a store from a Doctrine DBAL connection by unwrapping its native PDO handle.
     *
     * @throws DBALException
     * @throws InvalidArgumentException When the DBAL connection is not backed by PDO
     */
    public static function fromDbal(Connection $connection, string $tableName, string $indexName = 'embedding', string $vectorFieldName = 'embedding'): self
    {
        $pdo = $connection->getNativeConnection();

        if (!$pdo instanceof \PDO) {
            throw new InvalidArgumentException('Only DBAL connections using PDO driver are supported.');
        }

        return self::fromPdo($pdo, $tableName, $indexName, $vectorFieldName);
    }

    /**
     * Inserts the given documents, or updates metadata and vector of rows whose id already exists.
     *
     * @throws \JsonException When metadata or vector data cannot be encoded as JSON
     */
    public function add(VectorDocument ...$documents): void
    {
        // Nothing to write, so do not prepare a statement at all.
        if ([] === $documents) {
            return;
        }

        $statement = $this->connection->prepare(
            \sprintf(
                <<<'SQL'
                    INSERT INTO %1$s (id, metadata, %2$s)
                    VALUES (:id, :metadata, VEC_FromText(:vector))
                    ON DUPLICATE KEY UPDATE metadata = :metadata, %2$s = VEC_FromText(:vector)
                    SQL,
                $this->tableName,
                $this->vectorFieldName,
            ),
        );

        foreach ($documents as $document) {
            // JSON_THROW_ON_ERROR: fail loudly instead of binding `false` as a column value.
            $statement->execute([
                'id' => $document->id->toBinary(),
                'metadata' => json_encode($document->metadata->getArrayCopy(), \JSON_THROW_ON_ERROR),
                'vector' => json_encode($document->vector->getData(), \JSON_THROW_ON_ERROR),
            ]);
        }
    }

    /**
     * Returns the stored documents ordered by ascending Euclidean distance to $vector.
     *
     * The `score` of each returned document is that distance. When $minScore is
     * given, only rows whose distance is >= $minScore are returned.
     *
     * NOTE(review): a smaller distance means a closer match, so the `>=` filter
     * keeps the rows farther away than the threshold - confirm this is the
     * intended meaning of $minScore.
     *
     * @param array{
     *     limit?: positive-int,
     * } $options `limit` caps the number of rows and defaults to 5
     *
     * @return list<VectorDocument>
     *
     * @throws \JsonException When the query vector cannot be encoded or a stored value cannot be decoded
     */
    public function query(Vector $vector, array $options = [], ?float $minScore = null): array
    {
        // Render the distance expression up front: sprintf() does not format its
        // arguments, so a placeholder inside an argument would reach SQL verbatim.
        $distance = \sprintf('VEC_DISTANCE_EUCLIDEAN(%s, VEC_FromText(:embedding))', $this->vectorFieldName);
        $where = null !== $minScore ? \sprintf('WHERE %s >= :minScore', $distance) : '';

        $statement = $this->connection->prepare(
            \sprintf(
                <<<'SQL'
                    SELECT id, VEC_ToText(%1$s) embedding, metadata, %2$s AS score
                    FROM %3$s
                    %4$s
                    ORDER BY score ASC
                    LIMIT %5$d
                    SQL,
                $this->vectorFieldName,
                $distance,
                $this->tableName,
                $where,
                $options['limit'] ?? 5,
            ),
        );

        $params = ['embedding' => json_encode($vector->getData(), \JSON_THROW_ON_ERROR)];

        if (null !== $minScore) {
            $params['minScore'] = $minScore;
        }

        $statement->execute($params);

        $documents = [];

        foreach ($statement->fetchAll(\PDO::FETCH_ASSOC) as $result) {
            $documents[] = new VectorDocument(
                id: Uuid::fromBinary($result['id']),
                vector: new Vector(json_decode((string) $result['embedding'], true, 512, \JSON_THROW_ON_ERROR)),
                // A NULL metadata column is treated as an empty JSON object.
                metadata: new Metadata(json_decode($result['metadata'] ?? '{}', true, 512, \JSON_THROW_ON_ERROR)),
                // The driver may hand numeric columns back as strings; normalise to float.
                score: (float) $result['score'],
            );
        }

        return $documents;
    }

    /**
     * Creates the table and its vector index if the table does not exist yet.
     *
     * The vector column is created as VECTOR(1536); this size is fixed here and
     * cannot be changed through $options.
     *
     * @param array{} $options No options are supported
     *
     * @throws InvalidArgumentException When options are passed or the server is not MariaDB >=11.7
     */
    public function initialize(array $options = []): void
    {
        if ([] !== $options) {
            throw new InvalidArgumentException('No supported options');
        }

        // Cast once so both checks below receive a string under strict_types.
        $serverVersion = (string) $this->connection->getAttribute(\PDO::ATTR_SERVER_VERSION);

        if (!str_contains($serverVersion, 'MariaDB') || version_compare($serverVersion, '11.7.0') < 0) {
            throw new InvalidArgumentException('You need MariaDB >=11.7 to use this feature');
        }

        $this->connection->exec(
            \sprintf(
                <<<'SQL'
                    CREATE TABLE IF NOT EXISTS %1$s (
                        id BINARY(16) NOT NULL PRIMARY KEY,
                        metadata JSON,
                        %2$s VECTOR(1536) NOT NULL,
                        VECTOR INDEX %3$s (%2$s)
                    )
                    SQL,
                $this->tableName,
                $this->vectorFieldName,
                $this->indexName,
            ),
        );
    }
}

0 commit comments

Comments
 (0)