2626# along with this program; if not, write to the Free Software Foundation, Inc.,
2727# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2828
29+ """MySQL-backed vector store for embeddings and semantic document retrieval.
30+
31+ Provides a VectorStore implementation persisting documents, metadata, and
32+ embeddings in MySQL, plus similarity search utilities.
33+ """
34+
2935import json
3036
3137from typing import Any , Iterable , List , Optional , Sequence , Union
3541from langchain_core .documents import Document
3642from langchain_core .embeddings import Embeddings
3743from langchain_core .vectorstores import VectorStore
44+ from pydantic import PrivateAttr
45+
3846from mysql .ai .genai .embedding import MyEmbeddings
3947from mysql .ai .utils import (
4048 VAR_NAME_SPACE ,
4856 source_schema ,
4957 table_exists ,
5058)
51-
5259from mysql .connector .abstracts import MySQLConnectionAbstract
53- from pydantic import PrivateAttr
5460
5561BASIC_EMBEDDING_QUERY = "Hello world!"
5662EMBEDDING_SOURCE = "external_source"
@@ -78,7 +84,8 @@ class MyVectorStore(VectorStore):
7884 db_connection (MySQLConnectionAbstract): Active MySQL database connection.
7985 embedder (Embeddings): Embeddings generator for computing vector representations.
8086 schema_name (str): SQL schema for table storage.
81- table_name (Optional[str]): Name of the active table backing the store (or None until created).
87+ table_name (Optional[str]): Name of the active table backing the store
88+ (or None until created).
8289 embedding_dimension (int): Size of embedding vectors stored.
8390 next_id (int): Internal counter for unique document ID generation.
8491 """
@@ -116,8 +123,11 @@ def __init__(
116123 self ._db_connection = db_connection
117124 self ._table_name : Optional [str ] = None
118125
119- # Embedding dimension determined using an example call. Assumes embeddings have fixed length.
120- self ._embedding_dimension = len (self ._embedder .embed_query (BASIC_EMBEDDING_QUERY ))
126+ # Embedding dimension determined using an example call.
127+ # Assumes embeddings have fixed length.
128+ self ._embedding_dimension = len (
129+ self ._embedder .embed_query (BASIC_EMBEDDING_QUERY )
130+ )
121131
122132 def _get_ids (self , num_ids : int ) -> list [str ]:
123133 """
@@ -174,7 +184,7 @@ def _make_vector_store(self) -> None:
174184
175185 self ._table_name = table_name
176186
177- def delete (self , ids : Optional [Sequence [str ]] = None , ** kwargs : Any ) -> None :
187+ def delete (self , ids : Optional [Sequence [str ]] = None , ** _ : Any ) -> None :
178188 """
179189 Delete documents by ID. Optionally deletes the vector table if empty after deletions.
180190
@@ -190,7 +200,8 @@ def delete(self, ids: Optional[Sequence[str]] = None, **kwargs: Any) -> None:
190200 If an operational error occurs during execution.
191201
192202 Notes:
193- If the backing table is empty after deletions, the table is dropped and table_name is set to None.
203+ If the backing table is empty after deletions, the table is dropped and
204+ table_name is set to None.
194205 """
195206 with atomic_transaction (self ._db_connection ) as cursor :
196207 if ids :
@@ -221,7 +232,7 @@ def add_texts(
221232 texts : Iterable [str ],
222233 metadatas : Optional [list [dict ]] = None ,
223234 ids : Optional [List [str ]] = None ,
224- ** kwargs : dict ,
235+ ** _ : dict ,
225236 ) -> List [str ]:
226237 """
227238 Add a batch of text strings and corresponding metadata to the vector store.
@@ -312,7 +323,7 @@ def add_documents(
312323 """
313324 if ids and len (ids ) != len (documents ):
314325 msg = (
315- f "ids must be the same length as documents. "
326+ "ids must be the same length as documents. "
316327 f"Got { len (ids )} ids and { len (documents )} documents."
317328 )
318329 raise ValueError (msg )
@@ -357,7 +368,8 @@ def similarity_search(
357368 Args:
358369 query: String query to embed and use for similarity search.
359370 k: Number of top documents to return.
360- kwargs: options to pass to ML_SIMILARITY_SEARCH. Currently supports distance_metric, max_distance, percentage_distance, and segment_overlap
371+ kwargs: Options to pass to ML_SIMILARITY_SEARCH. Currently supports
372+ distance_metric, max_distance, percentage_distance, and segment_overlap.
361373
362374 Returns:
363375 List of Document objects, ordered from most to least similar.
@@ -399,8 +411,14 @@ def similarity_search(
399411 similarity_search_query = f"""
400412 CALL sys.ML_SIMILARITY_SEARCH(
401413 @{ VAR_EMBEDDING } ,
402- JSON_ARRAY('{ self ._schema_name } .{ self ._table_name } '),
403- JSON_OBJECT("segment", "content", "segment_embedding", "embed", "document_name", "id"),
414+ JSON_ARRAY(
415+ '{ self ._schema_name } .{ self ._table_name } '
416+ ),
417+ JSON_OBJECT(
418+ "segment", "content",
419+ "segment_embedding", "embed",
420+ "document_name", "id"
421+ ),
404422 { k } ,
405423 %s,
406424 NULL,
@@ -425,13 +443,17 @@ def similarity_search(
425443 for context in context_maps :
426444 execute_sql (
427445 cursor ,
428- f"SELECT id, content, metadata FROM { self ._schema_name } .{ self ._table_name } WHERE id =%s" ,
446+ (
447+ "SELECT id, content, metadata "
448+ f"FROM { self ._schema_name } .{ self ._table_name } "
449+ "WHERE id = %s"
450+ ),
429451 params = (context ["document_name" ],),
430452 )
431- id , content , metadata = cursor .fetchone ()
453+ doc_id , content , metadata = cursor .fetchone ()
432454
433455 doc_args = {
434- "id" : id ,
456+ "id" : doc_id ,
435457 "page_content" : content ,
436458 }
437459 if metadata is not None :
@@ -450,8 +472,10 @@ def __enter__(self) -> "VectorStore":
450472 The current MyVectorStore object, allowing use within a `with` statement block.
451473
452474 Usage Notes:
453- - Intended for use in a `with` statement to ensure automatic cleanup of resources.
454- - No special initialization occurs during context entry, but enables proper context-managed lifecycle.
475+ - Intended for use in a `with` statement to ensure automatic
476+ cleanup of resources.
477+ - No special initialization occurs during context entry, but enables
478+ proper context-managed lifecycle.
455479
456480 Example:
457481 with MyVectorStore(db_connection, embedder) as vectorstore:
@@ -468,7 +492,8 @@ def __exit__(
468492 exc_tb : Union [object , None ],
469493 ) -> None :
470494 """
471- Exit the runtime context for the vector store, ensuring all storage resources are cleaned up.
495+ Exit the runtime context for the vector store, ensuring all storage
496+ resources are cleaned up.
472497
473498 Args:
474499 exc_type: The exception type, if any exception occurred in the context block.
0 commit comments