4
4
import asyncio
5
5
from collections .abc import Callable
6
6
from dataclasses import dataclass , field
7
- from typing import Annotated
7
+ from typing import Annotated , Literal
8
8
from uuid import uuid4
9
9
10
10
import numpy as np
11
11
12
+ from samples .concepts .memory .utils import print_record
13
+ from samples .concepts .resources .utils import Colors , print_with_color
12
14
from semantic_kernel import Kernel
13
15
from semantic_kernel .connectors .ai .open_ai import (
14
16
AzureTextEmbedding ,
15
17
OpenAIEmbeddingPromptExecutionSettings ,
16
18
OpenAITextEmbedding ,
17
19
)
18
20
from semantic_kernel .connectors .memory .azure_ai_search import AzureAISearchCollection
19
- from semantic_kernel .connectors .memory .azure_cosmos_db import AzureCosmosDBNoSQLCollection
21
+ from semantic_kernel .connectors .memory .azure_cosmos_db import (
22
+ AzureCosmosDBforMongoDBCollection ,
23
+ AzureCosmosDBNoSQLCollection ,
24
+ )
20
25
from semantic_kernel .connectors .memory .in_memory import InMemoryVectorCollection
21
26
from semantic_kernel .connectors .memory .postgres import PostgresCollection
22
27
from semantic_kernel .connectors .memory .qdrant import QdrantCollection
23
28
from semantic_kernel .connectors .memory .redis import RedisHashsetCollection , RedisJsonCollection
24
29
from semantic_kernel .connectors .memory .weaviate import WeaviateCollection
25
30
from semantic_kernel .data import (
31
+ DISTANCE_FUNCTION_DIRECTION_HELPER ,
26
32
DistanceFunction ,
27
33
IndexKind ,
28
34
VectorizableTextSearchMixin ,
29
35
VectorizedSearchMixin ,
30
36
VectorSearchFilter ,
31
37
VectorSearchOptions ,
32
- VectorSearchResult ,
33
38
VectorStoreRecordCollection ,
34
39
VectorStoreRecordDataField ,
35
40
VectorStoreRecordKeyField ,
39
44
vectorstoremodel ,
40
45
)
41
46
47
+ # This is a rather complex sample, showing how to use the vector store
48
+ # with a number of different collections.
49
+ # It also shows how to use the vector store with a number of different data models.
50
+ # It also uses all the types of search available in the vector store.
51
+ # For a simpler example, see "simple_memory.py"
42
52
43
- def get_data_model_array (index_kind : IndexKind , distance_function : DistanceFunction ) -> type :
44
- @vectorstoremodel
45
- @dataclass
46
- class DataModelArray :
47
- vector : Annotated [
48
- np .ndarray | None ,
49
- VectorStoreRecordVectorField (
50
- embedding_settings = {"embedding" : OpenAIEmbeddingPromptExecutionSettings (dimensions = 1536 )},
51
- index_kind = index_kind ,
52
- dimensions = 1536 ,
53
- distance_function = distance_function ,
54
- property_type = "float" ,
55
- serialize_function = np .ndarray .tolist ,
56
- deserialize_function = np .array ,
57
- ),
58
- ] = None
59
- id : Annotated [str , VectorStoreRecordKeyField ()] = field (default_factory = lambda : str (uuid4 ()))
60
- content : Annotated [
61
- str ,
62
- VectorStoreRecordDataField (
63
- has_embedding = True ,
64
- embedding_property_name = "vector" ,
65
- property_type = "str" ,
66
- is_full_text_searchable = True ,
67
- ),
68
- ] = "content1"
69
- title : Annotated [str , VectorStoreRecordDataField (property_type = "str" , is_full_text_searchable = True )] = "title"
70
- tag : Annotated [str , VectorStoreRecordDataField (property_type = "str" , is_filterable = True )] = "tag"
71
53
72
- return DataModelArray
54
+ def get_data_model (type : Literal ["array" , "list" ], index_kind : IndexKind , distance_function : DistanceFunction ) -> type :
55
+ if type == "array" :
73
56
57
+ @vectorstoremodel
58
+ @dataclass
59
+ class DataModelArray :
60
+ vector : Annotated [
61
+ np .ndarray | None ,
62
+ VectorStoreRecordVectorField (
63
+ embedding_settings = {"embedding" : OpenAIEmbeddingPromptExecutionSettings (dimensions = 1536 )},
64
+ index_kind = index_kind ,
65
+ dimensions = 1536 ,
66
+ distance_function = distance_function ,
67
+ property_type = "float" ,
68
+ serialize_function = np .ndarray .tolist ,
69
+ deserialize_function = np .array ,
70
+ ),
71
+ ] = None
72
+ id : Annotated [str , VectorStoreRecordKeyField ()] = field (default_factory = lambda : str (uuid4 ()))
73
+ content : Annotated [
74
+ str ,
75
+ VectorStoreRecordDataField (
76
+ has_embedding = True ,
77
+ embedding_property_name = "vector" ,
78
+ property_type = "str" ,
79
+ is_full_text_searchable = True ,
80
+ ),
81
+ ] = "content1"
82
+ title : Annotated [str , VectorStoreRecordDataField (property_type = "str" , is_full_text_searchable = True )] = (
83
+ "title"
84
+ )
85
+ tag : Annotated [str , VectorStoreRecordDataField (property_type = "str" , is_filterable = True )] = "tag"
86
+
87
+ return DataModelArray
74
88
75
- def get_data_model_list (index_kind : IndexKind , distance_function : DistanceFunction ) -> type :
76
89
@vectorstoremodel
77
90
@dataclass
78
91
class DataModelList :
@@ -103,9 +116,10 @@ class DataModelList:
103
116
104
117
105
118
collection_name = "test"
119
+ distance_function = DistanceFunction .COSINE_SIMILARITY
106
120
# Depending on the vector database, the index kind and distance function may need to be adjusted,
107
121
# since not all combinations are supported by all databases.
108
- DataModel = get_data_model_array ( IndexKind .HNSW , DistanceFunction . COSINE_SIMILARITY )
122
+ DataModel = get_data_model ( "array" , IndexKind .IVF_FLAT , distance_function )
109
123
110
124
# A list of VectorStoreRecordCollection that can be used.
111
125
# Available collections are:
@@ -124,6 +138,8 @@ class DataModelList:
124
138
# https://learn.microsoft.com/en-us/azure/cosmos-db/how-to-develop-emulator?tabs=windows%2Cpython&pivots=api-nosql
125
139
# Please see the link above to learn how to set up the Azure Cosmos NoSQL emulator on your machine.
126
140
# For this sample to work with Azure Cosmos NoSQL, please adjust the index_kind of the data model to QUANTIZED_FLAT.
141
+ # - azure_cosmos_mongodb: Azure Cosmos DB for MongoDB
142
+ # https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/introduction
127
143
# This is represented as a mapping from the collection name to a
128
144
# function which returns the collection.
129
145
# Using a function allows for lazy initialization of the collection,
@@ -162,29 +178,22 @@ class DataModelList:
162
178
collection_name = collection_name ,
163
179
create_database = True ,
164
180
),
181
+ "azure_cosmos_mongodb" : lambda : AzureCosmosDBforMongoDBCollection (
182
+ data_model_type = DataModel ,
183
+ collection_name = collection_name ,
184
+ ),
165
185
}
166
186
167
187
168
- def print_record (result : VectorSearchResult | None = None , record : DataModel | None = None ):
169
- if result :
170
- record = result .record
171
- print (f" Found id: { record .id } " )
172
- print (f" Content: { record .content } " )
173
- if record .vector is not None :
174
- print (f" Vector (first five): { record .vector [:5 ]} " )
175
-
176
-
177
- async def main (collection : str , use_azure_openai : bool , embedding_model : str ):
188
+ async def main (collection : str , use_azure_openai : bool ):
178
189
print ("-" * 30 )
179
190
kernel = Kernel ()
180
- service_id = "embedding"
181
- if use_azure_openai :
182
- embedder = AzureTextEmbedding (service_id = service_id , deployment_name = embedding_model )
183
- else :
184
- embedder = OpenAITextEmbedding (service_id = service_id , ai_model_id = embedding_model )
191
+ embedder = (
192
+ AzureTextEmbedding (service_id = "embedding" ) if use_azure_openai else OpenAITextEmbedding (service_id = "embedding" )
193
+ )
185
194
kernel .add_service (embedder )
186
195
async with collections [collection ]() as record_collection :
187
- print (f"Creating { collection } collection!" )
196
+ print_with_color (f"Creating { collection } collection!" , Colors . CGREY )
188
197
await record_collection .delete_collection ()
189
198
await record_collection .create_collection_if_not_exists ()
190
199
@@ -200,16 +209,22 @@ async def main(collection: str, use_azure_openai: bool, embedding_model: str):
200
209
title = "Semantic Kernel Languages" ,
201
210
tag = "general" ,
202
211
)
212
+ record3 = DataModel (
213
+ content = "```python\n from semantic_kernel import Kernel\n kernel = Kernel()\n ```" ,
214
+ id = "d5c9913a-e015-4944-b960-5d4a84bca002" ,
215
+ title = "Code sample" ,
216
+ tag = "code" ,
217
+ )
203
218
204
- print ("Adding records!" )
219
+ print_with_color ("Adding records!" , Colors . CBLUE )
205
220
records = await VectorStoreRecordUtils (kernel ).add_vector_to_records (
206
- [record1 , record2 ], data_model_type = DataModel
221
+ [record1 , record2 , record3 ], data_model_type = DataModel
207
222
)
208
223
209
224
keys = await record_collection .upsert_batch (records )
210
225
print (f" Upserted { keys = } " )
211
- print ("Getting records!" )
212
- results = await record_collection .get_batch ([record1 .id , record2 .id ])
226
+ print_with_color ("Getting records!" , Colors . CBLUE )
227
+ results = await record_collection .get_batch ([record1 .id , record2 .id , record3 . id ])
213
228
if results :
214
229
[print_record (record = result ) for result in results ]
215
230
else :
@@ -219,9 +234,11 @@ async def main(collection: str, use_azure_openai: bool, embedding_model: str):
219
234
include_vectors = True ,
220
235
filter = VectorSearchFilter .equal_to ("tag" , "general" ),
221
236
)
237
+ print ("-" * 30 )
238
+ print_with_color ("Searching for 'python', with filter 'tag == general'" , Colors .CBLUE )
222
239
if isinstance (record_collection , VectorTextSearchMixin ):
223
240
print ("-" * 30 )
224
- print ("Using text search" )
241
+ print_with_color ("Using text search" , Colors . CBLUE )
225
242
try :
226
243
search_results = await record_collection .text_search ("python" , options )
227
244
if search_results .total_count == 0 :
@@ -232,14 +249,16 @@ async def main(collection: str, use_azure_openai: bool, embedding_model: str):
232
249
print ("Text search could not execute." )
233
250
if isinstance (record_collection , VectorizedSearchMixin ):
234
251
print ("-" * 30 )
235
- print (
236
- "Using vectorized search, depending on the distance function, "
237
- "the better score might be higher or lower."
252
+ print_with_color (
253
+ f"Using vectorized search, for { distance_function .value } , "
254
+ f"the { 'higher' if DISTANCE_FUNCTION_DIRECTION_HELPER [distance_function ](1 , 0 ) else 'lower' } the score the better" # noqa: E501
255
+ f"" ,
256
+ Colors .CBLUE ,
238
257
)
239
258
try :
240
259
search_results = await record_collection .vectorized_search (
241
260
vector = (await embedder .generate_raw_embeddings (["python" ]))[0 ],
242
- options = VectorSearchOptions ( vector_field_name = "vector" , include_vectors = True ) ,
261
+ options = options ,
243
262
)
244
263
if search_results .total_count == 0 :
245
264
print ("\n Nothing found...\n " )
@@ -249,7 +268,11 @@ async def main(collection: str, use_azure_openai: bool, embedding_model: str):
249
268
print ("Vectorized search could not execute." )
250
269
if isinstance (record_collection , VectorizableTextSearchMixin ):
251
270
print ("-" * 30 )
252
- print ("Using vectorizable text search" )
271
+ print_with_color (
272
+ f"Using vectorized search, for { distance_function .value } , "
273
+ f"the { 'higher' if DISTANCE_FUNCTION_DIRECTION_HELPER [distance_function ](1 , 0 ) else 'lower' } the score the better" , # noqa: E501
274
+ Colors .CBLUE ,
275
+ )
253
276
try :
254
277
search_results = await record_collection .vectorizable_text_search ("python" , options )
255
278
if search_results .total_count == 0 :
@@ -259,9 +282,9 @@ async def main(collection: str, use_azure_openai: bool, embedding_model: str):
259
282
except Exception :
260
283
print ("Vectorizable text search could not execute." )
261
284
print ("-" * 30 )
262
- print ("Deleting collection!" )
285
+ print_with_color ("Deleting collection!" , Colors . CBLUE )
263
286
await record_collection .delete_collection ()
264
- print ("Done!" )
287
+ print_with_color ("Done!" , Colors . CGREY )
265
288
266
289
267
290
if __name__ == "__main__" :
@@ -271,10 +294,5 @@ async def main(collection: str, use_azure_openai: bool, embedding_model: str):
271
294
parser .add_argument ("--collection" , default = "in_memory" , choices = collections .keys (), help = "What collection to use." )
272
295
# Option of whether to use OpenAI or Azure OpenAI.
273
296
parser .add_argument ("--use-azure-openai" , action = "store_true" , help = "Use Azure OpenAI instead of OpenAI." )
274
- # Model
275
- parser .add_argument (
276
- "--model" , default = "text-embedding-3-small" , help = "The model or deployment to use for embeddings."
277
- )
278
297
args = parser .parse_args ()
279
-
280
- asyncio .run (main (collection = args .collection , use_azure_openai = args .use_azure_openai , embedding_model = args .model ))
298
+ asyncio .run (main (collection = args .collection , use_azure_openai = args .use_azure_openai ))
0 commit comments