-
Notifications
You must be signed in to change notification settings - Fork 3.3k
/
VectorStore_DataIngestion_Simple.cs
114 lines (98 loc) · 4.75 KB
/
VectorStore_DataIngestion_Simple.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
// Copyright (c) Microsoft. All rights reserved.
using System.Text.Json;
using Azure.Identity;
using Memory.VectorStoreFixtures;
using Microsoft.Extensions.VectorData;
using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
using Microsoft.SemanticKernel.Connectors.Qdrant;
using Microsoft.SemanticKernel.Embeddings;
using Qdrant.Client;
namespace Memory;
/// <summary>
/// A simple example showing how to ingest data into a vector store using <see cref="QdrantVectorStore"/>.
///
/// The example shows the following steps:
/// 1. Create an embedding generator.
/// 2. Create a Qdrant Vector Store.
/// 3. Ingest some data into the vector store.
/// 4. Read the data back from the vector store.
///
/// You need a local instance of Docker running, since the associated fixture will try to start a Qdrant container in the local Docker instance to run against.
/// </summary>
[Collection("Sequential")]
public class VectorStore_DataIngestion_Simple(ITestOutputHelper output, VectorStoreQdrantContainerFixture qdrantFixture) : BaseTest(output), IClassFixture<VectorStoreQdrantContainerFixture>
{
    /// <summary>
    /// Generates embeddings for a few glossary entries, upserts them into the "skglossary"
    /// collection, reads one record back (including its vector) and writes the results out.
    /// </summary>
    [Fact]
    public async Task ExampleAsync()
    {
        // Create an embedding generation service.
        var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService(
            TestConfiguration.AzureOpenAIEmbeddings.DeploymentName,
            TestConfiguration.AzureOpenAIEmbeddings.Endpoint,
            new AzureCliCredential());

        // Initiate the docker container and construct the vector store.
        await qdrantFixture.ManualInitializeAsync();

        // The client is disposable; a using declaration releases it when the test
        // finishes instead of leaving the connection to be cleaned up by the finalizer.
        using var qdrantClient = new QdrantClient("localhost");
        var vectorStore = new QdrantVectorStore(qdrantClient);

        // Get and create collection if it doesn't exist.
        var collection = vectorStore.GetCollection<ulong, Glossary>("skglossary");
        await collection.CreateCollectionIfNotExistsAsync();

        // Create glossary entries and generate embeddings for them.
        // The embedding call is already asynchronous I/O, so it is awaited directly
        // rather than being wrapped in Task.Run; the requests still run concurrently.
        var glossaryEntries = CreateGlossaryEntries().ToList();
        var embeddingTasks = glossaryEntries
            .Select(async entry =>
            {
                entry.DefinitionEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(entry.Definition);
            })
            .ToArray();
        await Task.WhenAll(embeddingTasks);

        // Upsert the glossary entries into the collection and return their keys.
        var upsertedKeysTasks = glossaryEntries.Select(x => collection.UpsertAsync(x));
        var upsertedKeys = await Task.WhenAll(upsertedKeysTasks);

        // Retrieve one of the upserted records from the collection.
        var upsertedRecord = await collection.GetAsync(upsertedKeys.First(), new() { IncludeVectors = true });

        // Write upserted keys and one of the upserted records to the console.
        Console.WriteLine($"Upserted keys: {string.Join(", ", upsertedKeys)}");
        Console.WriteLine($"Upserted record: {JsonSerializer.Serialize(upsertedRecord)}");
    }

    /// <summary>
    /// Sample model class that represents a glossary entry.
    /// </summary>
    /// <remarks>
    /// Note that each property is decorated with an attribute that specifies how the property should be treated by the vector store.
    /// This allows us to create a collection in the vector store and upsert and retrieve instances of this class without any further configuration.
    /// </remarks>
    private sealed class Glossary
    {
        /// <summary>Unique key of the record in the collection.</summary>
        [VectorStoreRecordKey]
        public ulong Key { get; set; }

        /// <summary>The glossary term being defined.</summary>
        [VectorStoreRecordData]
        public string Term { get; set; }

        /// <summary>The human-readable definition of <see cref="Term"/>; this is the text that gets embedded.</summary>
        [VectorStoreRecordData]
        public string Definition { get; set; }

        /// <summary>Embedding generated from <see cref="Definition"/>.</summary>
        // NOTE(review): 1536 presumably has to match the output size of the configured embedding deployment — confirm.
        [VectorStoreRecordVector(1536)]
        public ReadOnlyMemory<float> DefinitionEmbedding { get; set; }
    }

    /// <summary>
    /// Create some sample glossary entries.
    /// </summary>
    /// <returns>A lazily produced sequence of sample glossary entries (without embeddings).</returns>
    private static IEnumerable<Glossary> CreateGlossaryEntries()
    {
        yield return new Glossary
        {
            Key = 1,
            Term = "API",
            Definition = "Application Programming Interface. A set of rules and specifications that allow software components to communicate and exchange data."
        };

        yield return new Glossary
        {
            Key = 2,
            Term = "Connectors",
            Definition = "Connectors allow you to integrate with various services provide AI capabilities, including LLM, AudioToText, TextToAudio, Embedding generation, etc."
        };

        yield return new Glossary
        {
            Key = 3,
            Term = "RAG",
            Definition = "Retrieval Augmented Generation - a term that refers to the process of retrieving additional data to provide as context to an LLM to use when generating a response (completion) to a user’s question (prompt)."
        };
    }
}