|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 | 4 | "cell_type": "code",
|
5 |
| - "execution_count": 15, |
| 5 | + "execution_count": 74, |
6 | 6 | "id": "f35f0324-b068-41f6-86d7-0aae90c3e9a6",
|
7 | 7 | "metadata": {
|
8 | 8 | "tags": []
|
|
16 | 16 | },
|
17 | 17 | {
|
18 | 18 | "cell_type": "code",
|
19 |
| - "execution_count": null, |
| 19 | + "execution_count": 75, |
20 | 20 | "id": "65a9f4c2-66e8-4681-a55a-eb3594cb295f",
|
21 | 21 | "metadata": {},
|
22 | 22 | "outputs": [],
|
|
30 | 30 | },
|
31 | 31 | {
|
32 | 32 | "cell_type": "code",
|
33 |
| - "execution_count": null, |
| 33 | + "execution_count": 76, |
34 | 34 | "id": "dd65419c-d9eb-4e3b-8290-c388cba5252b",
|
35 | 35 | "metadata": {
|
36 | 36 | "tags": []
|
37 | 37 | },
|
38 |
| - "outputs": [], |
| 38 | + "outputs": [ |
| 39 | + { |
| 40 | + "data": { |
| 41 | + "text/plain": [ |
| 42 | + "minio.api.Minio" |
| 43 | + ] |
| 44 | + }, |
| 45 | + "execution_count": 76, |
| 46 | + "metadata": {}, |
| 47 | + "output_type": "execute_result" |
| 48 | + } |
| 49 | + ], |
39 | 50 | "source": [
|
40 | 51 | "type(client)"
|
41 | 52 | ]
|
42 | 53 | },
|
43 | 54 | {
|
44 | 55 | "cell_type": "code",
|
45 |
| - "execution_count": null, |
| 56 | + "execution_count": 77, |
46 | 57 | "id": "490acd9d-ad92-4d55-800d-c0ffa8697da3",
|
47 | 58 | "metadata": {
|
48 | 59 | "tags": []
|
|
54 | 65 | },
|
55 | 66 | {
|
56 | 67 | "cell_type": "code",
|
57 |
| - "execution_count": null, |
| 68 | + "execution_count": 78, |
58 | 69 | "id": "7c9a9322-825d-4881-8f87-4868fbdcbb5c",
|
59 | 70 | "metadata": {
|
60 | 71 | "tags": []
|
|
63 | 74 | "source": [
|
64 | 75 | "def upload_files(bucket_name, file_location, client):\n",
|
65 | 76 | " found = False # Initialize 'found' before the try block\n",
|
| 77 | + " print(\"Current working directory:\", os.getcwd())\n", |
| 78 | + " print(\"Listing directories in the current working directory:\", os.listdir(\".\"))\n", |
| 79 | + " print(f\"Checking existence of {file_location}: \", os.path.exists(file_location))\n", |
| 80 | + "\n", |
66 | 81 | " try:\n",
|
67 | 82 | " found = client.bucket_exists(bucket_name)\n",
|
68 | 83 | " except Exception as e:\n",
|
|
100 | 115 | },
|
101 | 116 | {
|
102 | 117 | "cell_type": "code",
|
103 |
| - "execution_count": null, |
| 118 | + "execution_count": 84, |
104 | 119 | "id": "096fceeb-e788-463f-88c2-274457020b3b",
|
105 | 120 | "metadata": {
|
106 | 121 | "tags": []
|
107 | 122 | },
|
108 |
| - "outputs": [], |
| 123 | + "outputs": [ |
| 124 | + { |
| 125 | + "name": "stdout", |
| 126 | + "output_type": "stream", |
| 127 | + "text": [ |
| 128 | + "Current working directory: /home/jovyan\n", |
| 129 | + "Listing directories in the current working directory: ['tmp_docs', '.gitconfig', '.mc', 'lost+found', '.local', '.conda', '.cache', '.bashrc', 'tiledb_demo', 'minio-binaries', '.ipynb_checkpoints', 'ezua-tutorials', 'tiledb_index', '.ipython', '.kube', '.jupyter', 'db_index.zip', '.bash_history']\n", |
| 130 | + "Checking existence of tiledb_demo/notebooks/documentation: True\n", |
| 131 | + "Bucket newtiledb exists, we won't attempt to create one\n", |
| 132 | + "Successfully uploaded tiledb_vector.txt to bucket newtiledb.\n", |
| 133 | + "Successfully uploaded vector_database.txt to bucket newtiledb.\n", |
| 134 | + "Successfully uploaded tiledb.txt to bucket newtiledb.\n", |
| 135 | + "Successfully uploaded why_tile_blog.txt to bucket newtiledb.\n", |
| 136 | + "Successfully uploaded RAG.txt to bucket newtiledb.\n", |
| 137 | + "Successfully uploaded array_db.txt to bucket newtiledb.\n", |
| 138 | + "Successfully uploaded human_in_loop.txt to bucket newtiledb.\n", |
| 139 | + "Successfully uploaded embedding.txt to bucket newtiledb.\n", |
| 140 | + "Successfully uploaded feature_store.txt to bucket newtiledb.\n", |
| 141 | + "Successfully uploaded LLM.txt to bucket newtiledb.\n" |
| 142 | + ] |
| 143 | + } |
| 144 | + ], |
109 | 145 | "source": [
|
110 |
| - "upload_files(bucket_name,\"documentation\",client)" |
| 146 | + "upload_files(bucket_name,\"tiledb_demo/notebooks/documentation\",client)" |
111 | 147 | ]
|
112 | 148 | },
|
113 | 149 | {
|
|
120 | 156 | },
|
121 | 157 | {
|
122 | 158 | "cell_type": "code",
|
123 |
| - "execution_count": 25, |
| 159 | + "execution_count": null, |
| 160 | + "id": "f33c4897-feeb-4499-bcca-977cb44f0949", |
| 161 | + "metadata": {}, |
| 162 | + "outputs": [], |
| 163 | + "source": [] |
| 164 | + }, |
| 165 | + { |
| 166 | + "cell_type": "code", |
| 167 | + "execution_count": 107, |
124 | 168 | "id": "8e5db8c5-b83c-493b-af9a-f85eacd4a034",
|
125 | 169 | "metadata": {
|
126 | 170 | "tags": []
|
|
131 | 175 | "output_type": "stream",
|
132 | 176 | "text": [
|
133 | 177 | "<Response [200]>\n",
|
134 |
| - "{'predictions': ['Vector databases are specialized storage systems designed to efficiently store and query high-dimensional vector data, often used in machine learning, artificial intelligence, and similarity search applications. This section introduces the concept of vector databases, their importance in enabling fast and scalable vector similarity searches, and typical use cases such as image and text retrieval, and recommendation systems.\\n2. Getting Started with Vector Databases', 'Vector Databases Documentation\\n1. Introduction to Vector Databases', 'This guide provides a step-by-step introduction to working with vector databases, including setting up your first vector database, inserting and indexing vector data, and performing similarity searches. Through practical examples, learn the basics of vector storage, querying techniques, and how to integrate vector databases into your data pipeline.\\n3. Vector Database Architecture and Indexing Techniques', 'Understand the underlying architecture of vector databases and the indexing techniques they use to enable efficient similarity searches. This section dives into the algorithms and data structures, such as k-NN graph and HNSW (Hierarchical Navigable Small World), that power vector databases, explaining how they work and their trade-offs.\\n4. Advanced Querying and Analytics with Vector Databases']}\n", |
135 |
| - "{\"predictions\":[\"Vector databases are specialized storage systems designed to efficiently store and query high-dimensional vector data, often used in machine learning, artificial intelligence, and similarity search applications. This section introduces the concept of vector databases, their importance in enabling fast and scalable vector similarity searches, and typical use cases such as image and text retrieval, and recommendation systems.\\n2. Getting Started with Vector Databases\",\"Vector Databases Documentation\\n1. Introduction to Vector Databases\",\"This guide provides a step-by-step introduction to working with vector databases, including setting up your first vector database, inserting and indexing vector data, and performing similarity searches. Through practical examples, learn the basics of vector storage, querying techniques, and how to integrate vector databases into your data pipeline.\\n3. Vector Database Architecture and Indexing Techniques\",\"Understand the underlying architecture of vector databases and the indexing techniques they use to enable efficient similarity searches. This section dives into the algorithms and data structures, such as k-NN graph and HNSW (Hierarchical Navigable Small World), that power vector databases, explaining how they work and their trade-offs.\\n4. Advanced Querying and Analytics with Vector Databases\"]}\n" |
| 178 | + "{\"predictions\":[\"In summary, TileDB's architecture and features make it an excellent choice for managing vector data, especially in scenarios requiring scalability, performance, and flexibility in data management. Its ability to handle various data types, combined with efficient storage, querying capabilities, and cloud-native support, positions it as a versatile and powerful vector store solution.\",\"TileDB is an array database, and its main strength is that it can morph into practically any data modality and application, delivering unprecedented performance and alleviating the data infrastructure in an organization. A vector is simply a 1D array, therefore, TileDB is the most natural database choice for delivering amazing vector search functionality.\",\"TileDB is designed as a universal data management solution, which means it can handle a wide variety of data types, including tabular, sparse, and dense multi-dimensional array data. When considering TileDB as a vector store, several features and characteristics make it a strong candidate:\",\"I am assuming that you find all this awesome, but I bet you’d like to see how TileDB compares to the increasingly crowded vector database market, as well as where this leads, with TileDB being a universal database and all. Read on! :)\\nDifferentiation of TileDB\"]}\n" |
136 | 179 | ]
|
137 | 180 | }
|
138 | 181 | ],
|
139 | 182 | "source": [
|
140 | 183 | "data = {\n",
|
141 | 184 | " \"instances\": [{\n",
|
142 |
| - " \"input\": \"what are vectors\",\n", |
| 185 | + " \"input\": \"What makes tiledb a good vector store?\",\n", |
143 | 186 | " \"num_docs\": 4 # number of documents to retrieve\n",
|
144 | 187 | " }]\n",
|
145 | 188 | "}\n",
|
|
148 | 191 | "\n",
|
149 | 192 | "response = requests.post(URL, json=data, verify=False) # 'verify=False' for self-signed certs\n",
|
150 | 193 | "print(response)\n",
|
151 |
| - "print(response.json())\n", |
| 194 | + "#print(response.json())\n", |
152 | 195 | "print(response.text)"
|
153 | 196 | ]
|
154 | 197 | },
|
155 | 198 | {
|
156 | 199 | "cell_type": "code",
|
157 |
| - "execution_count": 44, |
| 200 | + "execution_count": 113, |
158 | 201 | "id": "0ceb5821-adec-4804-b029-62f6000ef01c",
|
159 | 202 | "metadata": {
|
160 | 203 | "tags": []
|
|
166 | 209 | },
|
167 | 210 | {
|
168 | 211 | "cell_type": "code",
|
169 |
| - "execution_count": null, |
| 212 | + "execution_count": 114, |
170 | 213 | "id": "e2560e8f-1d9f-4016-8b97-d1d226a1bce9",
|
171 | 214 | "metadata": {
|
172 | 215 | "tags": []
|
|
177 | 220 | " \"instances\": [{\n",
|
178 | 221 | " \"system\": \"You are an AI assistant. You will be given a task. You must generate a detailed answer.\",\n",
|
179 | 222 | " \"instruction\": \"Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\",\n",
|
180 |
| - " \"input\": \"Is TileDB a good vector store?\",\n", |
181 |
| - " \"max_tokens\": 50,\n", |
| 223 | + " \"input\": \"what part of tiledbs architecture makes it a good choice for a vectorstore?\",\n", |
| 224 | + " \"max_tokens\": 5000,\n", |
182 | 225 | " \"top_k\": 100,\n",
|
183 | 226 | " \"top_p\": 0.4,\n",
|
184 |
| - " \"num_docs\": 1,\n", |
| 227 | + " \"num_docs\": 3,\n", |
185 | 228 | " \"temperature\": 0.2\n",
|
186 | 229 | " }]\n",
|
187 | 230 | "}\n",
|
|
190 | 233 | },
|
191 | 234 | {
|
192 | 235 | "cell_type": "code",
|
193 |
| - "execution_count": null, |
| 236 | + "execution_count": 115, |
194 | 237 | "id": "eb3fc990-6a89-4fe6-b4bb-772c0015723a",
|
195 | 238 | "metadata": {
|
196 | 239 | "tags": []
|
197 | 240 | },
|
198 |
| - "outputs": [], |
| 241 | + "outputs": [ |
| 242 | + { |
| 243 | + "name": "stdout", |
| 244 | + "output_type": "stream", |
| 245 | + "text": [ |
| 246 | + "<Response [200]>\n", |
| 247 | + "{\"predictions\":[\"TileDB's architecture and features make it an excellent choice for managing vector data due to its ability to handle various data types, efficient storage, querying capabilities, and cloud-native support. TileDB is designed as a universal data management solution that can handle tabular, sparse, and dense multi-dimensional array data, making it the most natural database choice for delivering amazing vector search functionality. Additionally, TileDB's ability to morph into practically any data modality and application makes it a versatile and powerful vector store solution. Therefore, TileDB is an excellent choice for managing vector data in scenarios requiring scalability, performance, and flexibility in data management.\"]}\n" |
| 248 | + ] |
| 249 | + } |
| 250 | + ], |
199 | 251 | "source": [
|
200 | 252 | "print(response)\n",
|
201 | 253 | "#print(response.json())\n",
|
|
0 commit comments