|
11 | 11 | "2. LangChain4j Integration (OpenAI, Cohere, VoyageAI, Azure, etc.)\n",
|
12 | 12 | "3. Custom vectorizers\n",
|
13 | 13 | "\n",
|
14 |
| - "Before running this notebook, be sure to:\n", |
15 |
| - "1. Have Java 17+ installed\n", |
16 |
| - "2. Have a running Redis Stack instance with RediSearch > 2.4 active\n", |
17 |
| - "\n", |
18 |
| - "For example, you can run Redis Stack locally with Docker:\n", |
| 14 | + "Before running this notebook, be sure to have a running Redis Stack instance. You can start it with Docker:\n", |
19 | 15 | "\n",
|
20 | 16 | "```bash\n",
|
21 | 17 | "docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest\n",
|
|
24 | 20 | "This will run Redis on port 6379 and RedisInsight at http://localhost:8001."
|
25 | 21 | ]
|
26 | 22 | },
|
27 |
| - { |
28 |
| - "cell_type": "markdown", |
29 |
| - "metadata": {}, |
30 |
| - "source": [ |
31 |
| - "## Setup\n", |
32 |
| - "\n", |
33 |
| - "First, add the RedisVL4j JAR and its dependencies to the classpath.\n", |
34 |
| - "For local development, you can build the project with `./gradlew :core:build` and find the JAR in `core/build/libs/`." |
35 |
| - ] |
36 |
| - }, |
37 | 23 | {
|
38 | 24 | "cell_type": "code",
|
39 | 25 | "execution_count": null,
|
40 | 26 | "metadata": {},
|
41 | 27 | "outputs": [],
|
42 | 28 | "source": [
|
43 |
| - "// Add JARs to classpath - adjust paths as needed\n", |
44 |
| - "%jars /path/to/redisvl4j/core/build/libs/*.jar\n", |
45 |
| - "\n", |
46 |
| - "// Import necessary classes\n", |
| 29 | + "// Load Maven dependencies\n", |
| 30 | + "%maven redis.clients:jedis:5.2.0\n", |
| 31 | + "%maven org.slf4j:slf4j-nop:2.0.16\n", |
| 32 | + "%maven com.fasterxml.jackson.core:jackson-databind:2.18.0\n", |
| 33 | + "%maven com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.18.0\n", |
| 34 | + "%maven com.github.f4b6a3:ulid-creator:5.2.3\n", |
| 35 | + "%maven dev.langchain4j:langchain4j:0.36.2\n", |
| 36 | + "%maven dev.langchain4j:langchain4j-open-ai:0.36.2\n", |
| 37 | + "%maven dev.langchain4j:langchain4j-cohere:0.36.2\n", |
| 38 | + "%maven dev.langchain4j:langchain4j-voyage-ai:0.36.2\n", |
| 39 | + "%maven com.microsoft.onnxruntime:onnxruntime:1.16.3\n", |
| 40 | + "%maven com.squareup.okhttp3:okhttp:4.12.0\n", |
| 41 | + "%maven com.google.code.gson:gson:2.10.1\n", |
| 42 | + "%maven ai.djl.huggingface:tokenizers:0.30.0\n", |
| 43 | + "\n", |
| 44 | + "// Note: RedisVL JAR is in classpath (loaded automatically by Docker container)\n", |
| 45 | + "\n", |
| 46 | + "// Import RedisVL classes\n", |
47 | 47 | "import com.redis.vl.utils.vectorize.*;\n",
|
48 | 48 | "import com.redis.vl.index.SearchIndex;\n",
|
49 | 49 | "import com.redis.vl.schema.IndexSchema;\n",
|
50 | 50 | "import com.redis.vl.schema.VectorField;\n",
|
51 | 51 | "import com.redis.vl.query.VectorQuery;\n",
|
| 52 | + "\n", |
| 53 | + "// Import Redis client\n", |
52 | 54 | "import redis.clients.jedis.UnifiedJedis;\n",
|
53 |
| - "import redis.clients.jedis.search.schemafields.VectorField.VectorAlgorithm;\n", |
54 |
| - "import java.util.List;\n", |
55 |
| - "import java.util.Map;\n", |
56 |
| - "import java.util.HashMap;\n", |
57 |
| - "import java.util.ArrayList;\n", |
58 |
| - "import java.util.Arrays;" |
| 55 | + "import redis.clients.jedis.HostAndPort;\n", |
| 56 | + "\n", |
| 57 | + "// Import LangChain4J\n", |
| 58 | + "import dev.langchain4j.model.openai.OpenAiEmbeddingModel;\n", |
| 59 | + "import dev.langchain4j.model.cohere.CohereEmbeddingModel;\n", |
| 60 | + "import dev.langchain4j.model.voyageai.VoyageAiEmbeddingModel;\n", |
| 61 | + "\n", |
| 62 | + "// Import Java standard libraries\n", |
| 63 | + "import java.util.*;" |
59 | 64 | ]
|
60 | 65 | },
|
61 | 66 | {
|
|
104 | 109 | "source": [
|
105 | 110 | "// Create a vectorizer using HuggingFace Sentence Transformers\n",
|
106 | 111 | "// This model runs locally - no API key needed!\n",
|
107 |
| - "var hf = new SentenceTransformersVectorizer(\"sentence-transformers/all-mpnet-base-v2\");\n", |
| 112 | + "BaseVectorizer hf = new SentenceTransformersVectorizer(\"sentence-transformers/all-mpnet-base-v2\");\n", |
108 | 113 | "\n",
|
109 | 114 | "// Embed a single sentence\n",
|
110 | 115 | "float[] test = hf.embed(\"This is a test sentence.\");\n",
|
|
141 | 146 | "metadata": {},
|
142 | 147 | "outputs": [],
|
143 | 148 | "source": [
|
144 |
| - "import dev.langchain4j.model.openai.OpenAiEmbeddingModel;\n", |
145 |
| - "\n", |
146 | 149 | "// Get API key from environment\n",
|
147 | 150 | "String apiKey = System.getenv(\"OPENAI_API_KEY\");\n",
|
148 |
| - "if (apiKey == null) {\n", |
| 151 | + "if (apiKey == null || apiKey.isEmpty()) {\n", |
149 | 152 | " System.out.println(\"Skipping OpenAI example - OPENAI_API_KEY not set\");\n",
|
150 | 153 | "} else {\n",
|
151 | 154 | " // Create OpenAI embedding model\n",
|
|
155 | 158 | " .build();\n",
|
156 | 159 | " \n",
|
157 | 160 | " // Wrap in LangChain4JVectorizer\n",
|
158 |
| - " var oai = new LangChain4JVectorizer(\"text-embedding-ada-002\", openaiModel);\n", |
| 161 | + " BaseVectorizer oai = new LangChain4JVectorizer(\"text-embedding-ada-002\", openaiModel);\n", |
159 | 162 | " \n",
|
160 | 163 | " // Embed a sentence\n",
|
161 | 164 | " float[] openaiTest = oai.embed(\"This is a test sentence.\");\n",
|
|
185 | 188 | "metadata": {},
|
186 | 189 | "outputs": [],
|
187 | 190 | "source": [
|
188 |
| - "import dev.langchain4j.model.cohere.CohereEmbeddingModel;\n", |
189 |
| - "\n", |
190 | 191 | "String cohereApiKey = System.getenv(\"COHERE_API_KEY\");\n",
|
191 |
| - "if (cohereApiKey == null) {\n", |
| 192 | + "if (cohereApiKey == null || cohereApiKey.isEmpty()) {\n", |
192 | 193 | " System.out.println(\"Skipping Cohere example - COHERE_API_KEY not set\");\n",
|
193 | 194 | "} else {\n",
|
194 | 195 | " var cohereModel = CohereEmbeddingModel.builder()\n",
|
195 | 196 | " .apiKey(cohereApiKey)\n",
|
196 | 197 | " .modelName(\"embed-english-v3.0\")\n",
|
197 | 198 | " .build();\n",
|
198 | 199 | " \n",
|
199 |
| - " var co = new LangChain4JVectorizer(\"embed-english-v3.0\", cohereModel);\n", |
| 200 | + " BaseVectorizer co = new LangChain4JVectorizer(\"embed-english-v3.0\", cohereModel);\n", |
200 | 201 | " \n",
|
201 | 202 | " float[] cohereTest = co.embed(\"This is a test sentence.\");\n",
|
202 | 203 | " System.out.println(\"Cohere Vector dimensions: \" + cohereTest.length);\n",
|
|
221 | 222 | "metadata": {},
|
222 | 223 | "outputs": [],
|
223 | 224 | "source": [
|
224 |
| - "import dev.langchain4j.model.voyageai.VoyageAiEmbeddingModel;\n", |
225 |
| - "\n", |
226 | 225 | "String voyageApiKey = System.getenv(\"VOYAGE_API_KEY\");\n",
|
227 |
| - "if (voyageApiKey == null) {\n", |
| 226 | + "if (voyageApiKey == null || voyageApiKey.isEmpty()) {\n", |
228 | 227 | " System.out.println(\"Skipping VoyageAI example - VOYAGE_API_KEY not set\");\n",
|
229 | 228 | "} else {\n",
|
230 | 229 | " var voyageModel = VoyageAiEmbeddingModel.builder()\n",
|
231 | 230 | " .apiKey(voyageApiKey)\n",
|
232 | 231 | " .modelName(\"voyage-law-2\")\n",
|
233 | 232 | " .build();\n",
|
234 | 233 | " \n",
|
235 |
| - " var vo = new LangChain4JVectorizer(\"voyage-law-2\", voyageModel);\n", |
| 234 | + " BaseVectorizer vo = new LangChain4JVectorizer(\"voyage-law-2\", voyageModel);\n", |
236 | 235 | " \n",
|
237 | 236 | " float[] voyageTest = vo.embed(\"This is a test sentence.\");\n",
|
238 | 237 | " System.out.println(\"VoyageAI Vector dimensions: \" + voyageTest.length);\n",
|
|
271 | 270 | " \n",
|
272 | 271 | " @Override\n",
|
273 | 272 | " protected List<float[]> generateEmbeddingsBatch(List<String> texts, int batchSize) {\n",
|
274 |
| - " return texts.stream()\n", |
275 |
| - " .map(this::generateEmbedding)\n", |
276 |
| - " .collect(java.util.stream.Collectors.toList());\n", |
| 273 | + " List<float[]> results = new ArrayList<>();\n", |
| 274 | + " for (String text : texts) {\n", |
| 275 | + " results.add(generateEmbedding(text));\n", |
| 276 | + " }\n", |
| 277 | + " return results;\n", |
277 | 278 | " }\n",
|
278 | 279 | "}\n",
|
279 | 280 | "\n",
|
280 |
| - "var customVectorizer = new CustomVectorizer();\n", |
| 281 | + "BaseVectorizer customVectorizer = new CustomVectorizer();\n", |
281 | 282 | "float[] customEmbed = customVectorizer.embed(\"This is a test sentence.\");\n",
|
282 | 283 | "System.out.println(\"Custom vectorizer dimensions: \" + customEmbed.length);\n",
|
283 | 284 | "System.out.println(\"First 10 values: \" + Arrays.toString(Arrays.copyOfRange(customEmbed, 0, 10)));"
|
|
302 | 303 | "outputs": [],
|
303 | 304 | "source": [
|
304 | 305 | "// Connect to Redis\n",
|
305 |
| - "var redis = new UnifiedJedis(\"redis://localhost:6379\");\n", |
306 |
| - "\n", |
307 |
| - "// Create the schema - matching the Python notebook YAML\n", |
308 |
| - "var schema = IndexSchema.builder()\n", |
309 |
| - " .name(\"vectorizers\")\n", |
310 |
| - " .prefix(\"doc\")\n", |
311 |
| - " .storageType(IndexSchema.StorageType.HASH)\n", |
312 |
| - " .addTextField(\"sentence\", textField -> {})\n", |
313 |
| - " .addVectorField(\"embedding\", 768, vectorField ->\n", |
314 |
| - " vectorField\n", |
315 |
| - " .algorithm(VectorAlgorithm.FLAT)\n", |
316 |
| - " .distanceMetric(VectorField.DistanceMetric.COSINE)\n", |
317 |
| - " .dataType(VectorField.VectorDataType.FLOAT32))\n", |
318 |
| - " .build();\n", |
| 306 | + "UnifiedJedis jedis = new UnifiedJedis(new HostAndPort(\"redis-stack\", 6379));\n", |
| 307 | + "\n", |
| 308 | + "// Create the schema from a Map (matching the Python notebook YAML)\n", |
| 309 | + "Map<String, Object> schema = Map.of(\n", |
| 310 | + " \"index\", Map.of(\n", |
| 311 | + " \"name\", \"vectorizers\",\n", |
| 312 | + " \"prefix\", \"doc\"\n", |
| 313 | + " ),\n", |
| 314 | + " \"fields\", List.of(\n", |
| 315 | + " Map.of(\"name\", \"sentence\", \"type\", \"text\"),\n", |
| 316 | + " Map.of(\n", |
| 317 | + " \"name\", \"embedding\",\n", |
| 318 | + " \"type\", \"vector\",\n", |
| 319 | + " \"attrs\", Map.of(\n", |
| 320 | + " \"dims\", 768,\n", |
| 321 | + " \"distance_metric\", \"cosine\",\n", |
| 322 | + " \"algorithm\", \"flat\",\n", |
| 323 | + " \"datatype\", \"float32\"\n", |
| 324 | + " )\n", |
| 325 | + " )\n", |
| 326 | + " )\n", |
| 327 | + ");\n", |
319 | 328 | "\n",
|
320 | 329 | "// Create the index\n",
|
321 |
| - "var index = new SearchIndex(schema, redis);\n", |
| 330 | + "SearchIndex index = SearchIndex.fromDict(schema, jedis);\n", |
322 | 331 | "index.create(true); // overwrite if exists\n",
|
323 | 332 | "System.out.println(\"Index created: \" + index.getName());"
|
324 | 333 | ]
|
|
342 | 351 | "}\n",
|
343 | 352 | "\n",
|
344 | 353 | "// Load data into the index\n",
|
345 |
| - "index.load(data);\n", |
346 |
| - "System.out.println(\"Loaded \" + data.size() + \" documents\");" |
| 354 | + "List<String> keys = index.load(data);\n", |
| 355 | + "System.out.println(\"Loaded \" + data.size() + \" documents\");\n", |
| 356 | + "System.out.println(\"Keys: \" + keys);" |
347 | 357 | ]
|
348 | 358 | },
|
349 | 359 | {
|
|
356 | 366 | "float[] queryEmbedding = hf.embed(\"That is a happy cat\");\n",
|
357 | 367 | "\n",
|
358 | 368 | "// Create and execute a vector query\n",
|
359 |
| - "var query = VectorQuery.builder()\n", |
| 369 | + "VectorQuery query = VectorQuery.builder()\n", |
360 | 370 | " .vector(queryEmbedding)\n",
|
361 | 371 | " .field(\"embedding\")\n",
|
362 |
| - " .returnFields(List.of(\"sentence\"))\n", |
| 372 | + " .returnFields(\"sentence\", \"vector_distance\")\n", |
363 | 373 | " .numResults(3)\n",
|
364 | 374 | " .build();\n",
|
365 | 375 | "\n",
|
366 | 376 | "List<Map<String, Object>> results = index.query(query);\n",
|
367 | 377 | "\n",
|
368 | 378 | "System.out.println(\"\\nSearch results for: 'That is a happy cat'\");\n",
|
369 |
| - "for (var doc : results) {\n", |
| 379 | + "for (Map<String, Object> doc : results) {\n", |
370 | 380 | " System.out.println(doc.get(\"sentence\") + \" - Distance: \" + doc.get(\"vector_distance\"));\n",
|
371 | 381 | "}"
|
372 | 382 | ]
|
|
386 | 396 | "source": [
|
387 | 397 | "// Cleanup\n",
|
388 | 398 | "index.delete(true);\n",
|
389 |
| - "System.out.println(\"Index deleted\");" |
| 399 | + "jedis.close();\n", |
| 400 | + "System.out.println(\"Index deleted and connection closed\");" |
390 | 401 | ]
|
391 | 402 | },
|
392 | 403 | {
|
|
412 | 423 | "name": "java"
|
413 | 424 | },
|
414 | 425 | "language_info": {
|
415 |
| - "name": "java", |
416 |
| - "version": "17" |
| 426 | + "codemirror_mode": "java", |
| 427 | + "file_extension": ".jshell", |
| 428 | + "mimetype": "text/x-java-source", |
| 429 | + "name": "Java", |
| 430 | + "pygments_lexer": "java", |
| 431 | + "version": "21+35" |
417 | 432 | }
|
418 | 433 | },
|
419 | 434 | "nbformat": 4,
|
|
0 commit comments