redis
diff --git a/‎core/src/main/java/com/redis/vl/extensions/router/SemanticRouter.java‎
Lines changed: 1 addition & 2 deletions b/‎core/src/main/java/com/redis/vl/extensions/router/SemanticRouter.java‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎core/src/test/java/com/redis/vl/extensions/cache/NotebookSemanticCacheTest.java‎
Lines changed: 0 additions & 30 deletions b/‎core/src/test/java/com/redis/vl/extensions/cache/NotebookSemanticCacheTest.java‎
Lines changed: 0 additions & 30 deletions
diff --git a/‎core/src/test/java/com/redis/vl/extensions/router/SemanticRouterIntegrationTest.java‎
Lines changed: 6 additions & 3 deletions b/‎core/src/test/java/com/redis/vl/extensions/router/SemanticRouterIntegrationTest.java‎
Lines changed: 6 additions & 3 deletions
diff --git a/‎core/src/test/java/com/redis/vl/utils/rerank/BAAIModelRealIntegrationTest.java‎
Lines changed: 1 addition & 18 deletions b/‎core/src/test/java/com/redis/vl/utils/rerank/BAAIModelRealIntegrationTest.java‎
Lines changed: 1 addition & 18 deletions
diff --git a/‎core/src/test/java/com/redis/vl/utils/rerank/CohereRerankerIntegrationTest.java‎
Lines changed: 0 additions & 30 deletions b/‎core/src/test/java/com/redis/vl/utils/rerank/CohereRerankerIntegrationTest.java‎
Lines changed: 0 additions & 30 deletions
diff --git a/‎core/src/test/java/com/redis/vl/utils/rerank/CrossEncoderTokenizationTest.java‎
Lines changed: 77 additions & 0 deletions b/‎core/src/test/java/com/redis/vl/utils/rerank/CrossEncoderTokenizationTest.java‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎core/src/test/java/com/redis/vl/utils/rerank/HFCrossEncoderRerankerNotebookTest.java‎
Lines changed: 0 additions & 12 deletions b/‎core/src/test/java/com/redis/vl/utils/rerank/HFCrossEncoderRerankerNotebookTest.java‎
Lines changed: 0 additions & 12 deletions
@@ -194,8 +194,7 @@ public java.util.Map<String, Object> toDict() {
 
     java.util.Map<String, Object> configDict = new java.util.HashMap<>();
     configDict.put("max_k", routingConfig.getMaxK());
-    configDict.put(
-        "aggregation_method", routingConfig.getAggregationMethod().name().toLowerCase());
+    configDict.put("aggregation_method", routingConfig.getAggregationMethod().name().toLowerCase());
     dict.put("routing_config", configDict);
 
     return dict;
 
@@ -27,11 +27,8 @@ public class NotebookSemanticCacheTest extends BaseIntegrationTest {
   public void setUp() {
 
     // Cell 5: Create vectorizer using SentenceTransformersVectorizer
-    // This should download the redis/langcache-embed-v3 model from HuggingFace on first use
-    System.out.println("Initializing SentenceTransformersVectorizer with redis/langcache-embed-v3");
     try {
       vectorizer = new SentenceTransformersVectorizer("redis/langcache-embed-v3");
-      System.out.println("Model dimensions: " + vectorizer.getDimensions());
     } catch (Exception e) {
       System.err.println("Failed to initialize SentenceTransformersVectorizer: " + e.getMessage());
       e.printStackTrace();
@@ -46,32 +43,27 @@ public void setUp() {
             .distanceThreshold(0.1f)
             .vectorizer(vectorizer)
             .build();
-
-    System.out.println("SemanticCache initialized with index: " + llmcache.getName());
   }
 
   @Test
   public void testNotebookFlow() {
     // Cell 6: Verify cache is ready
     assertNotNull(llmcache);
     assertEquals("llmcache_test", llmcache.getName());
-    System.out.println("Cache index '" + llmcache.getName() + "' is ready for use");
 
     // Cell 8: Define question
     String question = "What is the capital of France?";
 
     // Cell 9: Check empty cache
     Optional<CacheHit> response = llmcache.check(question);
     assertFalse(response.isPresent(), "Cache should be empty initially");
-    System.out.println("Initial cache check: " + (response.isPresent() ? "Found" : "Empty"));
 
     // Cell 11: Store in cache
     Map<String, Object> metadata = new HashMap<>();
     metadata.put("city", "Paris");
     metadata.put("country", "france");
 
     llmcache.store(question, "Paris", metadata);
-    System.out.println("Stored in cache");
 
     // Cell 13: Check cache again
     Optional<CacheHit> cacheResponse = llmcache.check(question);
@@ -84,11 +76,6 @@ public void testNotebookFlow() {
       assertNotNull(hit.getMetadata());
       assertEquals("Paris", hit.getMetadata().get("city"));
       assertEquals("france", hit.getMetadata().get("country"));
-      System.out.println("Found in cache:");
-      System.out.println("  Prompt: " + hit.getPrompt());
-      System.out.println("  Response: " + hit.getResponse());
-      System.out.println("  Distance: " + hit.getDistance());
-      System.out.println("  Metadata: " + hit.getMetadata());
     }
 
     // Cell 14: Check semantically similar question
@@ -97,29 +84,22 @@ public void testNotebookFlow() {
     assertTrue(similarResponse.isPresent(), "Should find semantically similar entry");
     if (similarResponse.isPresent()) {
       assertEquals("Paris", similarResponse.get().getResponse());
-      System.out.println("Similar question result: " + similarResponse.get().getResponse());
     }
 
     // Cell 16: Adjust distance threshold
     llmcache.setDistanceThreshold(0.5f);
     assertEquals(0.5f, llmcache.getDistanceThreshold(), 0.001f);
-    System.out.println("Distance threshold set to 0.5");
 
     // Cell 17: Try with tricky question
     String trickQuestion =
         "What is the capital city of the country in Europe that also has a city named Nice?";
     Optional<CacheHit> trickResponse = llmcache.check(trickQuestion);
     // With wider threshold, this might match
-    System.out.println(
-        "Trick question result: "
-            + (trickResponse.isPresent() ? trickResponse.get().getResponse() : "Not found"));
 
     // Cell 18: Clear cache
     llmcache.clear();
     Optional<CacheHit> clearedResponse = llmcache.check(trickQuestion);
     assertFalse(clearedResponse.isPresent(), "Cache should be empty after clear");
-    System.out.println(
-        "Cache after clear: " + (clearedResponse.isPresent() ? "Not empty" : "Empty"));
   }
 
   @Test
@@ -134,11 +114,8 @@ public void testTTLCache() throws InterruptedException {
             .ttl(5) // 5 seconds
             .build();
 
-    System.out.println("Created cache with 5 second TTL");
-
     // Cell 21: Store with TTL
     ttlCache.store("This is a TTL test", "This is a TTL test response");
-    System.out.println("Stored entry with TTL");
 
     // Verify it's there immediately
     Optional<CacheHit> immediateCheck = ttlCache.check("This is a TTL test");
@@ -150,8 +127,6 @@ public void testTTLCache() throws InterruptedException {
     // Cell 22: Check after TTL expiry
     Optional<CacheHit> ttlResult = ttlCache.check("This is a TTL test");
     assertFalse(ttlResult.isPresent(), "Entry should have expired");
-    System.out.println(
-        "Result after TTL expiry: " + (ttlResult.isPresent() ? "Found" : "Empty (expired)"));
 
     // Cell 23: Clean up
     ttlCache.clear();
@@ -204,23 +179,18 @@ public void testUserMetadataFiltering() {
         "The number on file is 123-555-1111",
         userDef);
 
-    System.out.println("Stored user-specific cache entries");
-
     // Cell 32: Check cache entries
     Optional<CacheHit> phoneResponse =
         llmcache.check("What is the phone number linked to my account?");
 
     assertTrue(phoneResponse.isPresent());
     if (phoneResponse.isPresent()) {
-      System.out.println("Found entry: " + phoneResponse.get().getResponse());
       // Should return one of the phone numbers based on similarity
       String response = phoneResponse.get().getResponse();
       assertTrue(response.contains("123-555-"));
     }
 
     // Cell 33: Final cleanup
     llmcache.clear();
-    System.out.println("\nAll caches cleaned up.");
-    System.out.println("SemanticCache demonstration complete!");
   }
 }
@@ -215,11 +215,14 @@ void testNotebookQuery() {
     // Test no-match query from Python notebook cell 9
     // Python output: RouteMatch(name=None, distance=None)
     RouteMatch noMatch = router.route("are aliens real?");
-    System.out.println("DEBUG: aliens query - name=" + noMatch.getName() + ", distance=" + noMatch.getDistance());
-    System.out.println("DEBUG: technology threshold=" + notebookRoutes.get(0).getDistanceThreshold());
+    System.out.println(
+        "DEBUG: aliens query - name=" + noMatch.getName() + ", distance=" + noMatch.getDistance());
+    System.out.println(
+        "DEBUG: technology threshold=" + notebookRoutes.get(0).getDistanceThreshold());
 
     assertThat(noMatch).isNotNull();
-    // NOTE: Java ONNX embeddings differ from Python, this query may match with distance near threshold
+    // NOTE: Java ONNX embeddings differ from Python, this query may match with distance near
+    // threshold
     // Python: None, Java: may match technology with distance ~0.33
     // Accept either outcome as embedding implementations differ
     if (noMatch.getName() != null) {
 
@@ -9,7 +9,7 @@
 import org.junit.jupiter.api.Test;
 
 /**
- * Real integration test with BAAI/bge-reranker-base model.
+ * Integration test with BAAI/bge-reranker-base model.
  *
  * <p>Compares outputs to Python notebook to verify correctness.
  *
@@ -23,12 +23,10 @@ class BAAIModelRealIntegrationTest {
 
   @BeforeAll
   static void setUp() {
-    System.out.println("=== LOADING BAAI/bge-reranker-base MODEL ===");
     reranker = HFCrossEncoderReranker.builder().model("BAAI/bge-reranker-base").build();
 
     assertNotNull(reranker, "Reranker must initialize");
     assertEquals("BAAI/bge-reranker-base", reranker.getModel());
-    System.out.println("=== MODEL LOADED ===");
   }
 
   @AfterAll
@@ -57,18 +55,6 @@ void testBAAIModelProducesCorrectScores() {
     List<?> results = result.getDocuments();
     List<Double> scores = result.getScores();
 
-    System.out.println("\n=== JAVA OUTPUT ===");
-    for (int i = 0; i < results.size(); i++) {
-      String docPreview =
-          results.get(i).toString().substring(0, Math.min(50, results.get(i).toString().length()));
-      System.out.println(scores.get(i) + " -- " + docPreview + "...");
-    }
-
-    System.out.println("\n=== EXPECTED PYTHON OUTPUT (with sigmoid) ===");
-    System.out.println("0.9999381  --  Washington, D.C. ...");
-    System.out.println("0.3802366  --  Charlotte Amalie ...");
-    System.out.println("0.0746112  --  Carson City ...");
-
     // Verify we got 3 results
     assertEquals(3, results.size(), "Should return 3 results");
     assertEquals(3, scores.size(), "Should return 3 scores");
@@ -81,9 +67,6 @@ void testBAAIModelProducesCorrectScores() {
 
     // Score for Washington D.C. should be ~0.9999 (after sigmoid)
     double topScore = scores.get(0);
-    System.out.println("\n=== SCORE COMPARISON ===");
-    System.out.println("Expected top score: ~0.9999");
-    System.out.println("Actual top score: " + topScore);
 
     assertTrue(
         topScore > 0.0 && topScore < 1.0,
 
@@ -57,18 +57,6 @@ void testRerankStringDocuments() {
     assertEquals(3, docs.size(), "Should return 3 results");
     assertEquals(3, scores.size(), "Should return 3 scores");
 
-    System.out.println("\n=== JAVA STRING DOCS OUTPUT ===");
-    for (int i = 0; i < docs.size(); i++) {
-      String docPreview =
-          docs.get(i).toString().substring(0, Math.min(50, docs.get(i).toString().length()));
-      System.out.println(scores.get(i) + " -- " + docPreview + "...");
-    }
-
-    System.out.println("\n=== EXPECTED PYTHON OUTPUT ===");
-    System.out.println("0.9990564 -- Washington, D.C. ...");
-    System.out.println("0.7516481 -- Capital punishment ...");
-    System.out.println("0.08882029 -- Northern Mariana Islands ...");
-
     // Top result must be Washington D.C.
     String topDoc = (String) docs.get(0);
     assertTrue(
@@ -77,9 +65,6 @@ void testRerankStringDocuments() {
 
     // Top score should be ~0.999
     double topScore = scores.get(0);
-    System.out.println("\n=== SCORE COMPARISON ===");
-    System.out.println("Expected top score: ~0.999");
-    System.out.println("Actual top score: " + topScore);
 
     assertTrue(topScore > 0.9, "Top score should be > 0.9, but was: " + topScore);
     assertTrue(
@@ -140,17 +125,6 @@ void testRerankDictionaryDocumentsWithRankBy() {
     assertEquals(3, docs.size(), "Should return 3 results");
     assertEquals(3, scores.size(), "Should return 3 scores");
 
-    System.out.println("\n=== JAVA DICT DOCS OUTPUT ===");
-    for (int i = 0; i < docs.size(); i++) {
-      System.out.println(scores.get(i) + " -- " + docs.get(i));
-    }
-
-    System.out.println("\n=== EXPECTED PYTHON OUTPUT ===");
-    System.out.println("0.9988121 -- {'source': 'textbook', 'passage': 'Washington, D.C. ...'}");
-    System.out.println("0.5974905 -- {'source': 'wiki', 'passage': 'Capital punishment ...'}");
-    System.out.println(
-        "0.059101548 -- {'source': 'encyclopedia', 'passage': 'Northern Mariana ...'}");
-
     // Top result must be Washington D.C. with source=textbook
     @SuppressWarnings("unchecked")
     Map<String, Object> topDoc = (Map<String, Object>) docs.get(0);
@@ -161,10 +135,6 @@ void testRerankDictionaryDocumentsWithRankBy() {
 
     // Top score should be ~0.998
     double topScore = scores.get(0);
-    System.out.println("\n=== SCORE COMPARISON ===");
-    System.out.println("Expected top score: ~0.998");
-    System.out.println("Actual top score: " + topScore);
-
     assertTrue(topScore > 0.9, "Top score should be > 0.9, but was: " + topScore);
     assertTrue(
         Math.abs(topScore - 0.998) < 0.05,
 
@@ -0,0 +1,85 @@
+package com.redis.vl.utils.rerank;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.util.Arrays;
+import java.util.Map;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Verifies that cross-encoder pair tokenization matches the Python transformers library.
+ *
+ * <p>Compares the Java tokenizer output for one query/document pair against reference values
+ * from the Python tokenizer, so that reranking scores can match the Python implementation.
+ */
+@Tag("integration")
+class CrossEncoderTokenizationTest {
+
+  /**
+   * Tokenizes a query/document pair with the BAAI/bge-reranker-base reranker and checks the
+   * token IDs, total token count, attention mask and token type IDs against Python references.
+   *
+   * @throws Exception if reflective access to the loader, tokenization, or close() fails
+   */
+  @Test
+  void testTokenizationMatchesPython() throws Exception {
+    HFCrossEncoderReranker reranker =
+        HFCrossEncoderReranker.builder().model("BAAI/bge-reranker-base").build();
+
+    // Close the reranker even when an assertion fails; without the finally block a failed
+    // assertion would skip close().
+    try {
+      String query = "What is the capital of the United States?";
+      String doc =
+          "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States.";
+
+      // Access the internal tokenizer through reflection
+      java.lang.reflect.Field loaderField =
+          HFCrossEncoderReranker.class.getDeclaredField("modelLoader");
+      loaderField.setAccessible(true);
+      CrossEncoderLoader loader = (CrossEncoderLoader) loaderField.get(reranker);
+
+      Map<String, long[][]> tokens = loader.tokenizePair(query, doc);
+
+      long[] inputIds = tokens.get("input_ids")[0];
+      long[] tokenTypeIds = tokens.get("token_type_ids")[0];
+      long[] attentionMask = tokens.get("attention_mask")[0];
+
+      // Expected token IDs from Python transformers tokenizer
+      // Generated with: tokenizer("What is the capital...", "Washington, D.C...")
+      long[] expectedTokenIds = {
+        0, 4865, 83, 70, 10323, 111, 70, 14098, 46684, 32, 2, 2, 17955, 4, 391, 5, 441, 5, 15, 289
+      };
+
+      // Validate token IDs match Python (first 20 tokens)
+      long[] actualFirst20 = Arrays.copyOf(inputIds, Math.min(20, inputIds.length));
+      assertThat(actualFirst20)
+          .as("Token IDs should match Python transformers tokenizer")
+          .containsExactly(expectedTokenIds);
+
+      // Validate total token count matches Python
+      assertThat(inputIds.length)
+          .as("Total tokens should match Python tokenization")
+          .isEqualTo(49);
+
+      // Validate attention mask is all 1s (no padding); containsOnly also rejects an empty mask
+      assertThat(attentionMask)
+          .as("Attention mask should be all 1s (no padding)")
+          .containsOnly(1L);
+
+      // XLM-Roberta doesn't use token type IDs, so they should all be 0
+      assertThat(tokenTypeIds)
+          .as("Token type IDs should all be 0 for XLM-Roberta")
+          .containsOnly(0L);
+
+      System.out.println("\n✓ Tokenization matches Python transformers");
+      System.out.println("  Query: " + query.substring(0, Math.min(50, query.length())));
+      System.out.println("  Document: " + doc.substring(0, Math.min(50, doc.length())));
+      System.out.println("  Token IDs (first 20): " + Arrays.toString(actualFirst20));
+      System.out.println("  Total tokens: " + inputIds.length);
+    } finally {
+      reranker.close();
+    }
+  }
+}
@@ -97,12 +97,6 @@ void testNotebookSimpleReranking() {
     assertTrue(
         topDoc.contains("Washington, D.C.") || topDoc.contains("capital of the United States"),
         "Top result should be about Washington D.C., but was: " + topDoc);
-
-    // Print results like notebook does
-    System.out.println("\nNotebook test results (BAAI model):");
-    for (int i = 0; i < results.size(); i++) {
-      System.out.println(scores.get(i) + " -- " + results.get(i));
-    }
   }
 
   @Test
@@ -167,12 +161,6 @@ void testNotebookStructuredDocuments() {
       assertTrue(doc.containsKey("source"), "Should preserve 'source' field");
       assertTrue(doc.containsKey("content"), "Should preserve 'content' field");
     }
-
-    // Print like notebook
-    System.out.println("\nNotebook structured doc results (BAAI model):");
-    for (int i = 0; i < rerankedResults.size(); i++) {
-      System.out.println(structuredScores.get(i) + " -- " + rerankedResults.get(i));
-    }
   }
 
   @Test