Fix score count validation in reranker response #111212
New changelog file:

```diff
@@ -0,0 +1,6 @@
+pr: 111212
+summary: Fix score count validation in reranker response
+area: Ranking
+type: bug
+issues:
+ - 111202
```
`TextSimilarityRankFeaturePhaseRankCoordinatorContext.java`:

```diff
@@ -7,15 +7,22 @@
 package org.elasticsearch.xpack.inference.rank.textsimilarity;
 
+import org.elasticsearch.action.ActionFuture;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.client.internal.Client;
 import org.elasticsearch.inference.InferenceServiceResults;
 import org.elasticsearch.inference.InputType;
+import org.elasticsearch.inference.ModelConfigurations;
 import org.elasticsearch.inference.TaskType;
 import org.elasticsearch.search.rank.context.RankFeaturePhaseRankCoordinatorContext;
 import org.elasticsearch.search.rank.feature.RankFeatureDoc;
+import org.elasticsearch.xpack.core.inference.action.GetInferenceModelAction;
 import org.elasticsearch.xpack.core.inference.action.InferenceAction;
 import org.elasticsearch.xpack.core.inference.results.RankedDocsResults;
+import org.elasticsearch.xpack.inference.services.cohere.CohereService;
+import org.elasticsearch.xpack.inference.services.cohere.rerank.CohereRerankTaskSettings;
+import org.elasticsearch.xpack.inference.services.googlevertexai.GoogleVertexAiService;
+import org.elasticsearch.xpack.inference.services.googlevertexai.rerank.GoogleVertexAiRerankTaskSettings;
 
 import java.util.Arrays;
 import java.util.Comparator;
```
```diff
@@ -51,15 +58,42 @@ public TextSimilarityRankFeaturePhaseRankCoordinatorContext(
 
     @Override
     protected void computeScores(RankFeatureDoc[] featureDocs, ActionListener<float[]> scoreListener) {
+        // The rerank inference endpoint may have an override to return top N documents only, in that case let's fail fast to avoid
+        // assigning scores to the wrong inputs
+        Integer inferenceEndpointTopN = getTopNFromInferenceEndpointTaskSettings();
+        if (inferenceEndpointTopN != null && inferenceEndpointTopN < featureDocs.length) {
+            scoreListener.onFailure(
+                new IllegalArgumentException(
+                    "Inference endpoint ["
+                        + inferenceId
+                        + "] is configured to return the top ["
+                        + inferenceEndpointTopN
+                        + "] results. Reduce rank_window_size to be less than or equal to this value."
+                )
+            );
+            return;
+        }
+
         // Wrap the provided rankListener to an ActionListener that would handle the response from the inference service
         // and then pass the results
         final ActionListener<InferenceAction.Response> actionListener = scoreListener.delegateFailureAndWrap((l, r) -> {
-            float[] scores = extractScoresFromResponse(r);
-            if (scores.length != featureDocs.length) {
-                l.onFailure(
-                    new IllegalStateException("Document and score count mismatch: [" + featureDocs.length + "] vs [" + scores.length + "]")
-                );
+            InferenceServiceResults results = r.getResults();
+            assert results instanceof RankedDocsResults;
+
+            // Ensure we get exactly as many scores as the number of docs we passed, otherwise we may return incorrect results
+            List<RankedDocsResults.RankedDoc> rankedDocs = ((RankedDocsResults) results).getRankedDocs();
+            if (rankedDocs.size() != featureDocs.length) {
+                l.onFailure(
+                    new IllegalStateException(
+                        "Reranker input document count and returned score count mismatch: ["
+                            + featureDocs.length
+                            + "] vs ["
+                            + rankedDocs.size()
+                            + "]"
+                    )
+                );
             } else {
+                float[] scores = extractScoresFromRankedDocs(rankedDocs);
                 l.onResponse(scores);
             }
         });
```

Could we also add a test that throws this exception instead of the IOOB?

@benwtrent @pmpailis I added a test case for simulating invalid indices (7e3f153). Note I don't think it actually covers the use case, because it's a sub-case of the reranker input/output count mismatch. With the bugfix this is now caught before the actual doc index assignment happens that would trigger the IOOB. (We do not have specific handling of N inputs -> N outputs -> index pointing outside 0..N-1; I'd consider that a reranker error.)

@demjened Ah, ok, is there a way to parse this

@benwtrent Not without issuing a GET call before each inference call to check the inference endpoint's configuration, I'm afraid. The rerank retriever framework only exposes hooks for creation and submission of an inference request, and parsing the results, but it doesn't know about the config.

We need to fix this. If there are things that are invalid for a configuration & a search request, we need to fix this. Calling a local GET to retrieve some minor documentation is way cheaper than calling some external service when we know the request will fail. I would hold off on merging or progressing this until we can eagerly validate the search request, as we know ahead of time that it won't work.

@benwtrent @Mikep86 @pmpailis I'll summarize the latest changes here that accommodate all your suggestions (thanks for those):

Let me know if you feel there are still things to improve and must go in this PR; I want to timebox the remaining effort, as the bug has been fixed and I want to make sure this gets merged by 8.15.0 BC5.

Heads up that @demjened & I discussed offline yesterday and we're going to investigate doing the

@Mikep86 I've been thinking about this; while moving this logic to the ML code would make it cleaner and would remove coupling, I'm worried it's not a good idea from a product perspective. For reference, here's the code you suggested that would replace the preemptive top_n check from the reranker:

```java
// CohereService#doInfer
...
CohereModel cohereModel = (CohereModel) model;
var actionCreator = new CohereActionCreator(getSender(), getServiceComponents());
if (model instanceof CohereRerankModel rerankModel) {
    Integer topN = rerankModel.getTaskSettings().getTopNDocumentsOnly();
    if (topN != null && topN < input.size()) {
        listener.onFailure(new IllegalArgumentException("top_n < doc count"));
        return;
    }
}
...
```

The problem is we're invalidating a normal use case. It is a valid scenario that a rerank endpoint is configured to return a maximum of N hits for >N inputs. It's only an issue from the reranker retriever's perspective, where the a

So we have 3 options to implement the check:

WDYT?

It is valid for the reranker API to return fewer docs, as that is the whole point of

The retriever also sets window size. For now, we should enforce that window size is <= topN. Maybe we can adjust this in the future, but for retrievers, this is a sane default & safe behavior.

Got it, wasn't thinking of rerank endpoint usage outside of this context.
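The fail-fast branch above reduces to a small predicate. Here is a self-contained sketch of that check; `TopNCheck` and `validate` are hypothetical names for illustration only (the real code reports `inferenceId` and fails via `scoreListener.onFailure`):

```java
// Minimal model of the fail-fast top_n validation in computeScores.
// TopNCheck and validate() are hypothetical names, not from the codebase.
public class TopNCheck {

    // Returns an error message when the endpoint's top_n override would
    // truncate the reranker input, or null when the request is safe to send.
    static String validate(Integer endpointTopN, int featureDocCount) {
        if (endpointTopN != null && endpointTopN < featureDocCount) {
            return "Endpoint is configured to return the top [" + endpointTopN
                + "] results. Reduce rank_window_size to be less than or equal to this value.";
        }
        return null; // no override, or the override covers all docs
    }

    public static void main(String[] args) {
        System.out.println(validate(5, 10) != null);    // true: 5 < 10, fail fast
        System.out.println(validate(10, 10) == null);   // true: equal counts are fine
        System.out.println(validate(null, 10) == null); // true: no top_n override
    }
}
```

Failing before the inference call means no scores can ever be assigned to the wrong inputs, which is the core of the bugfix.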
```diff
@@ -73,6 +107,18 @@ protected void computeScores(RankFeatureDoc[] featureDocs, ActionListener<float[
         }
     }
 
+    /**
+     * Sorts documents by score descending and discards those with a score less than minScore.
+     * @param originalDocs documents to process
+     */
+    @Override
+    protected RankFeatureDoc[] preprocess(RankFeatureDoc[] originalDocs) {
+        return Arrays.stream(originalDocs)
+            .filter(doc -> minScore == null || doc.score >= minScore)
+            .sorted(Comparator.comparing((RankFeatureDoc doc) -> doc.score).reversed())
+            .toArray(RankFeatureDoc[]::new);
+    }
+
     protected InferenceAction.Request generateRequest(List<String> docFeatures) {
         return new InferenceAction.Request(
             TaskType.RERANK,
```
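The `preprocess` override is a plain stream pipeline: drop docs below `minScore` (when set), then sort by score descending. A runnable sketch with a minimal stand-in for `RankFeatureDoc` (the `Doc` class below is a simplified stand-in, not the real type):

```java
import java.util.Arrays;
import java.util.Comparator;

// Self-contained sketch of the preprocess step: filter by minScore, sort
// by score descending. Doc stands in for RankFeatureDoc.
public class PreprocessSketch {
    static class Doc {
        final float score;
        Doc(float score) { this.score = score; }
    }

    static Doc[] preprocess(Doc[] docs, Float minScore) {
        return Arrays.stream(docs)
            .filter(d -> minScore == null || d.score >= minScore)         // discard below-threshold docs
            .sorted(Comparator.comparing((Doc d) -> d.score).reversed())  // highest score first
            .toArray(Doc[]::new);
    }

    public static void main(String[] args) {
        Doc[] out = preprocess(new Doc[] { new Doc(0.2f), new Doc(0.9f), new Doc(0.5f) }, 0.3f);
        // The 0.2 doc is dropped; the rest are ordered highest score first.
        System.out.println(out.length);   // 2
        System.out.println(out[0].score); // 0.9
    }
}
```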
```diff
@@ -85,11 +131,7 @@ protected InferenceAction.Request generateRequest(List<String> docFeatures) {
         );
     }
 
-    private float[] extractScoresFromResponse(InferenceAction.Response response) {
-        InferenceServiceResults results = response.getResults();
-        assert results instanceof RankedDocsResults;
-
-        List<RankedDocsResults.RankedDoc> rankedDocs = ((RankedDocsResults) results).getRankedDocs();
+    private float[] extractScoresFromRankedDocs(List<RankedDocsResults.RankedDoc> rankedDocs) {
         float[] scores = new float[rankedDocs.size()];
         for (RankedDocsResults.RankedDoc rankedDoc : rankedDocs) {
             scores[rankedDoc.index()] = rankedDoc.relevanceScore();
@@ -98,15 +140,22 @@ private float[] extractScoresFromResponse(InferenceAction.Response response) {
         return scores;
     }
 
-    /**
-     * Sorts documents by score descending and discards those with a score less than minScore.
-     * @param originalDocs documents to process
-     */
-    @Override
-    protected RankFeatureDoc[] preprocess(RankFeatureDoc[] originalDocs) {
-        return Arrays.stream(originalDocs)
-            .filter(doc -> minScore == null || doc.score >= minScore)
-            .sorted(Comparator.comparing((RankFeatureDoc doc) -> doc.score).reversed())
-            .toArray(RankFeatureDoc[]::new);
-    }
+    private Integer getTopNFromInferenceEndpointTaskSettings() {
+        GetInferenceModelAction.Request request = new GetInferenceModelAction.Request(inferenceId, TaskType.RERANK);
+        ActionFuture<GetInferenceModelAction.Response> response = client.execute(GetInferenceModelAction.INSTANCE, request);
+        ModelConfigurations modelConfigurations = response.actionGet().getEndpoints().get(0);
+
+        if (modelConfigurations.getService().equals(CohereService.NAME)
+            && modelConfigurations.getTaskType().equals(TaskType.RERANK)
+            && modelConfigurations.getTaskSettings() instanceof CohereRerankTaskSettings) {
+            return ((CohereRerankTaskSettings) modelConfigurations.getTaskSettings()).getTopNDocumentsOnly();
+        } else if (modelConfigurations.getService().equals(GoogleVertexAiService.NAME)
+            && modelConfigurations.getTaskType().equals(TaskType.RERANK)
+            && modelConfigurations.getTaskSettings() instanceof GoogleVertexAiRerankTaskSettings) {
+            return ((GoogleVertexAiRerankTaskSettings) modelConfigurations.getTaskSettings()).topN();
+        }
+
+        return null;
+    }
 }
```
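The index-based write in `extractScoresFromRankedDocs` is what made the original bug surface as an `ArrayIndexOutOfBoundsException`: the service returns `(index, score)` pairs whose indices refer to positions in the original input, so when fewer docs come back than were sent, an index can point past the smaller array. A minimal model (the `RankedDoc` record here is a simplified stand-in for `RankedDocsResults.RankedDoc`):

```java
import java.util.List;

// Models the score write-back by returned index. RankedDoc is a stand-in
// for RankedDocsResults.RankedDoc (index + relevanceScore).
public class ScoreExtraction {
    record RankedDoc(int index, float relevanceScore) {}

    static float[] extractScores(List<RankedDoc> rankedDocs) {
        float[] scores = new float[rankedDocs.size()];
        for (RankedDoc doc : rankedDocs) {
            // Indices refer to positions in the ORIGINAL input, so this is
            // only safe when result count == input count.
            scores[doc.index()] = doc.relevanceScore();
        }
        return scores;
    }

    public static void main(String[] args) {
        // Happy path: as many results as inputs, indices in bounds.
        float[] scores = extractScores(List.of(new RankedDoc(1, 0.9f), new RankedDoc(0, 0.4f), new RankedDoc(2, 0.1f)));
        System.out.println(scores[1]); // 0.9

        // Mismatch path: only 2 results came back, but one index still refers
        // to original position 3; without the count check this is the IOOB
        // discussed in the review thread.
        try {
            extractScores(List.of(new RankedDoc(0, 0.9f), new RankedDoc(3, 0.4f)));
        } catch (ArrayIndexOutOfBoundsException e) {
            System.out.println("out of bounds");
        }
    }
}
```

This is why the PR validates `rankedDocs.size() == featureDocs.length` before the write-back ever runs.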
We should definitely keep this asynchronous. Rule of thumb: never use an ActionFuture or make an async call synchronous when you can easily avoid it. It should be something like this (obviously with things better filled in)

@benwtrent Thanks for the code snippet, I updated it. One question regarding your comment: isn't it sufficient to just delegate to the outer listener, i.e. `scoreListener.onFailure(f)`?
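The reviewer's suggested snippet is elided above. As a hedged sketch of the listener-based shape being described, using simplified stand-in types (the `Listener` interface and `getEndpointTopN` below are illustrative, not Elasticsearch's real `ActionListener`/`Client` signatures):

```java
// Sketch of resolving the endpoint's top_n asynchronously via a listener
// chain instead of blocking on an ActionFuture. All types here are
// simplified stand-ins for illustration.
public class AsyncTopNSketch {
    interface Listener<T> {
        void onResponse(T value);
        void onFailure(Exception e);
    }

    // Stand-in for an async GET of the endpoint config; invokes the listener
    // with the configured top_n (null would mean no override). Hard-coded to 5
    // here purely for demonstration.
    static void getEndpointTopN(Listener<Integer> listener) {
        listener.onResponse(5);
    }

    static void computeScores(int featureDocCount, Listener<float[]> scoreListener) {
        getEndpointTopN(new Listener<Integer>() {
            @Override
            public void onResponse(Integer topN) {
                if (topN != null && topN < featureDocCount) {
                    scoreListener.onFailure(new IllegalArgumentException("rank_window_size > top_n"));
                    return;
                }
                // ... the inference call would go here, eventually delivering
                // real scores; a zero-filled array stands in for them.
                scoreListener.onResponse(new float[featureDocCount]);
            }

            @Override
            public void onFailure(Exception e) {
                // Delegating straight to the outer listener, per the question above.
                scoreListener.onFailure(e);
            }
        });
    }

    public static void main(String[] args) {
        computeScores(10, new Listener<float[]>() {
            @Override public void onResponse(float[] scores) { System.out.println("scores: " + scores.length); }
            @Override public void onFailure(Exception e) { System.out.println("failed: " + e.getMessage()); }
        });
    }
}
```

No thread ever blocks waiting on a future here: the validation runs inside the GET's response callback, and failures on either step flow to the single outer listener.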