diff --git a/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessor.java index b2e14122b..969e1710c 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessor.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessor.java @@ -9,6 +9,7 @@ import java.util.List; import java.util.Objects; +import java.util.Optional; import java.util.stream.Collectors; import lombok.AllArgsConstructor; @@ -57,7 +58,7 @@ public void process( return; } List querySearchResults = getQueryPhaseSearchResults(searchPhaseResult); - FetchSearchResult fetchSearchResult = searchPhaseResult.getAtomicArray().asList().get(0).fetchResult(); + FetchSearchResult fetchSearchResult = getFetchSearchResults(searchPhaseResult); normalizationWorkflow.execute(querySearchResults, fetchSearchResult, normalizationTechnique, combinationTechnique); } @@ -123,4 +124,9 @@ private List getQueryPhase .map(result -> result == null ? 
null : result.queryResult()) .collect(Collectors.toList()); } + + /** Returns the fetch result of the first element in the results' atomic array list, or null when that list is empty or the element's fetchResult() is null. */ private FetchSearchResult getFetchSearchResults(final SearchPhaseResults results) { + Optional optionalFirstResult = results.getAtomicArray().asList().stream().findFirst(); + return optionalFirstResult.map(SearchPhaseResult::fetchResult).orElse(null); + } } diff --git a/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java b/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java index 5c3ba76bb..2fccb702a 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java @@ -82,7 +82,15 @@ private List getQueryTopDocs(final List quer .map(CompoundTopDocs::new) .collect(Collectors.toList()); if (queryTopDocs.size() != querySearchResults.size()) { - log.warn("Some of querySearchResults are not produced by hybrid query"); + log.error( + String.format( + Locale.ROOT, + "sizes of querySearchResults [%d] and queryTopDocs [%d] must match. Most likely some of query results were not formatted correctly by the hybrid query", + querySearchResults.size(), + queryTopDocs.size() + ) + ); + throw new IllegalStateException("found inconsistent system state while processing score normalization and combination"); } return queryTopDocs; } @@ -131,7 +139,10 @@ private void updateOriginalFetchResults( FetchSearchResult fetchSearchResult = fetchSearchResultOptional.get(); SearchHits searchHits = fetchSearchResult.hits(); - // create map of docId to index of search hits, handles (2) + // create map of docId to the search hit itself. This solves (2): duplicates come from + // delimiter and start/stop elements, which all have the same valid doc_id. For this map + // we use doc_id as a key, so all those special elements are collapsed into a single + // key-value pair; the merge function keeps the first hit seen for a doc_id. 
Map docIdToSearchHit = Arrays.stream(searchHits.getHits()) .collect(Collectors.toMap(SearchHit::docId, Function.identity(), (a1, a2) -> a1));