Skip to content

Commit

Permalink
fix: better WeightRerankRunner run logic use O(1) and delete unused code (#10849)
Browse files Browse the repository at this point in the history

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
  • Loading branch information
yihong0618 authored Nov 19, 2024
1 parent bc1013d commit 58a9d9e
Showing 1 changed file with 8 additions and 9 deletions.
17 changes: 8 additions & 9 deletions api/core/rag/rerank/weight_rerank.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,21 @@ def run(
:return:
"""
docs = []
doc_id = []
unique_documents = []
doc_id = set()
for document in documents:
if document.metadata["doc_id"] not in doc_id:
doc_id.append(document.metadata["doc_id"])
docs.append(document.page_content)
doc_id = document.metadata.get("doc_id")
if doc_id not in doc_id:
doc_id.add(doc_id)
unique_documents.append(document)

documents = unique_documents

rerank_documents = []
query_scores = self._calculate_keyword_score(query, documents)

query_vector_scores = self._calculate_cosine(self.tenant_id, query, documents, self.weights.vector_setting)

rerank_documents = []
for document, query_score, query_vector_score in zip(documents, query_scores, query_vector_scores):
# format document
score = (
self.weights.vector_setting.vector_weight * query_vector_score
+ self.weights.keyword_setting.keyword_weight * query_score
Expand All @@ -61,7 +59,8 @@ def run(
continue
document.metadata["score"] = score
rerank_documents.append(document)
rerank_documents = sorted(rerank_documents, key=lambda x: x.metadata["score"], reverse=True)

rerank_documents.sort(key=lambda x: x.metadata["score"], reverse=True)
return rerank_documents[:top_n] if top_n else rerank_documents

def _calculate_keyword_score(self, query: str, documents: list[Document]) -> list[float]:
Expand Down

0 comments on commit 58a9d9e

Please sign in to comment.