From 7e85d56874ff4edaeea7134e15c671b0b1263c5b Mon Sep 17 00:00:00 2001 From: Ram <9160496+0-hero@users.noreply.github.com> Date: Fri, 26 Jan 2024 20:44:02 +0530 Subject: [PATCH] RAGatouille metadata support (#893) * Updated RAGatouilleRetrieverPack to add support for document metadata * Method updates * Linting --------- Co-authored-by: Ram --- .../llama_packs/ragatouille_retriever/base.py | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/llama_hub/llama_packs/ragatouille_retriever/base.py b/llama_hub/llama_packs/ragatouille_retriever/base.py index ad86063bfb..92e1532218 100644 --- a/llama_hub/llama_packs/ragatouille_retriever/base.py +++ b/llama_hub/llama_packs/ragatouille_retriever/base.py @@ -69,11 +69,18 @@ def __init__( ) doc_txts = [doc.get_content() for doc in documents] + doc_ids = [doc.doc_id for doc in documents] + doc_metadatas = [doc.metadata for doc in documents] # index the documents if index_path is None: RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0") - index_path = RAG.index(index_name=index_name, collection=doc_txts) + index_path = RAG.index( + index_name=index_name, + collection=doc_txts, + document_ids=doc_ids, + document_metadatas=doc_metadatas, + ) else: RAG = RAGPretrainedModel.from_index(index_path) @@ -89,6 +96,26 @@ def __init__( self.custom_retriever, service_context=ServiceContext.from_defaults(llm=llm) ) + def add_documents(self, documents: List[Document]) -> None: + """Add documents.""" + + doc_txts = [doc.get_content() for doc in documents] + doc_ids = [doc.doc_id for doc in documents] + doc_metadatas = [doc.metadata for doc in documents] + + self.RAG.add_to_index( + new_collection=doc_txts, + new_document_ids=doc_ids, + new_document_metadatas=doc_metadatas, + ) + + def delete_documents(self, documents: List[Document]) -> None: + """Delete documents.""" + + doc_ids = [doc.doc_id for doc in documents] + + self.RAG.delete_from_index(document_ids=doc_ids) + def get_modules(self) -> Dict[str, Any]: """Get modules.""" return {