From 8bd08b6bf3a4b2d4002067e7d745fee3af2e68bb Mon Sep 17 00:00:00 2001 From: Jerry Liu Date: Thu, 13 Apr 2023 00:23:31 -0700 Subject: [PATCH] add pinecone namespace (#1169) --- .../indices/vector_store/vector_indices.py | 2 ++ gpt_index/vector_stores/pinecone.py | 18 ++++++++++++------ tests/indices/vector_store/utils.py | 1 + 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/gpt_index/indices/vector_store/vector_indices.py b/gpt_index/indices/vector_store/vector_indices.py index 82a0d5155449a..03ca31dcd6c13 100644 --- a/gpt_index/indices/vector_store/vector_indices.py +++ b/gpt_index/indices/vector_store/vector_indices.py @@ -210,6 +210,7 @@ def __init__( pinecone_index: Optional[Any] = None, index_name: Optional[str] = None, environment: Optional[str] = None, + namespace: Optional[str] = None, metadata_filters: Optional[Dict[str, Any]] = None, pinecone_kwargs: Optional[Dict] = None, insert_kwargs: Optional[Dict] = None, @@ -230,6 +231,7 @@ def __init__( pinecone_index=pinecone_index, index_name=index_name, environment=environment, + namespace=namespace, metadata_filters=metadata_filters, pinecone_kwargs=pinecone_kwargs, insert_kwargs=insert_kwargs, diff --git a/gpt_index/vector_stores/pinecone.py b/gpt_index/vector_stores/pinecone.py index baf0f20c49184..a0415b6fd12c8 100644 --- a/gpt_index/vector_stores/pinecone.py +++ b/gpt_index/vector_stores/pinecone.py @@ -4,20 +4,20 @@ """ +import logging import os -from typing import Any, Dict, List, Optional, cast, Callable +from collections import Counter from functools import partial +from typing import Any, Callable, Dict, List, Optional, cast -from gpt_index.data_structs.node_v2 import Node, DocumentRelationship +from gpt_index.data_structs.node_v2 import DocumentRelationship, Node from gpt_index.vector_stores.types import ( NodeEmbeddingResult, VectorStore, - VectorStoreQueryResult, VectorStoreQuery, VectorStoreQueryMode, + VectorStoreQueryResult, ) -from collections import Counter -import logging _logger = logging.getLogger(__name__) @@ -128,6 +128,7 @@ def __init__( pinecone_index: Optional[Any] = None, index_name: Optional[str] = None, environment: Optional[str] = None, + namespace: Optional[str] = None, metadata_filters: Optional[Dict[str, Any]] = None, pinecone_kwargs: Optional[Dict] = None, insert_kwargs: Optional[Dict] = None, @@ -148,6 +149,7 @@ def __init__( self._index_name = index_name self._environment = environment + self._namespace = namespace if pinecone_index is not None: self._pinecone_index = cast(pinecone.Index, pinecone_index) _logger.warn( @@ -202,6 +204,7 @@ def config_dict(self) -> dict: return { "index_name": self._index_name, "environment": self._environment, + "namespace": self._namespace, "metadata_filters": self._metadata_filters, "pinecone_kwargs": self._pinecone_kwargs, "insert_kwargs": self._insert_kwargs, @@ -264,7 +267,9 @@ def add( [node.get_text()], self._tokenizer )[0] entry.update({"sparse_values": sparse_vector}) - self._pinecone_index.upsert([entry], **self._pinecone_kwargs) + self._pinecone_index.upsert( + [entry], namespace=self._namespace, **self._pinecone_kwargs + ) ids.append(new_id) return ids @@ -312,6 +317,7 @@ def query(self, query: VectorStoreQuery) -> VectorStoreQueryResult: top_k=query.similarity_top_k, include_values=True, include_metadata=True, + namespace=self._namespace, filter=self._metadata_filters, **self._pinecone_kwargs, ) diff --git a/tests/indices/vector_store/utils.py b/tests/indices/vector_store/utils.py index 571535cd13fa4..896be7a185cb2 100644 --- a/tests/indices/vector_store/utils.py +++ b/tests/indices/vector_store/utils.py @@ -29,6 +29,7 @@ def query( include_values: bool = True, include_metadata: bool = True, filter: Optional[Dict[str, Any]] = None, + namespace: Optional[str] = None, ) -> Any: """Mock query.""" # index_mat is n x k