Skip to content

Support for microsoft/documentdb #1728

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,10 @@
},
"custom": true, // Enable the `custom` dictionary
"internal-terms": true // Enable the `internal-terms` dictionary
}
},
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
3 changes: 3 additions & 0 deletions graphrag/cache/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from graphrag.config.enums import CacheType
from graphrag.storage.blob_pipeline_storage import create_blob_storage
from graphrag.storage.cosmosdb_pipeline_storage import create_cosmosdb_storage
from graphrag.storage.documentdb_pipeline_storage import create_documentdb_storage
from graphrag.storage.file_pipeline_storage import FilePipelineStorage

if TYPE_CHECKING:
Expand Down Expand Up @@ -56,6 +57,8 @@ def create_cache(
return JsonPipelineCache(create_blob_storage(**kwargs))
case CacheType.cosmosdb:
return JsonPipelineCache(create_cosmosdb_storage(**kwargs))
case CacheType.documentdb:
return JsonPipelineCache(create_documentdb_storage(**kwargs))
case _:
if cache_type in cls.cache_types:
return cls.cache_types[cache_type](**kwargs)
Expand Down
4 changes: 4 additions & 0 deletions graphrag/config/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ class CacheType(str, Enum):
"""The blob cache configuration type."""
cosmosdb = "cosmosdb"
"""The cosmosdb cache configuration type"""
documentdb = "documentdb"
"""The documentdb cache configuration type"""

def __repr__(self):
"""Get a string representation."""
Expand Down Expand Up @@ -64,6 +66,8 @@ class OutputType(str, Enum):
"""The blob output type."""
cosmosdb = "cosmosdb"
"""The cosmosdb output type"""
documentdb = "documentdb"
"""The documentdb output type"""

def __repr__(self):
"""Get a string representation."""
Expand Down
6 changes: 3 additions & 3 deletions graphrag/config/init_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,15 @@
## connection_string and container_name must be provided

cache:
type: {defs.CACHE_TYPE.value} # [file, blob, cosmosdb]
type: {defs.CACHE_TYPE.value} # [file, blob, cosmosdb, documentdb]
base_dir: "{defs.CACHE_BASE_DIR}"

reporting:
type: {defs.REPORTING_TYPE.value} # [file, blob, cosmosdb]
type: {defs.REPORTING_TYPE.value} # [file, blob, cosmosdb, documentdb]
base_dir: "{defs.REPORTING_BASE_DIR}"

output:
type: {defs.OUTPUT_TYPE.value} # [file, blob, cosmosdb]
type: {defs.OUTPUT_TYPE.value} # [file, blob, cosmosdb, documentdb]
base_dir: "{defs.OUTPUT_BASE_DIR}"

### Workflow settings ###
Expand Down
3 changes: 3 additions & 0 deletions graphrag/config/models/cache_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,6 @@ class CacheConfig(BaseModel):
cosmosdb_account_url: str | None = Field(
description="The cosmosdb account url to use.", default=None
)
documentdb_account_url: str | None = Field(
description="The documentdb account url to use.", default=None
)
3 changes: 3 additions & 0 deletions graphrag/config/models/output_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,6 @@ class OutputConfig(BaseModel):
cosmosdb_account_url: str | None = Field(
description="The cosmosdb account url to use.", default=None
)
documentdb_account_url: str | None = Field(
description="The documentdb account url to use.", default=None
)
10 changes: 8 additions & 2 deletions graphrag/config/models/vector_store_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,17 @@ def _validate_url(self) -> None:
):
msg = "vector_store.url is required when vector_store.type == cosmos_db. Please rerun `graphrag init` and select the correct vector store type."
raise ValueError(msg)

if self.type == VectorStoreType.DocumentDB and (
self.url is None or self.url.strip() == ""
):
msg = "vector_store.url is required when vector_store.type == document_db. Please rerun `graphrag init` and select the correct vector store type."
raise ValueError(msg)

if self.type == VectorStoreType.LanceDB and (
self.url is not None and self.url.strip() != ""
):
msg = "vector_store.url is only used when vector_store.type == azure_ai_search or vector_store.type == cosmos_db. Please rerun `graphrag init` and select the correct vector store type."
msg = "vector_store.url is only used when vector_store.type == azure_ai_search or vector_store.type == cosmos_db or vector_store.type == document_db. Please rerun `graphrag init` and select the correct vector store type."
raise ValueError(msg)

api_key: str | None = Field(
Expand All @@ -73,7 +79,7 @@ def _validate_url(self) -> None:
)

database_name: str | None = Field(
description="The database name to use when type == cosmos_db.", default=None
description="The database name to use when type == cosmos_db or document_db.", default=None
)

overwrite: bool = Field(
Expand Down
Loading