Skip to content

Refactor storage factory #1895

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20250424210340222198.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "add semversioner file"
}
1 change: 0 additions & 1 deletion .vscode/extensions.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
"ms-python.vscode-pylance",
"bierner.markdown-mermaid",
"streetsidesoftware.code-spell-checker",
"ronnidc.nunjucks",
"lucien-martijn.parquet-visualizer",
]
}
53 changes: 0 additions & 53 deletions graphrag/storage/blob_pipeline_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,59 +330,6 @@ def create_blob_storage(**kwargs: Any) -> PipelineStorage:
)


def validate_blob_container_name(container_name: str):
    """
    Check if the provided blob container name is valid based on Azure rules.

    - A blob container name must be between 3 and 63 characters in length.
    - Start with a letter or number
    - All letters used in blob container names must be lowercase.
    - Contain only letters, numbers, or the hyphen.
    - Consecutive hyphens are not permitted.
    - Cannot end with a hyphen.

    Args:
    -----
    container_name (str)
        The blob container name to be validated.

    Returns
    -------
    True if the name is valid. Otherwise a ``ValueError`` instance describing
    the first rule that was violated. The error is *returned*, not raised,
    and is never ``False``.

    NOTE(review): because an exception instance is truthy, callers must test
    the result with ``is True`` (or ``isinstance(result, ValueError)``), not
    with a bare ``if``. Raising instead would be cleaner, but would change the
    contract existing callers see.
    """
    # Check the length of the name. This runs first, so the indexing below
    # (container_name[0] / container_name[-1]) can never hit an empty string.
    name_length = len(container_name)
    if name_length < 3 or name_length > 63:
        return ValueError(
            f"Container name must be between 3 and 63 characters in length. Name provided was {name_length} characters long."
        )

    # Check if the name starts with a letter or number
    if not container_name[0].isalnum():
        return ValueError(
            f"Container name must start with a letter or number. Starting character was {container_name[0]}."
        )

    # Check for valid characters (letters, numbers, hyphen) and lowercase letters.
    # fullmatch is used instead of match with "^...$" because "$" also matches
    # just before a trailing newline, which would let "name\n" through.
    if not re.fullmatch(r"[a-z0-9-]+", container_name):
        return ValueError(
            f"Container name must only contain:\n- lowercase letters\n- numbers\n- or hyphens\nName provided was {container_name}."
        )

    # Check for consecutive hyphens
    if "--" in container_name:
        return ValueError(
            f"Container name cannot contain consecutive hyphens. Name provided was {container_name}."
        )

    # Check for hyphens at the end of the name
    if container_name[-1] == "-":
        return ValueError(
            f"Container name cannot end with a hyphen. Name provided was {container_name}."
        )

    return True


def _create_progress_status(
num_loaded: int, num_filtered: int, num_total: int
) -> Progress:
Expand Down
28 changes: 13 additions & 15 deletions graphrag/storage/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,16 @@ def register(cls, storage_type: str, storage: type):
def create_storage(
cls, storage_type: OutputType | str, kwargs: dict
) -> PipelineStorage:
"""Create or get a storage object from the provided type."""
match storage_type:
case OutputType.blob:
return create_blob_storage(**kwargs)
case OutputType.cosmosdb:
return create_cosmosdb_storage(**kwargs)
case OutputType.file:
return create_file_storage(**kwargs)
case OutputType.memory:
return MemoryPipelineStorage()
case _:
if storage_type in cls.storage_types:
return cls.storage_types[storage_type](**kwargs)
msg = f"Unknown storage type: {storage_type}"
raise ValueError(msg)
"""Get a storage object from the provided type."""
if storage_type not in cls.storage_types:
msg = f"Storage implementation '{storage_type}' is not registered."
raise ValueError(msg)
return cls.storage_types[storage_type](**kwargs)


# Register the built-in storage implementations under their OutputType so that
# create_storage can resolve them through the same registry lookup as custom
# implementations. register's visible signature is (cls, storage_type, storage),
# so each call passes exactly the type key and its factory; passing the type
# twice would fail with a TypeError when this module is imported.
StorageFactory.register(OutputType.blob, create_blob_storage)
StorageFactory.register(OutputType.cosmosdb, create_cosmosdb_storage)
StorageFactory.register(OutputType.file, create_file_storage)
StorageFactory.register(OutputType.memory, MemoryPipelineStorage)
6 changes: 3 additions & 3 deletions graphrag/utils/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,9 @@ def load_search_prompt(root_dir: str, prompt_config: str | None) -> str | None:
def create_storage_from_config(output: OutputConfig) -> PipelineStorage:
"""Create a storage object from the config."""
storage_config = output.model_dump()
return StorageFactory().create_storage(
storage_type=storage_config["type"],
kwargs=storage_config,
return StorageFactory.create_storage(
storage_config["type"],
storage_config,
)


Expand Down
Loading