Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
fc86a84
Added copyright header to new files
alexthomas93 Jul 22, 2024
dc367ec
Added copyright header to kg_writer.py
alexthomas93 Jul 22, 2024
a4e9c3d
Added __future__ import to kg_writer.py for backwards compatibility o…
alexthomas93 Jul 22, 2024
f232af8
Added E2E test for Neo4jWriter
alexthomas93 Jul 23, 2024
7e9f779
Added a copyright header to test_kg_builder_e2e.py
alexthomas93 Jul 23, 2024
ff944e5
Added upsert_vector test for relationship embeddings
alexthomas93 Jul 23, 2024
ab9947b
Moved KG writer and its tests
alexthomas93 Jul 24, 2024
9c30284
Moved Neo4jGraph and associated objects to a new file
alexthomas93 Jul 24, 2024
5d92dba
Renamed KG builder fixture
alexthomas93 Jul 24, 2024
d926b26
Added unit tests for KG writer
alexthomas93 Jul 24, 2024
e324015
Split upsert_vector into 2 functions
alexthomas93 Jul 25, 2024
705ab44
Fixed broken cypher query strings
alexthomas93 Jul 26, 2024
f678ba3
Removed embedding creation from Neo4jWriter
alexthomas93 Jul 26, 2024
5aa4722
Fixed setup_neo4j_for_kg_construction fixture
alexthomas93 Jul 26, 2024
a0086e9
Added KGWriterModel class
alexthomas93 Jul 30, 2024
d82ba09
Fixed minor mistake in test_weaviate_e2e.py
alexthomas93 Jul 31, 2024
2b88fef
Renamed kg_construction folder to components
alexthomas93 Jul 31, 2024
442a4e8
Updated unit tests with new folder structure
alexthomas93 Jul 31, 2024
78e6d93
Fixed broken import
alexthomas93 Jul 31, 2024
c78d9db
Fixed copyright headers
alexthomas93 Aug 1, 2024
64501e8
Added missing docstrings
alexthomas93 Aug 1, 2024
609feaf
Fixed typo
alexthomas93 Aug 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ Database Interaction

.. autofunction:: neo4j_genai.indexes.upsert_vector

.. autofunction:: neo4j_genai.indexes.upsert_vector_on_relationship


******
Errors
Expand Down
1 change: 1 addition & 0 deletions src/neo4j_genai/components/embedder.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Copyright (c) "Neo4j"
# Neo4j Sweden AB [https://neo4j.com]
# #
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
144 changes: 144 additions & 0 deletions src/neo4j_genai/components/kg_writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Copyright (c) "Neo4j"
# Neo4j Sweden AB [https://neo4j.com]
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# https://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from abc import abstractmethod
from typing import Literal, Optional

import neo4j
from neo4j_genai.components.types import Neo4jGraph, Neo4jNode, Neo4jRelationship
from neo4j_genai.indexes import upsert_vector, upsert_vector_on_relationship
from neo4j_genai.neo4j_queries import UPSERT_NODE_QUERY, UPSERT_RELATIONSHIP_QUERY
from neo4j_genai.pipeline.component import Component, DataModel
from pydantic import validate_call


class KGWriterModel(DataModel):
    """Data model for the output of the Knowledge Graph writer.

    Attributes:
        status (Literal["SUCCESS", "FAILURE"]): Whether or not the write operation was successful.
            Only these two string values are accepted.
    """

    status: Literal["SUCCESS", "FAILURE"]


class KGWriter(Component):
    """Abstract class used to write a knowledge graph to a data store."""

    @abstractmethod
    @validate_call
    async def run(self, graph: Neo4jGraph) -> KGWriterModel:
        """
        Writes the graph to a data store.

        Args:
            graph (Neo4jGraph): The knowledge graph to write to the data store.

        Returns:
            KGWriterModel: The outcome of the write operation.
        """
        pass


class Neo4jWriter(KGWriter):
    """Writes a knowledge graph to a Neo4j database.

    Node IDs and property values are sent to the server as query parameters
    instead of being formatted into the Cypher text. They are therefore stored
    exactly as given (a string ID stays a string) and cannot change the
    structure of the query. Labels, relationship types and property keys
    cannot be parameterized in Cypher, so they are backtick-escaped instead.

    Args:
        driver (neo4j.Driver): The Neo4j driver to connect to the database.
        neo4j_database (Optional[str]): The name of the Neo4j database to write to. Defaults to 'neo4j' if not provided.
    """

    def __init__(
        self,
        driver: neo4j.Driver,
        neo4j_database: Optional[str] = None,
    ):
        self.driver = driver
        self.neo4j_database = neo4j_database

    @staticmethod
    def _escape_backticks(name: str) -> str:
        """Doubles backticks so the name is safe inside a backtick-quoted Cypher name."""
        return name.replace("`", "``")

    @staticmethod
    def _property_entries(
        item: Neo4jNode | Neo4jRelationship,
    ) -> tuple[list[str], dict[str, object]]:
        """Builds parameterized Cypher map entries for the properties of a node or relationship.

        Args:
            item (Neo4jNode | Neo4jRelationship): The object whose ``properties`` are converted.

        Returns:
            tuple[list[str], dict[str, object]]: The ``key: $param`` entries to place inside a
            Cypher map literal, and the query parameters those entries refer to.
        """
        entries: list[str] = []
        parameters: dict[str, object] = {}
        for index, prop in enumerate(item.properties or []):
            # Positional parameter names avoid any dependency on the key's characters
            param_name = f"prop_{index}"
            escaped_key = prop.key.replace("`", "``")
            entries.append(f"`{escaped_key}`: ${param_name}")
            parameters[param_name] = prop.value
        return entries, parameters

    def _upsert_node(self, node: Neo4jNode) -> None:
        """Upserts a single node into the Neo4j database.

        Args:
            node (Neo4jNode): The node to upsert into the database.
        """
        # Create the initial node
        entries, parameters = self._property_entries(node)
        parameters["id"] = node.id
        properties = "{" + ", ".join(["id: $id", *entries]) + "}"
        query = UPSERT_NODE_QUERY.format(
            label=self._escape_backticks(node.label), properties=properties
        )
        # Write to the same database the vector upserts below are sent to
        result = self.driver.execute_query(
            query, parameters, database_=self.neo4j_database
        )
        node_id = result.records[0]["elementID(n)"]
        # Add the embedding properties to the node
        for prop in node.embedding_properties or []:
            upsert_vector(
                driver=self.driver,
                node_id=node_id,
                embedding_property=prop.key,
                vector=prop.value,
                neo4j_database=self.neo4j_database,
            )

    def _upsert_relationship(self, rel: Neo4jRelationship) -> None:
        """Upserts a single relationship into the Neo4j database.

        The start and end nodes are looked up by their ``id`` property and must
        already exist.

        Args:
            rel (Neo4jRelationship): The relationship to upsert into the database.
        """
        # Create the initial relationship
        entries, parameters = self._property_entries(rel)
        parameters["start_node_id"] = rel.start_node_id
        parameters["end_node_id"] = rel.end_node_id
        properties = "{" + ", ".join(entries) + "}"
        # The ID placeholders are filled with parameter references, not the raw values
        query = UPSERT_RELATIONSHIP_QUERY.format(
            start_node_id="$start_node_id",
            end_node_id="$end_node_id",
            type="`" + self._escape_backticks(rel.type) + "`",
            properties=properties,
        )
        result = self.driver.execute_query(
            query, parameters, database_=self.neo4j_database
        )
        # NOTE(review): if either endpoint is missing the MATCH yields no rows and
        # this lookup raises IndexError, which run() does not catch.
        rel_id = result.records[0]["elementID(r)"]
        # Add the embedding properties to the relationship
        for prop in rel.embedding_properties or []:
            upsert_vector_on_relationship(
                driver=self.driver,
                rel_id=rel_id,
                embedding_property=prop.key,
                vector=prop.value,
                neo4j_database=self.neo4j_database,
            )

    @validate_call
    async def run(self, graph: Neo4jGraph) -> KGWriterModel:
        """Upserts a knowledge graph into a Neo4j database.

        Nodes are written before relationships so that relationship endpoints
        can be matched.

        Args:
            graph (Neo4jGraph): The knowledge graph to upsert into the database.

        Returns:
            KGWriterModel: ``status="SUCCESS"`` when every node and relationship was written,
            ``status="FAILURE"`` when the driver raised a ``ClientError``.
        """
        # NOTE(review): the vector upsert helpers wrap ClientError in
        # Neo4jInsertionError; unless that type derives from ClientError it
        # propagates to the caller instead of producing a FAILURE status.
        try:
            for node in graph.nodes:
                self._upsert_node(node)

            for rel in graph.relationships:
                self._upsert_relationship(rel)

            return KGWriterModel(status="SUCCESS")
        except neo4j.exceptions.ClientError:
            return KGWriterModel(status="FAILURE")
1 change: 1 addition & 0 deletions src/neo4j_genai/components/text_splitters/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Copyright (c) "Neo4j"
# Neo4j Sweden AB [https://neo4j.com]
# #
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
1 change: 1 addition & 0 deletions src/neo4j_genai/components/text_splitters/langchain.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Copyright (c) "Neo4j"
# Neo4j Sweden AB [https://neo4j.com]
# #
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
1 change: 1 addition & 0 deletions src/neo4j_genai/components/text_splitters/llamaindex.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Copyright (c) "Neo4j"
# Neo4j Sweden AB [https://neo4j.com]
# #
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
71 changes: 71 additions & 0 deletions src/neo4j_genai/components/types.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Copyright (c) "Neo4j"
# Neo4j Sweden AB [https://neo4j.com]
# #
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -39,3 +40,73 @@ class TextChunks(DataModel):
"""

chunks: list[TextChunk]


class Neo4jProperty(BaseModel):
    """Represents a Neo4j property as a key/value pair.

    Attributes:
        key (str): The property name.
        value (Any): The property value. No type restriction is applied by this model.
    """

    key: str
    value: Any


class Neo4jEmbeddingProperty(BaseModel):
    """Represents a Neo4j embedding property, i.e. a property whose value is a vector.

    Attributes:
        key (str): The property name.
        value (list[float]): The embedding vector.
    """

    key: str
    value: list[float]


class Neo4jNode(BaseModel):
    """Represents a Neo4j node.

    Attributes:
        id (str): The ID of the node.
        label (str): The label of the node.
        properties (Optional[list[Neo4jProperty]]): A list of properties associated with the node. Defaults to None.
        embedding_properties (Optional[list[Neo4jEmbeddingProperty]]): A list of embedding properties associated with the node. Defaults to None.
    """

    id: str
    label: str
    properties: Optional[list[Neo4jProperty]] = None
    embedding_properties: Optional[list[Neo4jEmbeddingProperty]] = None


class Neo4jRelationship(BaseModel):
    """Represents a directed Neo4j relationship between two nodes.

    Attributes:
        start_node_id (str): The ID of the start node.
        end_node_id (str): The ID of the end node.
        type (str): The relationship type.
        properties (Optional[list[Neo4jProperty]]): A list of properties associated with the relationship. Defaults to None.
        embedding_properties (Optional[list[Neo4jEmbeddingProperty]]): A list of embedding properties associated with the relationship. Defaults to None.
    """

    start_node_id: str
    end_node_id: str
    type: str
    properties: Optional[list[Neo4jProperty]] = None
    embedding_properties: Optional[list[Neo4jEmbeddingProperty]] = None


class Neo4jGraph(BaseModel):
    """Represents a Neo4j graph as a collection of nodes and relationships.

    Both fields are required; pass empty lists for a graph without nodes or relationships.

    Attributes:
        nodes (list[Neo4jNode]): A list of nodes in the graph.
        relationships (list[Neo4jRelationship]): A list of relationships in the graph.
    """

    nodes: list[Neo4jNode]
    relationships: list[Neo4jRelationship]
79 changes: 70 additions & 9 deletions src/neo4j_genai/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def upsert_vector(
.. code-block:: python

from neo4j import GraphDatabase
from neo4j_genai.indexes import upsert_query
from neo4j_genai.indexes import upsert_vector

URI = "neo4j://localhost:7687"
AUTH = ("neo4j", "password")
Expand All @@ -260,7 +260,7 @@ def upsert_vector(
driver = GraphDatabase.driver(URI, auth=AUTH)

# Upsert the vector data
upsert_query(
upsert_vector(
driver,
node_id="nodeId",
embedding_property="vectorProperty",
Expand All @@ -278,13 +278,13 @@ def upsert_vector(
Neo4jInsertionError: If upserting of the vector fails.
"""
try:
query = """
MATCH (n)
WHERE elementId(n) = $id
WITH n
CALL db.create.setNodeVectorProperty(n, $embedding_property, $vector)
RETURN n
"""
query = (
"MATCH (n) "
"WHERE elementId(n) = $id "
"WITH n "
"CALL db.create.setNodeVectorProperty(n, $embedding_property, $vector) "
"RETURN n"
)
parameters = {
"id": node_id,
"embedding_property": embedding_property,
Expand All @@ -295,3 +295,64 @@ def upsert_vector(
raise Neo4jInsertionError(
f"Upserting vector to Neo4j failed: {e.message}"
) from e


def upsert_vector_on_relationship(
    driver: neo4j.Driver,
    rel_id: int,
    embedding_property: str,
    vector: list[float],
    neo4j_database: Optional[str] = None,
) -> None:
    """
    This method constructs a Cypher query and executes it to upsert (insert or update) a vector property on a specific relationship.

    Example:

    .. code-block:: python

        from neo4j import GraphDatabase
        from neo4j_genai.indexes import upsert_vector_on_relationship

        URI = "neo4j://localhost:7687"
        AUTH = ("neo4j", "password")

        # Connect to Neo4j database
        driver = GraphDatabase.driver(URI, auth=AUTH)

        # Upsert the vector data
        upsert_vector_on_relationship(
            driver,
            rel_id="relationshipId",
            embedding_property="vectorProperty",
            vector=...,
        )

    Args:
        driver (neo4j.Driver): Neo4j Python driver instance.
        rel_id (int): The id of the relationship. It is compared against ``elementId(r)`` in the query.
        embedding_property (str): The name of the property to store the vector in.
        vector (list[float]): The vector to store.
        neo4j_database (Optional[str]): The name of the Neo4j database. If not provided, this defaults to "neo4j" in the database (`see reference to documentation <https://neo4j.com/docs/operations-manual/current/database-administration/#manage-databases-default>`_).

    Raises:
        Neo4jInsertionError: If upserting of the vector fails.
    """
    # NOTE(review): Neo4j documents elementId() as returning a string, so the
    # ``int`` annotation on rel_id looks inaccurate; it is kept unchanged for
    # interface compatibility - confirm before tightening.
    try:
        query = (
            "MATCH ()-[r]->() "
            "WHERE elementId(r) = $id "
            "WITH r "
            "CALL db.create.setRelationshipVectorProperty(r, $embedding_property, $vector) "
            "RETURN r"
        )
        parameters = {
            "id": rel_id,
            "embedding_property": embedding_property,
            "vector": vector,
        }
        driver.execute_query(query, parameters, database_=neo4j_database)
    except neo4j.exceptions.ClientError as e:
        # Surface driver errors as the package's own insertion error, keeping the cause
        raise Neo4jInsertionError(
            f"Upserting vector to Neo4j failed: {e.message}"
        ) from e
8 changes: 8 additions & 0 deletions src/neo4j_genai/neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@
"YIELD node, score"
)

# Template for upserting a node. `{label}` and `{properties}` are str.format
# placeholders; `{properties}` must be a complete Cypher map literal including
# its braces. Whatever text is substituted becomes part of the query, so pass
# parameter references (e.g. `$id`) rather than raw values where possible.
UPSERT_NODE_QUERY = "MERGE (n:`{label}` {properties}) RETURN elementID(n)"

# Template for upserting a relationship between two nodes matched by their `id`
# property. The doubled braces become literal braces after str.format;
# `{start_node_id}`, `{end_node_id}`, `{type}` and `{properties}` are placeholders.
UPSERT_RELATIONSHIP_QUERY = (
    "MATCH (start {{ id: {start_node_id} }}), (end {{ id: {end_node_id} }}) "
    "MERGE (start)-[r:{type} {properties}]->(end) "
    "RETURN elementID(r)"
)


def _get_hybrid_query() -> str:
return (
Expand Down
20 changes: 20 additions & 0 deletions tests/e2e/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,23 @@ def setup_neo4j_for_schema_query_with_excluded_labels(driver: Driver) -> None:
driver.execute_query(
"CREATE (:_Bloom_Scene_{property_a: 'a'})-[:_Bloom_HAS_SCENE_{property_b: 'b'}]->(:_Bloom_Perspective_)"
)


@pytest.fixture(scope="module")
def setup_neo4j_for_kg_construction(driver: Driver) -> None:
    """Resets the database and creates the vector index used by the KG construction tests."""
    # Start from an empty graph
    driver.execute_query("MATCH (n) DETACH DELETE n")

    # Remove any indexes left behind by earlier runs
    vector_index_name = "vector-index-name"
    for index_name in (vector_index_name, "fulltext-index-name"):
        drop_index_if_exists(driver, index_name)

    # Recreate the vector index; the tests use small 3-dimensional vectors
    create_vector_index(
        driver,
        vector_index_name,
        label="Document",
        embedding_property="vectorProperty",
        dimensions=3,
        similarity_fn="euclidean",
    )
Loading