Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion examples/pipeline/kg_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import neo4j
from langchain_text_splitters import CharacterTextSplitter
from neo4j_genai.components.embedder import TextChunkEmbedder
from neo4j_genai.components.entity_relation_extractor import (
LLMEntityRelationExtractor,
OnError,
Expand All @@ -32,6 +33,7 @@
SchemaRelation,
)
from neo4j_genai.components.text_splitters.langchain import LangChainTextSplitterAdapter
from neo4j_genai.embeddings.openai import OpenAIEmbeddings
from neo4j_genai.llm import OpenAILLM
from neo4j_genai.pipeline import Pipeline

Expand Down Expand Up @@ -78,6 +80,7 @@ async def main(neo4j_driver: neo4j.Driver) -> dict[str, Any]:
CharacterTextSplitter(chunk_size=50, chunk_overlap=10, separator=".")
),
)
pipe.add_component("chunk_embedder", TextChunkEmbedder(embedder=OpenAIEmbeddings()))
pipe.add_component("schema", SchemaBuilder())
pipe.add_component(
"extractor",
Expand All @@ -95,8 +98,11 @@ async def main(neo4j_driver: neo4j.Driver) -> dict[str, Any]:
pipe.add_component("writer", Neo4jWriter(neo4j_driver))
# define the execution order of components
# and how the output of previous components must be used
pipe.connect("splitter", "extractor", input_config={"chunks": "splitter"})
pipe.connect("splitter", "chunk_embedder", input_config={"text_chunks": "splitter"})
pipe.connect("schema", "extractor", input_config={"schema": "schema"})
pipe.connect(
"chunk_embedder", "extractor", input_config={"chunks": "chunk_embedder"}
)
pipe.connect(
"extractor",
"writer",
Expand Down
3 changes: 3 additions & 0 deletions src/neo4j_genai/components/embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pydantic import validate_call

from neo4j_genai.components.types import TextChunk, TextChunks
from neo4j_genai.embedder import Embedder
from neo4j_genai.pipeline.component import Component
Expand Down Expand Up @@ -42,6 +44,7 @@ def _embed_chunk(self, text_chunk: TextChunk) -> TextChunk:
metadata["embedding"] = embedding
return TextChunk(text=text_chunk.text, metadata=metadata)

@validate_call
async def run(self, text_chunks: TextChunks) -> TextChunks:
"""Embed a list of text chunks.

Expand Down
14 changes: 10 additions & 4 deletions src/neo4j_genai/components/entity_relation_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,16 +91,22 @@ def create_next_chunk_relationship(
)

def create_chunk_node(self, chunk: TextChunk, chunk_id: str) -> Neo4jNode:
    """Create the graph node that represents a text chunk.

    The node always carries the chunk's ``text``. Every entry of
    ``chunk.metadata`` is added as a top-level node property, except the
    ``"embedding"`` entry, which is passed to the node as an embedding
    property instead.

    Args:
        chunk: The text chunk to convert. It is left unmodified.
        chunk_id: Identifier assigned to the created node.

    Returns:
        A ``Neo4jNode`` labelled ``CHUNK_NODE_LABEL`` holding the chunk
        properties and, when present, its embedding.
    """
    chunk_properties: Dict[str, Any] = {"text": chunk.text}
    embedding_properties: Dict[str, Any] = {}
    if chunk.metadata:
        # Work on a shallow copy: popping directly from chunk.metadata would
        # strip the embedding from the caller's chunk, so a second call on
        # the same chunk would silently produce a node without embedding.
        metadata = dict(chunk.metadata)
        if "embedding" in metadata:
            embedding_properties["embedding"] = metadata.pop("embedding")
        # NOTE(review): a metadata key named "text" overrides the chunk text
        # here (same as before this change) — confirm that is intended.
        chunk_properties.update(metadata)
    return Neo4jNode(
        id=chunk_id,
        label=CHUNK_NODE_LABEL,
        properties=chunk_properties,
        embedding_properties=embedding_properties,
    )

def create_node_to_chunk_rel(
Expand Down Expand Up @@ -162,10 +168,10 @@ async def extract_for_chunk(
llm_result = self.llm.invoke(prompt)
try:
result = json.loads(llm_result.content)
except json.JSONDecodeError:
except json.JSONDecodeError as e:
if self.on_error == OnError.RAISE:
raise LLMGenerationError(
f"LLM response is not valid JSON {llm_result.content}"
f"LLM response is not valid JSON {llm_result.content}: {e}"
)
else:
logger.error(
Expand Down
15 changes: 15 additions & 0 deletions tests/e2e/data/harry_potter.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
At Malfoy Manor, Snape tells Voldemort the date that Harry’s friends are planning to
move him from the house on Privet Drive to a new safe location, so that Voldemort
can capture Harry en route.

As Harry packs to leave Privet Drive, he reads two obituaries for Dumbledore, both
of which make him think that he didn’t know Dumbledore as well as he should have.
Downstairs, he bids good-bye to the Dursleys for the final time, as the threat of
Voldemort forces them to go into hiding themselves.

The Order of the Phoenix, led by Alastor “Mad-Eye” Moody, arrives to take Harry to
his new home at the Weasleys’ house, the Burrow. Six of Harry’s friends take
Polyjuice Potion to disguise themselves as Harry and act as decoys, and they all fly
off in different directions. The Death Eaters, alerted to their departure by Snape,
attack Harry and his friends. Voldemort chases Harry down, but Harry’s wand fends
Voldemort off, seemingly without Harry’s help.
3 changes: 0 additions & 3 deletions tests/e2e/pinecone_e2e/populate_dbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
# limitations under the License.
from __future__ import annotations

import os.path
from typing import Any

import neo4j
Expand All @@ -23,8 +22,6 @@

from ..utils import build_data_objects, populate_neo4j

BASE_DIR = os.path.dirname(os.path.abspath(__file__))


def populate_dbs(
neo4j_driver: neo4j.Driver, pc_client: Pinecone, index_name: str = "jeopardy"
Expand Down
Loading