Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"name": "Constructing Knowledge Graphs with Neo4j GraphRAG Python",
"image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye",
"postCreateCommand": "bash .devcontainer/post_create.sh",
"customizations": {
"codespaces": {
"openFiles": [
"SETUP.md",
".env.example",
"genai-graphrag-python/test_environment.py"
]
},
"vscode": {
"settings": {
"python.createEnvironment.trigger": "off",
"workbench.editorAssociations": {
"*.md": "vscode.markdown.preview.editor"
}
},
"extensions": [
"ms-python.python",
"ms-python.debugpy",
"neo4j-extensions.neo4j-for-vscode"
]
}
}
}
2 changes: 2 additions & 0 deletions .devcontainer/post_create.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Dev container post-create step (referenced by "postCreateCommand" in
# .devcontainer/devcontainer.json): install the Python packages listed in
# requirements.txt.
pip3 install -r requirements.txt
6 changes: 6 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Create a copy of this file and name it .env
OPENAI_API_KEY="sk-"
NEO4J_URI="neo4j://"
NEO4J_USERNAME="neo4j"
NEO4J_PASSWORD=""
NEO4J_DATABASE="neo4j"
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* text=auto
32 changes: 32 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
.vscode
.env
.venv

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
.pytest_cache/

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
13 changes: 13 additions & 0 deletions SETUP.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Setup

This repository accompanies the [Neo4j and GenerativeAI Fundamentals course](https://graphacademy.neo4j.com/courses/genai-graphrag-python/) on [GraphAcademy](https://graphacademy.neo4j.com).

When the devcontainer is created, such as in a GitHub codespace, all the required software and packages will be installed.

Follow the [Setup Instructions in GraphAcademy](https://graphacademy.neo4j.com/courses/genai-graphrag-python/1-introduction/2-setup/) to get started.

You will need to:

1. Create a new [`.env`](.env) file and copy the contents of the [`.env.example`](.env.example) file into it
2. Update the environment values in the [`.env`](.env) file with the values in the [Setup Instructions](https://graphacademy.neo4j.com/courses/genai-graphrag-python/1-introduction/2-setup/)
3. Run the [`genai-graphrag-python/test_environment.py`](./genai-graphrag-python/test_environment.py) program to check the environment is set up correctly.
16 changes: 16 additions & 0 deletions genai-graphrag-python/data/docs.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
filename,course,module,lesson,url
genai-fundamentals_1-generative-ai_1-what-is-genai.pdf,genai-fundamentals,1-generative-ai,1-what-is-genai,https://graphacademy.neo4j.com/courses/genai-fundamentals/1-generative-ai/1-what-is-genai
genai-fundamentals_1-generative-ai_2-considerations.pdf,genai-fundamentals,1-generative-ai,2-considerations,https://graphacademy.neo4j.com/courses/genai-fundamentals/1-generative-ai/2-considerations
genai-fundamentals_1-generative-ai_3-context.pdf,genai-fundamentals,1-generative-ai,3-context,https://graphacademy.neo4j.com/courses/genai-fundamentals/1-generative-ai/3-context
genai-fundamentals_2-rag_1-what-is-rag.pdf,genai-fundamentals,2-rag,1-what-is-rag,https://graphacademy.neo4j.com/courses/genai-fundamentals/2-rag/1-what-is-rag
genai-fundamentals_2-rag_2-vector-search.pdf,genai-fundamentals,2-rag,2-vector-search,https://graphacademy.neo4j.com/courses/genai-fundamentals/2-rag/2-vector-search
genai-fundamentals_2-rag_3-vector-index.pdf,genai-fundamentals,2-rag,3-vector-index,https://graphacademy.neo4j.com/courses/genai-fundamentals/2-rag/3-vector-index
genai-fundamentals_2-rag_4-graphrag.pdf,genai-fundamentals,2-rag,4-graphrag,https://graphacademy.neo4j.com/courses/genai-fundamentals/2-rag/4-graphrag
genai-fundamentals_3-knowledge-graphs_1-what-is-a-knowledge-graph.pdf,genai-fundamentals,3-knowledge-graphs,1-what-is-a-knowledge-graph,https://graphacademy.neo4j.com/courses/genai-fundamentals/3-knowledge-graphs/1-what-is-a-knowledge-graph
genai-fundamentals_3-knowledge-graphs_2-constructing-knowledge-graphs.pdf,genai-fundamentals,3-knowledge-graphs,2-constructing-knowledge-graphs,https://graphacademy.neo4j.com/courses/genai-fundamentals/3-knowledge-graphs/2-constructing-knowledge-graphs
genai-fundamentals_4-integrating-neo4j_1-neo4j-graphrag.pdf,genai-fundamentals,4-integrating-neo4j,1-neo4j-graphrag,https://graphacademy.neo4j.com/courses/genai-fundamentals/4-integrating-neo4j/1-neo4j-graphrag
genai-fundamentals_4-integrating-neo4j_2-vector-retriever.pdf,genai-fundamentals,4-integrating-neo4j,2-vector-retriever,https://graphacademy.neo4j.com/courses/genai-fundamentals/4-integrating-neo4j/2-vector-retriever
genai-fundamentals_4-integrating-neo4j_3-rag.pdf,genai-fundamentals,4-integrating-neo4j,3-rag,https://graphacademy.neo4j.com/courses/genai-fundamentals/4-integrating-neo4j/3-rag
genai-fundamentals_4-integrating-neo4j_4-vector-graph.pdf,genai-fundamentals,4-integrating-neo4j,4-vector-graph,https://graphacademy.neo4j.com/courses/genai-fundamentals/4-integrating-neo4j/4-vector-graph
genai-fundamentals_4-integrating-neo4j_5-text-to-cypher.pdf,genai-fundamentals,4-integrating-neo4j,5-text-to-cypher,https://graphacademy.neo4j.com/courses/genai-fundamentals/4-integrating-neo4j/5-text-to-cypher
genai-fundamentals_4-integrating-neo4j_6-frameworks.pdf,genai-fundamentals,4-integrating-neo4j,6-frameworks,https://graphacademy.neo4j.com/courses/genai-fundamentals/4-integrating-neo4j/6-frameworks
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
= What is Generative AI
:order: 1
:type: lesson
:slides: true

[.slide]
== GenAI

Generative AI (or GenAI) refers to artificial intelligence systems designed to create new content that resembles human-made data. The data could be text, images, audio, or code.

[.transcript-only]
====
These models, like GPT (for text) or DALL-E (for images), are trained on large datasets and use patterns learned from this data to generate new output.
====

image::images/genai-model-process.svg[A diagram showing the process of Generative AI, where a model is trained on a large dataset, learns patterns, and generates new content based on those patterns.]

[.transcript-only]
====
Generative AI is widely used in applications such as chatbots, content creation, image synthesis, and code generation.
====

[.slide.discrete]
== GenAI

Generative AI models are not "intelligent" in the way humans are:

. They do not understand or comprehend the content they generate.
. They rely on statistical patterns and correlations learned from their training data.

While Generative AI models can produce coherent and contextually relevant outputs, they lack understanding.

[.slide]
== Large Language Models (LLMs)

This course will focus on text-generating models, specifically Large Language Models (LLMs).

LLMs are a type of generative AI model designed to understand and generate human-like text.

These models are trained on vast amounts of text data and can perform various tasks, including answering questions, summarizing data, and analyzing text.

[.slide.discrete]
== LLM Responses

The response generated by an LLM is a probabilistic continuation of the instructions it receives.

The LLM provides the most likely response based on the patterns it has learned from its training data.

If presented with the instruction:

"Continue this sequence - A B C"

An LLM could respond:

"D E F"

[.slide.col-2]
== Prompts

[.col]
====
To get an LLM to perform a task, you provide a **prompt**.

A prompt should specify your requirements and provide clear instructions on how to respond.
====

[.col]
image::images/llm-prompt-interaction.svg["A user asks an LLM the question 'What is an LLM? Give the response using simple language avoiding jargon.', the LLM responds with a simple definition of an LLM."]

[.slide.discrete]
== Precision

Precision in the task description, potentially combined with examples or context, ensures that the model understands the intent and produces relevant and accurate outputs.

An example prompt may be a simple question.

What is the capital of Japan?

Or, it could be more descriptive:

You are a friendly travel agent helping a customer to choose a
holiday destination. Your readers may have English as a second
language, so use simple terms and avoid colloquialisms.
Avoid Jargon at all costs.

Tell me about the capital of Japan.

The LLM will interpret these instructions and return a response based on the patterns it has learned from its training data.

[.quiz]
== Check Your Understanding

include::questions/1-generative-ai.adoc[leveloffset=+1]

[.summary]
== Lesson Summary

In this lesson, you learned about Generative AI and Large Language Models (LLMs).

In the next lesson, you will learn about the limitations of LLMs, including hallucination and access to data.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
77 changes: 77 additions & 0 deletions genai-graphrag-python/examples/data_loader_custom_pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import os
from dotenv import load_dotenv
load_dotenv()

import asyncio

from neo4j import GraphDatabase
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.embeddings import OpenAIEmbeddings
from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline

# tag::import_loader[]
from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader, PdfDocument

import re
from fsspec import AbstractFileSystem
from typing import Dict, Optional, Union
from pathlib import Path
# end::import_loader[]

# Create the Neo4j driver from the NEO4J_URI, NEO4J_USERNAME and
# NEO4J_PASSWORD environment variables (load_dotenv() has already run above).
neo4j_driver = GraphDatabase.driver(
os.getenv("NEO4J_URI"),
auth=(os.getenv("NEO4J_USERNAME"), os.getenv("NEO4J_PASSWORD"))
)
# Check the connection up front, before any pipeline work starts.
neo4j_driver.verify_connectivity()

# LLM handed to the knowledge graph pipeline below.
llm = OpenAILLM(
model_name="gpt-4o",
model_params={
"temperature": 0,
# NOTE(review): presumably requests JSON output so the pipeline can parse
# the model's response - confirm against the neo4j_graphrag docs.
"response_format": {"type": "json_object"},
}
)

# Embedding model handed to the knowledge graph pipeline below.
embedder = OpenAIEmbeddings(
model="text-embedding-ada-002"
)

# tag::loader[]
class CustomPDFLoader(PdfLoader):
    """PDF loader that strips AsciiDoc attribute entries from the loaded text.

    The lesson PDFs may still contain attribute entries such as ``:order: 1``
    or ``:type: lesson``. They are document metadata, not lesson content, so
    they are removed before the text is passed on.
    """

    async def run(
        self,
        filepath: Union[str, Path],
        metadata: Optional[Dict[str, str]] = None,
        fs: Optional[Union[AbstractFileSystem, str]] = None,
    ) -> PdfDocument:
        """Load the PDF with ``PdfLoader.run`` and clean its text.

        Args:
            filepath: Location of the PDF file.
            metadata: Optional metadata, forwarded unchanged to ``PdfLoader.run``.
            fs: Optional filesystem, forwarded unchanged to ``PdfLoader.run``.

        Returns:
            The ``PdfDocument`` produced by ``PdfLoader.run``, with attribute
            entry lines removed from its ``text``.
        """
        pdf_document = await super().run(filepath, metadata, fs)

        # Process the PDF document
        # remove asciidoc attribute lines like :id:
        #
        # The pattern is anchored to the start of a line and requires the
        # ":name:" form (optionally ":!name:" / ":name!:"), followed by either
        # whitespace and a value or the end of the line. An unanchored pattern
        # would also delete the tail of any ordinary sentence containing a
        # colon, together with its line break.
        pdf_document.text = re.sub(
            r'^[ \t]*:!?\w[\w-]*!?:(?:[ \t].*)?$\n?',
            '',
            pdf_document.text,
            flags=re.MULTILINE,
        )

        return pdf_document

data_loader = CustomPDFLoader()
# end::loader[]

# tag::kg_builder[]
# Build the pipeline with the custom loader in place of the default PDF loader
kg_builder = SimpleKGPipeline(
    llm=llm,
    driver=neo4j_driver,
    neo4j_database=os.getenv("NEO4J_DATABASE"),
    embedder=embedder,
    from_pdf=True,
    pdf_loader=data_loader
)
# end::kg_builder[]

# tag::run_loader[]
# Run the loader on its own to inspect the cleaned text
pdf_file = "./genai-graphrag-python/data/genai-fundamentals_1-generative-ai_1-what-is-genai.pdf"
doc = asyncio.run(data_loader.run(pdf_file))
print(doc.text)
# end::run_loader[]

print(f"Processing {pdf_file}")
try:
    result = asyncio.run(kg_builder.run_async(file_path=pdf_file))
    print(result.result)
finally:
    # Close the driver explicitly, even if the pipeline run fails, instead of
    # leaving its connections to be cleaned up at interpreter exit.
    neo4j_driver.close()
74 changes: 74 additions & 0 deletions genai-graphrag-python/examples/data_loader_text_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import os
from dotenv import load_dotenv
load_dotenv()

import asyncio

from neo4j import GraphDatabase
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.embeddings import OpenAIEmbeddings
from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline

# tag::import_loader[]
from neo4j_graphrag.experimental.components.pdf_loader import DataLoader, PdfDocument, DocumentInfo
from pathlib import Path
# end::import_loader[]

# Create the Neo4j driver from the NEO4J_URI, NEO4J_USERNAME and
# NEO4J_PASSWORD environment variables (load_dotenv() has already run above).
neo4j_driver = GraphDatabase.driver(
os.getenv("NEO4J_URI"),
auth=(os.getenv("NEO4J_USERNAME"), os.getenv("NEO4J_PASSWORD"))
)
# Check the connection up front, before any pipeline work starts.
neo4j_driver.verify_connectivity()

# LLM handed to the knowledge graph pipeline below.
llm = OpenAILLM(
model_name="gpt-4o",
model_params={
"temperature": 0,
# NOTE(review): presumably requests JSON output so the pipeline can parse
# the model's response - confirm against the neo4j_graphrag docs.
"response_format": {"type": "json_object"},
}
)

# Embedding model handed to the knowledge graph pipeline below.
embedder = OpenAIEmbeddings(
model="text-embedding-ada-002"
)

# tag::loader[]
class TextLoader(DataLoader):
    """Data loader that reads a plain-text file instead of a PDF."""

    async def run(self, filepath: Path) -> PdfDocument:
        """Read ``filepath`` as UTF-8 text and wrap it in a ``PdfDocument``.

        The returned document records ``str(filepath)`` as its path and
        carries an empty metadata dict.
        """
        # Process the file
        with open(filepath, mode='r', encoding='utf-8') as source:
            contents = source.read()

        # Return a PdfDocument
        info = DocumentInfo(path=str(filepath), metadata={})
        return PdfDocument(text=contents, document_info=info)

data_loader = TextLoader()
# end::loader[]

# tag::kg_builder[]
# Build the pipeline with the text loader in place of the default PDF loader
kg_builder = SimpleKGPipeline(
    llm=llm,
    driver=neo4j_driver,
    neo4j_database=os.getenv("NEO4J_DATABASE"),
    embedder=embedder,
    from_pdf=True,
    pdf_loader=data_loader
)
# end::kg_builder[]

# tag::run_loader[]
# Run the loader on its own to inspect the text it returns
pdf_file = "./genai-graphrag-python/data/genai-fundamentals_1-generative-ai_1-what-is-genai.txt"
doc = asyncio.run(data_loader.run(pdf_file))
print(doc.text)
# end::run_loader[]

print(f"Processing {pdf_file}")
try:
    result = asyncio.run(kg_builder.run_async(file_path=pdf_file))
    print(result.result)
finally:
    # Close the driver explicitly, even if the pipeline run fails, instead of
    # leaving its connections to be cleaned up at interpreter exit.
    neo4j_driver.close()
Loading