neo4j-graphacademy · martinohanlon · Oct 21, 2025 · Oct 21, 2025 · Oct 24, 2025 · Oct 27, 2025
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -0,0 +1,27 @@
+{
+	"name": "Constructing Knowledge Graphs with Neo4j GraphRAG Python",
+	"image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye",
+	"postCreateCommand": "bash .devcontainer/post_create.sh",
+	"customizations": {
+		"codespaces": {
+			"openFiles": [
+				"SETUP.md",
+				".env.example",
+				"genai-graphrag-python/test_environment.py"
+				]
+		},
+		"vscode": {
+			"settings": {
+				"python.createEnvironment.trigger": "off",
+				"workbench.editorAssociations": {
+					"*.md": "vscode.markdown.preview.editor"
+				}
+			},
+			"extensions": [
+				"ms-python.python",
+				"ms-python.debugpy",
+				"neo4j-extensions.neo4j-for-vscode"
+				]
+		}
+	}
+}
diff --git a/.devcontainer/post_create.sh b/.devcontainer/post_create.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+pip3 install -r requirements.txt
diff --git a/.env.example b/.env.example
@@ -0,0 +1,6 @@
+# Create a copy of this file and name it .env
+OPENAI_API_KEY="sk-"
+NEO4J_URI="neo4j://"
+NEO4J_USERNAME="neo4j"
+NEO4J_PASSWORD=""
+NEO4J_DATABASE="neo4j"
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1 @@
+* text=auto
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,32 @@
+.vscode
+.env
+.venv
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+.pytest_cache/
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
diff --git a/SETUP.md b/SETUP.md
@@ -0,0 +1,13 @@
+# Setup
+
+This repository accompanies the [Neo4j and GenerativeAI Fundamentals course](https://graphacademy.neo4j.com/courses/genai-graphrag-python/) on [GraphAcademy](https://graphacademy.neo4j.com).
+
+When the devcontainer is created, such as in a GitHub codespace, all the required software and packages will be installed.
+
+Follow the [Setup Instructions in GraphAcademy](https://graphacademy.neo4j.com/courses/genai-graphrag-python/1-introduction/2-setup/) to get started.
+
+You will need to:
+
+1. Create a new [`.env`](.env) file and copy the contents of the [`.env.example`](.env.example) file into it
+2. Update the environment values in the [`.env`](.env) file with the values in the [Setup Instructions](https://graphacademy.neo4j.com/courses/genai-graphrag-python/1-introduction/2-setup/)
+3. Run the [`genai-graphrag-python/test_environment.py`](./genai-graphrag-python/test_environment.py) program to check the environment is set up correctly.
diff --git a/genai-graphrag-python/data/docs.csv b/genai-graphrag-python/data/docs.csv
@@ -0,0 +1,16 @@
+filename,course,module,lesson,url
+genai-fundamentals_1-generative-ai_1-what-is-genai.pdf,genai-fundamentals,1-generative-ai,1-what-is-genai,https://graphacademy.neo4j.com/courses/genai-fundamentals/1-generative-ai/1-what-is-genai
+genai-fundamentals_1-generative-ai_2-considerations.pdf,genai-fundamentals,1-generative-ai,2-considerations,https://graphacademy.neo4j.com/courses/genai-fundamentals/1-generative-ai/2-considerations
+genai-fundamentals_1-generative-ai_3-context.pdf,genai-fundamentals,1-generative-ai,3-context,https://graphacademy.neo4j.com/courses/genai-fundamentals/1-generative-ai/3-context
+genai-fundamentals_2-rag_1-what-is-rag.pdf,genai-fundamentals,2-rag,1-what-is-rag,https://graphacademy.neo4j.com/courses/genai-fundamentals/2-rag/1-what-is-rag
+genai-fundamentals_2-rag_2-vector-search.pdf,genai-fundamentals,2-rag,2-vector-search,https://graphacademy.neo4j.com/courses/genai-fundamentals/2-rag/2-vector-search
+genai-fundamentals_2-rag_3-vector-index.pdf,genai-fundamentals,2-rag,3-vector-index,https://graphacademy.neo4j.com/courses/genai-fundamentals/2-rag/3-vector-index
+genai-fundamentals_2-rag_4-graphrag.pdf,genai-fundamentals,2-rag,4-graphrag,https://graphacademy.neo4j.com/courses/genai-fundamentals/2-rag/4-graphrag
+genai-fundamentals_3-knowledge-graphs_1-what-is-a-knowledge-graph.pdf,genai-fundamentals,3-knowledge-graphs,1-what-is-a-knowledge-graph,https://graphacademy.neo4j.com/courses/genai-fundamentals/3-knowledge-graphs/1-what-is-a-knowledge-graph
+genai-fundamentals_3-knowledge-graphs_2-constructing-knowledge-graphs.pdf,genai-fundamentals,3-knowledge-graphs,2-constructing-knowledge-graphs,https://graphacademy.neo4j.com/courses/genai-fundamentals/3-knowledge-graphs/2-constructing-knowledge-graphs
+genai-fundamentals_4-integrating-neo4j_1-neo4j-graphrag.pdf,genai-fundamentals,4-integrating-neo4j,1-neo4j-graphrag,https://graphacademy.neo4j.com/courses/genai-fundamentals/4-integrating-neo4j/1-neo4j-graphrag
+genai-fundamentals_4-integrating-neo4j_2-vector-retriever.pdf,genai-fundamentals,4-integrating-neo4j,2-vector-retriever,https://graphacademy.neo4j.com/courses/genai-fundamentals/4-integrating-neo4j/2-vector-retriever
+genai-fundamentals_4-integrating-neo4j_3-rag.pdf,genai-fundamentals,4-integrating-neo4j,3-rag,https://graphacademy.neo4j.com/courses/genai-fundamentals/4-integrating-neo4j/3-rag
+genai-fundamentals_4-integrating-neo4j_4-vector-graph.pdf,genai-fundamentals,4-integrating-neo4j,4-vector-graph,https://graphacademy.neo4j.com/courses/genai-fundamentals/4-integrating-neo4j/4-vector-graph
+genai-fundamentals_4-integrating-neo4j_5-text-to-cypher.pdf,genai-fundamentals,4-integrating-neo4j,5-text-to-cypher,https://graphacademy.neo4j.com/courses/genai-fundamentals/4-integrating-neo4j/5-text-to-cypher
+genai-fundamentals_4-integrating-neo4j_6-frameworks.pdf,genai-fundamentals,4-integrating-neo4j,6-frameworks,https://graphacademy.neo4j.com/courses/genai-fundamentals/4-integrating-neo4j/6-frameworks
diff --git a/genai-graphrag-python/data/genai-fundamentals_1-generative-ai_1-what-is-genai.pdf b/genai-graphrag-python/data/genai-fundamentals_1-generative-ai_1-what-is-genai.pdf
diff --git a/genai-graphrag-python/data/genai-fundamentals_1-generative-ai_1-what-is-genai.txt b/genai-graphrag-python/data/genai-fundamentals_1-generative-ai_1-what-is-genai.txt
@@ -0,0 +1,100 @@
+= What is Generative AI
+:order: 1
+:type: lesson
+:slides: true
+
+[.slide]
+== GenAI
+
+Generative AI (or GenAI) refers to artificial intelligence systems designed to create new content that resembles human-made data. The data could be text, images, audio, or code.
+
+[.transcript-only]
+====
+These models, like GPT (for text) or DALL-E (for images), are trained on large datasets and use patterns learned from this data to generate new output.
+====
+
+image::images/genai-model-process.svg[A diagram showing the process of Generative AI, where a model is trained on a large dataset, learns patterns, and generates new content based on those patterns.]
+
+[.transcript-only]
+====
+Generative AI is widely used in applications such as chatbots, content creation, image synthesis, and code generation.
+====
+
+[.slide.discrete]
+== GenAI
+
+Generative AI models are not "intelligent" in the way humans are:
+
+. They do not understand or comprehend the content they generate
+. They rely on statistical patterns and correlations learned from their training data.
+
+While Generative AI models can produce coherent and contextually relevant outputs, they lack understanding.
+
+[.slide]
+== Large Language Models (LLMs)
+
+This course will focus on text-generating models, specifically Large Language Models (LLMs)
+
+LLMs are a type of generative AI model designed to understand and generate human-like text.
+
+These models are trained on vast amounts of text data and can perform various tasks, including answering questions, summarizing data, and analyzing text.
+
+[.slide.discrete]
+== LLM Responses
+
+The response generated by an LLM is a probabilistic continuation of the instructions it receives.
+
+The LLM provides the most likely response based on the patterns it has learned from its training data.
+
+If presented with the instruction:
+
+    "Continue this sequence - A B C"
+
+An LLM could respond:
+
+    "D E F"
+
+[.slide.col-2]
+== Prompts
+
+[.col]
+====
+To get an LLM to perform a task, you provide a **prompt**.
+
+A prompt should specify your requirements and provide clear instructions on how to respond.
+====
+
+[.col]
+image::images/llm-prompt-interaction.svg["A user asks an LLM the question 'What is an LLM? Give the response using simple language avoiding jargon.', the LLM responds with a simple definition of an LLM."]
+
+[.slide.discrete]
+== Precision
+
+Precision in the task description, potentially combined with examples or context, ensures that the model understands the intent and produces relevant and accurate outputs.
+
+An example prompt may be a simple question.
+
+    What is the capital of Japan?
+
+Or, it could be more descriptive:
+
+    You are a friendly travel agent helping a customer to choose a
+    holiday destination. Your readers may have English as a second
+    language, so use simple terms and avoid colloquialisms.
+    Avoid Jargon at all costs.
+
+    Tell me about the capital of Japan.
+
+The LLM will interpret these instructions and return a response based on the patterns it has learned from its training data.
+
+[.quiz]
+== Check Your Understanding
+
+include::questions/1-generative-ai.adoc[leveloffset=+1]
+
+[.summary]
+== Lesson Summary
+
+In this lesson, you learned about Generative AI and Large Language Models (LLMs).
+
+In the next lesson, you will learn about the limitations of LLMs, including hallucination, and access to data.
diff --git a/genai-graphrag-python/data/genai-fundamentals_1-generative-ai_2-considerations.pdf b/genai-graphrag-python/data/genai-fundamentals_1-generative-ai_2-considerations.pdf
diff --git a/genai-graphrag-python/data/genai-fundamentals_1-generative-ai_3-context.pdf b/genai-graphrag-python/data/genai-fundamentals_1-generative-ai_3-context.pdf
diff --git a/genai-graphrag-python/data/genai-fundamentals_2-rag_1-what-is-rag.pdf b/genai-graphrag-python/data/genai-fundamentals_2-rag_1-what-is-rag.pdf
diff --git a/genai-graphrag-python/data/genai-fundamentals_2-rag_2-vector-search.pdf b/genai-graphrag-python/data/genai-fundamentals_2-rag_2-vector-search.pdf
diff --git a/genai-graphrag-python/data/genai-fundamentals_2-rag_3-vector-index.pdf b/genai-graphrag-python/data/genai-fundamentals_2-rag_3-vector-index.pdf
diff --git a/genai-graphrag-python/data/genai-fundamentals_2-rag_4-graphrag.pdf b/genai-graphrag-python/data/genai-fundamentals_2-rag_4-graphrag.pdf
diff --git a/...raphrag-python/data/genai-fundamentals_3-knowledge-graphs_1-what-is-a-knowledge-graph.pdf b/...raphrag-python/data/genai-fundamentals_3-knowledge-graphs_1-what-is-a-knowledge-graph.pdf
diff --git a/...rag-python/data/genai-fundamentals_3-knowledge-graphs_2-constructing-knowledge-graphs.pdf b/...rag-python/data/genai-fundamentals_3-knowledge-graphs_2-constructing-knowledge-graphs.pdf
diff --git a/genai-graphrag-python/data/genai-fundamentals_4-integrating-neo4j_1-neo4j-graphrag.pdf b/genai-graphrag-python/data/genai-fundamentals_4-integrating-neo4j_1-neo4j-graphrag.pdf
diff --git a/genai-graphrag-python/data/genai-fundamentals_4-integrating-neo4j_2-vector-retriever.pdf b/genai-graphrag-python/data/genai-fundamentals_4-integrating-neo4j_2-vector-retriever.pdf
diff --git a/genai-graphrag-python/data/genai-fundamentals_4-integrating-neo4j_3-rag.pdf b/genai-graphrag-python/data/genai-fundamentals_4-integrating-neo4j_3-rag.pdf
diff --git a/genai-graphrag-python/data/genai-fundamentals_4-integrating-neo4j_4-vector-graph.pdf b/genai-graphrag-python/data/genai-fundamentals_4-integrating-neo4j_4-vector-graph.pdf
diff --git a/genai-graphrag-python/data/genai-fundamentals_4-integrating-neo4j_5-text-to-cypher.pdf b/genai-graphrag-python/data/genai-fundamentals_4-integrating-neo4j_5-text-to-cypher.pdf
diff --git a/genai-graphrag-python/data/genai-fundamentals_4-integrating-neo4j_6-frameworks.pdf b/genai-graphrag-python/data/genai-fundamentals_4-integrating-neo4j_6-frameworks.pdf
diff --git a/genai-graphrag-python/examples/data_loader_custom_pdf.py b/genai-graphrag-python/examples/data_loader_custom_pdf.py
@@ -0,0 +1,77 @@
+import os
+from dotenv import load_dotenv
+load_dotenv()
+
+import asyncio
+
+from neo4j import GraphDatabase
+from neo4j_graphrag.llm import OpenAILLM
+from neo4j_graphrag.embeddings import OpenAIEmbeddings
+from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline
+
+# tag::import_loader[]
+from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader, PdfDocument
+
+import re
+from fsspec import AbstractFileSystem
+from typing import Dict, Optional, Union
+from pathlib import Path
+# end::import_loader[]
+
+# Create the Neo4j driver from the NEO4J_URI, NEO4J_USERNAME and
+# NEO4J_PASSWORD environment variables.
+neo4j_driver = GraphDatabase.driver(
+    os.getenv("NEO4J_URI"),
+    auth=(os.getenv("NEO4J_USERNAME"), os.getenv("NEO4J_PASSWORD"))
+)
+# Check the connection before doing any other work.
+neo4j_driver.verify_connectivity()
+
+# LLM handed to the pipeline below; model_params request temperature 0 and a
+# JSON object response format.
+llm = OpenAILLM(
+    model_name="gpt-4o",
+    model_params={
+        "temperature": 0,
+        "response_format": {"type": "json_object"},
+    }
+)
+
+# Embedding model handed to the pipeline below as its embedder.
+embedder = OpenAIEmbeddings(
+    model="text-embedding-ada-002"
+)
+
+# tag::loader[]
+class CustomPDFLoader(PdfLoader):
+    async def run(
+        self,
+        filepath: Union[str, Path],
+        metadata: Optional[Dict[str, str]] = None,
+        fs: Optional[Union[AbstractFileSystem, str]] = None,
+    ) -> PdfDocument:
+        pdf_document = await super().run(filepath, metadata, fs)
+
+        # Process the PDF document
+        # remove asciidoc attribute lines like :id:
+        pdf_document.text = re.sub(r':*:.*\n?', '', pdf_document.text, flags=re.MULTILINE)
+
+        return pdf_document
+
+data_loader = CustomPDFLoader()
+# end::loader[]
+
+# tag::kg_builder[]
+kg_builder = SimpleKGPipeline(
+    llm=llm,
+    driver=neo4j_driver, 
+    neo4j_database=os.getenv("NEO4J_DATABASE"), 
+    embedder=embedder, 
+    from_pdf=True,
+    pdf_loader=data_loader
+)
+# end::kg_builder[]
+
+# tag::run_loader[]
+pdf_file = "./genai-graphrag-python/data/genai-fundamentals_1-generative-ai_1-what-is-genai.pdf"
+doc = asyncio.run(data_loader.run(pdf_file))
+print(doc.text)
+# end::run_loader[]
+
+print(f"Processing {pdf_file}")
+result = asyncio.run(kg_builder.run_async(file_path=pdf_file))
+print(result.result)
diff --git a/genai-graphrag-python/examples/data_loader_text_file.py b/genai-graphrag-python/examples/data_loader_text_file.py
@@ -0,0 +1,74 @@
+import os
+from dotenv import load_dotenv
+load_dotenv()
+
+import asyncio
+
+from neo4j import GraphDatabase
+from neo4j_graphrag.llm import OpenAILLM
+from neo4j_graphrag.embeddings import OpenAIEmbeddings
+from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline
+
+# tag::import_loader[]
+from neo4j_graphrag.experimental.components.pdf_loader import DataLoader, PdfDocument, DocumentInfo
+from pathlib import Path
+# end::import_loader[]
+
+# Create the Neo4j driver from the NEO4J_URI, NEO4J_USERNAME and
+# NEO4J_PASSWORD environment variables.
+neo4j_driver = GraphDatabase.driver(
+    os.getenv("NEO4J_URI"),
+    auth=(os.getenv("NEO4J_USERNAME"), os.getenv("NEO4J_PASSWORD"))
+)
+# Check the connection before doing any other work.
+neo4j_driver.verify_connectivity()
+
+# LLM handed to the pipeline below; model_params request temperature 0 and a
+# JSON object response format.
+llm = OpenAILLM(
+    model_name="gpt-4o",
+    model_params={
+        "temperature": 0,
+        "response_format": {"type": "json_object"},
+    }
+)
+
+# Embedding model handed to the pipeline below as its embedder.
+embedder = OpenAIEmbeddings(
+    model="text-embedding-ada-002"
+)
+
+# tag::loader[]
+class TextLoader(DataLoader):
+    async def run(self, filepath: Path) -> PdfDocument:
+
+        # Process the file
+        with open(filepath, 'r', encoding='utf-8') as f:
+            text = f.read()
+
+        # Return a PdfDocument
+        return PdfDocument(
+            text=text,
+            document_info=DocumentInfo(
+                path=str(filepath),
+                metadata={}
+            )
+        )
+
+data_loader = TextLoader()
+# end::loader[]
+
+# tag::kg_builder[]
+kg_builder = SimpleKGPipeline(
+    llm=llm,
+    driver=neo4j_driver, 
+    neo4j_database=os.getenv("NEO4J_DATABASE"), 
+    embedder=embedder, 
+    from_pdf=True,
+    pdf_loader=data_loader
+)
+# end::kg_builder[]
+
+# tag::run_loader[]
+pdf_file = "./genai-graphrag-python/data/genai-fundamentals_1-generative-ai_1-what-is-genai.txt"
+doc = asyncio.run(data_loader.run(pdf_file))
+print(doc.text)
+# end::run_loader[]
+
+print(f"Processing {pdf_file}")
+result = asyncio.run(kg_builder.run_async(file_path=pdf_file))
+print(result.result)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		#!/usr/bin/env bash
		pip3 install -r requirements.txt