Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add document structure into GraphRAG #2033

Merged
merged 43 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
0bb7596
Graph RAG add document and chunk type
KingSkyLi Sep 14, 2024
961f7ab
fixed insert_graph
KingSkyLi Sep 20, 2024
bc5ffda
get source file name
KingSkyLi Sep 25, 2024
d171735
add delete document;
KingSkyLi Sep 27, 2024
ecbc3fd
change create label function, graph_vis api, local query
KingSkyLi Sep 27, 2024
e3f9c09
add next edge;
KingSkyLi Sep 27, 2024
80c69a2
refactor: Refactor graph_vis API and local query functions
Appointat Oct 11, 2024
c368524
Refactor graph store initialization and label creation
Appointat Oct 11, 2024
7dab7ed
refactor: refactor graph store initialization and label creation
Appointat Oct 11, 2024
442a5f9
refactor: Refactor graph store initialization and label creation
Appointat Oct 11, 2024
fad9e0b
refactor: Refactor graph store initialization and label creation
Appointat Oct 11, 2024
f0d970c
refactor: Refactor graph store initialization and label creation
Appointat Oct 11, 2024
e8cb3cb
Refactor document vertex type description in TuGraphStoreConfig
Appointat Oct 11, 2024
9b560f2
refactor: the great refactor
Appointat Oct 11, 2024
53277ff
refacor: Refactor graph store factory and TuGraphStore
Appointat Oct 12, 2024
ee4cfc0
Refactor property filtering in TuGraphStore
Appointat Oct 12, 2024
733f1d5
refactor: Refactor graph store initialization and label creation in T…
Appointat Oct 12, 2024
8d3306b
refactor: Refactor graph store tests and adapters
Appointat Oct 12, 2024
68c20ce
Refactor import statements in graph_store/base.py
Appointat Oct 12, 2024
072ca7f
Refactor graph store adapter in CommunityStore
Appointat Oct 12, 2024
1f2fb6f
chore: Refactor import statements in graph_store/base.py
Appointat Oct 14, 2024
6e64f1f
chore: Refactor import statements in graph_store/base.py and graph_st…
Appointat Oct 14, 2024
62bf973
fix: Refactor del_edges method in Graph and MemoryGraph classes
Appointat Oct 14, 2024
4d11d66
fix: Refactor test_get_table_names in test_conn_tugraph.py
Appointat Oct 14, 2024
ea05c7c
chore: reformat the code by "make fmt"
Appointat Oct 14, 2024
952b749
fix: fix the bugs
Appointat Oct 15, 2024
3f7cc8f
fix: fix some great bugs
Appointat Oct 16, 2024
7feea09
fix: Refactor TuGraphStoreAdapter explore method to handle depth and …
Appointat Oct 16, 2024
1dd6683
feat: enable graph search for documents and entities, and refactor th…
Appointat Oct 17, 2024
4b92714
feat: Refactor explore_text_link method in GraphStoreAdapter
Appointat Oct 17, 2024
91962d0
Refactor explore_text_link method in GraphStoreAdapter
Appointat Oct 17, 2024
5ba1215
fix: fix mypy
Appointat Oct 17, 2024
3ccfc3d
fix
Appointat Oct 17, 2024
1c9edcd
fix by fmt
Appointat Oct 17, 2024
869556f
fix: fix bugs
Appointat Oct 18, 2024
9693984
fix by black format
Appointat Oct 18, 2024
cc588de
Revert "fix by black format"
Appointat Oct 18, 2024
59aab63
fix by make fmt
Appointat Oct 18, 2024
8f7089e
fix
Appointat Oct 18, 2024
d153132
chore: fix and update the doc
Appointat Oct 18, 2024
5ab6345
doc
Appointat Oct 18, 2024
847c4c6
doc: add the image
Appointat Oct 18, 2024
943db44
fix
Appointat Oct 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions dbgpt/app/knowledge/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,12 +650,12 @@ def query_graph(self, space_name, limit):
{
"id": node.vid,
"communityId": node.get_prop("_community_id"),
"name": node.vid,
"type": "",
"name": node.name,
"type": node.get_prop("type") or ""
}
)
for edge in graph.edges():
res["edges"].append(
{"source": edge.sid, "target": edge.tid, "name": edge.name, "type": ""}
{"source": edge.sid, "target": edge.tid, "name": edge.name, "type": edge.get_prop("type") or ""}
)
return res
4 changes: 2 additions & 2 deletions dbgpt/rag/transformer/graph_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def _parse_response(self, text: str, limit: Optional[int] = None) -> List[Graph]
match = re.match(r"\((.*?)#(.*?)\)", line)
if match:
name, summary = [part.strip() for part in match.groups()]
graph.upsert_vertex(Vertex(name, description=summary))
graph.upsert_vertex(Vertex(name, description=summary, vertex_type='entity'))
Aries-ckt marked this conversation as resolved.
Show resolved Hide resolved
elif current_section == "Relationships":
match = re.match(r"\((.*?)#(.*?)#(.*?)#(.*?)\)", line)
if match:
Expand All @@ -74,7 +74,7 @@ def _parse_response(self, text: str, limit: Optional[int] = None) -> List[Graph]
]
edge_count += 1
graph.append_edge(
Edge(source, target, name, description=summary)
Edge(source, target, name, description=summary, edge_type='relation')
)

if limit and edge_count >= limit:
Expand Down
18 changes: 17 additions & 1 deletion dbgpt/storage/graph_store/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from dbgpt._private.pydantic import BaseModel, ConfigDict, Field
from dbgpt.core import Embeddings
from dbgpt.storage.graph_store.graph import Direction, Graph
from dbgpt.storage.graph_store.graph import Direction, Graph, Vertex

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -44,6 +44,10 @@ def get_vertex_type(self) -> str:
def get_edge_type(self) -> str:
"""Get the edge type."""

@abstractmethod
def get_document_vertex(self, doc_name:str) -> Vertex:
    """Get the document vertex for the given document name.

    Abstract: concrete graph stores must implement this.

    Args:
        doc_name: Name of the document whose vertex is requested.

    Returns:
        A ``Vertex`` (presumably the one representing the document --
        TODO confirm against the concrete store implementations).
    """

@abstractmethod
def insert_triplet(self, sub: str, rel: str, obj: str):
    """Add triplet.

    Abstract: concrete graph stores must implement this.

    Args:
        sub: First element of the triplet (presumably the subject -- verify).
        rel: Second element of the triplet (presumably the relation -- verify).
        obj: Third element of the triplet (presumably the object -- verify).
    """
Expand All @@ -60,6 +64,10 @@ def get_triplets(self, sub: str) -> List[Tuple[str, str]]:
def delete_triplet(self, sub: str, rel: str, obj: str):
"""Delete triplet."""

@abstractmethod
def delete_document(self, doc_name: str):
    """Delete document.

    Abstract: concrete graph stores must implement this.

    Args:
        doc_name: Name of the document to delete.
    """

@abstractmethod
def truncate(self):
    """Truncate Graph.

    Abstract: concrete graph stores must implement this.
    """
Expand Down Expand Up @@ -87,6 +95,14 @@ def explore(
) -> Graph:
"""Explore on graph."""

def explore_text_link(
    self,
    subs: List[str],
    depth: Optional[int] = None,
    limit: Optional[int] = None,
) -> Graph:
    """Explore text link on graph.

    NOTE(review): unlike the neighbouring methods, this one is not decorated
    with ``@abstractmethod`` and its body consists only of this docstring, so
    the base implementation returns ``None`` even though the signature is
    annotated ``-> Graph``. Confirm whether every concrete store is expected
    to override it, or whether it should be made abstract / raise
    ``NotImplementedError``.

    Args:
        subs: List of strings to start from (presumably vertex names or
            ids -- verify against the implementations).
        depth: Optional depth bound; defaults to ``None``.
        limit: Optional result limit; defaults to ``None``.
    """

@abstractmethod
def query(self, query: str, **args) -> Graph:
    """Execute a query.

    Abstract: concrete graph stores must implement this.

    Args:
        query: The query string to execute.
        **args: Extra keyword arguments; their meaning is defined by the
            concrete store.

    Returns:
        A ``Graph`` (presumably holding the query result -- verify).
    """
Expand Down
20 changes: 14 additions & 6 deletions dbgpt/storage/graph_store/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,11 @@ def get_neighbor_edges(
"""Get neighbor edges."""

@abstractmethod
def vertices(self) -> Iterator[Vertex]:
def vertices(self, vertex_prop:Optional[str] = None) -> Iterator[Vertex]:
Aries-ckt marked this conversation as resolved.
Show resolved Hide resolved
"""Get vertex iterator."""

@abstractmethod
def edges(self) -> Iterator[Edge]:
def edges(self, edge_prop:Optional[str] = None) -> Iterator[Edge]:
"""Get edge iterator."""

@abstractmethod
Expand Down Expand Up @@ -335,13 +335,21 @@ def unique_elements(elements):

return itertools.islice(es, limit) if limit else es

def vertices(self) -> Iterator[Vertex]:
def vertices(self, vertex_type: Optional[str] = None) -> Iterator[Vertex]:
"""Return vertices."""
return iter(self._vs.values())
return (
item for item in self._vs.values()
if vertex_type is None or item.get_prop('vertex_type') == vertex_type
)

def edges(self) -> Iterator[Edge]:
def edges(self, edge_type: Optional[str] = None) -> Iterator[Edge]:
"""Return edges."""
return iter(e for nbs in self._oes.values() for es in nbs.values() for e in es)
return (
e for nbs in self._oes.values()
for es in nbs.values()
for e in es
if edge_type is None or e.get_prop('edge_type') == edge_type
)

def del_vertices(self, *vids: str):
"""Delete specified vertices."""
Expand Down
Loading
Loading