Skip to content

Commit 6188f82

Browse files
authored
feat: add delete_doc and list_docs for kb (#131)
* feat: add delete_doc and list_docs * fix: remove redundant characters * feat: add mysql and redis part
1 parent ce8bc07 commit 6188f82

File tree

7 files changed

+286
-7
lines changed

7 files changed

+286
-7
lines changed

veadk/database/database_adapter.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,33 @@ def query(self, query: str, index: str, top_k: int = 0) -> list:
5454
logger.error(f"Failed to search from Redis: index={index} error={e}")
5555
raise e
5656

57+
def delete_doc(self, index: str, id: str) -> bool:
    """Delete one document through the Redis client.

    Args:
        index: Forwarded to the client as its ``key`` argument.
        id: Identifier of the document, forwarded unchanged.

    Returns:
        The value returned by the client's ``delete_doc``; ``False`` when
        the client raises any exception (the error is logged, not re-raised).
    """
    logger.debug(f"Deleting document from Redis database: index={index} id={id}")
    try:
        # The Redis client works on one key plus one document id per call.
        return self.client.delete_doc(key=index, id=id)
    except Exception as exc:
        failure = f"Failed to delete document from Redis database: index={index} id={id} error={exc}"
        logger.error(failure)
    return False
69+
70+
def list_docs(self, index: str, offset: int = 0, limit: int = 100) -> list[dict]:
    """Return one page of documents stored under a Redis key.

    The client is asked for every document under ``index``; pagination is
    then applied locally by slicing.

    Args:
        index: Forwarded to the client as its ``key`` argument.
        offset: Position of the first document to return.
        limit: Maximum number of documents to return.

    Returns:
        The requested slice, or an empty list when anything raises.
    """
    logger.debug(f"Listing documents from Redis database: index={index}")
    try:
        everything = self.client.list_docs(key=index)
        last = offset + limit
        return everything[offset:last]
    except Exception as exc:
        failure = f"Failed to list documents from Redis database: index={index} error={exc}"
        logger.error(failure)
    return []
83+
5784

5885
class RelationalDatabaseAdapter:
5986
def __init__(self, client):
@@ -108,6 +135,28 @@ def query(self, query: str, index: str, top_k: int) -> list[str]:
108135

109136
return [item["data"] for item in results]
110137

138+
def delete_doc(self, index: str, id: str) -> bool:
    """Delete one row from a SQL table through the client.

    Args:
        index: Table name, forwarded as the client's ``table`` argument.
        id: Row identifier; converted with ``int()`` before being sent.

    Returns:
        The value returned by the client's ``delete_doc``; ``False`` when
        the conversion or the client call raises.
    """
    logger.debug(f"Deleting document from SQL database: table_name={index} id={id}")
    try:
        # The client API takes a list of integer ids, so wrap the single id.
        numeric_ids = [int(id)]
        return self.client.delete_doc(table=index, ids=numeric_ids)
    except Exception as exc:
        failure = f"Failed to delete document from SQL database: table_name={index} id={id} error={exc}"
        logger.error(failure)
    return False
149+
150+
def list_docs(self, index: str, offset: int = 0, limit: int = 100) -> list[dict]:
    """Return one page of documents from a SQL table.

    Args:
        index: Table name, forwarded as the client's ``table`` argument.
        offset: Forwarded to the client unchanged.
        limit: Forwarded to the client unchanged.

    Returns:
        The client's result, or an empty list when the client raises.
    """
    logger.debug(f"Listing documents from SQL database: table_name={index}")
    try:
        page = self.client.list_docs(table=index, offset=offset, limit=limit)
    except Exception as exc:
        failure = f"Failed to list documents from SQL database: table_name={index} error={exc}"
        logger.error(failure)
        return []
    return page
159+
111160

112161
class VectorDatabaseAdapter:
113162
def __init__(self, client):
@@ -152,6 +201,23 @@ def query(self, query: str, index: str, top_k: int) -> list[str]:
152201
top_k=top_k,
153202
)
154203

204+
def delete_doc(self, index: str, id: str) -> bool:
    """Delete one document from a vector collection.

    The index name is validated first; a validation failure propagates to
    the caller because it happens outside the ``try`` block.

    Args:
        index: Collection name, forwarded as ``collection_name``.
        id: Identifier of the document to delete.

    Returns:
        ``True`` when the client call completes, ``False`` when it raises.
    """
    self._validate_index(index)
    logger.debug(f"Deleting documents from vector database: index={index} id={id}")
    try:
        self.client.delete_by_id(collection_name=index, id=id)
    except Exception as exc:
        failure = f"Failed to delete document from vector database: index={index} id={id} error={exc}"
        logger.error(failure)
        return False
    else:
        return True
215+
216+
def list_docs(self, index: str, offset: int = 0, limit: int = 1000) -> list[dict]:
    """Return one page of documents from a vector collection.

    Args:
        index: Collection name; validated, then forwarded as ``collection_name``.
        offset: Forwarded to the client unchanged.
        limit: Forwarded to the client unchanged.

    Returns:
        Whatever the client's ``list_docs`` returns; exceptions propagate.
    """
    self._validate_index(index)
    logger.debug(f"Listing documents from vector database: index={index}")
    page = self.client.list_docs(
        collection_name=index,
        offset=offset,
        limit=limit,
    )
    return page
220+
155221

156222
class VikingDatabaseAdapter:
157223
def __init__(self, client):
@@ -212,6 +278,16 @@ def query(self, query: str, index: str, top_k: int) -> list[str]:
212278

213279
return self.client.query(query, collection_name=index, top_k=top_k)
214280

281+
def delete_doc(self, index: str, id: str) -> bool:
    """Delete one document from a Viking collection.

    Args:
        index: Collection name; validated, then forwarded as ``collection_name``.
        id: Identifier of the document to delete.

    Returns:
        The value returned by the client's ``delete_by_id``; exceptions
        raised by validation or by the client propagate.
    """
    self._validate_index(index)
    logger.debug(f"Deleting documents from vector database: index={index} id={id}")
    outcome = self.client.delete_by_id(collection_name=index, id=id)
    return outcome
285+
286+
def list_docs(self, index: str, offset: int = 0, limit: int = 100) -> list[dict]:
    """Return one page of documents from a Viking collection.

    ``offset`` and ``limit`` now carry defaults so this adapter can be called
    the same way as the other adapters in this module (which all default
    them); explicit values behave exactly as before.

    Args:
        index: Collection name; validated, then forwarded as ``collection_name``.
        offset: Forwarded to the client unchanged.
        limit: Forwarded to the client unchanged.

    Returns:
        Whatever the client's ``list_docs`` returns; exceptions propagate.
    """
    self._validate_index(index)
    logger.debug(f"Listing documents from vector database: index={index}")
    return self.client.list_docs(collection_name=index, offset=offset, limit=limit)
290+
215291

216292
class VikingMemoryDatabaseAdapter:
217293
def __init__(self, client):
@@ -248,6 +324,12 @@ def query(self, query: str, index: str, top_k: int, **kwargs):
248324
result = self.client.query(query, collection_name=index, top_k=top_k, **kwargs)
249325
return result
250326

327+
def delete_docs(self, index: str, ids: list[int]):
    """Unsupported bulk delete; kept for existing callers.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("VikingMemoryDatabase does not support delete_docs")

def delete_doc(self, index: str, id: str) -> bool:
    """Unsupported single-document delete.

    Provided under the same name and signature as the other adapters so that
    a caller using ``delete_doc(index=..., id=...)`` receives a clear
    ``NotImplementedError`` rather than an ``AttributeError``.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("VikingMemoryDatabase does not support delete_doc")
329+
330+
def list_docs(self, index: str, offset: int = 0, limit: int = 100) -> list[dict]:
    """Unsupported document listing.

    Accepts the same ``offset``/``limit`` keywords as the other adapters so a
    paginated call fails with ``NotImplementedError`` instead of a
    ``TypeError`` about unexpected keyword arguments.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("VikingMemoryDatabase does not support list_docs")
332+
251333

252334
class LocalDatabaseAdapter:
253335
def __init__(self, client):
@@ -261,6 +343,12 @@ def add(self, data: list[str], **kwargs):
261343
def query(self, query: str, **kwargs):
    """Forward a query, with any extra keyword arguments, to the local client."""
    backend = self.client
    return backend.query(query, **kwargs)
263345

346+
def delete_doc(self, index: str, id: str) -> bool:
    """Delete one document from the local client.

    Args:
        index: Accepted for interface parity; not passed to the client.
        id: Identifier forwarded positionally to the client's ``delete_doc``.

    Returns:
        Whatever the client's ``delete_doc`` returns; exceptions propagate.
    """
    backend = self.client
    return backend.delete_doc(id)
348+
349+
def list_docs(self, index: str, offset: int = 0, limit: int = 100) -> list[dict]:
    """List documents held by the local client.

    Args:
        index: Accepted for interface parity; not passed to the client.
        offset: Forwarded to the client as a keyword argument.
        limit: Forwarded to the client as a keyword argument.

    Returns:
        Whatever the client's ``list_docs`` returns.
    """
    paging = {"offset": offset, "limit": limit}
    return self.client.list_docs(**paging)
351+
264352

265353
MAPPING = {
266354
"RedisDatabase": KVDatabaseAdapter,

veadk/database/kv/redis_database.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,47 @@ def delete(self, **kwargs):
110110
except Exception as e:
111111
logger.error(f"Failed to delete key `{key}`: {e}")
112112
raise e
113+
114+
def delete_doc(self, key: str, id: str) -> bool:
    """Delete the document at a given position of a Redis list.

    Document ids are list positions (as produced by ``list_docs``). The
    previous implementation located the element by position but then removed
    it *by value* with ``LREM``, which deletes the first equal element and so
    removed the wrong position whenever the list held duplicates. This
    version overwrites the target slot with a unique tombstone and removes
    that tombstone, so exactly the requested position is deleted, without
    fetching the whole list.

    Args:
        key: The Redis key (list) to delete from.
        id: The position of the document, as a decimal string.

    Returns:
        bool: True if the element was removed; False if the id is not a
        valid position in the list or the Redis call failed.
    """
    import uuid  # local import: only needed to build the tombstone value

    try:
        position = int(id)
    except (TypeError, ValueError):
        logger.warning(f"Document with id {id} not found in key {key}")
        return False

    try:
        if position < 0 or position >= self._client.llen(key):
            logger.warning(f"Document with id {id} not found in key {key}")
            return False

        tombstone = f"__veadk_deleted__{uuid.uuid4().hex}"
        # NOTE(review): assumes a redis-py style client whose pipeline()
        # queues commands and runs them together on execute() — confirm.
        pipe = self._client.pipeline()
        pipe.lset(key, position, tombstone)
        pipe.lrem(key, 1, tombstone)
        pipe.execute()
        return True
    except Exception as e:
        logger.error(f"Failed to delete document with id {id} from key {key}: {e}")
        return False
141+
142+
def list_docs(self, key: str) -> list[dict]:
    """Return every element of a Redis list as an id/content record.

    Args:
        key: The Redis key (list) to read.

    Returns:
        list[dict]: One ``{"id", "content"}`` dict per element, where ``id``
        is the element's position rendered as a string. An empty list is
        returned when the read fails.
    """
    try:
        documents = []
        for position, payload in enumerate(self._client.lrange(key, 0, -1)):
            documents.append({"id": str(position), "content": payload})
        return documents
    except Exception as exc:
        logger.error(f"Failed to list documents from key {key}: {exc}")
    return []

veadk/database/local_database.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,35 @@ class LocalDataBase(BaseDatabase):
2424

2525
def __init__(self, **kwargs):
2626
super().__init__()
27-
self.data = []
27+
self.data = {}
2828
self._type = "local"
29+
self._next_id = 0 # Used to generate unique IDs
2930

3031
def add_texts(self, texts: list[str], **kwargs):
31-
self.data.extend(texts)
32+
for text in texts:
33+
self.data[str(self._next_id)] = text
34+
self._next_id += 1
3235

3336
def is_empty(self):
    """Return True when no documents are stored in ``self.data``."""
    return not self.data
3538

3639
def query(self, query: str, **kwargs: Any) -> list[str]:
37-
return self.data
40+
return list(self.data.values())
3841

3942
def delete(self, **kwargs: Any):
40-
self.data = []
43+
self.data = {}
4144

4245
def add(self, texts: list[str], **kwargs: Any):
    """Store texts by delegating to ``add_texts``.

    Extra keyword arguments are accepted but not forwarded.
    """
    outcome = self.add_texts(texts)
    return outcome
47+
48+
def list_docs(self, **kwargs: Any) -> list[dict]:
    """Return stored documents as ``{"id", "content"}`` dicts, with paging.

    Previously every keyword argument was ignored, so a caller passing
    ``offset``/``limit`` always received the full collection. Both are now
    honoured; calling without them still returns everything.

    Args:
        **kwargs: Optional ``offset`` (documents to skip, default 0) and
            ``limit`` (maximum documents to return; ``None`` or a negative
            value means no limit). Other keywords are ignored.

    Returns:
        The selected documents in insertion order of ``self.data``.
    """
    offset = max(int(kwargs.get("offset") or 0), 0)
    limit = kwargs.get("limit")
    docs = [
        {"id": doc_id, "content": content} for doc_id, content in self.data.items()
    ]
    if limit is None or limit < 0:
        return docs[offset:]
    return docs[offset : offset + limit]
50+
51+
def delete_doc(self, id: str, **kwargs: Any) -> bool:
    """Remove one document from ``self.data`` by id.

    The original checked membership and then wrapped ``del`` in a broad
    ``try/except`` whose failure branch could not be reached; this performs
    a single lookup and keeps the same observable contract.

    Args:
        id: Key of the document to remove.
        **kwargs: Accepted for interface compatibility; unused.

    Returns:
        True once the document has been removed.

    Raises:
        ValueError: If no document with this id exists.
    """
    try:
        del self.data[id]
    except KeyError:
        raise ValueError(f"id {id} not found") from None
    return True

veadk/database/relational/mysql_database.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,5 +111,62 @@ def delete(self, **kwargs):
111111
logger.error(f"Failed to drop table {table}: {e}")
112112
raise e
113113

114+
def delete_doc(self, table: str, ids: list[int]) -> bool:
    """Delete rows whose ``id`` is in ``ids`` from a MySQL table.

    Args:
        table: Name of the table to delete from.
        ids: Row ids to delete; bound as query parameters.

    Returns:
        bool: False when the table does not exist or the statement fails;
        True when ``ids`` is empty or the delete was committed.
    """
    table_present = self.table_exists(table)
    if not table_present:
        logger.warning(f"Table {table} does not exist. Skipping delete operation.")
        return False

    # An empty id list is treated as a successful no-op.
    if not ids:
        return True

    try:
        with self._connection.cursor() as cur:
            # One "%s" marker per id keeps the values parameterized.
            markers = ",".join("%s" for _ in range(len(ids)))
            statement = f"DELETE FROM `{table}` WHERE id IN ({markers})"
            cur.execute(statement, ids)
            self._connection.commit()
            logger.info(f"Deleted {cur.rowcount} documents from table {table}")
            return True
    except Exception as exc:
        logger.error(f"Failed to delete documents from table {table}: {exc}")
        return False
143+
144+
def list_docs(self, table: str, offset: int = 0, limit: int = 100) -> list[dict]:
    """Return one page of rows from a MySQL table, newest first.

    Rows are ordered by ``created_at`` descending and paginated in SQL.

    Args:
        table: Name of the table to read.
        offset: Number of rows to skip.
        limit: Maximum number of rows to return.

    Returns:
        list[dict]: ``{"id", "content"}`` dicts built from the ``id`` and
        ``data`` columns (``id`` rendered as a string); an empty list when
        the table does not exist or the query fails.
    """
    table_present = self.table_exists(table)
    if not table_present:
        logger.warning(f"Table {table} does not exist. Returning empty list.")
        return []

    try:
        with self._connection.cursor() as cur:
            statement = f"SELECT id, data FROM `{table}` ORDER BY created_at DESC LIMIT %s OFFSET %s"
            cur.execute(statement, (limit, offset))
            documents = []
            for record in cur.fetchall():
                documents.append({"id": str(record["id"]), "content": record["data"]})
            return documents
    except Exception as exc:
        logger.error(f"Failed to list documents from table {table}: {exc}")
        return []
170+
114171
def is_empty(self):
115172
pass

veadk/database/vector/opensearch_vector_database.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,20 +219,22 @@ def list_all_collection(self) -> list:
219219
response = self._opensearch_client.indices.get_alias()
220220
return list(response.keys())
221221

222-
def get_all_docs(self, collection_name: str, size: int = 10000) -> list[dict]:
222+
def list_docs(
223+
self, collection_name: str, offset: int = 0, limit: int = 10000
224+
) -> list[dict]:
223225
"""Match all docs in one index of OpenSearch"""
224226
if not self.collection_exists(collection_name):
225227
logger.warning(
226228
f"Get all docs, but collection {collection_name} does not exist. return a empty list."
227229
)
228230
return []
229231

230-
query = {"size": size, "query": {"match_all": {}}}
232+
query = {"size": limit, "from": offset, "query": {"match_all": {}}}
231233
response = self._opensearch_client.search(index=collection_name, body=query)
232234
return [
233235
{
234236
"id": hit["_id"],
235-
"page_content": hit["_source"]["page_content"],
237+
"content": hit["_source"]["page_content"],
236238
}
237239
for hit in response["hits"]["hits"]
238240
]

veadk/database/viking/viking_database.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
doc_add_path = "/api/knowledge/doc/add"
4242
doc_info_path = "/api/knowledge/doc/info"
4343
doc_del_path = "/api/collection/drop"
44+
list_docs_path = "/api/knowledge/point/list"
45+
delete_docs_path = "/api/knowledge/point/delete"
4446

4547

4648
class VolcengineTOSConfig(BaseModel):
@@ -400,3 +402,66 @@ def collection_exists(self, collection_name: str) -> bool:
400402
return True
401403
else:
402404
return False
405+
406+
def list_docs(
    self, collection_name: str, offset: int = 0, limit: int = -1
) -> list[dict]:
    """List points of a collection via the knowledge-base HTTP API.

    Sends a POST to ``list_docs_path`` carrying the collection name, the
    configured project and the paging values.

    Args:
        collection_name: Collection to list.
        offset: Sent to the API as ``offset``.
        limit: Sent to the API as ``limit``.

    Returns:
        One ``{"id", "content"}`` dict per entry of the response's
        ``data.point_list`` (``id`` taken from ``point_id``).

    Raises:
        ValueError: If the response ``code`` is not 0.
    """
    payload = {
        "collection_name": collection_name,
        "project": self.config.project,
        "offset": offset,
        "limit": limit,
    }

    signed = prepare_request(
        method="POST",
        path=list_docs_path,
        config=self.config,
        data=payload,
    )
    endpoint = "https://{}{}".format(g_knowledge_base_domain, signed.path)
    resp = requests.request(
        method=signed.method,
        url=endpoint,
        headers=signed.headers,
        data=signed.body,
    )

    result = resp.json()
    if result["code"] != 0:
        logger.error(f"Error in list_docs: {result['message']}")
        raise ValueError(f"Error in list_docs: {result['message']}")

    documents = []
    for point in result["data"]["point_list"]:
        documents.append({"id": point["point_id"], "content": point["content"]})
    return documents
441+
442+
def delete_by_id(self, collection_name: str, id: str) -> bool:
    """Delete one point of a collection via the knowledge-base HTTP API.

    Sends a POST to ``delete_docs_path`` carrying the collection name, the
    configured project and the point id. A non-zero response code is now
    logged before returning False, so failures are no longer silent.

    Args:
        collection_name: Collection that holds the point.
        id: Sent to the API as ``point_id``.

    Returns:
        bool: True when the response ``code`` is 0, False otherwise.
    """
    request_params = {
        "collection_name": collection_name,
        "project": self.config.project,
        "point_id": id,
    }

    delete_req = prepare_request(
        method="POST",
        path=delete_docs_path,
        config=self.config,
        data=request_params,
    )
    resp = requests.request(
        method=delete_req.method,
        url="https://{}{}".format(g_knowledge_base_domain, delete_req.path),
        headers=delete_req.headers,
        data=delete_req.body,
    )

    result = resp.json()
    if result["code"] != 0:
        # .get(): the failure payload is not guaranteed to carry a message.
        logger.error(f"Error in delete_by_id: {result.get('message')}")
        return False
    return True

veadk/knowledgebase/knowledgebase.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,11 @@ def search(self, query: str, app_name: str, top_k: int | None = None) -> list[st
8080
if len(result) == 0:
8181
logger.warning(f"No documents found in knowledgebase. Query: {query}")
8282
return result
83+
84+
def delete_doc(self, app_name: str, id: str) -> bool:
    """Delete one document from the knowledge base of an application.

    Args:
        app_name: Application name, turned into an index name with
            ``build_knowledgebase_index``.
        id: Identifier of the document, forwarded to the adapter.

    Returns:
        The value returned by the adapter's ``delete_doc``.
    """
    target_index = build_knowledgebase_index(app_name)
    outcome = self.adapter.delete_doc(index=target_index, id=id)
    return outcome
87+
88+
def list_docs(self, app_name: str, offset: int = 0, limit: int = 100) -> list[dict]:
    """List documents from the knowledge base of an application.

    Args:
        app_name: Application name, turned into an index name with
            ``build_knowledgebase_index``.
        offset: Forwarded to the adapter unchanged.
        limit: Forwarded to the adapter unchanged.

    Returns:
        The value returned by the adapter's ``list_docs``.
    """
    target_index = build_knowledgebase_index(app_name)
    page = self.adapter.list_docs(index=target_index, offset=offset, limit=limit)
    return page

0 commit comments

Comments
 (0)