Skip to content

Commit f13a3f7

Browse files
rahul-tripRahul Tripathi
andauthored
Enhanced DaxaSafeLoader as per review comments: (langchain-ai#2)
* Enhanced DaxaSafeLoader as per review comments: - Remane eof macro to loading_end, make default value to "false". - in load, send loading_end s "true". - in lazy load send empty doc with loading_end as "true" when stopIteration is found. - remove call is for en - change /discover to /api/discover - add load_id - add owner in loader init --------- Signed-off-by: Rahul Tripathi <rahul@clouddefense.io> Co-authored-by: Rahul Tripathi <rahul@clouddefense.io>
1 parent 9b704a7 commit f13a3f7

File tree

2 files changed

+23
-16
lines changed
  • libs/community/langchain_community

2 files changed

+23
-16
lines changed

libs/community/langchain_community/document_loaders/daxa.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,34 @@
22

33
import os
44
import requests
5-
from http import HTTPStatus
65
import logging
6+
import uuid
7+
from http import HTTPStatus
78

89
from langchain_community.utilities.daxa import CLASSIFIER_URL, PLUGIN_VERSION
910
from langchain_community.utilities.daxa import get_loader_full_path, get_loader_type, get_full_path, get_runtime
10-
from langchain_community.utilities.daxa import App, Framework, Runtime
11-
11+
from langchain_community.utilities.daxa import App
1212
from langchain_community.document_loaders.base import BaseLoader
1313

1414
logger = logging.getLogger(__name__)
1515

1616

1717
class DaxaSafeLoader(BaseLoader):
1818

19-
def __init__(self, langchain_loader: BaseLoader, app_id: str):
19+
def __init__(self, langchain_loader: BaseLoader, app_id: str, owner: str):
2020
if not app_id or not isinstance(app_id, str):
2121
raise NameError("""No app_id provided. Or invalid app_id.""")
2222
self.app_name = app_id
23+
self.loader_id = str(uuid.uuid4())
2324
self.loader = langchain_loader
25+
self.owner = owner
2426
self.source_path = get_loader_full_path(self.loader)
2527
self.docs = []
2628
loader_name = str(type(self.loader)).split(".")[-1].split("'")[0]
2729
source_type = get_loader_type(loader_name)
2830
self.loader_details = {
2931
"loader": loader_name,
32+
"loader_id": self.loader_id,
3033
"source_path": self.source_path,
3134
"source_type": source_type
3235
}
@@ -37,8 +40,8 @@ def __init__(self, langchain_loader: BaseLoader, app_id: str):
3740
def load(self):
3841
"""load Documents."""
3942
self.docs = self.loader.load()
40-
self._send_loader_doc()
41-
self._send_loader_doc(macro="EOF")
43+
self._send_loader_doc(loading_end=True)
44+
DaxaSafeLoader.set_loader_sent()
4245
return self.docs
4346

4447
def lazy_load(self):
@@ -53,7 +56,9 @@ def lazy_load(self):
5356
try:
5457
doc = next(doc_iterator)
5558
except StopIteration:
56-
self._send_loader_doc(macro="EOF")
59+
self.docs = [ ]
60+
self._send_loader_doc(loading_end=True)
61+
DaxaSafeLoader.set_loader_sent()
5762
break
5863
self.docs = [doc, ]
5964
self._send_loader_doc()
@@ -67,29 +72,27 @@ def set_discover_sent(cls):
6772
def set_loader_sent(cls):
6873
cls._loader_sent = True
6974

70-
def _send_loader_doc(self, macro=None):
75+
def _send_loader_doc(self, loading_end=False):
7176
headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
7277
doc_content = [doc.dict() for doc in self.docs]
7378
payload = {
7479
"name": self.app_name,
80+
"owner": self.owner,
7581
"docs": [{"doc": doc.get('page_content'), "source_path": get_full_path(doc.get('metadata', {}).get('source')), "last_modified": doc.get('metadata', {}).get('last_modified')} for doc in doc_content],
7682
"plugin_version": PLUGIN_VERSION,
7783
"loader_details": self.loader_details,
78-
"eof": "false"
84+
"loading_end": "false"
7985
}
80-
if macro == "EOF":
81-
payload["eof"] = "true"
82-
payload["docs"] = []
86+
if loading_end is True:
87+
payload["loading_end"] = "true"
8388
resp = requests.post(f"{CLASSIFIER_URL}/loader/doc", headers=headers, json=payload, timeout=10)
8489
logger.debug(f"===> send_loader_doc: request, url {resp.request.url}, headers {resp.request.headers}, body {resp.request.body[:999]} with a len: {len(resp.request.body)}\n")
8590
logger.debug(f"===> send_loader_doc: response status {resp.status_code}, body {resp.json()}\n")
86-
if resp.status_code == HTTPStatus.OK or resp.status_code == HTTPStatus.BAD_GATEWAY:
87-
DaxaSafeLoader.set_discover_sent()
8891

8992
def _send_discover(self, app: App):
9093
headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
9194
payload = app.model_dump(exclude_unset=True)
92-
resp = requests.post(f"{CLASSIFIER_URL}/discover", headers=headers, json=payload)
95+
resp = requests.post(f"{CLASSIFIER_URL}/app/discover", headers=headers, json=payload)
9396
logger.debug(f"===> send_discover: request, url {resp.request.url}, headers {resp.request.headers}, body {resp.request.body}\n")
9497
logger.debug(f"===> send_discover: response status {resp.status_code}, body {resp.json()}\n")
9598
if resp.status_code == HTTPStatus.OK or resp.status_code == HTTPStatus.BAD_GATEWAY:
@@ -99,8 +102,10 @@ def _get_app_details(self):
99102
framework, runtime = get_runtime()
100103
app = App(
101104
name=self.app_name,
105+
owner=self.owner,
106+
loader_id=self.loader_id,
102107
runtime=runtime,
103108
framework=framework,
104109
plugin_version=PLUGIN_VERSION,
105110
)
106-
return app
111+
return app

libs/community/langchain_community/utilities/daxa.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ class Framework(BaseModel):
6262

6363
class App(BaseModel):
6464
name: str
65+
owner: str
66+
loader_id: str
6567
runtime: Runtime
6668
framework: Framework
6769
plugin_version: str

0 commit comments

Comments
 (0)