22
33import os
44import requests
5- from http import HTTPStatus
65import logging
6+ import uuid
7+ from http import HTTPStatus
78
89from langchain_community .utilities .daxa import CLASSIFIER_URL , PLUGIN_VERSION
910from langchain_community .utilities .daxa import get_loader_full_path , get_loader_type , get_full_path , get_runtime
10- from langchain_community .utilities .daxa import App , Framework , Runtime
11-
11+ from langchain_community .utilities .daxa import App
1212from langchain_community .document_loaders .base import BaseLoader
1313
1414logger = logging .getLogger (__name__ )
1515
1616
1717class DaxaSafeLoader (BaseLoader ):
1818
19- def __init__ (self , langchain_loader : BaseLoader , app_id : str ):
19+ def __init__ (self , langchain_loader : BaseLoader , app_id : str , owner : str ):
2020 if not app_id or not isinstance (app_id , str ):
2121 raise NameError ("""No app_id provided. Or invalid app_id.""" )
2222 self .app_name = app_id
23+ self .loader_id = str (uuid .uuid4 ())
2324 self .loader = langchain_loader
25+ self .owner = owner
2426 self .source_path = get_loader_full_path (self .loader )
2527 self .docs = []
2628 loader_name = str (type (self .loader )).split ("." )[- 1 ].split ("'" )[0 ]
2729 source_type = get_loader_type (loader_name )
2830 self .loader_details = {
2931 "loader" : loader_name ,
32+ "loader_id" : self .loader_id ,
3033 "source_path" : self .source_path ,
3134 "source_type" : source_type
3235 }
@@ -37,8 +40,8 @@ def __init__(self, langchain_loader: BaseLoader, app_id: str):
3740 def load (self ):
3841 """load Documents."""
3942 self .docs = self .loader .load ()
40- self ._send_loader_doc ()
41- self . _send_loader_doc ( macro = "EOF" )
43+ self ._send_loader_doc (loading_end = True )
44+ DaxaSafeLoader . set_loader_sent ( )
4245 return self .docs
4346
4447 def lazy_load (self ):
@@ -53,7 +56,9 @@ def lazy_load(self):
5356 try :
5457 doc = next (doc_iterator )
5558 except StopIteration :
56- self ._send_loader_doc (macro = "EOF" )
59+ self .docs = [ ]
60+ self ._send_loader_doc (loading_end = True )
61+ DaxaSafeLoader .set_loader_sent ()
5762 break
5863 self .docs = [doc , ]
5964 self ._send_loader_doc ()
@@ -67,29 +72,27 @@ def set_discover_sent(cls):
6772 def set_loader_sent (cls ):
6873 cls ._loader_sent = True
6974
70- def _send_loader_doc (self , macro = None ):
75+ def _send_loader_doc (self , loading_end = False ):
7176 headers = {'Accept' : 'application/json' , 'Content-Type' : 'application/json' }
7277 doc_content = [doc .dict () for doc in self .docs ]
7378 payload = {
7479 "name" : self .app_name ,
80+ "owner" : self .owner ,
7581 "docs" : [{"doc" : doc .get ('page_content' ), "source_path" : get_full_path (doc .get ('metadata' , {}).get ('source' )), "last_modified" : doc .get ('metadata' , {}).get ('last_modified' )} for doc in doc_content ],
7682 "plugin_version" : PLUGIN_VERSION ,
7783 "loader_details" : self .loader_details ,
78- "eof " : "false"
84+ "loading_end " : "false"
7985 }
80- if macro == "EOF" :
81- payload ["eof" ] = "true"
82- payload ["docs" ] = []
86+ if loading_end is True :
87+ payload ["loading_end" ] = "true"
8388 resp = requests .post (f"{ CLASSIFIER_URL } /loader/doc" , headers = headers , json = payload , timeout = 10 )
8489 logger .debug (f"===> send_loader_doc: request, url { resp .request .url } , headers { resp .request .headers } , body { resp .request .body [:999 ]} with a len: { len (resp .request .body )} \n " )
8590 logger .debug (f"===> send_loader_doc: response status { resp .status_code } , body { resp .json ()} \n " )
86- if resp .status_code == HTTPStatus .OK or resp .status_code == HTTPStatus .BAD_GATEWAY :
87- DaxaSafeLoader .set_discover_sent ()
8891
8992 def _send_discover (self , app : App ):
9093 headers = {'Accept' : 'application/json' , 'Content-Type' : 'application/json' }
9194 payload = app .model_dump (exclude_unset = True )
92- resp = requests .post (f"{ CLASSIFIER_URL } /discover" , headers = headers , json = payload )
95+ resp = requests .post (f"{ CLASSIFIER_URL } /app/ discover" , headers = headers , json = payload )
9396 logger .debug (f"===> send_discover: request, url { resp .request .url } , headers { resp .request .headers } , body { resp .request .body } \n " )
9497 logger .debug (f"===> send_discover: response status { resp .status_code } , body { resp .json ()} \n " )
9598 if resp .status_code == HTTPStatus .OK or resp .status_code == HTTPStatus .BAD_GATEWAY :
@@ -99,8 +102,10 @@ def _get_app_details(self):
99102 framework , runtime = get_runtime ()
100103 app = App (
101104 name = self .app_name ,
105+ owner = self .owner ,
106+ loader_id = self .loader_id ,
102107 runtime = runtime ,
103108 framework = framework ,
104109 plugin_version = PLUGIN_VERSION ,
105110 )
106- return app
111+ return app
0 commit comments