2
2
from typing import Annotated
3
3
import json
4
4
from fastapi import UploadFile , Form , FastAPI , File , Header , status
5
- import tempfile
6
5
from starlette .responses import JSONResponse
7
-
8
6
import modal
9
- from llama_index .readers .file import UnstructuredReader
10
- import requests
11
7
12
8
image = (
13
9
modal .Image .debian_slim (python_version = "3.12" )
@@ -114,26 +110,21 @@ async def ingest(
114
110
},
115
111
)
116
112
117
- from pathlib import Path
113
+ from io import BytesIO
118
114
from llama_index .readers .file import UnstructuredReader
119
115
import requests
120
116
121
-
122
117
file_stream = None
118
+ filename_to_use = file .filename if file else filename
123
119
124
- with tempfile . TemporaryDirectory () as temp_dir :
120
+ try :
125
121
if file :
126
- # file_path = os.path.join(temp_dir, file.filename)
127
- # with open(file_path, "wb") as f:
128
- # f.write(await file.read())
129
- file_stream = await file . ()
122
+ file_stream = BytesIO (await file .read ())
130
123
else :
131
124
try :
132
125
response = requests .get (url )
133
126
response .raise_for_status ()
134
- file_path = os .path .join (temp_dir , filename )
135
- with open (file_path , "wb" ) as f :
136
- f .write (response .content )
127
+ file_stream = BytesIO (response .content )
137
128
except Exception as e :
138
129
return JSONResponse (
139
130
status_code = status .HTTP_400_BAD_REQUEST ,
@@ -143,42 +134,41 @@ async def ingest(
143
134
},
144
135
)
145
136
146
- try :
147
- documents = UnstructuredReader ().load_data (
148
- unstructured_kwargs = {
149
- "file" : Path (file_path ),
150
- "metadata_filename" : filename ,
151
- ** unstructured_args ,
152
- },
153
- split_documents = True ,
154
- extra_info = metadata ,
155
- )
156
-
157
- if len (documents ) <= 0 :
158
- return JSONResponse (
159
- status_code = status .HTTP_500_INTERNAL_SERVER_ERROR ,
160
- content = {
161
- "status" : status .HTTP_500_INTERNAL_SERVER_ERROR ,
162
- "message" : "couldn't parse document" ,
163
- },
164
- )
137
+ documents = UnstructuredReader ().load_data (
138
+ unstructured_kwargs = {
139
+ "file" : file_stream ,
140
+ "metadata_filename" : filename_to_use ,
141
+ ** unstructured_args ,
142
+ },
143
+ split_documents = True ,
144
+ extra_info = metadata ,
145
+ )
165
146
166
- return JSONResponse (
167
- status_code = status .HTTP_200_OK ,
168
- content = {
169
- "status" : status .HTTP_200_OK ,
170
- "documents" : [document .to_dict () for document in documents ],
171
- },
172
- )
173
- except Exception as e :
147
+ if len (documents ) <= 0 :
174
148
return JSONResponse (
175
149
status_code = status .HTTP_500_INTERNAL_SERVER_ERROR ,
176
150
content = {
177
151
"status" : status .HTTP_500_INTERNAL_SERVER_ERROR ,
178
- "message" : str ( e ) ,
152
+ "message" : "couldn't parse document" ,
179
153
},
180
154
)
181
155
156
+ return JSONResponse (
157
+ status_code = status .HTTP_200_OK ,
158
+ content = {
159
+ "status" : status .HTTP_200_OK ,
160
+ "documents" : [document .to_dict () for document in documents ],
161
+ },
162
+ )
163
+ except Exception as e :
164
+ return JSONResponse (
165
+ status_code = status .HTTP_500_INTERNAL_SERVER_ERROR ,
166
+ content = {
167
+ "status" : status .HTTP_500_INTERNAL_SERVER_ERROR ,
168
+ "message" : str (e ),
169
+ },
170
+ )
171
+
182
172
183
173
@app .function (timeout = 600 ) # 10 minutes
184
174
@modal .asgi_app ()
0 commit comments