Skip to content

Commit 1d35939

Browse files
committed
v1
1 parent 2179fbb commit 1d35939

File tree

1 file changed

+32
-42
lines changed

1 file changed

+32
-42
lines changed

main.py

Lines changed: 32 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,8 @@
22
from typing import Annotated
33
import json
44
from fastapi import UploadFile, Form, FastAPI, File, Header, status
5-
import tempfile
65
from starlette.responses import JSONResponse
7-
86
import modal
9-
from llama_index.readers.file import UnstructuredReader
10-
import requests
117

128
image = (
139
modal.Image.debian_slim(python_version="3.12")
@@ -114,26 +110,21 @@ async def ingest(
114110
},
115111
)
116112

117-
from pathlib import Path
113+
from io import BytesIO
118114
from llama_index.readers.file import UnstructuredReader
119115
import requests
120116

121-
122117
file_stream = None
118+
filename_to_use = file.filename if file else filename
123119

124-
with tempfile.TemporaryDirectory() as temp_dir:
120+
try:
125121
if file:
126-
# file_path = os.path.join(temp_dir, file.filename)
127-
# with open(file_path, "wb") as f:
128-
# f.write(await file.read())
129-
file_stream = await file.()
122+
file_stream = BytesIO(await file.read())
130123
else:
131124
try:
132125
response = requests.get(url)
133126
response.raise_for_status()
134-
file_path = os.path.join(temp_dir, filename)
135-
with open(file_path, "wb") as f:
136-
f.write(response.content)
127+
file_stream = BytesIO(response.content)
137128
except Exception as e:
138129
return JSONResponse(
139130
status_code=status.HTTP_400_BAD_REQUEST,
@@ -143,42 +134,41 @@ async def ingest(
143134
},
144135
)
145136

146-
try:
147-
documents = UnstructuredReader().load_data(
148-
unstructured_kwargs={
149-
"file": Path(file_path),
150-
"metadata_filename": filename,
151-
**unstructured_args,
152-
},
153-
split_documents=True,
154-
extra_info=metadata,
155-
)
156-
157-
if len(documents) <= 0:
158-
return JSONResponse(
159-
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
160-
content={
161-
"status": status.HTTP_500_INTERNAL_SERVER_ERROR,
162-
"message": "couldn't parse document",
163-
},
164-
)
137+
documents = UnstructuredReader().load_data(
138+
unstructured_kwargs={
139+
"file": file_stream,
140+
"metadata_filename": filename_to_use,
141+
**unstructured_args,
142+
},
143+
split_documents=True,
144+
extra_info=metadata,
145+
)
165146

166-
return JSONResponse(
167-
status_code=status.HTTP_200_OK,
168-
content={
169-
"status": status.HTTP_200_OK,
170-
"documents": [document.to_dict() for document in documents],
171-
},
172-
)
173-
except Exception as e:
147+
if len(documents) <= 0:
174148
return JSONResponse(
175149
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
176150
content={
177151
"status": status.HTTP_500_INTERNAL_SERVER_ERROR,
178-
"message": str(e),
152+
"message": "couldn't parse document",
179153
},
180154
)
181155

156+
return JSONResponse(
157+
status_code=status.HTTP_200_OK,
158+
content={
159+
"status": status.HTTP_200_OK,
160+
"documents": [document.to_dict() for document in documents],
161+
},
162+
)
163+
except Exception as e:
164+
return JSONResponse(
165+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
166+
content={
167+
"status": status.HTTP_500_INTERNAL_SERVER_ERROR,
168+
"message": str(e),
169+
},
170+
)
171+
182172

183173
@app.function(timeout=600) # 10 minutes
184174
@modal.asgi_app()

0 commit comments

Comments
 (0)