Skip to content

Commit

Permalink
fix image files not deleted on indexing_estimate #9541 (#10798)
Browse files (browse the repository at this point in the history)
Co-authored-by: root <root@localhost.localdomain>
  • Loading branch information
wy96f and root authored Nov 21, 2024
1 parent 2ae6460 commit 94c9cad
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 0 deletions.
14 changes: 14 additions & 0 deletions api/core/indexing_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
)
from core.rag.splitter.text_splitter import TextSplitter
from core.tools.utils.text_processing_utils import remove_leading_symbols
from core.tools.utils.web_reader_tool import get_image_upload_file_ids
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from extensions.ext_storage import storage
Expand Down Expand Up @@ -279,6 +280,19 @@ def indexing_estimate(
if len(preview_texts) < 5:
preview_texts.append(document.page_content)

# delete image files and related db records
image_upload_file_ids = get_image_upload_file_ids(document.page_content)
for upload_file_id in image_upload_file_ids:
image_file = db.session.query(UploadFile).filter(UploadFile.id == upload_file_id).first()
try:
storage.delete(image_file.key)
except Exception:
logging.exception(
"Delete image_files failed while indexing_estimate, \
image_upload_file_is: {}".format(upload_file_id)
)
db.session.delete(image_file)

if doc_form and doc_form == "qa_model":
if len(preview_texts) > 0:
# qa model document
Expand Down
1 change: 1 addition & 0 deletions api/tasks/clean_dataset_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def clean_dataset_task(
"Delete image_files failed when storage deleted, \
image_upload_file_is: {}".format(upload_file_id)
)
db.session.delete(image_file)
db.session.delete(segment)

db.session.query(DatasetProcessRule).filter(DatasetProcessRule.dataset_id == dataset_id).delete()
Expand Down
1 change: 1 addition & 0 deletions api/tasks/clean_document_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i
"Delete image_files failed when storage deleted, \
image_upload_file_is: {}".format(upload_file_id)
)
db.session.delete(image_file)
db.session.delete(segment)

db.session.commit()
Expand Down

0 comments on commit 94c9cad

Please sign in to comment.