diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index 7db8f54f701556..cae539b6a7e340 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -30,6 +30,7 @@ ) from core.rag.splitter.text_splitter import TextSplitter from core.tools.utils.text_processing_utils import remove_leading_symbols +from core.tools.utils.web_reader_tool import get_image_upload_file_ids from extensions.ext_database import db from extensions.ext_redis import redis_client from extensions.ext_storage import storage @@ -279,6 +280,19 @@ def indexing_estimate( if len(preview_texts) < 5: preview_texts.append(document.page_content) + # delete image files and related db records + image_upload_file_ids = get_image_upload_file_ids(document.page_content) + for upload_file_id in image_upload_file_ids: + image_file = db.session.query(UploadFile).filter(UploadFile.id == upload_file_id).first() + try: + storage.delete(image_file.key) + except Exception: + logging.exception( + "Delete image_files failed while indexing_estimate, \ + image_upload_file_is: {}".format(upload_file_id) + ) + db.session.delete(image_file) + if doc_form and doc_form == "qa_model": if len(preview_texts) > 0: # qa model document diff --git a/api/tasks/clean_dataset_task.py b/api/tasks/clean_dataset_task.py index 4d45df4d2a87e8..a555fb28746697 100644 --- a/api/tasks/clean_dataset_task.py +++ b/api/tasks/clean_dataset_task.py @@ -78,6 +78,7 @@ def clean_dataset_task( "Delete image_files failed when storage deleted, \ image_upload_file_is: {}".format(upload_file_id) ) + db.session.delete(image_file) db.session.delete(segment) db.session.query(DatasetProcessRule).filter(DatasetProcessRule.dataset_id == dataset_id).delete() diff --git a/api/tasks/clean_document_task.py b/api/tasks/clean_document_task.py index 54c89450c91419..4d328643bfa165 100644 --- a/api/tasks/clean_document_task.py +++ b/api/tasks/clean_document_task.py @@ -51,6 +51,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i "Delete image_files failed when storage deleted, \ image_upload_file_is: {}".format(upload_file_id) ) + db.session.delete(image_file) db.session.delete(segment) db.session.commit()