diff --git a/ingestors/__init__.py b/ingestors/__init__.py index 05a67350d..a7727db45 100644 --- a/ingestors/__init__.py +++ b/ingestors/__init__.py @@ -1,4 +1,5 @@ """Provides a set of ingestors based on different file types.""" + import logging __version__ = "4.0.0-rc6" diff --git a/ingestors/manager.py b/ingestors/manager.py index ec3b527d4..6c9a6b722 100644 --- a/ingestors/manager.py +++ b/ingestors/manager.py @@ -230,7 +230,8 @@ def ingest(self, file_path, entity, **kwargs): log.exception(f"[{repr(entity)}] Failed to process: {pexc}") INGESTIONS_FAILED.labels(ingestor=ingestor_name).inc() entity.set("processingError", stringify(pexc)) - capture_exception(pexc) + if settings.SENTRY_CAPTURE_PROCESSING_EXCEPTIONS: + capture_exception(pexc) finally: self.finalize(entity) diff --git a/ingestors/settings.py b/ingestors/settings.py index 3f389951d..b5913cb54 100644 --- a/ingestors/settings.py +++ b/ingestors/settings.py @@ -52,3 +52,10 @@ sls.TAGS_DATABASE_URI = fts.DATABASE_URI RABBITMQ_URL = env.get("ALEPH_RABBITMQ_URL", "rabbitmq") + +# ProcessingException is thrown whenever something goes wrong with +# parsing a file. Enable this with care, it can easily eat up the +# Sentry quota of events. 
+SENTRY_CAPTURE_PROCESSING_EXCEPTIONS = env.to_bool( + "SENTRY_CAPTURE_PROCESSING_EXCEPTIONS", False +) diff --git a/requirements-dev.txt b/requirements-dev.txt index 6940d3799..643ceda81 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,3 @@ bump2version==1.0.1 -black==23.12.1 -ruff==0.1.9 +black==24.4.2 +ruff==0.4.2 diff --git a/requirements.txt b/requirements.txt index 0ea21d10c..244b22257 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,13 @@ banal==1.0.6 normality==2.5.0 pantomime==0.6.1 -followthemoney==3.5.8 -followthemoney-store[postgresql]==3.0.6 -servicelayer[google,amazon]==1.23.0rc3 +followthemoney==3.5.9 +followthemoney-store[postgresql]==3.1.0 +servicelayer[google,amazon]==1.22.2 languagecodes==1.1.1 countrytagger==0.1.2 pyicu==2.12 -google-cloud-vision==3.5.0 +google-cloud-vision==3.7.2 tesserocr==2.6.2 spacy==3.6.1 fingerprints==1.1.1 @@ -15,15 +15,15 @@ fasttext==0.9.2 pika==1.3.2 # Development -pytest==7.4.4 -pytest-cov==4.1.0 -click==8.1.6 +pytest==8.2.0 +pytest-cov==5.0.0 +click==8.1.7 # File format support dbf==0.99.9 pymediainfo==6.1.0 python-magic==0.4.27 -rarfile==4.1 +rarfile==4.2 xlrd==2.0.1 openpyxl==3.1.2 odfpy==1.4.1 @@ -33,11 +33,11 @@ olefile==0.47 Pillow==10.1.0 vobject==0.9.6.1 msglite==0.30.0 -icalendar==5.0.11 +icalendar==5.0.12 cryptography==41.0.7 requests[security]==2.31.0 pymupdf==1.21.1 prometheus-client==0.17.1 -sentry_sdk==1.39.1 +sentry_sdk==2.0.1