EBIvariation · tcezard · Jul 7, 2025 · Jul 3, 2025 · Jul 4, 2025 · tcezard
diff --git a/eva_sub_cli/semantic_metadata.py b/eva_sub_cli/semantic_metadata.py
@@ -4,6 +4,7 @@
 from datetime import datetime
 
 import yaml
+from requests import HTTPError
 
 from retry import retry
 from ebi_eva_common_pyutils.biosamples_communicators import NoAuthHALCommunicator
@@ -121,8 +122,14 @@ def check_all_analysis_run_accessions(self):
     def check_accession_in_ena(self, ena_accession, accession_type, json_path):
         try:
             res = download_xml_from_ena(f'https://www.ebi.ac.uk/ena/browser/api/xml/{ena_accession}')
-        except Exception:
-            self.add_error(json_path, f'{accession_type} {ena_accession} does not exist in ENA or is private')
+        except HTTPError as e:
+            # We cannot currently differentiate between the service returning an error and the accession not existing
+            if e.response.status_code == 500:
+                self.add_error(json_path, f'{accession_type} {ena_accession} does not exist in ENA or is private')
+            else:
+                self.add_error(json_path, f'{accession_type} {ena_accession} does not exist in ENA or is private')
+        except Exception as e:
+            self.add_error(json_path, f'Unexpected error occurred when checking {accession_type} {ena_accession}')
 
     def check_project_accession(self, project_acc, json_path):
         self.check_accession_in_ena(project_acc, 'Project', json_path)
@@ -216,7 +223,10 @@ def _validate_biosample_against_checklist(self, sample_data, json_path, accessio
 
     def _should_bypass_error(self, sample_data, error):
         try:
-            created_dated = datetime.strptime(sample_data['create'],'%Y-%m-%dT%H:%M:%S.%fZ')
+            try:
+                created_dated = datetime.strptime(sample_data['create'],'%Y-%m-%dT%H:%M:%S.%fZ')
+            except ValueError:
+                created_dated = datetime.strptime(sample_data['create'],'%Y-%m-%dT%H:%M:%SZ')
             if created_dated < threshold_2023 and (
                     'collection date' in error or
                     'geographic location (country and/or sea)' in error

diff --git a/tests/test_semantic_metadata.py b/tests/test_semantic_metadata.py
@@ -1,6 +1,8 @@
 from unittest import TestCase
 from unittest.mock import patch
+from requests import HTTPError, Response
 
+import pytest
 from ebi_eva_common_pyutils.biosamples_communicators import NoAuthHALCommunicator
 
 from eva_sub_cli.semantic_metadata import SemanticMetadataChecker
@@ -11,7 +13,8 @@
         {"bioSampleAccession": "SAME00002"},
         {"bioSampleAccession": "SAME00003"},
         {"bioSampleAccession": "SAME00004"},
-        {"bioSampleAccession": "SAME00005"}
+        {"bioSampleAccession": "SAME00005"},
+        {"bioSampleAccession": "SAME00006"}
     ]
 }
 valid_sample = {
@@ -50,6 +53,15 @@
     }
 }
 
+old_invalid_sample2 = {
+    'accession': 'SAME00005',
+    'name': 'sample4',
+    "create": "2011-10-10T08:45:15Z",
+    'characteristics': {
+        'organism': [{'text': 'Viridiplantae'}]
+    }
+}
+
 
 class TestSemanticMetadata(TestCase):
 
@@ -62,7 +74,7 @@ def test_check_all_project_accessions(self):
         }
         checker = SemanticMetadataChecker(metadata)
         with patch('eva_sub_cli.semantic_metadata.download_xml_from_ena') as m_ena_download:
-            m_ena_download.side_effect = [True, True, Exception('problem downloading')]
+            m_ena_download.side_effect = [True, True, HTTPError('problem downloading', response=Response())]
             checker.check_all_project_accessions()
             self.assertEqual(checker.errors, [
                 {'property': '/project/childProjects/1', 'description': 'Project PRJEBNA does not exist in ENA or is private'}
@@ -170,7 +182,7 @@ def test_check_all_scientific_names(self):
     def test_check_existing_biosamples_with_checklist(self):
         checker = SemanticMetadataChecker(metadata)
         with patch.object(SemanticMetadataChecker, '_get_biosample',
-                          side_effect=[valid_sample, ValueError, invalid_sample1, invalid_sample2, old_invalid_sample]) as m_get_sample:
+                          side_effect=[valid_sample, ValueError, invalid_sample1, invalid_sample2, old_invalid_sample, old_invalid_sample2]) as m_get_sample:
             checker.check_existing_biosamples()
             self.assertEqual(
                 checker.errors[0],
@@ -196,14 +208,25 @@ def test_check_existing_biosamples_with_checklist(self):
     def test_check_existing_biosamples(self):
         checker = SemanticMetadataChecker(metadata, sample_checklist=None)
         with patch.object(NoAuthHALCommunicator, 'follows_link',
-                          side_effect=[valid_sample, ValueError, invalid_sample1, invalid_sample2, old_invalid_sample]) as m_follows_link:
+                          side_effect=[valid_sample, ValueError, invalid_sample1, invalid_sample2, old_invalid_sample, old_invalid_sample2]) as m_follows_link:
             checker.check_existing_biosamples()
             self.assertEqual(checker.errors, [
                 {'description': 'SAME00002 does not exist or is private','property': '/sample/1/bioSampleAccession'},
                 {'description': 'Existing sample SAME00003 does not have a valid collection date', 'property': '/sample/2/bioSampleAccession'},
                 {'description': 'Existing sample SAME00004 does not have a valid collection date', 'property': '/sample/3/bioSampleAccession'},
                 {'description': 'Existing sample SAME00004 does not have a valid geographic location', 'property': '/sample/3/bioSampleAccession'}])
 
+    @pytest.mark.skip(reason='Contact BioSample API')
+    def test_check_existing_real_biosamples(self):
+        metadata = {
+            "sample": [
+                {"bioSampleAccession": "SAMN01894452"}
+            ]
+        }
+        checker = SemanticMetadataChecker(metadata, sample_checklist=None)
+        checker.check_existing_biosamples()
+        print(checker.errors)
+
     def test_check_analysis_alias_coherence(self):
         metadata = {
             "analysis": [