qiita.slurm_resource_allocations insertion (qiita-spots#3415)

Gossty · web-flow · commit cca6de70322f · 2024-06-19T10:18:59.000-06:00
* Update to DB qiita.slurm_resource_allocations * connected tests to database * Update util.py * debugging changes to test * Update test_util.py * Update test_util.py * Tests update * Update test_meta_util.py * Updates to @antgonza comments * Updates to @charles-cowart comments * Resource Allocation Insertion into DB - Added functionality of updating resource allocation data into db in qiita_db/util.py - Added tests for the added functionality in qiita_db/test/test_util.py - Moved MaxRSS_helper function from qiita_core/util.py to qiita_db/util.py. - Moved MaxRSS_helper test from qiita_core/tests/test_util.py to qiita_db/test/test_util.py * Debug Memory Model * Update test_util.py * Added columns to slurm_resource_allocations table * Update test_processing_job.py * Update test_util.py * Automatic date adjustment in util.py * Update util.py
diff --git a/notebooks/resource-allocation/generate-allocation-summary-arrays.py b/notebooks/resource-allocation/generate-allocation-summary-arrays.py
@@ -1,4 +1,4 @@
-from qiita_core.util import MaxRSS_helper
+from qiita_db.util import MaxRSS_helper
 from qiita_db.software import Software
 import datetime
 from io import StringIO
diff --git a/notebooks/resource-allocation/generate-allocation-summary.py b/notebooks/resource-allocation/generate-allocation-summary.py
@@ -5,7 +5,7 @@
 from json import loads
 from os.path import join
 
-from qiita_core.util import MaxRSS_helper
+from qiita_db.util import MaxRSS_helper
 from qiita_db.exceptions import QiitaDBUnknownIDError
 from qiita_db.processing_job import ProcessingJob
 from qiita_db.software import Software
diff --git a/qiita_core/tests/test_util.py b/qiita_core/tests/test_util.py
@@ -10,7 +10,7 @@
 
 from qiita_core.util import (
     qiita_test_checker, execute_as_transaction, get_qiita_version,
-    is_test_environment, get_release_info, MaxRSS_helper)
+    is_test_environment, get_release_info)
 from qiita_db.meta_util import (
     generate_biom_and_metadata_release, generate_plugin_releases)
 import qiita_db as qdb
@@ -82,20 +82,6 @@ def test_get_release_info(self):
         self.assertEqual(biom_metadata_release, ('', '', ''))
         self.assertNotEqual(archive_release, ('', '', ''))
 
-    def test_MaxRSS_helper(self):
-        tests = [
-            ('6', 6.0),
-            ('6K', 6000),
-            ('6M', 6000000),
-            ('6G', 6000000000),
-            ('6.9', 6.9),
-            ('6.9K', 6900),
-            ('6.9M', 6900000),
-            ('6.9G', 6900000000),
-        ]
-        for x, y in tests:
-            self.assertEqual(MaxRSS_helper(x), y)
-
 
 if __name__ == '__main__':
     main()
diff --git a/qiita_core/util.py b/qiita_core/util.py
@@ -151,15 +151,3 @@ def get_release_info(study_status='public'):
     archive_release = ((md5sum, filepath, timestamp))
 
     return (biom_metadata_release, archive_release)
-
-
-def MaxRSS_helper(x):
-    if x[-1] == 'K':
-        y = float(x[:-1]) * 1000
-    elif x[-1] == 'M':
-        y = float(x[:-1]) * 1000000
-    elif x[-1] == 'G':
-        y = float(x[:-1]) * 1000000000
-    else:
-        y = float(x)
-    return y
diff --git a/qiita_db/support_files/patches/92.sql b/qiita_db/support_files/patches/92.sql
@@ -10,6 +10,9 @@ CREATE  TABLE qiita.slurm_resource_allocations (
 	extra_info           varchar DEFAULT NULL,
 	memory_used          bigint,
 	walltime_used        integer,
+	job_start            TIMESTAMP,
+	node_name            varchar DEFAULT NULL,
+	node_model           varchar DEFAULT NULL,
 	CONSTRAINT pk_slurm_resource_allocations_processing_job_id PRIMARY KEY (
     processing_job_id )
  );
diff --git a/qiita_db/support_files/patches/test_db_sql/92.sql b/qiita_db/support_files/patches/test_db_sql/92.sql
diff --git a/qiita_db/support_files/populate_test_db.sql b/qiita_db/support_files/populate_test_db.sql
@@ -329,16 +329,16 @@ INSERT INTO qiita.processing_job_status VALUES (6, 'waiting', 'The job is waitin
 -- Data for Name: processing_job; Type: TABLE DATA; Schema: qiita; Owner: antoniog
 --
 
-INSERT INTO qiita.processing_job VALUES ('6d368e16-2242-4cf8-87b4-a5dc40bb890b', 'test@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":false,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}', 3, NULL, NULL, NULL, NULL, false, NULL);
-INSERT INTO qiita.processing_job VALUES ('4c7115e8-4c8e-424c-bf25-96c292ca1931', 'test@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":true,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}', 3, NULL, NULL, NULL, NULL, false, NULL);
-INSERT INTO qiita.processing_job VALUES ('3c9991ab-6c14-4368-a48c-841e8837a79c', 'test@foo.bar', 3, '{"reference":1,"sortmerna_e_value":1,"sortmerna_max_pos":10000,"similarity":0.97,"sortmerna_coverage":0.97,"threads":1,"input_data":2}', 3, NULL, NULL, NULL, NULL, false, NULL);
-INSERT INTO qiita.processing_job VALUES ('b72369f9-a886-4193-8d3d-f7b504168e75', 'shared@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":true,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}', 3, NULL, '2015-11-22 21:15:00', NULL, NULL, false, NULL);
-INSERT INTO qiita.processing_job VALUES ('46b76f74-e100-47aa-9bf2-c0208bcea52d', 'test@foo.bar', 1, '{"max_barcode_errors": "1.5", "sequence_max_n": "0", "max_bad_run_length": "3", "phred_offset": "auto", "rev_comp": "False", "phred_quality_threshold": "3", "input_data": "1", "rev_comp_barcode": "False", "rev_comp_mapping_barcodes": "True", "min_per_read_length_fraction": "0.75", "barcode_type": "golay_12"}', 3, NULL, NULL, NULL, NULL, false, NULL);
-INSERT INTO qiita.processing_job VALUES ('80bf25f3-5f1d-4e10-9369-315e4244f6d5', 'test@foo.bar', 3, '{"reference": "2", "similarity": "0.97", "sortmerna_e_value": "1", "sortmerna_max_pos": "10000", "input_data": "2", "threads": "1", "sortmerna_coverage": "0.97"}', 3, NULL, NULL, NULL, NULL, false, NULL);
-INSERT INTO qiita.processing_job VALUES ('9ba5ae7a-41e1-4202-b396-0259aeaac366', 'test@foo.bar', 3, '{"reference": "1", "similarity": "0.97", "sortmerna_e_value": "1", "sortmerna_max_pos": "10000", "input_data": "2", "threads": "1", "sortmerna_coverage": "0.97"}', 3, NULL, NULL, NULL, NULL, false, NULL);
-INSERT INTO qiita.processing_job VALUES ('e5609746-a985-41a1-babf-6b3ebe9eb5a9', 'test@foo.bar', 3, '{"reference": "1", "similarity": "0.97", "sortmerna_e_value": "1", "sortmerna_max_pos": "10000", "input_data": "2", "threads": "1", "sortmerna_coverage": "0.97"}', 3, NULL, NULL, NULL, NULL, false, NULL);
-INSERT INTO qiita.processing_job VALUES ('6ad4d590-4fa3-44d3-9a8f-ddbb472b1b5f', 'test@foo.bar', 1, '{"max_barcode_errors": "1.5", "sequence_max_n": "0", "max_bad_run_length": "3", "phred_offset": "auto", "rev_comp": "False", "phred_quality_threshold": "3", "input_data": "1", "rev_comp_barcode": "False", "rev_comp_mapping_barcodes": "False", "min_per_read_length_fraction": "0.75", "barcode_type": "golay_12"}', 3, NULL, NULL, NULL, NULL, false, NULL);
-INSERT INTO qiita.processing_job VALUES ('8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0', 'test@foo.bar', 12, '{"biom_table": "8", "depth": "9000", "subsample_multinomial": "False"}', 3, NULL, NULL, NULL, NULL, false, NULL);
+INSERT INTO qiita.processing_job VALUES ('6d368e16-2242-4cf8-87b4-a5dc40bb890b', 'test@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":false,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}', 3, NULL, NULL, NULL, NULL, false, 1284411757); 
+INSERT INTO qiita.processing_job VALUES ('4c7115e8-4c8e-424c-bf25-96c292ca1931', 'test@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":true,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}', 3, NULL, NULL, NULL, NULL, false, 1287244546);
+INSERT INTO qiita.processing_job VALUES ('3c9991ab-6c14-4368-a48c-841e8837a79c', 'test@foo.bar', 3, '{"reference":1,"sortmerna_e_value":1,"sortmerna_max_pos":10000,"similarity":0.97,"sortmerna_coverage":0.97,"threads":1,"input_data":2}', 3, NULL, NULL, NULL, NULL, false, 1284411377);
+INSERT INTO qiita.processing_job VALUES ('b72369f9-a886-4193-8d3d-f7b504168e75', 'shared@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":true,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}', 3, NULL, '2015-11-22 21:15:00', NULL, NULL, false, 128552986);
+INSERT INTO qiita.processing_job VALUES ('46b76f74-e100-47aa-9bf2-c0208bcea52d', 'test@foo.bar', 1, '{"max_barcode_errors": "1.5", "sequence_max_n": "0", "max_bad_run_length": "3", "phred_offset": "auto", "rev_comp": "False", "phred_quality_threshold": "3", "input_data": "1", "rev_comp_barcode": "False", "rev_comp_mapping_barcodes": "True", "min_per_read_length_fraction": "0.75", "barcode_type": "golay_12"}', 3, NULL, NULL, NULL, NULL, false, 1279011391);
+INSERT INTO qiita.processing_job VALUES ('80bf25f3-5f1d-4e10-9369-315e4244f6d5', 'test@foo.bar', 3, '{"reference": "2", "similarity": "0.97", "sortmerna_e_value": "1", "sortmerna_max_pos": "10000", "input_data": "2", "threads": "1", "sortmerna_coverage": "0.97"}', 3, NULL, NULL, NULL, NULL, false, 1286151876);
+INSERT INTO qiita.processing_job VALUES ('9ba5ae7a-41e1-4202-b396-0259aeaac366', 'test@foo.bar', 3, '{"reference": "1", "similarity": "0.97", "sortmerna_e_value": "1", "sortmerna_max_pos": "10000", "input_data": "2", "threads": "1", "sortmerna_coverage": "0.97"}', 3, NULL, NULL, NULL, NULL, false, 1283300404);
+INSERT INTO qiita.processing_job VALUES ('e5609746-a985-41a1-babf-6b3ebe9eb5a9', 'test@foo.bar', 3, '{"reference": "1", "similarity": "0.97", "sortmerna_e_value": "1", "sortmerna_max_pos": "10000", "input_data": "2", "threads": "1", "sortmerna_coverage": "0.97"}', 3, NULL, NULL, NULL, NULL, false, 1275827198);
+INSERT INTO qiita.processing_job VALUES ('6ad4d590-4fa3-44d3-9a8f-ddbb472b1b5f', 'test@foo.bar', 1, '{"max_barcode_errors": "1.5", "sequence_max_n": "0", "max_bad_run_length": "3", "phred_offset": "auto", "rev_comp": "False", "phred_quality_threshold": "3", "input_data": "1", "rev_comp_barcode": "False", "rev_comp_mapping_barcodes": "False", "min_per_read_length_fraction": "0.75", "barcode_type": "golay_12"}', 3, NULL, NULL, NULL, NULL, false, 1266027);
+INSERT INTO qiita.processing_job VALUES ('8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0', 'test@foo.bar', 12, '{"biom_table": "8", "depth": "9000", "subsample_multinomial": "False"}', 3, NULL, NULL, NULL, NULL, false, 126652530);
 INSERT INTO qiita.processing_job VALUES ('063e553b-327c-4818-ab4a-adfe58e49860', 'test@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":false,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}', 1, NULL, NULL, NULL, NULL, true, NULL);
 INSERT INTO qiita.processing_job VALUES ('bcc7ebcd-39c1-43e4-af2d-822e3589f14d', 'test@foo.bar', 2, '{"min_seq_len":100,"max_seq_len":1000,"trim_seq_length":false,"min_qual_score":25,"max_ambig":6,"max_homopolymer":6,"max_primer_mismatch":0,"barcode_type":"golay_12","max_barcode_errors":1.5,"disable_bc_correction":false,"qual_score_window":0,"disable_primers":false,"reverse_primers":"disable","reverse_primer_mismatches":0,"truncate_ambi_bases":false,"input_data":1}', 2, NULL, '2015-11-22 21:00:00', 'demultiplexing', NULL, true, NULL);
 INSERT INTO qiita.processing_job VALUES ('d19f76ee-274e-4c1b-b3a2-a12d73507c55', 'shared@foo.bar', 3, '{"reference":1,"sortmerna_e_value":1,"sortmerna_max_pos":10000,"similarity":0.97,"sortmerna_coverage":0.97,"threads":1,"input_data":2}', 4, 1, '2015-11-22 21:30:00', 'generating demux file', NULL, true, NULL);
diff --git a/qiita_db/test/test_data/jobs_2024-02-21.tsv.gz b/qiita_db/test/test_data/jobs_2024-02-21.tsv.gz
diff --git a/qiita_db/test/test_data/slurm_data.txt.gz b/qiita_db/test/test_data/slurm_data.txt.gz
diff --git a/qiita_db/test/test_processing_job.py b/qiita_db/test/test_processing_job.py
@@ -970,7 +970,7 @@ def test_notification_mail_generation(self):
         pj = qdb.processing_job.ProcessingJob(jid)
         obs = pj._generate_notification_message('running', 'Yahoo!')
         exp = {'subject': ('Single Rarefaction: running, 8a7a8461-e8a1-'
-                           '4b4e-a428-1bc2f4d3ebd0 [Not Available]'),
+                           '4b4e-a428-1bc2f4d3ebd0 [126652530]'),
                'message': 'Analysis Job Single Rarefaction\n'
                           f'{qiita_config.base_url}/analysis/description/1/\n'
                           'New status: running'}
diff --git a/qiita_db/test/test_util.py b/qiita_db/test/test_util.py
@@ -1316,7 +1316,7 @@ def setUp(self):
         self.columns = [
                 "sName", "sVersion", "cID", "cName", "processing_job_id",
                 "parameters", "samples", "columns", "input_size", "extra_info",
-                "MaxRSSRaw", "ElapsedRaw"]
+                "MaxRSSRaw", "ElapsedRaw", "Start", "node_name", "node_model"]
 
         # df is a dataframe that represents a table with columns specified in
         # self.columns
@@ -1351,8 +1351,15 @@ def test_minimize_const(self):
         failures_df = qdb.util._resource_allocation_failures(
             self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')
         failures = failures_df.shape[0]
-        self.assertEqual(bm, qdb.util.mem_model3, msg="""Best memory model
-                                                doesn't match""")
+        self.assertEqual(bm, qdb.util.mem_model3,
+                         msg=f"""Best memory model
+                                 doesn't match
+                                 Coefficients:{k} {a} {b}
+                                 {qdb.util.mem_model1}, "qdb.util.mem_model1"
+                                 {qdb.util.mem_model2}, "qdb.util.mem_model2"
+                                 {qdb.util.mem_model3}, "qdb.util.mem_model3"
+                                 {qdb.util.mem_model4}, "qdb.util.mem_model4"
+                            """)
         self.assertEqual(failures, 0, "Number of failures must be 0")
 
         # check that the algorithm chooses correct model for ElapsedRaw and
@@ -1365,10 +1372,63 @@ def test_minimize_const(self):
             self.df, k, a, b, bm, self.col_name, 'ElapsedRaw')
         failures = failures_df.shape[0]
 
-        self.assertEqual(bm, qdb.util.time_model1, msg="""Best time model
-                                                   doesn't match""")
+        self.assertEqual(bm, qdb.util.time_model1,
+                         msg=f"""Best time model
+                                doesn't match
+                                Coefficients:{k} {a} {b}
+                                 {qdb.util.time_model1}, "qdb.util.time_model1"
+                                 {qdb.util.time_model2}, "qdb.util.time_model2"
+                                 {qdb.util.time_model3}, "qdb.util.time_model3"
+                                 {qdb.util.time_model4}, "qdb.util.time_model4"
+                                """)
         self.assertEqual(failures, 1, "Number of failures must be 1")
 
+    def test_MaxRSS_helper(self):
+        tests = [
+            ('6', 6.0),
+            ('6K', 6000),
+            ('6M', 6000000),
+            ('6G', 6000000000),
+            ('6.9', 6.9),
+            ('6.9K', 6900),
+            ('6.9M', 6900000),
+            ('6.9G', 6900000000),
+        ]
+        for x, y in tests:
+            self.assertEqual(qdb.util.MaxRSS_helper(x), y)
+
+    def test_db_update(self):
+        path_to_data = './qiita_db/test/test_data/slurm_data.txt.gz'
+        test_data = pd.read_csv(path_to_data, sep="|")
+        types = {
+            'Split libraries FASTQ': [
+                '6d368e16-2242-4cf8-87b4-a5dc40bb890b',
+                '4c7115e8-4c8e-424c-bf25-96c292ca1931',
+                'b72369f9-a886-4193-8d3d-f7b504168e75',
+                '46b76f74-e100-47aa-9bf2-c0208bcea52d',
+                '6ad4d590-4fa3-44d3-9a8f-ddbb472b1b5f'],
+            'Pick closed-reference OTUs': [
+                '3c9991ab-6c14-4368-a48c-841e8837a79c',
+                '80bf25f3-5f1d-4e10-9369-315e4244f6d5',
+                '9ba5ae7a-41e1-4202-b396-0259aeaac366',
+                'e5609746-a985-41a1-babf-6b3ebe9eb5a9',
+            ],
+            'Single Rarefaction': [
+                '8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0'
+            ]
+        }
+
+        qdb.util.update_resource_allocation_table(test=test_data)
+
+        for curr_cname, ids in types.items():
+            updated_df = qdb.util._retrieve_resource_data(
+                    curr_cname, self.SNAME, self.columns)
+            updated_ids_set = set(updated_df['processing_job_id'])
+            previous_ids_set = set(self.df['processing_job_id'])
+            for id in ids:
+                self.assertTrue(id in updated_ids_set)
+                self.assertFalse(id in previous_ids_set)
+
 
 STUDY_INFO = {
     'study_id': 1,
diff --git a/qiita_db/util.py b/qiita_db/util.py

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-from qiita_core.util import MaxRSS_helper`
	`1`	`+from qiita_db.util import MaxRSS_helper`
`2`	`2`	`from qiita_db.software import Software`
`3`	`3`	`import datetime`
`4`	`4`	`from io import StringIO`