Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions qiita_ware/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,26 +53,21 @@ def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
if isdir(ebi_submission.ebi_dir):
rmtree(ebi_submission.ebi_dir)
ebi_submission.preprocessed_data.update_insdc_status(
'failed: %s' % str(error_msg))
LogEntry.create('Runtime', str(error_msg),
'failed: %s' % error_msg)
LogEntry.create('Runtime', error_msg,
info={'ebi_submission': preprocessed_data_id})
raise

# step 3: generate and write xml files
ebi_submission.write_xml_file(ebi_submission.generate_study_xml(),
'study_xml_fp',
ebi_submission.study_xml_fp)
ebi_submission.write_xml_file(ebi_submission.generate_sample_xml(),
'sample_xml_fp',
ebi_submission.sample_xml_fp)
ebi_submission.write_xml_file(ebi_submission.generate_experiment_xml(),
'experiment_xml_fp',
ebi_submission.experiment_xml_fp)
ebi_submission.write_xml_file(ebi_submission.generate_run_xml(),
'run_xml_fp',
ebi_submission.run_xml_fp)
ebi_submission.write_xml_file(ebi_submission.generate_submission_xml(),
'submission_xml_fp',
ebi_submission.submission_xml_fp)

# other steps
Expand Down
46 changes: 19 additions & 27 deletions qiita_ware/ebi.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from os import environ, close, makedirs, remove, listdir
from datetime import date, timedelta
from xml.etree import ElementTree as ET
from xml.dom import minidom
from xml.sax.saxutils import escape
from gzip import GzipFile
from functools import partial
Expand Down Expand Up @@ -272,8 +271,8 @@ def generate_study_xml(self):

Returns
-------
str
string with study XML values
Element
Object with study XML values
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dumb question: what exactly is Element? Is it an XML element from xml.etree? It would be nice to include the full object type in the docs.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can do that, but AFAIK it is not standard (then again, this is not a standard type). What about xml.etree.ElementTree.Element?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, we commonly write pd.DataFrame instead of pandas.DataFrame, or np.ndarray instead of numpy.ndarray.

So just ET.Element should be good enough.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use np and pd instead of numpy and pandas because np and pd are standard abbreviations. Is ET a standard abbreviation of xml.etree.ElementTree?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for checking! In that case, 👍 to ET.Element

"""
study_set = ET.Element('STUDY_SET', {
'xmlns:xsi': self.xmlns_xsi,
Expand Down Expand Up @@ -309,15 +308,15 @@ def generate_study_xml(self):
for pmid in self.pmids:
self._get_pmid_element(study_links, pmid)

return ET.tostring(study_set)
return study_set

def generate_sample_xml(self):
"""Generates the sample XML file

Returns
-------
str
string with sample XML values
Element
Object with sample XML values
"""
sample_set = ET.Element('SAMPLE_SET', {
'xmlns:xsi': self.xmlns_xsi,
Expand Down Expand Up @@ -353,7 +352,7 @@ def generate_sample_xml(self):
'SAMPLE_ATTRIBUTE',
sample_info)

return ET.tostring(sample_set)
return sample_set

def _generate_spot_descriptor(self, design, platform):
"""This XML element (and its subelements) must be written for every
Expand Down Expand Up @@ -384,8 +383,8 @@ def generate_experiment_xml(self):

Returns
-------
str
string with experiment XML values
Element
Object with experiment XML values
"""
study_alias = self._get_study_alias()
experiment_set = ET.Element('EXPERIMENT_SET', {
Expand Down Expand Up @@ -450,15 +449,15 @@ def generate_experiment_xml(self):
'EXPERIMENT_ATTRIBUTE',
sample_prep)

return ET.tostring(experiment_set)
return experiment_set

def generate_run_xml(self):
"""Generates the run XML file

Returns
-------
str
string with run XML values
Element
Object with run XML values
"""
run_set = ET.Element('RUN_SET', {
'xmlns:xsi': self.xmlns_xsi,
Expand Down Expand Up @@ -492,7 +491,7 @@ def generate_run_xml(self):
'checksum': md5}
)

return ET.tostring(run_set)
return run_set

def generate_submission_xml(self, submission_date=None):
"""Generates the submission XML file
Expand All @@ -505,8 +504,8 @@ def generate_submission_xml(self, submission_date=None):

Returns
-------
str
string with run XML values
Element
Object with submission XML values

Notes
-----
Expand Down Expand Up @@ -554,28 +553,21 @@ def generate_submission_xml(self, submission_date=None):
'HoldUntilDate': str(submission_date + timedelta(365))}
)

return ET.tostring(submission_set)
return submission_set

def write_xml_file(self, text, attribute_name, fp):
def write_xml_file(self, element, fp):
"""Writes an XML file after calling one of the XML generation
functions

Parameters
----------
text : str
The XML text that will be written
attribute_name : str
The name of the attribute in which to store the output filepath
element : Element
The Element to be written
fp : str
The filepath to which the XML will be written
"""
create_dir(self.xml_dir)
xml = minidom.parseString(text)

with open(fp, 'w') as outfile:
outfile.write(xml.toxml(encoding='UTF-8'))

setattr(self, attribute_name, fp)
ET.ElementTree(element).write(fp, encoding='UTF-8')

def generate_curl_command(
self,
Expand Down
78 changes: 59 additions & 19 deletions qiita_ware/test/test_ebi.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def test_add_dict_as_tags_and_values(self):

def test_generate_study_xml(self):
submission = EBISubmission(2, 'ADD')
obs = submission.generate_study_xml()
obs = ET.tostring(submission.generate_study_xml())
exp = ''.join([l.strip() for l in STUDYXML.splitlines()])
self.assertEqual(obs, exp)

Expand All @@ -148,7 +148,7 @@ def test_generate_sample_xml(self):
for k in keys_to_del:
del(submission.samples[k])
del(submission.samples_prep[k])
obs = submission.generate_sample_xml()
obs = ET.tostring(submission.generate_sample_xml())
exp = ''.join([l.strip() for l in SAMPLEXML.splitlines()])
self.assertEqual(obs, exp)

Expand All @@ -168,7 +168,7 @@ def test_generate_experiment_xml(self):
del(submission.samples[k])
del(submission.samples_prep[k])

obs = submission.generate_experiment_xml()
obs = ET.tostring(submission.generate_experiment_xml())
exp = EXPERIMENTXML % {
'organization_prefix': qiita_config.ebi_organization_prefix}
exp = ''.join([l.strip() for l in exp.splitlines()])
Expand All @@ -185,22 +185,23 @@ def test_generate_spot_descriptor(self):

def test_generate_submission_xml(self):
submission = EBISubmission(2, 'ADD')
obs = submission.generate_submission_xml(
submission_date=date(2015, 9, 3))
obs = ET.tostring(
submission.generate_submission_xml(
submission_date=date(2015, 9, 3)))
exp = SUBMISSIONXML % {
'submission_alias': submission._get_submission_alias(),
'center_name': qiita_config.ebi_center_name}
exp = ''.join([l.strip() for l in exp.splitlines()])
self.assertEqual(obs, exp)

def test_write_xml_file(self):
test_text = '<TESTING foo="bar"/>'
element = ET.Element('TESTING', {'foo': 'bar'})
e = EBISubmission(2, 'ADD')
e.write_xml_file(test_text, 'thing', 'testfile')
e.write_xml_file(element, 'testfile')
self.files_to_remove.append('testfile')

obs = open('testfile').read()
exp = '<?xml version="1.0" encoding="UTF-8"?>%s' % test_text
exp = "<?xml version='1.0' encoding='UTF-8'?>\n<TESTING foo=\"bar\" />"
self.assertEqual(obs, exp)

def test_generate_curl_command(self):
Expand Down Expand Up @@ -260,22 +261,54 @@ def write_demux_files(self, prep_template):

return ppd

def generate_new_prep_template_and_write_demux_files(self):
def generate_new_prep_template_and_write_demux_files(self,
valid_metadata=False):
"""Creates new prep-template/demux-file to avoid duplication of code"""

# ignoring warnings generated when adding templates
simplefilter("ignore")
# creating prep template without required EBI submission columns
metadata_dict = {
'SKD6.640190': {'center_name': 'ANL',
'center_project_name': 'Test Project'},
'SKM6.640187': {'center_name': 'ANL',
'center_project_name': 'Test Project'},
'SKD9.640182': {'center_name': 'ANL',
'center_project_name': 'Test Project'}
}
if not valid_metadata:
metadata_dict = {
'SKD6.640190': {'center_name': 'ANL',
'center_project_name': 'Test Project'},
'SKM6.640187': {'center_name': 'ANL',
'center_project_name': 'Test Project'},
'SKD9.640182': {'center_name': 'ANL',
'center_project_name': 'Test Project'}
}
investigation_type = None
else:
metadata_dict = {
'SKD6.640190': {'center_name': 'ANL',
'center_project_name': 'Test Project',
'platform': 'ILLUMINA',
'primer': 'GTGCCAGCMGCCGCGGTAA',
'experiment_design_description':
'microbiome of soil and rhizosphere',
'library_construction_protocol':
'PMID: 22402401'},
'SKM6.640187': {'center_name': 'ANL',
'center_project_name': 'Test Project',
'platform': 'ILLUMINA',
'primer': 'GTGCCAGCMGCCGCGGTAA',
'experiment_design_description':
'microbiome of soil and rhizosphere',
'library_construction_protocol':
'PMID: 22402401'},
'SKD9.640182': {'center_name': 'ANL',
'center_project_name': 'Test Project',
'platform': 'ILLUMINA',
'primer': 'GTGCCAGCMGCCGCGGTAA',
'experiment_design_description':
'microbiome of soil and rhizosphere',
'library_construction_protocol':
'PMID: 22402401'}
}
investigation_type = "Metagenomics"
metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
pt = PrepTemplate.create(metadata, Study(1), "18S")
pt = PrepTemplate.create(metadata, Study(1), "18S",
investigation_type=investigation_type)
ppd = self.write_demux_files(pt)

return ppd
Expand Down Expand Up @@ -304,6 +337,13 @@ def test_init_exceptions(self):
EBISubmission(ppd.id, 'ADD')
self.assertEqual(exp_text, str(e.exception))

def test_prep_with_less_samples_than_sample_template(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... is this test incomplete? At the very least, I would test that some of the internal values set up during __init__ are correct; for example, that self.samples actually contains the subset we want here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can test that.

# the next line generates a valid prep template with fewer samples than
# the sample template; we want to test that the EBISubmission can
# still be generated in that case
ppd = self.generate_new_prep_template_and_write_demux_files(True)
EBISubmission(ppd.id, 'ADD')

def test_generate_run_xml(self):
ppd = self.write_demux_files(PrepTemplate(1))
submission = EBISubmission(ppd.id, 'ADD')
Expand All @@ -317,7 +357,7 @@ def test_generate_run_xml(self):

submission.generate_demultiplexed_fastq(mtime=1)
self.files_to_remove.append(submission.ebi_dir)
obs = submission.generate_run_xml()
obs = ET.tostring(submission.generate_run_xml())

exp = RUNXML % {
'study_alias': submission._get_study_alias(),
Expand Down