Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 9 additions & 14 deletions qiita_ware/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,7 @@ def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
raise

# step 3: generate and write xml files
ebi_submission.write_xml_file(ebi_submission.generate_study_xml(),
ebi_submission.study_xml_fp)
ebi_submission.write_xml_file(ebi_submission.generate_sample_xml(),
ebi_submission.sample_xml_fp)
ebi_submission.write_xml_file(ebi_submission.generate_experiment_xml(),
ebi_submission.experiment_xml_fp)
ebi_submission.write_xml_file(ebi_submission.generate_run_xml(),
ebi_submission.run_xml_fp)
ebi_submission.write_xml_file(ebi_submission.generate_submission_xml(),
ebi_submission.submission_xml_fp)
ebi_submission.generate_xml_files()

if send:
# step 4: sending sequences
Expand Down Expand Up @@ -121,10 +112,14 @@ def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
raise ComputeError("EBI Submission failed! Log id: %d" % le.id)

ebi_submission.study.ebi_submission_status = 'submitted'
ebi_submission.study.ebi_study_accession = st_acc
ebi_submission.sample_template.ebi_sample_accessions = sa_acc
ebi_submission.sample_template.biosample_accessions = bio_acc
ebi_submission.prep_template.ebi_experiment_accessions = ex_acc
if st_acc:
ebi_submission.study.ebi_study_accession = st_acc
if sa_acc:
ebi_submission.sample_template.ebi_sample_accessions = sa_acc
if bio_acc:
ebi_submission.sample_template.biosample_accessions = bio_acc
if ex_acc:
ebi_submission.prep_template.ebi_experiment_accessions = ex_acc
ebi_submission.preprocessed_data.ebi_run_accessions = run_acc
else:
st_acc, sa_acc, bio_acc, ex_acc, run_acc = None, None, None, None, None
Expand Down
177 changes: 129 additions & 48 deletions qiita_ware/ebi.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from gzip import GzipFile
from functools import partial

from future.utils import viewitems
from future.utils import viewitems, viewkeys
from skbio.util import safe_md5, create_dir

from qiita_core.qiita_settings import qiita_config
Expand Down Expand Up @@ -160,13 +160,13 @@ def __init__(self, preprocessed_data_id, action):

self.ebi_dir = join(qiita_config.working_dir,
'ebi_submission_%d' % preprocessed_data_id)
get_output_fp = partial(join, self.ebi_dir, 'xml_dir')
self.xml_dir = get_output_fp()
self.study_xml_fp = get_output_fp('study.xml')
self.sample_xml_fp = get_output_fp('sample.xml')
self.experiment_xml_fp = get_output_fp('experiment.xml')
self.run_xml_fp = get_output_fp('run.xml')
self.submission_xml_fp = get_output_fp('submission.xml')
self.xml_dir = join(self.ebi_dir, 'xml_dir')
self.study_xml_fp = None
self.sample_xml_fp = None
self.experiment_xml_fp = None
self.run_xml_fp = None
self.submission_xml_fp = None

self.pmids = self.study.pmids

# getting the restrictions
Expand Down Expand Up @@ -234,6 +234,11 @@ def __init__(self, preprocessed_data_id, action):
self._experiment_aliases = {}
self._run_aliases = {}

self._ebi_sample_accessions = \
self.sample_template.ebi_sample_accessions
self._ebi_experiment_accessions = \
self.prep_template.ebi_experiment_accessions

def _get_study_alias(self):
"""Format alias using ``self.preprocessed_data_id``"""
study_alias_format = '%s_sid_%s'
Expand Down Expand Up @@ -350,9 +355,15 @@ def generate_study_xml(self):

return study_set

def generate_sample_xml(self):
def generate_sample_xml(self, samples=None):
"""Generates the sample XML file

Parameters
----------
samples : list of str, optional
The list of samples to be included in the sample xml. If not
provided or an empty list is provided, all the samples are used

Returns
-------
ET.Element
Expand All @@ -362,8 +373,11 @@ def generate_sample_xml(self):
'xmlns:xsi': self.xmlns_xsi,
"xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "sample"})

for sample_name, sample_info in sorted(viewitems(self.samples)):
sample_info = dict(sample_info)
if not samples:
samples = viewkeys(self.samples)

for sample_name in sorted(samples):
sample_info = dict(self.samples[sample_name])
sample = ET.SubElement(sample_set, 'SAMPLE', {
'alias': self._get_sample_alias(sample_name),
'center_name': qiita_config.ebi_center_name}
Expand Down Expand Up @@ -418,22 +432,40 @@ def _generate_spot_descriptor(self, design, platform):
base_coord = ET.SubElement(read_spec, 'BASE_COORD')
base_coord.text = '1'

def generate_experiment_xml(self):
def generate_experiment_xml(self, samples=None):
"""Generates the experiment XML file

Parameters
----------
samples : list of str, optional
The list of samples to be included in the experiment xml. If not
provided, all the samples are used

Returns
-------
ET.Element
Object with experiment XML values
"""
study_alias = self._get_study_alias()
study_accession = self.study.ebi_study_accession
if study_accession:
study_ref_dict = {'accession': study_accession}
else:
study_ref_dict = {'refname': self._get_study_alias()}

experiment_set = ET.Element('EXPERIMENT_SET', {
'xmlns:xsi': self.xmlns_xsi,
"xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "experiment"})
for sample_name, sample_prep in sorted(self.samples_prep.items()):
sample_alias = self._get_sample_alias(sample_name)

samples = samples if samples is not None else viewkeys(self.samples)

for sample_name in sorted(samples):
experiment_alias = self._get_experiment_alias(sample_name)
sample_prep = dict(self.samples_prep[sample_name])
if self._ebi_sample_accessions[sample_name]:
sample_descriptor_dict = {
'accession': self._ebi_sample_accessions[sample_name]}
else:
sample_descriptor_dict = {
'refname': self._get_sample_alias(sample_name)}

platform = sample_prep.pop('platform')
experiment = ET.SubElement(experiment_set, 'EXPERIMENT', {
Expand All @@ -442,18 +474,14 @@ def generate_experiment_xml(self):
)
title = ET.SubElement(experiment, 'TITLE')
title.text = experiment_alias
ET.SubElement(experiment, 'STUDY_REF', {
'refname': study_alias}
)
ET.SubElement(experiment, 'STUDY_REF', study_ref_dict)

design = ET.SubElement(experiment, 'DESIGN')
design_description = ET.SubElement(design,
'DESIGN_DESCRIPTION')
edd = sample_prep.pop('experiment_design_description')
design_description.text = escape(clean_whitespace(edd))
ET.SubElement(
design, 'SAMPLE_DESCRIPTOR', {'refname': sample_alias}
)
ET.SubElement(design, 'SAMPLE_DESCRIPTOR', sample_descriptor_dict)

# this is the library construction section. The only required field
# is library_construction_protocol, the others are optional
Expand Down Expand Up @@ -513,10 +541,15 @@ def generate_run_xml(self):
run_set = ET.Element('RUN_SET', {
'xmlns:xsi': self.xmlns_xsi,
"xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "run"})
for sample_name, sample_prep in viewitems(self.samples_prep):
for sample_name, sample_prep in sorted(viewitems(self.samples_prep)):
sample_prep = dict(sample_prep)

experiment_alias = self._get_experiment_alias(sample_name)
if self._ebi_experiment_accessions[sample_name]:
experiment_ref_dict = {
'accession': self._ebi_experiment_accessions[sample_name]}
else:
experiment_alias = self._get_experiment_alias(sample_name)
experiment_ref_dict = {'refname': experiment_alias}

# We only submit fastq
file_type = 'fastq'
Expand All @@ -529,9 +562,7 @@ def generate_run_xml(self):
'alias': self._get_run_alias(sample_name),
'center_name': qiita_config.ebi_center_name}
)
ET.SubElement(run, 'EXPERIMENT_REF', {
'refname': experiment_alias}
)
ET.SubElement(run, 'EXPERIMENT_REF', experiment_ref_dict)
data_block = ET.SubElement(run, 'DATA_BLOCK')
files = ET.SubElement(data_block, 'FILES')
ET.SubElement(files, 'FILE', {
Expand Down Expand Up @@ -573,23 +604,26 @@ def generate_submission_xml(self, submission_date=None):

actions = ET.SubElement(submission, 'ACTIONS')

study_action = ET.SubElement(actions, 'ACTION')
ET.SubElement(study_action, self.action, {
'schema': 'study',
'source': basename(self.study_xml_fp)}
)
if self.study_xml_fp:
study_action = ET.SubElement(actions, 'ACTION')
ET.SubElement(study_action, self.action, {
'schema': 'study',
'source': basename(self.study_xml_fp)}
)

sample_action = ET.SubElement(actions, 'ACTION')
ET.SubElement(sample_action, self.action, {
'schema': 'sample',
'source': basename(self.sample_xml_fp)}
)
if self.sample_xml_fp:
sample_action = ET.SubElement(actions, 'ACTION')
ET.SubElement(sample_action, self.action, {
'schema': 'sample',
'source': basename(self.sample_xml_fp)}
)

experiment_action = ET.SubElement(actions, 'ACTION')
ET.SubElement(experiment_action, self.action, {
'schema': 'experiment',
'source': basename(self.experiment_xml_fp)}
)
if self.experiment_xml_fp:
experiment_action = ET.SubElement(actions, 'ACTION')
ET.SubElement(experiment_action, self.action, {
'schema': 'experiment',
'source': basename(self.experiment_xml_fp)}
)

run_action = ET.SubElement(actions, 'ACTION')
ET.SubElement(run_action, self.action, {
Expand Down Expand Up @@ -620,6 +654,51 @@ def write_xml_file(self, element, fp):
create_dir(self.xml_dir)
ET.ElementTree(element).write(fp, encoding='UTF-8')

def generate_xml_files(self):
"""Generate all the XML files"""
get_output_fp = partial(join, self.xml_dir)

# The study.xml file needs to be generated if and only if the study
# does NOT have an ebi_study_accession
if not self.study.ebi_study_accession:
self.study_xml_fp = get_output_fp('study.xml')
self.write_xml_file(self.generate_study_xml(), self.study_xml_fp)

# The sample.xml file needs to be generated if and only if there are
# samples in the current submission that do NOT have an
# ebi_sample_accession
new_samples = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be the prep template and not the sample template?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the ebi_sample_accessions are stored in the sample template.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

K, thanks!

sample for sample, accession in viewitems(
self.sample_template.ebi_sample_accessions)
if accession is None}
new_samples = new_samples.intersection(self.samples)
if new_samples:
self.sample_xml_fp = get_output_fp('sample.xml')
self.write_xml_file(self.generate_sample_xml(new_samples),
self.sample_xml_fp)

# The experiment.xml needs to be generated if and only if there are
# samples in the current submission that do NOT have an
# ebi_experiment_accession
new_samples = {
sample for sample, accession in viewitems(
self.prep_template.ebi_experiment_accessions)
if accession is None}
new_samples = new_samples.intersection(self.samples)
if new_samples:
self.experiment_xml_fp = get_output_fp('experiment.xml')
self.write_xml_file(self.generate_experiment_xml(new_samples),
self.experiment_xml_fp)

# Generate the run.xml as it should always be generated
self.run_xml_fp = get_output_fp('run.xml')
self.write_xml_file(self.generate_run_xml(), self.run_xml_fp)

# The submission.xml is always generated
self.submission_xml_fp = get_output_fp('submission.xml')
self.write_xml_file(self.generate_submission_xml(),
self.submission_xml_fp)

def generate_curl_command(
self,
ebi_seq_xfer_user=qiita_config.ebi_seq_xfer_user,
Expand Down Expand Up @@ -743,13 +822,15 @@ def parse_EBI_reply(self, curl_result):
% curl_result)

study_elem = root.findall("STUDY")
if len(study_elem) > 1:
raise EBISubmissionError(
"Multiple study tags found in EBI reply: %d\n%s"
% (len(study_elem),
"".join([ET.tostring(s) for s in study_elem])))
study_elem = study_elem[0]
study_accession = study_elem.get('accession')
if study_elem:
if len(study_elem) > 1:
raise EBISubmissionError(
"Multiple study tags found in EBI reply: %d"
% len(study_elem))
study_elem = study_elem[0]
study_accession = study_elem.get('accession')
else:
study_accession = None

sample_accessions = {}
biosample_accessions = {}
Expand Down
Loading