-
Couldn't load subscription status.
- Fork 79
Ebi: ability to extend a submission #1489
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
eb89f3b
b5b5f57
0022009
2facea1
670231e
96128d8
b1732f6
6651167
51b038e
1d80ea3
ea328ce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,7 +7,7 @@ | |
| from gzip import GzipFile | ||
| from functools import partial | ||
|
|
||
| from future.utils import viewitems | ||
| from future.utils import viewitems, viewkeys | ||
| from skbio.util import safe_md5, create_dir | ||
|
|
||
| from qiita_core.qiita_settings import qiita_config | ||
|
|
@@ -160,13 +160,13 @@ def __init__(self, preprocessed_data_id, action): | |
|
|
||
| self.ebi_dir = join(qiita_config.working_dir, | ||
| 'ebi_submission_%d' % preprocessed_data_id) | ||
| get_output_fp = partial(join, self.ebi_dir, 'xml_dir') | ||
| self.xml_dir = get_output_fp() | ||
| self.study_xml_fp = get_output_fp('study.xml') | ||
| self.sample_xml_fp = get_output_fp('sample.xml') | ||
| self.experiment_xml_fp = get_output_fp('experiment.xml') | ||
| self.run_xml_fp = get_output_fp('run.xml') | ||
| self.submission_xml_fp = get_output_fp('submission.xml') | ||
| self.xml_dir = join(self.ebi_dir, 'xml_dir') | ||
| self.study_xml_fp = None | ||
| self.sample_xml_fp = None | ||
| self.experiment_xml_fp = None | ||
| self.run_xml_fp = None | ||
| self.submission_xml_fp = None | ||
|
|
||
| self.pmids = self.study.pmids | ||
|
|
||
| # getting the restrictions | ||
|
|
@@ -234,6 +234,11 @@ def __init__(self, preprocessed_data_id, action): | |
| self._experiment_aliases = {} | ||
| self._run_aliases = {} | ||
|
|
||
| self._ebi_sample_accessions = \ | ||
| self.sample_template.ebi_sample_accessions | ||
| self._ebi_experiment_accessions = \ | ||
| self.prep_template.ebi_experiment_accessions | ||
|
|
||
| def _get_study_alias(self): | ||
| """Format alias using ``self.preprocessed_data_id``""" | ||
| study_alias_format = '%s_sid_%s' | ||
|
|
@@ -350,9 +355,14 @@ def generate_study_xml(self): | |
|
|
||
| return study_set | ||
|
|
||
| def generate_sample_xml(self): | ||
| def generate_sample_xml(self, samples=None): | ||
| """Generates the sample XML file | ||
|
|
||
| Parameters | ||
| ---------- | ||
| samples : list of str, optional | ||
| The list of samples to be included in the sample xml | ||
|
|
||
| Returns | ||
| ------- | ||
| ET.Element | ||
|
|
@@ -362,8 +372,10 @@ def generate_sample_xml(self): | |
| 'xmlns:xsi': self.xmlns_xsi, | ||
| "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "sample"}) | ||
|
|
||
| for sample_name, sample_info in sorted(viewitems(self.samples)): | ||
| sample_info = dict(sample_info) | ||
| samples = samples if samples is not None else viewkeys(self.samples) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a rather subjective comment, but this line doesn't need to be a one-liner, it should just be an if statement: if samples is not None:
samples = viewkeys(self.samples)There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're correct, although the correct if is: if not samples:
samples = viewkeys(self.samples) |
||
|
|
||
| for sample_name in sorted(samples): | ||
| sample_info = dict(self.samples[sample_name]) | ||
| sample = ET.SubElement(sample_set, 'SAMPLE', { | ||
| 'alias': self._get_sample_alias(sample_name), | ||
| 'center_name': qiita_config.ebi_center_name} | ||
|
|
@@ -418,22 +430,40 @@ def _generate_spot_descriptor(self, design, platform): | |
| base_coord = ET.SubElement(read_spec, 'BASE_COORD') | ||
| base_coord.text = '1' | ||
|
|
||
| def generate_experiment_xml(self): | ||
| def generate_experiment_xml(self, samples=None): | ||
| """Generates the experiment XML file | ||
|
|
||
| Parameters | ||
| ---------- | ||
| samples : list of str, optional | ||
| The list of samples to be included in the experiment xml | ||
|
|
||
| Returns | ||
| ------- | ||
| ET.Element | ||
| Object with experiment XML values | ||
| """ | ||
| study_alias = self._get_study_alias() | ||
| study_accession = self.study.ebi_study_accession | ||
| if study_accession: | ||
| study_ref_dict = {'accession': study_accession} | ||
| else: | ||
| study_ref_dict = {'refname': self._get_study_alias()} | ||
|
|
||
| experiment_set = ET.Element('EXPERIMENT_SET', { | ||
| 'xmlns:xsi': self.xmlns_xsi, | ||
| "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "experiment"}) | ||
| for sample_name, sample_prep in sorted(self.samples_prep.items()): | ||
| sample_alias = self._get_sample_alias(sample_name) | ||
|
|
||
| samples = samples if samples is not None else viewkeys(self.samples) | ||
|
|
||
| for sample_name in sorted(samples): | ||
| experiment_alias = self._get_experiment_alias(sample_name) | ||
| sample_prep = dict(self.samples_prep[sample_name]) | ||
| if self._ebi_sample_accessions[sample_name]: | ||
| sample_descriptor_dict = { | ||
| 'accession': self._ebi_sample_accessions[sample_name]} | ||
| else: | ||
| sample_descriptor_dict = { | ||
| 'refname': self._get_sample_alias(sample_name)} | ||
|
|
||
| platform = sample_prep.pop('platform') | ||
| experiment = ET.SubElement(experiment_set, 'EXPERIMENT', { | ||
|
|
@@ -442,18 +472,14 @@ def generate_experiment_xml(self): | |
| ) | ||
| title = ET.SubElement(experiment, 'TITLE') | ||
| title.text = experiment_alias | ||
| ET.SubElement(experiment, 'STUDY_REF', { | ||
| 'refname': study_alias} | ||
| ) | ||
| ET.SubElement(experiment, 'STUDY_REF', study_ref_dict) | ||
|
|
||
| design = ET.SubElement(experiment, 'DESIGN') | ||
| design_description = ET.SubElement(design, | ||
| 'DESIGN_DESCRIPTION') | ||
| edd = sample_prep.pop('experiment_design_description') | ||
| design_description.text = escape(clean_whitespace(edd)) | ||
| ET.SubElement( | ||
| design, 'SAMPLE_DESCRIPTOR', {'refname': sample_alias} | ||
| ) | ||
| ET.SubElement(design, 'SAMPLE_DESCRIPTOR', sample_descriptor_dict) | ||
|
|
||
| # this is the library construction section. The only required field | ||
| # is library_construction_protocol, the others are optional | ||
|
|
@@ -513,10 +539,15 @@ def generate_run_xml(self): | |
| run_set = ET.Element('RUN_SET', { | ||
| 'xmlns:xsi': self.xmlns_xsi, | ||
| "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "run"}) | ||
| for sample_name, sample_prep in viewitems(self.samples_prep): | ||
| for sample_name, sample_prep in sorted(viewitems(self.samples_prep)): | ||
| sample_prep = dict(sample_prep) | ||
|
|
||
| experiment_alias = self._get_experiment_alias(sample_name) | ||
| if self._ebi_experiment_accessions[sample_name]: | ||
| experiment_ref_dict = { | ||
| 'accession': self._ebi_experiment_accessions[sample_name]} | ||
| else: | ||
| experiment_alias = self._get_experiment_alias(sample_name) | ||
| experiment_ref_dict = {'refname': experiment_alias} | ||
|
|
||
| # We only submit fastq | ||
| file_type = 'fastq' | ||
|
|
@@ -529,9 +560,7 @@ def generate_run_xml(self): | |
| 'alias': self._get_run_alias(sample_name), | ||
| 'center_name': qiita_config.ebi_center_name} | ||
| ) | ||
| ET.SubElement(run, 'EXPERIMENT_REF', { | ||
| 'refname': experiment_alias} | ||
| ) | ||
| ET.SubElement(run, 'EXPERIMENT_REF', experiment_ref_dict) | ||
| data_block = ET.SubElement(run, 'DATA_BLOCK') | ||
| files = ET.SubElement(data_block, 'FILES') | ||
| ET.SubElement(files, 'FILE', { | ||
|
|
@@ -573,23 +602,26 @@ def generate_submission_xml(self, submission_date=None): | |
|
|
||
| actions = ET.SubElement(submission, 'ACTIONS') | ||
|
|
||
| study_action = ET.SubElement(actions, 'ACTION') | ||
| ET.SubElement(study_action, self.action, { | ||
| 'schema': 'study', | ||
| 'source': basename(self.study_xml_fp)} | ||
| ) | ||
| if self.study_xml_fp: | ||
| study_action = ET.SubElement(actions, 'ACTION') | ||
| ET.SubElement(study_action, self.action, { | ||
| 'schema': 'study', | ||
| 'source': basename(self.study_xml_fp)} | ||
| ) | ||
|
|
||
| sample_action = ET.SubElement(actions, 'ACTION') | ||
| ET.SubElement(sample_action, self.action, { | ||
| 'schema': 'sample', | ||
| 'source': basename(self.sample_xml_fp)} | ||
| ) | ||
| if self.sample_xml_fp: | ||
| sample_action = ET.SubElement(actions, 'ACTION') | ||
| ET.SubElement(sample_action, self.action, { | ||
| 'schema': 'sample', | ||
| 'source': basename(self.sample_xml_fp)} | ||
| ) | ||
|
|
||
| experiment_action = ET.SubElement(actions, 'ACTION') | ||
| ET.SubElement(experiment_action, self.action, { | ||
| 'schema': 'experiment', | ||
| 'source': basename(self.experiment_xml_fp)} | ||
| ) | ||
| if self.experiment_xml_fp: | ||
| experiment_action = ET.SubElement(actions, 'ACTION') | ||
| ET.SubElement(experiment_action, self.action, { | ||
| 'schema': 'experiment', | ||
| 'source': basename(self.experiment_xml_fp)} | ||
| ) | ||
|
|
||
| run_action = ET.SubElement(actions, 'ACTION') | ||
| ET.SubElement(run_action, self.action, { | ||
|
|
@@ -620,6 +652,51 @@ def write_xml_file(self, element, fp): | |
| create_dir(self.xml_dir) | ||
| ET.ElementTree(element).write(fp, encoding='UTF-8') | ||
|
|
||
| def generate_xml_files(self): | ||
| """Generate the XML files""" | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be desirable to add numpy docs here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is already a numpy doc. This function doesn't take parameters, doesn't return and doesn't raise anything specific. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Gotcha. Dumb comment |
||
| get_output_fp = partial(join, self.xml_dir) | ||
|
|
||
| # The study.xml file needs to be generated if and only if the study | ||
| # does NOT have an ebi_study_accession | ||
| if not self.study.ebi_study_accession: | ||
| self.study_xml_fp = get_output_fp('study.xml') | ||
| self.write_xml_file(self.generate_study_xml(), self.study_xml_fp) | ||
|
|
||
| # The sample.xml file needs to be generated if and only if there are | ||
| # samples in the current submission that do NOT have an | ||
| # ebi_sample_accession | ||
| new_samples = { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be the prep template and not the sample template? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, the ebi_sample_accessions are stored in the sample template. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. K, thanks! |
||
| sample for sample, accession in viewitems( | ||
| self.sample_template.ebi_sample_accessions) | ||
| if accession is None} | ||
| new_samples = new_samples.intersection(self.samples) | ||
| if new_samples: | ||
| self.sample_xml_fp = get_output_fp('sample.xml') | ||
| self.write_xml_file(self.generate_sample_xml(new_samples), | ||
| self.sample_xml_fp) | ||
|
|
||
| # The experiment.xml needs to be generated if and only if there are | ||
| # samples in the current submission that do NOT have an | ||
| # ebi_experiment_accession | ||
| new_samples = { | ||
| sample for sample, accession in viewitems( | ||
| self.prep_template.ebi_experiment_accessions) | ||
| if accession is None} | ||
| new_samples = new_samples.intersection(self.samples) | ||
| if new_samples: | ||
| self.experiment_xml_fp = get_output_fp('experiment.xml') | ||
| self.write_xml_file(self.generate_experiment_xml(new_samples), | ||
| self.experiment_xml_fp) | ||
|
|
||
| # Generate the run.xml as it should always be generated | ||
| self.run_xml_fp = get_output_fp('run.xml') | ||
| self.write_xml_file(self.generate_run_xml(), self.run_xml_fp) | ||
|
|
||
| # The submission.xml is always generated | ||
| self.submission_xml_fp = get_output_fp('submission.xml') | ||
| self.write_xml_file(self.generate_submission_xml(), | ||
| self.submission_xml_fp) | ||
|
|
||
| def generate_curl_command( | ||
| self, | ||
| ebi_seq_xfer_user=qiita_config.ebi_seq_xfer_user, | ||
|
|
@@ -743,13 +820,15 @@ def parse_EBI_reply(self, curl_result): | |
| % curl_result) | ||
|
|
||
| study_elem = root.findall("STUDY") | ||
| if len(study_elem) > 1: | ||
| raise EBISubmissionError( | ||
| "Multiple study tags found in EBI reply: %d\n%s" | ||
| % (len(study_elem), | ||
| "".join([ET.tostring(s) for s in study_elem]))) | ||
| study_elem = study_elem[0] | ||
| study_accession = study_elem.get('accession') | ||
| if study_elem: | ||
| if len(study_elem) > 1: | ||
| raise EBISubmissionError( | ||
| "Multiple study tags found in EBI reply: %d" | ||
| % len(study_elem)) | ||
| study_elem = study_elem[0] | ||
| study_accession = study_elem.get('accession') | ||
| else: | ||
| study_accession = None | ||
|
|
||
| sample_accessions = {} | ||
| biosample_accessions = {} | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add that if
None is passed, then all the samples are used? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BTW, is an empty list an allowed input? If not, should there be a check?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point about the empty list - in case of an empty list or None, all the samples should be used.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Awesome, thanks - would be great to have an accompanying test case.
On (Oct-07-15|15:08), Jose Navas wrote:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added
2015-10-07 15:45 GMT-07:00 Yoshiki Vázquez Baeza notifications@github.com:
Jose Navas