Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion qiita_pet/handlers/qiita_redbiom.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
import redbiom._requests
import redbiom.util
import redbiom.fetch
import redbiom.admin
from tornado.gen import coroutine, Task
from tornado.web import HTTPError
from requests.exceptions import HTTPError as rHTTPError

from qiita_core.util import execute_as_transaction
from qiita_db.util import generate_study_list_without_artifacts
Expand All @@ -27,9 +29,18 @@ class RedbiomPublicSearch(BaseHandler):
def get(self, search):
    """Render the redbiom search page with the latest redbiom release date.

    Parameters
    ----------
    search : str
        Value passed in by the URL route; it is not used by this method.

    Notes
    -----
    Requests whose URI is not exactly ``/redbiom/`` are redirected there
    and nothing is rendered for them.
    """
    # making sure that if someone from a portal forces entry to this URI
    # we go to the main portal
    if self.request.uri != '/redbiom/':
        # redirect() finishes the request, so we must stop here: calling
        # render() afterwards fails, and there is no point in querying
        # redbiom for a page that will not be shown
        self.redirect('/redbiom/')
        return

    try:
        timestamps = redbiom.admin.get_timestamps()
    except rHTTPError:
        # best effort: the page is still rendered without a release date
        timestamps = []

    # the first reported timestamp is displayed as the last update
    latest_release = timestamps[0] if timestamps else 'Not reported'
    self.render('redbiom.html', latest_release=latest_release)

def _redbiom_metadata_search(self, query, contexts):
study_artifacts = defaultdict(lambda: defaultdict(list))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,54 @@ Shogun reference databases
- Genera: 2,264
- Species: 11,852
- Strains: 4,263

Metatranscriptome sample processing
------------------------------------

Sample processing guidelines for metatranscriptomic (metaT) data
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Total community RNA extracted from samples contains both coding and non-coding RNA. Typically, ribosomal RNA makes up >90% of the library if not depleted prior to library construction. Ribosomal depletion allows for mRNA enrichment. Even if you are dealing with ribosomal RNA-subtracted cDNA libraries, there will be some
residual ribosomal RNA in the libraries that you want to remove/separate from the non-ribosomal RNA sequences.

Ribosomal read filtering
^^^^^^^^^^^^^^^^^^^^^^^^

`SortMeRNA <https://bioinfo.lifl.fr/RNA/sortmerna/>`_
is used for removal of ribosomal reads from quality filtered metaT data

Latest SortMeRNA version: v2.1

Input: Quality filtered metaT reads (FASTA/FASTQ)
Ribosomal reads are identified by searching against pre-curated rRNA databases. Currently, rRNA databases covering bacteria, archaea and eukarya were downloaded and indexed from `SILVA <https://www.arb-silva.de>`_ and `Rfam <https://rfam.xfam.org>`_.
Currently indexed databases and their clustering ids:

- silva-bacterial-16s-id 90%
- silva-bacterial-23s-id 98%
- silva-archaeal-16s-id 95%
- silva-archaeal-23s-id 98%
- silva-eukarya-18s-id 95%
- silva-eukarya-28s-id 98%
- rfam-5s-database-id 98%
- rfam-5.8s-database-id 98%

The above databases and ID cut-offs were chosen to work with a range of samples including more diverse/complex environmental samples.

Building Custom databases
^^^^^^^^^^^^^^^^^^^^^^^^^
Custom databases can also be built in addition to the above mentioned databases.
Custom databases can be built by using the `ARB package <https://www.arb-silva.de/download/arb-files/>`_ to extract FASTA files for:

- 16S bacteria, 16S archaea and 18S eukarya using SSURef_NR99_119_SILVA_14_07_14_opt.arb
- 23S bacteria, 23S archaea and 28S eukarya using LSURef_119_SILVA_15_07_14_opt.arb

The built databases will then have to be indexed before running SortMeRNA.
Each reference database and its corresponding index are separated by ",", and multiple databases are separated by ":".


SortMeRNA Usage
^^^^^^^^^^^^^^^
SortMeRNA separates the ribosomal reads from the non-ribosomal reads in the input sample dataset (via BLAST search) and outputs two fasta/q files containing the ribosomal and non-ribosomal reads, respectively.
Additionally, a summary file showing the proportion of reads matching to each of the screened ribosomal databases can also be made available.
Default options have been set to report only the best alignment per read that reaches the E-value threshold.
For non ribo-depleted samples (i.e. total RNA), the ribosomal reads obtained from SortMeRNA can be further used in taxonomic/compositional analysis.
In the case of ribo-depleted samples, only the non-ribosomal reads are used in downstream analyses such as assembly, mapping, differential gene abundance analyses etc.
2 changes: 1 addition & 1 deletion qiita_pet/templates/redbiom.html
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@
{%block content%}
<small>
<!-- Date to be fixed once we fix: https://github.com/biocore/qiita/issues/2773 -->
Redbiom only searches on public data. Last update: December 18th, 2018. Note that you will only be able to expand and add artifacts to analyses if you are signed into Qiita.
Redbiom only searches on public data. Last update: <i>{{latest_release}}</i>. Note that you will only be able to expand and add artifacts to analyses if you are signed into Qiita.
<br/><br/>
<a href="{% raw qiita_config.portal_dir %}/static/doc/html/redbiom.html" class="btn btn-info btn-sm" target="_blank">Help and examples?</a>
<br/>
Expand Down
11 changes: 6 additions & 5 deletions qiita_ware/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def submit_EBI(artifact_id, action, send, test=False, test_size=False):
ebi_submission.submission_xml_fp]
total_size = sum([stat(tr).st_size for tr in to_review if tr is not None])
# note that the max for EBI is 10M but let's play it safe
max_size = 8.5e+6 if not test_size else 6000
max_size = 10e+6 if not test_size else 5000
if total_size > max_size:
LogEntry.create(
'Runtime', 'The submission: %d is larger than allowed (%d), will '
Expand All @@ -228,10 +228,11 @@ def submit_EBI(artifact_id, action, send, test=False, test_size=False):
cols_to_drop = cols_to_drop - {'taxon_id', 'scientific_name',
'description'}
all_samples = ebi_submission.sample_template.ebi_sample_accessions
samples = {k: all_samples[k] for k in ebi_submission.samples}
ebi_submission.write_xml_file(
ebi_submission.generate_sample_xml(samples, cols_to_drop),
ebi_submission.sample_xml_fp)
samples = [k for k in ebi_submission.samples if all_samples[k] is None]
if samples:
ebi_submission.write_xml_file(
ebi_submission.generate_sample_xml(samples, cols_to_drop),
ebi_submission.sample_xml_fp)

# now let's recalculate the size to make sure it's fine
new_total_size = sum([stat(tr).st_size
Expand Down
18 changes: 11 additions & 7 deletions qiita_ware/ebi.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def generate_sample_xml(self, samples=None, ignore_columns=None):
The list of samples to be included in the sample xml. If not
provided or an empty list is provided, all the samples are used
ignore_columns : list of str, optional
The list of columns to ignore during submission; helful for when
The list of columns to ignore during submission; helpful for when
the submissions are too large

Returns
Expand All @@ -411,14 +411,18 @@ def generate_sample_xml(self, samples=None, ignore_columns=None):
for sample_name in sorted(samples):
sample_info = dict(self.samples[sample_name])

if self._ebi_sample_accessions[sample_name] is None:
sample = ET.SubElement(sample_set, 'SAMPLE', {
'alias': self._get_sample_alias(sample_name),
'center_name': qiita_config.ebi_center_name}
)
sample_accession = self._ebi_sample_accessions[sample_name]
if self.action in ('ADD', 'VALIDATE'):
if sample_accession is not None:
continue
else:
sample = ET.SubElement(sample_set, 'SAMPLE', {
'alias': self._get_sample_alias(sample_name),
'center_name': qiita_config.ebi_center_name}
)
else:
sample = ET.SubElement(sample_set, 'SAMPLE', {
'accession': self._ebi_sample_accessions[sample_name],
'accession': sample_accession,
'center_name': qiita_config.ebi_center_name}
)

Expand Down
2 changes: 0 additions & 2 deletions qiita_ware/test/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,6 @@ def test_max_ebiena_curl_error(self):

rmtree(join(self.base_fp, '%d_ebi_submission' % aid), True)

submit_EBI


FASTA_EXAMPLE = """>1.SKB2.640194_1 X orig_bc=X new_bc=X bc_diffs=0
CCACCCAGTAAC
Expand Down
5 changes: 3 additions & 2 deletions qiita_ware/test/test_ebi.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,9 @@ def test_generate_sample_xml(self):

samples = ['1.SKB2.640194', '1.SKB3.640195']
obs = ET.tostring(submission.generate_sample_xml(samples=samples))
exp = ''.join([l.strip() for l in SAMPLEXML.splitlines()])
exp = ('<SAMPLE_SET xmlns:xsi="http://www.w3.org/2001/XMLSchema-'
'instance" xsi:noNamespaceSchemaLocation="ftp://ftp.sra.ebi.ac.'
'uk/meta/xsd/sra_1_3/SRA.sample.xsd" />')
self.assertEqual(obs.decode('ascii'), exp)

# removing samples so test text is easier to read
Expand All @@ -194,7 +196,6 @@ def test_generate_sample_xml(self):
del(submission.samples[k])
del(submission.samples_prep[k])
obs = ET.tostring(submission.generate_sample_xml())
exp = ''.join([l.strip() for l in SAMPLEXML.splitlines()])
self.assertEqual(obs.decode('ascii'), exp)

obs = ET.tostring(submission.generate_sample_xml(samples=[]))
Expand Down