qiita-spots · ElDeveloper · Apr 27, 2020 · Apr 13, 2020 · Apr 13, 2020 · Apr 13, 2020
diff --git a/qiita_pet/handlers/qiita_redbiom.py b/qiita_pet/handlers/qiita_redbiom.py
@@ -13,8 +13,10 @@
 import redbiom._requests
 import redbiom.util
 import redbiom.fetch
+import redbiom.admin
 from tornado.gen import coroutine, Task
 from tornado.web import HTTPError
+from requests.exceptions import HTTPError as rHTTPError
 
 from qiita_core.util import execute_as_transaction
 from qiita_db.util import generate_study_list_without_artifacts
@@ -27,9 +29,18 @@ class RedbiomPublicSearch(BaseHandler):
     def get(self, search):
         # making sure that if someone from a portal forces entry to this URI
         # we go to the main portal
+        try:
+            timestamps = redbiom.admin.get_timestamps()
+        except (rHTTPError):
+            timestamps = []
+
+        if timestamps:
+            latest_release = timestamps[0]
+        else:
+            latest_release = 'Not reported'
         if self.request.uri != '/redbiom/':
             self.redirect('/redbiom/')
-        self.render('redbiom.html')
+        self.render('redbiom.html', latest_release=latest_release)
 
     def _redbiom_metadata_search(self, query, contexts):
         study_artifacts = defaultdict(lambda: defaultdict(list))

diff --git a/qiita_pet/support_files/doc/source/processingdata/processing-recommendations.rst b/qiita_pet/support_files/doc/source/processingdata/processing-recommendations.rst
@@ -152,3 +152,54 @@ Shogun reference databases
      - Genera: 2,264
      - Species: 11,852
      - Strains: 4,263
+
+Metatranscriptome sample processing
+------------------------------------
+
+Sample processing guidelines for metatranscriptomic (metaT) data
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Total community RNA extracted from samples contains both coding and non-coding RNA. Typically, ribosomal RNA makes up >90% of the library if not depleted prior to library construction. Ribosomal depletion allows for mRNA enrichment. Even if you are dealing with ribosomal RNA-subtracted cDNA libraries, there will be some
+residual ribosomal RNA in the libraries that you want to remove/separate from the non-ribosomal RNA sequences.
+
+Ribosomal read filtering
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+`SortMeRNA <https://bioinfo.lifl.fr/RNA/sortmerna/>`_
+is used for removal of ribosomal reads from quality-filtered metaT data.
+
+Latest SortMeRNA version: v2.1
+
+Input: Quality-filtered metaT reads (FASTA/FASTQ).
+Ribosomal reads are identified by searching against pre-curated rRNA databases. Currently, rRNA databases covering bacteria, archaea and eukarya have been downloaded and indexed from `SILVA <https://www.arb-silva.de>`_ and `Rfam <https://rfam.xfam.org>`_.
+Currently indexed databases and their clustering ids:
+
+- silva-bacterial-16s-id 90%
+- silva-bacterial-23s-id 98%
+- silva-archaeal-16s-id 95%
+- silva-archaeal-23s-id 98%
+- silva-eukarya-18s-id 95%
+- silva-eukarya-28s-id 98%
+- rfam-5s-database-id 98%
+- rfam-5.8s-database-id 98%
+
+The above databases and ID cut-offs were chosen to work with a range of samples including more diverse/complex environmental samples.
+
+Building Custom databases
+^^^^^^^^^^^^^^^^^^^^^^^^^
+Custom databases can also be built in addition to the above-mentioned databases.
+Custom databases can be built by using the `ARB package <https://www.arb-silva.de/download/arb-files/>`_ to extract FASTA files for:
+
+- 16S bacteria, 16S archaea and 18S eukarya using SSURef_NR99_119_SILVA_14_07_14_opt.arb
+- 23S bacteria, 23S archaea and 28S eukarya using LSURef_119_SILVA_15_07_14_opt.arb
+
+The built databases will then have to be indexed before running SortMeRNA.
+Reference database(s) and their corresponding indexes are separated by "," and multiple databases are separated by ":".
+
+
+SortMeRNA Usage
+^^^^^^^^^^^^^^^
+SortMeRNA separates the ribosomal from the non-ribosomal reads in the input sample dataset (via BLAST search) and outputs two fasta/q files containing the ribosomal and non-ribosomal reads respectively.
+Additionally, a summary file showing the proportion of reads matching to each of the screened ribosomal databases can also be made available. 
+Default options have been set to report only the best alignment per read that reaches the E-value threshold.
+For non-ribo-depleted samples (i.e. total RNA), the ribosomal reads obtained from SortMeRNA can be further used in taxonomic/compositional analysis.
+In the case of ribo-depleted samples, only the non-ribosomal reads are used in downstream analyses such as assembly, mapping, differential gene abundance analyses, etc.
diff --git a/qiita_pet/templates/redbiom.html b/qiita_pet/templates/redbiom.html
@@ -180,7 +180,7 @@
 {%block content%}
   <small>
     <!-- Date to be fixed once we fix: https://github.com/biocore/qiita/issues/2773 -->
-    Redbiom only searches on public data. Last update: December 18th, 2018. Note that you will only be able to expand and add artifacts to analyses if you are signed into Qiita.
+    Redbiom only searches on public data. Last update: <i>{{latest_release}}</i>. Note that you will only be able to expand and add artifacts to analyses if you are signed into Qiita.
     <br/><br/>
     <a href="{% raw qiita_config.portal_dir %}/static/doc/html/redbiom.html" class="btn btn-info btn-sm" target="_blank">Help and examples?</a>
     <br/>

diff --git a/qiita_ware/commands.py b/qiita_ware/commands.py
@@ -207,7 +207,7 @@ def submit_EBI(artifact_id, action, send, test=False, test_size=False):
                  ebi_submission.submission_xml_fp]
     total_size = sum([stat(tr).st_size for tr in to_review if tr is not None])
     # note that the max for EBI is 10M but let's play it safe
-    max_size = 8.5e+6 if not test_size else 6000
+    max_size = 10e+6 if not test_size else 5000
     if total_size > max_size:
         LogEntry.create(
             'Runtime', 'The submission: %d is larger than allowed (%d), will '
@@ -228,10 +228,11 @@ def submit_EBI(artifact_id, action, send, test=False, test_size=False):
         cols_to_drop = cols_to_drop - {'taxon_id', 'scientific_name',
                                        'description'}
         all_samples = ebi_submission.sample_template.ebi_sample_accessions
-        samples = {k: all_samples[k] for k in ebi_submission.samples}
-        ebi_submission.write_xml_file(
-            ebi_submission.generate_sample_xml(samples, cols_to_drop),
-            ebi_submission.sample_xml_fp)
+        samples = [k for k in ebi_submission.samples if all_samples[k] is None]
+        if samples:
+            ebi_submission.write_xml_file(
+                ebi_submission.generate_sample_xml(samples, cols_to_drop),
+                ebi_submission.sample_xml_fp)
 
         # now let's recalculate the size to make sure it's fine
         new_total_size = sum([stat(tr).st_size

diff --git a/qiita_ware/ebi.py b/qiita_ware/ebi.py
@@ -393,7 +393,7 @@ def generate_sample_xml(self, samples=None, ignore_columns=None):
             The list of samples to be included in the sample xml. If not
             provided or an empty list is provided, all the samples are used
         ignore_columns : list of str, optional
-            The list of columns to ignore during submission; helful for when
+            The list of columns to ignore during submission; helpful for when
             the submissions are too large
 
         Returns
@@ -411,14 +411,18 @@ def generate_sample_xml(self, samples=None, ignore_columns=None):
         for sample_name in sorted(samples):
             sample_info = dict(self.samples[sample_name])
 
-            if self._ebi_sample_accessions[sample_name] is None:
-                sample = ET.SubElement(sample_set, 'SAMPLE', {
-                    'alias': self._get_sample_alias(sample_name),
-                    'center_name': qiita_config.ebi_center_name}
-                )
+            sample_accession = self._ebi_sample_accessions[sample_name]
+            if self.action in ('ADD', 'VALIDATE'):
+                if sample_accession is not None:
+                    continue
+                else:
+                    sample = ET.SubElement(sample_set, 'SAMPLE', {
+                        'alias': self._get_sample_alias(sample_name),
+                        'center_name': qiita_config.ebi_center_name}
+                    )
             else:
                 sample = ET.SubElement(sample_set, 'SAMPLE', {
-                    'accession': self._ebi_sample_accessions[sample_name],
+                    'accession': sample_accession,
                     'center_name': qiita_config.ebi_center_name}
                 )
 

diff --git a/qiita_ware/test/test_commands.py b/qiita_ware/test/test_commands.py
@@ -236,8 +236,6 @@ def test_max_ebiena_curl_error(self):
 
         rmtree(join(self.base_fp, '%d_ebi_submission' % aid), True)
 
-    submit_EBI
-
 
 FASTA_EXAMPLE = """>1.SKB2.640194_1 X orig_bc=X new_bc=X bc_diffs=0
 CCACCCAGTAAC

diff --git a/qiita_ware/test/test_ebi.py b/qiita_ware/test/test_ebi.py
@@ -177,7 +177,9 @@ def test_generate_sample_xml(self):
 
         samples = ['1.SKB2.640194', '1.SKB3.640195']
         obs = ET.tostring(submission.generate_sample_xml(samples=samples))
-        exp = ''.join([l.strip() for l in SAMPLEXML.splitlines()])
+        exp = ('<SAMPLE_SET xmlns:xsi="http://www.w3.org/2001/XMLSchema-'
+               'instance" xsi:noNamespaceSchemaLocation="ftp://ftp.sra.ebi.ac.'
+               'uk/meta/xsd/sra_1_3/SRA.sample.xsd" />')
         self.assertEqual(obs.decode('ascii'), exp)
 
         # removing samples so test text is easier to read
@@ -194,7 +196,6 @@ def test_generate_sample_xml(self):
             del(submission.samples[k])
             del(submission.samples_prep[k])
         obs = ET.tostring(submission.generate_sample_xml())
-        exp = ''.join([l.strip() for l in SAMPLEXML.splitlines()])
         self.assertEqual(obs.decode('ascii'), exp)
 
         obs = ET.tostring(submission.generate_sample_xml(samples=[]))