qiita-spots · ElDeveloper · Jun 4, 2018 · Apr 7, 2018 · Apr 15, 2018 · May 30, 2018
diff --git a/qiita_db/metadata_template/test/support_files/a_qiimp_wb.xlsx b/qiita_db/metadata_template/test/support_files/a_qiimp_wb.xlsx
diff --git a/qiita_db/metadata_template/test/support_files/empty_qiimp_wb.xlsx b/qiita_db/metadata_template/test/support_files/empty_qiimp_wb.xlsx
diff --git a/qiita_db/metadata_template/test/support_files/not_a_qiimp_wb.xlsx b/qiita_db/metadata_template/test/support_files/not_a_qiimp_wb.xlsx
diff --git a/qiita_db/metadata_template/test/test_util.py b/qiita_db/metadata_template/test/test_util.py
@@ -7,6 +7,8 @@
 # -----------------------------------------------------------------------------
 
 from six import StringIO
+from inspect import currentframe, getfile
+from os.path import dirname, abspath, join
 from unittest import TestCase, main
 import warnings
 
@@ -69,6 +71,31 @@ def test_load_template_to_dataframe(self):
         exp.index.name = 'sample_name'
         assert_frame_equal(obs, exp)
 
+    def test_load_template_to_dataframe_xlsx(self):
+        mfp = join(dirname(abspath(getfile(currentframe()))), 'support_files')
+
+        # a QIIMP workbook: the parsed frame must match EXP_QIIMP
+        fp = join(mfp, 'a_qiimp_wb.xlsx')
+        obs = qdb.metadata_template.util.load_template_to_dataframe(fp)
+        exp = pd.DataFrame.from_dict(EXP_QIIMP, dtype=str)
+        exp.index.name = 'sample_name'
+        assert_frame_equal(obs, exp)
+
+        # an empty QIIMP workbook: loading must fail on the missing column
+        fp = join(mfp, 'empty_qiimp_wb.xlsx')
+        with self.assertRaises(qdb.exceptions.QiitaDBColumnError) as error:
+            qdb.metadata_template.util.load_template_to_dataframe(fp)
+        self.assertEqual(
+            str(error.exception), "The 'sample_name' column is missing from "
+            "your template, this file cannot be parsed.")
+
+        # a non-QIIMP workbook: the parsed frame must match EXP_NOT_QIIMP
+        fp = join(mfp, 'not_a_qiimp_wb.xlsx')
+        obs = qdb.metadata_template.util.load_template_to_dataframe(fp)
+        exp = pd.DataFrame.from_dict(EXP_NOT_QIIMP, dtype=str)
+        exp.index.name = 'sample_name'
+        assert_frame_equal(obs, exp)
+
     def test_load_template_to_dataframe_qiime_map(self):
         obs = qdb.metadata_template.util.load_template_to_dataframe(
             StringIO(QIIME_TUTORIAL_MAP_SUBSET), index='#SampleID')
@@ -844,5 +871,19 @@ def test_get_pgsql_reserved_words(self):
     '1.SKD8.640184\tCGTAGAGCTCTC\tANL\tTest Project\tNone\tEMP\tBBBB\tAAAA\t'
     'GTGCCAGCMGCCGCGGTAA\tILLUMINA\ts_G1_L001_sequences\tValue for sample 2\n')
 
+EXP_QIIMP = {  # a_qiimp_wb.xlsx as {column: {sample_name: value}}
+    'asfaewf': {'sample': 'f', 'oijnmk': 'f'},
+    'pheno': {'sample': 'med', 'oijnmk': 'missing: not provided'},
+    'bawer': {'sample': 'a', 'oijnmk': 'b'},
+    'aelrjg': {'sample': 'asfe', 'oijnmk': 'asfs'}
+}
+
+EXP_NOT_QIIMP = {  # not_a_qiimp_wb.xlsx as {column: {sample_name: value}}
+    'myownidea': {
+        'sample5': 'I skipped some',
+        'sample1': 'sampleoneinfo',
+        'sample2': 'sampletwoinfo'}
+}
+
 if __name__ == '__main__':
     main()
diff --git a/qiita_db/util.py b/qiita_db/util.py
@@ -51,6 +51,9 @@
 from os.path import join, basename, isdir, exists
 from os import walk, remove, listdir, makedirs, rename
 from shutil import move, rmtree, copy as shutil_copy
+from openpyxl import load_workbook
+from tempfile import mkstemp
+from csv import writer as csv_writer
 from json import dumps
 from datetime import datetime
 from itertools import chain
@@ -1730,6 +1733,34 @@ def _get_filehandle(filepath_or, *args, **kwargs):
     if _is_string_or_bytes(filepath_or):
         if h5py.is_hdf5(filepath_or):
             fh, own_fh = h5py.File(filepath_or, *args, **kwargs), True
+        elif filepath_or.endswith('.xlsx'):
+            # due to extension, let's assume Excel file
+            wb = load_workbook(filename=filepath_or, data_only=True)
+            sheetnames = wb.sheetnames
+            # a QIIMP workbook has exactly these sheets, in exactly this order
+            first_cell_index = 0
+            is_qiimp_wb = False
+            if sheetnames == ["Metadata", "Validation", "Data Dictionary",
+                              "metadata_schema", "metadata_form",
+                              "Instructions"]:
+                first_cell_index = 1
+                is_qiimp_wb = True
+            first_sheet = wb[sheetnames[0]]
+            cell_range = range(first_cell_index, first_sheet.max_column)
+            _, fp = mkstemp(suffix='.txt')
+            with open(fp, 'w') as fh:
+                cfh = csv_writer(fh, delimiter='\t')
+                for r in first_sheet.rows:
+                    if is_qiimp_wb:
+                        # check contents of first column; if they are a zero
+                        # (not a valid QIIMP sample_id) or a "No more than
+                        # max samples" message, there are no more valid rows,
+                        # so don't examine any more rows.
+                        fcv = str(r[cell_range[0]].value)
+                        if fcv == "0" or fcv.startswith("No more than"):
+                            break
+                    cfh.writerow([r[x].value for x in cell_range])
+            fh, own_fh = open(fp, *args, **kwargs), True
         else:
             fh, own_fh = open(filepath_or, *args, **kwargs), True
     else:

diff --git a/setup.py b/setup.py
@@ -106,7 +106,7 @@
                         'tornado==3.1.1', 'toredis', 'redis', 'six',
                         'pyparsing', 'h5py >= 2.3.1', 'biom-format',
                         'natsort', 'networkx < 2.0', 'humanize',
-                        'scikit-bio == 0.4.2', 'wtforms == 2.0.1',
+                        'scikit-bio == 0.4.2', 'wtforms == 2.0.1', 'openpyxl',
                         'sphinx-bootstrap-theme', 'Sphinx >= 1.2.2',
                         'gitpython', 'qiita-files', 'redbiom==0.1.0-dev',
                         'sphinx_rtd_theme'],