Strip malformed escape code check from DBBuddy

biologyguy · Dec 6, 2021 · 43e9de4
1 parent 06897fa
commit 43e9de4
Show file tree

Hide file tree

Showing 3 changed files with 23 additions and 28 deletions.
diff --git a/buddysuite/DatabaseBuddy.py b/buddysuite/DatabaseBuddy.py
@@ -614,16 +614,10 @@ def _stdout(message, quiet=False, format_in=None, format_out=None):
     output = ""
     if format_in:
         format_in = format_in if type(format_in) == list else [format_in]
-        for _format in format_in:
-            if not re.search(r"\\033\[[0-9]*m", _format):
-                raise AttributeError('Malformed format_in attribute escape code')
         output += "".join(format_in)
 
     if format_out:
         format_out = format_out if type(format_out) == list else [format_out]
-        for _format in format_out:
-            if not re.search(r"\\033\[[0-9]*m", _format):
-                raise AttributeError('Malformed format_out attribute escape code')
         output += "%s%s" % (message, "".join(format_out))
     else:
         output += "%s\033[m" % message
@@ -1013,10 +1007,12 @@ def search_ncbi(self, _type):
         self.parse_error_file()
 
         results = self.results_file.read().split("\n### END ###\n")
-        results = [x for x in results if x != ""]
+        results = [re.sub(r'\\n', '\n', x)[2:-1] for x in results if x != ""]
         accns = []
         for result in results:
-            result = Entrez.read(StringIO(result))
+            read_file = br.TempFile(byte_mode=True)
+            read_file.write(str.encode(result, encoding="utf-8"))
+            result = Entrez.read(read_file.get_handle(mode='r'))
             accns += result["IdList"]
         if not accns:
             br._stderr("NCBI returned no %s results\n\n" % _type)
@@ -1063,12 +1059,14 @@ def fetch_summaries(self, database):
 
         results = re.sub("<ERROR>.*</ERROR>", "", results)
         results = results.split("\n### END ###\n")
-        results = [x for x in results if x != ""]
+        results = [re.sub(r'\\n', '\n', x)[2:-1] for x in results if x != ""]
 
         # Sift through all the results and grab summary information
         summaries = []
         for result in results:
-            parser = Entrez.parse(StringIO(result))
+            read_file = br.TempFile(byte_mode=True)
+            read_file.write(str.encode(result, encoding="utf-8"))
+            parser = Entrez.parse(read_file.get_handle(mode='r'))
             counter = 0
             while True:
                 try:
@@ -1121,11 +1119,13 @@ def fetch_summaries(self, database):
         self.parse_error_file()
 
         results = self.results_file.read().split("\n### END ###\n")
-        results = [x for x in results if x and "<ERROR>Empty id list" not in x]
+        results = [re.sub(r'\\n', '\n', x)[2:-1] for x in results if x and "<ERROR>Empty id list" not in x]
 
         taxa = {}
         for result in results:
-            for summary in Entrez.parse(StringIO(result)):
+            read_file = br.TempFile(byte_mode=True)
+            read_file.write(str.encode(result, encoding="utf-8"))
+            for summary in Entrez.parse(read_file.get_handle(mode='r')):
                 taxa[summary["TaxId"]] = "Unclassified" if "ScientificName" not in summary \
                     else summary["ScientificName"]
 
@@ -1189,7 +1189,7 @@ def __init__(self, _dbbuddy, server='http://rest.ensembl.org/'):
         self.parse_error_file()
         if self.species:
             self.species = self.species["species"]
-            self.species = {x["display_name"]: x for x in self.species if x["display_name"]}
+            self.species = {x["name"]: x for x in self.species if x["name"]}
         else:
             self.species = {}
         self.max_attempts = 5
@@ -1261,8 +1261,8 @@ def search_ensembl(self):
         self.results_file.clear()
         species = [name for name, info in self.species.items()]
         for search_term in self.dbbuddy.search_terms:
-            br._stderr("Searching Ensembl for %s...\n" % search_term)
-            # br.run_multicore_function(species, self._mc_search, [search_term], quiet=True)
+            # br._stderr("Searching Ensembl for %s...\n" % search_term)
+            br.run_multicore_function(species, self._mc_search, [search_term], quiet=True)
             # TODO: fix multicore --> Many REST requests are failing unless a single request is sent at a time
             for i in species:
                 self._mc_search(i, [search_term])
@@ -1467,16 +1467,15 @@ def postcmd(self, stop, line):
         return stop
 
     def dump_session(self):
-        # Need to remove Lock()s to pickle
-        for client in [client for db, client in self.dbbuddy.server_clients.items() if client]:
-            client.lock = False
+        # Need to remove server client objects to pickle
+        clients = self.dbbuddy.server_clients
+        self.dbbuddy.server_clients = {"ncbi": False, "ensembl": False, "uniprot": False}
         self.crash_file.save("%s_undo" % self.crash_file.path)
         self.crash_file.open()
-        dill.dump(self.dbbuddy, self.crash_file.handle, protocol=-1)
+        dill.dump(self.dbbuddy, self.crash_file.get_handle(), protocol=-1)  # protocol -1 = Highest available
         self.crash_file.close()
         self.undo = True
-        for client in [client for db, client in self.dbbuddy.server_clients.items() if client]:
-            client.lock = Lock()
+        self.dbbuddy.server_clients = clients
 
     def default(self, line):
         if line == "exit":

diff --git a/buddysuite/tests/test_databasebuddy/test_db_ui.py b/buddysuite/tests/test_databasebuddy/test_db_ui.py
@@ -113,7 +113,7 @@ def test_liveshell_init(monkeypatch, capsys, hf):
     liveshell = Db.LiveShell(dbbuddy, crash_file)
     assert type(liveshell.tmpdir) == br.TempDir
     assert liveshell.terminal_default == "\033[m\033[40m\033[97m"
-    assert liveshell.prompt == '[95m[1mDbBuddy[m[40m[97m[1m>[m[40m[97m '
+    assert liveshell.prompt == '\033[95m\033[1mDbBuddy\033[m\033[40m\033[97m\033[1m>\033[m\033[40m\033[97m '
     assert hf.string2hash(liveshell.doc_leader) == "e71aa4976437bdb0c22eeaacfaea6f9f"
     assert hash(liveshell.dbbuddy) == hash(dbbuddy)
     assert liveshell.crash_file == crash_file
@@ -1216,7 +1216,6 @@ def test_main(monkeypatch):
 
 
 # ######################  loose command line ui helpers ###################### #
-@pytest.mark.loose
 def test_exit(monkeypatch, capsys):
     class MockExitUsage(object):
         @staticmethod
@@ -1238,7 +1237,6 @@ def save():
     assert "('DatabaseBuddy', '%s', 'LiveShell', 0)" % Db.VERSION.short() in out
 
 
-@pytest.mark.loose
 def test_error(monkeypatch, capsys):
     monkeypatch.setattr(Db, "LiveShell", mock_systemexit)
 
@@ -1256,7 +1254,6 @@ def test_error(monkeypatch, capsys):
     assert "can be loaded by launching DatabaseBuddy and using the 'load' command." in err
 
 
-@pytest.mark.loose
 def test_retrieve_accessions(monkeypatch):
     # Don't actually run anything, retrieve_summary() is tested elsewhere
     monkeypatch.setattr(Db, "retrieve_summary", lambda *_: True)
@@ -1281,7 +1278,6 @@ def test_retrieve_accessions(monkeypatch):
         assert dbbuddy.out_format == out_format
 
 
-@pytest.mark.loose
 def test_retrieve_sequences(monkeypatch, capsys, sb_resources, hf):
     # Don't actually run anything, retrieve_summary() is tested elsewhere
     monkeypatch.setattr(Db, "retrieve_summary", lambda *_: True)
@@ -1325,7 +1321,6 @@ def test_retrieve_sequences(monkeypatch, capsys, sb_resources, hf):
     assert out == '\x1b[91mAborted...\n\n\x1b[m\x1b[40m'
 
 
-@pytest.mark.loose
 def test_guess_db(capsys, hf):
     test_in_args = deepcopy(in_args)
     test_in_args.guess_database = True

diff --git a/buddysuite/tests/unit_test_resources/mock_resources/test_databasebuddy_clients/dbREADME b/buddysuite/tests/unit_test_resources/mock_resources/test_databasebuddy_clients/dbREADME
@@ -2,5 +2,6 @@ Every python version will save binary formats slightly different, so dbbuddy sav
 must be made for each version supported. This needs to be done for ALL versions every
 time a new version is added.
 
-Simply launch databasebuddy under the Python version in question, and do a search for casp9.
+Simply launch databasebuddy under the Python version in question, do a search for casp9,
+then save the session in mock_resources/test_databasebuddy_clients.
 Unit tests will need to be updated because the number of sequences will probably change.