Skip to content

Commit

Permalink
Strip malformed escape code check from DBBuddy
Browse files Browse the repository at this point in the history
  • Loading branch information
Stephen Bond authored and Stephen Bond committed Dec 6, 2021
1 parent 06897fa commit 43e9de4
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 28 deletions.
41 changes: 20 additions & 21 deletions buddysuite/DatabaseBuddy.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,16 +614,10 @@ def _stdout(message, quiet=False, format_in=None, format_out=None):
output = ""
if format_in:
format_in = format_in if type(format_in) == list else [format_in]
for _format in format_in:
if not re.search(r"\\033\[[0-9]*m", _format):
raise AttributeError('Malformed format_in attribute escape code')
output += "".join(format_in)

if format_out:
format_out = format_out if type(format_out) == list else [format_out]
for _format in format_out:
if not re.search(r"\\033\[[0-9]*m", _format):
raise AttributeError('Malformed format_out attribute escape code')
output += "%s%s" % (message, "".join(format_out))
else:
output += "%s\033[m" % message
Expand Down Expand Up @@ -1013,10 +1007,12 @@ def search_ncbi(self, _type):
self.parse_error_file()

results = self.results_file.read().split("\n### END ###\n")
results = [x for x in results if x != ""]
results = [re.sub(r'\\n', '\n', x)[2:-1] for x in results if x != ""]
accns = []
for result in results:
result = Entrez.read(StringIO(result))
read_file = br.TempFile(byte_mode=True)
read_file.write(str.encode(result, encoding="utf-8"))
result = Entrez.read(read_file.get_handle(mode='r'))
accns += result["IdList"]
if not accns:
br._stderr("NCBI returned no %s results\n\n" % _type)
Expand Down Expand Up @@ -1063,12 +1059,14 @@ def fetch_summaries(self, database):

results = re.sub("<ERROR>.*</ERROR>", "", results)
results = results.split("\n### END ###\n")
results = [x for x in results if x != ""]
results = [re.sub(r'\\n', '\n', x)[2:-1] for x in results if x != ""]

# Sift through all the results and grab summary information
summaries = []
for result in results:
parser = Entrez.parse(StringIO(result))
read_file = br.TempFile(byte_mode=True)
read_file.write(str.encode(result, encoding="utf-8"))
parser = Entrez.parse(read_file.get_handle(mode='r'))
counter = 0
while True:
try:
Expand Down Expand Up @@ -1121,11 +1119,13 @@ def fetch_summaries(self, database):
self.parse_error_file()

results = self.results_file.read().split("\n### END ###\n")
results = [x for x in results if x and "<ERROR>Empty id list" not in x]
results = [re.sub(r'\\n', '\n', x)[2:-1] for x in results if x and "<ERROR>Empty id list" not in x]

taxa = {}
for result in results:
for summary in Entrez.parse(StringIO(result)):
read_file = br.TempFile(byte_mode=True)
read_file.write(str.encode(result, encoding="utf-8"))
for summary in Entrez.parse(read_file.get_handle(mode='r')):
taxa[summary["TaxId"]] = "Unclassified" if "ScientificName" not in summary \
else summary["ScientificName"]

Expand Down Expand Up @@ -1189,7 +1189,7 @@ def __init__(self, _dbbuddy, server='http://rest.ensembl.org/'):
self.parse_error_file()
if self.species:
self.species = self.species["species"]
self.species = {x["display_name"]: x for x in self.species if x["display_name"]}
self.species = {x["name"]: x for x in self.species if x["name"]}
else:
self.species = {}
self.max_attempts = 5
Expand Down Expand Up @@ -1261,8 +1261,8 @@ def search_ensembl(self):
self.results_file.clear()
species = [name for name, info in self.species.items()]
for search_term in self.dbbuddy.search_terms:
br._stderr("Searching Ensembl for %s...\n" % search_term)
# br.run_multicore_function(species, self._mc_search, [search_term], quiet=True)
# br._stderr("Searching Ensembl for %s...\n" % search_term)
br.run_multicore_function(species, self._mc_search, [search_term], quiet=True)
# TODO: fix multicore --> Many REST requests are failing unless a single request is sent at a time
for i in species:
self._mc_search(i, [search_term])
Expand Down Expand Up @@ -1467,16 +1467,15 @@ def postcmd(self, stop, line):
return stop

def dump_session(self):
# Need to remove Lock()s to pickle
for client in [client for db, client in self.dbbuddy.server_clients.items() if client]:
client.lock = False
# Need to remove server client objects to pickle
clients = self.dbbuddy.server_clients
self.dbbuddy.server_clients = {"ncbi": False, "ensembl": False, "uniprot": False}
self.crash_file.save("%s_undo" % self.crash_file.path)
self.crash_file.open()
dill.dump(self.dbbuddy, self.crash_file.handle, protocol=-1)
dill.dump(self.dbbuddy, self.crash_file.get_handle(), protocol=-1) # protocol -1 = Highest available
self.crash_file.close()
self.undo = True
for client in [client for db, client in self.dbbuddy.server_clients.items() if client]:
client.lock = Lock()
self.dbbuddy.server_clients = clients

def default(self, line):
if line == "exit":
Expand Down
7 changes: 1 addition & 6 deletions buddysuite/tests/test_databasebuddy/test_db_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def test_liveshell_init(monkeypatch, capsys, hf):
liveshell = Db.LiveShell(dbbuddy, crash_file)
assert type(liveshell.tmpdir) == br.TempDir
assert liveshell.terminal_default == "\033[m\033[40m\033[97m"
assert liveshell.prompt == '[95m[1mDbBuddy[m[40m[97m[1m>[m[40m[97m '
assert liveshell.prompt == '\033[95m\033[1mDbBuddy\033[m\033[40m\033[97m\033[1m>\033[m\033[40m\033[97m '
assert hf.string2hash(liveshell.doc_leader) == "e71aa4976437bdb0c22eeaacfaea6f9f"
assert hash(liveshell.dbbuddy) == hash(dbbuddy)
assert liveshell.crash_file == crash_file
Expand Down Expand Up @@ -1216,7 +1216,6 @@ def test_main(monkeypatch):


# ###################### loose command line ui helpers ###################### #
@pytest.mark.loose
def test_exit(monkeypatch, capsys):
class MockExitUsage(object):
@staticmethod
Expand All @@ -1238,7 +1237,6 @@ def save():
assert "('DatabaseBuddy', '%s', 'LiveShell', 0)" % Db.VERSION.short() in out


@pytest.mark.loose
def test_error(monkeypatch, capsys):
monkeypatch.setattr(Db, "LiveShell", mock_systemexit)

Expand All @@ -1256,7 +1254,6 @@ def test_error(monkeypatch, capsys):
assert "can be loaded by launching DatabaseBuddy and using the 'load' command." in err


@pytest.mark.loose
def test_retrieve_accessions(monkeypatch):
# Don't actually run anything, retrieve_summary() is tested elsewhere
monkeypatch.setattr(Db, "retrieve_summary", lambda *_: True)
Expand All @@ -1281,7 +1278,6 @@ def test_retrieve_accessions(monkeypatch):
assert dbbuddy.out_format == out_format


@pytest.mark.loose
def test_retrieve_sequences(monkeypatch, capsys, sb_resources, hf):
# Don't actually run anything, retrieve_summary() is tested elsewhere
monkeypatch.setattr(Db, "retrieve_summary", lambda *_: True)
Expand Down Expand Up @@ -1325,7 +1321,6 @@ def test_retrieve_sequences(monkeypatch, capsys, sb_resources, hf):
assert out == '\x1b[91mAborted...\n\n\x1b[m\x1b[40m'


@pytest.mark.loose
def test_guess_db(capsys, hf):
test_in_args = deepcopy(in_args)
test_in_args.guess_database = True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@ Every python version will save binary formats slightly different, so dbbuddy sav
must be made for each version supported. This needs to be done for ALL versions every
time a new version is added.

Simply launch databasebuddy under the Python version in question, and do a search for casp9.
Simply launch databasebuddy under the Python version in question, do a search for casp9,
then save the session in mock_resources/test_databasebuddy_clients.
Unit tests will need to be updated because the number of sequences will probably change.

0 comments on commit 43e9de4

Please sign in to comment.