From 2433e6a6c1ca7ced07048f41231e335af352027b Mon Sep 17 00:00:00 2001 From: Tommaso Leonardi Date: Wed, 27 Jan 2021 12:57:48 +0100 Subject: [PATCH 1/7] Bumped version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9658253..e517686 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "nanocompore" -version = "1.0.2.dev" +version = "1.0.4.dev" description = "Software package that identifies raw signal changes between two conditions from https://github.com/jts/nanopolish resquiggled dRNA-Seq data." authors = ["Tommaso Leonardi ", "Adrien Leger "] license = "GPL-3.0" From c65cba01adcd8ccbac75743f4b62d328efe2fab3 Mon Sep 17 00:00:00 2001 From: Tommaso Leonardi Date: Thu, 29 Apr 2021 11:01:25 +0200 Subject: [PATCH 2/7] Improved logging/error handling in Whitelist --- nanocompore/Whitelist.py | 69 +++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/nanocompore/Whitelist.py b/nanocompore/Whitelist.py index d666e30..70b9f8a 100755 --- a/nanocompore/Whitelist.py +++ b/nanocompore/Whitelist.py @@ -173,33 +173,45 @@ def __read_eventalign_index(self, col_names = fp.readline().rstrip().split() c = Counter() for line in fp: - try: - # Transform line to dict and cast str numbers to actual numbers - read = numeric_cast_dict(keys=col_names, values=line.rstrip().split("\t")) - - # Filter out ref_id if a select_ref_id list or exclude_ref_id list was provided - if select_ref_id and not read["ref_id"] in select_ref_id: - raise NanocomporeError("Ref_id not in select list") - elif exclude_ref_id and read["ref_id"] in exclude_ref_id: - raise NanocomporeError("Ref_id in exclude list") - - # Filter out reads with high number of invalid kmers if information available - if self.__filter_invalid_kmers: - if max_invalid_kmers_freq: - invalid_kmers_freq = (read["NNNNN_kmers"]+read["mismatch_kmers"]+read["missing_kmers"])/read["kmers"] - 
if invalid_kmers_freq > max_invalid_kmers_freq: - raise NanocomporeError("High fraction of invalid kmers ({}%) for read {}".format(round(invalid_kmers_freq*100,2), read["read_id"])) - else: - NNNNN_kmers_freq = read["NNNNN_kmers"]/read["kmers"] - max_mismatching_freq = read["mismatch_kmers"]/read["kmers"] - max_missing_freq = read["missing_kmers"]/read["kmers"] - if NNNNN_kmers_freq > max_NNNNN_freq: - raise NanocomporeError("High fraction of NNNNN kmers ({}%) for read {}".format(round(NNNNN_kmers_freq*100,2), read["read_id"])) - elif max_mismatching_freq > max_mismatching_freq: - raise NanocomporeError("High fraction of mismatching kmers ({}%) for read {}".format(round(max_mismatching_freq*100,2), read["read_id"])) - elif max_missing_freq > max_missing_freq: - raise NanocomporeError("High fraction of missing kmers ({}%) for read {}".format(round(max_missing_freq*100,2), read["read_id"])) - + # Transform line to dict and cast str numbers to actual numbers + read = numeric_cast_dict(keys=col_names, values=line.rstrip().split("\t")) + read_is_valid = True + # Filter out ref_id if a select_ref_id list or exclude_ref_id list was provided + if select_ref_id and not read["ref_id"] in select_ref_id: + read_is_valid = False + c["Ref_id not in select list"]+=1 + logger.trace("Ref_id not in select list") + elif exclude_ref_id and read["ref_id"] in exclude_ref_id: + read_is_valid = False + c["Ref_id in exclude list"]+=1 + logger.trace("Ref_id in exclude list") + + # Filter out reads with high number of invalid kmers if information available + if self.__filter_invalid_kmers: + if max_invalid_kmers_freq: + invalid_kmers_freq = (read["NNNNN_kmers"]+read["mismatch_kmers"]+read["missing_kmers"])/read["kmers"] + if invalid_kmers_freq > max_invalid_kmers_freq: + read_is_valid = False + c["High fraction of invalid kmers"]+=1 + logger.trace("High fraction of invalid kmers ({}%) for read {}".format(round(invalid_kmers_freq*100,2), read["read_id"])) + else: + NNNNN_kmers_freq = 
read["NNNNN_kmers"]/read["kmers"] + mismatch_kmers_freq = read["mismatch_kmers"]/read["kmers"] + missing_kmers_freq = read["missing_kmers"]/read["kmers"] + if NNNNN_kmers_freq > max_NNNNN_freq: + read_is_valid = False + c["High fraction of NNNNN kmers"]+=1 + logger.trace("High fraction of NNNNN kmers ({}%) for read {}".format(round(NNNNN_kmers_freq*100,2), read["read_id"])) + elif mismatch_kmers_freq > max_mismatching_freq: + read_is_valid = False + c["High fraction of mismatching kmers"]+=1 + logger.trace("High fraction of mismatching kmers ({}%) for read {}".format(round(mismatch_kmers_freq*100,2), read["read_id"])) + elif missing_kmers_freq > max_missing_freq: + read_is_valid = False + c["High fraction of missing kmers"]+=1 + logger.trace("High fraction of missing kmers ({}%) for read {}".format(round(missing_kmers_freq*100,2), read["read_id"])) + + if read_is_valid: # Create dict arborescence and save valid reads if not read["ref_id"] in ref_reads: ref_reads[read["ref_id"]] = OrderedDict() @@ -212,9 +224,6 @@ def __read_eventalign_index(self, ref_reads[read["ref_id"]][cond_lab][sample_lab].append(read) c ["valid reads"] += 1 - except NanocomporeError as E: - c [str(E)] += 1 - logger.debug("\tCondition:{} Sample:{} {}".format(cond_lab, sample_lab, counter_to_str(c))) # Fill in missing condition/sample slots in case # a ref_id is missing from one of the eventalign files From 46ff8a932c6203c1171d017587fc1244ffc91fa1 Mon Sep 17 00:00:00 2001 From: Tommaso Leonardi Date: Thu, 27 May 2021 15:10:05 +0200 Subject: [PATCH 3/7] Improved logging --- nanocompore/Eventalign_collapse.py | 5 +++++ nanocompore/SampComp.py | 8 +++++++- nanocompore/__main__.py | 3 ++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/nanocompore/Eventalign_collapse.py b/nanocompore/Eventalign_collapse.py index 4ce56e7..651e5b6 100644 --- a/nanocompore/Eventalign_collapse.py +++ b/nanocompore/Eventalign_collapse.py @@ -106,12 +106,17 @@ def __call__(self): for tb in iter(error_q.get, 
None): logger.trace("Error caught from error_q") raise NanocomporeError(tb) + logger.debug("Caught poison pill in error queue") # Soft processes and queues stopping + logger.debug("Joining all processes") for ps in ps_list: ps.join() + logger.debug("All processes were joined") + logger.debug("Closing all queues") for q in (in_q, out_q, error_q): q.close() + logger.debug("All queues were closed") # Catch error, kill all processed and reraise error except Exception as E: diff --git a/nanocompore/SampComp.py b/nanocompore/SampComp.py index 108f779..a8da84c 100644 --- a/nanocompore/SampComp.py +++ b/nanocompore/SampComp.py @@ -217,14 +217,20 @@ def __call__(self): # Monitor error queue for tb in iter(error_q.get, None): - logger.trace("Error caught from error_q") + logger.debug("Error caught from error_q") raise NanocomporeError(tb) + logger.debug("Error queue was closed") # Soft processes and queues stopping + logger.debug("Waiting for all processes to be joined") for ps in ps_list: ps.join() + logger.debug("All processes joined successfully") + + logger.debug("Closing all queues") for q in (in_q, out_q, error_q): q.close() + logger.debug("All queues were closed") # Return database wrapper object return SampCompDB( diff --git a/nanocompore/__main__.py b/nanocompore/__main__.py index fa97872..9d6b8c0 100644 --- a/nanocompore/__main__.py +++ b/nanocompore/__main__.py @@ -153,7 +153,7 @@ def main(args=None): sp_output.add_argument("--overwrite", "-w", action='store_true', default=False, help="Use --outpath even if it exists already (default: %(default)s)") sp_verbosity = sp.add_argument_group("Verbosity options") - sp_verbosity.add_argument("--log_level", type=str, default="info", choices=["warning", "info", "debug"], + sp_verbosity.add_argument("--log_level", type=str, default="info", choices=["warning", "info", "debug", "trace"], help="Set the log level (default: %(default)s)") sp_verbosity.add_argument("--progress", default=False, action='store_true', help="Display a 
progress bar during execution (default: %(default)s)") @@ -213,6 +213,7 @@ def sampcomp_main(args): db = s() # Save all reports + logger.info("Saving results") if(db): db.save_all(pvalue_thr=args.pvalue_thr) From d9123a424bf52cb84ca1452bfee8e35c44237335 Mon Sep 17 00:00:00 2001 From: Tommaso Leonardi Date: Thu, 27 May 2021 15:23:44 +0200 Subject: [PATCH 4/7] Updated Changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8dc7b0e..4e17121 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ ======= ## Dev +### Fixed +- Fixed logging levels and verbosity of log messages + +## v1.0.3 ### Fixed - Fixed bug Eventalign_collapse CLI options From ae3cf6465f3163a9bdfa05df3d500bb123e88990 Mon Sep 17 00:00:00 2001 From: Tommaso Leonardi Date: Thu, 27 May 2021 15:45:52 +0200 Subject: [PATCH 5/7] Improved versioning --- nanocompore/__init__.py | 7 ++----- nanocompore/common.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/nanocompore/__init__.py b/nanocompore/__init__.py index 1b1df6c..292b162 100755 --- a/nanocompore/__init__.py +++ b/nanocompore/__init__.py @@ -1,10 +1,7 @@ # -*- coding: utf-8 -*- # https://github.com/python-poetry/poetry/pull/2366#issuecomment-652418094 -try: - import importlib.metadata as importlib_metadata -except ModuleNotFoundError: - import importlib_metadata +from nanocompore.common import get_version -__version__ = importlib_metadata.version(__name__) +__version__ = get_version() __description__ = 'Software package that identifies raw signal changes between two conditions from https://github.com/jts/nanopolish resquiggled dRNA-Seq data.' 
diff --git a/nanocompore/common.py b/nanocompore/common.py index d9637ff..a366735 100644 --- a/nanocompore/common.py +++ b/nanocompore/common.py @@ -2,6 +2,7 @@ #~~~~~~~~~~~~~~IMPORTS~~~~~~~~~~~~~~# # Standard library imports +import pkg_resources import sys import os from collections import * @@ -219,3 +220,13 @@ def jhelp (f:"python function or method"): # Display in Jupyter display (Markdown(s)) + +def get_version(): + try: + distribution = pkg_resources.get_distribution("nanocompore") + except pkg_resources.DistributionNotFound: + return "dev" # or "", or None + # or try with importlib.metadata + # or try reading pyproject.toml + else: + return distribution.version From ab09ce5d4117a41a389c8f1bfb92b36d118cdde6 Mon Sep 17 00:00:00 2001 From: Tommaso Leonardi Date: Thu, 27 May 2021 16:06:33 +0200 Subject: [PATCH 6/7] Updated changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e17121..10c8ffa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,8 @@ ======= -## Dev +## v1.0.4.dev + ### Fixed - Fixed logging levels and verbosity of log messages From 4d5982599307f8b36c42c1202598921294b38f56 Mon Sep 17 00:00:00 2001 From: Tommaso Leonardi Date: Thu, 27 May 2021 16:10:19 +0200 Subject: [PATCH 7/7] Version bumped to 1.0.4 --- CHANGELOG.md | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10c8ffa..36700a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ======= -## v1.0.4.dev +## v1.0.4 ### Fixed - Fixed logging levels and verbosity of log messages diff --git a/pyproject.toml b/pyproject.toml index e517686..1ae82d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "nanocompore" -version = "1.0.4.dev" +version = "1.0.4" description = "Software package that identifies raw signal changes between two conditions from https://github.com/jts/nanopolish resquiggled dRNA-Seq 
data." authors = ["Tommaso Leonardi ", "Adrien Leger "] license = "GPL-3.0"