From 08d40f2873d900fbad996473974a8f7aa3d241eb Mon Sep 17 00:00:00 2001 From: "nloyfer@gmail.com" Date: Sun, 17 Mar 2024 17:31:57 +0200 Subject: [PATCH] pylint --- src/python/bed2beta.py | 5 +---- src/python/beta_to_450k.py | 9 ++++---- src/python/beta_to_blocks.py | 40 +++++++++++++++--------------------- src/python/beta_to_table.py | 19 ++++++++--------- 4 files changed, 29 insertions(+), 44 deletions(-) diff --git a/src/python/bed2beta.py b/src/python/bed2beta.py index e202e20..5494ffd 100755 --- a/src/python/bed2beta.py +++ b/src/python/bed2beta.py @@ -2,12 +2,10 @@ import argparse import os.path as op -import sys import pandas as pd import numpy as np from utils_wgbs import delete_or_skip, splitextgz, trim_to_uint8, validate_file_list, \ - load_dict, eprint, load_dict_section -from genomic_region import GenomicRegion + eprint, load_dict_section def load_bed(bed_path, nrows, add1=False): @@ -86,4 +84,3 @@ def main(): if __name__ == '__main__': main() - diff --git a/src/python/beta_to_450k.py b/src/python/beta_to_450k.py index 9725ccf..46c4ca8 100755 --- a/src/python/beta_to_450k.py +++ b/src/python/beta_to_450k.py @@ -2,13 +2,13 @@ import argparse import sys -import numpy as np import os.path as op +from multiprocessing import Pool import pandas as pd +import numpy as np from utils_wgbs import validate_single_file, validate_file_list, load_beta_data, \ beta2vec, IllegalArgumentError, eprint, \ add_multi_thread_args, GenomeRefPaths, beta_sanity_check -from multiprocessing import Pool # https://support.illumina.com/array/array_kits/infinium-methylationepic-beadchip-kit/downloads.html @@ -46,7 +46,7 @@ def load_full_ref(args, genome): def read_reference(args): genome = GenomeRefPaths(args.genome) - if not (beta_sanity_check(args.input_files[0], genome)): + if not beta_sanity_check(args.input_files[0], genome): raise IllegalArgumentError('beta incompatible with genome') # load "full" reference - the one supplied with wgbstools @@ -87,7 +87,7 @@ def read_reference(args): def betas2csv(args): - # set reference sites, as the intersection of the user input (--ref) + # set reference sites, as the intersection of the user input (--ref) # and the "full" reference, supplied by wgbstools (ilmn2cpg_dict) df = read_reference(args) indices = np.array(df['cpg']) @@ -140,4 +140,3 @@ def main(): if __name__ == '__main__': main() - diff --git a/src/python/beta_to_blocks.py b/src/python/beta_to_blocks.py index f205cce..f9f23c0 100755 --- a/src/python/beta_to_blocks.py +++ b/src/python/beta_to_blocks.py @@ -1,12 +1,11 @@ #!/usr/bin/python3 -u import argparse -import os -import numpy as np +import sys import os.path as op -import pandas as pd from multiprocessing import Pool -import sys +import pandas as pd +import numpy as np from utils_wgbs import load_beta_data, trim_to_uint8, \ IllegalArgumentError, add_multi_thread_args, \ splitextgz, validate_file_list, validate_single_file, \ @@ -23,36 +22,28 @@ def b2b_log(*args, **kwargs): def is_block_file_nice(df): + msg = '' # no empty blocks (noCpGs): # no missing values (NAs) if df[['startCpG', 'endCpG']].isna().values.sum() > 0: msg = 'Some blocks are empty (NA)' - return False, msg - # no (startCpG==endCpG) - if not (df['endCpG'] - df['startCpG'] > 0).all(): + elif not (df['endCpG'] - df['startCpG'] > 0).all(): msg = 'Some blocks are empty (startCpG==endCpG)' - return False, msg - # blocks are sorted # startCpG and endCpG are monotonically increasing - if not np.all(np.diff(df['startCpG'].values) >= 0): + elif not np.all(np.diff(df['startCpG'].values) >= 0): msg = 'startCpG is not monotonically increasing' - return False, msg - if not np.all(np.diff(df['endCpG'].values) >= 0): + elif not np.all(np.diff(df['endCpG'].values) >= 0): msg = 'endCpG is not monotonically increasing' - return False, msg - # no duplicated blocks - if (df.shape[0] != df.drop_duplicates().shape[0]): + elif df.shape[0] != df.drop_duplicates().shape[0]: msg = 'Some blocks are duplicated' - return False, msg - # no overlaps between blocks - if not (df['startCpG'][1:].values - df['endCpG'][:df.shape[0] - 1].values >= 0).all(): + elif not (df['startCpG'][1:].values - df['endCpG'][:df.shape[0] - 1].values >= 0).all(): msg = 'Some blocks overlap' + if msg: return False, msg - return True, '' @@ -75,7 +66,7 @@ def load_blocks_file(blocks_path, anno=False, nrows=None): elif len(peek_df.columns) < len(names): # no annotations columns names = COORDS_COLS5 - # load + # load # dtypes = {'chr':str, 'start', 'end', 'startCpG', 'endCpG'} dtypes = {'startCpG':'Int64', 'endCpG':'Int64'} df = pd.read_csv(blocks_path, sep='\t', usecols=range(len(names)), dtype=dtypes, @@ -131,8 +122,8 @@ def reduce_data(beta_path, df, is_nice): start = df['startCpG'].values[0] end = df['endCpG'].values[df.shape[0] - 1] return fast_method(load_beta_data(beta_path, (start, end)), df) - else: - return slow_method(load_beta_data(beta_path), df) + + return slow_method(load_beta_data(beta_path), df) def collapse_process(beta_path, df, is_nice, lbeta=False, out_dir=None, bedGraph=False): @@ -207,13 +198,14 @@ def main(): params = [(b, df, is_nice, args.lbeta, args.out_dir, args.bedGraph) for b in files] if args.debug: - arr = [collapse_process(*k) for k in params] + _ = [collapse_process(*k) for k in params] else: p = Pool(args.threads) - arr = p.starmap(collapse_process, params) + p.starmap(collapse_process, params) p.close() p.join() + def parse_args(): parser = argparse.ArgumentParser(description=main.__doc__) parser.add_argument('input_files', nargs='+', help='one or more beta files') diff --git a/src/python/beta_to_table.py b/src/python/beta_to_table.py index a38f9dd..8d03dbf 100644 --- a/src/python/beta_to_table.py +++ b/src/python/beta_to_table.py @@ -1,12 +1,12 @@ #!/usr/bin/python3 -u import argparse -import numpy as np import sys import os.path as op -import pandas as pd import warnings from multiprocessing import Pool +import pandas as pd +import numpy as np from dmb import load_gfile_helper, match_prefix_to_bin, load_uxm from beta_to_blocks import collapse_process, load_blocks_file, is_block_file_nice from utils_wgbs import validate_single_file, validate_file_list, eprint, \ @@ -56,17 +56,15 @@ def groups_load_wrap(groups_file, betas): return gf -def cwrap(beta_path, blocks_df, is_nice, min_cov, verbose): - # if verbose: - # eprint('[wt table]', op.splitext(op.basename(beta_path))[0]) +def cwrap(beta_path, blocks_df, is_nice, min_cov): if beta_path.endswith('.beta'): r = collapse_process(beta_path, blocks_df, is_nice) if r is None: return name = op.splitext(op.basename(beta_path))[0] return {name: beta2vec(r, min_cov)} - else: - return {op.basename(beta_path)[:-4]: load_uxm(beta_path, blocks_df, 'U', min_cov)} + + return {op.basename(beta_path)[:-4]: load_uxm(beta_path, blocks_df, 'U', min_cov)} def get_table(blocks_df, gf, min_cov, threads=8, verbose=False, group=True): @@ -76,7 +74,6 @@ def get_table(blocks_df, gf, min_cov, threads=8, verbose=False, group=True): betas = drop_dup_keep_order(gf['full_path']) p = Pool(threads) params = [(b, blocks_df, is_nice, min_cov, verbose) for b in betas] - # arr = [cwrap(*p) for p in params] # todo: remove arr = p.starmap(cwrap, params) p.close() p.join() @@ -101,9 +98,9 @@ def get_table(blocks_df, gf, min_cov, threads=8, verbose=False, group=True): warnings.filterwarnings('ignore', category=RuntimeWarning) empty_df = pd.DataFrame(index=blocks_df.index, columns=ugroups) blocks_df = pd.concat([blocks_df, empty_df], axis=1) - for group in ugroups: - blocks_df[group] = np.nanmean( - np.concatenate([dres[k][None, :] for k in gf['fname'][gf['group'] == group]]), axis=0).T + for ugroup in ugroups: + blocks_df[ugroup] = np.nanmean( + np.concatenate([dres[k][None, :] for k in gf['fname'][gf['group'] == ugroup]]), axis=0).T return blocks_df