From 08d40f2873d900fbad996473974a8f7aa3d241eb Mon Sep 17 00:00:00 2001
From: "nloyfer@gmail.com" <nloyfer@gmail.com>
Date: Sun, 17 Mar 2024 17:31:57 +0200
Subject: [PATCH] Clean up pylint warnings in bed2beta and beta_to_* scripts

---
 src/python/bed2beta.py       |  5 +----
 src/python/beta_to_450k.py   |  9 ++++----
 src/python/beta_to_blocks.py | 40 +++++++++++++++---------------------
 src/python/beta_to_table.py  | 19 ++++++++---------
 4 files changed, 29 insertions(+), 44 deletions(-)

diff --git a/src/python/bed2beta.py b/src/python/bed2beta.py
index e202e20..5494ffd 100755
--- a/src/python/bed2beta.py
+++ b/src/python/bed2beta.py
@@ -2,12 +2,10 @@
 
 import argparse
 import os.path as op
-import sys
 import pandas as pd
 import numpy as np
 from utils_wgbs import delete_or_skip, splitextgz, trim_to_uint8, validate_file_list, \
-                       load_dict, eprint, load_dict_section
-from genomic_region import GenomicRegion
+                       eprint, load_dict_section
 
 
 def load_bed(bed_path, nrows, add1=False):
@@ -86,4 +84,3 @@ def main():
 
 if __name__ == '__main__':
     main()
-
diff --git a/src/python/beta_to_450k.py b/src/python/beta_to_450k.py
index 9725ccf..46c4ca8 100755
--- a/src/python/beta_to_450k.py
+++ b/src/python/beta_to_450k.py
@@ -2,13 +2,13 @@
 
 import argparse
 import sys
-import numpy as np
 import os.path as op
+from multiprocessing import Pool
 import pandas as pd
+import numpy as np
 from utils_wgbs import validate_single_file, validate_file_list, load_beta_data, \
                        beta2vec, IllegalArgumentError, eprint, \
                        add_multi_thread_args, GenomeRefPaths, beta_sanity_check
-from multiprocessing import Pool
 
 # https://support.illumina.com/array/array_kits/infinium-methylationepic-beadchip-kit/downloads.html
 
@@ -46,7 +46,7 @@ def load_full_ref(args, genome):
 def read_reference(args):
 
     genome = GenomeRefPaths(args.genome)
-    if not (beta_sanity_check(args.input_files[0], genome)):
+    if not beta_sanity_check(args.input_files[0], genome):
         raise IllegalArgumentError('beta incompatible with genome')
 
     # load "full" reference - the one supplied with wgbstools
@@ -87,7 +87,7 @@ def read_reference(args):
 
 def betas2csv(args):
 
-    # set reference sites, as the intersection of the user input (--ref) 
+    # set reference sites, as the intersection of the user input (--ref)
     # and the "full" reference, supplied by wgbstools (ilmn2cpg_dict)
     df = read_reference(args)
     indices = np.array(df['cpg'])
@@ -140,4 +140,3 @@ def main():
 
 if __name__ == '__main__':
     main()
-
diff --git a/src/python/beta_to_blocks.py b/src/python/beta_to_blocks.py
index f205cce..f9f23c0 100755
--- a/src/python/beta_to_blocks.py
+++ b/src/python/beta_to_blocks.py
@@ -1,12 +1,11 @@
 #!/usr/bin/python3 -u
 
 import argparse
-import os
-import numpy as np
+import sys
 import os.path as op
-import pandas as pd
 from multiprocessing import Pool
-import sys
+import pandas as pd
+import numpy as np
 from utils_wgbs import load_beta_data, trim_to_uint8, \
                         IllegalArgumentError, add_multi_thread_args, \
                         splitextgz, validate_file_list, validate_single_file, \
@@ -23,36 +22,28 @@ def b2b_log(*args, **kwargs):
 
 def is_block_file_nice(df):
 
+    msg = ''
     # no empty blocks (noCpGs):
     # no missing values (NAs)
     if df[['startCpG', 'endCpG']].isna().values.sum() > 0:
         msg = 'Some blocks are empty (NA)'
-        return False, msg
-
     # no (startCpG==endCpG)
-    if not (df['endCpG'] - df['startCpG'] > 0).all():
+    elif not (df['endCpG'] - df['startCpG'] > 0).all():
         msg = 'Some blocks are empty (startCpG==endCpG)'
-        return False, msg
-
     # blocks are sorted
     # startCpG and endCpG are monotonically increasing
-    if not np.all(np.diff(df['startCpG'].values) >= 0):
+    elif not np.all(np.diff(df['startCpG'].values) >= 0):
         msg = 'startCpG is not monotonically increasing'
-        return False, msg
-    if not np.all(np.diff(df['endCpG'].values) >= 0):
+    elif not np.all(np.diff(df['endCpG'].values) >= 0):
         msg = 'endCpG is not monotonically increasing'
-        return False, msg
-
     # no duplicated blocks
-    if (df.shape[0] != df.drop_duplicates().shape[0]):
+    elif df.shape[0] != df.drop_duplicates().shape[0]:
         msg = 'Some blocks are duplicated'
-        return False, msg
-
     # no overlaps between blocks
-    if not (df['startCpG'][1:].values - df['endCpG'][:df.shape[0] - 1].values  >= 0).all():
+    elif not (df['startCpG'][1:].values - df['endCpG'][:df.shape[0] - 1].values  >= 0).all():
         msg = 'Some blocks overlap'
+    if msg:
         return False, msg
-
     return True, ''
 
 
@@ -75,7 +66,7 @@ def load_blocks_file(blocks_path, anno=False, nrows=None):
         elif len(peek_df.columns) < len(names):  # no annotations columns
             names = COORDS_COLS5
 
-        # load 
+        # load
         # dtypes = {'chr':str, 'start', 'end', 'startCpG', 'endCpG'}
         dtypes = {'startCpG':'Int64', 'endCpG':'Int64'}
         df = pd.read_csv(blocks_path, sep='\t', usecols=range(len(names)), dtype=dtypes,
@@ -131,8 +122,8 @@ def reduce_data(beta_path, df, is_nice):
         start = df['startCpG'].values[0]
         end = df['endCpG'].values[df.shape[0] - 1]
         return fast_method(load_beta_data(beta_path, (start, end)), df)
-    else:
-        return slow_method(load_beta_data(beta_path), df)
+
+    return slow_method(load_beta_data(beta_path), df)
 
 
 def collapse_process(beta_path, df, is_nice, lbeta=False, out_dir=None, bedGraph=False):
@@ -207,13 +198,14 @@ def main():
     params = [(b, df, is_nice, args.lbeta, args.out_dir, args.bedGraph)
               for b in files]
     if args.debug:
-        arr = [collapse_process(*k) for k in params]
+        _ = [collapse_process(*k) for k in params]
     else:
         p = Pool(args.threads)
-        arr = p.starmap(collapse_process, params)
+        p.starmap(collapse_process, params)
     p.close()
     p.join()
 
+
 def parse_args():
     parser = argparse.ArgumentParser(description=main.__doc__)
     parser.add_argument('input_files', nargs='+', help='one or more beta files')
diff --git a/src/python/beta_to_table.py b/src/python/beta_to_table.py
index a38f9dd..8d03dbf 100644
--- a/src/python/beta_to_table.py
+++ b/src/python/beta_to_table.py
@@ -1,12 +1,12 @@
 #!/usr/bin/python3 -u
 
 import argparse
-import numpy as np
 import sys
 import os.path as op
-import pandas as pd
 import warnings
 from multiprocessing import Pool
+import pandas as pd
+import numpy as np
 from dmb import load_gfile_helper, match_prefix_to_bin, load_uxm
 from beta_to_blocks import collapse_process, load_blocks_file, is_block_file_nice
 from utils_wgbs import validate_single_file, validate_file_list, eprint, \
@@ -56,17 +56,15 @@ def groups_load_wrap(groups_file, betas):
     return gf
 
 
-def cwrap(beta_path, blocks_df, is_nice, min_cov, verbose):
-    # if verbose:
-    #   eprint('[wt table]', op.splitext(op.basename(beta_path))[0])
+def cwrap(beta_path, blocks_df, is_nice, min_cov, _verbose=False):
     if beta_path.endswith('.beta'):
         r = collapse_process(beta_path, blocks_df, is_nice)
         if r is None:
             return
         name = op.splitext(op.basename(beta_path))[0]
         return {name: beta2vec(r, min_cov)}
-    else:
-        return {op.basename(beta_path)[:-4]: load_uxm(beta_path, blocks_df, 'U', min_cov)}
+
+    return {op.basename(beta_path)[:-4]: load_uxm(beta_path, blocks_df, 'U', min_cov)}
 
 
 def get_table(blocks_df, gf, min_cov, threads=8, verbose=False, group=True):
@@ -76,7 +74,6 @@ def get_table(blocks_df, gf, min_cov, threads=8, verbose=False, group=True):
     betas = drop_dup_keep_order(gf['full_path'])
     p = Pool(threads)
     params = [(b, blocks_df, is_nice, min_cov, verbose) for b in betas]
-    # arr = [cwrap(*p) for p in params] # todo: remove
     arr = p.starmap(cwrap, params)
     p.close()
     p.join()
@@ -101,9 +98,9 @@ def get_table(blocks_df, gf, min_cov, threads=8, verbose=False, group=True):
         warnings.filterwarnings('ignore', category=RuntimeWarning)
         empty_df = pd.DataFrame(index=blocks_df.index, columns=ugroups)
         blocks_df = pd.concat([blocks_df, empty_df], axis=1)
-        for group in ugroups:
-            blocks_df[group] = np.nanmean(
-                np.concatenate([dres[k][None, :] for k in gf['fname'][gf['group'] == group]]), axis=0).T
+        for ugroup in ugroups:
+            blocks_df[ugroup] = np.nanmean(
+                np.concatenate([dres[k][None, :] for k in gf['fname'][gf['group'] == ugroup]]), axis=0).T
     return blocks_df