Skip to content

Commit

Permalink
pylint
Browse files Browse the repository at this point in the history
  • Loading branch information
nloyfer committed Mar 17, 2024
1 parent dc0f1a3 commit 08d40f2
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 44 deletions.
5 changes: 1 addition & 4 deletions src/python/bed2beta.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@

import argparse
import os.path as op
import sys
import pandas as pd
import numpy as np
from utils_wgbs import delete_or_skip, splitextgz, trim_to_uint8, validate_file_list, \
load_dict, eprint, load_dict_section
from genomic_region import GenomicRegion
eprint, load_dict_section


def load_bed(bed_path, nrows, add1=False):
Expand Down Expand Up @@ -86,4 +84,3 @@ def main():

if __name__ == '__main__':
main()

9 changes: 4 additions & 5 deletions src/python/beta_to_450k.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

import argparse
import sys
import numpy as np
import os.path as op
from multiprocessing import Pool
import pandas as pd
import numpy as np
from utils_wgbs import validate_single_file, validate_file_list, load_beta_data, \
beta2vec, IllegalArgumentError, eprint, \
add_multi_thread_args, GenomeRefPaths, beta_sanity_check
from multiprocessing import Pool

# https://support.illumina.com/array/array_kits/infinium-methylationepic-beadchip-kit/downloads.html

Expand Down Expand Up @@ -46,7 +46,7 @@ def load_full_ref(args, genome):
def read_reference(args):

genome = GenomeRefPaths(args.genome)
if not (beta_sanity_check(args.input_files[0], genome)):
if not beta_sanity_check(args.input_files[0], genome):
raise IllegalArgumentError('beta incompatible with genome')

# load "full" reference - the one supplied with wgbstools
Expand Down Expand Up @@ -87,7 +87,7 @@ def read_reference(args):

def betas2csv(args):

# set reference sites, as the intersection of the user input (--ref)
# set reference sites, as the intersection of the user input (--ref)
# and the "full" reference, supplied by wgbstools (ilmn2cpg_dict)
df = read_reference(args)
indices = np.array(df['cpg'])
Expand Down Expand Up @@ -140,4 +140,3 @@ def main():

if __name__ == '__main__':
main()

40 changes: 16 additions & 24 deletions src/python/beta_to_blocks.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
#!/usr/bin/python3 -u

import argparse
import os
import numpy as np
import sys
import os.path as op
import pandas as pd
from multiprocessing import Pool
import sys
import pandas as pd
import numpy as np
from utils_wgbs import load_beta_data, trim_to_uint8, \
IllegalArgumentError, add_multi_thread_args, \
splitextgz, validate_file_list, validate_single_file, \
Expand All @@ -23,36 +22,28 @@ def b2b_log(*args, **kwargs):

def is_block_file_nice(df):
    """Check whether a blocks table passes a series of sanity checks.

    The checks run in order and stop at the first failure:
      1. no missing values in startCpG / endCpG
      2. every block satisfies endCpG > startCpG
      3. startCpG is monotonically non-decreasing
      4. endCpG is monotonically non-decreasing
      5. no duplicated rows
      6. no overlap between consecutive blocks

    :param df: pandas DataFrame with (at least) the columns
               'startCpG' and 'endCpG'
    :return: a tuple (is_nice, msg). msg is the empty string when the
             table is nice, otherwise a description of the first
             failed check.
    """
    starts = df['startCpG']
    ends = df['endCpG']

    # Checks are chained with elif so that later ones only run on data
    # that already passed the earlier ones (e.g. no NAs when diffing).
    msg = ''
    # no missing values (NAs)
    if df[['startCpG', 'endCpG']].isna().values.sum() > 0:
        msg = 'Some blocks are empty (NA)'
    # no empty blocks; note this also fires when endCpG < startCpG
    elif not (ends - starts > 0).all():
        msg = 'Some blocks are empty (startCpG==endCpG)'
    # blocks are sorted: startCpG and endCpG are monotonically increasing
    elif not np.all(np.diff(starts.values) >= 0):
        msg = 'startCpG is not monotonically increasing'
    elif not np.all(np.diff(ends.values) >= 0):
        msg = 'endCpG is not monotonically increasing'
    # no duplicated blocks
    elif df.shape[0] != df.drop_duplicates().shape[0]:
        msg = 'Some blocks are duplicated'
    # no overlaps between blocks: each block must start at or after the
    # end of the previous one (compared positionally, ignoring the index)
    elif not (starts.values[1:] - ends.values[:-1] >= 0).all():
        msg = 'Some blocks overlap'

    if msg:
        return False, msg
    return True, ''


Expand All @@ -75,7 +66,7 @@ def load_blocks_file(blocks_path, anno=False, nrows=None):
elif len(peek_df.columns) < len(names): # no annotations columns
names = COORDS_COLS5

# load
# load
# dtypes = {'chr':str, 'start', 'end', 'startCpG', 'endCpG'}
dtypes = {'startCpG':'Int64', 'endCpG':'Int64'}
df = pd.read_csv(blocks_path, sep='\t', usecols=range(len(names)), dtype=dtypes,
Expand Down Expand Up @@ -131,8 +122,8 @@ def reduce_data(beta_path, df, is_nice):
start = df['startCpG'].values[0]
end = df['endCpG'].values[df.shape[0] - 1]
return fast_method(load_beta_data(beta_path, (start, end)), df)
else:
return slow_method(load_beta_data(beta_path), df)

return slow_method(load_beta_data(beta_path), df)


def collapse_process(beta_path, df, is_nice, lbeta=False, out_dir=None, bedGraph=False):
Expand Down Expand Up @@ -207,13 +198,14 @@ def main():
params = [(b, df, is_nice, args.lbeta, args.out_dir, args.bedGraph)
for b in files]
if args.debug:
arr = [collapse_process(*k) for k in params]
_ = [collapse_process(*k) for k in params]
else:
p = Pool(args.threads)
arr = p.starmap(collapse_process, params)
p.starmap(collapse_process, params)
p.close()
p.join()


def parse_args():
parser = argparse.ArgumentParser(description=main.__doc__)
parser.add_argument('input_files', nargs='+', help='one or more beta files')
Expand Down
19 changes: 8 additions & 11 deletions src/python/beta_to_table.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#!/usr/bin/python3 -u

import argparse
import numpy as np
import sys
import os.path as op
import pandas as pd
import warnings
from multiprocessing import Pool
import pandas as pd
import numpy as np
from dmb import load_gfile_helper, match_prefix_to_bin, load_uxm
from beta_to_blocks import collapse_process, load_blocks_file, is_block_file_nice
from utils_wgbs import validate_single_file, validate_file_list, eprint, \
Expand Down Expand Up @@ -56,17 +56,15 @@ def groups_load_wrap(groups_file, betas):
return gf


def cwrap(beta_path, blocks_df, is_nice, min_cov, verbose=False):  # pylint: disable=unused-argument
    """Collapse a single input file to blocks and wrap the result in a dict.

    Files ending with '.beta' are collapsed with collapse_process and
    converted with beta2vec; any other file is loaded with load_uxm.

    :param beta_path: path to the input file
    :param blocks_df: blocks table, passed through to collapse_process /
                      load_uxm
    :param is_nice: passed through to collapse_process
    :param min_cov: passed through to beta2vec / load_uxm
    :param verbose: unused. Kept as an optional trailing parameter because
                    get_table still builds 5-tuples
                    (b, blocks_df, is_nice, min_cov, verbose) for
                    Pool.starmap; dropping it would make every call raise
                    TypeError.
    :return: a single-entry dict {name: values}, or None when
             collapse_process returns None for a beta file.
    """
    if beta_path.endswith('.beta'):
        r = collapse_process(beta_path, blocks_df, is_nice)
        if r is None:
            return None
        name = op.splitext(op.basename(beta_path))[0]
        return {name: beta2vec(r, min_cov)}

    # non-beta input: the key is the basename minus its last 4 characters
    # (presumably a 4-character extension -- TODO confirm against callers)
    return {op.basename(beta_path)[:-4]: load_uxm(beta_path, blocks_df, 'U', min_cov)}


def get_table(blocks_df, gf, min_cov, threads=8, verbose=False, group=True):
Expand All @@ -76,7 +74,6 @@ def get_table(blocks_df, gf, min_cov, threads=8, verbose=False, group=True):
betas = drop_dup_keep_order(gf['full_path'])
p = Pool(threads)
params = [(b, blocks_df, is_nice, min_cov, verbose) for b in betas]
# arr = [cwrap(*p) for p in params] # todo: remove
arr = p.starmap(cwrap, params)
p.close()
p.join()
Expand All @@ -101,9 +98,9 @@ def get_table(blocks_df, gf, min_cov, threads=8, verbose=False, group=True):
warnings.filterwarnings('ignore', category=RuntimeWarning)
empty_df = pd.DataFrame(index=blocks_df.index, columns=ugroups)
blocks_df = pd.concat([blocks_df, empty_df], axis=1)
for group in ugroups:
blocks_df[group] = np.nanmean(
np.concatenate([dres[k][None, :] for k in gf['fname'][gf['group'] == group]]), axis=0).T
for ugroup in ugroups:
blocks_df[ugroup] = np.nanmean(
np.concatenate([dres[k][None, :] for k in gf['fname'][gf['group'] == ugroup]]), axis=0).T
return blocks_df


Expand Down

0 comments on commit 08d40f2

Please sign in to comment.