Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions apply_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def apply_model(model, f, outdir, context, chromlist, train_rids, me_col, chunk_

#read in bedfile, grab reads from valid chromosomes
chrom=''
reader = pd.read_csv(f, usecols=[0, 1, 2, 3, me_col], names=['chrom', 'start', 'end', 'rid', 'me'], sep='\t', comment='#', chunksize=chunk_size)
reader = pd.read_csv(f, usecols=[0, 1, 2, 3, 5, me_col], names=['chrom', 'start', 'end', 'rid', 'strand', 'me'], sep='\t', comment='#', chunksize=chunk_size)

with tqdm(total=len(chromlist), leave=True) as pbar:
i=-1
Expand All @@ -161,13 +161,15 @@ def apply_model(model, f, outdir, context, chromlist, train_rids, me_col, chunk_
no_me_b12['blockCount'] = 1
no_me_b12['blockStarts'] = 1
no_me_b12['blockSizes'] = no_me_b12['end'] - no_me_b12['start']
no_me_b12['score'] = '.'

if not circle:
no_me_b12['blockSizes'] = no_me_b12['end'] - no_me_b12['start']
else:
no_me_b12['blockSizes'] = no_me_b12['end']*3 - no_me_b12['start']

no_me_b12.columns = ['chrom', 'start', 'end', 'name', 'thickStart', 'thickEnd', 'blockCount', 'itemRgb', 'blockSizes', 'blockStarts']

no_me_b12 = no_me_b12.rename(columns={'rid': 'name'})
no_me_b12 = no_me_b12[['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']]
chrom = chunk['chrom'].iloc[0]

#generate bed12 for reads with methylation
Expand All @@ -176,7 +178,8 @@ def apply_model(model, f, outdir, context, chromlist, train_rids, me_col, chunk_
b12['thickStart'] = b12['start']
b12['thickEnd'] = b12['end']
b12['itemRgb'] = '255,0,0'

b12['score'] = '.'

#grab methylations
chunk = chunk['me'].str.split(pat=',', expand=True)

Expand Down Expand Up @@ -223,7 +226,7 @@ def apply_model(model, f, outdir, context, chromlist, train_rids, me_col, chunk_

#combine, sort bed12s
b12 = b12.rename(columns={'rid': 'name'})
b12.columns = ['chrom', 'start', 'end', 'name', 'thickStart', 'thickEnd', 'blockCount', 'itemRgb', 'blockStarts', 'blockSizes']
b12 = b12[['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']]
b12 = pd.concat([b12, no_me_b12])
b12 = b12.sort_values(by=['chrom', 'start'])

Expand Down Expand Up @@ -283,4 +286,4 @@ def apply_model(model, f, outdir, context, chromlist, train_rids, me_col, chunk_
fout.write(fin.read())
os.remove(tmp_file)

os.rmdir(tmp_dir)
os.rmdir(tmp_dir)
16 changes: 11 additions & 5 deletions apply_model_multiprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,16 @@ def process_chunk(chunk, model, context, chromlist, train_rids, me_col, chunk_si
no_me_b12['itemRgb'] = '255,0,0'
no_me_b12['blockCount'] = 1
no_me_b12['blockStarts'] = 1
no_me_b12['score'] = '.'

if not circle:
no_me_b12['blockSizes'] = no_me_b12['end'] - no_me_b12['start']
else:
no_me_b12['blockSizes'] = no_me_b12['end']*3 - no_me_b12['start']

no_me_b12.columns = ['chrom', 'start', 'end', 'name', 'thickStart', 'thickEnd', 'blockCount', 'itemRgb', 'blockSizes', 'blockStarts']
no_me_b12 = no_me_b12.rename(columns={'rid': 'name'})
no_me_b12 = no_me_b12[['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']]
chrom = chunk['chrom'].iloc[0]

else:
no_me_b12=pd.DataFrame()
Expand All @@ -157,6 +161,7 @@ def process_chunk(chunk, model, context, chromlist, train_rids, me_col, chunk_si
b12['thickStart'] = b12['start']
b12['thickEnd'] = b12['end']
b12['itemRgb'] = '255,0,0'
b12['score'] = '.'

# Grab methylations
chunk = chunk['me'].str.split(pat=',', expand=True)
Expand Down Expand Up @@ -199,7 +204,7 @@ def process_chunk(chunk, model, context, chromlist, train_rids, me_col, chunk_si

# Combine, sort bed12s
b12 = b12.rename(columns={'rid': 'name'})
b12.columns = ['chrom', 'start', 'end', 'name', 'thickStart', 'thickEnd', 'blockCount', 'itemRgb', 'blockStarts', 'blockSizes']
b12 = b12[['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']]
b12 = pd.concat([b12, no_me_b12])
b12 = b12.sort_values(by=['chrom', 'start'])

Expand Down Expand Up @@ -243,9 +248,9 @@ def apply_model(model, f, outdir, context, chromlist, train_rids, me_col, chunk_
try:
# read in fibertools output bedfile in chunks
if min_me > 0:
reader = pd.read_csv(f, usecols=[0, 1, 2, 3, 13, 14, me_col], names=['chrom', 'start', 'end', 'rid', 'at_ct','me_ct','me'], sep='\t', comment='#', chunksize=chunk_size)
reader = pd.read_csv(f, usecols=[0, 1, 2, 3, 5, 13, 14, me_col], names=['chrom', 'start', 'end', 'rid', 'strand', 'at_ct','me_ct','me'], sep='\t', comment='#', chunksize=chunk_size)
else:
reader = pd.read_csv(f, usecols=[0, 1, 2, 3, me_col], names=['chrom', 'start', 'end', 'rid', 'me'], sep='\t', comment='#', chunksize=chunk_size)
reader = pd.read_csv(f, usecols=[0, 1, 2, 3, 5, me_col], names=['chrom', 'start', 'end', 'rid', 'strand', 'me'], sep='\t', comment='#', chunksize=chunk_size)
#assign each chunk to a pool
with Pool(core_count) as pool:
for i, chunk in enumerate(reader):
Expand Down Expand Up @@ -322,4 +327,5 @@ def combine_temp_files(chromlist, tmp_dir, outdir, dataset):

#this consistently fails on my tests because of permissions, but it's not a huge issue
#os.rmdir(tmp_dir)
logging.info("Temporary directory removed and script completed.")
logging.info("Temporary directory removed and script completed.")