1
+ from __future__ import print_function
2
+
3
+ import sys
4
+
5
+ if sys .version_info [0 ] < 3 :
6
+ raise Exception ("Must be using Python 3" )
7
+
1
8
import os
2
9
import numpy as np
3
10
import pandas as pd
4
11
from sklearn .utils import check_random_state
5
- from .utils import _print_code_header
12
+ from .utils_poisson import _print_code_header
13
+ from distutils .util import strtobool
6
14
7
15
8
16
def _test_objective (struct , counts , lengths , ploidy , alpha , bias ,
@@ -37,7 +45,7 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0,
37
45
normalize = True , filter_threshold = 0.04 , alpha_init = - 3. ,
38
46
max_alpha_loop = 20 , beta = None , multiscale_factor = 1 ,
39
47
multiscale_rounds = 1 , use_multiscale_variance = True ,
40
- final_multiscale_round = False , init = 'msd ' , max_iter = 10000000000 ,
48
+ final_multiscale_round = False , init = 'mds ' , max_iter = 10000000000 ,
41
49
factr = 10000000. , pgtol = 1e-05 , alpha_factr = 1000000000000. ,
42
50
bcc_lambda = 0. , hsc_lambda = 0. , hsc_r = None , hsc_min_beads = 5 ,
43
51
fullres_torm = None , struct_draft_fullres = None , draft = False ,
@@ -138,7 +146,6 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0,
138
146
infer_var : dict
139
147
A few of the variables used in inference or generated by inference.
140
148
Keys: 'alpha', 'beta', 'hsc_r', 'obj', 'seed', and 'converged'.
141
-
142
149
"""
143
150
144
151
from .counts import preprocess_counts , ambiguate_counts , _update_betas_in_counts_matrices
@@ -148,34 +155,40 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0,
148
155
from .poisson import PastisPM
149
156
from .estimate_alpha_beta import _estimate_beta
150
157
from .multiscale_optimization import get_multiscale_variances_from_struct , _choose_max_multiscale_factor , decrease_lengths_res
151
- from .utils import find_beads_to_remove
152
-
153
- try :
154
- os .makedirs (outdir )
155
- except OSError :
156
- pass
157
- if seed is None :
158
- seed_str = ''
159
- else :
160
- seed_str = '.%03d' % seed
161
- out_file = os .path .join (outdir , 'struct_inferred%s.coords' % seed_str )
162
- orient_file = os .path .join (outdir , 'orient_inferred%s.coords' % seed_str )
163
- history_file = os .path .join (outdir , 'history%s' % seed_str )
164
- infer_var_file = os .path .join (
165
- outdir , 'inference_variables%s' % seed_str )
166
- out_fail = os .path .join (outdir , 'struct_nonconverged%s.coords' % seed_str )
167
-
168
- if os .path .exists (out_file ):
169
- print ('CONVERGED' , flush = True )
170
- infer_var = dict (pd .read_csv (
171
- infer_var_file , sep = '\t ' , header = None , squeeze = True , index_col = 0 ))
172
- infer_var ['beta' ] = [float (b ) for b in infer_var ['beta' ].split ()]
173
- infer_var ['alpha' ] = float (infer_var ['alpha' ])
174
- struct_ = np .loadtxt (out_file )
175
- return struct_ , infer_var
176
- elif os .path .exists (out_fail ):
177
- print ('OPTIMIZATION DID NOT CONVERGE' , flush = True )
178
- exit (1 )
158
+ from .utils_poisson import find_beads_to_remove
159
+
160
+ if outdir is not None :
161
+ try :
162
+ os .makedirs (outdir )
163
+ except OSError :
164
+ pass
165
+ if seed is None :
166
+ seed_str = ''
167
+ else :
168
+ seed_str = '.%03d' % seed
169
+ out_file = os .path .join (outdir , 'struct_inferred%s.coords' % seed_str )
170
+ orient_file = os .path .join (
171
+ outdir , 'orient_inferred%s.coords' % seed_str )
172
+ history_file = os .path .join (outdir , 'history%s' % seed_str )
173
+ infer_var_file = os .path .join (
174
+ outdir , 'inference_variables%s' % seed_str )
175
+ out_fail = os .path .join (
176
+ outdir , 'struct_nonconverged%s.coords' % seed_str )
177
+
178
+ if os .path .exists (out_file ) or os .path .exists (out_fail ):
179
+ if os .path .exists (out_file ):
180
+ print ('CONVERGED' , flush = True )
181
+ elif os .path .exists (out_fail ):
182
+ print ('OPTIMIZATION DID NOT CONVERGE' , flush = True )
183
+ infer_var = dict (pd .read_csv (
184
+ infer_var_file , sep = '\t ' , header = None , squeeze = True ,
185
+ index_col = 0 ))
186
+ infer_var ['beta' ] = [float (b ) for b in infer_var ['beta' ].split ()]
187
+ infer_var ['hsc_r' ] = [float (r ) for r in infer_var ['hsc_r' ].split ()]
188
+ infer_var ['alpha' ] = float (infer_var ['alpha' ])
189
+ infer_var ['converged' ] = strtobool (infer_var ['converged' ])
190
+ struct_ = np .loadtxt (out_file )
191
+ return struct_ , infer_var
179
192
180
193
random_state = np .random .RandomState (seed )
181
194
random_state = check_random_state (random_state )
@@ -256,6 +269,8 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0,
256
269
alpha_true = alpha_true , struct_true = struct_true ,
257
270
input_weight = input_weight , exclude_zeros = exclude_zeros ,
258
271
null = null , mixture_coefs = mixture_coefs , verbose = verbose )
272
+ if not infer_var_fullres ['converged' ]:
273
+ return struct_draft_fullres , infer_var_fullres
259
274
alpha_ = infer_var_fullres ['alpha' ]
260
275
beta_ = infer_var_fullres ['beta' ]
261
276
counts = _update_betas_in_counts_matrices (
@@ -290,7 +305,7 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0,
290
305
simple_diploid_for_lowres = True
291
306
multiscale_factor_for_lowres = _choose_max_multiscale_factor (
292
307
lengths = lengths , min_beads = hsc_min_beads )
293
- struct_draft_lowres , _ = infer (
308
+ struct_draft_lowres , infer_var_lowres = infer (
294
309
counts_raw = counts_for_lowres ,
295
310
outdir = os .path .join (outdir , 'struct_draft_lowres' ),
296
311
lengths = lengths , ploidy = ploidy , alpha = alpha_ ,
@@ -309,6 +324,8 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0,
309
324
struct_true = struct_true , input_weight = input_weight ,
310
325
exclude_zeros = exclude_zeros , null = null ,
311
326
mixture_coefs = mixture_coefs , verbose = verbose )
327
+ if not infer_var_lowres ['converged' ]:
328
+ return struct_draft_lowres , infer_var_lowres
312
329
hsc_r = distance_between_homologs (
313
330
structures = struct_draft_lowres ,
314
331
lengths = decrease_lengths_res (
@@ -359,20 +376,30 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0,
359
376
struct_ [torm ] = np .nan
360
377
361
378
infer_var = {'alpha' : pm .alpha_ , 'beta' : pm .beta_ , 'hsc_r' : hsc_r ,
362
- 'obj' : pm .obj_ , 'seed' : seed }
379
+ 'obj' : pm .obj_ , 'seed' : seed , 'converged' : pm .converged_ }
380
+
381
+ if outdir is not None :
382
+ with open (infer_var_file , 'w' ) as f :
383
+ for k , v in infer_var .items ():
384
+ if isinstance (v , np .ndarray ) or isinstance (v , list ):
385
+ f .write (
386
+ '%s\t %s\n ' % (k , ' ' .join (['%g' % x for x in v ])))
387
+ else :
388
+ f .write ('%s\t %g\n ' % (k , v ))
389
+ if reorienter is not None and reorienter .reorient :
390
+ np .savetxt (orient_file , pm .orientation_ )
391
+ if pm .converged_ :
392
+ np .savetxt (out_file , struct_ )
393
+ if pm .history_ is not None :
394
+ pd .DataFrame (
395
+ pm .history_ ).to_csv (history_file , sep = '\t ' , index = False )
396
+ else :
397
+ np .savetxt (out_fail , struct_ )
363
398
364
- if reorienter is not None and reorienter .reorient :
365
- np .savetxt (orient_file , pm .orientation_ )
366
399
if pm .converged_ :
367
- np .savetxt (out_file , struct_ )
368
- pd .Series (infer_var ).to_csv (infer_var_file , sep = '\t ' , header = False )
369
- if pm .history_ is not None :
370
- pd .DataFrame (
371
- pm .history_ ).to_csv (history_file , sep = '\t ' , index = False )
372
400
return struct_ , infer_var
373
401
else :
374
- np .savetxt (out_fail , struct_ )
375
- exit (1 )
402
+ return None , infer_var
376
403
377
404
else :
378
405
# BEGIN MULTISCALE OPTIMIZATION
@@ -413,6 +440,8 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0,
413
440
alpha_true = alpha_true , struct_true = struct_true ,
414
441
input_weight = input_weight , exclude_zeros = exclude_zeros ,
415
442
null = null , mixture_coefs = mixture_coefs , verbose = verbose )
443
+ if not infer_var ['converged' ]:
444
+ return struct_ , infer_var
416
445
return struct_ , infer_var
417
446
418
447
@@ -427,7 +456,7 @@ def pastis_poisson(counts, lengths, ploidy, outdir='', chromosomes=None,
427
456
piecewise = False , piecewise_step = None , piecewise_chrom = None ,
428
457
piecewise_min_beads = 5 , piecewise_fix_homo = False ,
429
458
piecewise_opt_orient = True , alpha_true = None , struct_true = None ,
430
- init = 'msd ' , input_weight = None , exclude_zeros = False ,
459
+ init = 'mds ' , input_weight = None , exclude_zeros = False ,
431
460
null = False , mixture_coefs = None , verbose = True ):
432
461
"""Infer 3D structures with PASTIS via Poisson model.
433
462
@@ -492,10 +521,18 @@ def pastis_poisson(counts, lengths, ploidy, outdir='', chromosomes=None,
492
521
hsc_min_beads : int, optional
493
522
For diploid organisms: number of beads in the low-resolution
494
523
structure from which `hsc_r` is estimated.
524
+
525
+ Returns
526
+ -------
527
+ struct_ : array_like of float of shape (lengths.sum() * ploidy, 3)
528
+ 3D structure resulting from the optimization.
529
+ infer_var : dict
530
+ A few of the variables used in inference or generated by inference.
531
+ Keys: 'alpha', 'beta', 'hsc_r', 'obj', 'seed', and 'converged'.
495
532
"""
496
533
497
534
from .load_data import load_data
498
- from .stepwise_whole_genome import stepwise_inference
535
+ from .piecewise_whole_genome import piecewise_inference
499
536
500
537
lengths_full = lengths
501
538
chrom_full = chromosomes
@@ -529,7 +566,7 @@ def pastis_poisson(counts, lengths, ploidy, outdir='', chromosomes=None,
529
566
input_weight = input_weight , exclude_zeros = exclude_zeros ,
530
567
null = null , mixture_coefs = mixture_coefs , verbose = verbose )
531
568
else :
532
- stepwise_inference (
569
+ struct_ , infer_var = piecewise_inference (
533
570
counts = counts , outdir = outdir , lengths = lengths_subset , ploidy = ploidy ,
534
571
chromosomes = chrom_subset , alpha = alpha , seed = seed , normalize = normalize ,
535
572
filter_threshold = filter_threshold , alpha_init = alpha_init ,
@@ -548,6 +585,8 @@ def pastis_poisson(counts, lengths, ploidy, outdir='', chromosomes=None,
548
585
input_weight = input_weight , exclude_zeros = exclude_zeros , null = null ,
549
586
mixture_coefs = mixture_coefs , verbose = verbose )
550
587
588
+ return struct_ , infer_var
589
+
551
590
552
591
def _output_subdir (outdir , chrom_full , chrom_subset = None , null = False ,
553
592
piecewise = False , piecewise_step = None ,
0 commit comments