Skip to content

Commit 16ee5d6

Browse files
authored
Merge pull request #65 from gesinecauer/modifying_pastis_poisson
Modifying pastis poisson
2 parents 3575f68 + dcf3b6f commit 16ee5d6

File tree

10 files changed

+67
-191
lines changed

10 files changed

+67
-191
lines changed

README.rst

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,21 @@ For Pastis:
1414
- pandas
1515
- iced
1616

17-
Additional dependencies for the diploid version:
17+
Additional dependencies for new features (diploid inference,
18+
multiscale optimization, etc.):
1819
- python (>= 3.6)
1920
- autograd (>= 1.3)
2021

21-
All of these dependencies can be installed easily using conda:
22+
Most of these dependencies can be installed at once using conda:
2223
`http://conda.pydata.org/miniconda.html <http://conda.pydata.org/miniconda.html>`_
2324

2425
Once conda is installed, just type the following::
2526

2627
conda install numpy scipy scikit-learn pandas
2728

29+
Or, to include the new features::
30+
31+
conda install numpy scipy scikit-learn pandas autograd
2832

2933
`iced` can be installed via::
3034

@@ -34,17 +38,26 @@ Install PASTIS
3438
--------------
3539

3640
This package uses distutils, which is the default way of installing
37-
python modules. To install in your home directory, use::
41+
python modules.
3842

39-
python setup.py install --user
43+
To install in your home directory, use::
4044

41-
To install for all users on Unix/Linux::
45+
python setup.py install --user
4246

43-
python setup.py build
44-
sudo python setup.py install
47+
or using pip::
4548

49+
pip install --user pastis
50+
51+
To install for all users on Unix/Linux::
52+
53+
python setup.py build
54+
sudo python setup.py install
4655

4756
or using pip::
4857

49-
pip install --user pastis
58+
pip install pastis
59+
60+
This will install a python package ``pastis``, and five programs:
61+
``pastis-mds``, ``pastis-nmds``, ``pastis-pm1``, ``pastis-pm2``, and
62+
``pastis-poisson``. Calling any of those five programs will display the help.
5063

doc/install.rst

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,8 @@
22
Installation
33
================================================================================
44

5-
pastis
6-
=======
7-
8-
This package uses distutils, which is the default way of installing
9-
python modules.
10-
11-
The dependencies are:
5+
Dependencies
6+
============
127

138
- python (>= 2.7)
149
- setuptools
@@ -17,17 +12,31 @@ The dependencies are:
1712
- scikit-learn (>= 0.13)
1813
- iced
1914

20-
For the diploid version, additional dependencies are required:
15+
Additional dependencies for new features (diploid inference,
16+
multiscale optimization, etc.):
2117
- python (>= 3.6)
2218
- autograd (>= 1.3)
2319

24-
Most of these dependencies can be installed at once using `Anaconda
25-
<http://docs.continuum.io/anaconda/install.html>`_
20+
Most of these dependencies can be installed at once using conda:
21+
`http://conda.pydata.org/miniconda.html <http://conda.pydata.org/miniconda.html>`_
22+
23+
Once conda is installed, just type the following::
24+
25+
conda install numpy scipy scikit-learn pandas
26+
27+
Or, to include the new features::
28+
29+
conda install numpy scipy scikit-learn pandas autograd
2630

2731
`iced` can be installed using pip::
2832

2933
pip install --user iced
3034

35+
Install PASTIS
36+
==============
37+
38+
This package uses distutils, which is the default way of installing
39+
python modules.
3140

3241
To install in your home directory, use::
3342

@@ -46,7 +55,7 @@ or using pip::
4655

4756
pip install pastis
4857

49-
This will install a python package ``pastis``, and four programs ``pastis-mds``,
50-
``pastis-nmds``, ``pastis-pm1`` and ``pastis-pm2``. Calling any of those four
51-
programs will display the help.
58+
This will install a python package ``pastis``, and five programs:
59+
``pastis-mds``, ``pastis-nmds``, ``pastis-pm1``, ``pastis-pm2``, and
60+
``pastis-poisson``. Calling any of those five programs will display the help.
5261

pastis/optimization/callbacks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from timeit import default_timer as timer
33
from datetime import timedelta
44
import os
5-
from .utils_diploid import find_beads_to_remove
5+
from .utils_poisson import find_beads_to_remove
66
from .multiscale_optimization import decrease_lengths_res, decrease_struct_res
77

88

pastis/optimization/constraints.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import autograd.numpy as ag_np
88
from autograd.builtins import SequenceBox
99
from .multiscale_optimization import decrease_struct_res, decrease_lengths_res
10-
from .utils_diploid import find_beads_to_remove
10+
from .utils_poisson import find_beads_to_remove
1111

1212

1313
class Constraints(object):
@@ -389,7 +389,7 @@ def distance_between_homologs(structures, lengths, ploidy, mixture_coefs=None,
389389
390390
"""
391391

392-
from .utils_diploid import _format_structures
392+
from .utils_poisson import _format_structures
393393

394394
structures = _format_structures(
395395
structures=structures, lengths=lengths, ploidy=ploidy,

pastis/optimization/counts.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,8 @@
88

99
from iced.io import write_counts
1010

11-
from .utils_diploid import _constraint_dis_indices
12-
from .utils_diploid import find_beads_to_remove
13-
from .utils_diploid import find_beads_to_remove
11+
from .utils_poisson import _constraint_dis_indices
12+
from .utils_poisson import find_beads_to_remove
1413

1514
from .multiscale_optimization import decrease_lengths_res
1615
from .multiscale_optimization import decrease_counts_res
@@ -37,7 +36,6 @@ def ambiguate_counts(counts, lengths, ploidy, exclude_zeros=None):
3736
Aggregated and ambiguated contact counts matrix.
3837
"""
3938

40-
4139
lengths = np.array(lengths)
4240
n = lengths.sum()
4341

@@ -193,7 +191,6 @@ def _check_counts_matrix(counts, lengths, ploidy, exclude_zeros=True,
193191
"""Check counts dimensions, reformat, & excise selected chromosomes.
194192
"""
195193

196-
197194
if chrom_subset_index is not None and len(chrom_subset_index) / max(counts.shape) not in (1, 2):
198195
raise ValueError("chrom_subset_index size (%d) does not fit counts"
199196
" shape (%d, %d)." %

pastis/optimization/multiscale_optimization.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ def get_multiscale_variances_from_struct(structures, lengths, multiscale_factor,
437437
low-resolution bead.
438438
"""
439439

440-
from .utils_diploid import _format_structures
440+
from .utils_poisson import _format_structures
441441

442442
if multiscale_factor == 1:
443443
return None

pastis/optimization/pastis_algorithms.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0,
4545
callback_freq=None, callback_function=None, reorienter=None,
4646
alpha_true=None, struct_true=None, input_weight=None,
4747
exclude_zeros=False, null=False, mixture_coefs=None, verbose=True):
48-
"""Infer 3D structures with PASTIS-diploid.
48+
"""Infer 3D structures with PASTIS via Poisson model.
4949
5050
Optimize 3D structure from Hi-C contact counts data for diploid
5151
organisms. Optionally perform multiscale optimization during inference.
@@ -416,21 +416,20 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0,
416416
return struct_, infer_var
417417

418418

419-
def pastis_diploid(counts, lengths, ploidy, outdir='', chromosomes=None,
420-
chrom_subset=None,
421-
alpha=None, seed=0, normalize=True, filter_threshold=0.04,
422-
alpha_init=-3., max_alpha_loop=20, multiscale_rounds=1,
423-
use_multiscale_variance=True, max_iter=10000000000, factr=10000000.,
424-
pgtol=1e-05, alpha_factr=1000000000000., bcc_lambda=0.,
425-
hsc_lambda=0., hsc_r=None, hsc_min_beads=5, callback_function=None,
426-
print_freq=100, history_freq=100, save_freq=None, piecewise=False,
427-
piecewise_step=None, piecewise_chrom=None, piecewise_min_beads=5,
428-
piecewise_fix_homo=False, piecewise_opt_orient=True,
429-
alpha_true=None,
430-
struct_true=None, init='msd', input_weight=None,
431-
exclude_zeros=False,
432-
null=False, mixture_coefs=None, verbose=True):
433-
"""Infer 3D structures with PASTIS.
419+
def pastis_poisson(counts, lengths, ploidy, outdir='', chromosomes=None,
420+
chrom_subset=None, alpha=None, seed=0, normalize=True,
421+
filter_threshold=0.04, alpha_init=-3., max_alpha_loop=20,
422+
multiscale_rounds=1, use_multiscale_variance=True,
423+
max_iter=10000000000, factr=10000000., pgtol=1e-05,
424+
alpha_factr=1000000000000., bcc_lambda=0., hsc_lambda=0.,
425+
hsc_r=None, hsc_min_beads=5, callback_function=None,
426+
print_freq=100, history_freq=100, save_freq=None,
427+
piecewise=False, piecewise_step=None, piecewise_chrom=None,
428+
piecewise_min_beads=5, piecewise_fix_homo=False,
429+
piecewise_opt_orient=True, alpha_true=None, struct_true=None,
430+
init='msd', input_weight=None, exclude_zeros=False,
431+
null=False, mixture_coefs=None, verbose=True):
432+
"""Infer 3D structures with PASTIS via Poisson model.
434433
435434
Infer 3D structure from Hi-C contact counts data for haploid or diploid
436435
organisms.

pastis/optimization/utils_diploid.py renamed to pastis/optimization/utils_poisson.py

Lines changed: 0 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
from __future__ import print_function
22

33
import numpy as np
4-
from sklearn.metrics import euclidean_distances
5-
from scipy import sparse
64
import sys
75

86

@@ -190,143 +188,3 @@ def _struct_replace_nan(struct, lengths, kind='linear', random_state=None):
190188
warn('The following chromosomes were all NaN: ' + ' '.join(nan_chroms))
191189

192190
return(interpolated_struct)
193-
194-
195-
class ConstantDispersion(object):
196-
def __init__(self, coef=7):
197-
self.coef = coef
198-
199-
def fit(self, X, y):
200-
return self
201-
202-
def predict(self, X):
203-
return self.coef * np.ones(X.shape)
204-
205-
def derivate(self, X):
206-
return np.zeros(X.shape)
207-
208-
209-
def compute_wish_distances(counts, alpha=-3., beta=1., bias=None):
210-
"""
211-
Computes wish distances from a counts matrix
212-
213-
Parameters
214-
----------
215-
counts : ndarray
216-
Interaction counts matrix
217-
218-
alpha : float, optional, default: -3
219-
Coefficient of the power law
220-
221-
beta : float, optional, default: 1
222-
Scaling factor
223-
224-
Returns
225-
-------
226-
wish_distances
227-
"""
228-
if beta == 0:
229-
raise ValueError("beta cannot be equal to 0.")
230-
counts = counts.copy()
231-
if sparse.issparse(counts):
232-
if not sparse.isspmatrix_coo(counts):
233-
counts = counts.tocoo()
234-
if bias is not None:
235-
bias = bias.flatten()
236-
counts.data /= bias[counts.row] * bias[counts.col]
237-
wish_distances = counts / beta
238-
wish_distances.data[wish_distances.data != 0] **= 1. / alpha
239-
return wish_distances
240-
else:
241-
wish_distances = counts.copy() / beta
242-
wish_distances[wish_distances != 0] **= 1. / alpha
243-
244-
return wish_distances
245-
246-
247-
def eval_no_f(x, user_data=None):
248-
"""
249-
Evaluate the object function (no objective function).
250-
"""
251-
return 0.
252-
253-
254-
def eval_grad_no_f(X, user_data=None):
255-
m, n, counts, alpha, beta, d = user_data
256-
return np.zeros(n * m)
257-
258-
259-
def eval_g_no(x, user_data=None):
260-
return np.array([0, 0.])
261-
262-
263-
def eval_jac_g_no(x, flag, user_data=None):
264-
if flag:
265-
return 0, 0
266-
return np.array([0., 0.])
267-
268-
269-
def eval_g(x, user_data=None):
270-
"""
271-
Computes the constraints
272-
"""
273-
274-
m, n, wish_dist, alpha, beta, d = user_data
275-
276-
x = x.reshape((m, n))
277-
dis = euclidean_distances(x)
278-
dis = dis ** 2
279-
mask = np.invert(np.tri(m, dtype=np.bool))
280-
g = np.concatenate([dis[mask].flatten(), ((x - d) ** 2).sum(axis=1)])
281-
return g
282-
283-
284-
def eval_jac_g(x, flag, user_data=None):
285-
"""
286-
Computes the jacobian for the constraints mentionned in duan-et-al
287-
"""
288-
289-
m, n, wish_dist, alpha, beta, d = user_data
290-
291-
if flag:
292-
ncon = m * (m - 1) / 2
293-
row = np.arange(ncon).repeat(2 * n)
294-
295-
tmp = np.arange(n).repeat(m - 1)
296-
tmp = tmp.reshape((n, m - 1))
297-
tmp = tmp.T
298-
tmp1 = np.arange(n, m * n)
299-
tmp1 = tmp1.reshape((m - 1, n))
300-
tmp = np.concatenate((tmp, tmp1), axis=1)
301-
tmp1 = tmp.copy()
302-
for it in range(m):
303-
tmp += n
304-
tmp = tmp[:-1]
305-
tmp1 = np.concatenate((tmp1, tmp))
306-
307-
# The second part of the jacobian is the restrictions on the distances
308-
# to the origin and/or the distances to the SPB/nucleolus center
309-
row_2 = np.arange(m).repeat(n)
310-
col = np.arange(n * m)
311-
312-
col = np.concatenate([tmp1.flatten(), col])
313-
row = np.concatenate([row, row_2])
314-
return row.flatten(), col.flatten()
315-
else:
316-
x = x.reshape((m, n))
317-
tmp = x.repeat(m, axis=0).reshape((m, m, n))
318-
dif = tmp - tmp.transpose(1, 0, 2)
319-
mask = np.invert(np.tri(m, dtype=np.bool))
320-
dif = dif[mask]
321-
jac = 2 * np.concatenate((dif, - dif), axis=1).flatten()
322-
323-
# The second part of the jacobian is the restrictions on the distances
324-
# to the origin and/or the distances to the SPB/nucleolus center
325-
jac2 = 2 * (x - d).flatten()
326-
return np.concatenate([jac, jac2]).flatten()
327-
328-
329-
def eval_h(x, lagrange, obj_factor, flag, user_data=None):
330-
"""
331-
"""
332-
return False

pastis/script/pastis-diploid renamed to pastis/script/pastis-poisson

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#! /usr/bin/env python
22

3-
from pastis.optimization.pastis_algorithms import pastis_diploid
3+
from pastis.optimization.pastis_algorithms import pastis_poisson
44
import argparse
55

66

@@ -105,4 +105,4 @@ parser.add_argument('--exclude_zeros', default=False,
105105
parser.add_argument('--null', dest='null', default=False,
106106
action='store_true', help=argparse.SUPPRESS)
107107

108-
pastis_diploid(**vars(parser.parse_args()))
108+
pastis_poisson(**vars(parser.parse_args()))

0 commit comments

Comments
 (0)