Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
# Changelog

## [1.3.3] - 2024-08-07

Bugfix release for issues with Dirac and Beta coalescent models.

**Bug fixes**:

- Fix segfault for in Dirac and Beta coalescent models for ploidy > 2 ({issue}`2307`, {pr}`{2308}`)

- Correct the Dirac coalescent time scaling with polyploidy and population growth. ({pr}`{2310},
{user}`JereKoskela`)

- Allow psi = 1 in the Dirac coalescent ({pr}`{2310}, {user}`JereKoskela`).

## [1.3.2] - 2024-07-08

- Add `record_provenance` argument to `sim_mutations` ({issue}`2272`, {pr}`2273`, {user}`peterlharp`)
Expand Down
158 changes: 72 additions & 86 deletions lib/msprime.c
Original file line number Diff line number Diff line change
Expand Up @@ -6763,33 +6763,18 @@ msp_dirac_get_common_ancestor_waiting_time_from_rate(
msp_t *self, population_t *pop, double lambda)
{
double ret = DBL_MAX;
double alpha = pop->growth_rate;
double alpha = 2.0 * pop->growth_rate;
double t = self->time;
double u, dt, z;
double pop_size = pop->initial_size;

if (lambda > 0.0) {
u = gsl_ran_exponential(self->rng, 1.0 / lambda);
if (alpha == 0.0) {
if (self->ploidy == 1) {
ret = pop->initial_size * pop->initial_size * u;
} else {
/* For ploidy > 1 we assume N/2 two-parent families, so that the rate
* with which 2 lineages belong to a common family is (2/N)^2 */
ret = pop->initial_size * pop->initial_size * u / 4.0;
}
ret = pop_size * pop_size * u;
} else {
dt = t - pop->start_time;
if (self->ploidy == 1) {
z = 1
+ alpha * pop->initial_size * pop->initial_size * exp(-alpha * dt)
* u;
} else {
/* For ploidy > 1 we assume N/2 two-parent families, so that the rate
* with which 2 lineages belong to a common family is (2/N)^2 */
z = 1
+ alpha * pop->initial_size * pop->initial_size * exp(-alpha * dt)
* u / 4.0;
}
z = 1 + alpha * pop_size * pop_size * exp(-alpha * dt) * u;
/* if z is <= 0 no coancestry can occur */
if (z > 0) {
ret = log(z) / alpha;
Expand All @@ -6808,13 +6793,7 @@ msp_dirac_get_common_ancestor_waiting_time(
{
population_t *pop = &self->populations[pop_id];
unsigned int n = (unsigned int) avl_count(&pop->ancestors[label]);
double c = self->model.params.dirac_coalescent.c;
double lambda = n * (n - 1.0) / 2.0;
if (self->ploidy == 1) {
lambda += c;
} else {
lambda += c / (2.0 * self->ploidy);
}
double lambda = gsl_sf_choose(n, 2) + self->model.params.dirac_coalescent.c;

return msp_dirac_get_common_ancestor_waiting_time_from_rate(self, pop, lambda);
}
Expand All @@ -6824,71 +6803,71 @@ msp_dirac_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t
{
int ret = 0;
uint32_t j, n, num_participants, num_parental_copies;
avl_tree_t *ancestors, Q[4]; /* MSVC won't let us use num_pots here */
avl_tree_t *ancestors;
avl_tree_t *Q = NULL;
avl_node_t *x_node, *y_node;
segment_t *x, *y;
double nC2, p;
double psi = self->model.params.dirac_coalescent.psi;

/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}

ancestors = &self->populations[pop_id].ancestors[label];
n = avl_count(ancestors);
nC2 = gsl_sf_choose(n, 2);
if (self->ploidy == 1) {
p = (nC2 / (nC2 + self->model.params.dirac_coalescent.c));
} else {
p = (nC2 / (nC2 + self->model.params.dirac_coalescent.c / (2.0 * self->ploidy)));
}
p = nC2 / (nC2 + self->model.params.dirac_coalescent.c);

if (gsl_rng_uniform(self->rng) < p) {
/* When 2 * ploidy parental chromosomes are available, Mendelian segregation
* results in a merger only 1 / (2 * ploidy) of the time. */
if (self->ploidy == 1
|| gsl_rng_uniform(self->rng) < 1.0 / (2.0 * self->ploidy)) {
/* Choose x and y */
n = avl_count(ancestors);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n);
x_node = avl_at(ancestors, j);
tsk_bug_assert(x_node != NULL);
x = (segment_t *) x_node->item;
avl_unlink_node(ancestors, x_node);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n - 1);
y_node = avl_at(ancestors, j);
tsk_bug_assert(y_node != NULL);
y = (segment_t *) y_node->item;
avl_unlink_node(ancestors, y_node);
self->num_ca_events++;
msp_free_avl_node(self, x_node);
msp_free_avl_node(self, y_node);
ret = msp_merge_two_ancestors(self, pop_id, label, x, y, TSK_NULL, NULL);
}
/* Choose x and y */
n = avl_count(ancestors);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n);
x_node = avl_at(ancestors, j);
tsk_bug_assert(x_node != NULL);
x = (segment_t *) x_node->item;
avl_unlink_node(ancestors, x_node);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n - 1);
y_node = avl_at(ancestors, j);
tsk_bug_assert(y_node != NULL);
y = (segment_t *) y_node->item;
avl_unlink_node(ancestors, y_node);
self->num_ca_events++;
msp_free_avl_node(self, x_node);
msp_free_avl_node(self, y_node);
ret = msp_merge_two_ancestors(self, pop_id, label, x, y, TSK_NULL, NULL);
} else {
for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
num_participants = gsl_ran_binomial(self->rng, psi, n);
ret = msp_multi_merger_common_ancestor_event(
self, ancestors, Q, num_participants, num_parental_copies);
if (ret < 0) {
goto out;
}
/* All the lineages that have been assigned to the particular pots can now be
* merged.
*/
for (j = 0; j < num_parental_copies; j++) {
ret = msp_merge_ancestors(self, &Q[j], pop_id, label, TSK_NULL, NULL);
if (num_participants > 1) {
/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}
Q = tsk_malloc(num_parental_copies * sizeof(*Q));
if (Q == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}
for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
ret = msp_multi_merger_common_ancestor_event(
self, ancestors, Q, num_participants, num_parental_copies);
if (ret < 0) {
goto out;
}
/* All the lineages that have been assigned to the particular pots can now be
* merged.
*/
for (j = 0; j < num_parental_copies; j++) {
ret = msp_merge_ancestors(self, &Q[j], pop_id, label, TSK_NULL, NULL);
if (ret < 0) {
goto out;
}
}
}
}
out:
tsk_safe_free(Q);
return ret;
}

Expand Down Expand Up @@ -7035,22 +7014,12 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l
{
int ret = 0;
uint32_t j, n, num_participants, num_parental_copies;
avl_tree_t *ancestors, Q[4]; /* MSVC won't let us use num_pots here */
avl_tree_t *ancestors;
avl_tree_t *Q = NULL;
double alpha = self->model.params.beta_coalescent.alpha;
double truncation_point = beta_compute_truncation(self);
double beta_x, u, increment;

/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}

for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
ancestors = &self->populations[pop_id].ancestors[label];
n = avl_count(ancestors);
beta_x = ran_inc_beta(self->rng, 2.0 - alpha, alpha, truncation_point);
Expand Down Expand Up @@ -7088,6 +7057,22 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l
num_participants = 2 + gsl_ran_binomial(self->rng, beta_x, n - 2);
} while (gsl_rng_uniform(self->rng) > 1 / gsl_sf_choose(num_participants, 2));

/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}
Q = tsk_malloc(num_parental_copies * sizeof(*Q));
if (Q == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}

for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
ret = msp_multi_merger_common_ancestor_event(
self, ancestors, Q, num_participants, num_parental_copies);
if (ret < 0) {
Expand All @@ -7106,6 +7091,7 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l
}

out:
tsk_safe_free(Q);
return ret;
}

Expand Down
42 changes: 42 additions & 0 deletions lib/tests/test_ancestry.c
Original file line number Diff line number Diff line change
Expand Up @@ -1515,6 +1515,47 @@ test_multiple_mergers_simulation(void)
gsl_rng_free(rng);
}

static void
test_multiple_mergers_ploidy(void)
{
int ret;
size_t j, ploidy;
uint32_t n = 10;
long seed = 10;
msp_t msp;
gsl_rng *rng = safe_rng_alloc();
tsk_table_collection_t tables;

for (j = 0; j < 2; j++) {
gsl_rng_set(rng, seed);
for (ploidy = 1; ploidy < 12; ploidy++) {
ret = build_sim(&msp, &tables, rng, 10, 1, NULL, n);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(msp_set_recombination_rate(&msp, 0.1), 0);
if (j == 0) {
ret = msp_set_simulation_model_dirac(&msp, 0.9, 10);
} else {
ret = msp_set_simulation_model_beta(&msp, 1.8, 1);
}
CU_ASSERT_EQUAL(ret, 0);

msp_set_ploidy(&msp, ploidy);
ret = msp_initialise(&msp);

ret = msp_run(&msp, DBL_MAX, ULONG_MAX);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(msp_is_completed(&msp));
CU_ASSERT_TRUE(msp.time > 0);
msp_verify(&msp, 0);

ret = msp_free(&msp);
CU_ASSERT_EQUAL(ret, 0);
tsk_table_collection_free(&tables);
}
}
gsl_rng_free(rng);
}

static void
test_multiple_mergers_growth_rate(void)
{
Expand Down Expand Up @@ -3875,6 +3916,7 @@ main(int argc, char **argv)
{ "test_gc_rates", test_gc_rates },

{ "test_multiple_mergers_simulation", test_multiple_mergers_simulation },
{ "test_multiple_mergers_ploidy", test_multiple_mergers_ploidy },
{ "test_multiple_mergers_growth_rate", test_multiple_mergers_growth_rate },
{ "test_dirac_coalescent_bad_parameters", test_dirac_coalescent_bad_parameters },
{ "test_beta_coalescent_bad_parameters", test_beta_coalescent_bad_parameters },
Expand Down
4 changes: 2 additions & 2 deletions msprime/_msprimemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1177,8 +1177,8 @@ Simulator_parse_simulation_model(Simulator *self, PyObject *py_model)
goto out;
}
c = PyFloat_AsDouble(value);
if (psi <= 0 || psi >= 1.0) {
PyErr_SetString(PyExc_ValueError, "Must have 0 < psi < 1");
if (psi <= 0 || psi > 1.0) {
PyErr_SetString(PyExc_ValueError, "Must have 0 < psi <= 1");
goto out;
}
if (c < 0) {
Expand Down
2 changes: 1 addition & 1 deletion tests/test_ancestry.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def test_multimerger_dirac(self):
recombination_rate=0.1,
record_full_arg=True,
sequence_length=10,
model=msprime.DiracCoalescent(psi=0.5, c=5),
model=msprime.DiracCoalescent(psi=0.5, c=100),
)
self.verify(sim, multiple_mergers=True)

Expand Down
4 changes: 2 additions & 2 deletions tests/test_lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1327,7 +1327,7 @@ def test_dirac_simulation_model(self):
make_sim(model=model)
with pytest.raises(ValueError):
make_sim(model=get_simulation_model("dirac"))
for bad_psi in [-1, 0, -1e-6, 1, 1e6]:
for bad_psi in [-1, 0, -1e-6, 1 + 1e-6, 1e6]:
with pytest.raises(ValueError):
make_sim(
model=get_simulation_model("dirac", c=1, psi=bad_psi),
Expand All @@ -1337,7 +1337,7 @@ def test_dirac_simulation_model(self):
make_sim(
model=get_simulation_model("dirac", psi=0.5, c=bad_c),
)
for psi in [0.99, 0.2, 1e-4]:
for psi in [1, 0.2, 1e-4]:
for c in [5.0, 1e2, 1e-4]:
model = get_simulation_model("dirac", psi=psi, c=c)
sim = make_sim(model=model)
Expand Down
26 changes: 15 additions & 11 deletions verification.py
Original file line number Diff line number Diff line change
Expand Up @@ -3431,12 +3431,14 @@ def _run(self, pop_size, alpha, growth_rate, num_replicates=10000):
logging.debug(f"running Beta growth for {pop_size} {alpha} {growth_rate}")
b = growth_rate * (alpha - 1)
model = (msprime.BetaCoalescent(alpha=alpha),)
ploidy = 2
a = 1 / (2 * ploidy * self.compute_beta_timescale(pop_size, alpha, ploidy))
name = f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}"
self.compare_tmrca(
pop_size, growth_rate, model, num_replicates, a, b, ploidy, name
)
for ploidy in range(2, 7):
a = 1 / (2 * ploidy * self.compute_beta_timescale(pop_size, alpha, ploidy))
name = (
f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}"
)
self.compare_tmrca(
pop_size, growth_rate, model, num_replicates, a, b, ploidy, name
)
ploidy = 1
a = 1 / self.compute_beta_timescale(pop_size, alpha, ploidy)
name = f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}"
Expand Down Expand Up @@ -3474,12 +3476,14 @@ def test_100000_11_001(self):
class DiracGrowth(XiGrowth):
def _run(self, pop_size, c, psi, growth_rate, num_replicates=10000):
logging.debug(f"running Dirac growth for {pop_size} {c} {psi} {growth_rate}")
b = growth_rate
b = 2 * growth_rate
model = (msprime.DiracCoalescent(psi=psi, c=c),)
p = 2
a = (1 + c * psi * psi / (2 * p)) / (pop_size * pop_size)
name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}"
self.compare_tmrca(pop_size, growth_rate, model, num_replicates, a, b, p, name)
for p in range(2, 7):
a = (1 + c * psi * psi / (2 * p)) / (pop_size * pop_size)
name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}"
self.compare_tmrca(
pop_size, growth_rate, model, num_replicates, a, b, p, name
)
p = 1
a = (1 + c * psi * psi) / (pop_size * pop_size)
name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}"
Expand Down