Skip to content

do not simulate null ancestry; closes #357 #359

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@
UPCOMING
***************************

**Bugfixes**:

- Recapitation on tree sequences with null genomes would attempt to simulate
the history of those null genomes; this would in all but exceptional cases
fail with an error ("not all roots are at the time expected"). Now,
`recapitate` removes the "sample" flag from all such null genomes in
sampled individuals. (:user: `petrelharp`, :pr:`358`)



***************************
[1.0.4] - 2023-08-01
Expand Down
26 changes: 25 additions & 1 deletion pyslim/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,37 @@ def recapitate(ts,
that if neither ``recombination_rate`` or a ``recombination_map`` are
provided, there will be *no* recombination.

.. warning:

If the tree sequence contains (SLiM-specified) "null" genomes, as for
instance in a haploid or haplodiploid simulation, then these genomes
will remain in the tree sequence but no longer be marked as samples,
so loading back into SLiM will not work. If this is a problem, please
open an issue on github.

:param tskit.TreeSequence ts: The tree sequence to transform.
:param float ancestral_Ne: If specified, then will simulate from a single
ancestral population of this size. It is an error to specify this
as well as ``demography``.
:param dict kwargs: Any other arguments to :func:`msprime.sim_ancestry`.
'''
is_current_version(ts, _warn=True)
has_null = False
for nid in ts.samples():
n = ts.node(nid)
if n.metadata['is_null']:
has_null = True
break
if has_null:
# we need to mark NULL genomes to be *not* samples
# so we don't simulate their ancestry
tables = ts.tables
tables.nodes.clear()
for n in ts.nodes():
if n.metadata['is_null']:
n = n.replace(flags=n.flags & ~tskit.NODE_IS_SAMPLE)
tables.nodes.append(n)
ts = tables.tree_sequence()
if ancestral_Ne is not None:
if "demography" in kwargs:
raise ValueError("You cannot specify both `demography` and `ancestral_Ne`.")
Expand All @@ -66,7 +90,7 @@ def recapitate(ts,
"Not all roots of the provided tree sequence are at the time expected "
"by recapitate(). This could happen if you've simplified in "
"python before recapitating (fix: don't simplify first). "
"If could also happen in other situations, e.g., "
"It could also happen in other situations, e.g., "
"you added new individuals without parents in SLiM "
"during the course of the simulation with sim.addSubPop(), "
"in which case you will probably need to recapitate with "
Expand Down
39 changes: 28 additions & 11 deletions tests/test_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytest
import random
import warnings
import contextlib

import msprime
import tskit
Expand All @@ -14,6 +15,13 @@
import tests
from .recipe_specs import restarted_recipe_eq

def mutcontext(ts):
if ts.num_mutations > 0:
handler = pytest.warns(Warning, match="already has.*metadata")
else:
handler = contextlib.nullcontext()
return handler

class TestAnnotate(tests.PyslimTestCase):
'''
Tests for tools to annotate existing msprime-derived tree sequences.
Expand Down Expand Up @@ -270,13 +278,18 @@ def test_basic_annotation(self, helper_functions, tmp_path):
tick = 4
cycle = 1
stage = "late"
slim_ts = pyslim.annotate(
ts, model_type="WF",
tick=tick,
cycle=cycle,
stage=stage,
annotate_mutations=do_mutations,
)
if do_mutations:
handler = mutcontext(ts)
else:
handler = contextlib.nullcontext()
with handler:
slim_ts = pyslim.annotate(
ts, model_type="WF",
tick=tick,
cycle=cycle,
stage=stage,
annotate_mutations=do_mutations,
)
assert slim_ts.metadata['SLiM']['model_type'] == 'WF'
assert slim_ts.metadata['SLiM']['tick'] == tick
assert slim_ts.metadata['SLiM']['cycle'] == cycle
Expand Down Expand Up @@ -305,7 +318,8 @@ def test_annotate_refseq(self):
def test_annotate_individuals(self, helper_functions, tmp_path):
# test the workflow of annotating defaults, then assigning sexes randomly
for ts in helper_functions.get_msprime_examples():
slim_ts = pyslim.annotate(ts, model_type="nonWF", tick=1, stage="early")
with mutcontext(ts):
slim_ts = pyslim.annotate(ts, model_type="nonWF", tick=1, stage="early")
tables = slim_ts.dump_tables()
top_md = tables.metadata
top_md['SLiM']['separate_sexes'] = True
Expand Down Expand Up @@ -345,7 +359,8 @@ def test_annotate_XY(self, helper_functions, tmp_path):
random.seed(8)
for ts in helper_functions.get_msprime_examples():
for genome_type in ["X", "Y"]:
slim_ts = pyslim.annotate_defaults(ts, model_type="nonWF", slim_generation=1)
with mutcontext(ts):
slim_ts = pyslim.annotate_defaults(ts, model_type="nonWF", slim_generation=1)
tables = slim_ts.dump_tables()
top_md = tables.metadata
top_md['SLiM']['separate_sexes'] = True
Expand Down Expand Up @@ -399,7 +414,8 @@ def test_annotate_XY(self, helper_functions, tmp_path):
def test_annotate_nodes(self, helper_functions):
# test workflow of annotating defaults and then editing node metadata
for ts in helper_functions.get_msprime_examples():
slim_ts = pyslim.annotate(ts, model_type="nonWF", tick=1)
with mutcontext(ts):
slim_ts = pyslim.annotate(ts, model_type="nonWF", tick=1)
tables = slim_ts.dump_tables()
metadata = [n.metadata for n in tables.nodes]
gtypes = [
Expand All @@ -421,7 +437,8 @@ def test_annotate_nodes(self, helper_functions):

def test_annotate_mutations(self, helper_functions):
for ts in helper_functions.get_msprime_examples():
slim_ts = pyslim.annotate(ts, model_type="nonWF", tick=1)
with mutcontext(ts):
slim_ts = pyslim.annotate(ts, model_type="nonWF", tick=1)
tables = slim_ts.dump_tables()
metadata = [m.metadata for m in tables.mutations]
selcoefs = [random.uniform(0, 1) for _ in metadata]
Expand Down
4 changes: 2 additions & 2 deletions tests/test_tree_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,7 @@ def test_mutation_at(self, recipe):
ts = recipe["ts"]
for _ in range(100):
node = random.randint(0, ts.num_nodes - 1)
pos = random.randint(0, ts.sequence_length - 1)
pos = random.randint(0, int(ts.sequence_length - 1))
tree = ts.at(pos)
parent = tree.parent(node)
a = pyslim.mutation_at(ts, node, pos)
Expand All @@ -664,7 +664,7 @@ def test_nucleotide_at(self, recipe):
assert len(ts.reference_sequence.data) == ts.sequence_length
for _ in range(100):
node = random.randint(0, ts.num_nodes - 1)
pos = random.randint(0, ts.sequence_length - 1)
pos = random.randint(0, int(ts.sequence_length - 1))
tree = ts.at(pos)
parent = tree.parent(node)
a = pyslim.nucleotide_at(ts, node, pos)
Expand Down
Loading