Skip to content

Switch the usage example to use bio2zarr.tskit #1032

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ dist:
clean:
rm -fR $(BUILDDIR)
rm -rf _static/example_data.vcz/ancestral_state
rm -rf notebook-simulation*

26 changes: 7 additions & 19 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -320,10 +320,9 @@ import sys
import os
import subprocess

from Bio import bgzf
import numpy as np

import bio2zarr.tskit as ts2z
import msprime
import numpy as np
import tsinfer

if getattr(builtins, "__IPYTHON__", False): # if running IPython: e.g. in a notebook
Expand All @@ -343,27 +342,16 @@ ts = msprime.sim_ancestry(
random_seed=6,
)
ts = msprime.sim_mutations(ts, rate=1e-8, random_seed=7)
ts.dump(name + "-source.trees")
ts.dump(f"{name}-source.trees")
print(
f"Simulated {ts.num_samples} samples over {seq_len/1e6} Mb:",
f"{ts.num_trees} trees and {ts.num_sites} sites"
)

# Convert to a zarr file: this should be easier once a tskit2zarr utility is made, see
# https://github.com/sgkit-dev/bio2zarr/issues/232
np.save(f"{name}-AA.npy", [s.ancestral_state for s in ts.sites()])
vcf_name = f"{name}.vcf.gz"
with bgzf.open(vcf_name, "wt") as f:
ts.write_vcf(f)
subprocess.run(["tabix", vcf_name])
ret = subprocess.run(
[python, "-m", "bio2zarr", "vcf2zarr", "convert", "--force", vcf_name, f"{name}.vcz"],
stderr = subprocess.DEVNULL if name == "notebook-simulation" else None,
)
ts2z.convert(f"{name}-source.trees", f"{name}.vcz")
assert os.path.exists(f"{name}.vcz")

if ret.returncode == 0:
print(f"Converted to {name}.vcz")
print(f"Converted to {name}.vcz")
```

Here we first run a simulation then we create a vcf file and convert it to .vcz format.
Expand Down Expand Up @@ -431,8 +419,8 @@ Once we have our `.vcz` file created, running the inference is straightforward.

```{code-cell} ipython3
# Infer & save a ts from the notebook simulation.
ancestral_states = np.load(f"{name}-AA.npy")
vdata = tsinfer.VariantData(f"{name}.vcz", ancestral_states)
vcf_zarr = zarr.load(f"{name}.vcz")
vdata = tsinfer.VariantData(f"{name}.vcz", ancestral_state=vcf_zarr["variant_allele"][:, 0])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see, this is ugly. sgkit-dev/bio2zarr#387

tsinfer.infer(vdata, progress_monitor=True, num_threads=4).dump(name + ".trees")
```

Expand Down
5 changes: 3 additions & 2 deletions requirements/CI-docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ humanize==4.12.1
lmdb==1.6.2
tqdm==4.67.1
daiquiri==3.3.0
tskit==0.6.4
msprime==1.3.3
sgkit[vcf]==0.9.0
ipywidgets==8.1.5
Bio==1.7.1
bio2zarr==0.1.4
bio2zarr==0.1.6
sphinx-book-theme #Unpinned to allow easy updating.
pyfaidx==0.8.1.3
pyfaidx==0.8.1.3
2 changes: 1 addition & 1 deletion requirements/CI-tests-complete/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ pytest==8.3.5
pytest-cov==6.0.0
seaborn==0.13.2
sgkit[vcf]==0.9.0
tskit==0.6.0
tskit==0.6.4
tqdm==4.67.1
twine==6.1.0
2 changes: 1 addition & 1 deletion requirements/CI-tests-conda/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ daiquiri==3.0.0 # Pinned as conda package not updating
matplotlib==3.9.4
seaborn==0.13.2
colorama==0.4.6
tskit==0.6.0
tskit==0.6.4
3 changes: 2 additions & 1 deletion requirements/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ tqdm
humanize
daiquiri
msprime >= 1.0.0
tskit >= 0.5.3
tskit >= 0.6.4
bio2zarr >= 0.1.6
lmdb
pre-commit
pytest
Expand Down
Loading