Merged
70 changes: 40 additions & 30 deletions IBDReduce/ibdreduce_v3.py
@@ -83,6 +83,40 @@ def parse_line(line: str) -> Tuple[int, int, np.ndarray, np.ndarray]:
return chrom, pos, obs_parts, vals


+def compute_fdr_adjusted_pvalues(empp: np.ndarray, permutation_pvalues: np.ndarray) -> np.ndarray:
+    """Calculate FDR-adjusted p-values from permutation p-values.
+
+    Args:
+        empp: Empirical p-values derived from the observed statistics, one per breakpoint.
+        permutation_pvalues: Rank-based permutation p-values with shape
+            (num_breakpoints, num_permutations).
+
+    Returns:
+        Array of FDR-adjusted p-values corresponding to ``empp``.
+    """
+
+    # Permutation p-values are likely to be repeated, so memoizing Rstar saves
+    # computation at the cost of memory.
+    memoize = {}
+    adjp = np.zeros_like(empp, dtype=float)
+    num_breakpoints = permutation_pvalues.shape[0]
+
+    for i, p in enumerate(empp):
+        # Rstar is the distribution, over permutations, of the count of
+        # rejected true null hypotheses at alpha = p.
+        if p in memoize:
+            Rstar = memoize[p]
+        else:
+            Rstar = np.sum(permutation_pvalues <= p, axis=0)
+            memoize[p] = Rstar
+
+        # rp: rejected null hypotheses in the observed set; pm: expected
+        # rejections at alpha = p; rb: 95th percentile of Rstar.
+        rp = np.sum(empp <= p)
+        pm = p * num_breakpoints
+        rb = np.percentile(Rstar, 95)
+
+        # rp - pm estimates the number of true positives at alpha = p; apply
+        # the FDR ratio only when observed rejections clearly exceed chance.
+        if rp - rb >= pm:
+            adjp[i] = np.mean(Rstar / (Rstar + rp - pm))
+        else:
+            adjp[i] = np.sum(Rstar >= 1) / len(Rstar)
+
+    return adjp


def ibdlen(fpath: str, gmap: GeneticMap) -> Tuple[int, float]:
"""Calculate the IBD length for a single chromosome and permutation set.
:param fpath: The file path to the IBDmap file.
@@ -301,11 +335,11 @@ def subtract(a):

# Generate the evd
# Drop the first column, which contains the observed values.
-data = data[:, 1:]
+permutation_values = data[:, 1:]
# Rank each breakpoint's values in descending order, so the largest statistics receive the smallest rank-based p-values.
-data = stats.rankdata(-data, axis=1) / (args.nperm * args.nruns + 1)
+permutation_pvalues = stats.rankdata(-permutation_values, axis=1) / (args.nperm * args.nruns + 1)
# Calculate the extreme value distribution (EVD): for each permutation, take the minimum p-value across breakpoints.
-evd = np.min(data, axis=0)
+evd = np.min(permutation_pvalues, axis=0)

if args.print_evd:
print("EVD:\n{}".format("\t".join(str(x) for x in evd)))
@@ -322,35 +356,11 @@ def subtract(a):
lower = stats.chi2.ppf(0.025, 2 * succ) / 2.0
lower[np.isnan(lower)] = 0

-upper /= data.shape[1]
-lower /= data.shape[1]
+upper /= permutation_pvalues.shape[1]
+lower /= permutation_pvalues.shape[1]

if args.fdr:
-    # Permutation p-values are likely to be repeated, so we save on computation at the cost of memory
-    memoize = {}
-    for i, p in enumerate(empp):
-        # Rstar is the distribution of rejected null hypotheses over permutations
-        if p in memoize:
-            Rstar = memoize[p]
-        else:
-            # Count how many times each permutation has a value less than or equal to p
-            # This is the distribution of the count of rejected true null hypotheses across permutations
-            Rstar = np.sum(data <= p, axis=0)
-            memoize[p] = Rstar
-        # rp is the number of rejected null hypotheses in the observed set
-        rp = np.sum(data[:, 0] <= p)
-        # pm is the expected number of rejected null hypotheses at a given alpha = p
-        pm = p * data.shape[0]
-        # rb is the 95th percentile of the Rstar distribution
-        rb = np.percentile(Rstar, 95)
-        # The condition for applying the FDR adjustment:
-        # the number of rejected null hypotheses in the observed set, minus the 95th percentile of rejected
-        # null hypotheses at alpha = p, is greater than or equal to the expected count.
-        # The term rp - pm estimates the number of true positives at alpha = p.
-        if rp - rb >= pm:
-            adjp[i] = np.mean(Rstar / (Rstar + rp - pm))
-        else:
-            adjp[i] = sum(Rstar >= 1) / len(Rstar)
+    adjp = compute_fdr_adjusted_pvalues(empp, permutation_pvalues)

# Generate header information and write results to file.
with open(args.output, "w", encoding="utf-8") as output_file:
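For review context, here is a minimal usage sketch of the extracted helper. It mirrors the ranking pipeline shown in the hunks above (rows are breakpoints, columns are permutations). The random inputs and the empp placeholder are synthetic, purely to illustrate the call, and the import assumes IBDReduce/ is on sys.path as in the new test below.

import numpy as np
from scipy import stats

from ibdreduce_v3 import compute_fdr_adjusted_pvalues  # assumes IBDReduce/ is on sys.path

rng = np.random.default_rng(0)

# Synthetic statistics: 5 breakpoints x (1 observed + 8 permutation) columns.
data = rng.random((5, 9))

# Mirror the diff: drop the observed column, then rank within each breakpoint
# so that larger statistics map to smaller permutation p-values.
permutation_values = data[:, 1:]
permutation_pvalues = stats.rankdata(-permutation_values, axis=1) / (permutation_values.shape[1] + 1)

# Empirical p-values (one per breakpoint) would come from the observed column
# in the real pipeline; a placeholder vector stands in here.
empp = np.array([0.01, 0.05, 0.2, 0.5, 0.9])

adjp = compute_fdr_adjusted_pvalues(empp, permutation_pvalues)
print(adjp)  # one FDR-adjusted p-value per breakpoint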
4 changes: 4 additions & 0 deletions tests/data/gmap/sample.gmap
@@ -0,0 +1,4 @@
pos chrom rate
1000 1 0.0
2000 1 0.1
3000 1 0.2
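The sample.gmap fixture is whitespace-delimited with a pos/chrom/rate header. The project's actual GeneticMap loader is not part of this diff, so the reader below is a hypothetical sketch of how the fixture format could be consumed, not the project's API.

import numpy as np

def read_gmap(path: str) -> dict:
    # Hypothetical fixture reader; GeneticMap's real interface is not shown in this diff.
    table = np.loadtxt(path, skiprows=1)  # skip the "pos chrom rate" header row
    return {"pos": table[:, 0].astype(int), "chrom": table[:, 1].astype(int), "rate": table[:, 2]}

gmap = read_gmap("tests/data/gmap/sample.gmap")
assert gmap["rate"].tolist() == [0.0, 0.1, 0.2]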
3 changes: 3 additions & 0 deletions tests/data/sample_chr1_run0.txt
@@ -0,0 +1,3 @@
1 1000 0.1 0.2 0.3 0.3 0.4 0.1
1 2000 0.2 0.1 0.3 0.5 0.2 0.6
1 3000 0.1 0.1 0.1 0.2 0.3 0.4
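Each fixture row holds a chromosome, a position, and six statistics, consistent with the Tuple[int, int, np.ndarray, np.ndarray] annotation on parse_line. How many leading statistics are observed versus permuted is not visible in this diff, so the split below (first value observed, rest permutations) is an assumption for illustration only.

import numpy as np

def parse_fixture_line(line: str):
    # Hypothetical stand-in for parse_line; the observed/permutation split is assumed.
    fields = line.split()
    chrom, pos = int(fields[0]), int(fields[1])
    values = np.asarray(fields[2:], dtype=float)
    return chrom, pos, values[:1], values[1:]

with open("tests/data/sample_chr1_run0.txt", encoding="utf-8") as fh:
    rows = [parse_fixture_line(line) for line in fh if line.strip()]
assert rows[0][0] == 1 and rows[0][1] == 1000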
38 changes: 38 additions & 0 deletions tests/test_fdr.py
@@ -0,0 +1,38 @@
import pathlib
import sys

import numpy as np

PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1]
sys.path.append(str(PROJECT_ROOT / "IBDReduce"))

from ibdreduce_v3 import compute_fdr_adjusted_pvalues


def test_fdr_adjustment_matches_manual_example():
    empp = np.array([0.05, 0.2, 0.4], dtype=float)
    permutation_pvalues = np.array([
        [0.8, 0.05, 0.6, 0.1],
        [0.3, 0.25, 0.1, 0.2],
        [0.6, 0.35, 0.2, 0.45],
    ])

    adjusted = compute_fdr_adjusted_pvalues(empp, permutation_pvalues)

    expected = []
    for p in empp:
        rstar = np.sum(permutation_pvalues <= p, axis=0)
        rp = np.sum(empp <= p)
        pm = p * permutation_pvalues.shape[0]
        rb = np.percentile(rstar, 95)
        if rp - rb >= pm:
            expected.append(np.mean(rstar / (rstar + rp - pm)))
        else:
            expected.append(np.sum(rstar >= 1) / len(rstar))

    np.testing.assert_allclose(adjusted, np.array(expected, dtype=float))

    observed_rp = [np.sum(empp <= p) for p in empp]
    permutation_first_column_counts = [np.sum(permutation_pvalues[:, 0] <= p) for p in empp]
    assert observed_rp != permutation_first_column_counts
    assert observed_rp[1] == 2  # sanity-check the hand-calculated rejection counts
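Assuming pytest is available in the environment, the new test runs from the repository root with:

pytest tests/test_fdr.py -q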