Skip to content

Commit

Permalink
CNest
Browse files Browse the repository at this point in the history
  • Loading branch information
tf2 committed Feb 11, 2024
1 parent ccdbf18 commit aa0d205
Show file tree
Hide file tree
Showing 20 changed files with 50,585 additions and 0 deletions.
12 changes: 12 additions & 0 deletions src/CNest/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Package: CNest
Type: Package
Title: CNV Estimation and Association Testing
Version: 1.0
Date: 2024-01-15
Author: Tomas W Fitzgerald
Maintainer: <tomas@ebi.ac.uk>
Depends: ViteRbi, mixtools
Description: Copy number analysis for large scale genomics
License: Artistic
NeedsCompilation: yes
Packaged: 2023-10-26 15:39:12 UTC; tomas
42 changes: 42 additions & 0 deletions src/CNest/NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
export(
CNest_example,
RbinRead_depth,
RbinRead_ratio,
RbinConvert_depth,
RbinConvert_ratio,
RbinCompress_rbin,
processtobin,
processtobin_fast,
classify_gender,
assign_gender,
generate_coverage,
generate_correlation,
generate_correlation_batch,
generate_correlation_chunk,
generate_correlation_chunk_batch,
get_references,
get_ref_sample_names_by_type,
batch_get_references_to_rbin,
get_references_return,
processLogRtoBin,
run_hmm_rbin,
make_samplefile,
get_offset,
get_item_type,
get_return_type,
get_offset,
get_positions,
get_values,
compress_rbins_into_chr_cbins,
check_cntype,
write_blank_cbin,
write_sample_to_blank_cbin,
compress_rbins_into_chr_cbins_blank,
compress_rbins_into_chr_cbins_chunk,
compress_rbins_into_chr_cbins_auto_chunk,
Cbin_extract_item,
Cbin_extract_range,
cnv_gwas
)


605 changes: 605 additions & 0 deletions src/CNest/R/CNest.R

Large diffs are not rendered by default.

96 changes: 96 additions & 0 deletions src/CNest/R/CNwas.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
`check_cntype` <- function(cntype) {
cnty = -1
if(cntype=="mixed") { cnty = 1; }
if(cntype=="matched") { cnty = 2; }
if(cntype=="mismatched") { cnty = 3; }
if(cnty==-1) { stop("Invalid cntype!") }
invisible(cnty)
}

`Cbin_extract_item` <- function(samplefile, cbinfile, genome_index_pos, cntype="mixed") {
samples = read.table(samplefile)
r = .C("get_cbin_index_pos"
, "filename"=as.character(cbinfile)
, "index_pos"=as.integer(genome_index_pos)
, "number_samples"=as.integer(nrow(samples))
, "cntype"=as.integer(check_cntype(cntype))
, "values"=as.double(1:nrow(samples))
, "PACKAGE" = "CNest")
r = r$values; names(r)=samples[,1]
invisible(r)
}

`Cbin_extract_range` <- function(samplefile, cbinfile, start, stop, cntype="mixed") {
samples = read.table(samplefile)
number_positions = (stop-start)+1
long_values = as.double(1:(nrow(samples)*number_positions))
r = .C("get_cbin_index_range"
, "filename"=as.character(cbinfile)
, "index_pos"=as.integer(start)
, "number_samples"=as.integer(nrow(samples))
, "number_postions"=as.integer(number_positions)
, "cntype"=as.integer(check_cntype(cntype))
, "values"=long_values
, "PACKAGE" = "CNest")$values
invisible(r)
}

### cnv gwas
`cnv_gwas` <- function(sample_file, phenotype_file, covariates_file, cbin_file, outputname, test_type="linear", bin_type="mixed", invR=FALSE, chunk_n, chunk_len) {
my.invnorm = function(x) {
res = rank(x); res = qnorm(res/(length(res)+0.5))
return(res)
}
samples = read.table(sample_file)
n_samples = nrow(samples)
phenotypes_all = read.table(phenotype_file, header=T)
covars_all = read.table(covariates_file, header=T)
phenotypes_all = phenotypes_all[phenotypes_all$sample_id%in%samples[,1],]
phenotypes_all = phenotypes_all[phenotypes_all$sample_id%in%covars_all$sample_id,]
covars_all = covars_all[covars_all$sample_id%in%phenotypes_all$sample_id,]
phenotypes_all = phenotypes_all[order(phenotypes_all$sample_id),]; covars_all = covars_all[order(covars_all$sample_id),];
index_file = gsub(".cbin", ".cbii", cbin_file)
index = read.table(index_file)
nposes = ceiling(nrow(index)/chunk_len)
stop_pos = min((nposes*chunk_n), nrow(index))
start_pos = (stop_pos- nposes)+1
genome_pos = index[start_pos:stop_pos,]
cndata = Cbin_extract_range(sample_file, cbin_file, start_pos, stop_pos, bin_type)
for(x in 2:ncol(phenotypes_all)) {
print(paste("processing..", colnames(phenotypes_all)[x]))
chr = unlist(strsplit(gsub(".cbin", "", basename(cbin_file)), "_"))
outname = paste0(outputname, "_chunk", chunk_n, "_", chr[length(chr)], "_", colnames(phenotypes_all)[x], ".cwas", sep="")
phenotypes = data.frame(phenotypes_all[,1], phenotypes_all[,x])
colnames(phenotypes) = c(colnames(phenotypes_all)[c(1,x)])
phenotypes = phenotypes[complete.cases(phenotypes),]
covars = covars_all[covars_all$sample_id%in%phenotypes[,1],]; phenotypes = phenotypes[phenotypes[,1]%in%covars$sample_id,];
phenotypes= phenotypes[order(phenotypes[,1]),]; covars = covars[order(covars$sample_id),];
y1 = phenotypes[,2]
X = as.matrix(covars[,-(1)])
bs_ps_list = list()
if(invR==TRUE) { y1 = my.invnorm(y1); }
for(y in 1:nrow(genome_pos)) {
s_pos = ((n_samples*y)-n_samples)+1
S = cndata[s_pos:(s_pos+n_samples)]
names(S) = samples[,1]
S = S[names(S)%in%phenotypes$sample_id]
S = S[order(names(S))]
if(sum(names(S)==phenotypes$sample_id)!=length(S) | sum(names(S)==covars$sample_id)!=length(S)) { stop("sample id problem") }
beta_and_p = NULL
if(test_type=="logistic") {
beta_and_p = summary(glm(y1~S+X,family = binomial(link="logit"), na.action(na.omit)))$coefficients
}
if(test_type=="linear") {
beta_and_p = summary(glm(y1~S+X))$coefficients
}
bs_ps_list[[y]] = c(mad(S), mean(S), beta_and_p[2,c("Estimate", "Pr(>|t|)")])
}
genome_pos$n = length(y1)
cwas=cbind(genome_pos, do.call(rbind, bs_ps_list))
colnames(cwas) = c("chr", "start", "stop", "n", "mad", "mean", "beta", "p")
write.table(cwas, file=outname, sep="\t", row.names=F, quote=F)
print(paste(colnames(phenotypes_all)[x], ".. done"))
}
}


5 changes: 5 additions & 0 deletions src/CNest/R/firstlib.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.onLoad <- function(lib, pkg){
library.dynam("CNest", pkg, lib)
}
.onUnload <- function(libpath)
library.dynam.unload("CNest", libpath)
5 changes: 5 additions & 0 deletions src/CNest/R/zzz.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.First.lib <- function(lib,pkg) {
library.dynam("CNest",pkg,lib)
cat("CNest 1.0 loaded\n")
}

1 change: 1 addition & 0 deletions src/CNest/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Rbin
Loading

0 comments on commit aa0d205

Please sign in to comment.