Skip to content

Commit

Permalink
STAARpipelineSummary v0.9.7
Browse files Browse the repository at this point in the history
  • Loading branch information
xihaoli committed Dec 11, 2023
1 parent 9e7ca08 commit 94abbc8
Show file tree
Hide file tree
Showing 9 changed files with 263 additions and 256 deletions.
37 changes: 19 additions & 18 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,29 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- name: Install macOS system dependencies
if: runner.os == 'macOS'
run: |
brew install imagemagick@6
brew install libgit2
- uses: actions/checkout@v3
- uses: r-lib/actions/setup-r@v2
with:
r-version: 4.2.3
- name: Install BiocManager
run: |
if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
shell: Rscript {0}
- name: Install GENESIS, GenomicFeatures
run: |
BiocManager::install(c("GENESIS", "GenomicFeatures"))
shell: Rscript {0}
- name: Install TxDb.Hsapiens.UCSC.hg38.knownGene
run: |
BiocManager::install("TxDb.Hsapiens.UCSC.hg38.knownGene")
shell: Rscript {0}
- name: Install dependencies
run: |
install.packages(c("remotes", "rcmdcheck"))
remotes::install_deps(dependencies = TRUE)
remotes::install_github(c("xihaoli/STAAR", "xihaoli/MultiSTAAR", "zilinli1988/SCANG", "xihaoli/STAARpipeline"))
shell: Rscript {0}
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: |
bioc::GENESIS
bioc::GenomicFeatures
bioc::TxDb.Hsapiens.UCSC.hg38.knownGene
github::xihaoli/STAAR
github::xihaoli/MultiSTAAR
github::zilinli1988/SCANG
github::xihaoli/STAARpipeline
any::rcmdcheck
needs: |
website
coverage
- name: Check
run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")
shell: Rscript {0}
98 changes: 49 additions & 49 deletions R/Gene_Centric_Coding_Results_Summary.R

Large diffs are not rendered by default.

134 changes: 67 additions & 67 deletions R/Gene_Centric_Coding_Results_Summary_incl_ptv.R

Large diffs are not rendered by default.

148 changes: 74 additions & 74 deletions R/Gene_Centric_Noncoding_Results_Summary.R

Large diffs are not rendered by default.

16 changes: 9 additions & 7 deletions R/Individual_Analysis_Results_Summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ Individual_Analysis_Results_Summary <- function(agds_dir,jobs_num,input_path,out

par(mar=c(5,6,4,4))
plot(lexp,lobs,pch=20, cex=1, xlim = c(0, max(lexp)), ylim = c(0, max(lobs)),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)

abline(0, 1, col="red",lwd=2)

Expand All @@ -183,7 +183,9 @@ Individual_Analysis_Results_Summary <- function(agds_dir,jobs_num,input_path,out
gds.path <- agds_dir[chr]
genofile <- seqOpen(gds.path)

results_sig_cond_chr <- Individual_Analysis_cond(chr=chr,individual_results=results_sig_chr,genofile,obj_nullmodel=obj_nullmodel,known_loci=known_loci,variant_type="variant", QC_label=QC_label, geno_missing_imputation=geno_missing_imputation, method_cond=method_cond)
results_sig_cond_chr <- Individual_Analysis_cond(chr=chr,individual_results=results_sig_chr,genofile,obj_nullmodel=obj_nullmodel,
known_loci=known_loci,variant_type="variant",
QC_label=QC_label,geno_missing_imputation=geno_missing_imputation,method_cond=method_cond)

results_sig_cond <- rbind(results_sig_cond,results_sig_cond_chr)

Expand All @@ -209,8 +211,8 @@ Individual_Analysis_Results_Summary <- function(agds_dir,jobs_num,input_path,out
obj_nullmodel_cond <- get(load(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))

results_sig_cond_chr <- Individual_Analysis_cond_spa(chr=chr,individual_results=results_sig_chr,genofile,obj_nullmodel=obj_nullmodel_cond,
variant_type="variant",QC_label=QC_label,geno_missing_imputation=geno_missing_imputation,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
variant_type="variant",QC_label=QC_label,geno_missing_imputation=geno_missing_imputation,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)

results_sig_cond <- rbind(results_sig_cond,results_sig_cond_chr)

Expand All @@ -223,8 +225,8 @@ Individual_Analysis_Results_Summary <- function(agds_dir,jobs_num,input_path,out
genofile <- seqOpen(gds.path)

results_sig_cond_chr <- Individual_Analysis_cond_spa(chr=chr,individual_results=results_sig_chr,genofile,obj_nullmodel=obj_nullmodel,
variant_type="variant",QC_label=QC_label,geno_missing_imputation=geno_missing_imputation,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
variant_type="variant",QC_label=QC_label,geno_missing_imputation=geno_missing_imputation,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)

results_sig_cond <- rbind(results_sig_cond,results_sig_cond_chr)

Expand Down
32 changes: 16 additions & 16 deletions R/Single_Variants_List_Analysis.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#' Calculate individual-variant p-values of a list of variants
#'
#' The \code{Single_Variants_List_Analysis} function takes in a list of variants to calculate the p-values and effect sizes of the input variants
#' (effect size estimations are not provided for imbalanced case-control setting),
#' only support for null model fitting using sparse GRM.
#' (effect size estimations are not provided for imbalanced case-control setting).
#' Note: this function only supports for null model fitting using sparse GRM.
#' @param agds_dir file directory of annotated GDS (aGDS) files for all chromosomes (1-22).
#' @param single_variants_list name a data frame containing the information of variants to be functionally annotated. The data frame must include 4 columns with
#' the following names: "CHR" (chromosome number), "POS" (position), "REF" (reference allele), and "ALT" (alternative allele).
Expand All @@ -12,8 +12,8 @@
#' @param QC_label channel name of the QC label in the GDS/aGDS file (default = "annotation/filter").
#' @param p_filter_cutoff threshold for the p-value recalculation using the SPA method (default = 0.05)
#' @param tol a positive number specifying tolerance, the difference threshold for parameter
#' estimates in saddlepoint apporximation algorithm below which iterations should be stopped (default = ".Machine$double.eps^0.25").
#' @param max_iter a positive integers pecifying the maximum number of iterations for applying the saddlepoint approximation algorithm (default = "1000").
#' estimates in saddlepoint approximation algorithm below which iterations should be stopped (default = ".Machine$double.eps^0.25").
#' @param max_iter a positive integer specifying the maximum number of iterations for applying the saddlepoint approximation algorithm (default = "1000").
#' @return a data frame containing the basic information (chromosome, position, reference allele and alternative allele)
#' the score test p-values, and the effect sizes for the input variants.
#' @references Li, Z., Li, X., et al. (2022). A framework for detecting
Expand All @@ -24,14 +24,14 @@

Single_Variants_List_Analysis <- function(agds_dir,single_variants_list,obj_nullmodel,
QC_label="annotation/filter",geno_missing_imputation=c("mean","minor"),
p_filter_cutoff=0.05,tol=.Machine$double.eps^0.25,max_iter=1000){
p_filter_cutoff=0.05,tol=.Machine$double.eps^0.25,max_iter=1000){

## evaluate choices
geno_missing_imputation <- match.arg(geno_missing_imputation)

phenotype.id <- as.character(obj_nullmodel$id_include)
samplesize <- length(phenotype.id)

n_pheno <- obj_nullmodel$n.pheno

if(!is.null(obj_nullmodel$use_SPA))
Expand All @@ -41,13 +41,13 @@ Single_Variants_List_Analysis <- function(agds_dir,single_variants_list,obj_null
{
use_SPA <- FALSE
}

Sigma_i <- obj_nullmodel$Sigma_i
Sigma_iX <- as.matrix(obj_nullmodel$Sigma_iX)
cov <- obj_nullmodel$cov

residuals.phenotype <- obj_nullmodel$scaled.residuals

## SPA
if(use_SPA)
{
Expand Down Expand Up @@ -127,7 +127,7 @@ Single_Variants_List_Analysis <- function(agds_dir,single_variants_list,obj_null
filter <- seqGetData(genofile, QC_label)

N <- rep(samplesize,length(CHR))

Geno <- Geno$Geno

### calculate p-values
Expand All @@ -139,7 +139,7 @@ Single_Variants_List_Analysis <- function(agds_dir,single_variants_list,obj_null
Geno <- Diagonal(n = n_pheno) %x% Geno
Score_test <- Individual_Score_Test_sp_multi(Geno, Sigma_i, Sigma_iX, cov, residuals.phenotype, n_pheno)
}

## SPA approximation for small p-values
if(use_SPA)
{
Expand All @@ -157,16 +157,16 @@ Single_Variants_List_Analysis <- function(agds_dir,single_variants_list,obj_null
if(use_SPA)
{
single_variants_list_annotation_chr <-data.frame(CHR=CHR,POS=position,REF=REF,ALT=ALT,ALT_AF=ALT_AF,QC_label=filter,MAF=MAF,N=N,
pvalue=pvalue)
pvalue=pvalue)
}else
{
single_variants_list_annotation_chr <-data.frame(CHR=CHR,POS=position,REF=REF,ALT=ALT,ALT_AF=ALT_AF,QC_label=filter,MAF=MAF,N=N,
pvalue=exp(-Score_test$pvalue_log),pvalue_log10=Score_test$pvalue_log/log(10),
Score=Score_test$Score,Score_se=Score_test$Score_se,
Est=Score_test$Est,Est_se=Score_test$Est_se)
pvalue=exp(-Score_test$pvalue_log),pvalue_log10=Score_test$pvalue_log/log(10),
Score=Score_test$Score,Score_se=Score_test$Score_se,
Est=Score_test$Est,Est_se=Score_test$Est_se)
}


single_variants_list_annotation <- rbind(single_variants_list_annotation,single_variants_list_annotation_chr)
seqClose(genofile)
}
Expand Down
45 changes: 24 additions & 21 deletions R/Sliding_Window_Results_Summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,11 +144,11 @@ Sliding_Window_Results_Summary <- function(agds_dir,jobs_num,input_path,output_p
genofile <- seqOpen(gds.path)

res_cond <- Sliding_Window_cond(chr=chr,genofile=genofile,obj_nullmodel=obj_nullmodel,
start_loc=start_loc,end_loc=end_loc,known_loci=known_loci,
method_cond=method_cond,rare_maf_cutoff=rare_maf_cutoff,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_name_catalog=Annotation_name_catalog,Annotation_dir=Annotation_dir,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name)
start_loc=start_loc,end_loc=end_loc,known_loci=known_loci,
method_cond=method_cond,rare_maf_cutoff=rare_maf_cutoff,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_name_catalog=Annotation_name_catalog,Annotation_dir=Annotation_dir,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name)
results_sig_cond <- rbind(results_sig_cond,res_cond)

seqClose(genofile)
Expand Down Expand Up @@ -176,11 +176,11 @@ Sliding_Window_Results_Summary <- function(agds_dir,jobs_num,input_path,output_p
obj_nullmodel_cond <- get(load(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))

res_cond <- Sliding_Window_cond_spa(chr=chr,genofile=genofile,obj_nullmodel=obj_nullmodel_cond,
start_loc=start_loc,end_loc=end_loc,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_name_catalog=Annotation_name_catalog,Annotation_dir=Annotation_dir,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
start_loc=start_loc,end_loc=end_loc,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_name_catalog=Annotation_name_catalog,Annotation_dir=Annotation_dir,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
results_sig_cond <- rbind(results_sig_cond,res_cond)

seqClose(genofile)
Expand All @@ -201,7 +201,6 @@ Sliding_Window_Results_Summary <- function(agds_dir,jobs_num,input_path,output_p
{
print("Manhattan plot")


png(paste0(output_path,"sliding_window_manhattan.png"), width=12, height=8, units = 'in', res = 600)

if(!use_SPA)
Expand Down Expand Up @@ -234,8 +233,8 @@ Sliding_Window_Results_Summary <- function(agds_dir,jobs_num,input_path,output_p
par(mar=c(5,6,4,4))

plot(lexp,lobs,pch=20, cex=1, xlim = c(0, max(lexp)), ylim = c(0, max(lobs)),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)

abline(0, 1, col="red",lwd=2)

Expand All @@ -253,8 +252,8 @@ Sliding_Window_Results_Summary <- function(agds_dir,jobs_num,input_path,output_p
par(mar=c(5,6,4,4))

plot(lexp,lobs,pch=20, cex=1, xlim = c(0, max(lexp)), ylim = c(0, max(lobs)),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)

abline(0, 1, col="red",lwd=2)

Expand All @@ -271,9 +270,11 @@ Sliding_Window_Results_Summary <- function(agds_dir,jobs_num,input_path,output_p
png(paste0(output_path,"sliding_window_qq_burden_1_1.png"), width=8, height=8, units = 'in', res = 600)

par(mar=c(5,6,4,4))

plot(lexp,lobs,pch=20, cex=1, xlim = c(0, max(lexp)), ylim = c(0, max(lobs)),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)

abline(0, 1, col="red",lwd=2)

dev.off()
Expand All @@ -291,8 +292,8 @@ Sliding_Window_Results_Summary <- function(agds_dir,jobs_num,input_path,output_p
par(mar=c(5,6,4,4))

plot(lexp,lobs,pch=20, cex=1, xlim = c(0, max(lexp)), ylim = c(0, max(lobs)),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)

abline(0, 1, col="red",lwd=2)

Expand All @@ -308,9 +309,11 @@ Sliding_Window_Results_Summary <- function(agds_dir,jobs_num,input_path,output_p
png(paste0(output_path,"sliding_window_qq_burden_1_1.png"), width=8, height=8, units = 'in', res = 600)

par(mar=c(5,6,4,4))

plot(lexp,lobs,pch=20, cex=1, xlim = c(0, max(lexp)), ylim = c(0, max(lobs)),
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)
xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
font.lab=2,cex.lab=2,cex.axis=2,font.axis=2)

abline(0, 1, col="red",lwd=2)

dev.off()
Expand Down
1 change: 1 addition & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ branches:
build_script:
- travis-tool.sh install_bioc_deps
- travis-tool.sh install_deps
- travis-tool.sh install_bioc GENESIS
- travis-tool.sh install_bioc GenomeInfoDbData
- travis-tool.sh install_bioc TxDb.Hsapiens.UCSC.hg38.knownGene
- travis-tool.sh install_github xihaoli/STAAR
Expand Down
8 changes: 4 additions & 4 deletions man/Single_Variants_List_Analysis.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 94abbc8

Please sign in to comment.