Skip to content

Commit

Permalink
Add 'simple' prior method and release 0.9.0
Browse files Browse the repository at this point in the history
  • Loading branch information
gaow committed Jan 15, 2020
1 parent 2c074e4 commit 34021ad
Show file tree
Hide file tree
Showing 74 changed files with 665 additions and 569 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Description: The package implements a simple new way to perform
effects are highly sparse (e.g. <20 non-zero effects in the vector
$b$), although it is also useful to more general applications.
Date: 2020-01-14
Version: 0.8.1.0571
Version: 0.9.0
Authors@R: c(person("Gao","Wang",role=c("aut","cre"),
email="gaow@uchicago.edu"),
person("Yuxin","Zou",role="aut"),
Expand Down
6 changes: 3 additions & 3 deletions R/compute_ss.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#' @title Compute the needed summary statistics for `susie_ss` from the 'raw' data
#' @title Compute sufficient statistics from input data
#' @param X an n by p matrix of covariates
#' @param y an n vector
#' @param standardize logical flag (default=TRUE) for whether to standardize columns of X to unit variance prior to fitting.
#' @return a list with the input for `susie_ss`
#' @return a list of sufficient statistics
#' @importFrom methods as
#' @export
compute_ss = function(X, y, standardize = TRUE){
Expand All @@ -19,4 +19,4 @@ compute_ss = function(X, y, standardize = TRUE){
yty = sum(y^2)

return(list(XtX = XtX, Xty = Xty, yty = yty, n = n))
}
}
1 change: 1 addition & 0 deletions R/elbo_rss.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ get_ER2_rss = function(R,z,s){
#' @param r residuals
#' @param Eb the posterior mean of b (p vector) (alpha * mu)
#' @param Eb2 the posterior second moment of b (p vector) (alpha * mu2)
#' @keywords internal
SER_posterior_e_loglik_rss = function(R, Sigma, r,Ez,Ez2){
eigenS = attr(Sigma,'eigenS')
Dinv = 1/(eigenS$values)
Expand Down
2 changes: 2 additions & 0 deletions R/estimate_residual_variance.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#' @param X an n by p matrix of covariantes
#' @param Y an n vector of data
#' @param s a susie fit
#' @keywords internal
estimate_residual_variance = function(X,Y,s){
n = nrow(X)
return( (1/n)* get_ER2(X,Y,s) )
Expand All @@ -14,6 +15,7 @@ estimate_residual_variance = function(X,Y,s){
#' @param s a susie fit
#' @param yty a scaler, Y'Y, where Y is centered to have mean 0
#' @param n sample size
#' @keywords internal
estimate_residual_variance_ss = function(XtX,Xty,s,yty,n){
return( (1/n)* get_ER2_ss(XtX,Xty,s,yty) )
}
1 change: 0 additions & 1 deletion R/set_R_attributes.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#' @param r_tol tolerance level for eigen value check of positive semidefinite matrix of R
#' @return R with attribute e.g.
#' attr(R, 'eigenR') is the eigen decomposition of R.

set_R_attributes = function(R, r_tol){
if(is.null(attr(R, 'eigen')))
eigenR = eigen(R, symmetric = TRUE)
Expand Down
1 change: 0 additions & 1 deletion R/set_X_attributes.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#' attr(X, 'scaled:scale') is a p vector of column standard deviations of X if scale=TRUE, a p vector of 1s otherwise.
#' attr(X, 'd') is a p vector of column sums of X.standardized^2,
#' where X.standardized is the matrix X centered by attr(X, 'scaled:center') and scaled by attr(X, 'scaled:scale').

set_X_attributes = function(X,
center = TRUE,
scale = TRUE) {
Expand Down
24 changes: 13 additions & 11 deletions R/single_effect_regression.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,15 @@
#' @importFrom stats uniroot
#' @importFrom stats optim
#' @importFrom Matrix colSums
#'
single_effect_regression = function(Y,X,V,residual_variance=1,prior_weights=NULL, optimize_V=c("none", "optim", "EM")){
single_effect_regression = function(Y,X,V,residual_variance=1,prior_weights=NULL, optimize_V=c("none", "optim", "uniroot", "EM", "simple")){
optimize_V = match.arg(optimize_V)
Xty = compute_Xty(X, Y)
betahat = (1/attr(X, "d")) * Xty
shat2 = residual_variance/attr(X, "d")
if (is.null(prior_weights))
prior_weights = rep(1/ncol(X), ncol(X))

if(optimize_V=="optim") V=optimize_prior_variance(optimize_V, betahat, shat2, prior_weights, alpha=NULL, post_mean2=NULL)
if(optimize_V!="EM" && optimize_V!="none") V=optimize_prior_variance(optimize_V, betahat, shat2, prior_weights, alpha=NULL, post_mean2=NULL)

lbf = dnorm(betahat,0,sqrt(V+shat2),log=TRUE) - dnorm(betahat,0,sqrt(shat2),log=TRUE)
#log(bf) on each SNP
Expand Down Expand Up @@ -64,14 +63,16 @@ est_V_uniroot = function(betahat, shat2, prior_weights){
}

optimize_prior_variance = function(optimize_V, betahat, shat2, prior_weights, alpha=NULL, post_mean2=NULL){
if(optimize_V=="optim"){
lV = optim(par=log(max(c(betahat^2-shat2, 1), na.rm = TRUE)), fn=neg.loglik.logscale, betahat=betahat, shat2=shat2, prior_weights = prior_weights, method='Brent', lower = -30, upper = 15)$par
V = exp(lV)
}else if(optimize_V=="uniroot"){
V = est_V_uniroot(betahat, shat2, prior_weights)
}else if(optimize_V=="EM"){
V = sum(alpha*post_mean2)
}else stop('Invalid option for `optimize_V` method')
if (optimize_V != "simple") {
if(optimize_V=="optim"){
lV = optim(par=log(max(c(betahat^2-shat2, 1), na.rm = TRUE)), fn=neg.loglik.logscale, betahat=betahat, shat2=shat2, prior_weights = prior_weights, method='Brent', lower = -30, upper = 15)$par
V = exp(lV)
}else if(optimize_V=="uniroot"){
V = est_V_uniroot(betahat, shat2, prior_weights)
}else if(optimize_V=="EM"){
V = sum(alpha*post_mean2)
}else stop('Invalid option for `optimize_V` method')
}
if(loglik(0,betahat,shat2,prior_weights) >= loglik(V,betahat,shat2,prior_weights)) V=0 # set V exactly 0 if that beats the numerical value
return(V)
}
Expand All @@ -81,6 +82,7 @@ optimize_prior_variance = function(optimize_V, betahat, shat2, prior_weights, al
#' The log likelihood function for SER model (based on summary data betahat, shat2), as a function of prior variance V
#' @importFrom Matrix colSums
#' @importFrom stats dnorm
#' @keywords internal
loglik = function(V,betahat,shat2,prior_weights) {

lbf = dnorm(betahat,0,sqrt(V+shat2),log=TRUE) - dnorm(betahat,0,sqrt(shat2),log=TRUE)
Expand Down
20 changes: 11 additions & 9 deletions R/single_effect_regression_rss.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@
#' \item{lbf_model}{(scalar) the loglikelihood for the total model minus the log-likelihood for the null model}
#'
#'
single_effect_regression_rss = function(z,Sigma,V=1,prior_weights=NULL,optimize_V=c("none", "optim", "EM")){
single_effect_regression_rss = function(z,Sigma,V=1,prior_weights=NULL,optimize_V=c("none", "optim", "uniroot", "EM", "simple")){
p = length(z)
shat2 = 1/attr(Sigma, 'RjSinvRj')
if (is.null(prior_weights))
prior_weights = rep(1/p, p)

if(optimize_V=="optim"){
if(optimize_V!="EM" && optimize_V!="none"){
V=optimize_prior_variance_rss(optimize_V, z, Sigma, prior_weights, alpha=NULL, post_mean2=NULL)
}

Expand Down Expand Up @@ -86,15 +86,17 @@ neg.loglik_z.logscale_rss = function(lV,z,Sigma,prior_weights){
}

optimize_prior_variance_rss = function(optimize_V, z, Sigma, prior_weights, alpha=NULL, post_mean2=NULL){
if(optimize_V=="optim"){
lV = optim(par=log(max(c((z^2) - (1/attr(Sigma, 'RjSinvRj')), 1e-6), na.rm = TRUE)),
if (optimize_V != "simple") {
if(optimize_V=="optim"){
lV = optim(par=log(max(c((z^2) - (1/attr(Sigma, 'RjSinvRj')), 1e-6), na.rm = TRUE)),
fn=neg.loglik_z.logscale_rss,
z=z, Sigma = Sigma, prior_weights = prior_weights,
method='Brent', lower = -30, upper = 15)$par
V = exp(lV)
}else if(optimize_V=="EM"){
V = sum(alpha*post_mean2)
}else stop('Invalid option for `optimize_V` method')
V = exp(lV)
}else if(optimize_V=="EM"){
V = sum(alpha*post_mean2)
}else stop('Invalid option for `optimize_V` method')
}
if(loglik_rss(0,z,Sigma,prior_weights) >= loglik_rss(V,z,Sigma,prior_weights)) V=0 # set V exactly 0 if that beats the numerical value
return(V)
}
}
4 changes: 2 additions & 2 deletions R/single_effect_regression_ss.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@
#' @importFrom stats uniroot
#' @importFrom stats optim
#'
single_effect_regression_ss = function(Xty,dXtX,V=1,residual_variance=1,prior_weights=NULL,optimize_V=c("none", "optim", "EM")){
single_effect_regression_ss = function(Xty,dXtX,V=1,residual_variance=1,prior_weights=NULL,optimize_V=c("none", "optim", "uniroot", "EM", "simple")){
optimize_V = match.arg(optimize_V)
betahat = (1/dXtX) * Xty
shat2 = residual_variance/dXtX
if (is.null(prior_weights))
prior_weights = rep(1/length(dXtX), length(dXtX))

if(optimize_V=="optim") V=optimize_prior_variance(optimize_V, betahat, shat2, prior_weights, alpha=NULL, post_mean2=NULL)
if(optimize_V!="EM" && optimize_V!="none") V=optimize_prior_variance(optimize_V, betahat, shat2, prior_weights, alpha=NULL, post_mean2=NULL)

lbf = dnorm(betahat,0,sqrt(V+shat2),log=TRUE) - dnorm(betahat,0,sqrt(shat2),log=TRUE)
#log(bf) on each SNP
Expand Down
2 changes: 2 additions & 0 deletions R/sparse_multiplication.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ compute_Xb = function(X, b){
#' @return a p vector
#' @importFrom Matrix t
#' @importFrom Matrix crossprod
#' @keywords internal
compute_Xty = function(X, y){
cm = attr(X, 'scaled:center')
csd = attr(X, 'scaled:scale')
Expand All @@ -43,6 +44,7 @@ compute_Xty = function(X, y){
#' @param X an n by p unstandardized matrix with three attributes: attr(X, 'scaled:center'), attr(X, 'scaled:scale'), and attr(X, 'd')
#' @return a L by n matrix
#' @importFrom Matrix t
#' @keywords internal
compute_MXt = function(M, X){
cm = attr(X, 'scaled:center')
csd = attr(X, 'scaled:scale')
Expand Down
7 changes: 4 additions & 3 deletions R/susie.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,11 @@
#' point for the optimization. If \code{estimate_prior_variance = FALSE}
#' then the prior variance (for each of the \code{L} effects) is determined by the value
#' supplied to \code{scaled_prior_variance}.
#
#'
#' @param estimate_prior_method The method used for estimating prior
#' variance.
#' variance. "simple" method only compares the loglikelihood between
#' using specified prior variance and using zero, and chose the one that
#' gives larger loglikelihood.
#'
#' @param s_init A previous susie fit with which to initialize.
#'
Expand Down Expand Up @@ -178,7 +179,7 @@ susie <- function(X,Y,L = min(10,ncol(X)),scaled_prior_variance = 0.2,
standardize=TRUE,intercept=TRUE,
estimate_residual_variance=TRUE,
estimate_prior_variance = TRUE,
estimate_prior_method = c("optim","EM"),
estimate_prior_method = c("optim","EM","simple"),
s_init = NULL,coverage=0.95,min_abs_corr=0.5,
compute_univariate_zscore = FALSE,
na.rm = FALSE, max_iter=100,tol=1e-3,
Expand Down
8 changes: 5 additions & 3 deletions R/susie_rss.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
#' @param restrict whether to restrict the resiudal variance between 0 and 1
#' @param estimate_residual_variance indicates whether to estimate residual variance
#' @param estimate_prior_variance indicates whether to estimate prior
#' @param estimate_prior_method The method used for estimating prior variance, 'optim' or 'EM'
#' @param estimate_prior_method The method used for estimating prior
#' variance. "simple" method only compares the loglikelihood between
#' using specified prior variance and using zero, and chose the one that
#' gives larger loglikelihood.
#' @param max_iter maximum number of iterations to perform
#' @param s_init a previous susie fit with which to initialize
#' @param intercept_value a value to assign to the intercept (since the intercept cannot be estimated from centered summary data). This
Expand Down Expand Up @@ -50,7 +53,7 @@ susie_rss = function(z, R, maf=NULL, maf_thresh=0,
restrict = TRUE,
estimate_residual_variance = TRUE,
estimate_prior_variance = TRUE,
estimate_prior_method = c("optim","EM"),
estimate_prior_method = c("optim","EM","simple"),
max_iter=100,s_init = NULL, intercept_value=0,
coverage=0.95, min_abs_corr=0.5,
tol=1e-3, verbose=FALSE, track_fit = FALSE, check_R = TRUE, check_z = TRUE) {
Expand Down Expand Up @@ -122,7 +125,6 @@ susie_rss = function(z, R, maf=NULL, maf_thresh=0,
R = set_R_attributes(R, r_tol)
# initialize susie fit
s = init_setup_rss(p,L,prior_variance,residual_variance,prior_weights,null_weight,1)

if (!missing(s_init)) {
s = modifyList(s, s_init)
s = init_finalize_rss(s, R=R)
Expand Down
4 changes: 2 additions & 2 deletions R/susie_ss.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#' @param residual_variance the residual variance (defaults to variance of y)
#' @param estimate_residual_variance indicates whether to estimate residual variance
#' @param estimate_prior_variance indicates whether to estimate prior
#' @param estimate_prior_method The method used for estimating prior variance, 'optim' or 'EM'
#' @param estimate_prior_method The method used for estimating prior variance.
#' @param r_tol tolerance level for eigen value check of positive semidefinite matrix of R.
#' @param prior_weights a p vector of prior probability that each element is non-zero
#' @param null_weight probability of no effect, for each single effect model
Expand Down Expand Up @@ -85,7 +85,7 @@ susie_suff_stat = function(bhat, shat, R, n, var_y = 1,
residual_variance=NULL,
estimate_residual_variance = TRUE,
estimate_prior_variance = TRUE,
estimate_prior_method = c("optim","EM"),
estimate_prior_method = c("optim","EM","simple"),
r_tol = 1e-08,
prior_weights = NULL, null_weight = NULL,
standardize = TRUE,
Expand Down
5 changes: 1 addition & 4 deletions R/susie_trendfilter.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,4 @@ susie_trendfilter = function(y, order=0, standardize=FALSE, use_mad=TRUE, ...){
#' @keywords internal
estimate_mad_residual_variance = function(y){
return(0.5*(median(abs(diff(y))/0.6745)^2))
}



}
1 change: 1 addition & 0 deletions R/susie_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,7 @@ susie_get_objective <- function(res, all = FALSE, warning_tol = 1E-6) {

#' @title Slim the result of fitted SuSiE model
#' @param res a susie fit, the output of `susieR::susie()`
#' @keywords internal
susie_slim = function(res){
list(alpha = res$alpha, niter = res$niter, V = res$V, sigma2 = res$sigma2)
}
Expand Down
2 changes: 1 addition & 1 deletion R/update_each_effect.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#' @param Y an n vector of response variable
#' @param s a SuSiE fit
#' @param estimate_prior_variance boolean indicating whether to estimate prior variance
#'
#' @keywords internal
update_each_effect <- function (X, Y, s, estimate_prior_variance=FALSE,
estimate_prior_method="optim") {
if(estimate_prior_variance==FALSE) estimate_prior_method="none"
Expand Down
1 change: 1 addition & 0 deletions R/update_each_effect_rss.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#' @param Sigma sigma2*R + lambda I
#' @param estimate_prior_variance boolean indicating whether to estimate prior variance
#' @importFrom Matrix diag
#' @keywords internal
update_each_effect_rss <- function (R, z, s_init, Sigma, estimate_prior_variance=FALSE,estimate_prior_method="optim") {

if(estimate_prior_variance==FALSE) estimate_prior_method="none"
Expand Down
1 change: 1 addition & 0 deletions R/update_each_effect_ss.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#' @param estimate_prior_variance boolean indicating whether to estimate prior variance
#' @param estimate_prior_method The method used for estimating prior variance, 'optim' or 'EM'
#' @importFrom Matrix diag
#' @keywords internal
update_each_effect_ss <- function (XtX, Xty, s_init, estimate_prior_variance=FALSE, estimate_prior_method="optim") {
if(estimate_prior_variance==FALSE) estimate_prior_method="none"
# Repeat for each effect to update
Expand Down
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ To automatically retrieve and install `susieR` from this repository,
with the vignettes included, run:

```R
devtools::install_github("stephenslab/susieR@0.8.0",build_vignettes = TRUE)
devtools::install_github("stephenslab/susieR@0.9.0",build_vignettes = TRUE)
```

## Quick Start
Expand All @@ -56,7 +56,7 @@ please visit https://stephenslab.github.io/susieR

## Developer notes

+ When any changes are made to `roxygen2` markup, simply run
+ When any changes are made to `roxygen2` markup, simply run
`devtools::document()` to update package `NAMESPACE`
and documentation files.

Expand All @@ -65,17 +65,17 @@ following commands in the shell:

```bash
R CMD build --resave-data --no-build-vignettes susieR
R CMD INSTALL susieR_0.8.1.0525.tar.gz
R CMD check --as-cran --ignore-vignettes susieR_0.8.1.0525.tar.gz
R CMD INSTALL susieR_0.9.0.tar.gz
R CMD check --as-cran --ignore-vignettes susieR_0.9.0.tar.gz
```

+ Run `pkgdown::build_site()` to build the website. Getting `pkgdown`
to work properly can be frustrating due to numerous & fragile dependencies.
to work properly can be frustrating due to numerous & fragile dependencies.
If `pkgdown` does not work for you out of the box you can use this `docker`
command to run all vignettes and build the site:

```bash
docker run --rm --security-opt label:disable -t -P -w $PWD -v $PWD:$PWD \
-u $UID:${GROUPS[0]} -e HOME=/home/$USER -e USER=$USER gaow/susie \
R --slave -e "pkgdown::build_site(lazy=TRUE, examples=FALSE)"
```
```
Loading

0 comments on commit 34021ad

Please sign in to comment.