Skip to content

Commit

Permalink
code monocle nb reg on xie data
Browse files Browse the repository at this point in the history
  • Loading branch information
Tim committed Jul 20, 2021
1 parent 9378e51 commit 1fc80b5
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 70 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
new_diff_test_helper = function(x,
fullModelFormulaStr,
reducedModelFormulaStr,
expressionFamily,
relative_expr,
weights,
disp_func=NULL,
verbose=FALSE
){

reducedModelFormulaStr <- paste("f_expression", reducedModelFormulaStr, sep="")
fullModelFormulaStr <- paste("f_expression", fullModelFormulaStr, sep="")

x_orig <- x
disp_guess <- 0

if (expressionFamily@vfamily %in% c("negbinomial", "negbinomial.size")){
if (relative_expr == TRUE)
{
x <- x / Size_Factor
}
f_expression <- round(x)
if (is.null(disp_func) == FALSE){
disp_guess <- monocle:::calculate_NB_dispersion_hint(disp_func, round(x_orig))
if (is.null(disp_guess) == FALSE && disp_guess > 0 && is.na(disp_guess) == FALSE ) {
# FIXME: In theory, we could lose some user-provided parameters here
# e.g. if users supply zero=NULL or something.
if (expressionFamily@vfamily == "negbinomial")
expressionFamily <- negbinomial(isize=1/disp_guess)
else
expressionFamily <- negbinomial.size(size=1/disp_guess)
}
}
}else if (expressionFamily@vfamily %in% c("gaussianff", "uninormal")){
f_expression <- x
}else if (expressionFamily@vfamily %in% c("binomialff")){
f_expression <- x/Size_Factor
#f_expression[f_expression > 1] <- 1
}else{
f_expression <- log10(x)
}
test_res <- tryCatch({
if (expressionFamily@vfamily %in% c("binomialff")){
if (verbose){
full_model_fit <- VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily)
reduced_model_fit <- VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily)
}else{
full_model_fit <- suppressWarnings(VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily))
reduced_model_fit <- suppressWarnings(VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily))
}
}else{
if (verbose){
full_model_fit <- VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily)
reduced_model_fit <- VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily)
}else{
full_model_fit <- suppressWarnings(VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily))
reduced_model_fit <- suppressWarnings(VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily))
}
}

myCompareModels = function(full_models, reduced_models){
stopifnot(length(full_models) == length(reduced_models))
test_res <- mapply(function(x,y) {
if (is.null(x) == FALSE && is.null(y) == FALSE) {
lrt <- VGAM::lrtest(x,y)
pval=lrt@Body["Pr(>Chisq)"][2,]
family = x@family@vfamily
if (length(family) > 1)
family = family[1]
intercept = x@coefficients[1]
beta = x@coefficients[2]
data.frame(status = "OK", family=family, pval=pval,beta=beta, intercept=intercept)
} else { data.frame(status = "FAIL", family=NA, pval=1.0,beta=0) }
} , full_models, reduced_models, SIMPLIFY=FALSE, USE.NAMES=TRUE)

test_res <- do.call(rbind.data.frame, test_res)
test_res$qval <- p.adjust(test_res$pval, method="BH")
test_res
}
#print(full_model_fit)
#print(coef(reduced_model_fit))
myCompareModels(list(full_model_fit), list(reduced_model_fit))
},
#warning = function(w) { FM_fit },
error = function(e) {
if(verbose)
print (e);
data.frame(status = "FAIL", family=expressionFamily@vfamily, pval=1.0, qval=1.0, beta=0, intercept=0)
#data.frame(status = "FAIL", pval=1.0)
}
)
test_res
}

myDifferentialGeneTest = function(cds,
fullModelFormulaStr="~sm.ns(Pseudotime, df=3)",
reducedModelFormulaStr="~1",
relative_expr=TRUE,
cores=1,
verbose=FALSE
){
status <- NA
if (relative_expr && cds@expressionFamily@vfamily %in% c("negbinomial", "negbinomial.size")){
if (is.null(sizeFactors(cds)) || sum(is.na(sizeFactors(cds)))){
stop("Error: to call this function with relative_expr==TRUE, you must first call estimateSizeFactors() on
the CellDataSet.")
}
}
if (cores > 1){
diff_test_res<-monocle:::mcesApply(cds, 1, new_diff_test_helper,
c("BiocGenerics", "VGAM", "Matrix"),
cores=cores,
fullModelFormulaStr=fullModelFormulaStr,
reducedModelFormulaStr=reducedModelFormulaStr,
expressionFamily=cds@expressionFamily,
relative_expr=relative_expr,
disp_func=cds@dispFitInfo[["blind"]]$disp_func,
verbose=verbose
# ,
# backup_method = backup_method,
# use_epislon = use_epislon,
# stepsize = stepsize
)
diff_test_res
}else{
diff_test_res<-monocle:::smartEsApply(cds,1,new_diff_test_helper,
convert_to_dense=TRUE,
fullModelFormulaStr=fullModelFormulaStr,
reducedModelFormulaStr=reducedModelFormulaStr,
expressionFamily=cds@expressionFamily,
relative_expr=relative_expr,
disp_func=cds@dispFitInfo[["blind"]]$disp_func,
verbose=verbose
# ,
# backup_method = backup_method,
# use_epislon = use_epislon,
# stepsize = stepsize

)
diff_test_res
}

diff_test_res <- do.call(rbind.data.frame, diff_test_res)
diff_test_res$qval <- 1
diff_test_res$qval[which(diff_test_res$status == 'OK')] <- p.adjust(subset(diff_test_res, status == 'OK')[, 'pval'], method="BH")

diff_test_res <- merge(diff_test_res, fData(cds), by="row.names")
row.names(diff_test_res) <- diff_test_res[, 1] #remove the first column and set the row names to the first column
diff_test_res[, 1] <- NULL

diff_test_res
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
args <- commandArgs(trailingOnly = TRUE)
code_dir <- if (is.na(args[1])) "/Users/timbarry/research_code/sceptre-manuscript" else args[1]
source(paste0(code_dir, "/sceptre_paper/analysis_drivers/analysis_drivers_xie/paths_to_dirs.R"))
source(paste0(code_dir, "/sceptre_paper/analysis_drivers/analysis_drivers_xie/gasp_custom_functs.R"))
cds <- readRDS(paste0(processed_dir, "/monocole_obj.rds"))
pairs <- fst::read_fst(paste0(processed_dir, "/gRNA_gene_pairs.fst"))
pairs <- pairs[1:40,]

# set formula
reduced_formula <- "~ log_n_umis + batch + log_n_gRNA_umis"
cds <- cds[as.character(pairs$gene_id),]

# set up parallel
future::plan(future::multisession())

# Loop over all gRNAs and genes; subset by gene, and regress on the corresponding gRNA using Molly's function
res <- future.apply::future_lapply(X = seq(1, nrow(pairs)), FUN = function(i) {
print(i)
gRNA <- as.character(pairs$gRNA_id[i])
gene <- as.character(pairs$gene_id[i])
# subset cds
cds_sub <- cds[gene,]
# set full formula
full_formula <- paste0(reduced_formula, " + `", gRNA, "`")
# regress
de <- myDifferentialGeneTest(cds = cds_sub,
fullModelFormulaStr = full_formula,
reducedModelFormulaStr = reduced_formula,
relative_expr = TRUE,
cores = 1,
verbose = TRUE)
out <- data.frame(gRNA_id = gRNA, gene_id = gene, p_val = de$pval, q_val = de$qval, beta = de$beta, intercept = de$intercept)
return(out)
})

saveRDS(res, paste0(results_dir_negative_binomial, "/monocle_nb_results.rds"))

This file was deleted.

10 changes: 6 additions & 4 deletions sceptre_paper/utilities/run_xie_analysis.bash
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ Rscript $code_dir"/sceptre_paper/analysis_drivers/analysis_drivers_xie/"select_g
# Locate the parameter file
parameter_file=$code_dir"/sceptre_paper/analysis_drivers/analysis_drivers_xie/"sceptre_function_args.R

##################
# Big computations
##################
echo Run the sceptre analysis at scale.
sceptre_at_scale_bash_dir=$code_dir"/sceptre_package/sceptre_at_scale"
bash $sceptre_at_scale_bash_dir/"run_sceptre_at_scale.bash" $sceptre_at_scale_bash_dir $offsite_dir $parameter_file $n_processors $precomputation_complete
Expand All @@ -50,10 +53,9 @@ nb_at_scale_bash_dir=$code_dir"/sceptre_paper/"nb_regression_at_scale
bash $nb_at_scale_bash_dir/"run_nb_regression_at_scale.bash" $nb_at_scale_bash_dir $offsite_dir $parameter_file $n_processors
wait

echo Run the monocle regression analysis at scale.
# bash $nb_at_scale_bash_dir/"run_monocle_regression_at_scale.bash" $nb_at_scale_bash_dir $offsite_dir $parameter_file $n_processors
wait

echo Run the Monocle negative binomial regression analysis at scale.
Rscript $code_dir"/sceptre_paper/analysis_drivers/analysis_drivers_xie/"run_monocle_nb_at_scale.R $code_dir $offsite_dir
##################

echo Preprocess the sceptre results for downstream analysis.
Rscript $code_dir"/sceptre_paper/analysis_drivers/analysis_drivers_xie/"append_simple_names_results_6.R $code_dir $offsite_dir
Expand Down

0 comments on commit 1fc80b5

Please sign in to comment.