Skip to content

Commit

Permalink
adding recent updates
Browse files Browse the repository at this point in the history
  • Loading branch information
Helen Yihua Kang committed Sep 4, 2023
1 parent ec30c67 commit 3758903
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 21 deletions.
8 changes: 7 additions & 1 deletion workflow/rules/cNMF_pipeline.smk
Original file line number Diff line number Diff line change
Expand Up @@ -1073,6 +1073,7 @@ rule analysis:
figdir = os.path.join(config["figDir"], "{folder}"),
analysisdir = os.path.join(config["analysisDir"], "{folder}"), # K{k}/threshold_{threshold}
# barcode = os.path.join(config["barcodeDir"], "{sample}.barcodes.tsv"),
organism = config["organism"],
threshold = get_cNMF_filter_threshold_double,
barcode_names = config["barcodeDir"],
partition = "owners,normal"
Expand All @@ -1094,6 +1095,7 @@ rule analysis:
--K.val {wildcards.k} \
--density.thr {params.threshold} \
--recompute F \
--organism {params.organism} \
--motif.enhancer.background /oak/stanford/groups/engreitz/Users/kangh/2009_endothelial_perturbseq_analysis/cNMF/2104_all_genes/data/fimo_out_ABC_TeloHAEC_Ctrl_thresh1.0E-4/fimo.formatted.tsv \
--motif.promoter.background /oak/stanford/groups/engreitz/Users/kangh/2009_endothelial_perturbseq_analysis/topicModel/2104_remove_lincRNA/data/fimo_out_all_promoters_thresh1.0E-4/fimo.tsv \
' "
Expand Down Expand Up @@ -1786,7 +1788,7 @@ rule PoPS_plots:
--all.features {input.all_features_with_cNMF_RDS} \
--external.features.metadata {params.external_features_metadata} \
--combined.preds {input.combined_preds} \
--coefs.defining.top.topic.RDS {input.coefs_defining_top_topic_RDS} \
--coefs.defining.top.topic.RDS {input.coe.afs_defining_top_topic_RDS} \
--preds.importance.score.key.columns {input.preds_importance_score_key_columns} ' "


Expand All @@ -1803,6 +1805,8 @@ rule IGVF_formatting_model_programGenes:
time = "1:00:00",
mem_gb = "8",
analysisdir = os.path.join(config["analysisDir"], "{folder}"), # K{k}/threshold_{threshold}
level = config["sample_type"],
cell_type = config["celltype"],
partition = "owners,normal",
threshold = get_cNMF_filter_threshold_double
shell:
Expand All @@ -1813,6 +1817,8 @@ rule IGVF_formatting_model_programGenes:
--outdir {params.analysisdir}/ \
--K.val {wildcards.k} \
--density.thr {params.threshold} \
--level {params.level} \
--cell.type {params.cell_type} \
' "

rule IGVF_formatting_model_cellxgene:
Expand Down
19 changes: 16 additions & 3 deletions workflow/scripts/cNMF_analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,11 @@ option.list <- list(
make_option("--test.type", type="character", default="per.guide.wilcoxon", help="Significance test to threshold perturbation results"),
make_option("--adj.p.value.thr", type="numeric", default=0.1, help="adjusted p-value threshold"),
make_option("--recompute", type="logical", default=F, help="T for recomputing statistical tests and F for not recompute"),
make_option("--perturb.seq", type="character", default="False", help="True for perturb-seq. The pipeline will perform statistical test if True.")

make_option("--perturb.seq", type="character", default="False", help="True for perturb-seq. The pipeline will perform statistical test if True."),

## Organism flag
make_option("--organism", type="character", default="human", help="Organism type, accept org.Hs.eg.db. Only support human and mouse.")

)
opt <- parse_args(OptionParser(option_list=option.list))

Expand Down Expand Up @@ -127,6 +130,16 @@ opt <- parse_args(OptionParser(option_list=option.list))
## opt$barcode.names <- "/oak/stanford/groups/engreitz/Users/kangh/collab_data/IGVF/mouse_ENCODE_adrenal/auxiliary_data/snrna/adrenal_Parse_10x_integrated_metadata.csv" ## sdev for mouse ENCODE


## ## debug IGVF b01_LeftCortex sdev
## opt$figdir <- "/oak/stanford/groups/engreitz/Users/kangh/IGVF/Cellular_Programs_Networks/230706_snakemake_igvf_b01_LeftCortex/figures/all_genes/"
## opt$outdir <- "/oak/stanford/groups/engreitz/Users/kangh/IGVF/Cellular_Programs_Networks/230706_snakemake_igvf_b01_LeftCortex/analysis/all_genes/"
## opt$topic.model.result.dir <- "/oak/stanford/groups/engreitz/Users/kangh/IGVF/Cellular_Programs_Networks/230706_snakemake_igvf_b01_LeftCortex/analysis/all_genes_acrossK/IGVF_b01_LeftCortex/"
## opt$K.val <- 15
## opt$sampleName <- "IGVF_b01_LeftCortex"
## opt$barcode.names <- "/oak/stanford/groups/engreitz/Users/kangh/IGVF/Cellular_Programs_Networks/230706_igvf_b01_LeftCortex_data/IGVF_b01_LeftCortex.barcodes.txt"
## opt$organism <- "mouse"



mytheme <- theme_classic() + theme(axis.text = element_text(size = 9), axis.title = element_text(size = 11), plot.title = element_text(hjust = 0.5, face = "bold"))

Expand Down Expand Up @@ -353,7 +366,7 @@ adjust.multiTargetGuide.rownames <- function(omega) {
## rep2.label <- paste0("-",tmp.labels[2])
## } else guideCounts <- loadGuides(n) %>% mutate(Gene=Gene.marked)

db <- ifelse(grepl("mouse", SAMPLE), "org.Mm.eg.db", "org.Hs.eg.db")
db <- ifelse(grepl("mouse|org.Mm.eg.db", opt$organism), "org.Mm.eg.db", "org.Hs.eg.db")
library(!!db) ## load the appropriate database

cNMF.result.file <- paste0(OUTDIRSAMPLE,"/cNMF_results.",SUBSCRIPT.SHORT, ".RData")
Expand Down
23 changes: 20 additions & 3 deletions workflow/scripts/cNMF_analysis_gsea_clusterProfiler.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,25 @@ opt <- parse_args(OptionParser(option_list=option.list))
## opt$GSEA.type <- "ByWeightGSEA"
## opt$ranking.type <- "median_spectra"

## ## IGVF b01_LeftCortex
## opt$figdir <- "/oak/stanford/groups/engreitz/Users/kangh/IGVF/Cellular_Programs_Networks/230706_snakemake_igvf_b01_LeftCortex/figures/all_genes/"
## opt$outdir <- "/oak/stanford/groups/engreitz/Users/kangh/IGVF/Cellular_Programs_Networks/230706_snakemake_igvf_b01_LeftCortex/analysis/all_genes/"
## opt$K.val <- 20
## opt$sampleName <- "IGVF_b01_LeftCortex"
## opt$GSEA.type <- "GSEA"
## opt$ranking.type <- "median_spectra"
## opt$organism <- "mouse"

## ## RCA Pt4
## opt$topic.model.result.dir <- "/oak/stanford/groups/engreitz/Users/kangh/TeloHAEC_Perturb-seq_2kG/220124_snakemake_RCA/analysis/all_genes_acrossK/RCA"
## opt$sampleName <- "RCA"
## opt$figdir <- "/oak/stanford/groups/engreitz/Users/kangh/TeloHAEC_Perturb-seq_2kG/220124_snakemake_RCA/figures/all_genes/"
## opt$outdir <- "/oak/stanford/groups/engreitz/Users/kangh/TeloHAEC_Perturb-seq_2kG/220124_snakemake_RCA/analysis/all_genes"
## opt$K.val <- 60
## opt$ranking.type <- "median_spectra_zscore"
## opt$GSEA.type <- "GOEnrichment"
## opt$organism <- "human"


SAMPLE=strsplit(opt$sampleName,",") %>% unlist()
DATADIR=opt$olddatadir # "/seq/lincRNA/Gavin/200829_200g_anal/scRNAseq/"
Expand Down Expand Up @@ -227,8 +246,6 @@ ranking.type.ary <- c("zscore", "raw", "median_spectra_zscore", "median_spectra"
score.colname.ary <- c("zscore", "raw", "median.spectra.zscore", "median.spectra")
ranking.rank.colname.ary <- c("zscore.specificity.rank", "raw.score.rank", "median.spectra.zscore.rank", "median.spectra.rank")
ranking.type.varname.ary <- c("theta.rank.df", "theta.raw.rank.df", "median.spectra.zscore.df", "median.spectra.rank.df")
db <- ifelse(grepl("mouse|org.Mm.eg.db", opt$organism), "org.Mm.eg.db", "org.Hs.eg.db")
library(!!db) ## load the appropriate database

getData <- function(t) {
i <- which(ranking.type.ary == opt$ranking.type)
Expand Down Expand Up @@ -278,7 +295,7 @@ getData <- function(t) {
return(list(top.genes = top.genes, pos.genes = pos.genes, geneUniverse = geneUniverse, gene.weights = gene.weights))
}

m_df <- msigdbr(species = ifelse(grepl("mouse", SAMPLE), "Mus musculus", "Homo sapiens"))
m_df <- msigdbr(species = ifelse(grepl("mouse", opt$organism), "Mus musculus", "Homo sapiens"))

## save this as a txt file and read in ## for future if needed
functionsToRun <- list(GOEnrichment = "out <- enrichGO(gene = top.genes, ont = 'ALL', OrgDb = db, universe = geneUniverse, readable=T, pvalueCutoff=1, pAdjustMethod = 'fdr') %>% as.data.frame %>% mutate(fdr.across.ont = p.adjust, ProgramID = paste0('K', k, '_', t))",
Expand Down
13 changes: 12 additions & 1 deletion workflow/scripts/motif_enrichment.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,17 @@ opt <- parse_args(OptionParser(option_list=option.list))
## opt$motif.enhancer.background <- "/oak/stanford/groups/engreitz/Users/kangh/2009_endothelial_perturbseq_analysis/cNMF/2104_all_genes/data/fimo_out_ABC_TeloHAEC_Ctrl_thresh1.0E-4/fimo.tsv"
## opt$motif.promoter.background <- "/oak/stanford/groups/engreitz/Users/kangh/2009_endothelial_perturbseq_analysis/cNMF/2104_all_genes/data/fimo_out_ABC_TeloHAEC_Ctrl_thresh1.0E-4/fimo.formatted.tsv"

## ## IGVF b01_LeftCortex sdev
## opt$sampleName <- "IGVF_b01_LeftCortex"
## opt$figdir <- "/oak/stanford/groups/engreitz/Users/kangh/IGVF/Cellular_Programs_Networks/230706_snakemake_igvf_b01_LeftCortex/figures/all_genes/"
## opt$outdir <- "/oak/stanford/groups/engreitz/Users/kangh/IGVF/Cellular_Programs_Networks/230706_snakemake_igvf_b01_LeftCortex/analysis/all_genes"
## opt$K.val <- 20
## opt$ep.type <- "enhancer"
## opt$organism <- "mouse"
## opt$motif.match.thr.str <- "pval1e-6"
## opt$motif.enhancer.background <- "/oak/stanford/groups/engreitz/Users/kangh/IGVF/Cellular_Programs_Networks/230706_snakemake_igvf_b01_LeftCortex/analysis/all_genes/IGVF_b01_LeftCortex/fimo/fimo_out/fimo.txt"
## opt$motif.promoter.background <- "/oak/stanford/groups/engreitz/Users/kangh/IGVF/Cellular_Programs_Networks/230706_snakemake_igvf_b01_LeftCortex/analysis/all_genes/IGVF_b01_LeftCortex/fimo/fimo_out/fimo.txt"


mytheme <- theme_classic() + theme(axis.text = element_text(size = 9), axis.title = element_text(size = 11), plot.title = element_text(hjust = 0.5, face = "bold"))

Expand Down Expand Up @@ -242,7 +253,7 @@ if (grepl("qval", motif.match.thr.str)) {


if(ep.type == "enhancer") {
if(ncol(motif.background) == 9) {
if(ncol(motif.background) == 9 | sum(as.numeric(grepl("|", motif.background$sequence_name))) == nrow(motif.background)) {
motif.background <- motif.background %>%
filter(!grepl("promoter", sequence_name) & !grepl("start", start)) %>%
separate(col="sequence_name", into=c("enhancer_region", "enhancer_type", "gene_region", "gene_name_sequence_region"), sep="[|]") %>%
Expand Down
16 changes: 13 additions & 3 deletions workflow/scripts/output_IGVF_format.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,21 @@ option.list <- list(
make_option("--outdir", type="character", default="/oak/stanford/groups/engreitz/Users/kangh/TeloHAEC_Perturb-seq_2kG/230104_snakemake_WeissmanLabData/analysis/top2000VariableGenes/", help="Output directory"),
make_option("--K.val", type="numeric", default=90, help="K value to analyze"),
make_option("--density.thr", type="character", default="0.2", help="concensus cluster threshold, 2 for no filtering"),
make_option("--perturbSeq", type="logical", default=TRUE, help="Whether this is a Perturb-seq experiment")
make_option("--perturbSeq", type="logical", default=TRUE, help="Whether this is a Perturb-seq experiment"),
make_option("--level", type="character", default="cell line", help="Sample type (e.g. tissue, cell line, primary cells"),
make_option("--cell.type", type="character", default="teloHAEC", help="Cell type description (e.g. brain, teloHAEC, K562)")
)
opt <- parse_args(OptionParser(option_list=option.list))


## ## sdev IGVF b01_LeftCortex
## opt$sampleName <- "IGVF_b01_LeftCortex"
## opt$outdir <- "/oak/stanford/groups/engreitz/Users/kangh/IGVF/Cellular_Programs_Networks/230706_snakemake_igvf_b01_LeftCortex/analysis/all_genes/"
## opt$K.val <- 15
## opt$perturbSeq <- FALSE
## opt$level <- "tissue"
## opt$cell.type <- "brain"


SAMPLE=strsplit(opt$sampleName,",") %>% unlist()
OUTDIR=opt$outdir
Expand Down Expand Up @@ -103,8 +113,8 @@ out <- list("Assay" = NULL,
"Technology" = "10x",
"cNMF spectra threshold" = opt$density.thr,
"Topic IDs" = paste0(SAMPLE, "_K", k, "_", 1:k),
"level" = "cell line",
"cell type" = "K562")
"level" = opt$level,
"cell type" = opt$cell.type)
write_yaml(out, paste0(OUTDIRSAMPLEIGVF, SAMPLE, ".", SUBSCRIPT.SHORT, ".modelYAML.yaml"))

## 2. Topics YAML files: Capture all the information about Topics including Topic_ID, gene_weight, gene_id and gene_name and any other information that suits your data
Expand Down
33 changes: 23 additions & 10 deletions workflow/scripts/plot_gsea_clusterProfiler.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,15 @@ opt <- parse_args(OptionParser(option_list=option.list))
## opt$ranking.type <- "zscore"
## opt$GSEA.type <- "GSEA"

## IGVF b01_LeftCortex sdev
opt$sampleName <- "IGVF_b01_LeftCortex"
opt$figdir <- "/oak/stanford/groups/engreitz/Users/kangh/IGVF/Cellular_Programs_Networks/230706_snakemake_igvf_b01_LeftCortex/figures/all_genes/"
opt$outdir <- "/oak/stanford/groups/engreitz/Users/kangh/IGVF/Cellular_Programs_Networks/230706_snakemake_igvf_b01_LeftCortex/analysis/all_genes"
opt$K.val <- 10
opt$ranking.type <- "median_spectra"
opt$GSEA.type <- "GSEA"



OUTDIR <- opt$outdir
FIGDIR <- opt$figdir
Expand Down Expand Up @@ -96,16 +105,20 @@ mytheme <- theme_classic() + theme(axis.text = element_text(size = 7),

file.name <- paste0(OUTDIRSAMPLE, "/clusterProfiler_GeneRankingType", ranking.type.here, "_EnrichmentType", GSEA.type,".txt")
gsea.df <- read.delim(file.name, stringsAsFactors=F)
toplot <- gsea.df %>%
subset(p.adjust < fdr.thr) %>%
group_by(ProgramID) %>%
arrange(p.adjust) %>%
unique %>%
slice(1:10) %>%
mutate(TruncatedDescription = str_trunc(paste0(ID, "; ", Description), width=50, side="right"),
t = gsub("K60_", "", ProgramID) %>% as.numeric) %>%
arrange(t, p.adjust) %>%
as.data.frame
if(nrow(gsea.df) == 0) {
toplot <- data.frame()
} else {
toplot <- gsea.df %>%
subset(p.adjust < fdr.thr) %>%
group_by(ProgramID) %>%
arrange(p.adjust) %>%
unique %>%
slice(1:10) %>%
mutate(TruncatedDescription = str_trunc(paste0(ID, "; ", Description), width=50, side="right"),
t = gsub("K60_", "", ProgramID) %>% as.numeric) %>%
arrange(t, p.adjust) %>%
as.data.frame
}

plot.title <- paste0(ifelse(grepl("GO", GSEA.type), "GO Term Enrichment", "MSigDB Pathway Enrichment"),
"\non ",
Expand Down

0 comments on commit 3758903

Please sign in to comment.