Skip to content

Commit

Permalink
fix bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
baigal628 committed Jun 21, 2021
1 parent 28731ce commit 500be11
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 31 deletions.
2 changes: 1 addition & 1 deletion MAESTRO/Snakemake/scATAC/rules/sc_atac_annotation.smk
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ rule scatac_analysis:
genescore = "{sample}_gene_score.h5",
outpre = "{sample}",
counts = "../../QC/{sample}/{sample}_filtered_peak_count.h5",
fraggz = "../../mapping/{sample}/fragments_corrected_dedup_count.tsv.gz",
fraggz = "../../Mapping/{sample}/fragments_corrected_dedup_count.tsv.gz",
giggleannotation = config["giggleannotation"],
species = config["species"],
signature = config["signature"],
Expand Down
2 changes: 0 additions & 2 deletions MAESTRO/scATAC_FragmentReshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,5 @@ def CommandLineParser():
items = line.strip().split("\t")
if str(items[0]) in chr_list :
frag_out.write("\t".join(items[0:]) + "\n")
else:
print("Region in" + items[0] + "is excluded." )
end_time = time.time()
print("End:", end_time-start_time)
7 changes: 5 additions & 2 deletions MAESTRO/scATAC_HTMLReport_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ def CommandLineParser():
help = "The format of input files. DEFAULT: fastq.")
group_input.add_argument("--input-path", dest = "input_path", type = str, default = "",
help = "Directory where input files are stored")
group_input.add_argument("--mapping", dest = "mapping", type = str, default = "chromap",
choices = ["chromap", "minimap2"],
help = "Mapping Tools of scATAC-seq. DEFAULT: chromap.")
group_input.add_argument("--species", dest = "species", default = "GRCh38",
choices = ["GRCh38", "GRCm38"], type = str,
help = "Species (GRCh38 for human and GRCm38 for mouse). DEFAULT: GRCh38.")
Expand All @@ -49,7 +52,7 @@ def main():
species = myparser.species
platform = myparser.platform
input_format = myparser.input_format

mapping = myparser.mapping
try:
os.makedirs(directory)
except OSError:
Expand Down Expand Up @@ -99,7 +102,7 @@ def main():
td_list.append(items_str)
td_str = "\n".join(td_list)

if input_format != "fragments":
if input_format != "fragments" and mapping != "chromap":
if input_format == "fastq":
inputformat = "FASTQ Path"
else:
Expand Down
50 changes: 25 additions & 25 deletions R/ATACRunSeurat.R
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
#' Clustering analysis for scATAC-seq data using Seurat
#'
#' Clustering analysis for scATAC-seq dataset using Seurat (version >=4.0.0) and Signac (version >= 1.1.1). Including normalization, LSI/PCA dimension reduction, clustering and UMAP visualization. To run UMAP analysis, you must first install the umap-learn python package (e.g. via \code{pip install umap-learn}).
#' Clustering analysis for scATAC-seq dataset using Seurat (version >=4.0.0) and Signac (version >= 1.1.1). Including normalization, LSI/PCA dimension reduction, clustering and UMAP visualization. To run UMAP analysis, you must first install the umap-learn python package (e.g. via \code{pip install umap-learn}).
#'
#' @docType methods
#' @name ATACRunSeurat
#' @rdname ATACRunSeurat
#'
#' @param inputMat Input unnormalized count matrix, with peaks as rows and cells as columns. Rownames should be like "chromosome_peakstart_peakend",
#' for example "chr10_100020591_100020841".
#' for example "chr10_100020591_100020841".
#' @param type Type of the input matrix. Default is "matrix". Set to "object" if the input is Seurat object.
#' @param project Output project name. Default is "MAESTRO.scATAC.Seurat".
#' @param orign.ident Orginal identity for the input cells. If supplied, should keep the same order with the column name of the peak x cell matrix.
#' @param method Methods for dimension reduction, available options are LSI and PCA. Default is "LSI".
#' @param min.c Minimum number of cells required for a peak. Will exclude the peaks from input matrix if they only identified in
#' @param min.c Minimum number of cells required for a peak. Will exclude the peaks from input matrix if they only identified in
#' less than \code{min.c} cells. Default is 10. See \code{\link{CreateSeuratObject}} for details.
#' @param min.p Minimum number of peaks required for a cell. Will exclude the cells from input matrix if less than \code{min.p}
#' peaks are deteced in one cell. Default is 100. See \code{\link{CreateSeuratObject}} for details.
#' @param dims.use Number of dimensions used for UMAP analysis. Default is 1:30, use the first 30 PCs.
#' @param cluster.res Value of the clustering resolution parameter. Please use a value above (below) 1.0
#' @param cluster.res Value of the clustering resolution parameter. Please use a value above (below) 1.0
#' if users want to obtain a larger (smaller) number of communities. Default is 0.6.
#' @param only.pos If seting true, only positive peaks will be output. Default is False.
#' @param peaks.test.use Denotes which test to use to identify differnetial peaks. Default is "presto", a fast version of Wilcoxon Rank Sum test.
#' @param peaks.test.use Denotes which test to use to identify differnetial peaks. Default is "presto", a fast version of Wilcoxon Rank Sum test.
#' Available options are "wilcox" and "t". See \code{\link{FindAllMarkersMAESTRO}} for details.
#' @param peaks.cutoff Identify differential peaks with adjusted p.value less than \code{peaks.cutoff} as cluster specific peaks
#' @param peaks.pct Only test peaks that are detected in a minimum fraction of min.pct cells in either of the two populations. Meant to speed up the function by not testing peaks that are very infrequently detected Default is 0.1
Expand Down Expand Up @@ -49,23 +49,23 @@
#' @importFrom Gmisc fastDoCall
#' @export

ATACRunSeurat <- function(inputMat, type = "matrix", project = "MAESTRO.scATAC.Seurat", orign.ident = NULL,
min.c = 10, min.p = 100, method = "LSI", dims.use = 1:30,
cluster.res = 0.6, only.pos = FALSE, peaks.test.use = "presto",
peaks.cutoff = 1E-5, peaks.pct = 0.1, peaks.logfc = 0.2,
runtfidf.args = list(), runsvd.args = list(), runpca.args = list(),
findneighbors.args = list(), findclusters.args = list(),...)
ATACRunSeurat <- function(inputMat, type = "matrix", project = "MAESTRO.scATAC.Seurat", orign.ident = NULL,
min.c = 10, min.p = 100, method = "LSI", dims.use = 1:30,
cluster.res = 0.6, only.pos = FALSE, peaks.test.use = "presto",
peaks.cutoff = 1E-5, peaks.pct = 0.1, peaks.logfc = 0.2,
runtfidf.args = list(), runsvd.args = list(), runpca.args = list(),
findneighbors.args = list(), findclusters.args = list(),outdir = ".", ...)
{
if(type == "matrix"){SeuratObj <- CreateSeuratObject(inputMat, project = project, min.cells = min.c, min.features = min.p, assay = "ATAC")}
if(type == "object"){SeuratObj <- inputMat}

if(method == "LSI"){
if(method == "LSI"){
#============ LSI ============
message("LSI analysis ...")
SeuratObj <- fastDoCall("RunTFIDF", c(object = SeuratObj, runtfidf.args))
SeuratObj <- fastDoCall("RunTFIDF", c(object = SeuratObj, runtfidf.args))
SeuratObj <- FindTopFeatures(SeuratObj, min.cutoff = 'q0')
SeuratObj <- fastDoCall("RunSVD", c(object = SeuratObj, runsvd.args))
SeuratObj <- fastDoCall("RunSVD", c(object = SeuratObj, runsvd.args))

#============ UMAP ============
message("UMAP analysis ...")
#SeuratObj <- fastDoCall("RunUMAP", c(object = SeuratObj, dims = dims.use, reduction = "lsi", runumap.args))
Expand All @@ -75,46 +75,46 @@ ATACRunSeurat <- function(inputMat, type = "matrix", project = "MAESTRO.scATAC.S
# SeuratObj <- FindNeighbors(object = SeuratObj, reduction = "lsi", dims = dims.use)
# SeuratObj <- FindClusters(object = SeuratObj, resolution = cluster.res)
p1 <- DimPlot(object = SeuratObj, pt.size = 0.5, label = TRUE)
ggsave(file.path(paste0(project, "_cluster.png")), p1, width=5, height=4)
ggsave(file.path(outdir, paste0(project, "_cluster.png")), p1, width=5, height=4)

#============ DE analysis ============
message("Identify cluster specific peaks ...")
SeuratObj <- NormalizeData(SeuratObj, normalization.method = "LogNormalize", scale.factor = 10000)
cluster.peaks <- NULL
cluster.peaks <- FindAllMarkersMAESTRO(object = SeuratObj, min.pct = peaks.pct, logfc.threshold = peaks.logfc, test.use = peaks.test.use, only.pos = only.pos)
cluster.peaks <- cluster.peaks[cluster.peaks$p_val_adj<peaks.cutoff, ]
colnames(cluster.peaks)[7] <- "peak"
write.table(cluster.peaks, paste0(project, "_DiffPeaks.tsv"), quote=F, sep="\t")}
write.table(cluster.peaks, file.path(outdir, paste0(project, "_DiffPeaks.tsv")), quote=F, sep="\t")}

if(method == "PCA"){
#============ PCA ============
message("PCA analysis ...")
SeuratObj <- NormalizeData(SeuratObj, normalization.method = "LogNormalize", scale.factor = 10000)
SeuratObj <- ScaleData(object = SeuratObj, var.to.regress="nCount_RNA")
SeuratObj <- fastDoCall("RunPCA", c(object = SeuratObj, features = rownames(SeuratObj), runpca.args))

# SeuratObj <- RunPCA(object = SeuratObj, features = rownames(SeuratObj))
p2 = ElbowPlot(object = SeuratObj)
ggsave(file.path(paste0(project,"_PCElbowPlot.png")), p2, width = 5, height = 4)
ggsave(file.path(outdir, paste0(project,"_PCElbowPlot.png")), p2, width = 5, height = 4)

#============ UMAP ============
message("UMAP analysis ...")
SeuratObj <- RunUMAP(object = SeuratObj, reduction = "pca", dims = dims.use, ...)
SeuratObj <- fastDoCall("FindNeighbors", c(object = SeuratObj, reduction = "pca", dims = dims.use, findneighbors.args))
SeuratObj <- fastDoCall("FindClusters", c(object = SeuratObj, resolution = cluster.res, findclusters.args))

# SeuratObj <- FindNeighbors(object = SeuratObj, reduction = "pca", dims = dims.use)
# SeuratObj <- FindClusters(object = SeuratObj, resolution = res)
p3 <- DimPlot(object = SeuratObj, pt.size = 0.5, label = TRUE)
ggsave(file.path(paste0(project, "_cluster.png")), p3, width=5, height=4)
ggsave(file.path(outdir, paste0(project, "_cluster.png")), p3, width=5, height=4)

#============ DE analysis ============
message("Identify cluster specific peaks ...")
cluster.peaks <- NULL
cluster.peaks <- FindAllMarkersMAESTRO(object = SeuratObj, min.pct = peaks.pct, logfc.threshold = peaks.logfc, test.use = peaks.test.use, only.pos = only.pos)
cluster.peaks <- cluster.peaks[cluster.peaks$p_val_adj<peaks.cutoff, ]
colnames(cluster.peaks)[7] <- "peak"
write.table(cluster.peaks, paste0(proj, "_DiffPeaks.tsv"), quote=F, sep="\t")
write.table(cluster.peaks, file.path(outdir,paste0(proj, "_DiffPeaks.tsv")), quote=F, sep="\t")
}
return(list(ATAC=SeuratObj, peaks=cluster.peaks))
}
3 changes: 2 additions & 1 deletion R/VisualizeUmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ VisualizeUmap <- function(genes, type, SeuratObj, ncol = NULL, width = 6, height
return(p)
})
combinedplot = CombinePlots(umapplots, ncol = ncol)
ggsave(paste0(name, "_umapplot.png"), combinedplot, width=width, height=height)
ggsave(paste0(name, "_umapplot.png"), combinedplot, width=width, height=height)
return(combinedplot)
}


Expand Down

0 comments on commit 500be11

Please sign in to comment.