SCTransform not regressing out variables - Seurat v5.0.1 #8148



I am performing routine scRNA-seq analysis on a single-cell dataset using the SCTransform framework and have run into a weird issue. Briefly, running the code below exactly reproduces the results of the SCTransform vignette, with clear differences in the UMAP plot reflecting cell cycle or mitochondrial percentage regression (here I regressed out mitochondrial gene percentage). However, when I run this script with my own data, by replacing the path in the first line of the script with the path to my own data, regressing out mitochondrial percentage or cell cycle score has no effect on the downstream UMAP or PCA plots. I'm not sure what to make of this, but my dataset is considerably larger, with around 9000 cells. I have also been able to reproduce these results with other datasets.

# Example 1: With Regression ----
# Loads 10x counts, scores cell cycle, computes the mitochondrial percentage,
# then runs SCTransform with those covariates regressed out before
# PCA / UMAP / clustering.
pbmc_data <- Seurat::Read10X(data.dir = FilePath)
pbmc <- CreateSeuratObject(counts = pbmc_data)
pbmc <- NormalizeData(pbmc)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst")

# Cell cycle markers loaded from Seurat, separated into S and G2M markers and
# converted to Title case. str_to_title() is vectorised, so no sapply() needed.
# NOTE(review): Title-case symbols and the "^mt-" pattern below only match
# mouse-style gene names -- confirm this fits the dataset being loaded.
s.genes <- str_to_title(cc.genes.updated.2019$s.genes)
g2m.genes <- str_to_title(cc.genes.updated.2019$g2m.genes)
pbmc <- CellCycleScoring(
  pbmc,
  s.features = s.genes,
  g2m.features = g2m.genes,
  set.ident = TRUE
)

# NOTE(review): the argument names/values on the next two calls were missing
# in the original post (`, = ""`), which is not valid R. They are
# reconstructed here as `col.name = "percent.mt"` and `vars.to.regress`.
pbmc <- PercentageFeatureSet(pbmc, pattern = "^mt-", col.name = "percent.mt")

# Fail early if a covariate is absent from the metadata, so a missing column
# cannot be mistaken for a regression that "has no effect".
regress_vars <- c("percent.mt", "S.Score", "G2M.Score")
stopifnot(all(regress_vars %in% colnames(pbmc[[]])))

pbmc <- SCTransform(
  pbmc,
  vars.to.regress = regress_vars,
  vst.flavor = "v2",
  method = "glmGamPoi",
  verbose = TRUE
)
pbmc <- RunPCA(pbmc, verbose = TRUE)
pbmc <- RunUMAP(pbmc, dims = 1:30, verbose = TRUE)
pbmc <- FindNeighbors(pbmc, dims = 1:30, verbose = TRUE)
pbmc <- FindClusters(pbmc, verbose = TRUE)
# Example 2: Without Regression ----
# Same pipeline as the regression example, built from the same `pbmc_data`
# counts and the `s.genes` / `g2m.genes` vectors defined earlier in this
# script, but SCTransform is called without any covariates to regress.
pbmc2 <- CreateSeuratObject(counts = pbmc_data)
pbmc2 <- NormalizeData(pbmc2)
pbmc2 <- FindVariableFeatures(pbmc2, selection.method = "vst")
pbmc2 <- CellCycleScoring(
  pbmc2,
  s.features = s.genes,
  g2m.features = g2m.genes,
  set.ident = TRUE
)
# NOTE(review): the argument name/value was missing in the original post
# (`, = ""`), which is not valid R; reconstructed as `col.name = "percent.mt"`.
pbmc2 <- PercentageFeatureSet(pbmc2, pattern = "^mt-", col.name = "percent.mt")
pbmc2 <- SCTransform(
  pbmc2,
  vst.flavor = "v2",
  method = "glmGamPoi",
  verbose = TRUE
)
pbmc2 <- RunPCA(pbmc2, verbose = TRUE)
pbmc2 <- RunUMAP(pbmc2, dims = 1:30, verbose = TRUE)
pbmc2 <- FindNeighbors(pbmc2, dims = 1:30, verbose = TRUE)
pbmc2 <- FindClusters(pbmc2, verbose = TRUE)
# Labelled embeddings for the two objects, shown side by side for comparison.
Plot1 <- DimPlot(object = pbmc, label = TRUE) +
  ggtitle(label = "With MT and CC Regression")
Plot2 <- DimPlot(object = pbmc2, label = TRUE) +
  ggtitle(label = "Without MT and CC Regression")
Plot1 + Plot2

# Cell cycle marker sets shipped with Seurat (S phase and G2/M phase).
s.genes <- cc.genes[["s.genes"]]
g2m.genes <- cc.genes[["g2m.genes"]]

# Read the 10x count matrix, wrap it in a Seurat object and normalise it.
pbmc_data <- Seurat::Read10X(data.dir = FilePath)
pbmc <- CreateSeuratObject(counts = pbmc_data)
pbmc <- NormalizeData(object = pbmc)
