From e20e477fbddb9d063a9906422d70d28243c2df7a Mon Sep 17 00:00:00 2001
From: Tania <tanja_wyss@hotmail.com>
Date: Tue, 12 Jul 2022 11:01:54 +0200
Subject: [PATCH] add_discussion_day3

---
 scripts/discussion/discussion_day3.R | 578 +++++++++++++++++++++++++++
 1 file changed, 578 insertions(+)
 create mode 100644 scripts/discussion/discussion_day3.R

diff --git a/scripts/discussion/discussion_day3.R b/scripts/discussion/discussion_day3.R
new file mode 100644
index 0000000..158115c
--- /dev/null
+++ b/scripts/discussion/discussion_day3.R
@@ -0,0 +1,578 @@
+# --------  Day 3
+
+library(Seurat)
+# BiocManager::install("edgeR")
+library(edgeR)
+library(limma)
+library(dittoSeq)
+library(dplyr)
+
+# Differential gene expression
+setwd("/export/scratch/twyss/SIB_scRNAseq_course/July2022/data/")
+seu_int <- readRDS("seu_int_day2_part2.rds")
+
+# Use wilcoxon test from Seurat (make sure the default cell identity
+# is set to what you want it to be)
+Idents(seu_int) # res 0.3 with clusters 0-12
+# make sure the Default assay is set to RNA:
+DefaultAssay(seu_int)
+?FindAllMarkers
+
+# this takes a while:
+# de_genes <- Seurat::FindAllMarkers(seu_int,  min.pct = 0.25,
+#                                    only.pos = TRUE)
+head(de_genes) 
+
+# sometimes p>0.05 are included, remove non-significant genes
+range(de_genes$p_val_adj) # [1] 0 1
+de_genes<-subset(de_genes, de_genes$p_val_adj<0.05)
+
+# write to csv file if you want to store (and to avoid loosing time recalculating)
+write.csv(de_genes, "de_genes_FindAllMarkers.csv", row.names = F, quote = F)
+
+# create dotplot with top 3 markers per cluster:
+library(dplyr) 
+# use of %>% pipe of magrittr package
+# What the function does is to pass the left hand side of the operator 
+# to the first argument of the right hand side of the operator. 
+top_specific_markers <- de_genes %>% 
+  group_by(cluster) %>%
+  top_n(3, avg_log2FC) # select top 3 rows by value
+
+View(top_specific_markers)
+dittoSeq::dittoDotPlot(seu_int, vars = unique(top_specific_markers$gene), 
+                       group.by = "integrated_snn_res.0.3")
+dittoSeq::dittoDotPlot(seu_int, vars = unique(top_specific_markers$gene), 
+                       group.by = "SingleR_annot")
+
+# check if T cell genes are within DE genes
+tcell_genes <- c("IL7R", "LTB", "TRAC", "CD3D")
+de_genes[de_genes$gene %in% tcell_genes,]
+
+# pairwise DE between CD4+ and CD8+ T cells
+seu_int <- Seurat::SetIdent(seu_int, value = "SingleR_annot")
+
+deg_cd8_cd4 <- Seurat::FindMarkers(seu_int,
+                                   ident.1 = "CD8+ T cells",
+                                   ident.2 = "CD4+ T cells",
+                                   group.by = seu_int$SingleR_annot,
+                                   test.use = "wilcox")
+deg_cd8_cd4<-subset(deg_cd8_cd4, deg_cd8_cd4$p_val_adj<0.05)
+
+View(deg_cd8_cd4)
+# to have info on the output, see "Value" section in help:
+?FindMarkers
+deg_cd8_cd4[c("CD4", "CD8A", "CD8B"),]
+
+# Plotting the T cell genes only in T cells:
+Seurat::VlnPlot(seu_int, 
+                features = c("CD4", "CD8A", "CD8B"),
+                idents = c("CD8+ T cells", "CD4+ T cells"),
+                group.by = "SingleR_annot")
+# note on surface markers, eg CD4, which is often low at the mRNA level
+# CITE-seq for surface marker identification at the protein level
+
+
+
+
+
+
+
+# Pseudo-bulk DGE analysis with limma, summing counts per cell:
+##New script for generating pseudobulk
+##This script follows the vignette on this page 
+#http://bioconductor.org/books/3.14/OSCA.multisample/multi-sample-comparisons.html
+#here 591-596 lines are the same as done above
+#taking the proB data 
+proB <- readRDS("course_data/proB.rds")
+
+DimPlot(proB, group.by = "orig.ident")
+
+table(proB@meta.data$type)
+# ETV6-RUNX1      PBMMC 
+#      2000       1021
+
+head(proB@meta.data)
+
+Seurat::DefaultAssay(proB) <- "RNA"
+Seurat::Idents(proB) <- proB$orig.ident
+
+## add the patient id also for paired DGE
+proB$patient.id<-gsub("ETV6-RUNX1", "ETV6_RUNX1", proB$orig.ident)
+proB$patient.id<-sapply(strsplit(proB$patient.id, "-"), '[', 2)
+
+## here it is new, to allow to perform pseudo-bulk:
+##first a mandatory column of sample needs to be added to the meta data that is the grouping factor, should be the samples
+proB$sample <- factor(proB$orig.ident)
+
+##first an sce object is needed
+sce_proB <- as.SingleCellExperiment(proB)
+
+#The needed package has to be installed if not already done:
+if (!require("BiocManager", quietly = TRUE))
+  install.packages("BiocManager")
+BiocManager::install("scuttle")
+
+library(scuttle)
+
+##aggregateAcrossCells here it is only aggregated by sample, one could imagine
+##to aggregate by sample and by celltype for instance
+summed <- aggregateAcrossCells(sce_proB, 
+                               id=colData(sce_proB)[,c("sample")])
+
+##have a look at the counts
+counts(summed)[1:3,]
+
+#have a look at the colData of our new object summed, can you see type and 
+#patient.id are there
+head(colData(summed))
+
+#As in the standard limma analysis generate a DGE object
+
+y <- DGEList(counts(summed), samples=colData(summed)$sample)
+
+##filter lowly expressed (recommanded for limma)
+keep <- filterByExpr(y, group=summed$type)
+y <- y[keep,]
+
+##see how many genes were kept 
+summary(keep)
+
+## Create the design matrix and include the technology as a covariate:
+design <- model.matrix(~0 + summed$type + summed$patient.id)
+
+# Have a look
+design
+
+# change column/rownames names to more simple group names: 
+colnames(design) <- make.names(c("ETV6-RUNX1", "PBMMC","patient2","patient3"))
+rownames(design)<-colData(summed)$sample
+
+# Create contrasts, i.e. specify which groups we want to compare, here we want
+# to find genes differentially expressed between cluster 1 and cluster 2.
+contrast.mat <- limma::makeContrasts(ETV6.RUNX1 - PBMMC,
+                                     levels = design)
+
+
+dge <- edgeR::calcNormFactors(y)  
+
+#Do limma
+vm <- limma::voom(dge, design = design, plot = TRUE)
+fit <- limma::lmFit(vm, design = design)
+fit.contrasts <- limma::contrasts.fit(fit, contrast.mat)
+fit.contrasts <- limma::eBayes(fit.contrasts)
+
+# Show the top differentially expressed genes:
+limma::topTable(fit.contrasts, number = 10, sort.by = "P")
+limma_de <- limma::topTable(fit.contrasts, number = Inf, sort.by = "P")
+length(which(limma_de$adj.P.Val<0.05))
+
+# 
+tum_vs_norm <- Seurat::FindMarkers(proB, 
+                                   ident.1 = "ETV6-RUNX1", 
+                                   ident.2 = "PBMMC", 
+                                   group.by = "type")
+tum_vs_norm <- subset(tum_vs_norm, tum_vs_norm$p_val_adj<0.05)
+
+merge_limma_FindMarkers <- merge(tum_vs_norm, limma_de, by="row.names",
+                                 all.x=T)
+
+
+par(mar=c(4,4,4,4))
+plot(merge_limma_FindMarkers$avg_log2FC,
+     merge_limma_FindMarkers$logFC,
+     xlab="log2FC Wilcoxon", ylab="log2FC limma",
+     pch=15, cex=0.5)
+abline(a=0, b=1, col="red")
+
+
+
+
+# ---- Enrichment analysis
+library(clusterProfiler)
+library(enrichplot)
+# Need a gene annotation package for Human:
+# BiocManager::install("org.Hs.eg.db", update = FALSE)
+library(org.Hs.eg.db)
+# check gene label types allowed, we can use SYMBOL:
+AnnotationDbi::keytypes(org.Hs.eg.db)
+
+# select genes down-regulated in tumor:
+tum_down <- subset(tum_vs_norm,
+                   tum_vs_norm$avg_log2FC < -1 &
+                     tum_vs_norm$p_val_adj < 0.05)
+tum_down_genes <- rownames(tum_down) # 62 genes
+
+# over-representation analysis (Fisher test) for down-reg genes:
+?enrichGO
+tum_vs_norm_go <- clusterProfiler::enrichGO(gene = tum_down_genes,
+                                            OrgDb =  "org.Hs.eg.db",
+                                            keyType = "SYMBOL",
+                                            ont = "BP",
+                                            minGSSize = 50)
+View(tum_vs_norm_go@result)
+# remove redundant gene sets:
+enr_go <- clusterProfiler::simplify(tum_vs_norm_go)
+View(enr_go@result)
+
+# check if significant gene sets share overlapping genes:
+enrichplot::emapplot(enrichplot::pairwise_termsim(enr_go),
+                     showCategory = 30, cex_label_category = 0.5)
+
+# Test for hallmark gene set over-representation:
+?msigdbr
+gmt <- msigdbr::msigdbr(species = "human", category = "H")
+?clusterProfiler::read.gmt
+
+tum_vs_norm_enrich <- clusterProfiler::enricher(gene = tum_down_genes,
+                                                universe = rownames(proB),
+                                                pAdjustMethod = "BH",
+                                                TERM2GENE = gmt[,c("gs_name", "gene_symbol")])
+View(tum_vs_norm_enrich@result[which(tum_vs_norm_enrich@result$p.adjust<0.05),])
+
+
+# GSEA example using gseGO(), with t-statistic of limma output:
+# create ranked list of t-statistics:
+gene.list<-limma_de$t
+names(gene.list)<-rownames(limma_de)
+gene.list<-sort(gene.list, decreasing = T)
+head(gene.list)
+#      RPS4Y2       SDC2       CTGF AP005530.2      GNG11   HLA-DQA1 
+#    15.39674   15.33434   14.89301   14.27130   13.70183   13.39202 
+
+set.seed(1234)
+ego<-gseGO(geneList = gene.list,
+           ont="BP",
+           OrgDb = "org.Hs.eg.db",
+           keyType = "SYMBOL",
+           minGSSize = 60,
+           eps=1e-60,
+           seed=T)
+ego <- clusterProfiler::simplify(ego)
+head(ego@result[,c(2:7)])
+#                                 Description setSize enrichmentScore       NES       pvalue     p.adjust
+# GO:0000280                 nuclear division     292      -0.5139084 -2.583266 3.544230e-23 4.377997e-20
+# GO:0007059           chromosome segregation     267      -0.5233834 -2.608855 6.808704e-23 4.377997e-20
+# GO:0140014         mitotic nuclear division     225      -0.5404579 -2.661770 2.584499e-21 8.309163e-19
+# GO:0098813   nuclear chromosome segregation     208      -0.5369762 -2.606928 2.340434e-19 4.299712e-17
+# GO:0051301                    cell division     420      -0.4367175 -2.300783 3.118075e-19 5.012305e-17
+# GO:0010564 regulation of cell cycle process     443      -0.4125029 -2.183002 2.049784e-17 2.928913e-15
+
+# barcode plot of the top GO term:
+gseaplot(ego, geneSetID = "GO:0000280", title="GO:0000280: nuclear division")
+
+# Trajectory analysis
+library(Seurat)
+library(SingleCellExperiment)
+library(scater)
+library(slingshot)
+library(ggplot2)
+library(ggbeeswarm)
+library(batchelor)
+# sudo apt-get update
+# sudo apt-get install libcairo2-dev libxt-dev
+# devtools::install_github('cole-trapnell-lab/monocle3')
+# devtools::install_github('cole-trapnell-lab/monocle3', ref="develop")
+library(monocle3)
+
+# ---- Trajectory with slingshot:
+
+# Download the dataset from github within Terminal tab:
+#  cd course_data/
+#  wget https://github.com/hemberg-lab/nrg-paper-figures/blob/master/deng-reads.rds?raw=true
+# mv deng-reads.rds\?raw\=true deng-reads.rds
+
+# Import SingleCellExperiment object:
+deng_SCE <- readRDS("deng-reads.rds")
+
+# Change levels as developmental order:
+deng_SCE$cell_type2 <- factor(deng_SCE$cell_type2,
+                              levels = c("zy",
+                                         "early2cell",
+                                         "mid2cell",
+                                         "late2cell",
+                                         "4cell",
+                                         "8cell",
+                                         "16cell",
+                                         "earlyblast",
+                                         "midblast",
+                                         "lateblast"))
+
+# Run PCA using scater:
+deng_SCE <- scater::runPCA(deng_SCE, ncomponents = 50)
+
+# Use the reducedDim function to access the PCA and store the results.
+pca <- SingleCellExperiment::reducedDim(deng_SCE, "PCA")
+
+# Describe how the PCA is stored in a matrix. Why does it have this structure?
+head(pca)
+
+# Add 2 first principal components to SCE object:
+deng_SCE$PC1 <- pca[, 1]
+deng_SCE$PC2 <- pca[, 2]
+
+# PCA plot with cells colored according to developmental stage:
+ggplot(as.data.frame(colData(deng_SCE)), aes(x = PC1, y = PC2, color = cell_type2)) +
+  geom_point(size=2, shape=20) +
+  theme_classic() +
+  xlab("PC1") + ylab("PC2") + ggtitle("PC biplot")
+
+# Plot PC1 vs cell_type2.
+deng_SCE$pseudotime_PC1 <- rank(deng_SCE$PC1)  # rank cells by their PC1 score
+
+# Create a jitter plot of developmental stage against PC1:
+ggplot(as.data.frame(colData(deng_SCE)), aes(x = pseudotime_PC1, y = cell_type2,
+                                             colour = cell_type2)) +
+  ggbeeswarm::geom_quasirandom(groupOnX = FALSE) +
+  theme_classic() +
+  xlab("PC1") + ylab("Timepoint") +
+  ggtitle("Cells ordered by first principal component")
+
+# Run Slingshot 
+?slingshot::slingshot
+sce <- slingshot::slingshot(deng_SCE, reducedDim = 'PCA')
+
+
+# custom function to plot the PCA based on a slingshot object:
+  PCAplot_slingshot <- function(sce, draw_lines = TRUE, variable = NULL, legend = FALSE, ...){
+    # set palette for factorial variables
+    palf <- colorRampPalette(RColorBrewer::brewer.pal(8, "Set2"))
+    # set palette for numeric variables
+    paln <- colorRampPalette(RColorBrewer::brewer.pal(9, "Blues"))
+    # extract pca from SingleCellExperiment object
+    pca <- SingleCellExperiment::reducedDims(sce)$PCA
+    
+    if(is.null(variable)){
+      col <- "black"
+    }
+    if(is.character(variable)){
+      variable <- as.factor(variable)
+    }
+    if(is.factor(variable)){
+      colpal <- palf(length(levels(variable)))
+      colors <- colpal[variable]
+    }
+    if(is.numeric(variable)){
+      colpal <- paln(50)
+      colors <- colpal[cut(variable,breaks=50)]
+    }
+    
+    # draw the plot
+    plot(pca, bg = colors, pch = 21)
+    # draw lines
+    if(draw_lines){
+      lines(slingshot::SlingshotDataSet(sce), lwd = 2, ... )
+    }
+    # add legend
+    if(legend & is.factor(variable)){
+      legend("bottomright", pt.bg = colpal,legend = levels(variable),pch=21)
+      
+    }
+}
+
+# Plot PC1 versus PC2 with slingshot pseudotime added as line and coloring the cells:
+PCAplot_slingshot(sce, variable = sce$slingPseudotime_1, draw_lines = TRUE)
+
+# Plot developmental stage against pseudotime:
+  ggplot(as.data.frame(colData(deng_SCE)), aes(x = sce$slingPseudotime_1,
+                                               y = cell_type2,
+                                               colour = cell_type2)) +
+  ggbeeswarm::geom_quasirandom(groupOnX = FALSE) +
+  theme_classic() +
+  xlab("Slingshot pseudotime") + ylab("Timepoint") +
+  ggtitle("Cells ordered by Slingshot pseudotime")
+
+  
+# Generate unsupervised clustering of the cells using seurat: 
+gcdata <- Seurat::CreateSeuratObject(counts = SingleCellExperiment::counts(deng_SCE),
+                                         project = "slingshot")
+  
+gcdata <- Seurat::NormalizeData(object = gcdata,
+                                  normalization.method = "LogNormalize",
+                                  scale.factor = 10000)
+  
+gcdata <- Seurat::FindVariableFeatures(object = gcdata,
+                                         mean.function = ExpMean,
+                                         dispersion.function = LogVMR)
+  
+gcdata <- Seurat::ScaleData(object = gcdata,
+                              do.center = T,
+                              do.scale = F)
+  
+gcdata <- Seurat::RunPCA(object = gcdata,
+                           pc.genes = gcdata@var.genes)
+  
+gcdata <- Seurat::FindNeighbors(gcdata,
+                                  reduction = "pca",
+                                  dims = 1:5)
+  
+# clustering with resolution of 0.6
+gcdata <- Seurat::FindClusters(object = gcdata,
+                                 resolution = 0.6)
+  
+# Now we can add these clusters to the SCE object and re-run slingshot providing cluster labels :
+deng_SCE$Seurat_clusters <- as.character(Idents(gcdata))  # go from factor to character
+  
+sce <- slingshot::slingshot(deng_SCE,
+                              clusterLabels = 'Seurat_clusters',
+                              reducedDim = 'PCA',
+                              start.clus = "2")
+  
+# Check how the slingshot object has evolved, it now contains 2 curves:
+SlingshotDataSet(sce)
+  
+# Plot PC1 versus PC2 colored by slingshot pseudotime:
+PCAplot_slingshot(sce, variable = sce$slingPseudotime_2)
+  
+# Plot Slingshot pseudotime vs cell stage.
+ggplot(data.frame(cell_type2 = deng_SCE$cell_type2,
+                    slingPseudotime_1 = sce$slingPseudotime_1),
+         aes(x = slingPseudotime_1, y = cell_type2,
+             colour = cell_type2)) +
+    ggbeeswarm::geom_quasirandom(groupOnX = FALSE) +
+    theme_classic() +
+    xlab("Slingshot pseudotime") + ylab("Timepoint") +
+    ggtitle("Cells ordered by Slingshot pseudotime")
+  
+ggplot(data.frame(cell_type2 = deng_SCE$cell_type2,
+                    slingPseudotime_2 = sce$slingPseudotime_2),
+         aes(x = slingPseudotime_2, y = cell_type2,
+             colour = cell_type2)) +
+    ggbeeswarm::geom_quasirandom(groupOnX = FALSE) +
+    theme_classic() +
+    xlab("Slingshot pseudotime") + ylab("Timepoint") +
+    ggtitle("Cells ordered by Slingshot pseudotime")
+  
+#  Particularly the later stages, separation seems to improve. Since we have included the Seurat clustering, we can plot the PCA, with colors according to these clusters:
+PCAplot_slingshot(sce,
+                      variable = deng_SCE$Seurat_clusters,
+                      type = 'lineages',
+                      col = 'black',
+                      legend = TRUE)
+  
+PCAplot_slingshot(sce,
+                    variable = deng_SCE$cell_type2,
+                    type = 'lineages',
+                    col = 'black',
+                    legend = TRUE)
+
+# Slingshot analysis with end clusters indicated:
+sce <- slingshot::slingshot(deng_SCE,
+                            clusterLabels = 'Seurat_clusters',
+                            reducedDim = 'PCA',
+                            end.clus = c("0", "3", "5")) ## check which would be the best according to bio
+
+# PCA plot with curves ending at 3 clusters:
+PCAplot_slingshot(sce,
+                  variable = deng_SCE$Seurat_clusters,
+                  type = 'lineages',
+                  col = 'black',
+                  legend = F)
+
+
+# ---- Trajectory with Monocle3 
+# Generate a monocle3 object (with class cell_data_set) from our Seurat object:
+
+# get matrix and filter for minimum number of cells and 
+# features (the latter is a fix for backward compatibility)
+mat_tmp <- seu_int@assays$RNA@counts
+seu_tmp <- Seurat::CreateSeuratObject(mat_tmp, min.cells = 3,
+                                      min.features = 100)
+
+feature_names <- as.data.frame(rownames(seu_tmp))
+rownames(feature_names) <- rownames(seu_tmp)
+colnames(feature_names) <- "gene_short_name"
+
+seu_int_monocl <- monocle3::new_cell_data_set(seu_tmp@assays$RNA@counts,
+                                              cell_metadata = seu_int@meta.data,
+                                              gene_metadata = feature_names)
+
+# # Preprocess the dataset:
+?preprocess_cds
+# already ran: (takes a while)
+seu_int_monocl <- monocle3::preprocess_cds(seu_int_monocl)
+
+# elbow plot:
+monocle3::plot_pc_variance_explained(seu_int_monocl)
+
+# Perform UMAP using the implementation in the monocle3 package and its default parameters:
+seu_int_monocl <- monocle3::reduce_dimension(seu_int_monocl, reduction_method = "UMAP")
+
+# Plot the monocle3 UMAP coloring cells according to the cluster ID ran with Seurat:
+monocle3::plot_cells(seu_int_monocl, 
+                     color_cells_by = "integrated_snn_res.0.3", 
+                     cell_size = 1, 
+                     show_trajectory_graph = FALSE)
+# Slightly different UMAP between Seurat and Monocle3:
+p1<-DimPlot(seu_int, group.by = "integrated_snn_res.0.3")
+p2<-monocle3::plot_cells(seu_int_monocl, 
+                         color_cells_by = "integrated_snn_res.0.3", 
+                         cell_size = 0.7, 
+                         show_trajectory_graph = FALSE,
+                         label_cell_groups = F)
+cowplot::plot_grid(p1,p2, ncol = 2)
+
+# plot B cell marker:
+monocle3::plot_cells(seu_int_monocl, genes = "CD79A", 
+                     show_trajectory_graph = FALSE, 
+                     cell_size = 1)
+# Cluster cells using monocle3‘s clustering function:
+?cluster_cells
+seu_int_monocl <- monocle3::cluster_cells(seu_int_monocl, resolution=0.00025)
+monocle3::plot_cells(seu_int_monocl, label_cell_groups = F)
+
+# learn graph (i.e. identify trajectory) using monocle3 UMAP and clustering:
+seu_int_monocl <- monocle3::learn_graph(seu_int_monocl)
+monocle3::plot_cells(seu_int_monocl)
+
+# Find the CD34+ B-cell cluster in the monocle UMAP. This cluster has a high expressession of CD79A and expresses CD34.
+monocle3::plot_cells(seu_int_monocl, genes = c("CD79A", "CD34"),
+                     show_trajectory_graph = FALSE, 
+                     cell_size = 0.7, group_label_size = 4)
+
+# Select the “initial” cells in the B-cell cluster to calculate pseudotime. 
+# The initial cells in this case are the CD34+ B-cells we have just identified. 
+# A pop up window will open and you need to click on the “initial” cells (one node per trajectory), then click “Done”.
+seu_int_monocl<-monocle3::order_cells(seu_int_monocl)
+
+monocle3::plot_cells(seu_int_monocl,
+                     color_cells_by = "pseudotime",
+                     label_cell_groups=F,
+                     label_leaves=F,
+                     label_branch_points=FALSE,
+                     graph_label_size=1.5, cell_size = 1)
+
+# In order to find genes which expression is affected by pseudotime, 
+# we first have to isolate the B-cell cluster. Therefore, extract 
+# all cells in the B-cell cluster with the interactive choose_cells function:
+seuB <- choose_cells(seu_int_monocl)
+class(seuB)
+
+# Check whether you have selected the right cells:
+plot_cells(seuB, show_trajectory_graph = FALSE, cell_size = 1)
+# Now we can use the cells in this trajectory to test which genes are affected by the trajectory:
+?graph_test # Moran's I test
+pr_test <- graph_test(seuB, 
+                      cores=4, 
+                      neighbor_graph = "principal_graph")
+# order by test statistic
+pr_test <- pr_test[order(pr_test$morans_test_statistic, 
+                         decreasing = TRUE),]
+View(pr_test)
+# There are some interesting genes in there, for example related to cell cycling (MKI67, CKS2), 
+# related to B-cell development 
+# (CD34, MS4A1) and immunoglobulins (IGLL1 and IGLL5). We can plot those in the UMAP:
+goi <- c("CD34", "MS4A1", "IGLL1", "IGLL5", 
+         "MKI67", "CKS2")
+plot_cells(seuB, label_cell_groups=FALSE, genes = goi,
+           show_trajectory_graph=FALSE, cell_size = 1)
+
+# But also against pseudotime:
+?monocle3::clusters
+table(clusters(seuB))
+# 1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16 
+# 0 770   0   0   0   0 464   0   0   0   0   0   0   0   0   0 
+seuB@colData$monocle_cluster <- clusters(seuB)
+plot_genes_in_pseudotime(subset(seuB, 
+                                rowData(seuB)$gene_short_name %in% goi),
+                         min_expr=0.5, color_cells_by = "monocle_cluster")