diff --git a/NAMESPACE b/NAMESPACE index 8b8e980..88bd74b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(annotation_consensus) export(annotation_label_transfer) export(calculate_pseudobulk) export(cell_cycle_scoring) +export(clean_cellxgene_cell_types) export(convert_gene_names) export(create_pseudobulk) export(delete_lines_with_word) @@ -52,6 +53,7 @@ export(normalise_abundance_seurat_SCT) export(preprocessing_output) export(pseudobulk_merge) export(read_data_container) +export(reference_annotation_to_consensus) export(reference_label_coarse_id) export(reference_label_fine_id) export(remove_dead_scuttle) @@ -60,7 +62,6 @@ export(remove_empty_DropletUtils) export(run_targets_pipeline) export(score_cell_cycle_seurat) export(se_add_dispersion) -export(seurat_to_ligand_receptor_count) export(split_summarized_experiment) export(target_append) export(test_differential_abundance_hpc) @@ -73,7 +74,6 @@ import(broom) import(crew) import(crew.cluster) import(dplyr) -import(future.apply) import(ggplot2) import(ggupset) import(here) @@ -85,23 +85,6 @@ import(tidySingleCellExperiment) import(tidySummarizedExperiment) import(tidyseurat) importFrom(AnnotationDbi,mapIds) -importFrom(CellChat,aggregateNet) -importFrom(CellChat,computeExpr_LR) -importFrom(CellChat,computeExpr_agonist) -importFrom(CellChat,computeExpr_coreceptor) -importFrom(CellChat,computeRegionDistance) -importFrom(CellChat,createCellChat) -importFrom(CellChat,filterCommunication) -importFrom(CellChat,identifyOverExpressedGenes) -importFrom(CellChat,identifyOverExpressedInteractions) -importFrom(CellChat,projectData) -importFrom(CellChat,scPalette) -importFrom(CellChat,searchPair) -importFrom(CellChat,setIdent) -importFrom(CellChat,subsetCommunication) -importFrom(CellChat,subsetDB) -importFrom(CellChat,subsetData) -importFrom(CellChat,triMean) importFrom(DropletUtils,barcodeRanks) importFrom(DropletUtils,emptyDrops) importFrom(EnsDb.Hsapiens.v86,EnsDb.Hsapiens.v86) @@ -109,7 +92,6 @@ importFrom(HDF5Array,loadHDF5SummarizedExperiment) importFrom(HDF5Array,saveHDF5SummarizedExperiment) importFrom(Matrix,Matrix) importFrom(Matrix,colSums) -importFrom(RColorBrewer,brewer.pal) importFrom(S4Vectors,cbind) importFrom(S4Vectors,metadata) importFrom(S4Vectors,split) @@ -147,14 +129,11 @@ importFrom(SummarizedExperiment,rowData) importFrom(callr,r) importFrom(celldex,BlueprintEncodeData) importFrom(celldex,MonacoImmuneData) -importFrom(circlize,colorRamp2) -importFrom(cowplot,as_grob) importFrom(crew,crew_controller_local) importFrom(data.table,":=") importFrom(digest,digest) importFrom(dplyr,"%>%") importFrom(dplyr,across) -importFrom(dplyr,add_count) importFrom(dplyr,as_tibble) importFrom(dplyr,bind_rows) importFrom(dplyr,case_when) @@ -162,7 +141,6 @@ importFrom(dplyr,count) importFrom(dplyr,distinct) importFrom(dplyr,filter) importFrom(dplyr,group_by) -importFrom(dplyr,if_else) importFrom(dplyr,join_by) importFrom(dplyr,left_join) importFrom(dplyr,mutate) @@ -176,16 +154,10 @@ importFrom(dplyr,tibble) importFrom(dplyr,tribble) importFrom(dplyr,with_groups) importFrom(edgeR,estimateDisp) -importFrom(future,nbrOfWorkers) importFrom(future,tweak) importFrom(glue,glue) -importFrom(grid,grid.grab) -importFrom(gridGraphics,grid.echo) importFrom(here,here) importFrom(ids,random_id) -importFrom(igraph,graph_from_adjacency_matrix) -importFrom(igraph,in_circle) -importFrom(igraph,layout_) importFrom(lme4,findbars) importFrom(magrittr,"%$%") importFrom(magrittr,"%>%") @@ -194,31 +166,22 @@ importFrom(magrittr,extract2) importFrom(magrittr,not) importFrom(magrittr,set_names) importFrom(methods,show) -importFrom(methods,slot) -importFrom(patchwork,wrap_elements) -importFrom(pbapply,pbsapply) importFrom(purrr,compact) importFrom(purrr,imap) importFrom(purrr,map) importFrom(purrr,map2) -importFrom(purrr,map2_dbl) importFrom(purrr,map_chr) importFrom(purrr,map_int) importFrom(purrr,rep_along) importFrom(purrr,safely) importFrom(purrr,set_names) -importFrom(purrr,when) importFrom(readr,read_csv) importFrom(readr,write_lines) -importFrom(reshape2,melt) importFrom(rlang,enquo) importFrom(rlang,is_symbolic) importFrom(rlang,parse_expr) importFrom(rlang,quo_is_symbolic) -importFrom(rlang,quo_name) importFrom(rlang,sym) -importFrom(scales,rescale) -importFrom(scales,viridis_pal) importFrom(scater,isOutlier) importFrom(scuttle,logNormCounts) importFrom(scuttle,perCellQCMetrics) @@ -238,6 +201,7 @@ importFrom(targets,tar_config_get) importFrom(targets,tar_option_set) importFrom(targets,tar_script) importFrom(tibble,as_tibble) +importFrom(tibble,deframe) importFrom(tibble,enframe) importFrom(tibble,rowid_to_column) importFrom(tibble,tibble) diff --git a/R/CellChat.R b/R/CellChat.R deleted file mode 100644 index e230ba6..0000000 --- a/R/CellChat.R +++ /dev/null @@ -1,1115 +0,0 @@ -# More robust implementation that does not fail if no results - -#' @importFrom CellChat searchPair -cellchat_matrix_for_circle = function (object, signaling, signaling.name = NULL, color.use = NULL, - vertex.receiver = NULL, sources.use = NULL, targets.use = NULL, - top = 1, remove.isolate = FALSE, vertex.weight = NULL, vertex.weight.max = NULL, - vertex.size.max = 15, weight.scale = TRUE, edge.weight.max = NULL, - edge.width.max = 8, layout = c("hierarchy", "circle", "chord"), - thresh = 0.05, from = NULL, to = NULL, bidirection = NULL, - vertex.size = NULL, pt.title = 12, title.space = 6, vertex.label.cex = 0.8, - group = NULL, cell.order = NULL, small.gap = 1, big.gap = 10, - scale = FALSE, reduce = -1, show.legend = FALSE, legend.pos.x = 20, - legend.pos.y = 20, ...) { - - # Fix GCHECK - pathway_name = NULL - - if(object@LR$LRsig %>% filter(pathway_name == signaling) %>% nrow %>% magrittr::equals(0)) return(NULL) - - layout <- match.arg(layout) - if (!is.null(vertex.size)) { - warning("'vertex.size' is deprecated. Use `vertex.weight`") - } - if (is.null(vertex.weight)) { - vertex.weight <- as.numeric(table(object@idents)) - } - pairLR <- searchPair(signaling = signaling, pairLR.use = object@LR$LRsig, - key = "pathway_name", matching.exact = T, pair.only = T) - if (is.null(signaling.name)) { - signaling.name <- signaling - } - net <- object@net - pairLR.use.name <- dimnames(net$prob)[[3]] - pairLR.name <- intersect(rownames(pairLR), pairLR.use.name) - pairLR <- pairLR[pairLR.name, ] - prob <- net$prob - pval <- net$pval - prob[pval > thresh] <- 0 - if (length(pairLR.name) > 1) { - pairLR.name.use <- pairLR.name[apply(prob[, , pairLR.name], - 3, sum) != 0] - } - else { - pairLR.name.use <- pairLR.name[sum(prob[, , pairLR.name]) != - 0] - } - if (length(pairLR.name.use) == 0) { - return(NULL) - #stop(paste0("There is no significant communication of ", signaling.name)) - } - else { - pairLR <- pairLR[pairLR.name.use, ] - } - nRow <- length(pairLR.name.use) - prob <- prob[, , pairLR.name.use] - pval <- pval[, , pairLR.name.use] - if (length(dim(prob)) == 2) { - prob <- replicate(1, prob, simplify = "array") - pval <- replicate(1, pval, simplify = "array") - } - prob.sum <- apply(prob, c(1, 2), sum) - - prob.sum -} - -#' @importFrom tidyr gather -#' @importFrom dplyr if_else -cellchat_process_sample_signal = function (object, signaling = NULL, pattern = c("outgoing", - "incoming", "all"), slot.name = "netP", color.use = NULL, - color.heatmap = "BuGn", title = NULL, width = 10, height = 8, - font.size = 8, font.size.title = 10, cluster.rows = FALSE, - cluster.cols = FALSE) -{ - # Fix GCHECK - cell_type = NULL - value = NULL - gene = NULL - - pattern <- match.arg(pattern) - if (length(slot(object, slot.name)$centr) == 0) { - stop("Please run `netAnalysis_computeCentrality` to compute the network centrality scores! ") - } - centr <- slot(object, slot.name)$centr - outgoing <- matrix(0, nrow = nlevels(object@idents), ncol = length(centr)) - incoming <- matrix(0, nrow = nlevels(object@idents), ncol = length(centr)) - dimnames(outgoing) <- list(levels(object@idents), names(centr)) - dimnames(incoming) <- dimnames(outgoing) - for (i in 1:length(centr)) { - outgoing[, i] <- centr[[i]]$outdeg - incoming[, i] <- centr[[i]]$indeg - } - if (pattern == "outgoing") { - mat <- t(outgoing) - legend.name <- "Outgoing" - } - else if (pattern == "incoming") { - mat <- t(incoming) - legend.name <- "Incoming" - } - else if (pattern == "all") { - mat <- t(outgoing + incoming) - legend.name <- "Overall" - } - if (is.null(title)) { - title <- paste0(legend.name, " signaling patterns") - } - else { - title <- paste0(paste0(legend.name, " signaling patterns"), - " - ", title) - } - if (!is.null(signaling)) { - mat1 <- mat[rownames(mat) %in% signaling, , drop = FALSE] - mat <- matrix(0, nrow = length(signaling), ncol = ncol(mat)) - idx <- match(rownames(mat1), signaling) - mat[idx[!is.na(idx)], ] <- mat1 - dimnames(mat) <- list(signaling, colnames(mat1)) - } - mat.ori <- mat - mat <- sweep(mat, 1L, apply(mat, 1, max), "/", check.margin = FALSE) - mat[mat == 0] <- NA - - mat %>% - as_tibble(rownames = "gene") %>% - gather(cell_type, value, -gene) %>% - mutate(value = if_else(value %in% c(NaN, NA), 0, value)) -} - -computeCommunProbPathway= function (object = NULL, net = NULL, pairLR.use = NULL, thresh = 0.05) -{ - if (is.null(net)) { - net <- object@net - } - if (is.null(pairLR.use)) { - pairLR.use <- object@LR$LRsig - } - prob <- net$prob - prob[net$pval > thresh] <- 0 - pathways <- unique(pairLR.use$pathway_name) - group <- factor(pairLR.use$pathway_name, levels = pathways) - - # STEFANO FIX - if(length(levels(group))==1){ - xx = apply(prob, c(1, 2), by, group, sum) - prob.pathways = xx |> array(dim = c(nrow(xx), ncol(xx), 1), dimnames = list(rownames(xx), colnames(xx), levels(group))) - } - else - prob.pathways <- aperm(apply(prob, c(1, 2), by, group, sum), - c(2, 3, 1)) - - - - pathways.sig <- pathways[apply(prob.pathways, 3, sum) != 0] - prob.pathways.sig <- prob.pathways[, , pathways.sig, drop=FALSE] - idx <- sort(apply(prob.pathways.sig, 3, sum), decreasing = TRUE, - index.return = TRUE)$ix - pathways.sig <- pathways.sig[idx] - prob.pathways.sig <- prob.pathways.sig[, , idx, drop=FALSE] - if (is.null(object)) { - netP = list(pathways = pathways.sig, prob = prob.pathways.sig) - return(netP) - } - else { - object@netP$pathways <- pathways.sig - object@netP$prob <- prob.pathways.sig - return(object) - } -} - -#' @importFrom CellChat triMean computeExpr_LR computeExpr_coreceptor computeRegionDistance computeExpr_agonist -computeCommunProb = function (object, type = c("triMean", "truncatedMean", "thresholdedMean", - "median"), trim = 0.1, LR.use = NULL, raw.use = TRUE, population.size = FALSE, - distance.use = TRUE, interaction.length = 200, scale.distance = 0.01, - k.min = 10, nboot = 100, seed.use = 1L, Kh = 0.5, n = 1) -{ - - - type <- match.arg(type) - cat(type, "is used for calculating the average gene expression per cell group.", - "\n") - FunMean <- switch(type, triMean = triMean, truncatedMean = function(x) mean(x, - trim = trim, na.rm = TRUE), median = function(x) median(x, - na.rm = TRUE)) - if (raw.use) { - data <- as.matrix(object@data.signaling) - } - else { - data <- object@data.project - } - if (is.null(LR.use)) { - pairLR.use <- object@LR$LRsig - } - else { - pairLR.use <- LR.use - } - complex_input <- object@DB$complex - cofactor_input <- object@DB$cofactor - my.sapply <- sapply - ptm = Sys.time() - pairLRsig <- pairLR.use - group <- object@idents - geneL <- as.character(pairLRsig$ligand) - geneR <- as.character(pairLRsig$receptor) - nLR <- nrow(pairLRsig) - numCluster <- nlevels(group) - if (numCluster != length(unique(group))) { - stop("Please check `unique(object@idents)` and ensure that the factor levels are correct!\n You may need to drop unused levels using 'droplevels' function. e.g.,\n `meta$labels = droplevels(meta$labels, exclude = setdiff(levels(meta$labels),unique(meta$labels)))`") - } - data.use <- data/max(data) - nC <- ncol(data.use) - data.use.avg <- aggregate(t(data.use), list(group), FUN = FunMean) - data.use.avg <- t(data.use.avg[, -1]) - colnames(data.use.avg) <- levels(group) - dataLavg <- computeExpr_LR(geneL, data.use.avg, complex_input) - dataRavg <- computeExpr_LR(geneR, data.use.avg, complex_input) - - # STEFANO ADDED THIS - dataRavg[dataRavg=="NaN"]=0 - dataLavg[dataLavg=="NaN"]=0 - - dataRavg.co.A.receptor <- computeExpr_coreceptor(cofactor_input, - data.use.avg, pairLRsig, type = "A") - dataRavg.co.I.receptor <- computeExpr_coreceptor(cofactor_input, - data.use.avg, pairLRsig, type = "I") - dataRavg <- dataRavg * dataRavg.co.A.receptor/dataRavg.co.I.receptor - dataLavg2 <- t(replicate(nrow(dataLavg), as.numeric(table(group))/nC)) - dataRavg2 <- dataLavg2 - index.agonist <- which(!is.na(pairLRsig$agonist) & pairLRsig$agonist != - "") - index.antagonist <- which(!is.na(pairLRsig$antagonist) & - pairLRsig$antagonist != "") - if (object@options$datatype != "RNA") { - data.spatial <- object@images$coordinates - spot.size.fullres <- object@images$scale.factors$spot - spot.size <- object@images$scale.factors$spot.diameter - d.spatial <- computeRegionDistance(coordinates = data.spatial, - group = group, trim = trim, interaction.length = interaction.length, - spot.size = spot.size, spot.size.fullres = spot.size.fullres, - k.min = k.min) - if (distance.use) { - print(paste0(">>> Run CellChat on spatial imaging data using distances as constraints <<< [", - Sys.time(), "]")) - d.spatial <- d.spatial * scale.distance - diag(d.spatial) <- NaN - cat("The suggested minimum value of scaled distances is in [1,2], and the calculated value here is ", - min(d.spatial, na.rm = TRUE), "\n") - if (min(d.spatial, na.rm = TRUE) < 1) { - stop("Please increase the value of `scale.distance` and check the suggested values in the parameter description (e.g., 1, 0.1, 0.01, 0.001, 0.11, 0.011)") - } - P.spatial <- 1/d.spatial - P.spatial[is.na(d.spatial)] <- 0 - diag(P.spatial) <- max(P.spatial) - d.spatial <- d.spatial/scale.distance - } - else { - print(paste0(">>> Run CellChat on spatial imaging data without distances as constraints <<< [", - Sys.time(), "]")) - P.spatial <- matrix(1, nrow = numCluster, ncol = numCluster) - P.spatial[is.na(d.spatial)] <- 0 - } - } - else { - print(paste0(">>> Run CellChat on sc/snRNA-seq data <<< [", - Sys.time(), "]")) - d.spatial <- matrix(NaN, nrow = numCluster, ncol = numCluster) - P.spatial <- matrix(1, nrow = numCluster, ncol = numCluster) - distance.use = NULL - interaction.length = NULL - spot.size = NULL - spot.size.fullres = NULL - k.min = NULL - } - Prob <- array(0, dim = c(numCluster, numCluster, nLR)) - Pval <- array(0, dim = c(numCluster, numCluster, nLR)) - set.seed(seed.use) - permutation <- replicate(nboot, sample.int(nC, size = nC)) - data.use.avg.boot <- my.sapply(X = 1:nboot, FUN = function(nE) { - groupboot <- group[permutation[, nE]] - data.use.avgB <- aggregate(t(data.use), list(groupboot), - FUN = FunMean) - data.use.avgB <- t(data.use.avgB[, -1]) - return(data.use.avgB) - }, simplify = FALSE) - pb <- txtProgressBar(min = 0, max = nLR, style = 3, file = stderr()) - for (i in 1:nLR) { - dataLR <- Matrix::crossprod(matrix(dataLavg[i, ], nrow = 1), - matrix(dataRavg[i, ], nrow = 1)) - P1 <- dataLR^n/(Kh^n + dataLR^n) - P1_Pspatial <- P1 * P.spatial - if (sum(P1_Pspatial) == 0) { - Pnull = P1_Pspatial - Prob[, , i] <- Pnull - p = 1 - Pval[, , i] <- matrix(p, nrow = numCluster, ncol = numCluster, - byrow = FALSE) - } - else { - if (is.element(i, index.agonist)) { - data.agonist <- computeExpr_agonist(data.use = data.use.avg, - pairLRsig, cofactor_input, index.agonist = i, - Kh = Kh, n = n) - P2 <- Matrix::crossprod(matrix(data.agonist, - nrow = 1)) - } - else { - P2 <- matrix(1, nrow = numCluster, ncol = numCluster) - } - if (is.element(i, index.antagonist)) { - data.antagonist <- CellChat::computeExpr_antagonist(data.use = data.use.avg, - pairLRsig, cofactor_input, index.antagonist = i, - Kh = Kh, n = n) - P3 <- Matrix::crossprod(matrix(data.antagonist, - nrow = 1)) - } - else { - P3 <- matrix(1, nrow = numCluster, ncol = numCluster) - } - if (population.size) { - P4 <- Matrix::crossprod(matrix(dataLavg2[i, - ], nrow = 1), matrix(dataRavg2[i, ], nrow = 1)) - } - else { - P4 <- matrix(1, nrow = numCluster, ncol = numCluster) - } - Pnull = P1 * P2 * P3 * P4 * P.spatial - Prob[, , i] <- Pnull - Pnull <- as.vector(Pnull) - Pboot <- sapply(X = 1:nboot, FUN = function(nE) { - data.use.avgB <- data.use.avg.boot[[nE]] - dataLavgB <- computeExpr_LR(geneL[i], data.use.avgB, - complex_input) - dataRavgB <- computeExpr_LR(geneR[i], data.use.avgB, - complex_input) - dataRavgB.co.A.receptor <- computeExpr_coreceptor(cofactor_input, - data.use.avgB, pairLRsig[i, , drop = FALSE], - type = "A") - dataRavgB.co.I.receptor <- computeExpr_coreceptor(cofactor_input, - data.use.avgB, pairLRsig[i, , drop = FALSE], - type = "I") - dataRavgB <- dataRavgB * dataRavgB.co.A.receptor/dataRavgB.co.I.receptor - dataLRB = Matrix::crossprod(dataLavgB, dataRavgB) - P1.boot <- dataLRB^n/(Kh^n + dataLRB^n) - if (is.element(i, index.agonist)) { - data.agonist <- computeExpr_agonist(data.use = data.use.avgB, - pairLRsig, cofactor_input, index.agonist = i, - Kh = Kh, n = n) - P2.boot <- Matrix::crossprod(matrix(data.agonist, - nrow = 1)) - } - else { - P2.boot <- matrix(1, nrow = numCluster, ncol = numCluster) - } - if (is.element(i, index.antagonist)) { - data.antagonist <- CellChat::computeExpr_antagonist(data.use = data.use.avgB, - pairLRsig, cofactor_input, index.antagonist = i, - Kh = Kh, n = n) - P3.boot <- Matrix::crossprod(matrix(data.antagonist, - nrow = 1)) - } - else { - P3.boot <- matrix(1, nrow = numCluster, ncol = numCluster) - } - if (population.size) { - groupboot <- group[permutation[, nE]] - dataLavg2B <- as.numeric(table(groupboot))/nC - dataLavg2B <- matrix(dataLavg2B, nrow = 1) - dataRavg2B <- dataLavg2B - P4.boot = Matrix::crossprod(dataLavg2B, dataRavg2B) - } - else { - P4.boot = matrix(1, nrow = numCluster, ncol = numCluster) - } - Pboot = P1.boot * P2.boot * P3.boot * P4.boot * - P.spatial - return(as.vector(Pboot)) - }) - Pboot <- matrix(unlist(Pboot), nrow = length(Pnull), - ncol = nboot, byrow = FALSE) - nReject <- rowSums(Pboot - Pnull > 0) - p = nReject/nboot - Pval[, , i] <- matrix(p, nrow = numCluster, ncol = numCluster, - byrow = FALSE) - } - setTxtProgressBar(pb = pb, value = i) - } - close(con = pb) - Pval[Prob == 0] <- 1 - dimnames(Prob) <- list(levels(group), levels(group), rownames(pairLRsig)) - dimnames(Pval) <- dimnames(Prob) - net <- list(prob = Prob, pval = Pval) - execution.time = Sys.time() - ptm - object@options$run.time <- as.numeric(execution.time, units = "secs") - object@options$parameter <- list(type.mean = type, trim = trim, - raw.use = raw.use, population.size = population.size, - nboot = nboot, seed.use = seed.use, Kh = Kh, n = n, - distance.use = distance.use, interaction.length = interaction.length, - spot.size = spot.size, spot.size.fullres = spot.size.fullres, - k.min = k.min) - if (object@options$datatype != "RNA") { - object@images$distance <- d.spatial - } - object@net <- net - print(paste0(">>> CellChat inference is done. Parameter values are stored in `object@options$parameter` <<< [", - Sys.time(), "]")) - return(object) -} - -#' @importFrom future nbrOfWorkers -#' @importFrom methods slot -#' @importFrom pbapply pbsapply -#' @import future.apply -netAnalysis_computeCentrality = function (object = NULL, slot.name = "netP", net = NULL, net.name = NULL, - thresh = 0.05) -{ - if (is.null(net)) { - prob <- methods::slot(object, slot.name)$prob - pval <- methods::slot(object, slot.name)$pval - pval[prob == 0] <- 1 - prob[pval >= thresh] <- 0 - net = prob - } - if (is.null(net.name)) { - net.name <- dimnames(net)[[3]] - } - if (length(dim(net)) == 3) { - nrun <- dim(net)[3] - my.sapply <- ifelse(test = future::nbrOfWorkers() == - 1, yes = pbapply::pbsapply, no = future.apply::future_sapply) - centr.all = my.sapply(X = 1:nrun, FUN = function(x) { - net0 <- net[, , x] - - # ADDED BY STEFANO - net0[net0<0] = 0 - - return(computeCentralityLocal(net0)) - }, simplify = FALSE) - } - else { - centr.all <- as.list(computeCentralityLocal(net)) - } - names(centr.all) <- net.name - if (is.null(object)) { - return(centr.all) - } - else { - slot(object, slot.name)$centr <- centr.all - return(object) - } -} - -cellchat_circle_plot = function(pathway, x, y, DB, joint){ - - cellchat_diff_for_circle(pathway, x, y) %>% - draw_cellchat_circle_plot( - vertex.weight = as.numeric((table(x@idents) + table(y@idents))/2), - title.name = paste(pathway, DB, "\n", select_genes_for_circle_plot(joint, pathway)), - edge.width.max = 4 - ) -} - -#' @importFrom purrr when -cellchat_diff_for_circle = function(pathway, x, y){ - - # Fix GCHECK - . = NULL - - zero_matrix = - pathway %>% - when( - (.) %in% x@netP$pathways ~ cellchat_matrix_for_circle(x, layout = "circle", signaling = .), - (.) %in% y@netP$pathways ~ cellchat_matrix_for_circle(y, layout = "circle", signaling = .) - ) %>% - `-` (.,.) - - m1 = pathway %>% - when( - (.) %in% x@netP$pathways ~ cellchat_matrix_for_circle(x, layout = "circle", signaling = .), - ~ zero_matrix - ) - - m2 = pathway %>% - when( - (.) %in% y@netP$pathways ~ cellchat_matrix_for_circle(y, layout = "circle", signaling = .), - ~ zero_matrix - ) - - m2 - m1 -} - -#' @importFrom igraph graph_from_adjacency_matrix layout_ -#' @importFrom CellChat scPalette -#' @importFrom reshape2 melt -#' @importFrom patchwork wrap_elements -#' @importFrom cowplot as_grob -#' @importFrom circlize colorRamp2 -#' @importFrom RColorBrewer brewer.pal -#' @importFrom scales rescale -#' @importFrom igraph in_circle -draw_cellchat_circle_plot = function (net, color.use = NULL, title.name = NULL, sources.use = NULL, - targets.use = NULL, remove.isolate = FALSE, top = 1, top_absolute = NULL, weight.scale = T, - vertex.weight = 20, vertex.weight.max = NULL, vertex.size.max = 15, - vertex.label.cex = 0.8, vertex.label.color = "black", edge.weight.max = NULL, - edge.width.max = 8, alpha.edge = 0.6, label.edge = FALSE, - edge.label.color = "black", edge.label.cex = 0.8, edge.curved = 0.2, - shape = "circle", layout = in_circle(), margin = 0.2, vertex.size = NULL, - arrow.width = 1, arrow.size = 0.2) -{ - # Pass GCHECKS - target = NULL - - if (!is.null(vertex.size)) { - warning("'vertex.size' is deprecated. Use `vertex.weight`") - } - options(warn = -1) - - if(!is.null(top_absolute)) { - thresh = top_absolute - net[abs(net) < thresh] <- 0 - } - - thresh <- stats::quantile(as.numeric(net) %>% abs %>% .[.>0], probs = 1 - top) - - net[abs(net) < thresh] <- 0 - - if(sum(net)==0) return(NULL) - - if ((!is.null(sources.use)) | (!is.null(targets.use))) { - if (is.null(rownames(net))) { - stop("The input weighted matrix should have rownames!") - } - cells.level <- rownames(net) - df.net <- reshape2::melt(net, value.name = "value") - colnames(df.net)[1:2] <- c("source", "target") - if (!is.null(sources.use)) { - if (is.numeric(sources.use)) { - sources.use <- cells.level[sources.use] - } - df.net <- subset(df.net, source %in% sources.use) - } - if (!is.null(targets.use)) { - if (is.numeric(targets.use)) { - targets.use <- cells.level[targets.use] - } - df.net <- subset(df.net, target %in% targets.use) - } - df.net$source <- factor(df.net$source, levels = cells.level) - df.net$target <- factor(df.net$target, levels = cells.level) - df.net$value[is.na(df.net$value)] <- 0 - net <- tapply(df.net[["value"]], list(df.net[["source"]], - df.net[["target"]]), sum) - } - net[is.na(net)] <- 0 - if (remove.isolate) { - idx1 <- which(Matrix::rowSums(net) == 0) - idx2 <- which(Matrix::colSums(net) == 0) - idx <- intersect(idx1, idx2) - if(length(idx)>0){ - net <- net[-idx, ,drop=FALSE] - net <- net[, -idx, drop=FALSE] - } - } - g <- igraph::graph_from_adjacency_matrix(net, mode = "directed", - weighted = T) - edge.start <- igraph::ends(g, es = igraph::E(g), names = FALSE) - coords <- igraph::layout_(g, layout) - if (nrow(coords) != 1) { - coords_scale = scale(coords) - } - else { - coords_scale <- coords - } - if (is.null(color.use)) { - color.use = CellChat::scPalette(length(igraph::V(g))) - } - if (is.null(vertex.weight.max)) { - vertex.weight.max <- max(vertex.weight) - } - vertex.weight <- vertex.weight/vertex.weight.max * vertex.size.max + - 5 - loop.angle <- ifelse(coords_scale[igraph::V(g), 1] > 0, -atan(coords_scale[igraph::V(g), - 2]/coords_scale[igraph::V(g), 1]), pi - atan(coords_scale[igraph::V(g), - 2]/coords_scale[igraph::V(g), 1])) - igraph::V(g)$size <- vertex.weight - igraph::V(g)$color <- color.use[igraph::V(g)] - igraph::V(g)$frame.color <- color.use[igraph::V(g)] - igraph::V(g)$label.color <- vertex.label.color - igraph::V(g)$label.cex <- vertex.label.cex - if (label.edge) { - igraph::E(g)$label <- igraph::E(g)$weight - igraph::E(g)$label <- round(igraph::E(g)$label, digits = 1) - } - if (is.null(edge.weight.max)) { - edge.weight.max <- max(abs(igraph::E(g)$weight)) - } - if (weight.scale == TRUE) { - igraph::E(g)$width <- 0.3 + abs(igraph::E(g)$weight)/edge.weight.max * - edge.width.max - } - else { - igraph::E(g)$width <- 0.3 + edge.width.max * abs(igraph::E(g)$weight) - } - igraph::E(g)$arrow.width <- arrow.width - igraph::E(g)$arrow.size <- arrow.size - igraph::E(g)$label.color <- edge.label.color - igraph::E(g)$label.cex <- edge.label.cex - - igraph::E(g)$color = - circlize::colorRamp2(seq(max(abs(igraph::E(g)$weight)), -max(abs(igraph::E(g)$weight)), length.out =11), RColorBrewer::brewer.pal(11, "RdBu"))(igraph::E(g)$weight) %>% - grDevices::adjustcolor(alpha.edge) - - - if (sum(edge.start[, 2] == edge.start[, 1]) != 0) { - igraph::E(g)$loop.angle[which(edge.start[, 2] == edge.start[, - 1])] <- loop.angle[edge.start[which(edge.start[, - 2] == edge.start[, 1]), 1]] - } - radian.rescale <- function(x, start = 0, direction = 1) { - c.rotate <- function(x) (x + start)%%(2 * pi) * direction - c.rotate(scales::rescale(x, c(0, 2 * pi), range(x))) - } - label.locs <- radian.rescale(x = 1:length(igraph::V(g)), - direction = -1, start = 0) - label.dist <- vertex.weight/max(vertex.weight) + 2 - plot(g, edge.curved = edge.curved, vertex.shape = shape, - layout = coords_scale, margin = margin, vertex.label.dist = label.dist, - vertex.label.degree = label.locs, vertex.label.family = "Helvetica", - edge.label.family = "Helvetica") - if (!is.null(title.name)) { - text(0, 1.5, title.name, cex = 0.8) - } - - grab_grob() |> cowplot::as_grob() |> patchwork::wrap_elements() - -} - -#' @importFrom dplyr distinct -#' -select_genes_for_circle_plot = function(x, pathway){ - # Fix GChecks - CellChatDB.human <- NULL - pathway_name <- NULL - ligand <- NULL - . <- NULL - receptor <- NULL - - - paste( - c( - x@data.signaling[rownames(x@data.signaling) %in% (CellChatDB.human$interaction %>% filter(pathway_name == pathway) %>% distinct(ligand) %>% pull(1)),, drop=F] %>% rowSums() %>% .[(.)>100] %>% names(), - x@data.signaling[rownames(x@data.signaling) %in% (CellChatDB.human$interaction %>% filter(pathway_name == pathway) %>% distinct(receptor) %>% pull(1)),, drop=F] %>% rowSums() %>% .[(.)>100] %>% names() - ) %>% unique(), - collapse = "," - ) - -} - -#' @importFrom CellChat subsetCommunication -#' @importFrom RColorBrewer brewer.pal -#' @importFrom scales viridis_pal -#' -get_table_for_cell_vs_axis_bubble_plot = function (object, sources.use = NULL, targets.use = NULL, signaling = NULL, - pairLR.use = NULL, color.heatmap = c("Spectral", "viridis"), - n.colors = 10, direction = -1, thresh = 0.05, comparison = NULL, - group = NULL, remove.isolate = FALSE, max.dataset = NULL, - min.dataset = NULL, min.quantile = 0, max.quantile = 1, - line.on = TRUE, line.size = 0.2, color.text.use = TRUE, - color.text = NULL, title.name = NULL, font.size = 10, font.size.title = 10, - show.legend = TRUE, grid.on = TRUE, color.grid = "grey90", - angle.x = 90, vjust.x = NULL, hjust.x = NULL, return.data = FALSE) -{ - - # Fix GChecks - prob.original = NULL - - # cells.level <- levels(object@idents) - # source.use.numerical = which(cells.level == source.use) - # - color.heatmap <- match.arg(color.heatmap) - if (is.list(object@net[[1]])) { - message("Comparing communications on a merged object \n") - } - else { - message("Comparing communications on a single object \n") - } - if (is.null(vjust.x) | is.null(hjust.x)) { - angle = c(0, 45, 90) - hjust = c(0, 1, 1) - vjust = c(0, 1, 0.5) - vjust.x = vjust[angle == angle.x] - hjust.x = hjust[angle == angle.x] - } - if (length(color.heatmap) == 1) { - color.use <- tryCatch({ - RColorBrewer::brewer.pal(n = n.colors, name = color.heatmap) - }, error = function(e) { - (scales::viridis_pal(option = color.heatmap, direction = -1))(n.colors) - }) - } - else { - color.use <- color.heatmap - } - if (direction == -1) { - color.use <- rev(color.use) - } - if (is.null(comparison)) { - cells.level <- levels(object@idents) - if (is.numeric(sources.use)) { - sources.use <- cells.level[sources.use] - } - if (is.numeric(targets.use)) { - targets.use <- cells.level[targets.use] - } - - # TRY CATCH - df.net <- tryCatch( - expr = { - CellChat::subsetCommunication(object, slot.name = "net", - sources.use = sources.use, targets.use = targets.use, - signaling = signaling, pairLR.use = pairLR.use, - thresh = thresh) - }, - error = function(e){ - return(NULL) - } - ) - - if(is.null(df.net)) return(NULL) - - df.net$source.target <- paste(df.net$source, df.net$target, - sep = " -> ") - source.target <- paste(rep(sources.use, each = length(targets.use)), - targets.use, sep = " -> ") - source.target.isolate <- setdiff(source.target, unique(df.net$source.target)) - if (length(source.target.isolate) > 0) { - df.net.isolate <- BiocGenerics::as.data.frame(matrix(NA, nrow = length(source.target.isolate), - ncol = ncol(df.net))) - colnames(df.net.isolate) <- colnames(df.net) - df.net.isolate$source.target <- source.target.isolate - df.net.isolate$interaction_name_2 <- df.net$interaction_name_2[1] - df.net.isolate$pval <- 1 - a <- stringr::str_split(df.net.isolate$source.target, - " -> ", simplify = T) - df.net.isolate$source <- as.character(a[, 1]) - df.net.isolate$target <- as.character(a[, 2]) - df.net <- rbind(df.net, df.net.isolate) - } - df.net$pval[df.net$pval > 0.05] = 1 - df.net$pval[df.net$pval > 0.01 & df.net$pval <= 0.05] = 2 - df.net$pval[df.net$pval <= 0.01] = 3 - df.net$prob[df.net$prob == 0] <- NA - df.net$prob.original <- df.net$prob - df.net$prob <- -1/log(df.net$prob) - idx1 <- which(is.infinite(df.net$prob) | df.net$prob < - 0) - if (sum(idx1) > 0) { - values.assign <- seq(max(df.net$prob, na.rm = T) * - 1.1, max(df.net$prob, na.rm = T) * 1.5, length.out = length(idx1)) - position <- sort(prob.original[idx1], index.return = TRUE)$ix - df.net$prob[idx1] <- values.assign[match(1:length(idx1), - position)] - } - df.net$source <- factor(df.net$source, levels = cells.level[cells.level %in% - unique(df.net$source)]) - df.net$target <- factor(df.net$target, levels = cells.level[cells.level %in% - unique(df.net$target)]) - group.names <- paste(rep(levels(df.net$source), each = length(levels(df.net$target))), - levels(df.net$target), sep = " -> ") - df.net$interaction_name_2 <- as.character(df.net$interaction_name_2) - df.net <- with(df.net, df.net[order(interaction_name_2), - ]) - df.net$interaction_name_2 <- factor(df.net$interaction_name_2, - levels = unique(df.net$interaction_name_2)) - cells.order <- group.names - df.net$source.target <- factor(df.net$source.target, - levels = cells.order) - df <- df.net - } - else { - dataset.name <- names(object@net) - df.net.all <- CellChat::subsetCommunication(object, slot.name = "net", - sources.use = sources.use, targets.use = targets.use, - signaling = signaling, pairLR.use = pairLR.use, - thresh = thresh) - df.all <- data.frame() - for (ii in 1:length(comparison)) { - cells.level <- levels(object@idents[[comparison[ii]]]) - if (is.numeric(sources.use)) { - sources.use <- cells.level[sources.use] - } - if (is.numeric(targets.use)) { - targets.use <- cells.level[targets.use] - } - df.net <- df.net.all[[comparison[ii]]] - df.net$interaction_name_2 <- as.character(df.net$interaction_name_2) - df.net$source.target <- paste(df.net$source, df.net$target, - sep = " -> ") - source.target <- paste(rep(sources.use, each = length(targets.use)), - targets.use, sep = " -> ") - source.target.isolate <- setdiff(source.target, - unique(df.net$source.target)) - if (length(source.target.isolate) > 0) { - df.net.isolate <- BiocGenerics::as.data.frame(matrix(NA, nrow = length(source.target.isolate), - ncol = ncol(df.net))) - colnames(df.net.isolate) <- colnames(df.net) - df.net.isolate$source.target <- source.target.isolate - df.net.isolate$interaction_name_2 <- df.net$interaction_name_2[1] - df.net.isolate$pval <- 1 - a <- stringr::str_split(df.net.isolate$source.target, - " -> ", simplify = T) - df.net.isolate$source <- as.character(a[, 1]) - df.net.isolate$target <- as.character(a[, 2]) - df.net <- rbind(df.net, df.net.isolate) - } - df.net$source <- factor(df.net$source, levels = cells.level[cells.level %in% - unique(df.net$source)]) - df.net$target <- factor(df.net$target, levels = cells.level[cells.level %in% - unique(df.net$target)]) - group.names <- paste(rep(levels(df.net$source), - each = length(levels(df.net$target))), levels(df.net$target), - sep = " -> ") - group.names0 <- group.names - group.names <- paste0(group.names0, " (", dataset.name[comparison[ii]], - ")") - if (nrow(df.net) > 0) { - df.net$pval[df.net$pval > 0.05] = 1 - df.net$pval[df.net$pval > 0.01 & df.net$pval <= - 0.05] = 2 - df.net$pval[df.net$pval <= 0.01] = 3 - df.net$prob[df.net$prob == 0] <- NA - df.net$prob.original <- df.net$prob - df.net$prob <- -1/log(df.net$prob) - } - else { - df.net <- BiocGenerics::as.data.frame(matrix(NA, nrow = length(group.names), - ncol = 5)) - colnames(df.net) <- c("interaction_name_2", - "source.target", "prob", "pval", "prob.original") - df.net$source.target <- group.names0 - } - df.net$group.names <- as.character(df.net$source.target) - df.net$source.target <- paste0(df.net$source.target, - " (", dataset.name[comparison[ii]], ")") - df.net$dataset <- dataset.name[comparison[ii]] - df.all <- rbind(df.all, df.net) - } - if (nrow(df.all) == 0) { - return(NULL) - #stop("No interactions are detected. Please consider changing the cell groups for analysis. ") - } - idx1 <- which(is.infinite(df.all$prob) | df.all$prob < - 0) - if (sum(idx1) > 0) { - values.assign <- seq(max(df.all$prob, na.rm = T) * - 1.1, max(df.all$prob, na.rm = T) * 1.5, length.out = length(idx1)) - position <- sort(df.all$prob.original[idx1], index.return = TRUE)$ix - df.all$prob[idx1] <- values.assign[match(1:length(idx1), - position)] - } - df.all$interaction_name_2[is.na(df.all$interaction_name_2)] <- df.all$interaction_name_2[!is.na(df.all$interaction_name_2)][1] - df <- df.all - df <- with(df, df[order(interaction_name_2), ]) - df$interaction_name_2 <- factor(df$interaction_name_2, - levels = unique(df$interaction_name_2)) - cells.order <- c() - dataset.name.order <- c() - for (i in 1:length(group.names0)) { - for (j in 1:length(comparison)) { - cells.order <- c(cells.order, paste0(group.names0[i], - " (", dataset.name[comparison[j]], ")")) - dataset.name.order <- c(dataset.name.order, - dataset.name[comparison[j]]) - } - } - df$source.target <- factor(df$source.target, levels = cells.order) - } - min.cutoff <- quantile(df$prob, min.quantile, na.rm = T) - max.cutoff <- quantile(df$prob, max.quantile, na.rm = T) - df$prob[df$prob < min.cutoff] <- min.cutoff - df$prob[df$prob > max.cutoff] <- max.cutoff - if (remove.isolate) { - df <- df[!is.na(df$prob), ] - line.on <- FALSE - } - if (!is.null(max.dataset)) { - signaling <- as.character(unique(df$interaction_name_2)) - for (i in signaling) { - df.i <- df[df$interaction_name_2 == i, , drop = FALSE] - cell <- as.character(unique(df.i$group.names)) - for (j in cell) { - df.i.j <- df.i[df.i$group.names == j, , drop = FALSE] - values <- df.i.j$prob - idx.max <- which(values == max(values, na.rm = T)) - idx.min <- which(values == min(values, na.rm = T)) - dataset.na <- c(df.i.j$dataset[is.na(values)], - setdiff(dataset.name[comparison], df.i.j$dataset)) - if (length(idx.max) > 0) { - if (!(df.i.j$dataset[idx.max] %in% dataset.name[max.dataset])) { - df.i.j$prob <- NA - } - else if ((idx.max != idx.min) & !is.null(min.dataset)) { - if (!(df.i.j$dataset[idx.min] %in% dataset.name[min.dataset])) { - df.i.j$prob <- NA - } - else if (length(dataset.na) > 0 & sum(!(dataset.name[min.dataset] %in% - dataset.na)) > 0) { - df.i.j$prob <- NA - } - } - } - df.i[df.i$group.names == j, "prob"] <- df.i.j$prob - } - df[df$interaction_name_2 == i, "prob"] <- df.i$prob - } - } - if (remove.isolate) { - df <- df[!is.na(df$prob), ] - line.on <- FALSE - } - if (nrow(df) == 0) { - return(NULL) - #stop("No interactions are detected. Please consider changing the cell groups for analysis. ") - } - df$interaction_name_2 <- factor(df$interaction_name_2, levels = unique(df$interaction_name_2)) - df$source.target = droplevels(df$source.target, exclude = setdiff(levels(df$source.target), - unique(df$source.target))) - df -} - -#' @importFrom gridGraphics grid.echo -#' @importFrom grid grid.grab -#' -grab_grob <- function(){ - grid.echo() - grid.grab() -} - - -#' Ligand-Receptor Count from Seurat Data -#' -#' @description -#' Calculates ligand-receptor interactions for each cell type in a Seurat object using CellChat. -#' -#' @param counts Seurat object. -#' @param .cell_group Cell group variable. -#' @param assay Name of the assay to use. -#' @param sample_for_plotting Sample name for plotting. -#' -#' @return A list of communication results including interactions and signaling pathways. -#' -#' @importFrom CellChat createCellChat -#' @importFrom CellChat setIdent -#' @importFrom CellChat subsetDB -#' @importFrom CellChat subsetData -#' @importFrom CellChat identifyOverExpressedGenes -#' @importFrom CellChat identifyOverExpressedInteractions -#' @importFrom CellChat projectData -#' @importFrom CellChat filterCommunication -#' @importFrom CellChat aggregateNet -#' @importFrom rlang quo_name -#' @importFrom rlang enquo -#' @importFrom tibble tibble -#' @importFrom purrr map2 -#' @importFrom purrr map -#' @importFrom purrr map2_dbl -#' @importFrom dplyr distinct add_count -#' @export -seurat_to_ligand_receptor_count = function(counts, .cell_group, assay, sample_for_plotting = ""){ - - #Fix GChecks - cell_type_harmonised <- NULL - n_cells <- NULL - DB <- NULL - cell_vs_all_cells_per_pathway <- NULL - gene <- NULL - - # Your code for seurat_to_ligand_receptor_count function here - - - .cell_group = enquo(.cell_group) - - # If only one cell, return empty - if((counts |> distinct(!!.cell_group) |> nrow()) < 2) return(tibble) - - counts_cellchat = - counts |> - - # Filter - filter(!is.na(!!.cell_group)) |> - - # Filter cell types with > 10 cells - add_count(cell_type_harmonised, name = "n_cells") |> - filter(n_cells>=10) |> - - - # Convert from seurat MUST BE LOG-NORMALISED - createCellChat(group.by = quo_name(.cell_group), assay = assay) |> - setIdent( ident.use = quo_name(.cell_group)) - - - communication_results = - tibble(DB = c("Secreted Signaling", "ECM-Receptor" , "Cell-Cell Contact" )) |> - mutate(data = list(counts_cellchat)) |> - mutate(data = map2( - data, DB, - ~ { - print(.y) - .x@DB <- subsetDB(CellChat::CellChatDB.human, search = .y) - - x = .x |> - subsetData() |> - identifyOverExpressedGenes() |> - identifyOverExpressedInteractions() |> - projectData(CellChat::PPI.human) - - if(nrow(x@LR$LRsig)==0) return(NA) - - x |> - computeCommunProb() |> - filterCommunication() |> - computeCommunProbPathway() |> - aggregateNet() - - } - )) |> - - # Record sample - mutate(sample = sample_for_plotting) |> - - # Add histogram - mutate(tot_interactions = map2_dbl( - data, DB, - ~ .x |> when( - !is.na(.) ~ sum(.x@net$count), - ~ 0 - ) - )) |> - - # Add histogram - mutate(cell_cell_count = map2_dbl( - data, DB, - ~ { - my_data = .x - - # Return empty if no results - if(is.na(my_data)) return(tibble(cell_from = character(), cell_to = character(), weight = numeric())) - - my_data@net$count |> - as_tibble(rownames = "cell_from") |> - pivot_longer(-cell_from, names_to = "cell_to", values_to = "count") - - - } - )) |> - - # values_df_for_heatmap - # Scores for each cell types across all others. How communicative is each cell type - mutate(cell_vs_all_cells_per_pathway = map2( - data , sample, - ~ when( - .x, - !is.na(.x) && length(.x@netP$pathways) > 0 ~ - netAnalysis_computeCentrality(., slot.name = "netP") |> - cellchat_process_sample_signal( - pattern = "all", signaling = .x@netP$pathways, - title = .y, width = 5, height = 6, color.heatmap = "OrRd" - ), - ~ tibble(gene = character(), cell_type = character(), value = double()) - ) - )) - - genes = communication_results |> select(cell_vs_all_cells_per_pathway) |> unnest(cell_vs_all_cells_per_pathway) |> distinct(gene) |> pull(gene) - - # Hugh resolution - communication_results |> - mutate(cell_vs_cell_per_pathway = map( - data, - ~ { - my_data = .x - - # Return empty if no results - if(is.na(my_data)) return(tibble(gene = character(), result = list())) - - tibble(gene = genes) |> - mutate(result = map(gene, ~ { - - unparsed_result = cellchat_matrix_for_circle(my_data, layout = "circle", signaling = .x) - - if(!is.null(unparsed_result)) - unparsed_result |> - as_tibble(rownames = "cell_type_from") |> - pivot_longer(-cell_type_from, names_to = "cell_type_to", values_to = "score") - - unparsed_result - - })) - } - )) |> - - mutate(cell_cell_weight = map( - data, - ~ { - my_data = .x - - # Return empty if no results - if(is.na(my_data)) return(tibble(cell_from = character(), cell_to = character(), weight = numeric())) - - my_data@net$weight |> - as_tibble(rownames = "cell_from") |> - pivot_longer(-cell_from, names_to = "cell_to", values_to = "weight") - - - } - )) - - -} diff --git a/R/differential_expression.R b/R/differential_expression.R index faa678c..a6ec451 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -282,10 +282,10 @@ factory_de_fix_effect = function(se_list_input, output_se, formula, method, tier #' @importFrom rlang sym #' @importFrom dplyr left_join -#' @importFrom dplyr nest +#' @importFrom tidyr nest #' @importFrom dplyr group_by #' @importFrom dplyr mutate -#' @importFrom dplyr unnest +#' @importFrom tidyr unnest #' @importFrom purrr map #' @importFrom S4Vectors split #' @importFrom purrr compact diff --git a/R/utilities.R b/R/utilities.R index eac81fa..469ed47 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -895,6 +895,7 @@ is_strong_evidence = function(single_cell_data, cell_annotation_azimuth_l2, cell #' @importFrom dplyr tribble #' @importFrom tidyr expand_grid #' @importFrom stringr str_detect +#' @importFrom tibble deframe #' #' @param azimuth_input A vector of cell type annotations from the Azimuth dataset. #' @param monaco_input A vector of cell type annotations from the Monaco dataset. @@ -929,7 +930,7 @@ reference_annotation_to_consensus = function(azimuth_input, monaco_input, bluepr "Naive CD8 T cells", "cd8 naive", "monaco_fine", "Central memory CD8 T cells", "cd8 tcm", "monaco_fine", "Effector memory CD8 T cells", "cd8 tem", "monaco_fine", - "Terminal effector CD8 T cells", "terminal effector cd4 t", "monaco_fine", # Adjusting for the closest match + "Terminal effector CD8 T cells", "cd8 tem", "monaco_fine", # Adjusting for the closest match "MAIT cells", "mait", "monaco_fine", "Vd2 gd T cells", "tgd", "monaco_fine", "Non-Vd2 gd T cells", "tgd", "monaco_fine", # No direct match, leaving as NA @@ -963,7 +964,7 @@ reference_annotation_to_consensus = function(azimuth_input, monaco_input, bluepr "Lowdensity basophils", "granulocyte", "monaco_fine", # No direct match, leaving as NA "Terminal effector CD4 T cells", "terminal effector cd4 t", "monaco_fine", "progenitor", "progenitor_cell", "monaco_fine" - ) + ) azimuth = tribble( @@ -974,18 +975,18 @@ reference_annotation_to_consensus = function(azimuth_input, monaco_input, bluepr "dnT", "dnt", "azimuth_pbmc", "CD8 Naive", "cd8 naive", "azimuth_pbmc", "CD4 Naive", "cd4 naive", "azimuth_pbmc", - "CD4 TCM", "cd4 helper", "azimuth_pbmc", # Central memory cells often relate to Th1 or Th17 + "CD4 TCM", "cd4 tcm", "azimuth_pbmc", # Central memory cells often relate to Th1 or Th17 "gdT", "tgd", "azimuth_pbmc", "CD8 TCM", "cd8 tcm", "azimuth_pbmc", "MAIT", "mait", "azimuth_pbmc", - "CD4 TEM", "terminal effector cd4 t", "azimuth_pbmc", # Effector memory cells can relate to terminal effector cells + "CD4 TEM", "cd4 tem", "azimuth_pbmc", # Effector memory cells can relate to terminal effector cells "ILC", "ilc", "azimuth_pbmc", "CD14 Mono", "cd14 mono", "azimuth_pbmc", "cDC1", "cdc", "azimuth_pbmc", # Conventional dendritic cell 1 is commonly referred to as CDC "pDC", "pdc", "azimuth_pbmc", "cDC2", "cdc", "azimuth_pbmc", # No specific reference for cDC2, but using CDC as a general category "B naive", "b naive", "azimuth_pbmc", - "B intermediate", "b naive", "azimuth_pbmc", # No direct match, leaving as NA + "B intermediate", "b memory", "azimuth_pbmc", # No direct match, leaving as NA "B memory", "b memory", "azimuth_pbmc", "Platelet", "platelet", "azimuth_pbmc", "Eryth", "erythrocyte", "azimuth_pbmc", @@ -996,9 +997,11 @@ reference_annotation_to_consensus = function(azimuth_input, monaco_input, bluepr "Plasmablast", "plasma_cell", "azimuth_pbmc", "NK Proliferating", "NK", "azimuth_pbmc", # NK cells can be proliferative, linked to general proliferation "ASDC", "cdc", "azimuth_pbmc", # No direct match, leaving as NA - "CD8 Proliferating", "proliferating_t_cell", "azimuth_pbmc", - "CD4 Proliferating", "proliferating_t_cell", "azimuth_pbmc", - "doublet", "non_immune", "azimuth_pbmc" + "CD8 Proliferating", "cd8_proliferating_t_cell", "azimuth_pbmc", + "CD4 Proliferating", "cd4_proliferating_t_cell", "azimuth_pbmc", + "doublet", + "non_immune", + "azimuth_pbmc" ) blueprint = tribble( @@ -1006,10 +1009,10 @@ reference_annotation_to_consensus = function(azimuth_input, monaco_input, bluepr "Neutrophils", "granulocyte", "blueprint_fine", "Monocytes", "monocyte", "blueprint_fine", "MEP", "hematopoietic_cell", "blueprint_fine", # MEP typically refers to megakaryocyte-erythroid progenitor - "CD4+ T-cells", "cd4 th1", "blueprint_fine", + "CD4+ T-cells", "cd4 t", "blueprint_fine", "Tregs", "treg", "blueprint_fine", - "CD4+ Tcm", "cd4 th1/th17", "blueprint_fine", - "CD4+ Tem", "terminal effector cd4 t", "blueprint_fine", + "CD4+ Tcm", "cd4 tcm", "blueprint_fine", + "CD4+ Tem", "cd4 tem", "blueprint_fine", "CD8+ Tcm", "cd8 tcm", "blueprint_fine", "CD8+ Tem", "cd8 tem", "blueprint_fine", "NK cells", "nk", "blueprint_fine", @@ -1048,22 +1051,42 @@ reference_annotation_to_consensus = function(azimuth_input, monaco_input, bluepr "Astrocytes", "astrocyte", "blueprint_fine", "Mesangial cells", "mesangial_cell", "blueprint_fine" ) - - conversion_table = - bind_rows(monaco, blueprint, azimuth) + non_immune_cells <- c( + "megakaryocytes", + "endothelial_cell", + "chondrocyte", + "fibroblast", + "smooth_muscle_cell", + "epithelial_cell", + "melanocyte", + "muscle_cell", + "keratinocyte", + "endothelial_cell", # Appears again in the original vector + "myocyte", + "fat_cell", + "neuron", + "pericyte_cell", + "adipocyte", + "astrocyte", + "mesangial_cell" + ) t_cells <- c( "cd8 naive", "cd8 tcm", "cd8 tem", + "cd4 tem", + "cd4 tcm", "terminal effector cd4 t", "treg", "cd4 th1/th17", "cd4 th1", "cd4 th17", + "cd4 t", "t_nk", - "proliferating_t_cell", + "cd4_proliferating_t_cell", + "cd8_proliferating_t_cell", "dnt", "cd4 naive", "cd4 th2", @@ -1106,6 +1129,10 @@ reference_annotation_to_consensus = function(azimuth_input, monaco_input, bluepr # Find consensus manually mutate(consensus = case_when( + + # Non immune + blueprint_fine %in% non_immune_cells ~ "non_immune", + # Full consensus blueprint_fine == monaco_fine & blueprint_fine == azimuth_pbmc ~ blueprint_fine , @@ -1186,11 +1213,11 @@ reference_annotation_to_consensus = function(azimuth_input, monaco_input, bluepr # select(-Reference) |> # pivot_wider(names_from = Database, values_from = Query, values_fn = function(x) paste(unique(x), collapse = ",")) - # parse names + # parse names, chenge to lower case for all tibble( - blueprint_fine = blueprint |> select(-Database) |> deframe() |> _[!!blueprint_input], - monaco_fine = monaco |> select(-Database) |> deframe() |> _[!!monaco_input], - azimuth_pbmc = azimuth |> select(-Database) |> deframe() |> _[!!azimuth_input], + blueprint_fine = blueprint |> select(-Database) |> mutate(across(everything(), tolower)) |> deframe() |> _[!!tolower(blueprint_input)], + monaco_fine = monaco |> select(-Database) |> mutate(across(everything(), tolower)) |> deframe() |> _[!!tolower(monaco_input)], + azimuth_pbmc = azimuth |> select(-Database) |> mutate(across(everything(), tolower)) |> deframe() |> _[!!tolower(azimuth_input)], ) |> left_join( all_combinations, @@ -1203,85 +1230,137 @@ reference_annotation_to_consensus = function(azimuth_input, monaco_input, bluepr pull(consensus) - # - # - # - # #Fix GChecks - # cell_type_clean = NULL - # - # x |> - # # Annotate - # mutate(cell_type_clean = cell_type_clean |> tolower()) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove_all(",")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove("alphabeta")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove_all("positive")) |> - # mutate(cell_type_clean = cell_type_clean |> str_replace("cd4 t", "cd4")) |> - # mutate(cell_type_clean = cell_type_clean |> str_replace("regulatory t", "treg")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove("thymusderived")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove("human")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove("igg ")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove("igm ")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove("iga ")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove("group [0-9]")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove("common")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove("cd45ro")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove("type i")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove("germinal center")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove("iggnegative")) |> - # mutate(cell_type_clean = cell_type_clean |> str_remove("terminally differentiated")) |> - # - # mutate(cell_type_clean = if_else(cell_type_clean |> str_detect("macrophage"), "macrophage", cell_type_clean) ) |> - # mutate(cell_type_clean = if_else(cell_type_clean == "mononuclear phagocyte", "macrophage", cell_type_clean) ) |> - # - # mutate(cell_type_clean = if_else(cell_type_clean |> str_detect(" treg"), "treg", cell_type_clean) ) |> - # mutate(cell_type_clean = if_else(cell_type_clean |> str_detect(" dendritic"), "dendritic", cell_type_clean) ) |> - # mutate(cell_type_clean = if_else(cell_type_clean |> str_detect(" thelper"), "thelper", cell_type_clean) ) |> - # mutate(cell_type_clean = if_else(cell_type_clean |> str_detect("thelper "), "thelper", cell_type_clean) ) |> - # mutate(cell_type_clean = if_else(cell_type_clean |> str_detect("gammadelta"), "tgd", cell_type_clean) ) |> - # mutate(cell_type_clean = if_else(cell_type_clean |> str_detect("natural killer"), "nk", cell_type_clean) ) |> - # - # - # mutate(cell_type_clean = cell_type_clean |> str_replace_all(" ", " ")) |> - # - # - # mutate(cell_type_clean = cell_type_clean |> str_replace("myeloid leukocyte", "myeloid")) |> - # mutate(cell_type_clean = cell_type_clean |> str_replace("effector memory", "tem")) |> - # mutate(cell_type_clean = cell_type_clean |> str_replace("effector", "tem")) |> - # mutate(cell_type_clean = cell_type_clean |> str_replace_all("cd8 t", "cd8")) |> - # mutate(cell_type_clean = cell_type_clean |> str_replace("central memory", "tcm")) |> - # mutate(cell_type_clean = cell_type_clean |> str_replace("gammadelta t", "gdt")) |> - # mutate(cell_type_clean = cell_type_clean |> str_replace("nonclassical monocyte", "cd16 monocyte")) |> - # mutate(cell_type_clean = cell_type_clean |> str_replace("classical monocyte", "cd14 monocyte")) |> - # mutate(cell_type_clean = cell_type_clean |> str_replace("follicular b", "b")) |> - # mutate(cell_type_clean = cell_type_clean |> str_replace("unswitched memory", "memory")) |> - # - # mutate(cell_type_clean = cell_type_clean |> str_trim()) } -#' Clean and Standardize Cell Types +#' Clean and Standardize Cell Type Names #' -#' This function takes a vector of cell types and applies a series of transformations -#' to clean and standardize them for better consistency. +#' Cleans and standardizes a vector of cell type names by applying a series of string transformations to improve consistency. +#' This function is particularly useful for preprocessing cell type labels in biological datasets where consistent naming conventions are important. #' -#' @importFrom stringr str_remove_all -#' @importFrom stringr str_trim +#' @param x A character vector of cell type names to be cleaned and standardized. #' -#' @param .x A vector of cell types. +#' @return A character vector of cleaned and standardized cell type names. #' -#' @return A cleaned and standardized vector of cell types. +#' @importFrom stringr str_remove_all +#' @importFrom stringr str_remove +#' @importFrom stringr str_replace +#' @importFrom stringr str_replace_all +#' @importFrom stringr str_trim #' #' @examples -#' cell_types <- c("CD4+ T-cells", "NK cells", "Blast-cells") -# cleaned_cell_types <- clean_cell_types(cell_types) - -clean_cell_types = function(.x){ - .x |> +#' cell_types <- c("CD4+ T-cells", "NK cells", "Blast-cells", "Terminally differentiated macrophage") +#' cleaned_cell_types <- clean_cellxgene_cell_types(cell_types) +#' print(cleaned_cell_types) +#' +#' # Output: +#' # [1] "cd4 t" "nk" "" "macrophage" +#' +#' @export +clean_cellxgene_cell_types = function(x){ + + x |> + # Annotate + tolower() |> + str_remove_all(",") |> + str_remove("alphabeta") |> + str_remove_all("positive") |> + str_replace("cd4 t", "cd4") |> + str_replace("regulatory t", "treg") |> + str_remove("thymusderived") |> + str_remove("human") |> + str_remove("igg ") |> + str_remove("igm ") |> + str_remove("iga ") |> + str_remove("group [0-9]") |> + str_remove("common") |> + str_remove("cd45ro") |> + str_remove("type i") |> + str_remove("germinal center") |> + str_remove("iggnegative") |> + str_remove("terminally differentiated") |> + + str_replace(".*macrophage.*", "macrophage") |> + str_replace("^mononuclear phagocyte$", "macrophage") |> + str_replace(".* treg.*", "treg") |> + str_replace(".* dendritic.*", "dendritic") |> + str_replace(".* thelper.*", "thelper") |> + str_replace(".*thelper .*", "thelper") |> + str_replace(".*gammadelta.*", "tgd") |> + str_replace(".*natural killer.*", "nk") |> + + str_replace_all(" ", " ") |> + + str_replace("myeloid leukocyte", "myeloid") |> + str_replace("effector memory", "tem") |> + str_replace("effector", "tem") |> + str_replace_all("cd8 t", "cd8") |> + str_replace("central memory", "tcm") |> + str_replace("gammadelta t", "gdt") |> + str_replace("nonclassical monocyte", "cd16 monocyte") |> + str_replace("classical monocyte", "cd14 monocyte") |> + str_replace("follicular b", "b") |> + str_replace("unswitched memory", "memory") |> + + str_trim() |> + str_remove_all("\\+") |> str_remove_all("cells") |> str_remove_all("cell") |> str_remove_all("blast") |> str_remove_all("-") |> - str_trim() + str_trim() |> + + str_remove("^_+|_+$") |> # Removes leading and trailing underscores + + # clean NON IMMUNE + str_replace("(?i)\\bepithelial\\b", "epithelial_cell") |> + str_replace("(?i)\\bfibroblast\\b", "fibroblast") |> + str_replace("(?i)\\bendothelial\\b", "endothelial_cell") |> + str_replace("(?i)^(Mueller cell|Muller cell)$", "Muller_cell") |> + str_replace("(?i)\\bneuron\\b", "neuron") |> + str_replace("(?i)amplifying cell", "amplifying_cell") |> + str_replace("(?i)stem cell", "stem_cell") |> + str_replace("(?i)progenitor cell", "progenitor_cell") |> + str_replace("(?i)acinar cell", "acinar_cell") |> + str_replace("(?i)goblet cell", "goblet_cell") |> + str_replace("(?i)thymocyte", "thymocyte") |> + str_replace("(?i)urothelial", "urothelial_cell") |> + str_replace("(?i)\\bfat\\b", "fat_cell") |> + str_replace("(?i)pneumocyte", "pneumocyte") |> + str_replace("(?i)mesothelial", "mesothelial_cell") |> + str_replace("(?i)enteroendocrine", "enteroendocrine_cell") |> + str_replace("(?i)enterocyte", "enterocyte") |> + str_replace("(?i)\\bbasal\\b", "basal_cell") |> + str_replace("(?i)stromal", "stromal_cell") |> + str_replace("(?i)retina", "retinal_cell") |> + str_replace("(?i)ciliated", "ciliated_cell") |> + str_replace("(?i)pericyte", "pericyte_cell") |> + str_replace("(?i)trophoblast", "trophoblast") |> + str_replace("(?i)brush", "brush_cell") |> + str_replace("(?i)serous", "serous_cell") |> + str_replace("(?i)hepatocyte", "hepatocyte") |> + str_replace("(?i)melanocyte", "melanocyte") |> + str_replace("(?i)myocyte", "myocyte") |> + str_replace("(?i)promyelocyte", "promyelocyte") |> + str_replace("(?i)cholangiocyte", "cholangiocyte") |> + str_replace("(?i)myoblast", "myoblast") |> + str_replace("(?i)satellite", "satellite_cell") |> + str_replace("(?i)muscle", "muscle_cell") |> + str_replace("(?i)progenitor", "progenitor_cell") |> + str_replace("(?i)erythrocyte", "erythrocyte") |> + str_replace("(?i)myoepithelial", "myoepithelial_cell") |> + str_replace("(?i)myofibroblast", "myofibroblast_cell") |> + str_replace("(?i)pancreatic", "pancreatic_cell") |> + str_replace("(?i)renal", "renal_cell") |> + str_replace("(?i)epidermal", "epidermal_cell") |> + str_replace("(?i)cortical", "cortical_cell") |> + str_replace("(?i)interstitial", "interstitial_cell") |> + str_replace("(?i)neuroendocrine", "neuroendocrine_cell") |> + str_replace("(?i)granular", "granular_cell") |> + str_replace("(?i)kidney", "kidney_cell") |> + str_replace("(?i)paneth", "paneth_cell") |> + str_replace("(?i)bipolar", "bipolar_cell") |> + str_replace_all(" ", "_") } @@ -1525,648 +1604,648 @@ harmonise_names_non_immune = function(metadata){ metadata } -get_manually_curated_immune_cell_types = function(){ - - # library(zellkonverter) - # library(Seurat) - # library(SingleCellExperiment) # load early to avoid masking dplyr::count() - # library(tidySingleCellExperiment) - # library(dplyr) - # library(cellxgenedp) - # library(tidyverse) - #library(tidySingleCellExperiment) - # library(stringr) - # library(scMerge) - # library(glue) - # library(tidyseurat) - # library(celldex) - # library(SingleR) - # library(glmGamPoi) - # library(stringr) - # library(purrr) - - - #Fix GCHECKS - metadata_file = NULL - .cell = NULL - cell_type = NULL - file_id = NULL - .sample = NULL - azhimut_confirmed = NULL - blueprint_confirmed <- NULL - arrange <- NULL # This one is actually a function from dplyr, so you should use it with dplyr::arrange or import it - cell_type_clean <- NULL - blueprint_singler <- NULL - predicted.celltype.l2 <- NULL - strong_evidence <- NULL - cell_type_harmonised <- NULL - confidence_class <- NULL - lineage_1 <- NULL - monaco_singler <- NULL - cell_annotation_monaco_singler <- NULL - cell_annotation_azimuth_l2 <- NULL - cell_annotation_blueprint_singler <- NULL - confidence_class_manually_curated <- NULL - cell_type_harmonised_manually_curated <- NULL - file_curated_annotation_merged <- NULL - .sample <- NULL - cell_type_harmonised_non_immune <- NULL - - # library(zellkonverter) - # library(Seurat) - # library(SingleCellExperiment) # load early to avoid masking dplyr::count() - # library(tidySingleCellExperiment) - # library(dplyr) - # library(cellxgenedp) - # library(tidyverse) - # #library(tidySingleCellExperiment) - # library(stringr) - # library(scMerge) - # library(glue) - # library(DelayedArray) - # library(HDF5Array) - # library(tidyseurat) - # library(celldex) - # library(SingleR) - # library(glmGamPoi) - # library(stringr) - # library(purrr) - - # # source("utility.R") - # - # metadata_file = "/vast/projects/cellxgene_curated//metadata_0.2.rds" - # file_curated_annotation_merged = "~/PostDoc/CuratedAtlasQueryR/dev/cell_type_curated_annotation_0.2.3.rds" - # file_metadata_annotated = "/vast/projects/cellxgene_curated/metadata_annotated_0.2.3.rds" - # annotation_directory = "/vast/projects/cellxgene_curated//annotated_data_0.2/" - # - # # metadata_file = "/vast/projects/cellxgene_curated//metadata.rds" - # # file_curated_annotation_merged = "~/PostDoc/CuratedAtlasQueryR/dev/cell_type_curated_annotation.rds" - # # file_metadata_annotated = "/vast/projects/cellxgene_curated//metadata_annotated.rds" - # # annotation_directory = "/vast/projects/cellxgene_curated//annotated_data_0.1/" - # - # - # annotation_harmonised = - # dir(annotation_directory, full.names = TRUE) |> - # enframe(value="file") |> - # tidyr::extract( file,".sample", "/([a-z0-9]+)\\.rds", remove = F) |> - # mutate(data = map(file, ~ .x |> readRDS() |> select(-contains("score")) )) |> - # unnest(data) |> - # - # # Format - # mutate(across(c(predicted.celltype.l1, predicted.celltype.l2, blueprint_singler, monaco_singler, ), tolower )) |> - # mutate(across(c(predicted.celltype.l1, predicted.celltype.l2, blueprint_singler, monaco_singler, ), clean_cell_types )) |> - # - # # Format - # is_strong_evidence(predicted.celltype.l2, blueprint_singler) |> - # - # - # - # - # job::job({ - # annotation_harmonised |> saveRDS("~/PostDoc/CuratedAtlasQueryR/dev/annotated_data_0.2_temp_table.rds") - # }) - # - - annotation_harmonised = readRDS("~/PostDoc/CuratedAtlasQueryR/dev/annotated_data_0.2_temp_table.rds") - - # library(CuratedAtlasQueryR) - metadata_df = readRDS(metadata_file) - - # Integrate with metadata - - annotation = - metadata_df |> - select(.cell, cell_type, file_id, .sample) |> - as_tibble() |> - left_join(read_csv("~/PostDoc/CuratedAtlasQueryR/dev/metadata_cell_type.csv"), by = "cell_type") |> - left_join(annotation_harmonised, by = c(".cell", ".sample")) |> - - # Clen cell types - mutate(cell_type_clean = cell_type |> clean_cell_types()) - - # annotation |> - # filter(lineage_1=="immune") |> - # count(cell_type, predicted.celltype.l2, blueprint_singler, strong_evidence) |> - # arrange(!strong_evidence, desc(n)) |> - # write_csv("~/PostDoc/CuratedAtlasQueryR/dev/annotation_confirm.csv") - - - annotation_crated_confirmed = - read_csv("~/PostDoc/CuratedAtlasQueryR/dev/annotation_confirm_manually_curated.csv") |> - - # TEMPORARY - rename(cell_type_clean = cell_type) |> - - filter(!is.na(azhimut_confirmed) | !is.na(blueprint_confirmed)) |> - filter(azhimut_confirmed + blueprint_confirmed > 0) |> - - # Format - mutate(cell_type_harmonised = case_when( - azhimut_confirmed ~ predicted.celltype.l2, - blueprint_confirmed ~ blueprint_singler - )) |> - - mutate(confidence_class = 1) - - - - # To avoid immune cell annotation if very contrasting evidence - blueprint_definitely_non_immune = c( "astrocytes" , "chondrocytes" , "endothelial" , "epithelial" , "fibros" , "keratinocytes" , "melanocytes" , "mesangial" , "mv endothelial", "myocytes" , "neurons" , "pericytes" , "preadipocytes" , "skeletal muscle" , "smooth muscle" ) - - - - annotation_crated_UNconfirmed = - - # Read - read_csv("~/PostDoc/CuratedAtlasQueryR/dev/annotation_confirm_manually_curated.csv") |> - - # TEMPORARY - rename(cell_type_clean = cell_type) |> - - filter(is.na(azhimut_confirmed) | (azhimut_confirmed + blueprint_confirmed) == 0) |> - - clean_cell_types_deeper() |> - - mutate(cell_type_harmonised = "") |> - - # Classify strong evidence - mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("cd8 cytokine secreting tem t") & blueprint_singler == "nk", T, blueprint_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("cd8 cytotoxic t") & blueprint_singler == "nk", T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean |> str_detect("cd8alphaalpha intraepithelial t") & predicted.celltype.l2 == "cd8 tem" & blueprint_singler == "cd8 tem", T, azhimut_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean |> str_detect("mature t") & strong_evidence & predicted.celltype.l2 |> str_detect("tem|tcm"), T, azhimut_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean |> str_detect("myeloid") & strong_evidence & predicted.celltype.l2 == "cd16 mono", T, azhimut_confirmed) ) |> - - # Classify weak evidence - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c("b", "B") & predicted.celltype.l2 == "b memory" & blueprint_singler == "classswitched memory b", T, azhimut_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c("b", "B") & predicted.celltype.l2 %in% c("b memory", "b intermediate", "b naive", "plasma") & !blueprint_singler %in% c("classswitched memory b", "memory b", "naive b"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c("b", "B") & !predicted.celltype.l2 %in% c("b memory", "b intermediate", "b naive") & blueprint_singler %in% c("classswitched memory b", "memory b", "naive b", "plasma"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "activated cd4" & predicted.celltype.l2 %in% c("cd4 tcm", "cd4 tem", "tregs"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "activated cd4" & blueprint_singler %in% c("cd4 tcm", "cd4 tem", "tregs"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "activated cd8" & predicted.celltype.l2 %in% c("cd8 tcm", "cd8 tem"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "activated cd8" & blueprint_singler %in% c("cd8 tcm", "cd8 tem"), T, blueprint_confirmed) ) |> - - # Monocyte macrophage - mutate(azhimut_confirmed = if_else(cell_type_clean == "cd14 cd16 monocyte" & predicted.celltype.l2 %in% c("cd14 mono", "cd16 mono"), T, azhimut_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "cd14 cd16negative classical monocyte" & predicted.celltype.l2 %in% c("cd14 mono"), T, azhimut_confirmed) ) |> - mutate(cell_type_harmonised = if_else(cell_type_clean == "cd14 cd16negative classical monocyte" & blueprint_singler %in% c("monocytes"), "cd14 mono", cell_type_harmonised) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "cd14 monocyte" & predicted.celltype.l2 %in% c("cd14 mono"), T, azhimut_confirmed) ) |> - mutate(cell_type_harmonised = if_else(cell_type_clean == "cd14 monocyte" & blueprint_singler %in% c("monocytes"), "cd14 mono", cell_type_harmonised) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "cd14low cd16 monocyte" & predicted.celltype.l2 %in% c("cd16 mono"), T, azhimut_confirmed) ) |> - mutate(cell_type_harmonised = if_else(cell_type_clean == "cd14low cd16 monocyte" & blueprint_singler %in% c("monocytes"), "cd16 mono", cell_type_harmonised) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "cd16 monocyte" & predicted.celltype.l2 %in% c("cd16 mono"), T, azhimut_confirmed) ) |> - mutate(cell_type_harmonised = if_else(cell_type_clean == "cd16 monocyte" & blueprint_singler %in% c("monocytes"), "cd16 mono", cell_type_harmonised) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "monocyte" & blueprint_singler |> str_detect("monocyte|macrophage") & !predicted.celltype.l2 |> str_detect(" mono"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "monocyte" & predicted.celltype.l2 |> str_detect(" mono"), T, azhimut_confirmed) ) |> - - - mutate(azhimut_confirmed = if_else(cell_type_clean == "cd4" & predicted.celltype.l2 |> str_detect("cd4|treg") & !blueprint_singler |> str_detect("cd4"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "cd4" & !predicted.celltype.l2 |> str_detect("cd4") & blueprint_singler |> str_detect("cd4|treg"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "cd8" & predicted.celltype.l2 |> str_detect("cd8") & !blueprint_singler |> str_detect("cd8"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "cd8" & !predicted.celltype.l2 |> str_detect("cd8") & blueprint_singler |> str_detect("cd8"), T, blueprint_confirmed) ) |> - - - mutate(azhimut_confirmed = if_else(cell_type_clean == "memory t" & predicted.celltype.l2 |> str_detect("tem|tcm") & !blueprint_singler |> str_detect("tem|tcm"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "memory t" & !predicted.celltype.l2 |> str_detect("tem|tcm") & blueprint_singler |> str_detect("tem|tcm"), T, blueprint_confirmed) ) |> - - - mutate(azhimut_confirmed = if_else(cell_type_clean == "cd8alphaalpha intraepithelial t" & predicted.celltype.l2 |> str_detect("cd8") & !blueprint_singler |> str_detect("cd8"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "cd8alphaalpha intraepithelial t" & !predicted.celltype.l2 |> str_detect("cd8") & blueprint_singler |> str_detect("cd8"), T, blueprint_confirmed) ) |> - - mutate(azhimut_confirmed = if_else(cell_type_clean == "cd8hymocyte" & predicted.celltype.l2 |> str_detect("cd8") & !blueprint_singler |> str_detect("cd8"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "cd8hymocyte" & !predicted.celltype.l2 |> str_detect("cd8") & blueprint_singler |> str_detect("cd8"), T, blueprint_confirmed) ) |> - - # B cells - mutate(azhimut_confirmed = if_else(cell_type_clean |> str_detect("memory b") & predicted.celltype.l2 =="b memory", T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("memory b") & blueprint_singler |> str_detect("memory b"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "immature b" & predicted.celltype.l2 =="b naive", T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "immature b" & blueprint_singler |> str_detect("naive b"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "mature b" & predicted.celltype.l2 %in% c("b memory", "b intermediate"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "mature b" & blueprint_singler |> str_detect("memory b"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "naive b" & predicted.celltype.l2 %in% c("b naive"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "naive b" & blueprint_singler |> str_detect("naive b"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "transitional stage b" & predicted.celltype.l2 %in% c("b intermediate"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "transitional stage b" & blueprint_singler |> str_detect("naive b") & !predicted.celltype.l2 %in% c("b intermediate"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "memory b" & predicted.celltype.l2 %in% c("b intermediate"), T, azhimut_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "precursor b", "prob") & predicted.celltype.l2 %in% c("b naive") & !blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "precursor b", "prob") & blueprint_singler |> str_detect("naive b") & predicted.celltype.l2 %in% c("hspc"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "precursor b", "prob") & predicted.celltype.l2 %in% c("hspc"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "precursor b", "prob") & blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), T, blueprint_confirmed) ) |> - - # Plasma cells - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "plasma") & predicted.celltype.l2 == "plasma" , T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "plasma") & predicted.celltype.l2 == "plasma" , T, blueprint_confirmed) ) |> - - mutate(azhimut_confirmed = case_when( - cell_type_clean %in% c("cd4 cytotoxic t", "cd4 helper t") & predicted.celltype.l2 == "cd4 ctl" & blueprint_singler != "cd4 tcm" ~ T, - cell_type_clean %in% c("cd4 cytotoxic t", "cd4 helper t") & predicted.celltype.l2 == "cd4 tem" & blueprint_singler != "cd4 tcm" ~ T, - TRUE ~ azhimut_confirmed - ) ) |> - mutate(blueprint_confirmed = case_when( - cell_type_clean %in% c("cd4 cytotoxic t", "cd4 helper t") & blueprint_singler == "cd4 tem" & predicted.celltype.l2 != "cd4 tcm" ~ T, - cell_type_clean %in% c("cd4 cytotoxic t", "cd4 helper t") & blueprint_singler == "cd4 t" & predicted.celltype.l2 != "cd4 tcm" ~ T, - TRUE ~ blueprint_confirmed - ) ) |> - - mutate(azhimut_confirmed = if_else(cell_type_clean == "cd4hymocyte" & predicted.celltype.l2 |> str_detect("cd4|treg") & !blueprint_singler |> str_detect("cd4"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "cd4hymocyte" & !predicted.celltype.l2 |> str_detect("cd4") & blueprint_singler |> str_detect("cd4|treg"), T, blueprint_confirmed) ) |> - - mutate(azhimut_confirmed = case_when( - cell_type_clean %in% c("cd8 memory t") & predicted.celltype.l2 == "cd8 tem" & blueprint_singler != "cd8 tcm" ~ T, - cell_type_clean %in% c("cd8 memory t") & predicted.celltype.l2 == "cd8 tcm" & blueprint_singler != "cd8 tem" ~ T, - TRUE ~ azhimut_confirmed - ) ) |> - mutate(blueprint_confirmed = case_when( - cell_type_clean %in% c("cd8 memory t") & predicted.celltype.l2 != "cd8 tem" & blueprint_singler == "cd8 tcm" ~ T, - cell_type_clean %in% c("cd8 memory t") & predicted.celltype.l2 != "cd8 tcm" & blueprint_singler == "cd8 tem" ~ T, - TRUE ~ blueprint_confirmed - ) ) |> - - mutate(azhimut_confirmed = case_when( - cell_type_clean %in% c("cd4 memory t") & predicted.celltype.l2 == "cd4 tem" & blueprint_singler != "cd8 tcm" ~ T, - cell_type_clean %in% c("cd4 memory t") & predicted.celltype.l2 == "cd4 tcm" & blueprint_singler != "cd8 tem" ~ T, - TRUE ~ azhimut_confirmed - ) ) |> - mutate(blueprint_confirmed = case_when( - cell_type_clean %in% c("cd4 memory t") & predicted.celltype.l2 != "cd4 tem" & blueprint_singler == "cd4 tcm" ~ T, - cell_type_clean %in% c("cd4 memory t") & predicted.celltype.l2 != "cd4 tcm" & blueprint_singler == "cd4 tem" ~ T, - TRUE ~ blueprint_confirmed - ) ) |> - - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "t") & blueprint_singler =="cd8 t" & predicted.celltype.l2 |> str_detect("cd8"), T, azhimut_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "t") & blueprint_singler =="cd4 t" & predicted.celltype.l2 |> str_detect("cd4|treg"), T, azhimut_confirmed) ) |> - - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "treg") & blueprint_singler %in% c("tregs"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "treg") & predicted.celltype.l2 == "treg", T, azhimut_confirmed) ) |> - - - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "tcm cd4") & blueprint_singler %in% c("cd4 tcm"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "tcm cd4") & predicted.celltype.l2 == "cd4 tcm", T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "tcm cd8") & blueprint_singler %in% c("cd8 tcm"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "tcm cd8") & predicted.celltype.l2 == "cd8 tcm", T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "tem cd4") & blueprint_singler %in% c("cd4 tem"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "tem cd4") & predicted.celltype.l2 == "cd4 tem", T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "tem cd8") & blueprint_singler %in% c("cd8 tem"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "tem cd8") & predicted.celltype.l2 == "cd8 tem", T, azhimut_confirmed) ) |> - - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "tgd") & predicted.celltype.l2 == "gdt", T, azhimut_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "activated cd4") & predicted.celltype.l2 == "cd4 proliferating", T, azhimut_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "activated cd8") & predicted.celltype.l2 == "cd8 proliferating", T, azhimut_confirmed) ) |> - - - - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c("naive cd4", "naive t") & predicted.celltype.l2 %in% c("cd4 naive"), T, azhimut_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c("naive cd8", "naive t") & predicted.celltype.l2 %in% c("cd8 naive"), T, azhimut_confirmed) ) |> - - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "prot") & predicted.celltype.l2 %in% c("cd4 naive") & !blueprint_singler |> str_detect("clp|hcs|mpp|cd8"), T, azhimut_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "prot") & predicted.celltype.l2 %in% c("cd8 naive") & !blueprint_singler |> str_detect("clp|hcs|mpp|cd4"), T, azhimut_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "prot") & predicted.celltype.l2 %in% c("hspc"), T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "prot") & blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), T, blueprint_confirmed) ) |> - - mutate(azhimut_confirmed = if_else(cell_type_clean == "dendritic" & predicted.celltype.l2 %in% c("asdc", "cdc2", "cdc1", "pdc"), T, azhimut_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "double negative t regulatory" & predicted.celltype.l2 == "dnt", T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "early t lineage precursor", "immature innate lymphoid") & blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "early t lineage precursor", "immature innate lymphoid") & predicted.celltype.l2 == "hspc" & blueprint_singler != "clp", T, azhimut_confirmed) ) |> - - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c("ilc1", "ilc2", "innate lymphoid") & blueprint_singler == "nk", T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c("ilc1", "ilc2", "innate lymphoid") & predicted.celltype.l2 %in% c( "nk", "ilc", "nk proliferating"), T, azhimut_confirmed) ) |> - - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "immature t") & blueprint_singler %in% c("naive t"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "immature t") & predicted.celltype.l2 == "t naive", T, azhimut_confirmed) ) |> - - mutate(cell_type_harmonised = if_else(cell_type_clean == "fraction a prepro b", "naive b", cell_type_harmonised)) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == "granulocyte" & blueprint_singler %in% c("eosinophils", "neutrophils"), T, blueprint_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c("immature neutrophil", "neutrophil") & blueprint_singler %in% c( "neutrophils"), T, blueprint_confirmed) ) |> - - mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("megakaryocyte") & blueprint_singler |> str_detect("megakaryocyte"), T, blueprint_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("macrophage") & blueprint_singler |> str_detect("macrophage"), T, blueprint_confirmed) ) |> - - mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "nk") & blueprint_singler %in% c("nk"), T, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "nk") & predicted.celltype.l2 %in% c("nk", "nk proliferating", "nk_cd56bright", "ilc"), T, azhimut_confirmed) ) |> - - - # If identical force - mutate(azhimut_confirmed = if_else(cell_type_clean == predicted.celltype.l2 , T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean == blueprint_singler , T, blueprint_confirmed) ) |> - - # Perogenitor - mutate(azhimut_confirmed = if_else(cell_type_clean |> str_detect("progenitor|hematopoietic|precursor") & predicted.celltype.l2 == "hspc", T, azhimut_confirmed) ) |> - mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("progenitor|hematopoietic|precursor") & blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), T, blueprint_confirmed) ) |> - - # Generic original annotation and stem for new annotations - mutate(azhimut_confirmed = if_else( - cell_type_clean %in% c("T cell", "myeloid cell", "leukocyte", "myeloid leukocyte", "B cell") & - predicted.celltype.l2 == "hspc" & - blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), T, azhimut_confirmed) ) |> - - # Omit mature for stem - mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("mature") & blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), F, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean |> str_detect("mature") & predicted.celltype.l2 == "hspc", F, azhimut_confirmed) ) |> - - # Omit megacariocyte for stem - mutate(blueprint_confirmed = if_else(cell_type_clean == "megakaryocyte" & blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), F, blueprint_confirmed) ) |> - mutate(azhimut_confirmed = if_else(cell_type_clean == "megakaryocyte" & predicted.celltype.l2 == "hspc", F, azhimut_confirmed) ) |> - - # Mast cells - mutate(cell_type_harmonised = if_else(cell_type_clean == "mast", "mast", cell_type_harmonised)) |> - - - # Visualise - #distinct(cell_type_clean, predicted.celltype.l2, blueprint_singler, strong_evidence, azhimut_confirmed, blueprint_confirmed) |> - arrange(!strong_evidence, cell_type_clean) |> - - # set cell names - mutate(cell_type_harmonised = case_when( - cell_type_harmonised == "" & azhimut_confirmed ~ predicted.celltype.l2, - cell_type_harmonised == "" & blueprint_confirmed ~ blueprint_singler, - TRUE ~ cell_type_harmonised - )) |> - - # Add NA - mutate(cell_type_harmonised = case_when(cell_type_harmonised != "" ~ cell_type_harmonised)) |> - - # Add unannotated cells because datasets were too small - mutate(cell_type_harmonised = case_when( - is.na(cell_type_harmonised) & cell_type_clean |> str_detect("progenitor|hematopoietic|stem|precursor") ~ "stem", - - is.na(cell_type_harmonised) & cell_type_clean == "cd14 monocyte" ~ "cd14 mono", - is.na(cell_type_harmonised) & cell_type_clean == "cd16 monocyte" ~ "cd16 mono", - is.na(cell_type_harmonised) & cell_type_clean %in% c("cd4 cytotoxic t", "tem cd4") ~ "cd4 tem", - is.na(cell_type_harmonised) & cell_type_clean %in% c("cd8 cytotoxic t", "tem cd8") ~ "cd8 tem", - is.na(cell_type_harmonised) & cell_type_clean |> str_detect("macrophage") ~ "macrophage", - is.na(cell_type_harmonised) & cell_type_clean %in% c("mature b", "memory b", "transitional stage b") ~ "b memory", - is.na(cell_type_harmonised) & cell_type_clean == "mucosal invariant t" ~ "mait", - is.na(cell_type_harmonised) & cell_type_clean == "naive b" ~ "b naive", - is.na(cell_type_harmonised) & cell_type_clean == "nk" ~ "nk", - is.na(cell_type_harmonised) & cell_type_clean == "naive cd4" ~"cd4 naive", - is.na(cell_type_harmonised) & cell_type_clean == "naive cd8" ~"cd8 naive", - is.na(cell_type_harmonised) & cell_type_clean == "treg" ~ "treg", - is.na(cell_type_harmonised) & cell_type_clean == "tgd" ~ "tgd", - TRUE ~ cell_type_harmonised - )) |> - - mutate(confidence_class = case_when( - !is.na(cell_type_harmonised) & strong_evidence ~ 2, - !is.na(cell_type_harmonised) & !strong_evidence ~ 3 - )) |> - - # Lowest grade annotation UNreliable - mutate(cell_type_harmonised = case_when( - - # Get origincal annotation - is.na(cell_type_harmonised) & cell_type_clean %in% c("neutrophil", "granulocyte") ~ cell_type_clean, - is.na(cell_type_harmonised) & cell_type_clean %in% c("conventional dendritic", "dendritic") ~ "cdc", - is.na(cell_type_harmonised) & cell_type_clean %in% c("classical monocyte") ~ "cd14 mono", - - # Get Seurat annotation - is.na(cell_type_harmonised) & predicted.celltype.l2 != "eryth" & !is.na(predicted.celltype.l2) ~ predicted.celltype.l2, - is.na(cell_type_harmonised) & !blueprint_singler %in% c( - "astrocytes", "smooth muscle", "preadipocytes", "mesangial", "myocytes", - "doublet", "melanocytes", "chondrocytes", "mv endothelial", "fibros", - "neurons", "keratinocytes", "endothelial", "epithelial", "skeletal muscle", "pericytes", "erythrocytes", "adipocytes" - ) & !is.na(blueprint_singler) ~ blueprint_singler, - TRUE ~ cell_type_harmonised - - )) |> - - # Lowest grade annotation UNreliable - mutate(cell_type_harmonised = case_when( - - # Get origincal annotation - !cell_type_harmonised %in% c("doublet", "platelet") ~ cell_type_harmonised - - )) |> - - mutate(confidence_class = case_when( - is.na(confidence_class) & !is.na(cell_type_harmonised) ~ 4, - TRUE ~ confidence_class - )) - - # Another passage - - # annotated_samples = annotation_crated_UNconfirmed |> filter(!is.na(cell_type_harmonised)) |> distinct( cell_type, .sample, file_id) - # - # annotation_crated_UNconfirmed |> - # filter(is.na(cell_type_harmonised)) |> - # count(cell_type , cell_type_harmonised ,predicted.celltype.l2 ,blueprint_singler) |> - # arrange(desc(n)) |> - # print(n=99) - - - annotation_all = - annotation_crated_confirmed |> - clean_cell_types_deeper() |> - bind_rows( - annotation_crated_UNconfirmed - ) |> - - # I have multiple confidence_class per combination of labels - distinct() |> - with_groups(c(cell_type_clean, predicted.celltype.l2, blueprint_singler), ~ .x |> arrange(confidence_class) |> slice(1)) |> - - # Simplify after harmonisation - mutate(cell_type_harmonised = case_when( - cell_type_harmonised %in% c("b memory", "b intermediate", "classswitched memory b", "memory b" ) ~ "b memory", - cell_type_harmonised %in% c("b naive", "naive b") ~ "b naive", - cell_type_harmonised %in% c("nk_cd56bright", "nk", "nk proliferating", "ilc") ~ "ilc", - cell_type_harmonised %in% c("mpp", "clp", "hspc", "mep", "cmp", "hsc", "gmp") ~ "stem", - cell_type_harmonised %in% c("macrophages", "macrophages m1", "macrophages m2") ~ "macrophage", - cell_type_harmonised %in% c("treg", "tregs") ~ "treg", - cell_type_harmonised %in% c("gdt", "tgd") ~ "tgd", - cell_type_harmonised %in% c("cd8 proliferating", "cd8 tem") ~ "cd8 tem", - cell_type_harmonised %in% c("cd4 proliferating", "cd4 tem") ~ "cd4 tem", - cell_type_harmonised %in% c("eosinophils", "neutrophils", "granulocyte", "neutrophil") ~ "granulocyte", - cell_type_harmonised %in% c("cdc", "cdc1", "cdc2", "dc") ~ "cdc", - - TRUE ~ cell_type_harmonised - )) |> - dplyr::select(cell_type_clean, cell_type_harmonised, predicted.celltype.l2, blueprint_singler, confidence_class) |> - distinct() - - - curated_annotation = - annotation |> - clean_cell_types_deeper() |> - filter(lineage_1=="immune") |> - dplyr::select( - .cell, .sample, cell_type, cell_type_clean, predicted.celltype.l2, blueprint_singler, monaco_singler) |> - left_join( - annotation_all , - by = c("cell_type_clean", "predicted.celltype.l2", "blueprint_singler") - ) |> - dplyr::select( - .cell, .sample, cell_type, cell_type_harmonised, confidence_class, - cell_annotation_azimuth_l2 = predicted.celltype.l2, cell_annotation_blueprint_singler = blueprint_singler, - cell_annotation_monaco_singler = monaco_singler - ) |> - - # Reannotation of generic cell types - mutate(cell_type_harmonised = case_when( - cell_type_harmonised=="cd4 t" & cell_annotation_monaco_singler |> str_detect("effector memory") ~ "cd4 tem", - cell_type_harmonised=="cd4 t" & cell_annotation_monaco_singler |> str_detect("mait") ~ "mait", - cell_type_harmonised=="cd4 t" & cell_annotation_monaco_singler |> str_detect("central memory") ~ "cd4 tcm", - cell_type_harmonised=="cd4 t" & cell_annotation_monaco_singler |> str_detect("naive") ~ "cd4 naive", - cell_type_harmonised=="cd8 t" & cell_annotation_monaco_singler |> str_detect("effector memory") ~ "cd8 tem", - cell_type_harmonised=="cd8 t" & cell_annotation_monaco_singler |> str_detect("central memory") ~ "cd8 tcm", - cell_type_harmonised=="cd8 t" & cell_annotation_monaco_singler |> str_detect("naive") ~ "cd8 naive", - cell_type_harmonised=="monocytes" & cell_annotation_monaco_singler |> str_detect("non classical") ~ "cd16 mono", - cell_type == "nonclassical monocyte" & cell_type_harmonised=="monocytes" & cell_annotation_monaco_singler =="intermediate monocytes" ~ "cd16 mono", - cell_type_harmonised=="monocytes" & cell_annotation_monaco_singler |> str_detect("^classical") ~ "cd14 mono", - cell_type == "classical monocyte" & cell_type_harmonised=="monocytes" & cell_annotation_monaco_singler =="intermediate monocytes" ~ "cd14 mono", - cell_type_harmonised=="monocytes" & cell_annotation_monaco_singler =="myeloid dendritic" & str_detect(cell_annotation_azimuth_l2, "cdc") ~ "cdc", - - - TRUE ~ cell_type_harmonised - )) |> - - # Change CD4 classification for version 0.2.1 - mutate(confidence_class = if_else( - cell_type_harmonised |> str_detect("cd4|mait|treg|tgd") & cell_annotation_monaco_singler %in% c("terminal effector cd4 t", "naive cd4 t", "th2", "th17", "t regulatory", "follicular helper t", "th1/th17", "th1", "nonvd2 gd t", "vd2 gd t"), - 3, - confidence_class - )) |> - - # Change CD4 classification for version 0.2.1 - mutate(cell_type_harmonised = if_else( - cell_type_harmonised |> str_detect("cd4|mait|treg|tgd") & cell_annotation_monaco_singler %in% c("terminal effector cd4 t", "naive cd4 t", "th2", "th17", "t regulatory", "follicular helper t", "th1/th17", "th1", "nonvd2 gd t", "vd2 gd t"), - cell_annotation_monaco_singler, - cell_type_harmonised - )) |> - - - mutate(cell_type_harmonised = cell_type_harmonised |> - str_replace("naive cd4 t", "cd4 naive") |> - str_replace("th2", "cd4 th2") |> - str_replace("^th17$", "cd4 th17") |> - str_replace("t regulatory", "treg") |> - str_replace("follicular helper t", "cd4 fh") |> - str_replace("th1/th17", "cd4 th1/th17") |> - str_replace("^th1$", "cd4 th1") |> - str_replace("nonvd2 gd t", "tgd") |> - str_replace("vd2 gd t", "tgd") - ) |> - - # add immune_unclassified - mutate(cell_type_harmonised = if_else(cell_type_harmonised == "monocytes", "immune_unclassified", cell_type_harmonised)) |> - mutate(cell_type_harmonised = if_else(is.na(cell_type_harmonised), "immune_unclassified", cell_type_harmonised)) |> - mutate(confidence_class = if_else(is.na(confidence_class), 5, confidence_class)) |> - - # drop uncommon cells - mutate(cell_type_harmonised = if_else(cell_type_harmonised %in% c("cd4 t", "cd8 t", "asdc", "cd4 ctl"), "immune_unclassified", cell_type_harmonised)) - - - # Further rescue of unannotated cells, manually - - # curated_annotation |> - # filter(cell_type_harmonised == "immune_unclassified") |> - # count(cell_type , cell_type_harmonised ,confidence_class ,cell_annotation_azimuth_l2 ,cell_annotation_blueprint_singler ,cell_annotation_monaco_singler) |> - # arrange(desc(n)) |> - # write_csv("curated_annotation_still_unannotated_0.2.csv") - - - curated_annotation = - curated_annotation |> - left_join( - read_csv("~/PostDoc/CuratedAtlasQueryR/dev/curated_annotation_still_unannotated_0.2_manually_labelled.csv") |> - select(cell_type, cell_type_harmonised_manually_curated = cell_type_harmonised, confidence_class_manually_curated = confidence_class, everything()), - by = join_by(cell_type, cell_annotation_azimuth_l2, cell_annotation_blueprint_singler, cell_annotation_monaco_singler) - ) |> - mutate( - confidence_class = if_else(cell_type_harmonised == "immune_unclassified", confidence_class_manually_curated, confidence_class), - cell_type_harmonised = if_else(cell_type_harmonised == "immune_unclassified", cell_type_harmonised_manually_curated, cell_type_harmonised), - ) |> - select(-contains("manually_curated"), -n) |> - - # drop uncommon cells - mutate(cell_type_harmonised = if_else(cell_type_harmonised %in% c("cd4 tcm", "cd4 tem"), "immune_unclassified", cell_type_harmonised)) - - - - # # Recover confidence class == 4 - - # curated_annotation |> - # filter(confidence_class==4) |> - # count(cell_type , cell_type_harmonised ,confidence_class ,cell_annotation_azimuth_l2 ,cell_annotation_blueprint_singler ,cell_annotation_monaco_singler) |> - # arrange(desc(n)) |> - # write_csv("curated_annotation_still_unannotated_0.2_confidence_class_4.csv") - - curated_annotation = - curated_annotation |> - left_join( - read_csv("~/PostDoc/CuratedAtlasQueryR/dev/curated_annotation_still_unannotated_0.2_confidence_class_4_manually_labelled.csv") |> - select(confidence_class_manually_curated = confidence_class, everything()), - by = join_by(cell_type, cell_type_harmonised, cell_annotation_azimuth_l2, cell_annotation_blueprint_singler, cell_annotation_monaco_singler) - ) |> - mutate( - confidence_class = if_else(confidence_class == 4 & !is.na(confidence_class_manually_curated), confidence_class_manually_curated, confidence_class) - ) |> - select(-contains("manually_curated"), -n) - - # Correct fishy stem cell labelling - # If stem for the study's annotation and blueprint is non-immune it is probably wrong, - # even because the heart has too many progenitor/stem - curated_annotation = - curated_annotation |> - mutate(confidence_class = case_when( - cell_type_harmonised == "stem" & cell_annotation_blueprint_singler %in% c( - "skeletal muscle", "adipocytes", "epithelial", "smooth muscle", "chondrocytes", "endothelial" - ) ~ 5, - TRUE ~ confidence_class - )) - - - curated_annotation_merged = - - # Fix cell ID - metadata_df |> - dplyr::select(.cell, .sample, cell_type) |> - as_tibble() |> - - # Add cell type - left_join(curated_annotation |> dplyr::select(-cell_type), by = c(".cell", ".sample")) |> - - # Add non immune - mutate(cell_type_harmonised = if_else(is.na(cell_type_harmonised), "non_immune", cell_type_harmonised)) |> - mutate(confidence_class = if_else(is.na(confidence_class) & cell_type_harmonised == "non_immune", 1, confidence_class)) |> - - # For some unknown reason - distinct() - - - curated_annotation_merged |> - - # Save - saveRDS(file_curated_annotation_merged) - - metadata_annotated = - curated_annotation_merged |> - - # merge with the rest of metadata - left_join( - metadata_df |> - as_tibble(), - by=c(".cell", ".sample", "cell_type") - ) - - # Replace `.` with `_` for all column names as it can create difficoulties for MySQL and Python - colnames(metadata_annotated) = colnames(metadata_annotated) |> str_replace_all("\\.", "_") - metadata_annotated = metadata_annotated |> rename(cell_ = `_cell`, sample_ = `_sample`) - - - dictionary_connie_non_immune = - metadata_annotated |> - filter(cell_type_harmonised == "non_immune") |> - distinct(cell_type) |> - harmonise_names_non_immune() |> - rename(cell_type_harmonised_non_immune = cell_type_harmonised ) - - metadata_annotated = - metadata_annotated |> - left_join(dictionary_connie_non_immune) |> - mutate(cell_type_harmonised = if_else(cell_type_harmonised=="non_immune", cell_type_harmonised_non_immune, cell_type_harmonised)) |> - select(-cell_type_harmonised_non_immune) - - -} +# get_manually_curated_immune_cell_types = function(){ +# +# # library(zellkonverter) +# # library(Seurat) +# # library(SingleCellExperiment) # load early to avoid masking dplyr::count() +# # library(tidySingleCellExperiment) +# # library(dplyr) +# # library(cellxgenedp) +# # library(tidyverse) +# #library(tidySingleCellExperiment) +# # library(stringr) +# # library(scMerge) +# # library(glue) +# # library(tidyseurat) +# # library(celldex) +# # library(SingleR) +# # library(glmGamPoi) +# # library(stringr) +# # library(purrr) +# +# +# #Fix GCHECKS +# metadata_file = NULL +# .cell = NULL +# cell_type = NULL +# file_id = NULL +# .sample = NULL +# azhimut_confirmed = NULL +# blueprint_confirmed <- NULL +# arrange <- NULL # This one is actually a function from dplyr, so you should use it with dplyr::arrange or import it +# cell_type_clean <- NULL +# blueprint_singler <- NULL +# predicted.celltype.l2 <- NULL +# strong_evidence <- NULL +# cell_type_harmonised <- NULL +# confidence_class <- NULL +# lineage_1 <- NULL +# monaco_singler <- NULL +# cell_annotation_monaco_singler <- NULL +# cell_annotation_azimuth_l2 <- NULL +# cell_annotation_blueprint_singler <- NULL +# confidence_class_manually_curated <- NULL +# cell_type_harmonised_manually_curated <- NULL +# file_curated_annotation_merged <- NULL +# .sample <- NULL +# cell_type_harmonised_non_immune <- NULL +# +# # library(zellkonverter) +# # library(Seurat) +# # library(SingleCellExperiment) # load early to avoid masking dplyr::count() +# # library(tidySingleCellExperiment) +# # library(dplyr) +# # library(cellxgenedp) +# # library(tidyverse) +# # #library(tidySingleCellExperiment) +# # library(stringr) +# # library(scMerge) +# # library(glue) +# # library(DelayedArray) +# # library(HDF5Array) +# # library(tidyseurat) +# # library(celldex) +# # library(SingleR) +# # library(glmGamPoi) +# # library(stringr) +# # library(purrr) +# +# # # source("utility.R") +# # +# # metadata_file = "/vast/projects/cellxgene_curated//metadata_0.2.rds" +# # file_curated_annotation_merged = "~/PostDoc/CuratedAtlasQueryR/dev/cell_type_curated_annotation_0.2.3.rds" +# # file_metadata_annotated = "/vast/projects/cellxgene_curated/metadata_annotated_0.2.3.rds" +# # annotation_directory = "/vast/projects/cellxgene_curated//annotated_data_0.2/" +# # +# # # metadata_file = "/vast/projects/cellxgene_curated//metadata.rds" +# # # file_curated_annotation_merged = "~/PostDoc/CuratedAtlasQueryR/dev/cell_type_curated_annotation.rds" +# # # file_metadata_annotated = "/vast/projects/cellxgene_curated//metadata_annotated.rds" +# # # annotation_directory = "/vast/projects/cellxgene_curated//annotated_data_0.1/" +# # +# # +# # annotation_harmonised = +# # dir(annotation_directory, full.names = TRUE) |> +# # enframe(value="file") |> +# # tidyr::extract( file,".sample", "/([a-z0-9]+)\\.rds", remove = F) |> +# # mutate(data = map(file, ~ .x |> readRDS() |> select(-contains("score")) )) |> +# # unnest(data) |> +# # +# # # Format +# # mutate(across(c(predicted.celltype.l1, predicted.celltype.l2, blueprint_singler, monaco_singler, ), tolower )) |> +# # mutate(across(c(predicted.celltype.l1, predicted.celltype.l2, blueprint_singler, monaco_singler, ), clean_cell_types )) |> +# # +# # # Format +# # is_strong_evidence(predicted.celltype.l2, blueprint_singler) |> +# # +# # +# # +# # +# # job::job({ +# # annotation_harmonised |> saveRDS("~/PostDoc/CuratedAtlasQueryR/dev/annotated_data_0.2_temp_table.rds") +# # }) +# # +# +# annotation_harmonised = readRDS("~/PostDoc/CuratedAtlasQueryR/dev/annotated_data_0.2_temp_table.rds") +# +# # library(CuratedAtlasQueryR) +# metadata_df = readRDS(metadata_file) +# +# # Integrate with metadata +# +# annotation = +# metadata_df |> +# select(.cell, cell_type, file_id, .sample) |> +# as_tibble() |> +# left_join(read_csv("~/PostDoc/CuratedAtlasQueryR/dev/metadata_cell_type.csv"), by = "cell_type") |> +# left_join(annotation_harmonised, by = c(".cell", ".sample")) |> +# +# # Clen cell types +# mutate(cell_type_clean = cell_type |> clean_cell_types()) +# +# # annotation |> +# # filter(lineage_1=="immune") |> +# # count(cell_type, predicted.celltype.l2, blueprint_singler, strong_evidence) |> +# # arrange(!strong_evidence, desc(n)) |> +# # write_csv("~/PostDoc/CuratedAtlasQueryR/dev/annotation_confirm.csv") +# +# +# annotation_crated_confirmed = +# read_csv("~/PostDoc/CuratedAtlasQueryR/dev/annotation_confirm_manually_curated.csv") |> +# +# # TEMPORARY +# rename(cell_type_clean = cell_type) |> +# +# filter(!is.na(azhimut_confirmed) | !is.na(blueprint_confirmed)) |> +# filter(azhimut_confirmed + blueprint_confirmed > 0) |> +# +# # Format +# mutate(cell_type_harmonised = case_when( +# azhimut_confirmed ~ predicted.celltype.l2, +# blueprint_confirmed ~ blueprint_singler +# )) |> +# +# mutate(confidence_class = 1) +# +# +# +# # To avoid immune cell annotation if very contrasting evidence +# blueprint_definitely_non_immune = c( "astrocytes" , "chondrocytes" , "endothelial" , "epithelial" , "fibros" , "keratinocytes" , "melanocytes" , "mesangial" , "mv endothelial", "myocytes" , "neurons" , "pericytes" , "preadipocytes" , "skeletal muscle" , "smooth muscle" ) +# +# +# +# annotation_crated_UNconfirmed = +# +# # Read +# read_csv("~/PostDoc/CuratedAtlasQueryR/dev/annotation_confirm_manually_curated.csv") |> +# +# # TEMPORARY +# rename(cell_type_clean = cell_type) |> +# +# filter(is.na(azhimut_confirmed) | (azhimut_confirmed + blueprint_confirmed) == 0) |> +# +# clean_cell_types_deeper() |> +# +# mutate(cell_type_harmonised = "") |> +# +# # Classify strong evidence +# mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("cd8 cytokine secreting tem t") & blueprint_singler == "nk", T, blueprint_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("cd8 cytotoxic t") & blueprint_singler == "nk", T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean |> str_detect("cd8alphaalpha intraepithelial t") & predicted.celltype.l2 == "cd8 tem" & blueprint_singler == "cd8 tem", T, azhimut_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean |> str_detect("mature t") & strong_evidence & predicted.celltype.l2 |> str_detect("tem|tcm"), T, azhimut_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean |> str_detect("myeloid") & strong_evidence & predicted.celltype.l2 == "cd16 mono", T, azhimut_confirmed) ) |> +# +# # Classify weak evidence +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c("b", "B") & predicted.celltype.l2 == "b memory" & blueprint_singler == "classswitched memory b", T, azhimut_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c("b", "B") & predicted.celltype.l2 %in% c("b memory", "b intermediate", "b naive", "plasma") & !blueprint_singler %in% c("classswitched memory b", "memory b", "naive b"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c("b", "B") & !predicted.celltype.l2 %in% c("b memory", "b intermediate", "b naive") & blueprint_singler %in% c("classswitched memory b", "memory b", "naive b", "plasma"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "activated cd4" & predicted.celltype.l2 %in% c("cd4 tcm", "cd4 tem", "tregs"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "activated cd4" & blueprint_singler %in% c("cd4 tcm", "cd4 tem", "tregs"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "activated cd8" & predicted.celltype.l2 %in% c("cd8 tcm", "cd8 tem"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "activated cd8" & blueprint_singler %in% c("cd8 tcm", "cd8 tem"), T, blueprint_confirmed) ) |> +# +# # Monocyte macrophage +# mutate(azhimut_confirmed = if_else(cell_type_clean == "cd14 cd16 monocyte" & predicted.celltype.l2 %in% c("cd14 mono", "cd16 mono"), T, azhimut_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "cd14 cd16negative classical monocyte" & predicted.celltype.l2 %in% c("cd14 mono"), T, azhimut_confirmed) ) |> +# mutate(cell_type_harmonised = if_else(cell_type_clean == "cd14 cd16negative classical monocyte" & blueprint_singler %in% c("monocytes"), "cd14 mono", cell_type_harmonised) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "cd14 monocyte" & predicted.celltype.l2 %in% c("cd14 mono"), T, azhimut_confirmed) ) |> +# mutate(cell_type_harmonised = if_else(cell_type_clean == "cd14 monocyte" & blueprint_singler %in% c("monocytes"), "cd14 mono", cell_type_harmonised) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "cd14low cd16 monocyte" & predicted.celltype.l2 %in% c("cd16 mono"), T, azhimut_confirmed) ) |> +# mutate(cell_type_harmonised = if_else(cell_type_clean == "cd14low cd16 monocyte" & blueprint_singler %in% c("monocytes"), "cd16 mono", cell_type_harmonised) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "cd16 monocyte" & predicted.celltype.l2 %in% c("cd16 mono"), T, azhimut_confirmed) ) |> +# mutate(cell_type_harmonised = if_else(cell_type_clean == "cd16 monocyte" & blueprint_singler %in% c("monocytes"), "cd16 mono", cell_type_harmonised) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "monocyte" & blueprint_singler |> str_detect("monocyte|macrophage") & !predicted.celltype.l2 |> str_detect(" mono"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "monocyte" & predicted.celltype.l2 |> str_detect(" mono"), T, azhimut_confirmed) ) |> +# +# +# mutate(azhimut_confirmed = if_else(cell_type_clean == "cd4" & predicted.celltype.l2 |> str_detect("cd4|treg") & !blueprint_singler |> str_detect("cd4"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "cd4" & !predicted.celltype.l2 |> str_detect("cd4") & blueprint_singler |> str_detect("cd4|treg"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "cd8" & predicted.celltype.l2 |> str_detect("cd8") & !blueprint_singler |> str_detect("cd8"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "cd8" & !predicted.celltype.l2 |> str_detect("cd8") & blueprint_singler |> str_detect("cd8"), T, blueprint_confirmed) ) |> +# +# +# mutate(azhimut_confirmed = if_else(cell_type_clean == "memory t" & predicted.celltype.l2 |> str_detect("tem|tcm") & !blueprint_singler |> str_detect("tem|tcm"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "memory t" & !predicted.celltype.l2 |> str_detect("tem|tcm") & blueprint_singler |> str_detect("tem|tcm"), T, blueprint_confirmed) ) |> +# +# +# mutate(azhimut_confirmed = if_else(cell_type_clean == "cd8alphaalpha intraepithelial t" & predicted.celltype.l2 |> str_detect("cd8") & !blueprint_singler |> str_detect("cd8"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "cd8alphaalpha intraepithelial t" & !predicted.celltype.l2 |> str_detect("cd8") & blueprint_singler |> str_detect("cd8"), T, blueprint_confirmed) ) |> +# +# mutate(azhimut_confirmed = if_else(cell_type_clean == "cd8hymocyte" & predicted.celltype.l2 |> str_detect("cd8") & !blueprint_singler |> str_detect("cd8"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "cd8hymocyte" & !predicted.celltype.l2 |> str_detect("cd8") & blueprint_singler |> str_detect("cd8"), T, blueprint_confirmed) ) |> +# +# # B cells +# mutate(azhimut_confirmed = if_else(cell_type_clean |> str_detect("memory b") & predicted.celltype.l2 =="b memory", T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("memory b") & blueprint_singler |> str_detect("memory b"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "immature b" & predicted.celltype.l2 =="b naive", T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "immature b" & blueprint_singler |> str_detect("naive b"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "mature b" & predicted.celltype.l2 %in% c("b memory", "b intermediate"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "mature b" & blueprint_singler |> str_detect("memory b"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "naive b" & predicted.celltype.l2 %in% c("b naive"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "naive b" & blueprint_singler |> str_detect("naive b"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "transitional stage b" & predicted.celltype.l2 %in% c("b intermediate"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "transitional stage b" & blueprint_singler |> str_detect("naive b") & !predicted.celltype.l2 %in% c("b intermediate"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "memory b" & predicted.celltype.l2 %in% c("b intermediate"), T, azhimut_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "precursor b", "prob") & predicted.celltype.l2 %in% c("b naive") & !blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "precursor b", "prob") & blueprint_singler |> str_detect("naive b") & predicted.celltype.l2 %in% c("hspc"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "precursor b", "prob") & predicted.celltype.l2 %in% c("hspc"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "precursor b", "prob") & blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), T, blueprint_confirmed) ) |> +# +# # Plasma cells +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "plasma") & predicted.celltype.l2 == "plasma" , T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "plasma") & predicted.celltype.l2 == "plasma" , T, blueprint_confirmed) ) |> +# +# mutate(azhimut_confirmed = case_when( +# cell_type_clean %in% c("cd4 cytotoxic t", "cd4 helper t") & predicted.celltype.l2 == "cd4 ctl" & blueprint_singler != "cd4 tcm" ~ T, +# cell_type_clean %in% c("cd4 cytotoxic t", "cd4 helper t") & predicted.celltype.l2 == "cd4 tem" & blueprint_singler != "cd4 tcm" ~ T, +# TRUE ~ azhimut_confirmed +# ) ) |> +# mutate(blueprint_confirmed = case_when( +# cell_type_clean %in% c("cd4 cytotoxic t", "cd4 helper t") & blueprint_singler == "cd4 tem" & predicted.celltype.l2 != "cd4 tcm" ~ T, +# cell_type_clean %in% c("cd4 cytotoxic t", "cd4 helper t") & blueprint_singler == "cd4 t" & predicted.celltype.l2 != "cd4 tcm" ~ T, +# TRUE ~ blueprint_confirmed +# ) ) |> +# +# mutate(azhimut_confirmed = if_else(cell_type_clean == "cd4hymocyte" & predicted.celltype.l2 |> str_detect("cd4|treg") & !blueprint_singler |> str_detect("cd4"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "cd4hymocyte" & !predicted.celltype.l2 |> str_detect("cd4") & blueprint_singler |> str_detect("cd4|treg"), T, blueprint_confirmed) ) |> +# +# mutate(azhimut_confirmed = case_when( +# cell_type_clean %in% c("cd8 memory t") & predicted.celltype.l2 == "cd8 tem" & blueprint_singler != "cd8 tcm" ~ T, +# cell_type_clean %in% c("cd8 memory t") & predicted.celltype.l2 == "cd8 tcm" & blueprint_singler != "cd8 tem" ~ T, +# TRUE ~ azhimut_confirmed +# ) ) |> +# mutate(blueprint_confirmed = case_when( +# cell_type_clean %in% c("cd8 memory t") & predicted.celltype.l2 != "cd8 tem" & blueprint_singler == "cd8 tcm" ~ T, +# cell_type_clean %in% c("cd8 memory t") & predicted.celltype.l2 != "cd8 tcm" & blueprint_singler == "cd8 tem" ~ T, +# TRUE ~ blueprint_confirmed +# ) ) |> +# +# mutate(azhimut_confirmed = case_when( +# cell_type_clean %in% c("cd4 memory t") & predicted.celltype.l2 == "cd4 tem" & blueprint_singler != "cd8 tcm" ~ T, +# cell_type_clean %in% c("cd4 memory t") & predicted.celltype.l2 == "cd4 tcm" & blueprint_singler != "cd8 tem" ~ T, +# TRUE ~ azhimut_confirmed +# ) ) |> +# mutate(blueprint_confirmed = case_when( +# cell_type_clean %in% c("cd4 memory t") & predicted.celltype.l2 != "cd4 tem" & blueprint_singler == "cd4 tcm" ~ T, +# cell_type_clean %in% c("cd4 memory t") & predicted.celltype.l2 != "cd4 tcm" & blueprint_singler == "cd4 tem" ~ T, +# TRUE ~ blueprint_confirmed +# ) ) |> +# +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "t") & blueprint_singler =="cd8 t" & predicted.celltype.l2 |> str_detect("cd8"), T, azhimut_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "t") & blueprint_singler =="cd4 t" & predicted.celltype.l2 |> str_detect("cd4|treg"), T, azhimut_confirmed) ) |> +# +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "treg") & blueprint_singler %in% c("tregs"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "treg") & predicted.celltype.l2 == "treg", T, azhimut_confirmed) ) |> +# +# +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "tcm cd4") & blueprint_singler %in% c("cd4 tcm"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "tcm cd4") & predicted.celltype.l2 == "cd4 tcm", T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "tcm cd8") & blueprint_singler %in% c("cd8 tcm"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "tcm cd8") & predicted.celltype.l2 == "cd8 tcm", T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "tem cd4") & blueprint_singler %in% c("cd4 tem"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "tem cd4") & predicted.celltype.l2 == "cd4 tem", T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "tem cd8") & blueprint_singler %in% c("cd8 tem"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "tem cd8") & predicted.celltype.l2 == "cd8 tem", T, azhimut_confirmed) ) |> +# +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "tgd") & predicted.celltype.l2 == "gdt", T, azhimut_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "activated cd4") & predicted.celltype.l2 == "cd4 proliferating", T, azhimut_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "activated cd8") & predicted.celltype.l2 == "cd8 proliferating", T, azhimut_confirmed) ) |> +# +# +# +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c("naive cd4", "naive t") & predicted.celltype.l2 %in% c("cd4 naive"), T, azhimut_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c("naive cd8", "naive t") & predicted.celltype.l2 %in% c("cd8 naive"), T, azhimut_confirmed) ) |> +# +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "prot") & predicted.celltype.l2 %in% c("cd4 naive") & !blueprint_singler |> str_detect("clp|hcs|mpp|cd8"), T, azhimut_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "prot") & predicted.celltype.l2 %in% c("cd8 naive") & !blueprint_singler |> str_detect("clp|hcs|mpp|cd4"), T, azhimut_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "prot") & predicted.celltype.l2 %in% c("hspc"), T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "prot") & blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), T, blueprint_confirmed) ) |> +# +# mutate(azhimut_confirmed = if_else(cell_type_clean == "dendritic" & predicted.celltype.l2 %in% c("asdc", "cdc2", "cdc1", "pdc"), T, azhimut_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "double negative t regulatory" & predicted.celltype.l2 == "dnt", T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "early t lineage precursor", "immature innate lymphoid") & blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "early t lineage precursor", "immature innate lymphoid") & predicted.celltype.l2 == "hspc" & blueprint_singler != "clp", T, azhimut_confirmed) ) |> +# +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c("ilc1", "ilc2", "innate lymphoid") & blueprint_singler == "nk", T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c("ilc1", "ilc2", "innate lymphoid") & predicted.celltype.l2 %in% c( "nk", "ilc", "nk proliferating"), T, azhimut_confirmed) ) |> +# +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "immature t") & blueprint_singler %in% c("naive t"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "immature t") & predicted.celltype.l2 == "t naive", T, azhimut_confirmed) ) |> +# +# mutate(cell_type_harmonised = if_else(cell_type_clean == "fraction a prepro b", "naive b", cell_type_harmonised)) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == "granulocyte" & blueprint_singler %in% c("eosinophils", "neutrophils"), T, blueprint_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c("immature neutrophil", "neutrophil") & blueprint_singler %in% c( "neutrophils"), T, blueprint_confirmed) ) |> +# +# mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("megakaryocyte") & blueprint_singler |> str_detect("megakaryocyte"), T, blueprint_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("macrophage") & blueprint_singler |> str_detect("macrophage"), T, blueprint_confirmed) ) |> +# +# mutate(blueprint_confirmed = if_else(cell_type_clean %in% c( "nk") & blueprint_singler %in% c("nk"), T, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean %in% c( "nk") & predicted.celltype.l2 %in% c("nk", "nk proliferating", "nk_cd56bright", "ilc"), T, azhimut_confirmed) ) |> +# +# +# # If identical force +# mutate(azhimut_confirmed = if_else(cell_type_clean == predicted.celltype.l2 , T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean == blueprint_singler , T, blueprint_confirmed) ) |> +# +# # Perogenitor +# mutate(azhimut_confirmed = if_else(cell_type_clean |> str_detect("progenitor|hematopoietic|precursor") & predicted.celltype.l2 == "hspc", T, azhimut_confirmed) ) |> +# mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("progenitor|hematopoietic|precursor") & blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), T, blueprint_confirmed) ) |> +# +# # Generic original annotation and stem for new annotations +# mutate(azhimut_confirmed = if_else( +# cell_type_clean %in% c("T cell", "myeloid cell", "leukocyte", "myeloid leukocyte", "B cell") & +# predicted.celltype.l2 == "hspc" & +# blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), T, azhimut_confirmed) ) |> +# +# # Omit mature for stem +# mutate(blueprint_confirmed = if_else(cell_type_clean |> str_detect("mature") & blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), F, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean |> str_detect("mature") & predicted.celltype.l2 == "hspc", F, azhimut_confirmed) ) |> +# +# # Omit megacariocyte for stem +# mutate(blueprint_confirmed = if_else(cell_type_clean == "megakaryocyte" & blueprint_singler %in% c("clp","hcs", "mpp", "gmp"), F, blueprint_confirmed) ) |> +# mutate(azhimut_confirmed = if_else(cell_type_clean == "megakaryocyte" & predicted.celltype.l2 == "hspc", F, azhimut_confirmed) ) |> +# +# # Mast cells +# mutate(cell_type_harmonised = if_else(cell_type_clean == "mast", "mast", cell_type_harmonised)) |> +# +# +# # Visualise +# #distinct(cell_type_clean, predicted.celltype.l2, blueprint_singler, strong_evidence, azhimut_confirmed, blueprint_confirmed) |> +# arrange(!strong_evidence, cell_type_clean) |> +# +# # set cell names +# mutate(cell_type_harmonised = case_when( +# cell_type_harmonised == "" & azhimut_confirmed ~ predicted.celltype.l2, +# cell_type_harmonised == "" & blueprint_confirmed ~ blueprint_singler, +# TRUE ~ cell_type_harmonised +# )) |> +# +# # Add NA +# mutate(cell_type_harmonised = case_when(cell_type_harmonised != "" ~ cell_type_harmonised)) |> +# +# # Add unannotated cells because datasets were too small +# mutate(cell_type_harmonised = case_when( +# is.na(cell_type_harmonised) & cell_type_clean |> str_detect("progenitor|hematopoietic|stem|precursor") ~ "stem", +# +# is.na(cell_type_harmonised) & cell_type_clean == "cd14 monocyte" ~ "cd14 mono", +# is.na(cell_type_harmonised) & cell_type_clean == "cd16 monocyte" ~ "cd16 mono", +# is.na(cell_type_harmonised) & cell_type_clean %in% c("cd4 cytotoxic t", "tem cd4") ~ "cd4 tem", +# is.na(cell_type_harmonised) & cell_type_clean %in% c("cd8 cytotoxic t", "tem cd8") ~ "cd8 tem", +# is.na(cell_type_harmonised) & cell_type_clean |> str_detect("macrophage") ~ "macrophage", +# is.na(cell_type_harmonised) & cell_type_clean %in% c("mature b", "memory b", "transitional stage b") ~ "b memory", +# is.na(cell_type_harmonised) & cell_type_clean == "mucosal invariant t" ~ "mait", +# is.na(cell_type_harmonised) & cell_type_clean == "naive b" ~ "b naive", +# is.na(cell_type_harmonised) & cell_type_clean == "nk" ~ "nk", +# is.na(cell_type_harmonised) & cell_type_clean == "naive cd4" ~"cd4 naive", +# is.na(cell_type_harmonised) & cell_type_clean == "naive cd8" ~"cd8 naive", +# is.na(cell_type_harmonised) & cell_type_clean == "treg" ~ "treg", +# is.na(cell_type_harmonised) & cell_type_clean == "tgd" ~ "tgd", +# TRUE ~ cell_type_harmonised +# )) |> +# +# mutate(confidence_class = case_when( +# !is.na(cell_type_harmonised) & strong_evidence ~ 2, +# !is.na(cell_type_harmonised) & !strong_evidence ~ 3 +# )) |> +# +# # Lowest grade annotation UNreliable +# mutate(cell_type_harmonised = case_when( +# +# # Get origincal annotation +# is.na(cell_type_harmonised) & cell_type_clean %in% c("neutrophil", "granulocyte") ~ cell_type_clean, +# is.na(cell_type_harmonised) & cell_type_clean %in% c("conventional dendritic", "dendritic") ~ "cdc", +# is.na(cell_type_harmonised) & cell_type_clean %in% c("classical monocyte") ~ "cd14 mono", +# +# # Get Seurat annotation +# is.na(cell_type_harmonised) & predicted.celltype.l2 != "eryth" & !is.na(predicted.celltype.l2) ~ predicted.celltype.l2, +# is.na(cell_type_harmonised) & !blueprint_singler %in% c( +# "astrocytes", "smooth muscle", "preadipocytes", "mesangial", "myocytes", +# "doublet", "melanocytes", "chondrocytes", "mv endothelial", "fibros", +# "neurons", "keratinocytes", "endothelial", "epithelial", "skeletal muscle", "pericytes", "erythrocytes", "adipocytes" +# ) & !is.na(blueprint_singler) ~ blueprint_singler, +# TRUE ~ cell_type_harmonised +# +# )) |> +# +# # Lowest grade annotation UNreliable +# mutate(cell_type_harmonised = case_when( +# +# # Get origincal annotation +# !cell_type_harmonised %in% c("doublet", "platelet") ~ cell_type_harmonised +# +# )) |> +# +# mutate(confidence_class = case_when( +# is.na(confidence_class) & !is.na(cell_type_harmonised) ~ 4, +# TRUE ~ confidence_class +# )) +# +# # Another passage +# +# # annotated_samples = annotation_crated_UNconfirmed |> filter(!is.na(cell_type_harmonised)) |> distinct( cell_type, .sample, file_id) +# # +# # annotation_crated_UNconfirmed |> +# # filter(is.na(cell_type_harmonised)) |> +# # count(cell_type , cell_type_harmonised ,predicted.celltype.l2 ,blueprint_singler) |> +# # arrange(desc(n)) |> +# # print(n=99) +# +# +# annotation_all = +# annotation_crated_confirmed |> +# clean_cell_types_deeper() |> +# bind_rows( +# annotation_crated_UNconfirmed +# ) |> +# +# # I have multiple confidence_class per combination of labels +# distinct() |> +# with_groups(c(cell_type_clean, predicted.celltype.l2, blueprint_singler), ~ .x |> arrange(confidence_class) |> slice(1)) |> +# +# # Simplify after harmonisation +# mutate(cell_type_harmonised = case_when( +# cell_type_harmonised %in% c("b memory", "b intermediate", "classswitched memory b", "memory b" ) ~ "b memory", +# cell_type_harmonised %in% c("b naive", "naive b") ~ "b naive", +# cell_type_harmonised %in% c("nk_cd56bright", "nk", "nk proliferating", "ilc") ~ "ilc", +# cell_type_harmonised %in% c("mpp", "clp", "hspc", "mep", "cmp", "hsc", "gmp") ~ "stem", +# cell_type_harmonised %in% c("macrophages", "macrophages m1", "macrophages m2") ~ "macrophage", +# cell_type_harmonised %in% c("treg", "tregs") ~ "treg", +# cell_type_harmonised %in% c("gdt", "tgd") ~ "tgd", +# cell_type_harmonised %in% c("cd8 proliferating", "cd8 tem") ~ "cd8 tem", +# cell_type_harmonised %in% c("cd4 proliferating", "cd4 tem") ~ "cd4 tem", +# cell_type_harmonised %in% c("eosinophils", "neutrophils", "granulocyte", "neutrophil") ~ "granulocyte", +# cell_type_harmonised %in% c("cdc", "cdc1", "cdc2", "dc") ~ "cdc", +# +# TRUE ~ cell_type_harmonised +# )) |> +# dplyr::select(cell_type_clean, cell_type_harmonised, predicted.celltype.l2, blueprint_singler, confidence_class) |> +# distinct() +# +# +# curated_annotation = +# annotation |> +# clean_cell_types_deeper() |> +# filter(lineage_1=="immune") |> +# dplyr::select( +# .cell, .sample, cell_type, cell_type_clean, predicted.celltype.l2, blueprint_singler, monaco_singler) |> +# left_join( +# annotation_all , +# by = c("cell_type_clean", "predicted.celltype.l2", "blueprint_singler") +# ) |> +# dplyr::select( +# .cell, .sample, cell_type, cell_type_harmonised, confidence_class, +# cell_annotation_azimuth_l2 = predicted.celltype.l2, cell_annotation_blueprint_singler = blueprint_singler, +# cell_annotation_monaco_singler = monaco_singler +# ) |> +# +# # Reannotation of generic cell types +# mutate(cell_type_harmonised = case_when( +# cell_type_harmonised=="cd4 t" & cell_annotation_monaco_singler |> str_detect("effector memory") ~ "cd4 tem", +# cell_type_harmonised=="cd4 t" & cell_annotation_monaco_singler |> str_detect("mait") ~ "mait", +# cell_type_harmonised=="cd4 t" & cell_annotation_monaco_singler |> str_detect("central memory") ~ "cd4 tcm", +# cell_type_harmonised=="cd4 t" & cell_annotation_monaco_singler |> str_detect("naive") ~ "cd4 naive", +# cell_type_harmonised=="cd8 t" & cell_annotation_monaco_singler |> str_detect("effector memory") ~ "cd8 tem", +# cell_type_harmonised=="cd8 t" & cell_annotation_monaco_singler |> str_detect("central memory") ~ "cd8 tcm", +# cell_type_harmonised=="cd8 t" & cell_annotation_monaco_singler |> str_detect("naive") ~ "cd8 naive", +# cell_type_harmonised=="monocytes" & cell_annotation_monaco_singler |> str_detect("non classical") ~ "cd16 mono", +# cell_type == "nonclassical monocyte" & cell_type_harmonised=="monocytes" & cell_annotation_monaco_singler =="intermediate monocytes" ~ "cd16 mono", +# cell_type_harmonised=="monocytes" & cell_annotation_monaco_singler |> str_detect("^classical") ~ "cd14 mono", +# cell_type == "classical monocyte" & cell_type_harmonised=="monocytes" & cell_annotation_monaco_singler =="intermediate monocytes" ~ "cd14 mono", +# cell_type_harmonised=="monocytes" & cell_annotation_monaco_singler =="myeloid dendritic" & str_detect(cell_annotation_azimuth_l2, "cdc") ~ "cdc", +# +# +# TRUE ~ cell_type_harmonised +# )) |> +# +# # Change CD4 classification for version 0.2.1 +# mutate(confidence_class = if_else( +# cell_type_harmonised |> str_detect("cd4|mait|treg|tgd") & cell_annotation_monaco_singler %in% c("terminal effector cd4 t", "naive cd4 t", "th2", "th17", "t regulatory", "follicular helper t", "th1/th17", "th1", "nonvd2 gd t", "vd2 gd t"), +# 3, +# confidence_class +# )) |> +# +# # Change CD4 classification for version 0.2.1 +# mutate(cell_type_harmonised = if_else( +# cell_type_harmonised |> str_detect("cd4|mait|treg|tgd") & cell_annotation_monaco_singler %in% c("terminal effector cd4 t", "naive cd4 t", "th2", "th17", "t regulatory", "follicular helper t", "th1/th17", "th1", "nonvd2 gd t", "vd2 gd t"), +# cell_annotation_monaco_singler, +# cell_type_harmonised +# )) |> +# +# +# mutate(cell_type_harmonised = cell_type_harmonised |> +# str_replace("naive cd4 t", "cd4 naive") |> +# str_replace("th2", "cd4 th2") |> +# str_replace("^th17$", "cd4 th17") |> +# str_replace("t regulatory", "treg") |> +# str_replace("follicular helper t", "cd4 fh") |> +# str_replace("th1/th17", "cd4 th1/th17") |> +# str_replace("^th1$", "cd4 th1") |> +# str_replace("nonvd2 gd t", "tgd") |> +# str_replace("vd2 gd t", "tgd") +# ) |> +# +# # add immune_unclassified +# mutate(cell_type_harmonised = if_else(cell_type_harmonised == "monocytes", "immune_unclassified", cell_type_harmonised)) |> +# mutate(cell_type_harmonised = if_else(is.na(cell_type_harmonised), "immune_unclassified", cell_type_harmonised)) |> +# mutate(confidence_class = if_else(is.na(confidence_class), 5, confidence_class)) |> +# +# # drop uncommon cells +# mutate(cell_type_harmonised = if_else(cell_type_harmonised %in% c("cd4 t", "cd8 t", "asdc", "cd4 ctl"), "immune_unclassified", cell_type_harmonised)) +# +# +# # Further rescue of unannotated cells, manually +# +# # curated_annotation |> +# # filter(cell_type_harmonised == "immune_unclassified") |> +# # count(cell_type , cell_type_harmonised ,confidence_class ,cell_annotation_azimuth_l2 ,cell_annotation_blueprint_singler ,cell_annotation_monaco_singler) |> +# # arrange(desc(n)) |> +# # write_csv("curated_annotation_still_unannotated_0.2.csv") +# +# +# curated_annotation = +# curated_annotation |> +# left_join( +# read_csv("~/PostDoc/CuratedAtlasQueryR/dev/curated_annotation_still_unannotated_0.2_manually_labelled.csv") |> +# select(cell_type, cell_type_harmonised_manually_curated = cell_type_harmonised, confidence_class_manually_curated = confidence_class, everything()), +# by = join_by(cell_type, cell_annotation_azimuth_l2, cell_annotation_blueprint_singler, cell_annotation_monaco_singler) +# ) |> +# mutate( +# confidence_class = if_else(cell_type_harmonised == "immune_unclassified", confidence_class_manually_curated, confidence_class), +# cell_type_harmonised = if_else(cell_type_harmonised == "immune_unclassified", cell_type_harmonised_manually_curated, cell_type_harmonised), +# ) |> +# select(-contains("manually_curated"), -n) |> +# +# # drop uncommon cells +# mutate(cell_type_harmonised = if_else(cell_type_harmonised %in% c("cd4 tcm", "cd4 tem"), "immune_unclassified", cell_type_harmonised)) +# +# +# +# # # Recover confidence class == 4 +# +# # curated_annotation |> +# # filter(confidence_class==4) |> +# # count(cell_type , cell_type_harmonised ,confidence_class ,cell_annotation_azimuth_l2 ,cell_annotation_blueprint_singler ,cell_annotation_monaco_singler) |> +# # arrange(desc(n)) |> +# # write_csv("curated_annotation_still_unannotated_0.2_confidence_class_4.csv") +# +# curated_annotation = +# curated_annotation |> +# left_join( +# read_csv("~/PostDoc/CuratedAtlasQueryR/dev/curated_annotation_still_unannotated_0.2_confidence_class_4_manually_labelled.csv") |> +# select(confidence_class_manually_curated = confidence_class, everything()), +# by = join_by(cell_type, cell_type_harmonised, cell_annotation_azimuth_l2, cell_annotation_blueprint_singler, cell_annotation_monaco_singler) +# ) |> +# mutate( +# confidence_class = if_else(confidence_class == 4 & !is.na(confidence_class_manually_curated), confidence_class_manually_curated, confidence_class) +# ) |> +# select(-contains("manually_curated"), -n) +# +# # Correct fishy stem cell labelling +# # If stem for the study's annotation and blueprint is non-immune it is probably wrong, +# # even because the heart has too many progenitor/stem +# curated_annotation = +# curated_annotation |> +# mutate(confidence_class = case_when( +# cell_type_harmonised == "stem" & cell_annotation_blueprint_singler %in% c( +# "skeletal muscle", "adipocytes", "epithelial", "smooth muscle", "chondrocytes", "endothelial" +# ) ~ 5, +# TRUE ~ confidence_class +# )) +# +# +# curated_annotation_merged = +# +# # Fix cell ID +# metadata_df |> +# dplyr::select(.cell, .sample, cell_type) |> +# as_tibble() |> +# +# # Add cell type +# left_join(curated_annotation |> dplyr::select(-cell_type), by = c(".cell", ".sample")) |> +# +# # Add non immune +# mutate(cell_type_harmonised = if_else(is.na(cell_type_harmonised), "non_immune", cell_type_harmonised)) |> +# mutate(confidence_class = if_else(is.na(confidence_class) & cell_type_harmonised == "non_immune", 1, confidence_class)) |> +# +# # For some unknown reason +# distinct() +# +# +# curated_annotation_merged |> +# +# # Save +# saveRDS(file_curated_annotation_merged) +# +# metadata_annotated = +# curated_annotation_merged |> +# +# # merge with the rest of metadata +# left_join( +# metadata_df |> +# as_tibble(), +# by=c(".cell", ".sample", "cell_type") +# ) +# +# # Replace `.` with `_` for all column names as it can create difficoulties for MySQL and Python +# colnames(metadata_annotated) = colnames(metadata_annotated) |> str_replace_all("\\.", "_") +# metadata_annotated = metadata_annotated |> rename(cell_ = `_cell`, sample_ = `_sample`) +# +# +# dictionary_connie_non_immune = +# metadata_annotated |> +# filter(cell_type_harmonised == "non_immune") |> +# distinct(cell_type) |> +# harmonise_names_non_immune() |> +# rename(cell_type_harmonised_non_immune = cell_type_harmonised ) +# +# metadata_annotated = +# metadata_annotated |> +# left_join(dictionary_connie_non_immune) |> +# mutate(cell_type_harmonised = if_else(cell_type_harmonised=="non_immune", cell_type_harmonised_non_immune, cell_type_harmonised)) |> +# select(-cell_type_harmonised_non_immune) +# +# +# } remove_files_safely <- function(files) { for (file in files) { diff --git a/man/seurat_to_ligand_receptor_count.Rd b/man/seurat_to_ligand_receptor_count.Rd deleted file mode 100644 index 91d7a16..0000000 --- a/man/seurat_to_ligand_receptor_count.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CellChat.R -\name{seurat_to_ligand_receptor_count} -\alias{seurat_to_ligand_receptor_count} -\title{Ligand-Receptor Count from Seurat Data} -\usage{ -seurat_to_ligand_receptor_count( - counts, - .cell_group, - assay, - sample_for_plotting = "" -) -} -\arguments{ -\item{counts}{Seurat object.} - -\item{.cell_group}{Cell group variable.} - -\item{assay}{Name of the assay to use.} - -\item{sample_for_plotting}{Sample name for plotting.} -} -\value{ -A list of communication results including interactions and signaling pathways. -} -\description{ -Calculates ligand-receptor interactions for each cell type in a Seurat object using CellChat. -}