Skip to content

Commit 030f078

Browse files
authored
Merge pull request #58 from AllenInstitute/update_mappings
fix hierarchical and Seurat mappings
2 parents 1acbd66 + 5a9f2c1 commit 030f078

27 files changed

+768
-78
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: scrattch.mapping
22
Title: Generalized mapping of annotations from shiny taxonomy to query
33
data.
4-
Version: 0.55.5
4+
Version: 0.55.6
55
Authors@R:
66
person(given = "Nelson",
77
family = "Johansen",

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
export(corrMap)
44
export(getMappingResults)
5+
export(get_hierarchical_extended_results)
56
export(hierarchicalMapMyCells)
7+
export(map_by_cor)
68
export(map_dend)
79
export(mappingClass)
810
export(mappingMode)
@@ -15,8 +17,10 @@ exportClasses(mappingClass)
1517
exportMethods(getMappingResults)
1618
import(MatrixGenerics)
1719
import(WGCNA)
20+
import(anndata)
1821
import(doMC)
1922
import(foreach)
23+
import(jsonlite)
2024
import(mfishtools)
2125
import(randomForest)
2226
import(scrattch.hicat)

R/hierarchicalMapMyCells.R

Lines changed: 94 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,10 @@ hierarchicalMapMyCells = function(AIT_anndata,
4848
}
4949

5050
if (file.exists(extended_result_path)) {
51-
stop(paste("ERROR. Extended result path already exists:", extended_result_path))
51+
old_result_path <- gsub(".json","_OLD.json",extended_result_path)
52+
file.rename(extended_result_path,old_result_path)
53+
warning(paste0("WARNING. Extended result path already exists: ", extended_result_path,
54+
". Overwriting this file after moving existing file to ",gsub(".json","_OLD.json",extended_result_path)))
5255
}
5356

5457
taxonomy_anndata_path = file.path(AIT_anndata$uns$taxonomyDir, paste0(AIT_anndata$uns$title, ".h5ad"))
@@ -80,9 +83,13 @@ hierarchicalMapMyCells = function(AIT_anndata,
8083
mapmycells_results_json$results[[hierarcy_level]]$avg_correlation
8184
}
8285
}
86+
87+
## Extract additional hierarchical results from top 5 runner-ups
88+
runnerUps = get_hierarchical_extended_results(extended_result_path)
89+
runnerUps = runnerUps[,unique(colnames(runnerUps))] # If there are duplicates, take the first
8390

8491
## Return annotations and detailed model results
85-
return(mappingResults)
92+
return(list("result"=mappingResults, "detail"=runnerUps))
8693
},
8794
error = function(e) {
8895
errorMessage <- conditionMessage(e)
@@ -209,7 +216,7 @@ get_query_data_path = function(query_data, temp_folder) {
209216
#' @param n_processors Number of independent worker processes to spin up.
210217
#' @param normalization Normalization of the h5ad files; must be either 'raw' or 'log2CPM'.
211218
#'
212-
#' @return
219+
#' @return results
213220
#'
214221
#' @keywords internal
215222
get_mapmycells_results = function(query_data_output_path, extended_result_path,
@@ -242,4 +249,87 @@ list_function_params = function() {
242249
command <- "python -m cell_type_mapper.cli.from_specified_markers --help"
243250
output <- system(command, intern = TRUE)
244251
cat(output, sep = "\n")
245-
}
252+
}
253+
254+
255+
256+
#' Extract winning and runner-up cell set assignments from extended mapping results
#'
#' Reads the extended results JSON file and, for every hierarchy level stored in
#' its `results` element, tabulates the winning assignment, its bootstrapping
#' probability and its average correlation next to the corresponding values of
#' every runner-up assignment.
#'
#' @param extended_result_path Full file path and name of the extended results JSON file to read.
#'
#' @import jsonlite
#'
#' @return A data.frame with one row per cell (row names are the cell ids). For each hierarchy level `<level>` found in the file it holds the columns `<level>_assignment_winner`, `<level>_assignment_runner_up_<k>`, `<level>_bootstrapping_probability_winner`, `<level>_bootstrap_probability_runner_up_<k>`, `<level>_avg_correlation_winner` and `<level>_avg_correlation_runner_up_<k>`, where `<k>` runs from 1 to the largest number of runner-ups reported for any cell at that level. Cells with fewer runner-ups are padded with `""` (assignments) or `0` (probabilities and correlations). `NULL` is returned when the file contains no hierarchy levels.
#'
#' @export
get_hierarchical_extended_results <- function(extended_result_path) {

  ## Extract mapping results
  mapmycells_results_json <- fromJSON(extended_result_path)
  cell_id <- as.character(as.matrix(mapmycells_results_json$results$cell_id))

  ## "cell_id" is an identifier, not a hierarchy level. Dropping it up front
  ## guarantees that every loop iteration produces its own result columns
  ## (previously the last level was appended a second time for "cell_id").
  level_names <- names(mapmycells_results_json$results)
  level_names <- level_names[level_names != "cell_id"]

  ## Build mapping results dataframe
  results_all <- NULL
  for (hierarchy_level in level_names) {
    # Pull in the information
    level_results <- mapmycells_results_json$results[[hierarchy_level]]
    runner_up_assignment <- level_results$runner_up_assignment
    runner_up_probability <- level_results$runner_up_probability
    runner_up_correlation <- level_results$runner_up_correlation

    # Determine how many runner up slots are needed
    n_cells <- length(runner_up_assignment)
    max_len <- max(0L, lengths(runner_up_assignment))

    # paste0() returns one string (not zero) when handed an empty index
    # vector, so the no-runner-up case needs an explicit empty result.
    runner_up_names <- function(label) {
      if (max_len == 0) {
        return(character(0))
      }
      paste0(hierarchy_level, label, seq_len(max_len))
    }

    # Create matrices for relevant info
    assignment <- matrix(nrow = n_cells, ncol = max_len)
    rownames(assignment) <- cell_id
    probability <- correlation <- assignment
    colnames(assignment) <- runner_up_names("_assignment_runner_up_")
    colnames(correlation) <- runner_up_names("_avg_correlation_runner_up_")
    colnames(probability) <- runner_up_names("_bootstrap_probability_runner_up_")

    # seq_len() keeps the loop from running when there are no cells
    for (i in seq_len(n_cells)) {
      len <- length(runner_up_assignment[[i]])
      if (len > 0) {
        slots <- seq_len(len)
        assignment[i, slots] <- runner_up_assignment[[i]]
        correlation[i, slots] <- runner_up_correlation[[i]]
        probability[i, slots] <- runner_up_probability[[i]]
      }
    }

    # Pad the unused runner-up slots
    assignment[is.na(assignment)] <- ""
    correlation[is.na(correlation)] <- 0
    probability[is.na(probability)] <- 0

    # Merge top results and runner-up results into a single data.frame.
    # The winner columns get temporary names here and their final,
    # level-specific names right below.
    results_current <- data.frame(
      winner_assignment = as.vector(level_results$assignment),
      assignment,
      winner_probability = as.vector(level_results$bootstrapping_probability),
      probability,
      winner_correlation = as.vector(level_results$avg_correlation),
      correlation
    )
    winner_names <- c(
      winner_assignment = paste0(hierarchy_level, "_assignment_winner"),
      winner_probability = paste0(hierarchy_level, "_bootstrapping_probability_winner"),
      winner_correlation = paste0(hierarchy_level, "_avg_correlation_winner")
    )
    is_winner <- names(results_current) %in% names(winner_names)
    names(results_current)[is_winner] <-
      unname(winner_names[names(results_current)[is_winner]])

    # Combine current results into previous results data frame
    if (length(results_all) == 0) {
      results_all <- results_current
    } else {
      results_all <- cbind(results_all, results_current)
    }
  }

  # Output results for the whole hierarchy
  return(results_all)
}
335+

R/mappingClass.R

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ setClass(
1616
#' This function instantiates a mappingClass S4 class object.
1717
#'
1818
#' @param annotations A reference taxonomy anndata object.
19-
#' @param detailed_results The number of cells to retain per cluster (default = 100).
19+
#' @param detailed_results a method-specific set of additional mapping output
2020
#'
2121
#' @examples
2222
#' resultAnno <- mappingClass(
@@ -48,7 +48,7 @@ mappingClass <- function(annotations, detailed_results) {
4848
#'
4949
#' @export
5050
setGeneric("getMappingResults",
51-
function(object, scores = FALSE) standardGeneric("getMappingResults")
51+
function(object, scores = TRUE) standardGeneric("getMappingResults")
5252
)
5353

5454
#' Get cell type annotations
@@ -61,7 +61,29 @@ setGeneric("getMappingResults",
6161
setMethod(
6262
"getMappingResults",
6363
signature(object="mappingClass", scores="logical"),
64-
definition = function(object, scores = FALSE) {
64+
definition = function(object, scores = TRUE) {
65+
mapping.anno = object@annotations
66+
if (!scores) {
67+
score.cols = sapply(mapping.anno, is.numeric)
68+
mapping.anno = mapping.anno[, !score.cols]
69+
}
70+
return(mapping.anno)
71+
}
72+
)
73+
74+
#' Get cell type annotations
75+
#'
76+
#' Extract cell type annotations from mappingClass S4 class
77+
#'
78+
#' @return mapping results as a data.frame with labels in map.Method and scores in score.Method
79+
#'
80+
#' @export
81+
setMethod(
82+
"getMappingResults",
83+
signature(object="mappingClass", scores="missing"),
84+
definition = function(object) {
85+
# If scores not provided, set as TRUE by default. Unclear why we need a separate solution for this.
86+
scores = TRUE
6587
mapping.anno = object@annotations
6688
if (!scores) {
6789
score.cols = sapply(mapping.anno, is.numeric)

R/seuratMap.R

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,25 +14,22 @@ seuratMap = function(AIT.anndata, query.data, dims=30, k.weight=5){
1414
expr = {
1515

1616
## Build Query Seurat object
17-
query.seurat = CreateSeuratObject(query.data[AIT.anndata$var_names[AIT.anndata$var$highly_variable_genes],])
18-
query.seurat = SetAssayData(query.seurat, slot = "data", new.data = query.data[AIT.anndata$var_names[AIT.anndata$var$highly_variable_genes],], assay = "RNA")
17+
use.genes = intersect(rownames(query.data),AIT.anndata$var_names[AIT.anndata$var$highly_variable_genes])
18+
query.seurat = suppressWarnings(CreateSeuratObject(query.data[use.genes,]))
19+
query.seurat = suppressWarnings(SetAssayData(query.seurat, slot = "data", new.data = query.data[use.genes,], assay = "RNA"))
20+
VariableFeatures(query.seurat) <- use.genes
1921

2022
## Build Ref Seurat object
21-
ref.seurat = suppressWarnings(CreateSeuratObject(t(AIT.anndata$X[,AIT.anndata$var$highly_variable_genes]), meta.data=as.data.frame(AIT.anndata$obs)));
22-
ref.seurat = SetAssayData(ref.seurat, slot = "data", new.data = t(AIT.anndata$X[,AIT.anndata$var$highly_variable_genes]), assay = "RNA")
23-
24-
## Create a data list for label transfer
25-
seurat.list <- list(ref.seurat, query.seurat)
26-
names(seurat.list) <- c("Reference", "Query")
27-
28-
## Compute variable features for each object
29-
for (i in 1:length(x = seurat.list)) VariableFeatures(seurat.list[[i]]) <- AIT.anndata$var_names[AIT.anndata$var$highly_variable_genes]
23+
ref.data = as.matrix(BiocGenerics::t(AIT.anndata$X[,use.genes]))
24+
ref.seurat = suppressWarnings(CreateSeuratObject(ref.data, meta.data=as.data.frame(AIT.anndata$obs)));
25+
ref.seurat = suppressWarnings(SetAssayData(ref.seurat, slot = "data", new.data = ref.data, assay = "RNA"))
26+
VariableFeatures(ref.seurat) <- use.genes
3027

3128
## Seurat label transfer (celltype)
32-
Target.anchors <- FindTransferAnchors(reference = seurat.list[["Reference"]], query = seurat.list[["Query"]],
33-
dims = 1:dims, verbose=FALSE, npcs=dims)
34-
predictions <- TransferData(anchorset = Target.anchors, refdata = seurat.list[["Reference"]]$cluster_label,
35-
dims = 1:dims, verbose=FALSE, k.weight=k.weight)
29+
Target.anchors <- suppressWarnings(FindTransferAnchors(reference = ref.seurat, query = query.seurat,
30+
dims = 1:dims, verbose=FALSE, npcs=dims))
31+
predictions <- suppressWarnings(TransferData(anchorset = Target.anchors, refdata = ref.seurat$cluster_label,
32+
dims = 1:dims, verbose=FALSE, k.weight=k.weight))
3633
## Create results data.frame
3734
mappingTarget = data.frame(map.Seurat=as.character(predictions$predicted.id),
3835
score.Seurat=predictions$prediction.score.max)

R/taxonomyMapping.R

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#'
55
#' @param AIT.anndata A reference taxonomy object.
66
#' @param query.data A logCPM normalized matrix to be annotated.
7-
#' @param label.cols Column names of annotations to map against. Note that this only works for metadata that represent clusters or groups of clusters (e.g., subclass, supertype, neighborhood, class)
7+
#' @param label.cols Column names of annotations to map against. Note that this only works for metadata that represent clusters or groups of clusters (e.g., subclass, supertype, neighborhood, class) and will default to whatever is included in AIT.anndata$uns$hierarchy
88
#' @param corr.map Should correlation mapping be performed?
99
#' @param tree.map Should tree mapping be performed?
1010
#' @param seurat.map Should seurat mapping be performed?
@@ -14,11 +14,15 @@
1414
#' @export
1515
taxonomy_mapping = function(AIT.anndata, query.data,
1616
corr.map=TRUE, tree.map=TRUE, hierarchical.map=TRUE, seurat.map=TRUE,
17-
label.cols = c("cluster_label","subclass_label", "class_label")){
17+
label.cols = AIT.anndata$uns$hierarchy){ # NOTE THE NEW DEFAULT
18+
#label.cols = c("cluster_label","subclass_label", "class_label")){
1819

20+
suppressWarnings({ # wrapping the whole function in suppressWarnings to avoid having this printed a zillion times: 'useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.'
21+
1922
print(paste("==============================","Mapping","======================="))
2023
print(date())
2124
mappingResults=list()
25+
if(sum(class(label.cols)=="list")<1) label.cols = as.character(hierarchy) # Convert from list to character for this function
2226

2327
## Sanity check on user input and taxonomy/reference annotations
2428
if(!all(label.cols %in% colnames(AIT.anndata$uns$clusterInfo))){
@@ -50,15 +54,25 @@ taxonomy_mapping = function(AIT.anndata, query.data,
5054

5155
#############
5256
## ----- Tree mapping -------------------------------------------------------------------------------
53-
if(tree.map == TRUE & !is.null(AIT.anndata$uns$dend)){ mappingTree = treeMap(AIT.anndata, query.data); mappingResults[["Tree"]] = mappingTree[["result"]] } else{ mappingTree = NULL }
57+
if(tree.map == TRUE & !is.null(AIT.anndata$uns$dend)){
58+
mappingTree = treeMap(AIT.anndata, query.data);
59+
mappingResults[["Tree"]] = mappingTree[["result"]]
60+
} else {
61+
mappingTree = NULL
62+
}
5463

5564
#############
5665
## ----- Seurat mapping ------------------------------------------------------------------------------
5766
if(seurat.map == TRUE){ mappingResults[["Seurat"]] = seuratMap(AIT.anndata, query.data) } else{ mappingResults[["Seurat"]] = NULL }
5867

5968
#############
6069
## ----- Hierarchical mapping ------------------------------------------------------------------------------
61-
if(hierarchical.map == TRUE){ mappingResults[["hierarchical"]] = hierarchicalMapMyCells(AIT.anndata, query.data) } else{ mappingResults[["hierarchical"]] = NULL }
70+
if(hierarchical.map == TRUE){
71+
mappingHierarchical <- hierarchicalMapMyCells(AIT.anndata, query.data)
72+
mappingResults[["hierarchical"]] <- mappingHierarchical[["result"]]
73+
} else {
74+
mappingHierarchical = NULL
75+
}
6276

6377
#############
6478
## Combine mapping results
@@ -80,10 +94,13 @@ taxonomy_mapping = function(AIT.anndata, query.data,
8094

8195
## Build mapping class object
8296
resultAnno <- mappingClass(annotations = mappingAnno,
83-
detailed_results = list("corr" = NA,
84-
"tree" = mappingTree[["detail"]],
85-
"seurat" = NA))
97+
detailed_results = list("corr" = NA,
98+
"tree" = mappingTree[["detail"]],
99+
"seurat" = NA,
100+
"hierarchical" = mappingHierarchical[["detail"]]))
86101

87102
## Return annotations and detailed model results
88103
return(resultAnno)
104+
105+
}) # End suppressWarnings
89106
}

README.md

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,34 +8,20 @@ You can find a detail description of all scrattch.mapping functions here: ![Docu
88

99
Update notes are here: ![Versions](https://github.com/AllenInstitute/scrattch-mapping/blob/dev_njj/VERSIONS.md)
1010

11-
## Docker
12-
13-
We have setup a docker environemnt for scrattch.taxonomy and scrattch.mapping that contains all the required dependencies and the current version of all scrattch packages. This docker is accessible through docker hub via: `njjai/scrattch_mapping:0.6.6`.
14-
15-
#### HPC usage:
16-
17-
##### Non-interactive
18-
`singularity exec --cleanenv docker://njjai/scrattch_mapping:0.6.6 Rscript YOUR_CODE.R`
19-
20-
##### Interactive
21-
`singularity shell --cleanenv docker://njjai/scrattch_mapping:0.6.6`
22-
23-
2411
## Installation
2512

26-
While we advice using the provided docker, you can also install scrattch.mapping directly from github as follows:
13+
### Using docker (recommended)
14+
We have set up a docker environment for scrattch.taxonomy, scrattch.mapping, and scrattch.patchseq that contains all the required dependencies and the current version of all scrattch packages. **See [the readme](https://github.com/AllenInstitute/scrattch/blob/master/README.md#using-docker) for [the parent scrattch package](https://github.com/AllenInstitute/scrattch) for the most up-to-date docker information.**
2715

28-
*Note: slight edits to installation will be needed while repo is private. Also note that `doMC` may need to be installed manually from the download at https://r-forge.r-project.org/R/?group_id=947 if you use Windows.*
16+
### Directly from GitHub (strongly discouraged)
2917

30-
```
31-
# Quickly, but without the vignettes:
32-
devtools::install_github("AllenInstitute/scrattch-mapping")
18+
While we advise using the provided docker, you can also install scrattch.mapping directly from GitHub as follows:
3319

34-
# More slowly, but with the vignettes:
35-
devtools::install_github("AllenInstitute/scrattch-mapping",build_vignettes=TRUE, force=TRUE)
20+
```
21+
devtools::install_github("AllenInstitute/scrattch.mapping")
3622
```
3723

38-
Note that this strategy might not work outside the docker due to complicated dependencies. Vignettes are provided below.
24+
This strategy **might not work** due to complicated dependencies. Also note that `doMC` may need to be installed manually from [HERE](https://r-forge.r-project.org/R/?group_id=947) if you use Windows. Vignettes are provided below.
3925

4026
## Usage examples
4127

VERSIONS.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,32 @@
1+
## scrattch.mapping v0.55.6
2+
3+
Updates to streamline and fix the examples
4+
5+
### Major changes
6+
7+
### Minor changes
8+
* Updated examples
9+
* Related minor bug fixes and function edits
10+
* Updated help files
11+
12+
--
13+
14+
## scrattch.mapping v0.55.5
15+
16+
Major updates correcting issues with hierarchical and Seurat mapping
17+
18+
### Major changes
19+
* Update `seuratMap.R` to correct Seurat mapping, by rolling back to Seurat v4.4 and largely rewriting the code
20+
* Allowing hierarchical mapping to work on modes other than 'standard'
21+
* Returning detailed information for hierarchical mapping (e.g., top 5 matches and bootstrap probabilities) in main results
22+
23+
### Minor changes
24+
* Additional minor bug fixes
25+
* Updated help files
26+
27+
--
28+
29+
130
## scrattch.mapping v0.52.2
231

332
Major change to how we return mapping results.

0 commit comments

Comments
 (0)