-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.R
252 lines (228 loc) · 9.15 KB
/
utils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# check whether a given input object is a kNN as produced by BiocNeighbors
.isKnn <- function(x, checkNNcl=TRUE, triggerError=TRUE){
res <- is.list(x) && all(c("index","distance") %in% names(x)) &&
all(vapply(x[c("index","distance")],FUN.VALUE=logical(1),FUN=is.matrix)) &&
all(dim(x$index)==dim(x$distance)) && max(x$index) <= nrow(x$index)
if(res && checkNNcl){
if(!is.matrix(x$nncl) || !all(dim(x$nncl)==dim(x$index))) res <- FALSE
}
if(res) return(TRUE)
if(triggerError) stop("The object is not a set of kNN or is not in a ",
"BiocNeighbors-like format.")
FALSE
}
.isNNlist <- function(x){
!is.matrix(x[[1]]) && is.integer(x[[1]])
}
.checkInputs <- function(knn, labels, ...){
.isKnn(knn, ...)
stopifnot(is.character(labels) || is.factor(labels) || is.integer(labels))
stopifnot(length(labels)==nrow(knn$index))
}
#' Computes k nearest neighbors from embedding
#'
#' Computes k nearest neighbors from embedding.
#'
#' @param x A numeric matrix (with features as columns and items as
#' rows) from which nearest neighbors will be computed.
#' @param k The number of nearest neighbors.
#' @param BNPARAM A BiocNeighbors parameter object to compute kNNs. Ignored
#' unless the input is a matrix or data.frame. If omitted, the Annoy
#' approximation will be used if there are more than 500 elements.
#' @return A knn list.
#' @importFrom BiocNeighbors AnnoyParam ExhaustiveParam findKNN
#' @export
#' @examples
#' d1 <- mockData()
#' emb2knn(as.matrix(d1[,1:2]),k=5)
emb2knn <- function(x, k, BNPARAM=NULL){
stopifnot(is.matrix(x) && is.numeric(x))
stopifnot(is.numeric(k) && length(k)==1 && k>0 && (k %/% 1)==k)
if(is.null(BNPARAM)){
if(nrow(x)>500){
BNPARAM <- BiocNeighbors::AnnoyParam()
}else{
BNPARAM <- BiocNeighbors::ExhaustiveParam()
}
}
findKNN(as.matrix(x), k=k, BNPARAM=BNPARAM)
}
#' Computes shared nearest neighbors from embedding
#'
#' computes shared nearest neighbors from embedding.
#'
#' @param x A numeric matrix (with features as columns and items as
#' rows) from which nearest neighbors will be computed.
#' @param k The number of nearest neighbors.
#' @param type A string specifying the type of weighting scheme to use for shared neighbors.
#' Possible choices include "rank", "number", and "jaccard". See `type` in
#' [bluster::neighborsToSNNGraph()] for details.
#' @param BNPARAM A BiocNeighbors parameter object to compute kNNs. Ignored
#' unless the input is a matrix or data.frame. If omitted, the Annoy
#' approximation will be used if there are more than 500 elements.
#' @return An igraph object.
#' @importFrom BiocNeighbors AnnoyParam ExhaustiveParam findKNN
#' @export
#' @examples
#' d1 <- mockData()
#' emb2snn(as.matrix(d1[,1:2]),k=5)
#' @importFrom bluster neighborsToSNNGraph
emb2snn <- function(x, k, type="rank", BNPARAM=NULL){
knn <- emb2knn(x, k, BNPARAM=BNPARAM)
bluster::neighborsToSNNGraph(knn$index, type = type)
}
# computes nearest neighbors from pairwise distance matrix
#' @importFrom methods is
.dist2knn <- function(x, k){
stopifnot(is(x,"dist"))
x <- as.matrix(x)
n <- dim(x)[1]
knn_distance <- matrix(NA, n, k)
knn_index <- matrix(NA, n, k)
for(i in seq_len(n)){
distances <- x[i,]
# Exclude the distance to itself
distances[i] <- Inf
# Get the indices of the k-nearest neighbors
neighbors <- order(distances)[seq_len(k)]
# Store the indices and distances of the k-nearest neighbors
knn_index[i, ] <- neighbors
knn_distance[i, ] <- distances[neighbors]
}
return(list(index = knn_index, distance = knn_distance))
}
# computes shared nearest neighbors from pairwise distance
#' @importFrom bluster neighborsToSNNGraph
.dist2snn <- function(x, k, type="rank"){
knn <- .dist2knn(x, k)
bluster::neighborsToSNNGraph(knn$index, type = type)
}
#' @importFrom igraph as_adj_list
.igraph2nn <- function(x, labels=NULL, directed=TRUE){
if(is.null(labels)) labels <- vertex_attr(x,"class")
nn <- lapply(as_adj_list(x, loops="ignore", mode=ifelse(directed,"out","total")),
as.integer)
if(!directed || !all(lengths(nn)==length(nn[[1]]))) return(nn)
knn <- matrix(unlist(nn),nrow=length(nn),byrow=TRUE)
knn <- list(index=knn,
distance=matrix(NA_integer_,nrow=nrow(knn),ncol=ncol(knn)))
if(!is.null(labels)){
knn$nncl <- matrix(labels[as.integer(knn$index)], nrow=length(nn))
}
knn
}
#' @importFrom bluster neighborsToKNNGraph
#' @importFrom igraph set_vertex_attr
.nn2graph <- function(x, labels=NULL){
g <- bluster::neighborsToKNNGraph(x$index, directed=TRUE)
if(!is.null(labels)) g <- set_vertex_attr(g, "class", value=labels)
g
}
# Adapted from https://github.com/cran/mclustcomp/blob/master/R/auxiliary.R
.aux.conversion <- function(x){
if (is.character(x)){
x <- as.numeric(as.factor(unlist(strsplit(x,split=""))))
} else if (is.factor(x)){
x <- as.numeric(x)
} else {
x <- as.numeric(as.factor(x))
}
return(round(x))
}
# switch the values between two named items in a list
.switchListItem <- function(mylist, name1, name2){
# Switching the values
temp <- my_list[[name1]]
my_list[[name1]] <- my_list[[name2]]
my_list[[name2]] <- temp
}
# Check for unrecognized arguments and filter arguments for each function
# example usage: function1(!!!.checkEllipsisArgs(fnList = list(function1, function2), a = 1, b = 2, c = 3)[[1]])
.checkEllipsisArgs <- function(fnList=list(), ...) {
args <- list(...)
formal_args <- lapply(fnList, FUN=\(x) names(formals(x)))
if(length(unknown <- setdiff(names(args), unlist(formal_args)))>0)
stop("Some unrecognized arguments were given: ", paste(unknown, collapse=", "))
lapply(formal_args, FUN=\(x){
args[names(args) %in% x]
})
}
# Check the argument where multiple values can be inputed once
# difference with match.arg: if the input arguments contain anything that is not
# recognised, this will throw an error or warning (depend on the argument "warning").
.checkInvalidArgs <- function(args, allowed_args, arg_name, warning=TRUE){
valid_args <- match.arg(args, allowed_args, several.ok = TRUE)
invalid_args <- setdiff(args, valid_args)
if (length(invalid_args) > 0) {
if (warning){
warning("Invalid ", arg_name, ": ", paste(invalid_args, collapse = ", "),
". Allowed ", arg_name, " are: ", paste(allowed_args, collapse = ", "))
}else{
stop("Invalid ", arg_name, ": ", paste(invalid_args, collapse = ", "),
". Allowed ", arg_name, " are: ", paste(allowed_args, collapse = ", "))
}
}
}
# for a function and an argument, get all the possibilities for this argument,
# either by looking at the default, or looking at the attribute of the function.
.get_allowed_args <- function(function_with_args, arg_name, use_default=TRUE,
use_attribute=FALSE, attr_name=NULL){
if(use_default & (!use_attribute)){
# Get the allowed args from the default args settings
allowed_args <- eval(formals(function_with_args)[[arg_name]])
}else if((!use_default) & use_attribute){
# Get the allowed args from the function's attributes
allowed_args <- attr(function_with_args, attr_name)
}else{stop("Either use default, or use the function attribute.")}
if (is.null(allowed_args)) {
stop("The candidate args are not defined for the function.")
}
return(allowed_args)
}
# check if the "metrics" argument is valide for the specified "level" of calculation
.checkMetricsLevel <- function(metrics, level, level_functions, ...) {
# Check if level is valid by looking it up in level_functions
if (!level %in% names(level_functions)) {
stop(paste("Invalid level:", level, ". Allowed levels are:",
paste(names(level_functions), collapse = ", ")))
}
# Retrieve the formal argument list of the function for the given level
function_for_level <- level_functions[[level]]
# Extract allowed metrics from function
allowed_metrics <- .get_allowed_args(function_for_level, "metrics",...)
# Check that all provided metrics are valid for this level
.checkInvalidArgs(metrics, allowed_metrics, "metrics", warning=FALSE)
}
.class2global <- function(class_res, summarize_fun=base::mean){
stopifnot(is.data.frame(class_res))
class_res$class <- 1
res <- aggregate(. ~ class, data = class_res, FUN = summarize_fun)
subset(res, select = -class)
}
.element2class <- function(element_res, summarize_fun=base::mean){
stopifnot(is.data.frame(element_res))
aggregate(. ~ class, data = element_res, FUN = summarize_fun)
}
# row-bind two dataframes with no common columns
.rbind_na <- function(df1, df2){
if(nrow(df1)==0){return(df2)}
if(nrow(df2)==0){return(df1)}
# Find the columns that are missing in each data frame
missing_cols_df1 <- setdiff(names(df2), names(df1))
missing_cols_df2 <- setdiff(names(df1), names(df2))
# Add the missing columns with NA values to each data frame
df1[missing_cols_df1] <- NA
df2[missing_cols_df2] <- NA
# Now bind the two data frames using rbind
return(rbind(df1, df2))
}
.decideBNPARAM <- function(ncells, BNPARAM=NULL){
if(is.null(BNPARAM)){
if(ncells>500){
BNPARAM <- BiocNeighbors::AnnoyParam()
}else{
BNPARAM <- BiocNeighbors::ExhaustiveParam()
}
}
return(BNPARAM)
}