-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetPartitionMetrics.R
97 lines (95 loc) · 4.17 KB
/
getPartitionMetrics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#' Compute partition-based metrics
#'
#' Computes a selection of external evaluation metrics for partition.
#'
#' @param true A vector containing the labels of the true classes. Must be a
#' vector of characters, integers, numerics, or a factor, but not a list.
#' @param pred A vector containing the labels of the predicted clusters. Must
#' be a vector of characters, integers, numerics, or a factor, but not a list.
#' @param metrics The metrics to compute. If omitted, main metrics will be
#' computed. See details.
#' @param level The level to calculate the metrics. Options include "element",
#' `"class"` and `"dataset"`.
#' @inheritParams getPartitionGlobalMetrics
#' @return A data.frame of metrics.
#' @details
#' The allowed values for `metrics` depend on the value of `level`:
#' - If `level = "element"`, the allowed `metrics` are:
#' - `"SPC"`: Spot-wise Pair Concordance.
#' - `"ASPC"`: Adjusted Spot-wise Pair Concordance.
#' - If `level = "class"`, the allowed `metrics` are: `"WC"`,`"WH"`,`"AWC"`,
#' `"AWH"`,`"FM"` (see below for details).
#' - If `level = "dataset"`, the allowed `metrics` are:
#' - `"RI"`: Rand Index
#' - `"WC"`: Wallace Completeness
#' - `"WH"`: Wallace Homogeneity
#' - `"ARI"`: Adjusted Rand Index
#' - `"AWC"`: Adjusted Wallace Completeness
#' - `"AWH"`: Adjusted Wallace Homogeneity
#' - `"NCR"`: Normalized class size Rand index
#' - `"MI"`: Mutual Information
#' - `"AMI"`: Adjusted Mutual Information
#' - `"VI"`: Variation of Information
#' - `"EH"`: (Entropy-based) Homogeneity
#' - `"EC"`: (Entropy-based) Completeness
#' - `"VM"`: V-measure
#' - `"FM"`: F-measure/weighted average F1 score
#' - `"VDM"`: Van Dongen Measure
#' - `"MHM"`: Meila-Heckerman Measure
#' - `"MMM"`: Maximum-Match Measure
#' - `"Mirkin"`: Mirkin Metric
#' - `"Accuracy"`: Set Matching Accuracy
#' @export
#' @examples
#' true <- rep(LETTERS[seq_len(3)], each=10)
#' pred <- c(rep("A", 8), rep("B", 9), rep("C", 3), rep("D", 10))
#' getPartitionMetrics(true, pred, level="class")
#' getPartitionMetrics(true, pred, level="dataset")
getPartitionMetrics <-function(true, pred,
metrics=c("WC","WH","AWC","AWH","FM"),
level="class", ...){
# Map level to the corresponding function
level_functions <- list(
"element" = getPartitionElementMetrics,
"class" = getPartitionClassMetrics,
"dataset" = getPartitionGlobalMetrics
)
.checkMetricsLevel(metrics, level, level_functions, use_default=FALSE,
use_attribute=TRUE, attr_name="allowed_metrics")
# Collect all arguments into a list
args <- list(true = true, pred = pred, metrics = metrics, ...)
do.call(level_functions[[level]], args)
}
#' getPartitionElementMetrics
#'
#' Computes a selection of external evaluation metrics for partition. The
#' metrics are reported per element.
#'
#' @inheritParams getPairConcordance
#' @param metrics The metrics to compute.
#' @keywords internal
#' @return A dataframe of metrics.
getPartitionElementMetrics <- function(true, pred, metrics=c("SPC"),
usePairs=TRUE, useNegatives=TRUE){
if (anyNA(true) | anyNA(pred))
stop("NA are not supported.")
if (is.character(true)) true <- as.factor(true)
if (is.character(pred)) pred <- as.factor(pred)
if (!is.atomic(true) || (!is.factor(true) && !is.integer(true)) ||
!is.atomic(pred) || (!is.factor(pred) && !is.integer(pred)) )
stop("true and pred must be vectors or factors but not lists.")
if(length(true) != length(pred)){
stop("The two input vectors should have the same length.")
}
res <- as.data.frame(lapply(setNames(metrics, metrics), FUN=function(m){
switch(m,
SPC = getPairConcordance(true, pred, usePairs=usePairs,
useNegatives=useNegatives, adjust=FALSE),
ASPC = getPairConcordance(true, pred, usePairs=usePairs,
useNegatives=useNegatives, adjust=TRUE),
stop("Unknown metric.")
)})
)
return(res)
}
attr(getPartitionElementMetrics, "allowed_metrics") <- c("SPC","ASPC")