forked from mlr-org/mlr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsummarizeNA.R
46 lines (35 loc) · 1.45 KB
/
summarizeNA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#' Characterizes only variables of a data set with missing values. So, missing values are painted
#' black, while other observations keep white.
#'
#' @param data [\code{data.frame}]\cr
#' Data to summarize. Columns can be of type numeric, integer, logical, factor or character.
#' Characters and logicals will be treated as factors.
#' @return Names of the variables with their frequency of missing values and an additional plot
#' which shows the position of the missing values (color = black) for each variable with NAs.
#'
#' @export
#' @title Giving an image of a dataset with missing values
summaryNA = function(dataset, show.plot = F, margin.left = 4) {
num = as.numeric(which(apply(is.na(dataset), 2, any)))
if (length(num) > 0) {
cat("Variables with NAs: ", colnames(dataset)[num], "\n")
cat("Number of NAs: ", colSums(is.na(dataset[, num, drop = F])), "\n")
dataset.new = dataset[, num, drop = F]
color = apply(dataset.new, 2, function(x) as.integer(is.na(x)))
if (show.plot) {
image(color, col = c("white", "black"), yaxt = "n")
par(mar = c(5, margin.left, 4, 2) + 0.1)
abline(v = -0.001)
abline(h = 1.015)
if (length(num) == 1) {
y.type = 0
} else {
y.type = 0:(ncol(dataset.new) - 1) / (length(dataset.new) - 1)
}
axis(2, labels = colnames(dataset.new), at = y.type, las = 2)
}
}
else {
cat("There are no missing values in this dataset!!!", "\n")
}
}