forked from mlr-org/mlr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPreprocWrapperPCA.R
45 lines (41 loc) · 1.47 KB
/
PreprocWrapperPCA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#' @title Perform a PCA on all numeric columns in the data set.
#'
#' @description
#' Before training a PCA will be performed on all numeric columns in the trainings dataset.
#' The PCA center, scale and rotation will be saved and applied to the test dataset.
#' Internally uses \code{\link{prcomp}} with \code{scale = TRUE} before training.
#'
#' @template arg_learner
#' @export
#' @family wrapper
#' @template ret_learner
makePreprocWrapperPCA = function(learner) {
learner = checkLearner(learner)
trainfun = function(data, target, args) {
cns = colnames(data)
nums = setdiff(cns[vlapply(data, is.numeric)], target)
if (!length(nums)) {
return(list(data = data, control = list()))
}
x = data[, nums, drop = FALSE]
pca = prcomp(x, scale = TRUE)
data = data[, setdiff(cns, nums), drop = FALSE]
data = cbind(data, as.data.frame(pca$x))
ctrl = list(center = pca$center, scale = pca$scale, rotation = pca$rotation, pca.colnames = nums)
list(data = data, control = ctrl)
}
predictfun = function(data, target, args, control) {
# no numeric features ?
if (!length(control)) {
return(data)
}
cns = colnames(data)
nums = control$pca.colnames
x = as.matrix(data[, nums, drop = FALSE])
x = scale(x, center = control$center, scale = control$scale)
x = x %*% control$rotation
data = data[, setdiff(cns, nums), drop = FALSE]
cbind(data, as.data.frame(x))
}
makePreprocWrapper(learner, trainfun, predictfun)
}