-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhelper_functions.r
113 lines (96 loc) · 3.14 KB
/
helper_functions.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
## keep only the covariates needed for the analysis
## this has to be done in order to correctly use na.action
remove_unnecessary_covars <- function(data, time, status, variable,
group, model) {
# extract variables from outcome model
if (inherits(model, c("coxph", "mexhaz"))) {
model_vars <- all.vars(model$formula)
} else if (inherits(model, c("CauseSpecificCox", "FGR", "aalen",
"cox.aalen", "flexsurvreg",
"pecCforest", "prodlim",
"psm", "randomForest",
"riskRegression", "selectCox",
"glm", "ols", "rfsrc",
"penfitS3", "gbm",
"singleEventCB", "fcrr",
"comprisk"))) {
model_vars <- all.vars(model$call$formula)
} else if (inherits(model, "pecRpart")) {
model_vars <- all.vars(model$rpart$terms)
} else if (inherits(model, "ranger")) {
model_vars <- all.vars(model$call[[2]])
} else {
model_vars <- NULL
}
# covariates that are always needed
needed_covars <- c(time, status, variable, model_vars, group)
# remove duplicates
needed_covars <- unique(needed_covars)
# filter data
data <- dplyr::select(data, dplyr::all_of(needed_covars))
return(data)
}
## composite function to prepare the data for further use
prepare_inputdata <- function(data, time, status, variable, group, model,
na.action) {
# keep only needed columns
data <- remove_unnecessary_covars(data=data, time=time, status=status,
variable=variable, model=model,
group=group)
# perform na.action
if (is.function(na.action)) {
data <- na.action(data)
} else {
na.action <- get(na.action)
data <- na.action(data)
}
if (nrow(data)==0) {
stop("There is no data left after removing the missing values.")
}
return(data)
}
## use only data.frame methods, no tibbles etc.
use_data.frame <- function(data) {
# correct data type
if (!inherits(data, "data.frame")) {
stop("'data' must be a data.frame object.")
} else {
data <- as.data.frame(data)
}
return(data)
}
## takes a value x at which to read from the step function
## and step function data from which to read it
read_from_step_function <- function(x, data, est="surv", time="time") {
# keep only data with non-missing est
data <- data[which(!is.na(data[, est])), ]
# no extrapolation
if (x > max(data[, time])) {
return(NA)
}
# otherwise get value
check <- data[which(data[, time] <= x), ]
if (nrow(check)==0) {
if (est=="surv") {
val <- 1
} else if (est=="cif") {
val <- 0
} else {
val <- NA
}
} else {
val <- check[, est][which(check[, time]==max(check[, time]))][1]
}
return(val)
}
## calculate exact integral of a step function
stepfun_integral <- function(x, y) {
area <- 0
for (i in seq_len((length(x)-1))) {
x1 <- x[i]
x2 <- x[i+1]
rect_area <- (x2 - x1) * y[i]
area <- area + rect_area
}
return(area)
}