Improve Survival stuff #1833

Merged · 19 commits · Jul 11, 2017
1 change: 1 addition & 0 deletions DESCRIPTION
@@ -150,6 +150,7 @@ Suggests:
smoof,
sparseLDA,
stepPlr,
+survAUC,
SwarmSVM,
svglite,
testthat,
27 changes: 2 additions & 25 deletions LICENSE
@@ -1,25 +1,2 @@
-BSD 2-Clause License
-
-Copyright (c) 2013-2017, Bernd Bischl
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+YEAR: 2013-2017
+COPYRIGHT HOLDER: Bernd Bischl
6 changes: 3 additions & 3 deletions NAMESPACE
@@ -288,7 +288,6 @@ S3method(makeRLearner,surv.gamboost)
S3method(makeRLearner,surv.gbm)
S3method(makeRLearner,surv.glmboost)
S3method(makeRLearner,surv.glmnet)
-S3method(makeRLearner,surv.penalized)
S3method(makeRLearner,surv.randomForestSRC)
S3method(makeRLearner,surv.ranger)
S3method(makeRLearner,surv.rpart)
@@ -484,7 +483,6 @@ S3method(predictLearner,surv.gamboost)
S3method(predictLearner,surv.gbm)
S3method(predictLearner,surv.glmboost)
S3method(predictLearner,surv.glmnet)
-S3method(predictLearner,surv.penalized)
S3method(predictLearner,surv.randomForestSRC)
S3method(predictLearner,surv.ranger)
S3method(predictLearner,surv.rpart)
@@ -735,7 +733,6 @@ S3method(trainLearner,surv.gamboost)
S3method(trainLearner,surv.gbm)
S3method(trainLearner,surv.glmboost)
S3method(trainLearner,surv.glmnet)
-S3method(trainLearner,surv.penalized)
S3method(trainLearner,surv.randomForestSRC)
S3method(trainLearner,surv.ranger)
S3method(trainLearner,surv.rpart)
@@ -762,6 +759,7 @@ export(calculateConfusionMatrix)
export(calculateROCMeasures)
export(capLargeValues)
export(cindex)
+export(cindex.uno)
export(configureMlr)
export(convertBMRToRankMatrix)
export(convertMLBenchObjToTask)
@@ -877,6 +875,7 @@ export(helpLearner)
export(helpLearnerParam)
export(holdout)
export(hout)
+export(iauc.uno)
export(impute)
export(imputeConstant)
export(imputeHist)
@@ -1093,6 +1092,7 @@ export(setHyperPars)
export(setHyperPars2)
export(setId)
export(setLearnerId)
+export(setMeasurePars)
export(setPredictThreshold)
export(setPredictType)
export(setThreshold)
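The two new exports, cindex.uno and iauc.uno, are survival measures based on Uno's estimators from the survAUC package (hence the new Suggests entry in DESCRIPTION). A minimal usage sketch, assuming mlr's bundled lung.task survival task:

library(mlr)
lrn = makeLearner("surv.coxph")
# compare Harrell's C-index with Uno's C-index and integrated AUC
r = resample(lrn, lung.task, cv3, measures = list(cindex, cindex.uno, iauc.uno))
r$aggr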
4 changes: 4 additions & 0 deletions NEWS.md
@@ -1,5 +1,9 @@
# mlr 2.12:

+## general
+* relaxed the way wrappers can be nested -- the only explicitly forbidden
+combination is to wrap a tuning wrapper around another optimization wrapper
+
## functions - general
* generatePartialDependenceData: added parameter "range" to allow to specify the
range of values for the partial dependencies
5 changes: 2 additions & 3 deletions R/BaseWrapper.R
@@ -1,8 +1,7 @@
makeBaseWrapper = function(id, type, next.learner, package = character(0L), par.set = makeParamSet(),
par.vals = list(), learner.subclass, model.subclass) {
-
-if (inherits(next.learner, "OptWrapper"))
-stop("Cannot wrap an optimization wrapper with something else!")
+if (inherits(next.learner, "OptWrapper") && is.element("TuneWrapper", learner.subclass))
+stop("Cannot wrap a tuning wrapper around another optimization wrapper!")
ns = intersect(names(par.set$pars), names(next.learner$par.set$pars))
if (length(ns) > 0L)
stopf("Hyperparameter names in wrapper clash with base learner names: %s", collapse(ns))
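With the relaxed check, any non-tuning wrapper may now sit on top of an optimization wrapper; only wrapping a tuning wrapper around another optimization wrapper still errors. A sketch of the nesting this enables (learner and parameter grid chosen arbitrarily):

library(mlr)
lrn = makeLearner("classif.rpart")
ps = makeParamSet(makeDiscreteParam("cp", values = c(0.01, 0.05, 0.1)))
tuned = makeTuneWrapper(lrn, resampling = cv3, par.set = ps,
  control = makeTuneControlGrid())
# previously rejected, now allowed: bagging around a tuning wrapper
bagged = makeBaggingWrapper(tuned, bw.iters = 5L)
# still forbidden and still an error:
# makeTuneWrapper(tuned, resampling = cv3, par.set = ps,
#   control = makeTuneControlGrid())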
2 changes: 1 addition & 1 deletion R/ClusterTask.R
@@ -6,7 +6,7 @@ makeClusterTask = function(id = deparse(substitute(data)), data, weights = NULL,
assertChoice(fixup.data, choices = c("no", "quiet", "warn"))
assertFlag(check.data)

task = makeUnsupervisedTask("cluster", data, weights, blocking)
task = makeUnsupervisedTask("cluster", data, weights, blocking, fixup.data, check.data)
task$task.desc = makeClusterTaskDesc(id, data, weights, blocking)
addClasses(task, "ClusterTask")
}
2 changes: 1 addition & 1 deletion R/Filter.R
@@ -129,7 +129,7 @@ makeFilter(
supported.features = c("numerics", "ordered"),
fun = function(task, nselect, ...) {
if (inherits(task, "SurvTask")) {
data = getTaskData(task, target.extra = TRUE, recode.target = "rcens")
data = getTaskData(task, target.extra = TRUE, recode.target = "surv")
data = cbind(..surv = data$target, data$data)
target.ind = 1L
} else {
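Survival targets are now recoded with the "surv" option, i.e. as a right-censored survival::Surv object, replacing the removed "rcens" value. A quick sketch of what getTaskData returns under the new option, again assuming lung.task:

library(mlr)
d = getTaskData(lung.task, target.extra = TRUE, recode.target = "surv")
class(d$target)  # "Surv": right-censored survival times
head(d$data)     # feature columns only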
22 changes: 3 additions & 19 deletions R/Measure.R
@@ -43,7 +43,7 @@
#' \item{req.task}{Is task object required in calculation? Usually not the case}
#' \item{req.model}{Is model object required in calculation? Usually not the case.}
#' \item{req.feats}{Are feature values required in calculation? Usually not the case.}
-#' \item{req.prob}{Are predicted probabilites required in calculation? Usually not the case, example would be AUC.}
+#' \item{req.prob}{Are predicted probabilities required in calculation? Usually not the case, example would be AUC.}
#' }
#' Default is \code{character(0)}.
#' @param fun [\code{function(task, model, pred, feats, extra.args)}]\cr
@@ -63,6 +63,7 @@
#' }
#' @param extra.args [\code{list}]\cr
#' List of extra arguments which will always be passed to \code{fun}.
+#' Can be changed after construction via \code{\link{setMeasurePars}}.
#' Default is empty list.
#' @param aggr [\code{\link{Aggregation}}]\cr
#' Aggregation funtion, which is used to aggregate the values measured
@@ -156,24 +157,6 @@ getDefaultMeasure = function(x) {
)
}

-
-#' Set aggregation function of measure.
-#'
-#' Set how this measure will be aggregated after resampling.
-#' To see possible aggregation functions: \code{\link{aggregations}}.
-#'
-#' @param measure [\code{\link{Measure}}]\cr
-#' Performance measure.
-#' @template arg_aggr
-#' @return [\code{\link{Measure}}] with changed aggregation behaviour.
-#' @export
-setAggregation = function(measure, aggr) {
-assertClass(measure, classes = "Measure")
-assertClass(aggr, classes = "Aggregation")
-measure$aggr = aggr
-return(measure)
-}
-
#' @export
print.Measure = function(x, ...) {
catf("Name: %s", x$name)
@@ -182,5 +165,6 @@ print.Measure = function(x, ...) {
catf("Minimize: %s", x$minimize)
catf("Best: %g; Worst: %g", x$best, x$worst)
catf("Aggregated by: %s", x$aggr$id)
catf("Arguments: %s", listToShortString(x$extra.args))
catf("Note: %s", x$note)
}
43 changes: 43 additions & 0 deletions R/Measure_operators.R
@@ -0,0 +1,43 @@
+#' @title Set parameters of performance measures
+#'
+#' @description
+#' Sets hyperparameters of measures.
+#'
+#' @param measure [\code{\link{Measure}}]\cr
+#' Performance measure.
+#' @param ... [any]\cr
+#' Named (hyper)parameters with new settings. Alternatively these can be passed
+#' using the \code{par.vals} argument.
+#' @param par.vals [\code{list}]\cr
+#' Optional list of named (hyper)parameter settings. The arguments in
+#' \code{...} take precedence over values in this list.
+#' @template ret_measure
+#' @family performance
+#' @export
+setMeasurePars = function(measure, ..., par.vals = list()) {
+args = list(...)
+assertClass(measure, classes = "Measure")
+assertList(args, names = "unique", .var.name = "parameter settings")
+assertList(par.vals, names = "unique", .var.name = "parameter settings")
+measure$extra.args = insert(measure$extra.args, insert(par.vals, args))
+measure
+}
+
+#' @title Set aggregation function of measure.
+#'
+#' @description
+#' Set how this measure will be aggregated after resampling.
+#' To see possible aggregation functions: \code{\link{aggregations}}.
+#'
+#' @param measure [\code{\link{Measure}}]\cr
+#' Performance measure.
+#' @template arg_aggr
+#' @return [\code{\link{Measure}}] with changed aggregation behaviour.
+#' @family performance
+#' @export
+setAggregation = function(measure, aggr) {
+assertClass(measure, classes = "Measure")
+assertClass(aggr, classes = "Aggregation")
+measure$aggr = aggr
+return(measure)
+}
2 changes: 1 addition & 1 deletion R/MultilabelTask.R
@@ -8,7 +8,7 @@ makeMultilabelTask = function(id = deparse(substitute(data)), data, target, weig
assertChoice(fixup.data, choices = c("no", "quiet", "warn"))
assertFlag(check.data)

task = makeSupervisedTask("multilabel", data, target, weights, blocking)
task = makeSupervisedTask("multilabel", data, target, weights, blocking, fixup.data, check.data)
# currently we dont do any fixup here
if (check.data) {
for (cn in target)
3 changes: 1 addition & 2 deletions R/Prediction_operators.R
@@ -158,8 +158,7 @@ getPredictionTruth.PredictionCluster = function(pred) {

#' @export
getPredictionTruth.PredictionSurv = function(pred) {
-lookup = setNames(c("left", "right", "interval2"), c("lcens", "rcens", "icens"))
-Surv(pred$data$truth.time, pred$data$truth.event, type = lookup[pred$task.desc$censoring])
+Surv(pred$data$truth.time, pred$data$truth.event, type = "right")
}

#' @export
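The lookup over the task's censoring type is gone: survival predictions are now always treated as right-censored. A sketch of the Surv object this constructs:

library(survival)
# status 1 = event observed, 0 = right-censored
Surv(time = c(5, 12, 9), event = c(1, 0, 1), type = "right")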
29 changes: 20 additions & 9 deletions R/RLearner_classif_xgboost.R
@@ -7,7 +7,7 @@ makeRLearner.classif.xgboost = function() {
# we pass all of what goes in 'params' directly to ... of xgboost
# makeUntypedLearnerParam(id = "params", default = list()),
makeDiscreteLearnerParam(id = "booster", default = "gbtree", values = c("gbtree", "gblinear", "dart")),
makeIntegerLearnerParam(id = "silent", default = 0L, tunable = FALSE),
makeUntypedLearnerParam(id = "watchlist", default = NULL, tunable = FALSE),
makeNumericLearnerParam(id = "eta", default = 0.3, lower = 0, upper = 1),
makeNumericLearnerParam(id = "gamma", default = 0, lower = 0),
makeIntegerLearnerParam(id = "max_depth", default = 6L, lower = 1L),
@@ -16,7 +16,7 @@ makeRLearner.classif.xgboost = function() {
makeNumericLearnerParam(id = "colsample_bytree", default = 1, lower = 0, upper = 1),
makeNumericLearnerParam(id = "colsample_bylevel", default = 1, lower = 0, upper = 1),
makeIntegerLearnerParam(id = "num_parallel_tree", default = 1L, lower = 1L),
makeNumericLearnerParam(id = "lambda", default = 0, lower = 0),
makeNumericLearnerParam(id = "lambda", default = 1, lower = 0),
makeNumericLearnerParam(id = "lambda_bias", default = 0, lower = 0),
makeNumericLearnerParam(id = "alpha", default = 0, lower = 0),
makeUntypedLearnerParam(id = "objective", default = "binary:logistic", tunable = FALSE),
@@ -26,6 +26,7 @@
makeNumericLearnerParam(id = "missing", default = NULL, tunable = FALSE, when = "both",
special.vals = list(NA, NA_real_, NULL)),
makeIntegerVectorLearnerParam(id = "monotone_constraints", default = 0, lower = -1, upper = 1),
makeNumericLearnerParam(id = "tweedie_variance_power", lower = 1, upper = 2, default = 1.5, requires = quote(objective == "reg:tweedie")),
makeIntegerLearnerParam(id = "nthread", lower = 1L, tunable = FALSE),
makeIntegerLearnerParam(id = "nrounds", default = 1L, lower = 1L),
# FIXME nrounds seems to have no default in xgboost(), if it has 1, par.vals is redundant
@@ -38,7 +39,14 @@ makeRLearner.classif.xgboost = function() {
makeDiscreteLearnerParam(id = "sample_type", default = "uniform", values = c("uniform", "weighted"), requires = quote(booster == "dart")),
makeDiscreteLearnerParam(id = "normalize_type", default = "tree", values = c("tree", "forest"), requires = quote(booster == "dart")),
makeNumericLearnerParam(id = "rate_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")),
makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart"))
makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")),
# TODO: uncomment the following after the next CRAN update, and set max_depth's lower = 0L
#makeLogicalLearnerParam(id = "one_drop", default = FALSE, requires = quote(booster == "dart")),
#makeDiscreteLearnerParam(id = "tree_method", default = "exact", values = c("exact", "hist"), requires = quote(booster != "gblinear")),
#makeDiscreteLearnerParam(id = "grow_policy", default = "depthwise", values = c("depthwise", "lossguide"), requires = quote(tree_method == "hist")),
#makeIntegerLearnerParam(id = "max_leaves", default = 0L, lower = 0L, requires = quote(grow_policy == "lossguide")),
#makeIntegerLearnerParam(id = "max_bin", default = 256L, lower = 2L, requires = quote(tree_method == "hist")),
makeUntypedLearnerParam(id = "callbacks", default = list(), tunable = FALSE)
),
par.vals = list(nrounds = 1L, verbose = 0L),
properties = c("twoclass", "multiclass", "numerics", "prob", "weights", "missings", "featimp"),
@@ -54,8 +62,6 @@ trainLearner.classif.xgboost = function(.learner, .task, .subset, .weights = NUL

td = getTaskDesc(.task)
parlist = list(...)
-parlist$data = data.matrix(getTaskData(.task, .subset, target.extra = TRUE)$data)
-parlist$label = match(as.character(getTaskData(.task, .subset, target.extra = TRUE)$target), td$class.levels) - 1
nc = length(td$class.levels)

if (is.null(parlist$objective))
@@ -68,10 +74,17 @@
if (parlist$objective %in% c("multi:softprob", "multi:softmax"))
parlist$num_class = nc

+task.data = getTaskData(.task, .subset, target.extra = TRUE)
+label = match(as.character(task.data$target), td$class.levels) - 1
+parlist$data = xgboost::xgb.DMatrix(data = data.matrix(task.data$data), label = label)
+
if (!is.null(.weights))
-parlist$data = xgboost::xgb.DMatrix(data = parlist$data, label = parlist$label, weight = .weights)
+xgboost::setinfo(parlist$data, "weight", .weights)

+if (is.null(parlist$watchlist))
+parlist$watchlist = list(train = parlist$data)
+
-do.call(xgboost::xgboost, parlist)
+do.call(xgboost::xgb.train, parlist)
}

#' @export
@@ -131,5 +144,3 @@ getFeatureImportanceLearner.classif.xgboost = function(.learner, .model, ...) {
fiv = imp$Gain
setNames(fiv, imp$Feature)
}
-
-
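Training now goes through xgboost's xgb.DMatrix / xgb.train interface: the label travels inside the DMatrix, observation weights are attached with setinfo, and a default watchlist is supplied. A standalone sketch of the same call sequence against xgboost directly (data, weights, and parameters are illustrative):

library(xgboost)
x = data.matrix(iris[, 1:4])
y = as.integer(iris$Species) - 1L  # xgboost expects 0-based class labels
dtrain = xgboost::xgb.DMatrix(data = x, label = y)
xgboost::setinfo(dtrain, "weight", rep(1, nrow(x)))  # optional case weights
bst = xgboost::xgb.train(
  params = list(objective = "multi:softprob", num_class = 3L, eta = 0.3),
  data = dtrain, nrounds = 10L, watchlist = list(train = dtrain), verbose = 0L)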
30 changes: 21 additions & 9 deletions R/RLearner_regr_xgboost.R
@@ -7,7 +7,7 @@ makeRLearner.regr.xgboost = function() {
# we pass all of what goes in 'params' directly to ... of xgboost
#makeUntypedLearnerParam(id = "params", default = list()),
makeDiscreteLearnerParam(id = "booster", default = "gbtree", values = c("gbtree", "gblinear", "dart")),
makeIntegerLearnerParam(id = "silent", default = 0L, tunable = FALSE),
makeUntypedLearnerParam(id = "watchlist", default = NULL, tunable = FALSE),
makeNumericLearnerParam(id = "eta", default = 0.3, lower = 0, upper = 1),
makeNumericLearnerParam(id = "gamma", default = 0, lower = 0),
makeIntegerLearnerParam(id = "max_depth", default = 6L, lower = 1L),
@@ -16,16 +16,17 @@ makeRLearner.regr.xgboost = function() {
makeNumericLearnerParam(id = "colsample_bytree", default = 1, lower = 0, upper = 1),
makeNumericLearnerParam(id = "colsample_bylevel", default = 1, lower = 0, upper = 1),
makeIntegerLearnerParam(id = "num_parallel_tree", default = 1L, lower = 1L),
makeNumericLearnerParam(id = "lambda", default = 0, lower = 0),
makeNumericLearnerParam(id = "lambda", default = 1, lower = 0),
makeNumericLearnerParam(id = "lambda_bias", default = 0, lower = 0),
makeNumericLearnerParam(id = "alpha", default = 0, lower = 0),
makeUntypedLearnerParam(id = "objective", default = "reg:linear", tunable = FALSE),
makeUntypedLearnerParam(id = "eval_metric", default = "rmse", tunable = FALSE),
makeNumericLearnerParam(id = "base_score", default = 0.5, tunable = FALSE),

makeNumericLearnerParam(id = "max_delta_step", lower = 0, default = 0),
makeNumericLearnerParam(id = "missing", default = NULL, tunable = FALSE, when = "both",
special.vals = list(NA, NA_real_, NULL)),
makeIntegerVectorLearnerParam(id = "monotone_constraints", default = 0, lower = -1, upper = 1),
makeNumericLearnerParam(id = "tweedie_variance_power", lower = 1, upper = 2, default = 1.5, requires = quote(objective == "reg:tweedie")),
makeIntegerLearnerParam(id = "nthread", lower = 1L, tunable = FALSE),
makeIntegerLearnerParam(id = "nrounds", default = 1L, lower = 1L),
# FIXME nrounds seems to have no default in xgboost(), if it has 1, par.vals is redundant
@@ -35,9 +36,17 @@
requires = quote(verbose == 1L)),
makeIntegerLearnerParam(id = "early_stopping_rounds", default = NULL, lower = 1L, special.vals = list(NULL), tunable = FALSE),
makeLogicalLearnerParam(id = "maximize", default = NULL, special.vals = list(NULL), tunable = FALSE),
makeDiscreteLearnerParam(id = "sample_type", default = "uniform", values = c("uniform", "weighted"), requires = quote(booster == "dart")),
makeDiscreteLearnerParam(id = "normalize_type", default = "tree", values = c("tree", "forest"), requires = quote(booster == "dart")),
makeNumericLearnerParam(id = "rate_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")),
makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart"))
makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")),
# TODO: uncomment the following after the next CRAN update, and set max_depth's lower = 0L
#makeLogicalLearnerParam(id = "one_drop", default = FALSE, requires = quote(booster == "dart")),
#makeDiscreteLearnerParam(id = "tree_method", default = "exact", values = c("exact", "hist"), requires = quote(booster != "gblinear")),
#makeDiscreteLearnerParam(id = "grow_policy", default = "depthwise", values = c("depthwise", "lossguide"), requires = quote(tree_method == "hist")),
#makeIntegerLearnerParam(id = "max_leaves", default = 0L, lower = 0L, requires = quote(grow_policy == "lossguide")),
#makeIntegerLearnerParam(id = "max_bin", default = 256L, lower = 2L, requires = quote(tree_method == "hist")),
makeUntypedLearnerParam(id = "callbacks", default = list(), tunable = FALSE)
),
par.vals = list(nrounds = 1L, verbose = 0L),
properties = c("numerics", "weights", "featimp", "missings"),
@@ -52,16 +61,19 @@ makeRLearner.regr.xgboost = function() {
trainLearner.regr.xgboost = function(.learner, .task, .subset, .weights = NULL, ...) {
parlist = list(...)

-parlist$label = getTaskData(.task, .subset, target.extra = TRUE)$target
-parlist$data = data.matrix(getTaskData(.task, .subset, target.extra = TRUE)$data)
-
if (is.null(parlist$objective))
parlist$objective = "reg:linear"

+task.data = getTaskData(.task, .subset, target.extra = TRUE)
+parlist$data = xgboost::xgb.DMatrix(data = data.matrix(task.data$data), label = task.data$target)
+
if (!is.null(.weights))
-parlist$data = xgboost::xgb.DMatrix(data = parlist$data, label = parlist$label, weight = .weights)
+xgboost::setinfo(parlist$data, "weight", .weights)

+if (is.null(parlist$watchlist))
+parlist$watchlist = list(train = parlist$data)
+
-do.call(xgboost::xgboost, parlist)
+do.call(xgboost::xgb.train, parlist)
}

#' @export
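regr.xgboost gets the same xgb.train rework plus the newly exposed tweedie_variance_power, which only takes effect with objective = "reg:tweedie". A minimal sketch (nrounds picked arbitrarily; bh.task is mlr's bundled Boston housing regression task):

library(mlr)
lrn = makeLearner("regr.xgboost", objective = "reg:tweedie",
  tweedie_variance_power = 1.5, nrounds = 50L)
mod = train(lrn, bh.task)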