Improve Survival stuff #1833

Merged · 19 commits · Jul 11, 2017
1 change: 1 addition & 0 deletions DESCRIPTION
@@ -150,6 +150,7 @@ Suggests:
smoof,
sparseLDA,
stepPlr,
+survAUC,
SwarmSVM,
svglite,
testthat,
27 changes: 2 additions & 25 deletions LICENSE
@@ -1,25 +1,2 @@
-BSD 2-Clause License
-
-Copyright (c) 2013-2017, Bernd Bischl
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+YEAR: 2013-2017
+COPYRIGHT HOLDER: Bernd Bischl
6 changes: 3 additions & 3 deletions NAMESPACE
@@ -288,7 +288,6 @@ S3method(makeRLearner,surv.gamboost)
S3method(makeRLearner,surv.gbm)
S3method(makeRLearner,surv.glmboost)
S3method(makeRLearner,surv.glmnet)
-S3method(makeRLearner,surv.penalized)
S3method(makeRLearner,surv.randomForestSRC)
S3method(makeRLearner,surv.ranger)
S3method(makeRLearner,surv.rpart)
@@ -484,7 +483,6 @@ S3method(predictLearner,surv.gamboost)
S3method(predictLearner,surv.gbm)
S3method(predictLearner,surv.glmboost)
S3method(predictLearner,surv.glmnet)
-S3method(predictLearner,surv.penalized)
S3method(predictLearner,surv.randomForestSRC)
S3method(predictLearner,surv.ranger)
S3method(predictLearner,surv.rpart)
@@ -735,7 +733,6 @@ S3method(trainLearner,surv.gamboost)
S3method(trainLearner,surv.gbm)
S3method(trainLearner,surv.glmboost)
S3method(trainLearner,surv.glmnet)
-S3method(trainLearner,surv.penalized)
S3method(trainLearner,surv.randomForestSRC)
S3method(trainLearner,surv.ranger)
S3method(trainLearner,surv.rpart)
@@ -762,6 +759,7 @@ export(calculateConfusionMatrix)
export(calculateROCMeasures)
export(capLargeValues)
export(cindex)
+export(cindex.uno)
export(configureMlr)
export(convertBMRToRankMatrix)
export(convertMLBenchObjToTask)
@@ -877,6 +875,7 @@ export(helpLearner)
export(helpLearnerParam)
export(holdout)
export(hout)
+export(iauc.uno)
export(impute)
export(imputeConstant)
export(imputeHist)
@@ -1093,6 +1092,7 @@ export(setHyperPars)
export(setHyperPars2)
export(setId)
export(setLearnerId)
+export(setMeasurePars)
export(setPredictThreshold)
export(setPredictType)
export(setThreshold)
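The two new exports, cindex.uno and iauc.uno, are survival measures based on Uno's estimators from the survAUC package (hence the new Suggests entry in DESCRIPTION). A minimal usage sketch, assuming mlr's bundled lung.task survival task:

library(mlr)
lrn = makeLearner("surv.coxph")
# compare Harrell's C-index with Uno's C-index and integrated AUC
r = resample(lrn, lung.task, cv3, measures = list(cindex, cindex.uno, iauc.uno))
r$aggr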
4 changes: 4 additions & 0 deletions NEWS.md
@@ -1,5 +1,9 @@
# mlr 2.12:

+## general
+* relaxed the way wrappers can be nested -- the only explicitly forbidden
+combination is to wrap a tuning wrapper around another optimization wrapper
+
## functions - general
* generatePartialDependenceData: added parameter "range" to allow to specify the
range of values for the partial dependencies
5 changes: 2 additions & 3 deletions R/BaseWrapper.R
@@ -1,8 +1,7 @@
makeBaseWrapper = function(id, type, next.learner, package = character(0L), par.set = makeParamSet(),
par.vals = list(), learner.subclass, model.subclass) {
-
-if (inherits(next.learner, "OptWrapper"))
-stop("Cannot wrap an optimization wrapper with something else!")
+if (inherits(next.learner, "OptWrapper") && is.element("TuneWrapper", learner.subclass))
+stop("Cannot wrap a tuning wrapper around another optimization wrapper!")
ns = intersect(names(par.set$pars), names(next.learner$par.set$pars))
if (length(ns) > 0L)
stopf("Hyperparameter names in wrapper clash with base learner names: %s", collapse(ns))
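With the relaxed check, any non-tuning wrapper may now sit on top of an optimization wrapper; only wrapping a tuning wrapper around another optimization wrapper still errors. A sketch of the nesting this enables (learner and parameter grid chosen arbitrarily):

library(mlr)
lrn = makeLearner("classif.rpart")
ps = makeParamSet(makeDiscreteParam("cp", values = c(0.01, 0.05, 0.1)))
tuned = makeTuneWrapper(lrn, resampling = cv3, par.set = ps,
  control = makeTuneControlGrid())
# previously rejected, now allowed: bagging around a tuning wrapper
bagged = makeBaggingWrapper(tuned, bw.iters = 5L)
# still forbidden and still an error:
# makeTuneWrapper(tuned, resampling = cv3, par.set = ps,
#   control = makeTuneControlGrid())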
2 changes: 1 addition & 1 deletion R/ClusterTask.R
@@ -6,7 +6,7 @@ makeClusterTask = function(id = deparse(substitute(data)), data, weights = NULL,
assertChoice(fixup.data, choices = c("no", "quiet", "warn"))
assertFlag(check.data)

task = makeUnsupervisedTask("cluster", data, weights, blocking)
task = makeUnsupervisedTask("cluster", data, weights, blocking, fixup.data, check.data)
task$task.desc = makeClusterTaskDesc(id, data, weights, blocking)
addClasses(task, "ClusterTask")
}
2 changes: 1 addition & 1 deletion R/Filter.R
@@ -129,7 +129,7 @@ makeFilter(
supported.features = c("numerics", "ordered"),
fun = function(task, nselect, ...) {
if (inherits(task, "SurvTask")) {
data = getTaskData(task, target.extra = TRUE, recode.target = "rcens")
data = getTaskData(task, target.extra = TRUE, recode.target = "surv")
data = cbind(..surv = data$target, data$data)
target.ind = 1L
} else {
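Survival targets are now recoded with the "surv" option, i.e. as a right-censored survival::Surv object, replacing the removed "rcens" value. A quick sketch of what getTaskData returns under the new option, again assuming lung.task:

library(mlr)
d = getTaskData(lung.task, target.extra = TRUE, recode.target = "surv")
class(d$target)  # "Surv": right-censored survival times
head(d$data)     # feature columns only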
22 changes: 3 additions & 19 deletions R/Measure.R
@@ -43,7 +43,7 @@
#' \item{req.task}{Is task object required in calculation? Usually not the case}
#' \item{req.model}{Is model object required in calculation? Usually not the case.}
#' \item{req.feats}{Are feature values required in calculation? Usually not the case.}
-#' \item{req.prob}{Are predicted probabilites required in calculation? Usually not the case, example would be AUC.}
+#' \item{req.prob}{Are predicted probabilities required in calculation? Usually not the case, example would be AUC.}
#' }
#' Default is \code{character(0)}.
#' @param fun [\code{function(task, model, pred, feats, extra.args)}]\cr
@@ -63,6 +63,7 @@
#' }
#' @param extra.args [\code{list}]\cr
#' List of extra arguments which will always be passed to \code{fun}.
+#' Can be changed after construction via \code{\link{setMeasurePars}}.
#' Default is empty list.
#' @param aggr [\code{\link{Aggregation}}]\cr
#' Aggregation funtion, which is used to aggregate the values measured
@@ -156,24 +157,6 @@ getDefaultMeasure = function(x) {
)
}

-
-#' Set aggregation function of measure.
-#'
-#' Set how this measure will be aggregated after resampling.
-#' To see possible aggregation functions: \code{\link{aggregations}}.
-#'
-#' @param measure [\code{\link{Measure}}]\cr
-#' Performance measure.
-#' @template arg_aggr
-#' @return [\code{\link{Measure}}] with changed aggregation behaviour.
-#' @export
-setAggregation = function(measure, aggr) {
-assertClass(measure, classes = "Measure")
-assertClass(aggr, classes = "Aggregation")
-measure$aggr = aggr
-return(measure)
-}
-
#' @export
print.Measure = function(x, ...) {
catf("Name: %s", x$name)
@@ -182,5 +165,6 @@ print.Measure = function(x, ...) {
catf("Minimize: %s", x$minimize)
catf("Best: %g; Worst: %g", x$best, x$worst)
catf("Aggregated by: %s", x$aggr$id)
catf("Arguments: %s", listToShortString(x$extra.args))
catf("Note: %s", x$note)
}
43 changes: 43 additions & 0 deletions R/Measure_operators.R
@@ -0,0 +1,43 @@
+#' @title Set parameters of performance measures
+#'
+#' @description
+#' Sets hyperparameters of measures.
+#'
+#' @param measure [\code{\link{Measure}}]\cr
+#' Performance measure.
+#' @param ... [any]\cr
+#' Named (hyper)parameters with new settings. Alternatively these can be passed
+#' using the \code{par.vals} argument.
+#' @param par.vals [\code{list}]\cr
+#' Optional list of named (hyper)parameter settings. The arguments in
+#' \code{...} take precedence over values in this list.
+#' @template ret_measure
+#' @family performance
+#' @export
+setMeasurePars = function(measure, ..., par.vals = list()) {
+args = list(...)
+assertClass(measure, classes = "Measure")
+assertList(args, names = "unique", .var.name = "parameter settings")
+assertList(par.vals, names = "unique", .var.name = "parameter settings")
+measure$extra.args = insert(measure$extra.args, insert(par.vals, args))
+measure
+}
+
+#' @title Set aggregation function of measure.
+#'
+#' @description
+#' Set how this measure will be aggregated after resampling.
+#' To see possible aggregation functions: \code{\link{aggregations}}.
+#'
+#' @param measure [\code{\link{Measure}}]\cr
+#' Performance measure.
+#' @template arg_aggr
+#' @return [\code{\link{Measure}}] with changed aggregation behaviour.
+#' @family performance
+#' @export
+setAggregation = function(measure, aggr) {
+assertClass(measure, classes = "Measure")
+assertClass(aggr, classes = "Aggregation")
+measure$aggr = aggr
+return(measure)
+}
2 changes: 1 addition & 1 deletion R/MultilabelTask.R
@@ -8,7 +8,7 @@ makeMultilabelTask = function(id = deparse(substitute(data)), data, target, weig
assertChoice(fixup.data, choices = c("no", "quiet", "warn"))
assertFlag(check.data)

task = makeSupervisedTask("multilabel", data, target, weights, blocking)
task = makeSupervisedTask("multilabel", data, target, weights, blocking, fixup.data, check.data)
# currently we dont do any fixup here
if (check.data) {
for (cn in target)
3 changes: 1 addition & 2 deletions R/Prediction_operators.R
@@ -158,8 +158,7 @@ getPredictionTruth.PredictionCluster = function(pred) {

#' @export
getPredictionTruth.PredictionSurv = function(pred) {
-lookup = setNames(c("left", "right", "interval2"), c("lcens", "rcens", "icens"))
-Surv(pred$data$truth.time, pred$data$truth.event, type = lookup[pred$task.desc$censoring])
+Surv(pred$data$truth.time, pred$data$truth.event, type = "right")
}

#' @export
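The lookup over the task's censoring type is gone: survival predictions are now always treated as right-censored. A sketch of the Surv object this constructs:

library(survival)
# status 1 = event observed, 0 = right-censored
Surv(time = c(5, 12, 9), event = c(1, 0, 1), type = "right")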
29 changes: 20 additions & 9 deletions R/RLearner_classif_xgboost.R
@@ -7,7 +7,7 @@ makeRLearner.classif.xgboost = function() {
# we pass all of what goes in 'params' directly to ... of xgboost
# makeUntypedLearnerParam(id = "params", default = list()),
makeDiscreteLearnerParam(id = "booster", default = "gbtree", values = c("gbtree", "gblinear", "dart")),
makeIntegerLearnerParam(id = "silent", default = 0L, tunable = FALSE),
makeUntypedLearnerParam(id = "watchlist", default = NULL, tunable = FALSE),
makeNumericLearnerParam(id = "eta", default = 0.3, lower = 0, upper = 1),
makeNumericLearnerParam(id = "gamma", default = 0, lower = 0),
makeIntegerLearnerParam(id = "max_depth", default = 6L, lower = 1L),
@@ -16,7 +16,7 @@ makeRLearner.classif.xgboost = function() {
makeNumericLearnerParam(id = "colsample_bytree", default = 1, lower = 0, upper = 1),
makeNumericLearnerParam(id = "colsample_bylevel", default = 1, lower = 0, upper = 1),
makeIntegerLearnerParam(id = "num_parallel_tree", default = 1L, lower = 1L),
makeNumericLearnerParam(id = "lambda", default = 0, lower = 0),
makeNumericLearnerParam(id = "lambda", default = 1, lower = 0),
makeNumericLearnerParam(id = "lambda_bias", default = 0, lower = 0),
makeNumericLearnerParam(id = "alpha", default = 0, lower = 0),
makeUntypedLearnerParam(id = "objective", default = "binary:logistic", tunable = FALSE),
@@ -26,6 +26,7 @@
makeNumericLearnerParam(id = "missing", default = NULL, tunable = FALSE, when = "both",
special.vals = list(NA, NA_real_, NULL)),
makeIntegerVectorLearnerParam(id = "monotone_constraints", default = 0, lower = -1, upper = 1),
makeNumericLearnerParam(id = "tweedie_variance_power", lower = 1, upper = 2, default = 1.5, requires = quote(objective == "reg:tweedie")),
makeIntegerLearnerParam(id = "nthread", lower = 1L, tunable = FALSE),
makeIntegerLearnerParam(id = "nrounds", default = 1L, lower = 1L),
# FIXME nrounds seems to have no default in xgboost(), if it has 1, par.vals is redundant
@@ -38,7 +39,14 @@ makeRLearner.classif.xgboost = function() {
makeDiscreteLearnerParam(id = "sample_type", default = "uniform", values = c("uniform", "weighted"), requires = quote(booster == "dart")),
makeDiscreteLearnerParam(id = "normalize_type", default = "tree", values = c("tree", "forest"), requires = quote(booster == "dart")),
makeNumericLearnerParam(id = "rate_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")),
makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart"))
makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")),
# TODO: uncomment the following after the next CRAN update, and set max_depth's lower = 0L
#makeLogicalLearnerParam(id = "one_drop", default = FALSE, requires = quote(booster == "dart")),
#makeDiscreteLearnerParam(id = "tree_method", default = "exact", values = c("exact", "hist"), requires = quote(booster != "gblinear")),
#makeDiscreteLearnerParam(id = "grow_policy", default = "depthwise", values = c("depthwise", "lossguide"), requires = quote(tree_method == "hist")),
#makeIntegerLearnerParam(id = "max_leaves", default = 0L, lower = 0L, requires = quote(grow_policy == "lossguide")),
#makeIntegerLearnerParam(id = "max_bin", default = 256L, lower = 2L, requires = quote(tree_method == "hist")),
makeUntypedLearnerParam(id = "callbacks", default = list(), tunable = FALSE)
),
par.vals = list(nrounds = 1L, verbose = 0L),
properties = c("twoclass", "multiclass", "numerics", "prob", "weights", "missings", "featimp"),
@@ -54,8 +62,6 @@ trainLearner.classif.xgboost = function(.learner, .task, .subset, .weights = NUL

td = getTaskDesc(.task)
parlist = list(...)
-parlist$data = data.matrix(getTaskData(.task, .subset, target.extra = TRUE)$data)
-parlist$label = match(as.character(getTaskData(.task, .subset, target.extra = TRUE)$target), td$class.levels) - 1
nc = length(td$class.levels)

if (is.null(parlist$objective))
@@ -68,10 +74,17 @@
if (parlist$objective %in% c("multi:softprob", "multi:softmax"))
parlist$num_class = nc

+task.data = getTaskData(.task, .subset, target.extra = TRUE)
+label = match(as.character(task.data$target), td$class.levels) - 1
+parlist$data = xgboost::xgb.DMatrix(data = data.matrix(task.data$data), label = label)
+
if (!is.null(.weights))
-parlist$data = xgboost::xgb.DMatrix(data = parlist$data, label = parlist$label, weight = .weights)
+xgboost::setinfo(parlist$data, "weight", .weights)

+if (is.null(parlist$watchlist))
+parlist$watchlist = list(train = parlist$data)
+
-do.call(xgboost::xgboost, parlist)
+do.call(xgboost::xgb.train, parlist)
}

#' @export
@@ -131,5 +144,3 @@ getFeatureImportanceLearner.classif.xgboost = function(.learner, .model, ...) {
fiv = imp$Gain
setNames(fiv, imp$Feature)
}
-
-
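Training now goes through xgboost's xgb.DMatrix / xgb.train interface: the label travels inside the DMatrix, observation weights are attached with setinfo, and a default watchlist is supplied. A standalone sketch of the same call sequence against xgboost directly (data, weights, and parameters are illustrative):

library(xgboost)
x = data.matrix(iris[, 1:4])
y = as.integer(iris$Species) - 1L  # xgboost expects 0-based class labels
dtrain = xgboost::xgb.DMatrix(data = x, label = y)
xgboost::setinfo(dtrain, "weight", rep(1, nrow(x)))  # optional case weights
bst = xgboost::xgb.train(
  params = list(objective = "multi:softprob", num_class = 3L, eta = 0.3),
  data = dtrain, nrounds = 10L, watchlist = list(train = dtrain), verbose = 0L)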
30 changes: 21 additions & 9 deletions R/RLearner_regr_xgboost.R
@@ -7,7 +7,7 @@ makeRLearner.regr.xgboost = function() {
# we pass all of what goes in 'params' directly to ... of xgboost
#makeUntypedLearnerParam(id = "params", default = list()),
makeDiscreteLearnerParam(id = "booster", default = "gbtree", values = c("gbtree", "gblinear", "dart")),
makeIntegerLearnerParam(id = "silent", default = 0L, tunable = FALSE),
makeUntypedLearnerParam(id = "watchlist", default = NULL, tunable = FALSE),
makeNumericLearnerParam(id = "eta", default = 0.3, lower = 0, upper = 1),
makeNumericLearnerParam(id = "gamma", default = 0, lower = 0),
makeIntegerLearnerParam(id = "max_depth", default = 6L, lower = 1L),
@@ -16,16 +16,17 @@ makeRLearner.regr.xgboost = function() {
makeNumericLearnerParam(id = "colsample_bytree", default = 1, lower = 0, upper = 1),
makeNumericLearnerParam(id = "colsample_bylevel", default = 1, lower = 0, upper = 1),
makeIntegerLearnerParam(id = "num_parallel_tree", default = 1L, lower = 1L),
makeNumericLearnerParam(id = "lambda", default = 0, lower = 0),
makeNumericLearnerParam(id = "lambda", default = 1, lower = 0),
makeNumericLearnerParam(id = "lambda_bias", default = 0, lower = 0),
makeNumericLearnerParam(id = "alpha", default = 0, lower = 0),
makeUntypedLearnerParam(id = "objective", default = "reg:linear", tunable = FALSE),
makeUntypedLearnerParam(id = "eval_metric", default = "rmse", tunable = FALSE),
makeNumericLearnerParam(id = "base_score", default = 0.5, tunable = FALSE),

makeNumericLearnerParam(id = "max_delta_step", lower = 0, default = 0),
makeNumericLearnerParam(id = "missing", default = NULL, tunable = FALSE, when = "both",
special.vals = list(NA, NA_real_, NULL)),
makeIntegerVectorLearnerParam(id = "monotone_constraints", default = 0, lower = -1, upper = 1),
makeNumericLearnerParam(id = "tweedie_variance_power", lower = 1, upper = 2, default = 1.5, requires = quote(objective == "reg:tweedie")),
makeIntegerLearnerParam(id = "nthread", lower = 1L, tunable = FALSE),
makeIntegerLearnerParam(id = "nrounds", default = 1L, lower = 1L),
# FIXME nrounds seems to have no default in xgboost(), if it has 1, par.vals is redundant
@@ -35,9 +36,17 @@
requires = quote(verbose == 1L)),
makeIntegerLearnerParam(id = "early_stopping_rounds", default = NULL, lower = 1L, special.vals = list(NULL), tunable = FALSE),
makeLogicalLearnerParam(id = "maximize", default = NULL, special.vals = list(NULL), tunable = FALSE),
makeDiscreteLearnerParam(id = "sample_type", default = "uniform", values = c("uniform", "weighted"), requires = quote(booster == "dart")),
makeDiscreteLearnerParam(id = "normalize_type", default = "tree", values = c("tree", "forest"), requires = quote(booster == "dart")),
makeNumericLearnerParam(id = "rate_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")),
makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart"))
makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")),
# TODO: uncomment the following after the next CRAN update, and set max_depth's lower = 0L
#makeLogicalLearnerParam(id = "one_drop", default = FALSE, requires = quote(booster == "dart")),
#makeDiscreteLearnerParam(id = "tree_method", default = "exact", values = c("exact", "hist"), requires = quote(booster != "gblinear")),
#makeDiscreteLearnerParam(id = "grow_policy", default = "depthwise", values = c("depthwise", "lossguide"), requires = quote(tree_method == "hist")),
#makeIntegerLearnerParam(id = "max_leaves", default = 0L, lower = 0L, requires = quote(grow_policy == "lossguide")),
#makeIntegerLearnerParam(id = "max_bin", default = 256L, lower = 2L, requires = quote(tree_method == "hist")),
makeUntypedLearnerParam(id = "callbacks", default = list(), tunable = FALSE)
),
par.vals = list(nrounds = 1L, verbose = 0L),
properties = c("numerics", "weights", "featimp", "missings"),
@@ -52,16 +61,19 @@ makeRLearner.regr.xgboost = function() {
trainLearner.regr.xgboost = function(.learner, .task, .subset, .weights = NULL, ...) {
parlist = list(...)

-parlist$label = getTaskData(.task, .subset, target.extra = TRUE)$target
-parlist$data = data.matrix(getTaskData(.task, .subset, target.extra = TRUE)$data)
-
if (is.null(parlist$objective))
parlist$objective = "reg:linear"

+task.data = getTaskData(.task, .subset, target.extra = TRUE)
+parlist$data = xgboost::xgb.DMatrix(data = data.matrix(task.data$data), label = task.data$target)
+
if (!is.null(.weights))
-parlist$data = xgboost::xgb.DMatrix(data = parlist$data, label = parlist$label, weight = .weights)
+xgboost::setinfo(parlist$data, "weight", .weights)

+if (is.null(parlist$watchlist))
+parlist$watchlist = list(train = parlist$data)
+
-do.call(xgboost::xgboost, parlist)
+do.call(xgboost::xgb.train, parlist)
}

#' @export
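regr.xgboost gets the same xgb.train rework plus the newly exposed tweedie_variance_power, which only takes effect with objective = "reg:tweedie". A minimal sketch (nrounds picked arbitrarily; bh.task is mlr's bundled Boston housing regression task):

library(mlr)
lrn = makeLearner("regr.xgboost", objective = "reg:tweedie",
  tweedie_variance_power = 1.5, nrounds = 50L)
mod = train(lrn, bh.task)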