vrodriguezf
diff --git a/‎.github/PULL_REQUEST_TEMPLATE.md
Lines changed: 1 addition & 1 deletion b/‎.github/PULL_REQUEST_TEMPLATE.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎NEWS.md
Lines changed: 7 additions & 4 deletions b/‎NEWS.md
Lines changed: 7 additions & 4 deletions
diff --git a/‎R/Filter.R
Lines changed: 1 addition & 1 deletion b/‎R/Filter.R
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/RLearner_classif_cforest.R
Lines changed: 3 additions & 0 deletions b/‎R/RLearner_classif_cforest.R
Lines changed: 3 additions & 0 deletions
diff --git a/‎R/RLearner_classif_svm.R
Lines changed: 10 additions & 3 deletions b/‎R/RLearner_classif_svm.R
Lines changed: 10 additions & 3 deletions
diff --git a/‎R/RLearner_regr_cforest.R
Lines changed: 3 additions & 0 deletions b/‎R/RLearner_regr_cforest.R
Lines changed: 3 additions & 0 deletions
diff --git a/‎R/RLearner_regr_crs.R
Lines changed: 1 addition & 2 deletions b/‎R/RLearner_regr_crs.R
Lines changed: 1 addition & 2 deletions
diff --git a/‎R/RLearner_regr_svm.R
Lines changed: 8 additions & 3 deletions b/‎R/RLearner_regr_svm.R
Lines changed: 8 additions & 3 deletions
diff --git a/‎docs/PULL_REQUEST_TEMPLATE.html
Lines changed: 1 addition & 1 deletion b/‎docs/PULL_REQUEST_TEMPLATE.html
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/articles/tutorial/create_filter.html
Lines changed: 7 additions & 13 deletions b/‎docs/articles/tutorial/create_filter.html
Lines changed: 7 additions & 13 deletions
@@ -1,7 +1,7 @@
 We are always happy to receive pull requests.
 
 Please make sure you have read our coding guidelines: 
-https://github.com/mlr-org/mlr/wiki/mlr-Coding-Guidelines
+https://www.notion.so/mlrorg/Style-Guide-740bc663207a4bbb9a457987bda6fd91
 
 This especially means that you have understood:
 
 
@@ -11,10 +11,7 @@
   See `?regr.randomForest` for more details.  
   `regr.ranger` relies on the functions provided by the package ("jackknife" and "infjackknife" (default))  
   (@jakob-r, #1784)
-
-## functions - general
-- `getClassWeightParam()` now also works for Wrapper* Models and ensemble models (@ja-thomas, #891)
-- added `getLearnerNote()` to query the "Note" slot of a learner (@alona-sydorova, #2086)
+- `e1071::svm()` now only uses the formula interface if factors are present. This change is intended to prevent the "stack overflow" issues some users encountered when using large datasets. See #1738 for more information. (@mb706, #1740)
 
 ## learners - new
 - add learner `cluster.MiniBatchKmeans` from package _ClusterR_ (@Prasiddhi, #2554)
@@ -23,6 +20,12 @@
 - `plotHyperParsEffect()` now supports facet visualization of hyperparam effects for nested cv (@MasonGallo, #1653)
 - fixed a bug that caused an incorrect aggregation of probabilities in some cases. The bug had existed for quite some time and was exposed due to the change of `data.table`'s default in `rbindlist()`. See #2578 for more information. (@mllg, #2579)
 - fixed a bug in which `options(on.learner.error)` was not respected in `benchmark()`. This caused `benchmark()` to stop even if it should have continued including `FailureModels` in the result (@dagola, #1984)
+- `getClassWeightParam()` now also works for Wrapper* Models and ensemble models (@ja-thomas, #891)
+- added `getLearnerNote()` to query the "Note" slot of a learner (@alona-sydorova, #2086)
+
+## filters - general
+
+- Filter `praznik_MRMR` now also supports `regr` and `surv` tasks
 
 # mlr 2.14.0
 
 
@@ -1003,7 +1003,7 @@ makeFilter(
   name = "praznik_MRMR",
   desc = "Minimum redundancy maximal relevancy filter",
   pkg = "praznik",
-  supported.tasks = "classif",
+  supported.tasks = c("classif", "regr", "surv"),
   supported.features = c("numerics", "factors", "integer", "character", "logical"),
   fun = praznik_filter("MRMR")
 )
 
@@ -40,6 +40,8 @@ trainLearner.classif.cforest = function(.learner, .task, .subset,
 
   f = getTaskFormula(.task)
   d = getTaskData(.task, .subset)
+
+  # default handling necessary because the default of controls is `cforest_unbiased()` which does not allow all parameters (e.g. replace)
   defaults = getDefaults(getParamSet(.learner))
   if (missing(teststat)) teststat = defaults$teststat
   if (missing(testtype)) testtype = defaults$testtype
@@ -50,6 +52,7 @@ trainLearner.classif.cforest = function(.learner, .task, .subset,
     fraction, trace, teststat, testtype, mincriterion,
     minsplit, minbucket, stump, nresample, maxsurrogate,
     maxdepth, savesplitstats)
+
   party::cforest(f, data = d, controls = ctrl, weights = .weights, ...)
 }
 
 
@@ -28,9 +28,16 @@ makeRLearner.classif.svm = function() {
 }
 
 #' @export
-trainLearner.classif.svm = function(.learner, .task, .subset, .weights = NULL, ...) {
-  f = getTaskFormula(.task)
-  e1071::svm(f, data = getTaskData(.task, .subset), probability = .learner$predict.type == "prob", ...)
+trainLearner.classif.svm = function(.learner, .task, .subset, .weights = NULL,  ...) {
+  if (sum(getTaskDesc(.task)$n.feat[c("factors", "ordered")]) > 0) {
+    # use formula interface if factors are present 
+    f = getTaskFormula(.task)
+    e1071::svm(f, data = getTaskData(.task, .subset), probability = .learner$predict.type == "prob", ...)
+  } else {
+    # use the "data.frame" approach if no factors are present to prevent issues like https://github.com/mlr-org/mlr/issues/1738
+    d = getTaskData(.task, .subset, target.extra = TRUE)
+    e1071::svm(d$data, d$target, probability = .learner$predict.type == "prob", ...)
+  }
 }
 
 #' @export
 
@@ -39,6 +39,8 @@ trainLearner.regr.cforest = function(.learner, .task, .subset, .weights = NULL,
 
   f = getTaskFormula(.task)
   d = getTaskData(.task, .subset)
+
+  # default handling necessary because the default of controls is `cforest_unbiased()` which does not allow all parameters (e.g. replace)
   defaults = getDefaults(getParamSet(.learner))
   if (missing(teststat)) teststat = defaults$teststat
   if (missing(testtype)) testtype = defaults$testtype
@@ -49,6 +51,7 @@ trainLearner.regr.cforest = function(.learner, .task, .subset, .weights = NULL,
     trace, teststat, testtype, mincriterion,
     minsplit, minbucket, stump,
     nresample, maxsurrogate, maxdepth, savesplitstats)
+
   party::cforest(f, data = d, controls = ctrl, weights = .weights, ...)
 }
 
 
@@ -64,8 +64,7 @@ predictLearner.regr.crs = function(.learner, .model, .newdata, ...) {
     lwr = attr(pred, "lwr")
     attr(pred, "lwr") = NULL
     attr(pred, "upr") = NULL
-    # FIXME: make sure that this is correct, ask Daniel
-    se = (pred - lwr) * sqrt(.model$task.desc$size) / qnorm(0.95)
+    se = (pred - lwr) / qnorm(0.95)
     cbind(pred, se)
   } else {
     pred = predict(.model$learner.model, newdata = .newdata, ...)
 
@@ -27,9 +27,14 @@ makeRLearner.regr.svm = function() {
 }
 
 #' @export
-trainLearner.regr.svm = function(.learner, .task, .subset, .weights = NULL, ...) {
-  f = getTaskFormula(.task)
-  e1071::svm(f, data = getTaskData(.task, .subset), ...)
+trainLearner.regr.svm = function(.learner, .task, .subset, .weights = NULL,  ...) {
+  if (sum(getTaskDesc(.task)$n.feat[c("factors", "ordered")]) > 0) {
+    f = getTaskFormula(.task)
+    e1071::svm(f, data = getTaskData(.task, .subset), ...)
+  } else {
+    d = getTaskData(.task, .subset, target.extra = TRUE)
+    e1071::svm(d$data, d$target, ...)
+  }
 }
 
 #' @export
Original file line number	Diff line number	Diff line change
`@@ -1003,7 +1003,7 @@ makeFilter(`
`1003`	`1003`	`name = "praznik_MRMR",`
`1004`	`1004`	`desc = "Minimum redundancy maximal relevancy filter",`
`1005`	`1005`	`pkg = "praznik",`
`1006`		`- supported.tasks = "classif",`
	`1006`	`+ supported.tasks = c("classif", "regr", "surv"),`
`1007`	`1007`	`supported.features = c("numerics", "factors", "integer", "character", "logical"),`
`1008`	`1008`	`fun = praznik_filter("MRMR")`
`1009`	`1009`	`)`