Closed
Description
library(tidymodels)
library(tidyverse)
library(xgboost)
#>
#> Attaching package: 'xgboost'
#> The following object is masked from 'package:dplyr':
#>
#> slice
# Keep only the two non-setosa species so the problem is binary.
data <- iris |>
  as_tibble() |>
  filter(Species != "setosa")
# Rename the outcome to y and encode it as 0/1. filter() does not drop the
# unused "setosa" level, so the remaining classes are codes 2 and 3;
# subtracting 2 gives 0 and 1.
mydata <- data |>
  rename(y = Species) |>
  mutate(y = as.numeric(y) - 2)
# mydata |> view()  # interactive only; must run after `mydata` is defined
labels <- mydata$y
# Drop the outcome by name. `y` is the last column, so the previous
# `mydata[, -1]` removed Sepal.Length and left the label in the features.
features <- select(mydata, -y)
# Convert your data to a DMatrix object
dtrain <- xgb.DMatrix(data = as.matrix(features), label = labels)
# Same 100 rows as before, again without the outcome column.
new_data <- select(mydata, -y)[1:100, ]
dtest <- xgb.DMatrix(data = as.matrix(new_data))
# Check that both objectives give exactly the same result -------------------
# User-defined objective function: given the predictions, return the gradient
# and the second-order gradient of the log-likelihood loss.
#
# preds:  numeric predictions; they are passed through the sigmoid below.
# dtrain: object from which xgboost::getinfo() can read the "label" field.
# Returns a list with elements `grad` and `hess`.
logregobj <- function(preds, dtrain) {
  y_true <- xgboost::getinfo(dtrain, "label")
  # Sigmoid of the incoming predictions.
  prob <- 1 / (1 + exp(-preds))
  list(
    grad = prob - y_true,
    hess = prob * (1 - prob)
  )
}
# Both parameter sets below should give the same results --------------------
# Built-in logistic objective.
params <- list(
  objective = "binary:logistic",
  # eval_metric = "logloss",
  eta = 0.1,
  max_depth = 5
)
# Same eta and max_depth, but with the hand-written objective and
# base_score set to 0.
param_org <- list(
  objective = logregobj,
  base_score = 0.0,
  eta = 0.1,
  max_depth = 5
)
# model ---------------------------------------------------------------------
num_round <- 10
xgb_model <- xgb.train(params = params, data = dtrain, nrounds = num_round)
xgb_model_custom <- xgb.train(
  params = param_org,
  data = dtrain,
  nrounds = num_round
)
# Logistic (sigmoid) transform, applied elementwise: 1 / (1 + exp(-x)).
logistic_f <- function(x) {
  denom <- 1 + exp(-x)
  1 / denom
}
# Predictions from the custom-objective model are passed through logistic_f();
# predictions from the built-in objective are used as returned.
custom_obj <- logistic_f(predict(xgb_model_custom, dtest))
default_obj <- predict(xgb_model, dtest)
# almost same!!!
range(default_obj - custom_obj)
#> [1] -3.296303e-08 1.806186e-08
# I want to use this custom objective function in the tidymodels ecosystem ----
set.seed(100)
# Two-class subset of iris (setosa rows removed).
data <- iris |>
  as_tibble() |>
  filter(Species != "setosa")
# Outcome column renamed to y; it is not converted to 0/1 here.
mydata <- rename(data, y = Species)
# mydata |> view()
# 80/20 split stratified on y, then 5-fold CV on the training portion.
splits <- initial_split(mydata, prop = 0.8, strata = y)
train_data <- training(splits)
test_data <- testing(splits)
resamples <- vfold_cv(data = train_data, v = 5, strata = y)
# Boosted-tree specification with every main argument marked for tuning.
# The custom objective and base_score are passed as engine arguments.
xgboost_model <- set_engine(
  boost_tree(
    mode = "classification",
    tree_depth = tune(),
    trees = tune(),
    learn_rate = tune(),
    mtry = tune(),
    min_n = tune(),
    loss_reduction = tune(),
    sample_size = tune(),
    stop_iter = tune()
  ),
  engine = "xgboost",
  objective = logregobj,
  base_score = 0.0,
  set.seed = 100
)
# Print the model fit template that parsnip generates for this spec.
translate(xgboost_model)
#> Boosted Tree Model Specification (classification)
#>
#> Main Arguments:
#> mtry = tune()
#> trees = tune()
#> min_n = tune()
#> tree_depth = tune()
#> learn_rate = tune()
#> loss_reduction = tune()
#> sample_size = tune()
#> stop_iter = tune()
#>
#> Engine-Specific Arguments:
#> objective = logregobj
#> base_score = 0
#> set.seed = 100
#>
#> Computational engine: xgboost
#>
#> Model fit template:
#> parsnip::xgb_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
#> colsample_bynode = tune(), nrounds = tune(), min_child_weight = tune(),
#> max_depth = tune(), eta = tune(), gamma = tune(), subsample = tune(),
#> early_stop = tune(), objective = logregobj, base_score = 0,
#> set.seed = 100, nthread = 1, verbose = 0)
# Recipe: y modelled on all other columns; no preprocessing steps are active.
rec_base <- recipe(train_data, y ~ .)
# Disabled steps, kept for reference:
# step_mutate_at(all_numeric_predictors(), fn = list(orig = ~.)) %>%
# step_normalize(all_predictors(), -all_outcomes())

# Bundle the recipe and the model spec.
xgboost_workflow <- workflow() |>
  add_recipe(rec_base) |>
  add_model(xgboost_model)

# Tuning parameter set: finalize against train_data, then override the
# ranges of four parameters.
param_info <- xgboost_workflow |>
  extract_parameter_set_dials() |>
  finalize(train_data) |>
  update(
    trees = trees(range = c(5L, 1500L)),
    tree_depth = tree_depth(range = c(2L, 15L)),
    learn_rate = learn_rate(range = c(-5, -0.5)),
    loss_reduction = loss_reduction(range = c(-5, 1.5))
  )
# fit_xy.model_spec()
grid_size <- 3
set.seed(100)
# Grid search over a Latin hypercube of grid_size candidates, scored with mcc.
xgboost_tune <- tune_grid(
  xgboost_workflow,
  resamples = resamples,
  grid = grid_latin_hypercube(param_info, size = grid_size),
  control = control_grid(
    verbose = TRUE,
    allow_par = FALSE,
    parallel_over = NULL,
    save_pred = FALSE
  ),
  metrics = metric_set(mcc) # rank_percentile_rmse
)
#> i Fold1: preprocessor 1/1
#> ✓ Fold1: preprocessor 1/1
#> i Fold1: preprocessor 1/1, model 1/3
#> ✓ Fold1: preprocessor 1/1, model 1/3
#> i Fold1: preprocessor 1/1, model 1/3 (predictions)
#> x Fold1: preprocessor 1/1, model 1/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold1: preprocessor 1/1, model 2/3
#> ✓ Fold1: preprocessor 1/1, model 2/3
#> i Fold1: preprocessor 1/1, model 2/3 (predictions)
#> x Fold1: preprocessor 1/1, model 2/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold1: preprocessor 1/1, model 3/3
#> ✓ Fold1: preprocessor 1/1, model 3/3
#> i Fold1: preprocessor 1/1, model 3/3 (predictions)
#> x Fold1: preprocessor 1/1, model 3/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold2: preprocessor 1/1
#> ✓ Fold2: preprocessor 1/1
#> i Fold2: preprocessor 1/1, model 1/3
#> ✓ Fold2: preprocessor 1/1, model 1/3
#> i Fold2: preprocessor 1/1, model 1/3 (predictions)
#> x Fold2: preprocessor 1/1, model 1/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold2: preprocessor 1/1, model 2/3
#> ✓ Fold2: preprocessor 1/1, model 2/3
#> i Fold2: preprocessor 1/1, model 2/3 (predictions)
#> x Fold2: preprocessor 1/1, model 2/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold2: preprocessor 1/1, model 3/3
#> ✓ Fold2: preprocessor 1/1, model 3/3
#> i Fold2: preprocessor 1/1, model 3/3 (predictions)
#> x Fold2: preprocessor 1/1, model 3/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold3: preprocessor 1/1
#> ✓ Fold3: preprocessor 1/1
#> i Fold3: preprocessor 1/1, model 1/3
#> ✓ Fold3: preprocessor 1/1, model 1/3
#> i Fold3: preprocessor 1/1, model 1/3 (predictions)
#> x Fold3: preprocessor 1/1, model 1/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold3: preprocessor 1/1, model 2/3
#> ✓ Fold3: preprocessor 1/1, model 2/3
#> i Fold3: preprocessor 1/1, model 2/3 (predictions)
#> x Fold3: preprocessor 1/1, model 2/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold3: preprocessor 1/1, model 3/3
#> ✓ Fold3: preprocessor 1/1, model 3/3
#> i Fold3: preprocessor 1/1, model 3/3 (predictions)
#> x Fold3: preprocessor 1/1, model 3/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold4: preprocessor 1/1
#> ✓ Fold4: preprocessor 1/1
#> i Fold4: preprocessor 1/1, model 1/3
#> ✓ Fold4: preprocessor 1/1, model 1/3
#> i Fold4: preprocessor 1/1, model 1/3 (predictions)
#> x Fold4: preprocessor 1/1, model 1/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold4: preprocessor 1/1, model 2/3
#> ✓ Fold4: preprocessor 1/1, model 2/3
#> i Fold4: preprocessor 1/1, model 2/3 (predictions)
#> x Fold4: preprocessor 1/1, model 2/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold4: preprocessor 1/1, model 3/3
#> ✓ Fold4: preprocessor 1/1, model 3/3
#> i Fold4: preprocessor 1/1, model 3/3 (predictions)
#> x Fold4: preprocessor 1/1, model 3/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold5: preprocessor 1/1
#> ✓ Fold5: preprocessor 1/1
#> i Fold5: preprocessor 1/1, model 1/3
#> ✓ Fold5: preprocessor 1/1, model 1/3
#> i Fold5: preprocessor 1/1, model 1/3 (predictions)
#> x Fold5: preprocessor 1/1, model 1/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold5: preprocessor 1/1, model 2/3
#> ✓ Fold5: preprocessor 1/1, model 2/3
#> i Fold5: preprocessor 1/1, model 2/3 (predictions)
#> x Fold5: preprocessor 1/1, model 2/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold5: preprocessor 1/1, model 3/3
#> ✓ Fold5: preprocessor 1/1, model 3/3
#> i Fold5: preprocessor 1/1, model 3/3 (predictions)
#> x Fold5: preprocessor 1/1, model 3/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> Warning: All models failed. Run `show_notes(.Last.tune.result)` for more
#> information.
# Print the unique notes recorded for the last tuning result.
.Last.tune.result |> show_notes()
#> unique notes:
#> ────────────────────────────────────────────────────────────────────────────────
#> Error in switch(object$params$objective, `binary:logitraw` = stats::binomial()$linkinv(res), : EXPR must be a length 1 vector
I found through Google that built-in options such as objective = "binary:logistic" are available. Is there any way to replace this with a function I wrote myself?
Thank you for always creating and maintaining the tidy ecosystem.