Skip to content

Support customizing the objective function in xgboost's boost_tree()  #873

Closed
@SHo-JANG

Description

@SHo-JANG
library(tidymodels)
library(tidyverse)
library(xgboost)
#> 
#> Attaching package: 'xgboost'
#> The following object is masked from 'package:dplyr':
#> 
#>     slice



# Binary subset of iris: keep only versicolor / virginica ------------------
data <- iris |>
  as_tibble() |>
  filter(Species != "setosa")

# Recode the outcome to {0, 1} as xgboost expects for binary objectives.
# NOTE: after filter() the factor still carries the unused "setosa" level,
# so as.numeric() yields 2/3; subtracting 2 maps versicolor -> 0,
# virginica -> 1. (Removed the `mydata |> view()` call that ran before
# `mydata` was defined and errored.)
mydata <- data |>
  rename(y = Species) |>
  mutate(y = as.numeric(y) - 2)

labels <- mydata$y
# Drop the outcome column by NAME, not by position: `mydata[, -1]` removed
# Sepal.Length and silently kept the label `y` inside the features
# (label leakage).
features <- mydata[, setdiff(names(mydata), "y")]

# Convert the data to the DMatrix object xgboost trains on
dtrain <- xgb.DMatrix(data = as.matrix(features), label = labels)

# Score on the same rows (mydata has exactly 100 rows); again drop `y` by name
new_data <- mydata[1:100, setdiff(names(mydata), "y")]

dtest <- xgb.DMatrix(data = as.matrix(new_data))
# for check exactly same result -------------------------------------------

# user define objective function, given prediction, return gradient and second order gradient
# this is log likelihood loss
# Custom xgboost objective: binary log-loss.
# Given the raw margin scores and the training DMatrix, return the first-
# and second-order gradients of the loss with respect to the scores.
logregobj <- function(preds, dtrain) {
  y <- xgboost::getinfo(dtrain, "label")
  p <- 1 / (1 + exp(-preds))   # sigmoid of the raw margin
  list(
    grad = p - y,              # first derivative of the log-loss
    hess = p * (1 - p)         # second derivative of the log-loss
  )
}



# should be same results  -------------------------------------------------

# Built-in binary objective; eta/max_depth mirror the custom run below so
# the two boosters are directly comparable.
params <- list(
  objective = "binary:logistic",
  #eval_metric = "logloss",
  eta = 0.1,
  max_depth = 5
)

# Same settings but with the user-defined objective function.
# base_score = 0.0 starts the raw margin at 0 — presumably chosen so the
# custom run matches the built-in objective's starting point; confirm
# against the xgboost base_score documentation.
param_org <- list(objective = logregobj,
                  base_score= 0.0,
                  eta = 0.1,
                  max_depth = 5 )

# model -------------------------------------------------------------------

# Train both boosters for the same number of rounds on the same DMatrix.
num_round <- 10
xgb_model <- xgb.train(params, dtrain, num_round)
xgb_model_custom <- xgb.train(param_org, dtrain, num_round)



# Standard logistic (sigmoid) function: maps a raw margin to (0, 1).
logistic_f <- function(z) {
  denom <- 1 + exp(-z)
  1 / denom
}


# The custom-objective model emits raw margins, so push them through the
# logistic link before comparing to the built-in model's probabilities.
custom_obj<- predict(xgb_model_custom, dtest) |> logistic_f()
default_obj<- predict(xgb_model, dtest)

# almost same!!! — differences are at floating-point noise level
(default_obj-custom_obj) |> range()
#> [1] -3.296303e-08  1.806186e-08



# I want to this custom obj function into tidymodel ecosystem ---------------------


set.seed(100)

# Rebuild the two-class data, this time keeping y as a factor for tidymodels.
data <- iris |>
  as_tibble() |>
  filter(Species != "setosa")

# droplevels() removes the now-empty "setosa" level; without it the outcome
# factor still reports 3 levels and downstream tooling can treat the
# problem as multiclass instead of binary.
mydata <- data |>
  rename(y = Species) |>
  mutate(y = droplevels(y))

# 80/20 split stratified on the outcome, then 5-fold CV on the training set.
splits <- initial_split(mydata, prop = 0.8, strata = y)
train_data <- training(splits)
test_data <- testing(splits)

resamples <- vfold_cv(data = train_data, v = 5, strata = y)


# Boosted-tree spec with the custom objective passed through as an engine
# argument. NOTE(review): parsnip's prediction path switch()es on a *string*
# objective name, so a function objective currently breaks predict() — this
# is the behavior being reported in this issue.
xgboost_model <- boost_tree(mode = "classification",
                            tree_depth     = tune(),
                            trees          = tune(),
                            learn_rate     = tune(),
                            mtry           = tune(),
                            min_n          = tune(),
                            loss_reduction = tune(),
                            sample_size    = tune(),
                            stop_iter      = tune()
) %>% set_engine(engine = "xgboost",
                 objective = logregobj,
                 base_score = 0.0)
# Removed `set.seed = 100` from set_engine(): engine arguments are forwarded
# verbatim into the xgb_train() call (visible in translate() output), and
# `set.seed` is not a valid training parameter — seed the session with
# set.seed() before fitting instead.

# Inspect how the spec maps onto parsnip::xgb_train(); note every
# set_engine() argument (objective, base_score, set.seed) is forwarded
# verbatim into the fit template.
xgboost_model |> translate()
#> Boosted Tree Model Specification (classification)
#> 
#> Main Arguments:
#>   mtry = tune()
#>   trees = tune()
#>   min_n = tune()
#>   tree_depth = tune()
#>   learn_rate = tune()
#>   loss_reduction = tune()
#>   sample_size = tune()
#>   stop_iter = tune()
#> 
#> Engine-Specific Arguments:
#>   objective = logregobj
#>   base_score = 0
#>   set.seed = 100
#> 
#> Computational engine: xgboost 
#> 
#> Model fit template:
#> parsnip::xgb_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(), 
#>     colsample_bynode = tune(), nrounds = tune(), min_child_weight = tune(), 
#>     max_depth = tune(), eta = tune(), gamma = tune(), subsample = tune(), 
#>     early_stop = tune(), objective = logregobj, base_score = 0, 
#>     set.seed = 100, nthread = 1, verbose = 0)




# Minimal recipe: model y on every other column, no preprocessing steps.
# (Normalization / mutate-at steps were tried and intentionally left
# disabled for this reprex.)
rec_base <- recipe(train_data, y ~ .)




# Bundle the preprocessing recipe and the model spec into a single workflow.
xgboost_workflow <- workflow() |>
  add_recipe(rec_base) |>
  add_model(xgboost_model)



# Tuning parameter set: finalize() resolves the data-dependent range for
# mtry from train_data, then update() pins explicit ranges for the rest.
# The negative learn_rate / loss_reduction bounds suggest these ranges are
# on the log10 scale dials uses for those parameters — confirm against the
# dials documentation.
param_info <- 
  xgboost_workflow %>%
  extract_parameter_set_dials() %>%
  finalize(train_data) |> 
  update(
    trees = trees(range = c(5L,1500L)),
    tree_depth= tree_depth(range = c(2L,15L)),
    learn_rate = learn_rate(range = c(-5,-0.5)),
    loss_reduction = loss_reduction(range = c(-5,1.5))
    
  )
#fit_xy.model_spec()
#fit_xy.model_spec()

# Tune over a small Latin hypercube (3 candidates) across the 5 CV folds.
# Per the output below, every model FITS but every prediction step fails —
# the failure is in parsnip's predict path, not in xgboost training.
grid_size <- 3
set.seed(100)
xgboost_tune <-
  tune_grid(xgboost_workflow,
            resamples = resamples,
            grid = grid_latin_hypercube(param_info,size = grid_size),
            control =control_grid(verbose = TRUE,
                                  allow_par = FALSE,
                                  parallel_over =NULL,save_pred = FALSE),
            metrics = metric_set(mcc)#rank_percentile_rmse
  )
#> i Fold1: preprocessor 1/1
#> ✓ Fold1: preprocessor 1/1
#> i Fold1: preprocessor 1/1, model 1/3
#> ✓ Fold1: preprocessor 1/1, model 1/3
#> i Fold1: preprocessor 1/1, model 1/3 (predictions)
#> x Fold1: preprocessor 1/1, model 1/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold1: preprocessor 1/1, model 2/3
#> ✓ Fold1: preprocessor 1/1, model 2/3
#> i Fold1: preprocessor 1/1, model 2/3 (predictions)
#> x Fold1: preprocessor 1/1, model 2/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold1: preprocessor 1/1, model 3/3
#> ✓ Fold1: preprocessor 1/1, model 3/3
#> i Fold1: preprocessor 1/1, model 3/3 (predictions)
#> x Fold1: preprocessor 1/1, model 3/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold2: preprocessor 1/1
#> ✓ Fold2: preprocessor 1/1
#> i Fold2: preprocessor 1/1, model 1/3
#> ✓ Fold2: preprocessor 1/1, model 1/3
#> i Fold2: preprocessor 1/1, model 1/3 (predictions)
#> x Fold2: preprocessor 1/1, model 1/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold2: preprocessor 1/1, model 2/3
#> ✓ Fold2: preprocessor 1/1, model 2/3
#> i Fold2: preprocessor 1/1, model 2/3 (predictions)
#> x Fold2: preprocessor 1/1, model 2/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold2: preprocessor 1/1, model 3/3
#> ✓ Fold2: preprocessor 1/1, model 3/3
#> i Fold2: preprocessor 1/1, model 3/3 (predictions)
#> x Fold2: preprocessor 1/1, model 3/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold3: preprocessor 1/1
#> ✓ Fold3: preprocessor 1/1
#> i Fold3: preprocessor 1/1, model 1/3
#> ✓ Fold3: preprocessor 1/1, model 1/3
#> i Fold3: preprocessor 1/1, model 1/3 (predictions)
#> x Fold3: preprocessor 1/1, model 1/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold3: preprocessor 1/1, model 2/3
#> ✓ Fold3: preprocessor 1/1, model 2/3
#> i Fold3: preprocessor 1/1, model 2/3 (predictions)
#> x Fold3: preprocessor 1/1, model 2/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold3: preprocessor 1/1, model 3/3
#> ✓ Fold3: preprocessor 1/1, model 3/3
#> i Fold3: preprocessor 1/1, model 3/3 (predictions)
#> x Fold3: preprocessor 1/1, model 3/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold4: preprocessor 1/1
#> ✓ Fold4: preprocessor 1/1
#> i Fold4: preprocessor 1/1, model 1/3
#> ✓ Fold4: preprocessor 1/1, model 1/3
#> i Fold4: preprocessor 1/1, model 1/3 (predictions)
#> x Fold4: preprocessor 1/1, model 1/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold4: preprocessor 1/1, model 2/3
#> ✓ Fold4: preprocessor 1/1, model 2/3
#> i Fold4: preprocessor 1/1, model 2/3 (predictions)
#> x Fold4: preprocessor 1/1, model 2/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold4: preprocessor 1/1, model 3/3
#> ✓ Fold4: preprocessor 1/1, model 3/3
#> i Fold4: preprocessor 1/1, model 3/3 (predictions)
#> x Fold4: preprocessor 1/1, model 3/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold5: preprocessor 1/1
#> ✓ Fold5: preprocessor 1/1
#> i Fold5: preprocessor 1/1, model 1/3
#> ✓ Fold5: preprocessor 1/1, model 1/3
#> i Fold5: preprocessor 1/1, model 1/3 (predictions)
#> x Fold5: preprocessor 1/1, model 1/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold5: preprocessor 1/1, model 2/3
#> ✓ Fold5: preprocessor 1/1, model 2/3
#> i Fold5: preprocessor 1/1, model 2/3 (predictions)
#> x Fold5: preprocessor 1/1, model 2/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> i Fold5: preprocessor 1/1, model 3/3
#> ✓ Fold5: preprocessor 1/1, model 3/3
#> i Fold5: preprocessor 1/1, model 3/3 (predictions)
#> x Fold5: preprocessor 1/1, model 3/3 (predictions): Error in switch(object$params$objective, `binary:logitraw` = stats::bino...
#> Warning: All models failed. Run `show_notes(.Last.tune.result)` for more
#> information.

# Root cause surfaced here: parsnip switch()es on object$params$objective,
# which is a function (not a length-1 string) when a custom objective is
# supplied, so switch() errors with "EXPR must be a length 1 vector".
show_notes(.Last.tune.result)
#> unique notes:
#> ────────────────────────────────────────────────────────────────────────────────
#> Error in switch(object$params$objective, `binary:logitraw` = stats::binomial()$linkinv(res), : EXPR must be a length 1 vector

From searching online I can see that built-in options such as objective = "binary:logistic" are supported; is there a way to replace this with a custom objective function that I define myself?

Thank you for always creating and maintaining the tidy ecosystem.

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug — an unexpected problem or unintended behavior

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions