# Closed
# Description
library(tidyverse)
library(WDI)
# Methane emissions data (WDI indicator "EN.ATM.METH.KT.CE").
# `extra = TRUE` is requested so that the additional columns used further
# down (region, income, longitude, latitude) come back with the indicator.
# Column names are converted by janitor::clean_names(), the indicator column
# is renamed to `methane`, and every row containing an NA in any column is
# dropped.
df_methane <-
  WDI(
    indicator = "EN.ATM.METH.KT.CE",
    extra = TRUE
  ) %>%
  as_tibble() %>%
  janitor::clean_names() %>%
  rename(methane = en_atm_meth_kt_ce) %>%
  drop_na()
#Modeling
library(tidymodels)
# Dataset for modeling
# Drops the "Aggregates" region rows and the "Not classified" income rows,
# converts the coordinates to numeric and the grouping columns to factors,
# and keeps only the outcome (methane) plus the four predictors.
df_mod <-
  df_methane %>%
  filter(
    region != "Aggregates",
    income != "Not classified"
  ) %>%
  mutate(
    latitude = as.numeric(latitude),
    longitude = as.numeric(longitude),
    income = as_factor(income),
    region = as_factor(region),
    # The outcome must be a plain numeric vector. WDI appears to attach a
    # "label" attribute to indicator columns (TODO confirm for the installed
    # WDI version); a vector carrying extra attributes fails is.vector(),
    # which is the likely cause of the "`y` should be one of the following
    # classes" error during fitting. as.numeric() drops such attributes.
    methane = as.numeric(methane)
  ) %>%
  select(region, income, longitude, latitude, methane)
# Train/test split: 80% of the rows go to training, stratified by income.
# The seed is fixed so the partition is reproducible.
set.seed(12345)
df_split <-
  initial_split(
    df_mod,
    prop = 0.8,
    strata = income
  )
df_train <- df_split %>% training()
df_test <- df_split %>% testing()
#10-fold cross validation for tuning
# Resamples of the training set used for tuning: 10 folds (the vfold_cv()
# default, spelled out here), repeated 5 times, stratified by income.
set.seed(12345)
df_fold <-
  df_train %>%
  vfold_cv(
    v = 10,
    repeats = 5,
    strata = income
  )
# Linear regression specifications, one per fitting engine.
# The lm, glm and stan specs carry no tuning placeholders; the glmnet and
# keras specs mark their hyperparameters with tune().

# Engine "lm"
spec_lm <- set_engine(linear_reg(), "lm")

# Engine "glm"
spec_glm <- set_engine(linear_reg(), "glm")

# Engine "glmnet": both penalty and mixture are left to be tuned
spec_glmnet <- set_engine(
  linear_reg(penalty = tune(), mixture = tune()),
  "glmnet"
)

# Engine "keras": only penalty is left to be tuned
spec_keras <- set_engine(
  linear_reg(penalty = tune()),
  "keras"
)

# Engine "stan"
spec_stan <- set_engine(linear_reg(), "stan")
# Workflow set
# Shared preprocessing for every model: methane is the outcome and all other
# columns of df_train are predictors.
# A recipe does not encode factors by itself, and the glmnet and keras
# engines need an all-numeric predictor matrix, so the factor predictors
# (region, income) are expanded to indicator columns here. Indicator columns
# that end up constant are removed, and the predictors are then centered and
# scaled so the penalized fits treat them on a common scale.
basic_recipe <-
  recipe(methane ~ ., data = df_train) %>%
  step_dummy(all_nominal_predictors()) %>%
  step_zv(all_predictors()) %>%
  step_normalize(all_numeric_predictors())

# One workflow per (preprocessor, model) pair; the list names below become
# part of the workflow ids.
all_workflows <-
  workflow_set(
    preproc = list(basic = basic_recipe),
    models = list(
      LM = spec_lm,
      GLM = spec_glm,
      GLMNET = spec_glmnet,
      Keras = spec_keras,
      Stan = spec_stan
    )
  )
# Tuning and evaluating the models
# Control options shared by all workflows: keep the out-of-sample
# predictions and the workflow object, and request parallel processing over
# "everything".
grid_ctrl <- control_grid(
  save_pred = TRUE,
  save_workflow = TRUE,
  parallel_over = "everything"
)

# Apply tune_grid() (the workflow_map() default, named explicitly here) to
# each workflow in the set, on the same resamples, with a grid of size 15
# and a fixed seed.
grid_results <- workflow_map(
  all_workflows,
  fn = "tune_grid",
  seed = 98765,
  resamples = df_fold,
  grid = 15,
  control = grid_ctrl
)
#one of the error lines:
# `y` should be one of the following classes: 'data.frame', 'matrix', 'factor', 'Surv'