Skip to content

Issue interactions lightgbm uneven columns when rbindlist(treeList) #6

@felxcon

Description

@felxcon

Hi,
I tried to run your function EIX::interactions on a data set with one response column (qf2020_div) plus 8 numeric predictor columns and about 80,000 rows, together with a tuned lightgbm model. However, the following error is reported:
"Error in rbindlist(treeList) :
Item 97 has 13 columns, inconsistent with item 1 which has 19 columns. To fill missing columns use fill=TRUE."
This does not happen when running the default lightgbm model parameter settings.

When I try it with a small subset (100 rows), I encounter the same "uneven columns" error when the parameters from num_leaves to min_gain_to_split are turned off. Otherwise,
"Error: comparison (1) is possible only for atomic and list types" happens, or that error happens: "". Any idea why this occurs?

I used this code:

# 100-row sample of the modelling data, pasted from dput(): the response
# `qf2020_div` followed by eight numeric predictor columns.
# The `.internal.selfref = <pointer: ...>` attribute that dput() prints for a
# data.table is a session-specific external pointer and is not valid R syntax,
# so it is omitted here to keep the literal parseable.
mmmf_df_100 <- structure(list(qf2020_div = c(-0.683344740108416, -0.62200251820213,
-0.660933392581695, -0.931454042941375, -0.678846234812683, -0.678709195184706,
-0.62200251820213, -0.619032040654088, -0.741462927558781, -0.882350949746443,
-0.747540455479868, -0.743496834435778, 0, -0.63301735032532,
-0.850596218655163, -0.860808275916884, -0.62200251820213, -0.669529409627363,
-0.675469611757471, 0, 0, 0, -0.388044330254854, -0.850634478054759,
0, 0, -0.617546396858118, -0.8891822325675, -0.703075765512668,
-0.886130787928763, -0.681806828303268, 0, -0.88604646624308,
-0.926167021298114, -0.692090760819216, -0.660933392581695, -0.83931706735653,
-0.881578476738358, -0.684460497124147, -0.705416304923849, -0.685713271747449,
-0.686152296703342, -0.88723658127604, -0.846382748304772, -0.62200251820213,
-0.720211468617393, -0.684998539883293, -0.675830994910749, -0.61719971562315,
-0.908777071672487, 0, 0, 0, -0.813671235655738, 0, -0.886130787928763,
0, -0.388179591352467, -0.889236363195927, -0.883763006684634,
0, -0.681806828303268, -0.692090760819216, -0.670785617377905,
-0.675573715067695, 0, -0.746739480916366, -0.684460497124147,
-0.738360299567337, 0, -0.692090760819216, 0, -0.640423140555064,
-0.695504563944157, 0, -0.613657933810985, -0.74923545834839,
-0.660933392581695, -0.821653413397282, -0.738971403646119, -0.61719971562315,
-0.678846234812683, -0.819372375152443, -0.720211468617393, -0.886130787928763,
-0.629409957539496, -0.680296374263876, 0, -0.844873743332596,
-0.619032040654088, 0, -0.670286891070436, -0.678278455996463,
-0.739735765831987, -0.602360477184269, 0, -0.692034388476076,
-0.675469611757471, -0.886130787928763, -0.684998539883293),
watershed = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3), compactnes = c(0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553,
0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553, 0.78553
), mmm_fsize = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), curve_numbe = c(65, 70, 70,
87, 65, 65, 70, 70, 65, 84, 65, 65, 56, 70, 77, 77, 70, 65,
65, 50, 50, 56, 56, 77, 56, 50, 70, 77, 65, 79, 70, 50, 77,
87, 65, 70, 77, 84, 70, 65, 70, 70, 79, 77, 70, 65, 70, 70,
70, 79, 56, 50, 50, 83, 56, 79, 50, 56, 84, 79, 56, 70, 65,
65, 65, 56, 65, 70, 65, 50, 65, 50, 70, 65, 50, 70, 65, 70,
83, 65, 70, 65, 83, 65, 79, 70, 70, 50, 77, 70, 56, 65, 65,
65, 70, 50, 65, 65, 79, 70), hsg = c(0, 2, 2, 0, 0, 0, 2,
2, 0, 2, 0, 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0,
2, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0, 2,
0, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0,
0, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0, 2, 2, 0, 2, 0, 2,
0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 2), aspect = c(86.1312789916992,
16.4371280670166, 5.29735994338989, 201.93327331543, 322.773468017578,
89.2040252685547, 219.443618774414, 314.053527832031, 180.033554077148,
312.130004882812, 193.037216186523, 9.60710716247559, 50.0658378601074,
279.368316650391, 63.079231262207, 296.165985107422, 249.407730102539,
264.652557373047, 263.472015380859, 113.461738586426, 352.356231689453,
73.4116973876953, 325.854156494141, 153.332122802734, 111.455612182617,
213.973266601562, 235.915802001953, 44.4550132751465, 266.452331542969,
130.592666625977, 278.087646484375, 12.3049230575562, 194.30876159668,
269.795562744141, 273.022857666016, 181.224151611328, 27.4992580413818,
8.57164478302002, 198.986557006836, 33.6422309875488, 38.4557991027832,
178.001922607422, 200.945281982422, 359.575592041016, 348.970916748047,
145.922546386719, 303.911651611328, 272.455993652344, 337.481353759766,
83.7395782470703, 3.79256057739258, 16.1068820953369, 342.483032226562,
250.845794677734, 202.625839233398, 115.868446350098, 125.002998352051,
54.4174537658691, 136.00732421875, 238.046249389648, 203.473831176758,
288.734497070312, 106.890609741211, 128.663162231445, 12.6799297332764,
71.0660247802734, 281.640441894531, 154.839492797852, 312.834503173828,
275.901824951172, 39.8677558898926, 90.3907852172852, 194.46012878418,
302.63037109375, 19.0449523925781, 12.9935855865479, 132.882751464844,
97.9574356079102, 336.990753173828, 59.7389221191406, 157.052444458008,
329.076629638672, 41.2350616455078, 49.0847129821777, 39.1097717285156,
44.3229064941406, 351.448303222656, 231.318969726562, 291.198272705078,
225.222579956055, 224.549331665039, 244.679962158203, 263.660064697266,
191.419311523438, 205.053527832031, 77.8348999023438, 320.839141845703,
270.162658691406, 147.001251220703, 101.014167785645), number_rain = c(4.11478662490845,
4.11478662490845, 4.69904613494873, 4.22641563415527, 4.11478662490845,
4.11478662490845, 4.11478662490845, 4.11478662490845, 5.06366062164307,
4.97407245635986, 4.97407245635986, 5.06366062164307, 4.22641563415527,
4.22641563415527, 4.11478662490845, 4.22641563415527, 4.11478662490845,
4.11478662490845, 4.11478662490845, 4.11478662490845, 4.11478662490845,
4.11478662490845, 4.11478662490845, 4.11478662490845, 4.4235634803772,
4.22641563415527, 4.11478662490845, 5.12175559997559, 4.22641563415527,
4.22641563415527, 5.06366062164307, 5.12175559997559, 5.06366062164307,
4.22641563415527, 4.22641563415527, 4.69904613494873, 4.11478662490845,
5.06366062164307, 5.06366062164307, 4.22641563415527, 5.06366062164307,
4.97407245635986, 4.22641563415527, 4.11478662490845, 4.11478662490845,
4.69904613494873, 5.06366062164307, 5.06366062164307, 4.11478662490845,
4.97407245635986, 4.22641563415527, 4.11478662490845, 5.12175559997559,
4.11478662490845, 4.22641563415527, 4.22641563415527, 4.22641563415527,
4.11478662490845, 4.11478662490845, 4.22641563415527, 4.11478662490845,
5.06366062164307, 4.22641563415527, 4.11478662490845, 4.11478662490845,
5.12175559997559, 5.06366062164307, 5.06366062164307, 5.06366062164307,
4.11478662490845, 4.22641563415527, 4.11478662490845, 4.22641563415527,
4.22641563415527, 4.22641563415527, 4.11478662490845, 4.97407245635986,
4.69904613494873, 4.11478662490845, 5.06366062164307, 4.11478662490845,
4.11478662490845, 4.11478662490845, 4.69904613494873, 4.22641563415527,
4.22641563415527, 5.12175559997559, 5.06366062164307, 4.11478662490845,
4.11478662490845, 4.22641563415527, 4.11478662490845, 4.11478662490845,
5.06366062164307, 4.11478662490845, 4.97407245635986, 4.22641563415527,
4.11478662490845, 4.22641563415527, 5.06366062164307), precipitat = c(105.631990780906,
105.631990780906, 113.525026987469, 95.7414173890674, 108.81347918132,
111.585622257657, 105.631990780906, 107.371452626728, 108.427198425172,
104.19571208197, 106.32300672077, 108.237226047213, 108.663529055459,
105.526280009557, 105.631990780906, 103.14488716731, 105.631990780906,
117.181404984187, 111.968032095167, 105.631990780906, 105.631990780906,
117.875289985112, 114.660130235884, 105.89557216281, 117.985597080655,
105.526280009557, 110.971759538802, 113.277256125496, 103.226915200551,
95.7414173890674, 107.448963218265, 113.277256125496, 115.204860694825,
102.406323243701, 108.663529055459, 113.525026987469, 116.909620224483,
106.32300672077, 104.19571208197, 100.017380517627, 105.95153774534,
105.95153774534, 96.1293804077875, 111.585622257657, 105.631990780906,
117.757825435154, 105.14771898966, 111.93966234298, 108.81347918132,
106.32300672077, 103.226915200551, 114.660130235884, 111.252964557163,
116.909620224483, 105.89557216281, 95.7414173890674, 96.1293804077875,
111.968032095167, 118.493426474314, 99.5515276136853, 117.875289985112,
107.448963218265, 108.663529055459, 117.875289985112, 112.081975460053,
107.530949713692, 105.14771898966, 104.19571208197, 113.237426341526,
118.493426474314, 108.663529055459, 105.631990780906, 103.226915200551,
105.526280009557, 102.406323243701, 112.081975460053, 105.95153774534,
113.525026987469, 111.585622257657, 111.93966234298, 108.81347918132,
108.81347918132, 112.081975460053, 117.757825435154, 95.7414173890674,
108.663529055459, 111.608108346424, 113.237426341526, 111.900469795106,
107.371452626728, 108.146229834784, 116.909620224483, 108.206884308467,
112.154623977722, 119.449564721849, 106.32300672077, 105.89557216281,
111.968032095167, 95.7414173890674, 105.14771898966)), row.names = c(NA,
-100L), class = c("data.table", "data.frame"))

# Reproduction script: train a tuned LightGBM regression model on the
# 100-row sample and extract feature interactions with EIX.

# Preprocess the data frame. The sample defined above is `mmmf_df_100`;
# the previous call referenced `mmmf_df`, which is never defined here.
mmmf_df_rules <- lightgbm::lgb.convert_with_rules(data = mmmf_df_100)

# Extract the prepared data frame.
mmmf_df_prep <- mmmf_df_rules$data

# Remove the dependent variable and keep every predictor. Selecting by name
# avoids the earlier positional `2:8`, which also dropped the last predictor
# column (`precipitat`) of the nine-column table.
feature_cols <- setdiff(names(mmmf_df_prep), "qf2020_div")
mmmf_df_prep_indie_vars <- as.matrix(
  mmmf_df_prep[, feature_cols, with = FALSE]
)

# Create the dataset used for lightgbm model training.
mmmf_lgb_ds <- lightgbm::lgb.Dataset(
  data = mmmf_df_prep_indie_vars,
  label = mmmf_df_prep$qf2020_div
)

# Alternative: build a sparse model matrix from a formula.
# `with = FALSE` is a data.table subsetting argument, not an argument of
# sparse.model.matrix(), so it is not passed here. The label is supplied so
# that this dataset can be used for training as well.
mmmf_df_prep_indie_vars_2 <- Matrix::sparse.model.matrix(
  qf2020_div ~ .,
  data = mmmf_df_prep
)
mmmf_lgb_ds_2 <- lightgbm::lgb.Dataset(
  data = mmmf_df_prep_indie_vars_2,
  label = mmmf_df_prep$qf2020_div
)

# Parameter space taken from the tuned lightgbm model.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables in R.
params <- list(
  objective = "regression",
  num_leaves = 100,
  num_iterations = 1863,
  learning_rate = 0.2556561,
  max_depth = 12,
  min_data_in_leaf = 34,
  min_gain_to_split = 0.001104944,
  num_threads = 1,
  boosting = "goss",
  tree_learner = "data",
  extra_trees = TRUE,
  monotone_constraints_method = "advanced",
  feature_pre_filter = FALSE,
  pre_partition = TRUE,
  two_round = FALSE,
  force_row_wise = TRUE,
  force_col_wise = FALSE,
  device_type = "cpu",
  verbosity = -1
)

# Train the model on the data with the settings above.
lgb_model_intax <- lightgbm::lgb.train(params = params, data = mmmf_lgb_ds)

# Check that trees were created.
treedt <- lightgbm::lgb.model.dt.tree(lgb_model_intax)

# Extract the interactions for plotting.
# NOTE(review): the feature matrix is passed as `data` instead of the
# lgb.Dataset object, on the assumption that EIX expects the table used to
# train the model; handing it the Dataset object is the presumed cause of
# "comparison (1) is possible only for atomic and list types" -- confirm
# against the EIX documentation.
inter <- EIX::interactions(
  lgb_model_intax,
  mmmf_df_prep_indie_vars,
  option = "interactions"
)
plot(inter)

Of course I could also create and provide a reprex.

Happy for any advice :)

Felix

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions