Commit

change in Laplace approximation: reset mode to zero in every call to log-likelihood
fabsig committed Apr 3, 2024
1 parent 7b2efbb commit 49963a1
Showing 11 changed files with 237 additions and 222 deletions.
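For context on what changed: with a non-Gaussian likelihood, GPBoost evaluates an approximate negative log-marginal-likelihood via a Laplace approximation, which requires finding the posterior mode of the latent effects with an inner Newton iteration. This commit resets that mode to zero at the start of every log-likelihood call instead of reusing the mode left over from a previous call, so evaluations no longer depend on call history; that is why many reference values in the tests below change slightly. The following is a minimal sketch of the idea on a deliberately simple model (independent latent effects with a Poisson likelihood), not GPBoost's implementation; all names in it are hypothetical.

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Toy Laplace-approximated marginal likelihood for
//   b_i ~ N(0, sigma2),  y_i | b_i ~ Poisson(exp(b_i)).
class LaplacePoissonNLL {
public:
    explicit LaplacePoissonNLL(std::vector<double> y)
        : y_(std::move(y)), mode_(y_.size(), 0.0) {}

    double NegLogLik(double sigma2) {
        // This commit's change, in miniature: start the inner Newton iteration
        // from zero on every call instead of warm-starting from mode_ as left
        // by the previous call.
        std::fill(mode_.begin(), mode_.end(), 0.0);
        const double two_pi = 6.283185307179586;
        double nll = 0.0;
        for (std::size_t i = 0; i < y_.size(); ++i) {
            double& b = mode_[i];
            for (int it = 0; it < 100; ++it) {  // Newton search for the posterior mode
                const double g1 = y_[i] - std::exp(b) - b / sigma2;  // d/db log p(y_i, b)
                const double g2 = -std::exp(b) - 1.0 / sigma2;       // second derivative (< 0)
                const double step = g1 / g2;
                b -= step;
                if (std::fabs(step) < 1e-10) break;
            }
            // Log joint density log p(y_i, b) at the mode
            const double log_joint = y_[i] * b - std::exp(b) - std::lgamma(y_[i] + 1.0)
                                     - 0.5 * b * b / sigma2 - 0.5 * std::log(two_pi * sigma2);
            // Laplace: log p(y_i) ~= log_joint + 0.5*log(2*pi) - 0.5*log(-g2 at the mode)
            nll -= log_joint + 0.5 * std::log(two_pi)
                   - 0.5 * std::log(std::exp(b) + 1.0 / sigma2);
        }
        return nll;
    }

private:
    std::vector<double> y_;
    std::vector<double> mode_;  // before this commit, such state persisted between calls
};

int main() {
    LaplacePoissonNLL nll({0.0, 1.0, 3.0, 2.0});
    std::printf("nll(0.5) = %f\n", nll.NegLogLik(0.5));
    std::printf("nll(2.0) = %f\n", nll.NegLogLik(2.0));
    // With the per-call reset, this reproduces the first value exactly,
    // independent of the call in between:
    std::printf("nll(0.5) = %f\n", nll.NegLogLik(0.5));
    return 0;
}

The cost of the reset is a few extra Newton iterations per call; the benefit is that the returned value is a deterministic function of the covariance parameters and the data alone.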
@@ -154,11 +154,11 @@ if(Sys.getenv("GPBOOST_ALL_TESTS") == "GPBOOST_ALL_TESTS"){
# Evaluate negative log-likelihood
nll <- gp_model$neg_log_likelihood(cov_pars=c(0.1,1), y=y)
expect_lt(abs(nll-2282.073),1E-2)
nll <- neg_log_likelihood(gp_model, cov_pars=c(0.1,1), y=y)
nll <- gpboost::neg_log_likelihood(gp_model, cov_pars=c(0.1,1), y=y)
expect_lt(abs(nll-2282.073),1E-2)
fixed_effects <- rep(1, length(y))
nll1 <- neg_log_likelihood(gp_model, cov_pars=c(0.1,1), y=(y-fixed_effects))
nll2 <- neg_log_likelihood(gp_model, cov_pars=c(0.1,1), y=y, fixed_effects=fixed_effects)
nll1 <- gpboost::neg_log_likelihood(gp_model, cov_pars=c(0.1,1), y=(y-fixed_effects))
nll2 <- gpboost::neg_log_likelihood(gp_model, cov_pars=c(0.1,1), y=y, fixed_effects=fixed_effects)
expect_lt(abs(nll1-nll2),1E-6)

# Do optimization using optim and e.g. Nelder-Mead
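Why nll1 and nll2 in the hunk above must agree: fixed effects enter the model only through the mean of the response (for non-Gaussian likelihoods, through the mean of the latent variable). In the Gaussian case, with marginal covariance $\Sigma(\theta)$ and fixed-effects vector $F$,

$$
-\log p(y \mid \theta, F)
= \tfrac{1}{2}(y - F)^\top \Sigma(\theta)^{-1} (y - F)
+ \tfrac{1}{2}\log\det\Sigma(\theta) + \tfrac{n}{2}\log(2\pi)
= -\log p(y - F \mid \theta, 0),
$$

so passing fixed_effects=F is mathematically identical to evaluating at y - F, which the test checks up to a 1E-6 tolerance.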
261 changes: 127 additions & 134 deletions R-package/tests/testthat/test_GPModel_non_Gaussian_data.R

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions R-package/tests/testthat/test_z_GPBoost_algorithm.R
@@ -1045,7 +1045,7 @@ if(Sys.getenv("NO_GPBOOST_ALGO_TESTS") != "NO_GPBOOST_ALGO_TESTS"){
expect_lt(sum(abs(as.vector(gp_model$get_cov_pars())-c(0.009778865, 1.142124739, 0.072746954))),TOLERANCE)
pred <- predict(bst, data = X_test, gp_coords_pred = coords_test, predict_var=TRUE, pred_latent = TRUE)
expect_lt(sum(abs(tail(pred$random_effect_mean, n=4)-c(-1.09009608, -1.02661256, -1.06180549, -0.04424235))),TOLERANCE)
expect_lt(sum(abs(tail(pred$random_effect_cov, n=4)-c(0.3742586, 0.6970324, 0.5897960, 1.1453712))),TOLERANCE)
expect_lt(sum(abs(tail(pred$random_effect_cov, n=4)-c(0.3644896470, 0.6872674831, 0.5800297063, 1.1356006965))),TOLERANCE)
expect_lt(sum(abs(tail(pred$fixed_effect, n=4)-c(4.255524, 4.807404, 4.659824, 4.499290))),TOLERANCE)
})

@@ -1107,7 +1107,7 @@ if(Sys.getenv("NO_GPBOOST_ALGO_TESTS") != "NO_GPBOOST_ALGO_TESTS"){
}
pred <- predict(bst, data = X_test, gp_coords_pred = coords_test, predict_var=TRUE, pred_latent = TRUE)
expect_lt(sum(abs(tail(pred$random_effect_mean, n=4)-c(-0.4672591, -0.8086326, -0.6178553, -0.1621476))),TOLERANCE2)
expect_lt(sum(abs(tail(pred$random_effect_cov, n=4)-c(0.7547910, 0.8706967, 0.8887207, 1.1682794))),TOLERANCE2)
expect_lt(sum(abs(tail(pred$random_effect_cov, n=4)-c(0.2624237699, 0.3784147773, 0.3964287460, 0.6761869249))),TOLERANCE2)
expect_lt(sum(abs(tail(pred$fixed_effect,n=4)-c(4.683135, 4.608892, 4.571550, 4.406394))),TOLERANCE2)
}
})
@@ -3,6 +3,7 @@ if(Sys.getenv("NO_GPBOOST_ALGO_TESTS") != "NO_GPBOOST_ALGO_TESTS"){
context("generalized_GPBoost_combined_boosting_GP_random_effects")

TOLERANCE <- 1E-3
TOLERANCE_LOOSE <- 1E-2
DEFAULT_OPTIM_PARAMS <- list(optimizer_cov="gradient_descent", use_nesterov_acc=TRUE,
delta_rel_conv=1E-6, lr_cov=0.1, lr_coef=0.1)
DEFAULT_OPTIM_PARAMS_V2 <- list(optimizer_cov="gradient_descent", use_nesterov_acc=TRUE,
@@ -367,8 +368,8 @@ if(Sys.getenv("NO_GPBOOST_ALGO_TESTS") != "NO_GPBOOST_ALGO_TESTS"){
bst <- gpboost(data = X_train, label = y_train, gp_model = gp_model,
nrounds = 30, learning_rate = 0.1, max_depth = 6,
min_data_in_leaf = 5, objective = "binary", verbose = 0)
expect_lt(sum(abs(as.vector(gp_model$get_cov_pars())-c(0.4655319683, 0.3553683836))),TOLERANCE)
expect_lt(abs(gp_model$get_current_neg_log_likelihood()-375.4036016), TOLERANCE)
expect_lt(sum(abs(as.vector(gp_model$get_cov_pars())-c(0.4711222111, 0.3547389900))),TOLERANCE)
expect_lt(abs(gp_model$get_current_neg_log_likelihood()-376.5699209), TOLERANCE)

# Validation metrics for training data
# Default metric is "Approx. negative marginal log-likelihood" if there is only one training set
@@ -812,19 +813,19 @@ if(Sys.getenv("NO_GPBOOST_ALGO_TESTS") != "NO_GPBOOST_ALGO_TESTS"){
bst <- gpboost(data = X_train, label = y_train, gp_model = gp_model,
nrounds = 30, learning_rate = 0.1, max_depth = 6,
min_data_in_leaf = 5, objective = "binary", verbose = 0)
expect_lt(sum(abs(as.vector(gp_model$get_cov_pars())-0.9927358)),TOLERANCE)
expect_lt(sum(abs(as.vector(gp_model$get_cov_pars())-0.9823336838)),TOLERANCE)
# Prediction
pred <- predict(bst, data = X_test, group_data_pred = group_data_test,
predict_var = TRUE, pred_latent = FALSE)
expect_lt(sum(abs(tail(pred$response_mean,n=4)-c(0.003543759, 0.588232583, 0.262807287, 0.401855180))),TOLERANCE)
expect_lt(sum(abs(tail(pred$response_mean,n=4)-c(0.003529128402, 0.590128529164, 0.262148832429, 0.409728732652))),TOLERANCE)

# Training using BFGS
gp_model <- GPModel(group_data = group_data_train, likelihood = "bernoulli_probit")
gp_model$set_optim_params(params=list(optimizer_cov="lbfgs"))
bst <- gpboost(data = X_train, label = y_train, gp_model = gp_model,
nrounds = 30, learning_rate = 0.1, max_depth = 6,
min_data_in_leaf = 5, objective = "binary", verbose = 0)
expect_lt(sum(abs(as.vector(gp_model$get_cov_pars())-0.9918843)),TOLERANCE)
expect_lt(sum(abs(as.vector(gp_model$get_cov_pars())-0.984982603)),TOLERANCE)

})

@@ -1188,7 +1189,6 @@ if(Sys.getenv("NO_GPBOOST_ALGO_TESTS") != "NO_GPBOOST_ALGO_TESTS"){
expect_lt(abs(cvbst$best_score-expcet_score), TOLERANCE)
})

# This is a slow test
test_that("GPBoost algorithm for binary classification with combined Gaussian process and grouped random effects model", {

ntrain <- ntest <- 500
@@ -1259,7 +1259,7 @@ if(Sys.getenv("NO_GPBOOST_ALGO_TESTS") != "NO_GPBOOST_ALGO_TESTS"){
pred <- predict(bst, data = X_test, gp_coords_pred = coords_test,
group_data_pred = group_data_test,
predict_var = TRUE, pred_latent = FALSE)
expect_lt(sum(abs(tail(pred$response_mean, n=4)-c(0.7599814, 0.5543266, 0.1063388, 0.5439135))),TOLERANCE)
expect_lt(sum(abs(tail(pred$response_mean, n=4)-c(0.7603291094, 0.5542489304, 0.1066844819, 0.5442139709))),TOLERANCE)
expect_lt(sum(abs(tail(pred$response_var, n=4)-c(0.18240965, 0.24704862, 0.09503084, 0.24807160))),TOLERANCE)

# # The following test is very slow (not run anymore)
@@ -1626,7 +1626,7 @@ if(Sys.getenv("NO_GPBOOST_ALGO_TESTS") != "NO_GPBOOST_ALGO_TESTS"){
pred <- predict(bst, data = X_test, group_data_pred = group_data_test,
predict_var = TRUE, pred_latent = FALSE)
expect_lt(sum(abs(tail(pred$response_mean, n=4)-c(0.04968272, 4.08967031, 0.55919834, 2.89184563))),TOLERANCE)
expect_lt(sum(abs(tail(pred$response_var, n=4)-c(0.002805737, 83.861102898, 1.567890445, 41.930899945))),TOLERANCE)
expect_lt(sum(abs(tail(pred$response_var, n=4)-c(0.002805742835, 83.868581179309, 1.568033920227, 41.934663096455))),TOLERANCE)

# Also estimate shape parameter
gp_model <- GPModel(group_data = group_data_train, likelihood = "gamma")
@@ -1713,7 +1713,7 @@ if(Sys.getenv("NO_GPBOOST_ALGO_TESTS") != "NO_GPBOOST_ALGO_TESTS"){
pred <- predict(bst, data = X_test, group_data_pred = group_data_test,
predict_var = TRUE, pred_latent = FALSE)
expect_lt(sum(abs(tail(pred$response_mean, n=4)-c( 0.2521111, 3.0180883, 1.0052383, 3.0666018))),TOLERANCE)
expect_lt(sum(abs(tail(pred$response_var, n=4)-c( 0.338194, 45.251929, 5.690510, 46.669110))),TOLERANCE)
expect_lt(sum(abs(tail(pred$response_var, n=4)-c( 0.338194, 45.251929, 5.690510, 46.669110))), TOLERANCE_LOOSE)

# Also estimate shape parameter
gp_model <- GPModel(group_data = group_data_train, likelihood = "negative_binomial")
@@ -1722,9 +1722,9 @@ if(Sys.getenv("NO_GPBOOST_ALGO_TESTS") != "NO_GPBOOST_ALGO_TESTS"){
gp_model$set_optim_params(params=params_shape)
bst <- gpboost(data = dtrain, gp_model = gp_model, nrounds = 30,
learning_rate = 0.1, max_depth = 6, min_data_in_leaf = 5, verbose = 0)
cov_pars_est <- c(0.5693560, 0.4920208)
expect_lt(sum(abs(as.vector(gp_model$get_cov_pars())-cov_pars_est)),TOLERANCE)
expect_lt(sum(abs(as.vector(gp_model$get_aux_pars())-2.768788)),TOLERANCE)
cov_pars_est <- c(0.5765166557, 0.4890741572)
expect_lt(sum(abs(as.vector(gp_model$get_cov_pars())-cov_pars_est)),TOLERANCE_LOOSE)
expect_lt(sum(abs(as.vector(gp_model$get_aux_pars())-2.891514051 )),TOLERANCE_LOOSE)
})

test_that("Saving and loading a booster with a gp_model for non-Gaussian data ", {
@@ -1972,10 +1972,10 @@ if(Sys.getenv("NO_GPBOOST_ALGO_TESTS") != "NO_GPBOOST_ALGO_TESTS"){
data = dtrain, gp_model = gp_model, verbose_eval = 1,
nrounds = 100, early_stopping_rounds = 5,
eval = "test_neg_log_likelihood", folds = folds)
expect_lt(abs(opt_params$best_score-1.560793),TOLERANCE)
expect_equal(opt_params$best_iter,17)
expect_lt(abs(opt_params$best_score-1.569086827),TOLERANCE)
expect_equal(opt_params$best_iter,18)
expect_equal(opt_params$best_params$learning_rate,0.11)
expect_equal(opt_params$best_params$max_bin,255)
expect_equal(opt_params$best_params$max_bin,10)

})

2 changes: 1 addition & 1 deletion VERSION.txt
@@ -1 +1 @@
1.3.1
1.3.2
7 changes: 4 additions & 3 deletions external_libs/LBFGSpp/include/LBFGS.h
@@ -1,5 +1,5 @@
// Copyright (C) 2016-2023 Yixuan Qiu <yixuan.qiu@cos.name>
// Modified work Copyright (c) 2024 Fabio Sigrist. All rights reserved.
// Modified work Copyright (c) 2024 Fabio Sigrist. All rights reserved.
// Under MIT license

#ifndef LBFGSPP_LBFGS_H
@@ -89,7 +89,8 @@ class LBFGSSolver
const int fpast = m_param.past;

// Evaluate function and compute gradient
fx = f(x, m_grad, true);
fx = f(x, m_grad, true, true);// ChangedForGPBoost

m_gnorm = m_grad.norm();
if (fpast > 0)
m_fx[0] = fx;
@@ -184,7 +185,7 @@
if (f.LearnCovarianceParameters() && f.ShouldRedetermineNearestNeighborsVecchia())
{
f.RedetermineNearestNeighborsVecchia(); // called only in certain iterations if gp_approx == "vecchia" and neighbors are selected based on correlations and not distances
fx = f(x, m_grad, false);
fx = f(x, m_grad, true, false);
m_bfgs.apply_Hv(m_grad, -Scalar(1), m_drt);
if (fpast > 0)
{
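The new four-argument call reflects a split of LBFGSpp's old single flag into two. Judging from the call sites in this commit (value-only calls during the line search, f(x, grad, true, false), and gradient-only calls afterwards, f(x, grad, false, true), commented "calculate gradient"), the first boolean requests a fresh evaluation of the approximate log-likelihood, which now re-finds the Laplace mode from zero, and the second requests the gradient at the current mode. Below is a minimal sketch of a functor with such an interface against a toy objective; the names calc_likelihood and calc_gradient are assumptions for illustration, not GPBoost's identifiers.

#include <Eigen/Core>
#include <cstdio>

// Hypothetical objective functor matching the four-argument call sites above:
//   fx = f(x, grad, true,  true );  // initial point: value and gradient
//   fx = f(x, grad, true,  false);  // line-search candidate: value only
//        f(x, grad, false, true );  // accepted point: gradient only
struct Objective {
    double fx_cached = 0.0;  // value from the most recent likelihood evaluation

    double operator()(const Eigen::VectorXd& x, Eigen::VectorXd& grad,
                      bool calc_likelihood, bool calc_gradient) {
        if (calc_likelihood) {
            // In GPBoost this would re-run the Laplace mode search from zero.
            fx_cached = 0.5 * x.squaredNorm();  // toy objective for illustration
        }
        if (calc_gradient) {
            grad = x;  // gradient of the toy objective at the current point
        }
        return fx_cached;
    }
};

int main() {
    Objective f;
    Eigen::VectorXd x = Eigen::VectorXd::Constant(3, 2.0), grad(3);
    double fx = f(x, grad, true, true);  // value and gradient together
    f(x, grad, false, true);             // gradient only; cached value untouched
    std::printf("fx = %f, |grad| = %f\n", fx, grad.norm());
    return 0;
}

Separating the two flags lets the backtracking and Nocedal-Wright line searches below probe candidate step sizes without computing gradients, and compute the final gradient without repeating the likelihood evaluation, which is now more expensive because it is no longer warm-started.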
external_libs/LBFGSpp/include/LBFGSpp/LineSearchBacktracking.h
@@ -1,4 +1,5 @@
// Copyright (C) 2016-2023 Yixuan Qiu <yixuan.qiu@cos.name>
// Modified work Copyright (c) 2024 Fabio Sigrist. All rights reserved.
// Under MIT license

#ifndef LBFGSPP_LINE_SEARCH_BACKTRACKING_H
@@ -71,7 +72,7 @@ class LineSearchBacktracking
// x_{k+1} = x_k + step * d_k
x.noalias() = xp + step * drt;
// Evaluate this candidate
fx = f(x, grad, false);// ChangedForGPBoost
fx = f(x, grad, true, false);// ChangedForGPBoost

if (fx > fx_init + step * test_decr || (fx != fx))
{
@@ -124,7 +125,8 @@
step = 0.;
Log::REDebug("GPModel lbfgs: the line search routine reached the maximum number of iterations");
}
f(x, grad, true);//calculate gradient
f(x, grad, false, true);//calculate gradient

}
};

13 changes: 7 additions & 6 deletions external_libs/LBFGSpp/include/LBFGSpp/LineSearchNocedalWright.h
@@ -1,5 +1,6 @@
// Copyright (C) 2016-2023 Yixuan Qiu <yixuan.qiu@cos.name>
// Copyright (C) 2016-2023 Dirk Toewe <DirkToewe@GoogleMail.com>
// Modified work Copyright (c) 2024 Fabio Sigrist. All rights reserved.
// Under MIT license

#ifndef LBFGSPP_LINE_SEARCH_NOCEDAL_WRIGHT_H
@@ -141,7 +142,7 @@ class LineSearchNocedalWright
{
// Evaluate the current step size
x.noalias() = xp + step * drt;
fx = f(x, grad, false);
fx = f(x, grad, true, false);
dg = grad.dot(drt);

// Test the sufficient decrease condition
@@ -158,7 +159,7 @@
// Test the curvature condition
if (std::abs(dg) <= test_curv)
{
f(x, grad, true);//calculate gradient
f(x, grad, false, true);//calculate gradient
return; // Case (4)
}

@@ -191,7 +192,7 @@
// But we need to move {x, grad}_lo back before returning
x.swap(x_lo);
grad.swap(grad_lo);
f(x, grad, true);//calculate gradient
f(x, grad, false, true);//calculate gradient
return;
}

@@ -219,7 +220,7 @@

// Evaluate the current step size
x.noalias() = xp + step * drt;
fx = f(x, grad, false);
fx = f(x, grad, true, false);
dg = grad.dot(drt);

// Test the sufficient decrease condition
@@ -236,7 +237,7 @@
{
// Test the curvature condition
if (std::abs(dg) <= test_curv)
f(x, grad, true);//calculate gradient
f(x, grad, false, true);//calculate gradient
return;

if (dg * (step_hi - step_lo) >= Scalar(0))
@@ -283,7 +284,7 @@
x.swap(x_lo);
grad.swap(grad_lo);
}
f(x, grad, true);//calculate gradient
f(x, grad, false, true);//calculate gradient
return;
}
}
Diffs of the remaining changed files are not rendered.
