import numpy as np # needed for np.nan below (if not already imported at the top of the module)
import scipy as scp
import math
from ...models import GAMM
from .utils import correct_VB
import warnings

def compare_CDL(model1:GAMM,
                model2:GAMM,
                correct_V:bool=True,
                correct_t1:bool=True,
                perform_GLRT:bool=True,
                lR=20,
                nR=5,
                n_c=10,
                alpha=0.05,
                grid='JJJ',
                verbose=False):

    """
    (Optionally) performs an approximate GLRT on twice the difference in unpenalized likelihood between model1 and model2 (see Wood, 2017). Also computes the AIC difference (see Wood et al., 2016).
    For the GLRT to be appropriate, model1 should be set to the model containing more effects and model2 should be a nested, simpler variant of model1.

    For the degrees of freedom of the test, the expected degrees of freedom (EDF) of each model are used (i.e., this is the conditional test discussed in Wood, 2017: 6.12.4).
    The difference in EDF between the models serves as the DoF for computing the Chi-square statistic. Similarly, for each model 2*edf is added to twice the negative (conditional) likelihood to
    compute the AIC (see Wood et al., 2016).
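
    Concretely (mirroring the computation performed further below)::

        stat  = 2 * (llk1 - llk2)          # compared against a Chi-square distribution with DOF1 - DOF2 degrees of freedom
        aic_m = -2 * llk_m + 2 * edf_m     # AIC of model m, with llk_m its unpenalized (conditional) likelihood and edf_m its EDF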

    By default (``correct_V=True``), ``mssm`` will attempt to correct the edf for uncertainty in the estimated \lambda parameters. This requires computing a costly
    correction (see Greven & Scheipl, 2016 and the ``correct_VB`` function in the utils module), which will take quite some time for reasonably large models with more than 3-4 smoothing parameters.
    In that case, relying on CIs and penalty-based comparisons might be preferable (see Marra & Wood, 2011 for details on the latter).

    In case ``correct_t1=True`` and ``correct_V=True``, the EDF used for the GLRT will be set to the smoothness-uncertainty-corrected and smoothness-bias-corrected expected degrees of freedom (t1 in section 6.1.2 of Wood, 2017),
    following the recommendation given in section 6.12.4 of Wood (2017). The AIC (Wood et al., 2016) of both models will still be based on the regular (smoothness-uncertainty-corrected) edf.
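
    Roughly (a sketch only; the exact computation, including the correction for uncertainty in \lambda, is handled by ``correct_VB``): with ``F = (X.T@X + S_\lambda)^{-1} @ X.T @ X``, the edf is ``t = F.trace()``
    and the bias-corrected variant used for the GLRT is ``t1 = 2*t - (F@F).trace()`` (Wood, 2017: 6.1.2).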

    The computation here is different to the one performed by the ``compareML`` function in the R-package ``itsadug``, which rather performs a version of the marginal GLRT
    (also discussed in Wood, 2017: 6.12.4). The p-value is approximate - very **very** much so if ``correct_V=False`` - and the test should not be used to compare models differing in their random effect structures
    (see Wood, 2017: 6.12.4).
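
    Example (a minimal sketch; assumes ``model1`` and ``model2`` are fitted ``GAMM`` models, with ``model2`` a nested, simpler variant of ``model1``, and that ``compare_CDL`` has been imported)::

        result = compare_CDL(model1, model2, correct_V=True, correct_t1=True, perform_GLRT=True)

        print(result["p"], result["H1"])   # approximate GLRT p-value and whether the NULL is rejected at alpha
        print(result["aic_diff"])          # aic1 - aic2; negative values favor model1

        # If the lambda-uncertainty correction is too costly, it can be skipped:
        result = compare_CDL(model1, model2, correct_V=False, correct_t1=False, perform_GLRT=False)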

    References:
     - Marra, G., & Wood, S. N. (2011). Practical variable selection for generalized additive models.
     - Wood, S. N., Pya, N., & Saefken, B. (2016). Smoothing Parameter and Model Selection for General Smooth Models.
     - Greven, S., & Scheipl, F. (2016). Comment on: Smoothing Parameter and Model Selection for General Smooth Models.
     - Wood, S. N. (2017). Generalized Additive Models: An Introduction with R, Second Edition (2nd ed.).
     - ``compareML`` function from ``itsadug`` R-package: https://rdrr.io/cran/itsadug/man/compareML.html
     - ``anova.gam`` function from ``mgcv``, see: https://www.rdocumentation.org/packages/mgcv/versions/1.9-1/topics/anova.gam
    """

    if type(model1.family) != type(model2.family):
        raise ValueError("Both models should be estimated using the same family.")

    if perform_GLRT and model1.formula.n_coef < model2.formula.n_coef:
        raise ValueError("For the GLRT, model1 needs to be set to the more complex model (i.e., needs to have more coefficients than model2).")

    # Collect total DOF, correcting for uncertainty in \lambda via the correction proposed by Greven & Scheipl (2016)
    if correct_V:
        if verbose:
            print("Correcting for uncertainty in lambda estimates...\n")
        _,_,DOF1,DOF12 = correct_VB(model1,nR=nR,lR=lR,n_c=n_c,form_t1=correct_t1,grid_type=grid,verbose=verbose)
        _,_,DOF2,DOF22 = correct_VB(model2,nR=nR,lR=lR,n_c=n_c,form_t1=correct_t1,grid_type=grid,verbose=verbose)

        if correct_t1:
            # Section 6.12.4 of Wood (2017) suggests replacing t (edf) with t1 (2*t - (F@F).trace()) with F=(X.T@X+S_\lambda)^{-1}@X.T@X for the GLRT - with the latter also being corrected for
            # uncertainty in lambda. However, Wood et al. (2016) suggest that the AIC should be computed based on t - so some book-keeping is necessary.
            aic_DOF1 = DOF1
            aic_DOF2 = DOF2
            DOF1 = DOF12
            DOF2 = DOF22

    else:
        DOF1 = model1.edf
        DOF2 = model2.edf

    # Compute the un-penalized likelihood based on the scale estimate of the more complex model (in terms of edf - so actually the more complex one), if a scale was estimated (see section 3.1.4, Wood, 2017).
    ext_scale = None
    if model1.family.twopar:
        if DOF1 > DOF2:
            _,ext_scale = model1.get_pars()
        else:
            _,ext_scale = model2.get_pars()

    llk1 = model1.get_llk(penalized=False,ext_scale=ext_scale)
    llk2 = model2.get_llk(penalized=False,ext_scale=ext_scale)

    # Compute Chi-square statistic...
    stat = 2 * (llk1 - llk2)
    test_stat = stat

    # ... and degrees of freedom under NULL (see Wood, 2017)
    DOF_diff = DOF1-DOF2
    test_DOF_diff = abs(DOF_diff)

    # Multiple scenarios that this test needs to cover...
    # 1) LLK1 < LLK2, DOF1 < DOF2; This is a valid test, essentially model2 turns out to be the more complicated one.
    # 2) LLK1 < LLK2, DOF1 > DOF2; This makes no sense. Model 1 - the more complex one - has a worse llk but more DOF.
    # 3) LLK1 > LLK2, DOF1 < DOF2; Notationally correct: model1 should after all be more complex. But in terms of edf it makes little sense (as pointed out by Wood, 2017).
    # 4) LLK1 > LLK2, DOF1 > DOF2; Valid, the inverse of case 1.

    # Personally, I think cases 2 & 3 should both return NAs for p-values... But anova.gam from mgcv returns a p-value for case 3, so we will do the same here
    # and just raise a warning. For case 1, we need to take -1*test_stat.
    if llk1 < llk2 and DOF1 < DOF2:
        test_stat = -1*test_stat

    # Compute p-value under the reference distribution.
    if not perform_GLRT or test_stat < 0: # Correct for the aforementioned possibility 2: model 1 has lower llk and higher edf.
        H1 = np.nan
        p = np.nan
    else:
        if llk1 > llk2 and DOF1 < DOF2:
            warnings.warn("Model with more coefficients has higher likelihood but lower expected degrees of freedom. Interpret results with caution.")

        p = 1 - scp.stats.chi2.cdf(test_stat,test_DOF_diff)

        # Reject NULL?
        H1 = p <= alpha

    # Also correct the AIC for the GAM (see Wood et al., 2016)
    if correct_V and correct_t1: # aic_DOF1/aic_DOF2 only exist if the \lambda-uncertainty correction was performed
        aic1 = -2*llk1 + 2*aic_DOF1
        aic2 = -2*llk2 + 2*aic_DOF2
    else:
        aic1 = -2*llk1 + 2*DOF1
        aic2 = -2*llk2 + 2*DOF2

    result = {"H1":H1,
              "p":p,
              "chi^2":stat,
              "DOF1":DOF1,
              "DOF2":DOF2,
              "Res. DOF":DOF_diff,
              "aic1":aic1,
              "aic2":aic2,
              "aic_diff":aic1-aic2}

    return result