BUG: object of type 'NoneType' has no len() when computing log likelihood #6820
Closed
Description
Describe the issue:
I have see this error when computing the log likelihood in hierarchical models. If you do not calculate the log likelihood the model samples just fine and if your model is not hierarchical you can calculate the log likelihood. I have seen this issue in my models but you can reproduce it using the radon example. When computing the log likelihood with either
with partial_pooling:
partial_pooling_trace = pm.sample(tune=2000, random_seed=RANDOM_SEED, idata_kwargs=dict(log_likelihood=True))
or with
with partial_pooling:
pm.compute_log_likelihood(partial_pooling_trace)
The code below is copied out of the simplest partial pooling model from the radon example.
Reproducible code example:
# Reproduction script: simplest partial-pooling model from the PyMC radon example.
import os

os.environ["PATH"] += os.pathsep + 'C:/Program Files/Graphviz/bin'
import warnings

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pt
import seaborn as sns
import xarray as xr

warnings.filterwarnings("ignore", module="scipy")
RANDOM_SEED = 8924

# Load the radon survey data: prefer a local copy, fall back to the PyMC data server.
try:
    srrs2 = pd.read_csv(os.path.join("..", "data", "srrs2.dat"))
except FileNotFoundError:
    srrs2 = pd.read_csv(pm.get_data("srrs2.dat"))
srrs2.columns = srrs2.columns.map(str.strip)
srrs_mn = srrs2[srrs2.state == "MN"].copy()

# Load county-level data the same way.
try:
    cty = pd.read_csv(os.path.join("..", "data", "cty.dat"))
except FileNotFoundError:
    cty = pd.read_csv(pm.get_data("cty.dat"))

# Merge county uranium measurements into the Minnesota radon data via FIPS codes.
srrs_mn["fips"] = srrs_mn.stfips * 1000 + srrs_mn.cntyfips
cty_mn = cty[cty.st == "MN"].copy()
cty_mn["fips"] = 1000 * cty_mn.stfips + cty_mn.ctfips
srrs_mn = srrs_mn.merge(cty_mn[["fips", "Uppm"]], on="fips")
srrs_mn = srrs_mn.drop_duplicates(subset="idnum")
u = np.log(srrs_mn.Uppm).unique()
n = len(srrs_mn)

# Build integer county indices and the log-radon response.
srrs_mn.county = srrs_mn.county.map(str.strip)
county, mn_counties = srrs_mn.county.factorize()
srrs_mn["county_code"] = county
radon = srrs_mn.activity
srrs_mn["log_radon"] = log_radon = np.log(radon + 0.1).values
floor_measure = srrs_mn.floor.values

# FIX: `coords` was used below but never defined, so the script raised a
# NameError before ever reaching the reported TypeError. Define it as in the
# radon example notebook this snippet was copied from. Note that "obs_id" is
# deliberately left without coordinate values — that is what triggers the
# reported `len(None)` error in compute_log_likelihood.
coords = {"county": mn_counties}

with pm.Model(coords=coords) as partial_pooling:
    county_idx = pm.MutableData("county_idx", county, dims="obs_id")
    # Priors
    mu_a = pm.Normal("mu_a", mu=0.0, sigma=10)
    sigma_a = pm.Exponential("sigma_a", 1)
    # Random intercepts
    alpha = pm.Normal("alpha", mu=mu_a, sigma=sigma_a, dims="county")
    # Model error
    sigma_y = pm.Exponential("sigma_y", 1)
    # Expected value
    y_hat = alpha[county_idx]
    # Data likelihood
    y_like = pm.Normal("y_like", mu=y_hat, sigma=sigma_y, observed=log_radon, dims="obs_id")

with partial_pooling:
    partial_pooling_trace = pm.sample(tune=2000, random_seed=RANDOM_SEED, idata_kwargs=dict(log_likelihood=True))
Error message:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[27], line 2
1 with partial_pooling:
----> 2 partial_pooling_trace = pm.sample(tune=2000, random_seed=RANDOM_SEED, idata_kwargs=dict(log_likelihood=True))
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymc\sampling\mcmc.py:791, in sample(draws, tune, chains, cores, random_seed, progressbar, step, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, model, **kwargs)
787 t_sampling = time.time() - t_start
789 # Packaging, validating and returning the result was extracted
790 # into a function to make it easier to test and refactor.
--> 791 return _sample_return(
792 run=run,
793 traces=traces,
794 tune=tune,
795 t_sampling=t_sampling,
796 discard_tuned_samples=discard_tuned_samples,
797 compute_convergence_checks=compute_convergence_checks,
798 return_inferencedata=return_inferencedata,
799 keep_warning_stat=keep_warning_stat,
800 idata_kwargs=idata_kwargs or {},
801 model=model,
802 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymc\sampling\mcmc.py:859, in _sample_return(run, traces, tune, t_sampling, discard_tuned_samples, compute_convergence_checks, return_inferencedata, keep_warning_stat, idata_kwargs, model)
857 ikwargs: Dict[str, Any] = dict(model=model, save_warmup=not discard_tuned_samples)
858 ikwargs.update(idata_kwargs)
--> 859 idata = pm.to_inference_data(mtrace, **ikwargs)
861 if compute_convergence_checks:
862 warns = run_convergence_checks(idata, model)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymc\backends\arviz.py:519, in to_inference_data(trace, prior, posterior_predictive, log_likelihood, coords, dims, sample_dims, model, save_warmup, include_transformed)
505 if isinstance(trace, InferenceData):
506 return trace
508 return InferenceDataConverter(
509 trace=trace,
510 prior=prior,
511 posterior_predictive=posterior_predictive,
512 log_likelihood=log_likelihood,
513 coords=coords,
514 dims=dims,
515 sample_dims=sample_dims,
516 model=model,
517 save_warmup=save_warmup,
518 include_transformed=include_transformed,
--> 519 ).to_inference_data()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymc\backends\arviz.py:442, in InferenceDataConverter.to_inference_data(self)
439 if self.log_likelihood:
440 from pymc.stats.log_likelihood import compute_log_likelihood
--> 442 idata = compute_log_likelihood(
443 idata,
444 var_names=None if self.log_likelihood is True else self.log_likelihood,
445 extend_inferencedata=True,
446 model=self.model,
447 sample_dims=self.sample_dims,
448 progressbar=False,
449 )
450 return idata
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymc\stats\log_likelihood.py:116, in compute_log_likelihood(idata, var_names, extend_inferencedata, model, sample_dims, progressbar)
111 for key, array in loglike_trace.items():
112 loglike_trace[key] = array.reshape(
113 (*[len(coord) for coord in stacked_dims.values()], *array.shape[1:])
114 )
--> 116 loglike_dataset = dict_to_dataset(
117 loglike_trace,
118 library=pymc,
119 dims={dname: list(dvals) for dname, dvals in model.named_vars_to_dims.items()},
120 coords={
121 cname: np.array(cvals) if isinstance(cvals, tuple) else cvals
122 for cname, cvals in model.coords.items()
123 },
124 default_dims=list(sample_dims),
125 skip_event_dims=True,
126 )
128 if extend_inferencedata:
129 idata.add_groups(dict(log_likelihood=loglike_dataset))
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\arviz\data\base.py:306, in dict_to_dataset(data, attrs, library, coords, dims, default_dims, index_origin, skip_event_dims)
303 if dims is None:
304 dims = {}
--> 306 data_vars = {
307 key: numpy_to_data_array(
308 values,
309 var_name=key,
310 coords=coords,
311 dims=dims.get(key),
312 default_dims=default_dims,
313 index_origin=index_origin,
314 skip_event_dims=skip_event_dims,
315 )
316 for key, values in data.items()
317 }
318 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\arviz\data\base.py:307, in <dictcomp>(.0)
303 if dims is None:
304 dims = {}
306 data_vars = {
--> 307 key: numpy_to_data_array(
308 values,
309 var_name=key,
310 coords=coords,
311 dims=dims.get(key),
312 default_dims=default_dims,
313 index_origin=index_origin,
314 skip_event_dims=skip_event_dims,
315 )
316 for key, values in data.items()
317 }
318 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\arviz\data\base.py:231, in numpy_to_data_array(ary, var_name, coords, dims, default_dims, index_origin, skip_event_dims)
228 else:
229 ary = utils.one_de(ary)
--> 231 dims, coords = generate_dims_coords(
232 ary.shape[len(default_dims) :],
233 var_name,
234 dims=dims,
235 coords=coords,
236 default_dims=default_dims,
237 index_origin=index_origin,
238 skip_event_dims=skip_event_dims,
239 )
241 # reversed order for default dims: 'chain', 'draw'
242 if "draw" not in dims and "draw" in default_dims:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\arviz\data\base.py:146, in generate_dims_coords(shape, var_name, dims, coords, default_dims, index_origin, skip_event_dims)
143 if skip_event_dims:
144 # this is needed in case the reduction keeps the dimension with size 1
145 for i, (dim, dim_size) in enumerate(zip(dims, shape)):
--> 146 if (dim in coords) and (dim_size != len(coords[dim])):
147 dims = dims[:i]
148 break
TypeError: object of type 'NoneType' has no len()
PyMC version information:
pymc version 5.6.0
pytensor version 2.12.3
I also got this error using pymc version 5.5.0
Context for the issue:
Users cannot calculate log likelihoods for hierarchical models which limits their ability to do model comparisons.