Skip to content

BUG: object of type 'NoneType' has no len() when computing log likelihood #6820

Closed
@jaharvey8

Description

Describe the issue:

I have seen this error when computing the log likelihood in hierarchical models. If you do not calculate the log likelihood, the model samples just fine; and if your model is not hierarchical, you can calculate the log likelihood without error. I have seen this issue in my own models, but you can reproduce it using the radon example. The error occurs when computing the log likelihood with either

with partial_pooling:
    partial_pooling_trace = pm.sample(tune=2000, random_seed=RANDOM_SEED, idata_kwargs=dict(log_likelihood=True))

or with

with partial_pooling:
    pm.compute_log_likelihood(partial_pooling_trace)

The code below is copied out of the simplest partial pooling model from the radon example.

Reproducible code example:

import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files/Graphviz/bin'
import warnings

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pt
import seaborn as sns
import xarray as xr

warnings.filterwarnings("ignore", module="scipy")
RANDOM_SEED = 8924

# Load the SRRS radon survey data: prefer a local copy, fall back to
# downloading via pymc's example-data helper.
try:
    srrs2 = pd.read_csv(os.path.join("..", "data", "srrs2.dat"))
except FileNotFoundError:
    srrs2 = pd.read_csv(pm.get_data("srrs2.dat"))

# Column names in the raw file carry trailing whitespace; strip it, then
# restrict to Minnesota households.
srrs2.columns = srrs2.columns.map(str.strip)
srrs_mn = srrs2[srrs2.state == "MN"].copy()

# Load the county-level uranium data, again with a local-then-remote fallback.
try:
    cty = pd.read_csv(os.path.join("..", "data", "cty.dat"))
except FileNotFoundError:
    cty = pd.read_csv(pm.get_data("cty.dat"))

# Build a common FIPS key (state code * 1000 + county code) on both tables
# so household rows can be joined to their county's uranium measurement.
srrs_mn["fips"] = srrs_mn.stfips * 1000 + srrs_mn.cntyfips
cty_mn = cty[cty.st == "MN"].copy()
cty_mn["fips"] = 1000 * cty_mn.stfips + cty_mn.ctfips

# Attach county uranium (Uppm) to each household and drop duplicate
# household records (idnum uniquely identifies a household).
srrs_mn = srrs_mn.merge(cty_mn[["fips", "Uppm"]], on="fips")
srrs_mn = srrs_mn.drop_duplicates(subset="idnum")
# Log-uranium values, one per county (unused by the partial-pooling model
# below, but kept from the original example).
u = np.log(srrs_mn.Uppm).unique()

# Number of household observations.
n = len(srrs_mn)

# Integer-encode counties: `county` is the per-observation index,
# `mn_counties` the ordered list of county names (used as "county" coords).
srrs_mn.county = srrs_mn.county.map(str.strip)
county, mn_counties = srrs_mn.county.factorize()
srrs_mn["county_code"] = county
radon = srrs_mn.activity
# Log-transform radon activity; +0.1 avoids log(0) for zero readings.
srrs_mn["log_radon"] = log_radon = np.log(radon + 0.1).values
floor_measure = srrs_mn.floor.values

# Named-dimension coordinates for the model. NOTE: the snippet as originally
# pasted referenced `coords` without ever defining it, so it raised a
# NameError before reaching the reported TypeError. The radon example defines
# it as below: only "county" gets explicit coordinate values, while "obs_id"
# is registered implicitly via `dims="obs_id"` and therefore has no
# coordinate values (coords["obs_id"] is None) — which is exactly what
# triggers `len(None)` inside arviz's generate_dims_coords.
# Workaround until fixed upstream:
#     coords = {"county": mn_counties, "obs_id": np.arange(n)}
coords = {"county": mn_counties}

# Simplest partial-pooling (varying-intercept) model from the radon example.
with pm.Model(coords=coords) as partial_pooling:
    # Per-observation county index, declared as mutable shared data.
    county_idx = pm.MutableData("county_idx", county, dims="obs_id")

    # Hyperpriors on the county intercept distribution
    mu_a = pm.Normal("mu_a", mu=0.0, sigma=10)
    sigma_a = pm.Exponential("sigma_a", 1)

    # Random intercepts: one per county, partially pooled toward mu_a
    alpha = pm.Normal("alpha", mu=mu_a, sigma=sigma_a, dims="county")

    # Observation-level (residual) error
    sigma_y = pm.Exponential("sigma_y", 1)

    # Expected log-radon for each observation is its county's intercept
    y_hat = alpha[county_idx]

    # Data likelihood
    y_like = pm.Normal("y_like", mu=y_hat, sigma=sigma_y, observed=log_radon, dims="obs_id")

# Sampling itself succeeds; the TypeError is raised during the
# log-likelihood computation performed while packaging the InferenceData.
with partial_pooling:
    partial_pooling_trace = pm.sample(tune=2000, random_seed=RANDOM_SEED, idata_kwargs=dict(log_likelihood=True))

Error message:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[27], line 2
      1 with partial_pooling:
----> 2     partial_pooling_trace = pm.sample(tune=2000, random_seed=RANDOM_SEED, idata_kwargs=dict(log_likelihood=True))

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymc\sampling\mcmc.py:791, in sample(draws, tune, chains, cores, random_seed, progressbar, step, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, model, **kwargs)
    787 t_sampling = time.time() - t_start
    789 # Packaging, validating and returning the result was extracted
    790 # into a function to make it easier to test and refactor.
--> 791 return _sample_return(
    792     run=run,
    793     traces=traces,
    794     tune=tune,
    795     t_sampling=t_sampling,
    796     discard_tuned_samples=discard_tuned_samples,
    797     compute_convergence_checks=compute_convergence_checks,
    798     return_inferencedata=return_inferencedata,
    799     keep_warning_stat=keep_warning_stat,
    800     idata_kwargs=idata_kwargs or {},
    801     model=model,
    802 )

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymc\sampling\mcmc.py:859, in _sample_return(run, traces, tune, t_sampling, discard_tuned_samples, compute_convergence_checks, return_inferencedata, keep_warning_stat, idata_kwargs, model)
    857 ikwargs: Dict[str, Any] = dict(model=model, save_warmup=not discard_tuned_samples)
    858 ikwargs.update(idata_kwargs)
--> 859 idata = pm.to_inference_data(mtrace, **ikwargs)
    861 if compute_convergence_checks:
    862     warns = run_convergence_checks(idata, model)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymc\backends\arviz.py:519, in to_inference_data(trace, prior, posterior_predictive, log_likelihood, coords, dims, sample_dims, model, save_warmup, include_transformed)
    505 if isinstance(trace, InferenceData):
    506     return trace
    508 return InferenceDataConverter(
    509     trace=trace,
    510     prior=prior,
    511     posterior_predictive=posterior_predictive,
    512     log_likelihood=log_likelihood,
    513     coords=coords,
    514     dims=dims,
    515     sample_dims=sample_dims,
    516     model=model,
    517     save_warmup=save_warmup,
    518     include_transformed=include_transformed,
--> 519 ).to_inference_data()

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymc\backends\arviz.py:442, in InferenceDataConverter.to_inference_data(self)
    439 if self.log_likelihood:
    440     from pymc.stats.log_likelihood import compute_log_likelihood
--> 442     idata = compute_log_likelihood(
    443         idata,
    444         var_names=None if self.log_likelihood is True else self.log_likelihood,
    445         extend_inferencedata=True,
    446         model=self.model,
    447         sample_dims=self.sample_dims,
    448         progressbar=False,
    449     )
    450 return idata

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymc\stats\log_likelihood.py:116, in compute_log_likelihood(idata, var_names, extend_inferencedata, model, sample_dims, progressbar)
    111 for key, array in loglike_trace.items():
    112     loglike_trace[key] = array.reshape(
    113         (*[len(coord) for coord in stacked_dims.values()], *array.shape[1:])
    114     )
--> 116 loglike_dataset = dict_to_dataset(
    117     loglike_trace,
    118     library=pymc,
    119     dims={dname: list(dvals) for dname, dvals in model.named_vars_to_dims.items()},
    120     coords={
    121         cname: np.array(cvals) if isinstance(cvals, tuple) else cvals
    122         for cname, cvals in model.coords.items()
    123     },
    124     default_dims=list(sample_dims),
    125     skip_event_dims=True,
    126 )
    128 if extend_inferencedata:
    129     idata.add_groups(dict(log_likelihood=loglike_dataset))

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\arviz\data\base.py:306, in dict_to_dataset(data, attrs, library, coords, dims, default_dims, index_origin, skip_event_dims)
    303 if dims is None:
    304     dims = {}
--> 306 data_vars = {
    307     key: numpy_to_data_array(
    308         values,
    309         var_name=key,
    310         coords=coords,
    311         dims=dims.get(key),
    312         default_dims=default_dims,
    313         index_origin=index_origin,
    314         skip_event_dims=skip_event_dims,
    315     )
    316     for key, values in data.items()
    317 }
    318 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\arviz\data\base.py:307, in <dictcomp>(.0)
    303 if dims is None:
    304     dims = {}
    306 data_vars = {
--> 307     key: numpy_to_data_array(
    308         values,
    309         var_name=key,
    310         coords=coords,
    311         dims=dims.get(key),
    312         default_dims=default_dims,
    313         index_origin=index_origin,
    314         skip_event_dims=skip_event_dims,
    315     )
    316     for key, values in data.items()
    317 }
    318 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\arviz\data\base.py:231, in numpy_to_data_array(ary, var_name, coords, dims, default_dims, index_origin, skip_event_dims)
    228 else:
    229     ary = utils.one_de(ary)
--> 231 dims, coords = generate_dims_coords(
    232     ary.shape[len(default_dims) :],
    233     var_name,
    234     dims=dims,
    235     coords=coords,
    236     default_dims=default_dims,
    237     index_origin=index_origin,
    238     skip_event_dims=skip_event_dims,
    239 )
    241 # reversed order for default dims: 'chain', 'draw'
    242 if "draw" not in dims and "draw" in default_dims:

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\arviz\data\base.py:146, in generate_dims_coords(shape, var_name, dims, coords, default_dims, index_origin, skip_event_dims)
    143 if skip_event_dims:
    144     # this is needed in case the reduction keeps the dimension with size 1
    145     for i, (dim, dim_size) in enumerate(zip(dims, shape)):
--> 146         if (dim in coords) and (dim_size != len(coords[dim])):
    147             dims = dims[:i]
    148             break

TypeError: object of type 'NoneType' has no len()

PyMC version information:

pymc version 5.6.0
pytensor version 2.12.3

I also got this error using pymc version 5.5.0

Context for the issue:

Users cannot calculate log likelihoods for hierarchical models which limits their ability to do model comparisons.

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions