Skip to content

Develop metatree #45

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Nov 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
6b48f22
Create metatree.md
yuta-nakahara Jun 6, 2022
1e37aa3
Merge branch 'main' into develop-metatree-resume
yuta-nakahara Jun 10, 2022
c416640
Basic structure of MTRF
yuta-nakahara Jun 17, 2022
b91a005
meta-tree resume
SaitoGunma Jul 8, 2022
c3701cb
Update metatree.md
SaitoGunma Jul 13, 2022
a4483d8
Update metatree.md
yuta-nakahara Jul 17, 2022
e94b8c3
Merge pull request #14 from yuta-nakahara/develop-metatree-resume
yuta-nakahara Jul 17, 2022
bdd7394
Merge branch 'develop-metatree' into develop-metatree-categorical_x
yuta-nakahara Jul 17, 2022
e7d01c9
Update MAP recursion
yuta-nakahara Jul 18, 2022
e5915fc
Merge branch 'develop' into develop-metatree
yuta-nakahara Aug 9, 2022
1a0c961
Merge branch 'develop-check' into develop-metatree
yuta-nakahara Aug 9, 2022
1dff832
Merge branch 'develop-metatree' into develop-metatree-categorical_x
yuta-nakahara Aug 9, 2022
2985615
Merge branch 'develop-check' into develop-metatree-categorical_x
yuta-nakahara Oct 28, 2022
2b85559
Merge branch 'develop' into develop-metatree-categorical_x
yuta-nakahara Nov 15, 2022
4984f95
Revise var name of constants
yuta-nakahara Nov 15, 2022
f771ec2
Merge branch 'develop' into develop-metatree-categorical_x
yuta-nakahara Nov 15, 2022
8f96057
Revise set_ functions of LearnModel
yuta-nakahara Nov 15, 2022
549d78a
Merge branch 'develop-check' into develop-metatree-categorical_x
yuta-nakahara Nov 15, 2022
94d796b
Merge branch 'develop-check' into develop-metatree-categorical_x
yuta-nakahara Nov 15, 2022
21c7e16
Add docstring for LearnModel
yuta-nakahara Nov 15, 2022
9246b55
Revise GenModel
yuta-nakahara Nov 16, 2022
d1a9410
Add dict_out option to estimate_params
yuta-nakahara Nov 16, 2022
72b0465
poisson and exponential leaf node
yuta-nakahara Nov 16, 2022
77ce7db
Merge pull request #37 from yuta-nakahara/develop-metatree-categorical_x
yuta-nakahara Nov 17, 2022
61105a7
Update _metatree_x_discrete.py
yuta-nakahara Nov 20, 2022
460a0d2
Fix map_recursion bug
yuta-nakahara Nov 20, 2022
4e22358
Fix make_prediction bug
yuta-nakahara Nov 20, 2022
d24d01a
Update _metatree_x_discrete.py
yuta-nakahara Nov 20, 2022
132f30a
Remove unnecessary files
yuta-nakahara Nov 20, 2022
cc286c6
Merge branch 'develop' into develop-metatree
yuta-nakahara Nov 20, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion bayesml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from . import multivariate_normal
from . import normal
from . import poisson
from . import metatree

__all__ = ['bernoulli',
'categorical',
Expand All @@ -14,5 +15,6 @@
'linearregression',
'multivariate_normal',
'normal',
'poisson'
'poisson',
'metatree'
]
9 changes: 8 additions & 1 deletion bayesml/_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def nonneg_int_vecs(val,val_name,exception_class):
return val
raise(exception_class(val_name + " must be a numpy.ndarray whose ndim >= 1 and dtype is int. Its values must be non-negative (including 0)."))


def nonneg_float_vec(val,val_name,exception_class):
if type(val) is np.ndarray:
if np.issubdtype(val.dtype,np.floating) and val.ndim == 1 and np.all(val>=0):
Expand Down Expand Up @@ -242,8 +243,14 @@ def onehot_vecs(val,val_name,exception_class):
return val
raise(exception_class(val_name + " must be a numpy.ndarray whose dtype is int and whose last axis constitutes one-hot vectors."))

def int_vecs(val,val_name,exception_class):
    """Validate that ``val`` is an integer numpy.ndarray with at least one dimension.

    Returns ``val`` unchanged when valid; otherwise raises ``exception_class``.
    """
    # Short-circuit order matters: dtype/ndim are only inspected for ndarrays.
    is_valid = (
        type(val) is np.ndarray
        and np.issubdtype(val.dtype, np.integer)
        and val.ndim >= 1
    )
    if not is_valid:
        raise exception_class(val_name + " must be a numpy.ndarray whose dtype is int and ndim >= 1.")
    return val

def shape_consistency(val: int, val_name: str, correct: int, correct_name: str, exception_class):
    """Check that ``val`` equals the expected value ``correct``.

    Raises ``exception_class`` with a descriptive message naming both
    quantities when they disagree; returns None otherwise.
    """
    if val == correct:
        return
    raise exception_class(
        f"{val_name} must coincide with {correct_name}: "
        f"{val_name} = {val}, {correct_name} = {correct}"
    )
36 changes: 28 additions & 8 deletions bayesml/bernoulli/_bernoulli.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,18 +272,20 @@ def update_posterior(self,x):
self.hn_alpha += np.sum(x==1)
self.hn_beta += np.sum(x==0)

def estimate_params(self,loss="squared"):
def estimate_params(self,loss="squared",dict_out=False):
"""Estimate the parameter of the stochastic data generative model under the given criterion.

Parameters
----------
loss : str, optional
Loss function underlying the Bayes risk function, by default \"squared\".
This function supports \"squared\", \"0-1\", \"abs\", and \"KL\".
dict_out : bool, optional
If ``True``, output will be a dict, by default ``False``.

Returns
-------
Estimator : {float, None, rv_frozen}
estimator : {float, None, rv_frozen} or dict of {str : float, None}
The estimated values under the given loss function. If it does not exist, `None` will be returned.
If the loss function is \"KL\", the posterior distribution itself will be returned
as rv_frozen object of scipy.stats.
Expand All @@ -294,19 +296,37 @@ def estimate_params(self,loss="squared"):
scipy.stats.rv_discrete
"""
if loss == "squared":
return self.hn_alpha / (self.hn_alpha + self.hn_beta)
if dict_out:
return {'theta':self.hn_alpha / (self.hn_alpha + self.hn_beta)}
else:
return self.hn_alpha / (self.hn_alpha + self.hn_beta)
elif loss == "0-1":
if self.hn_alpha > 1.0 and self.hn_beta > 1.0:
return (self.hn_alpha - 1.0) / (self.hn_alpha + self.hn_beta - 2.0)
if dict_out:
return {'theta':(self.hn_alpha - 1.0) / (self.hn_alpha + self.hn_beta - 2.0)}
else:
return (self.hn_alpha - 1.0) / (self.hn_alpha + self.hn_beta - 2.0)
elif self.hn_alpha > 1.0:
return 1.0
if dict_out:
return {'theta':1.0}
else:
return 1.0
elif self.hn_beta > 1.0:
return 0.0
if dict_out:
return {'theta':0.0}
else:
return 0.0
else:
warnings.warn("MAP estimate doesn't exist for the current hn_alpha and hn_beta.",ResultWarning)
return None
if dict_out:
return {'theta':None}
else:
return None
elif loss == "abs":
return ss_beta.median(self.hn_alpha,self.hn_beta)
if dict_out:
return {'theta':ss_beta.median(self.hn_alpha,self.hn_beta)}
else:
return ss_beta.median(self.hn_alpha,self.hn_beta)
elif loss == "KL":
return ss_beta(self.hn_alpha,self.hn_beta)
else:
Expand Down
16 changes: 12 additions & 4 deletions bayesml/categorical/_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,18 +356,20 @@ def update_posterior(self, x):
for k in range(self.degree):
self.hn_alpha_vec[k] += x[:,k].sum()

def estimate_params(self, loss="squared"):
def estimate_params(self, loss="squared",dict_out=False):
"""Estimate the parameter of the stochastic data generative model under the given criterion.

Parameters
----------
loss : str, optional
Loss function underlying the Bayes risk function, by default \"squared\".
This function supports \"squared\", \"0-1\", and \"KL\".
dict_out : bool, optional
If ``True``, output will be a dict, by default ``False``.

Returns
-------
Estimates : {numpy ndarray, float, None, or rv_frozen}
estimates : {numpy ndarray, float, None, or rv_frozen}
The estimated values under the given loss function. If it does not exist, `None` will be returned.
If the loss function is \"KL\", the posterior distribution itself will be returned
as rv_frozen object of scipy.stats.
Expand All @@ -378,10 +380,16 @@ def estimate_params(self, loss="squared"):
scipy.stats.rv_discrete
"""
if loss == "squared":
return self.hn_alpha_vec / np.sum(self.hn_alpha_vec)
if dict_out:
return {'theta_vec':self.hn_alpha_vec / np.sum(self.hn_alpha_vec)}
else:
return self.hn_alpha_vec / np.sum(self.hn_alpha_vec)
elif loss == "0-1":
if np.all(self.hn_alpha_vec > 1):
return (self.hn_alpha_vec - 1) / (np.sum(self.hn_alpha_vec) - self.degree)
if dict_out:
return {'theta_vec':(self.hn_alpha_vec - 1) / (np.sum(self.hn_alpha_vec) - self.degree)}
else:
return (self.hn_alpha_vec - 1) / (np.sum(self.hn_alpha_vec) - self.degree)
else:
warnings.warn("MAP estimate of lambda_mat doesn't exist for the current hn_alpha_vec.",ResultWarning)
return None
Expand Down
28 changes: 21 additions & 7 deletions bayesml/exponential/_exponential.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def gen_params(self):

The generated value is set to ``self.lambda_``.
"""
self.lambda_ = self.rng.gamma(self.h_alpha,1.0/self.h_beta, 1)
self.lambda_ = self.rng.gamma(self.h_alpha,1.0/self.h_beta)

def set_params(self,lambda_):
"""Set the parameter of the stochastic data generative model.
Expand Down Expand Up @@ -277,18 +277,20 @@ def update_posterior(self,x):
self.hn_alpha += x.size
self.hn_beta += np.sum(x)

def estimate_params(self,loss="squared"):
def estimate_params(self,loss="squared",dict_out=False):
"""Estimate the parameter of the stochastic data generative model under the given criterion.

Parameters
----------
loss : str, optional
Loss function underlying the Bayes risk function, by default \"squared\".
This function supports \"squared\", \"0-1\", \"abs\", and \"KL\".
dict_out : bool, optional
If ``True``, output will be a dict, by default ``False``.

Returns
-------
Estimator : {float, None, rv_frozen}
estimator : {float, None, rv_frozen}
The estimated values under the given loss function. If it does not exist, `None` will be returned.
If the loss function is \"KL\", the posterior distribution itself will be returned
as rv_frozen object of scipy.stats.
Expand All @@ -299,14 +301,26 @@ def estimate_params(self,loss="squared"):
scipy.stats.rv_discrete
"""
if loss == "squared":
return self.hn_alpha / self.hn_beta
if dict_out:
return {'lambda_':self.hn_alpha / self.hn_beta}
else:
return self.hn_alpha / self.hn_beta
elif loss == "0-1":
if self.hn_alpha > 1.0 :
return (self.hn_alpha - 1.0) / self.hn_beta
if dict_out:
return {'lambda_':(self.hn_alpha - 1.0) / self.hn_beta}
else:
return (self.hn_alpha - 1.0) / self.hn_beta
else:
return 0.0
if dict_out:
return {'lambda_':0.0}
else:
return 0.0
elif loss == "abs":
return ss_gamma.median(a=self.hn_alpha,scale=1/self.hn_beta)
if dict_out:
return {'lambda_':ss_gamma.median(a=self.hn_alpha,scale=1/self.hn_beta)}
else:
return ss_gamma.median(a=self.hn_alpha,scale=1/self.hn_beta)
elif loss == "KL":
return ss_gamma(a=self.hn_alpha,scale=1/self.hn_beta)
else:
Expand Down
32 changes: 25 additions & 7 deletions bayesml/linearregression/_linearregression.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ class GenModel(base.Generative):
a value consistent with ``theta_vec``, ``h_mu_vec``,
and ``h_lambda_mat`` is used. If all of them are not given,
degree is assumed to be 1.
theta_vec : numpy ndarray, optional
a vector of real numbers, by default [0.0, 0.0, ... , 0.0]
tau : float, optional
a positive real number, by default 1.0
h_mu_vec : numpy ndarray, optional
a vector of real numbers, by default [0.0, 0.0, ... , 0.0]
h_lambda_mat : numpy ndarray, optional
Expand Down Expand Up @@ -558,7 +562,7 @@ def update_posterior(self, x, y):
self.hn_beta += (-self.hn_mu_vec[np.newaxis,:] @ self.hn_lambda_mat @ self.hn_mu_vec[:,np.newaxis]
+ y @ y + hn1_mu[np.newaxis,:] @ hn1_Lambda @ hn1_mu[:,np.newaxis])[0,0] /2.0

def estimate_params(self,loss="squared"):
def estimate_params(self,loss="squared",dict_out=False):
"""Estimate the parameter of the stochastic data generative model under the given criterion.

Note that the criterion is applied to estimating ``theta_vec`` and ``tau`` independently.
Expand All @@ -569,10 +573,12 @@ def estimate_params(self,loss="squared"):
loss : str, optional
Loss function underlying the Bayes risk function, by default \"squared\".
This function supports \"squared\", \"0-1\", \"abs\", and \"KL\".
dict_out : bool, optional
If ``True``, output will be a dict, by default ``False``.

Returns
-------
Estimates : tuple of {numpy ndarray, float, None, or rv_frozen}
estimates : tuple of {numpy ndarray, float, None, or rv_frozen}
* ``theta_vec`` : the estimate for w
* ``tau_hat`` : the estimate for tau
The estimated values under the given loss function. If it does not exist, `None` will be returned.
Expand All @@ -584,15 +590,27 @@ def estimate_params(self,loss="squared"):
scipy.stats.rv_continuous
scipy.stats.rv_discrete
"""
if loss == "squared":
return self.hn_mu_vec, self.hn_alpha/self.hn_beta
if loss == "squared":
if dict_out:
return {'theta_vec':self.hn_mu_vec,'tau':self.hn_alpha/self.hn_beta}
else:
return self.hn_mu_vec, self.hn_alpha/self.hn_beta
elif loss == "0-1":
if self.hn_alpha >= 1.0:
return self.hn_mu_vec, (self.hn_alpha - 1.0) / self.hn_beta
if dict_out:
return {'theta_vec':self.hn_mu_vec,'tau':(self.hn_alpha - 1.0) / self.hn_beta}
else:
return self.hn_mu_vec, (self.hn_alpha - 1.0) / self.hn_beta
else:
return self.hn_mu_vec, 0
if dict_out:
return {'theta_vec':self.hn_mu_vec,'tau':0.0}
else:
return self.hn_mu_vec, 0.0
elif loss == "abs":
return self.hn_mu_vec, ss_gamma.median(a=self.hn_alpha,scale=1.0/self.hn_beta)
if dict_out:
return {'theta_vec':self.hn_mu_vec,'tau':ss_gamma.median(a=self.hn_alpha,scale=1.0/self.hn_beta)}
else:
return self.hn_mu_vec, ss_gamma.median(a=self.hn_alpha,scale=1.0/self.hn_beta)
elif loss == "KL":
return (ss_multivariate_t(loc=self.hn_mu_vec,
shape=np.linalg.inv(self.hn_alpha / self.hn_beta * self.hn_lambda_mat),
Expand Down
Loading