Skip to content

Commit 9938ff0

Browse files
committed
ENH: added routines to convert ddp between full and SA formulations
1 parent c60251d commit 9938ff0

File tree

2 files changed

+119
-14
lines changed

2 files changed

+119
-14
lines changed

quantecon/markov/ddp.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,73 @@ def _check_action_feasibility(self):
456456
'violated for state {s}'.format(s=s)
457457
)
458458

459+
def to_sa_formulation(self, sparse=True):
460+
"""
461+
Convert this instance of `DiscreteDP` to SA-pair form
462+
463+
Parameters
464+
----------
465+
sparse : bool, optional(default=True)
466+
Should the `Q` matrix be stored as a sparse matrix.
467+
If true the CSR format is used
468+
469+
Returns
470+
-------
471+
ddp_sa : DiscreteDP
472+
The correspnoding DiscreteDP instance in SA form
473+
474+
Notes
475+
-----
476+
If this instance is already in SA form then it is returned
477+
un-modified
478+
"""
479+
480+
if self._sa_pair:
481+
return self
482+
else:
483+
s_ind, a_ind = np.where(np.isfinite(self.R))
484+
RL = self.R[s_ind, a_ind]
485+
if sparse:
486+
QL = sp.csr_matrix(self.Q[s_ind, a_ind])
487+
else:
488+
QL = self.Q[s_ind, a_ind]
489+
return DiscreteDP(RL, QL, self.beta, s_ind, a_ind)
490+
491+
def to_full_formulation(self):
492+
"""
493+
Convert this instance of `DiscreteDP` to the "full" form.
494+
495+
The full form uses the version of the init method taking
496+
`R`, `Q` and `beta`.
497+
498+
Parameters
499+
----------
500+
501+
Returns
502+
-------
503+
ddp_sa : DiscreteDP
504+
The correspnoding DiscreteDP instance in full form
505+
506+
Notes
507+
-----
508+
If this instance is already in full form then it is returned
509+
un-modified
510+
511+
"""
512+
if self._sa_pair:
513+
ns = self.num_states
514+
na = self.a_indices.max() + 1
515+
R = np.zeros((ns, na))
516+
R[self.s_indices, self.a_indices] = self.R
517+
Q = np.zeros((ns, na, ns))
518+
if self._sparse:
519+
_fill_dense_Q(self.s_indices, self.a_indices, self.Q.toarray(), Q)
520+
else:
521+
_fill_dense_Q(self.s_indices, self.a_indices, self.Q, Q)
522+
return DiscreteDP(R, Q, self.beta)
523+
else:
524+
return self
525+
459526
def RQ_sigma(self, sigma):
460527
"""
461528
Given a policy `sigma`, return the reward vector `R_sigma` and
@@ -963,6 +1030,15 @@ def backward_induction(ddp, T, v_term=None):
9631030
return vs, sigmas
9641031

9651032

1033+
@jit(nopython=True)
1034+
def _fill_dense_Q(s_indices, a_indices, Q_in, Q_out):
1035+
L = Q_in.shape[0]
1036+
for i in range(L):
1037+
Q_out[s_indices[i], a_indices[i], :] = Q_in[i, :]
1038+
1039+
return Q_out
1040+
1041+
9661042
@jit(nopython=True)
9671043
def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax):
9681044
n = len(out_max)

quantecon/markov/tests/test_ddp.py

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import numpy as np
1111
import scipy.sparse as sparse
1212
from numpy.testing import assert_array_equal, assert_allclose, assert_raises
13-
from nose.tools import eq_, ok_
13+
from nose.tools import ok_
1414

1515
from quantecon.markov import DiscreteDP, backward_induction
1616

@@ -126,10 +126,7 @@ def test_ddp_beta_0():
126126
v_init = [0, 0, 0]
127127

128128
ddp0 = DiscreteDP(R, Q, beta)
129-
s_indices, a_indices = np.where(R > -np.inf)
130-
R_sa = R[s_indices, a_indices]
131-
Q_sa = Q[s_indices, a_indices]
132-
ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
129+
ddp1 = ddp0.to_sa_formulation()
133130
methods = ['vi', 'pi', 'mpi']
134131

135132
for ddp in [ddp0, ddp1]:
@@ -140,7 +137,6 @@ def test_ddp_beta_0():
140137

141138

142139
def test_ddp_sorting():
143-
n, m = 2, 2
144140
beta = 0.95
145141

146142
# Sorted
@@ -238,8 +234,6 @@ def test_ddp_negative_inf_error():
238234

239235

240236
def test_ddp_no_feasibile_action_error():
241-
n, m = 3, 2
242-
243237
# No action is feasible at state 1
244238
s_indices = [0, 0, 2, 2]
245239
a_indices = [0, 1, 0, 1]
@@ -258,12 +252,8 @@ def test_ddp_beta_1_not_implemented_error():
258252
beta = 1
259253

260254
ddp0 = DiscreteDP(R, Q, beta)
261-
s_indices, a_indices = np.where(R > -np.inf)
262-
R_sa = R[s_indices, a_indices]
263-
Q_sa = Q[s_indices, a_indices]
264-
ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
265-
Q_sa_sp = sparse.csr_matrix(Q_sa)
266-
ddp2 = DiscreteDP(R_sa, Q_sa_sp, beta, s_indices, a_indices)
255+
ddp1 = ddp0.to_sa_formulation()
256+
ddp2 = ddp0.to_sa_formulation(sparse=False)
267257

268258
solution_methods = \
269259
['value_iteration', 'policy_iteration', 'modified_policy_iteration']
@@ -274,6 +264,45 @@ def test_ddp_beta_1_not_implemented_error():
274264
assert_raises(NotImplementedError, getattr(ddp, method))
275265

276266

267+
def test_ddp_to_sa_and_to_full():
268+
n, m = 3, 2
269+
R = np.array([[0, 1], [1, 0], [0, 1]])
270+
Q = np.empty((n, m, n))
271+
Q[:] = 1/n
272+
beta = 0.95
273+
274+
sparse_R = np.array([0, 1, 1, 0, 0, 1])
275+
sparse_Q = sparse.coo_matrix(np.full((6, 3), 1/3))
276+
277+
ddp = DiscreteDP(R, Q, beta)
278+
ddp_sa = ddp.to_sa_formulation()
279+
ddp_sa2 = ddp_sa.to_sa_formulation()
280+
ddp_sa3 = ddp.to_sa_formulation(sparse=False)
281+
ddp2 = ddp_sa.to_full_formulation()
282+
ddp3 = ddp_sa2.to_full_formulation()
283+
ddp4 = ddp.to_full_formulation()
284+
285+
# make sure conversion worked
286+
for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
287+
assert_allclose(ddp_s.R, sparse_R)
288+
# allcose doesn't work on sparse
289+
np.max(np.abs((sparse_Q - ddp_sa.Q))) < 1e-15
290+
assert_allclose(ddp_s.beta, beta)
291+
292+
for ddp_f in [ddp2, ddp3, ddp4]:
293+
assert_allclose(ddp_f.R, ddp.R)
294+
assert_allclose(ddp_f.Q, ddp.Q)
295+
assert_allclose(ddp_f.beta, ddp.beta)
296+
297+
for method in ["pi", "vi", "mpi"]:
298+
sol1 = ddp.solve(method=method)
299+
for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
300+
sol2 = ddp_other.solve(method=method)
301+
302+
for k in ["v", "sigma", "num_iter"]:
303+
assert_allclose(sol1[k], sol2[k])
304+
305+
277306
if __name__ == '__main__':
278307
import sys
279308
import nose

0 commit comments

Comments
 (0)