10
10
import numpy as np
11
11
import scipy .sparse as sparse
12
12
from numpy .testing import assert_array_equal , assert_allclose , assert_raises
13
- from nose .tools import eq_ , ok_
13
+ from nose .tools import ok_
14
14
15
15
from quantecon .markov import DiscreteDP , backward_induction
16
16
@@ -126,10 +126,7 @@ def test_ddp_beta_0():
126
126
v_init = [0 , 0 , 0 ]
127
127
128
128
ddp0 = DiscreteDP (R , Q , beta )
129
- s_indices , a_indices = np .where (R > - np .inf )
130
- R_sa = R [s_indices , a_indices ]
131
- Q_sa = Q [s_indices , a_indices ]
132
- ddp1 = DiscreteDP (R_sa , Q_sa , beta , s_indices , a_indices )
129
+ ddp1 = ddp0 .to_sa_formulation ()
133
130
methods = ['vi' , 'pi' , 'mpi' ]
134
131
135
132
for ddp in [ddp0 , ddp1 ]:
@@ -140,7 +137,6 @@ def test_ddp_beta_0():
140
137
141
138
142
139
def test_ddp_sorting ():
143
- n , m = 2 , 2
144
140
beta = 0.95
145
141
146
142
# Sorted
@@ -238,8 +234,6 @@ def test_ddp_negative_inf_error():
238
234
239
235
240
236
def test_ddp_no_feasibile_action_error ():
241
- n , m = 3 , 2
242
-
243
237
# No action is feasible at state 1
244
238
s_indices = [0 , 0 , 2 , 2 ]
245
239
a_indices = [0 , 1 , 0 , 1 ]
@@ -258,12 +252,8 @@ def test_ddp_beta_1_not_implemented_error():
258
252
beta = 1
259
253
260
254
ddp0 = DiscreteDP (R , Q , beta )
261
- s_indices , a_indices = np .where (R > - np .inf )
262
- R_sa = R [s_indices , a_indices ]
263
- Q_sa = Q [s_indices , a_indices ]
264
- ddp1 = DiscreteDP (R_sa , Q_sa , beta , s_indices , a_indices )
265
- Q_sa_sp = sparse .csr_matrix (Q_sa )
266
- ddp2 = DiscreteDP (R_sa , Q_sa_sp , beta , s_indices , a_indices )
255
+ ddp1 = ddp0 .to_sa_formulation ()
256
+ ddp2 = ddp0 .to_sa_formulation (sparse = False )
267
257
268
258
solution_methods = \
269
259
['value_iteration' , 'policy_iteration' , 'modified_policy_iteration' ]
@@ -274,6 +264,45 @@ def test_ddp_beta_1_not_implemented_error():
274
264
assert_raises (NotImplementedError , getattr (ddp , method ))
275
265
276
266
267
def test_ddp_to_sa_and_to_full():
    """Check conversions between the full and state-action formulations.

    Builds a small DiscreteDP from a dense reward array ``R`` (n x m) and a
    uniform transition array ``Q`` (n x m x n), converts it back and forth
    with ``to_sa_formulation`` / ``to_full_formulation``, and verifies that

    * every state-action instance carries the expected flattened ``R``,
      the expected ``Q`` and the same ``beta``;
    * every full instance reproduces the original ``R``, ``Q`` and ``beta``;
    * all instances yield the same solution (``v``, ``sigma``, ``num_iter``)
      under each solution method.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.empty((n, m, n))
    # Use a float literal so the value is 1/3 even under integer division.
    Q[:] = 1.0 / n
    beta = 0.95

    # Expected state-action data: R flattened in row-major order and one
    # uniform transition row per (state, action) pair.
    expected_R_sa = np.array([0, 1, 1, 0, 0, 1])
    expected_Q_sa = np.full((n * m, n), 1.0 / n)

    ddp = DiscreteDP(R, Q, beta)
    ddp_sa = ddp.to_sa_formulation()
    ddp_sa2 = ddp_sa.to_sa_formulation()
    ddp_sa3 = ddp.to_sa_formulation(sparse=False)
    ddp2 = ddp_sa.to_full_formulation()
    ddp3 = ddp_sa2.to_full_formulation()
    ddp4 = ddp.to_full_formulation()

    # Make sure the conversion to the state-action formulation worked.
    for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
        assert_allclose(ddp_s.R, expected_R_sa)
        # assert_allclose doesn't work on sparse matrices, so densify Q
        # first when the instance stores it as a scipy sparse matrix.
        Q_sa = ddp_s.Q.toarray() if sparse.issparse(ddp_s.Q) else ddp_s.Q
        assert_allclose(Q_sa, expected_Q_sa)
        assert_allclose(ddp_s.beta, beta)

    # Make sure the conversion back to the full formulation worked.
    for ddp_f in [ddp2, ddp3, ddp4]:
        assert_allclose(ddp_f.R, ddp.R)
        assert_allclose(ddp_f.Q, ddp.Q)
        assert_allclose(ddp_f.beta, ddp.beta)

    # Every formulation must produce the same solution as the original.
    for method in ["pi", "vi", "mpi"]:
        sol1 = ddp.solve(method=method)
        for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
            sol2 = ddp_other.solve(method=method)

            for k in ["v", "sigma", "num_iter"]:
                assert_allclose(sol1[k], sol2[k])
304
+
305
+
277
306
if __name__ == '__main__' :
278
307
import sys
279
308
import nose
0 commit comments