|
1 | 1 | # https://deeplearningcourses.com/c/artificial-intelligence-reinforcement-learning-in-python
|
2 | 2 | # https://www.udemy.com/artificial-intelligence-reinforcement-learning-in-python
|
| 3 | +from __future__ import print_function, division |
| 4 | +from builtins import range |
| 5 | +# Note: you may need to update your version of future |
| 6 | +# sudo pip install -U future |
| 7 | + |
| 8 | + |
3 | 9 | import numpy as np
|
4 | 10 | from grid_world import standard_grid
|
5 | 11 |
|
6 | 12 | SMALL_ENOUGH = 1e-3 # threshold for convergence
|
7 | 13 |
|
def print_values(V, g):
    """Print the state-value table for grid g, one row per line.

    V: dict mapping (i, j) state tuples to float values; states absent
       from V are shown as 0.
    g: grid object exposing integer `width` and `height` attributes.
    """
    for row in range(g.width):
        print("---------------------------")
        cells = []
        for col in range(g.height):
            value = V.get((row, col), 0)
            # the minus sign of a negative value occupies the padding
            # space used for non-negative values, keeping columns aligned
            cells.append(" %.2f|" % value if value >= 0 else "%.2f|" % value)
        print("".join(cells))
18 | 24 |
|
19 | 25 |
|
def print_policy(P, g):
    """Print the policy table for grid g, one action symbol per cell.

    P: dict mapping (i, j) state tuples to single-character action
       strings; states absent from P are shown as a blank cell.
    g: grid object exposing integer `width` and `height` attributes.
    """
    for row in range(g.width):
        print("---------------------------")
        cells = [" %s |" % P.get((row, col), ' ') for col in range(g.height)]
        print("".join(cells))
27 | 33 |
|
28 | 34 | if __name__ == '__main__':
|
29 | 35 | # iterative policy evaluation
|
@@ -68,9 +74,9 @@ def print_policy(P, g):
|
68 | 74 |
|
69 | 75 | if biggest_change < SMALL_ENOUGH:
|
70 | 76 | break
|
71 |
| - print "values for uniformly random actions:" |
| 77 | + print("values for uniformly random actions:") |
72 | 78 | print_values(V, grid)
|
73 |
| - print "\n\n" |
| 79 | + print("\n\n") |
74 | 80 |
|
75 | 81 | ### fixed policy ###
|
76 | 82 | policy = {
|
@@ -110,5 +116,5 @@ def print_policy(P, g):
|
110 | 116 |
|
111 | 117 | if biggest_change < SMALL_ENOUGH:
|
112 | 118 | break
|
113 |
| - print "values for fixed policy:" |
| 119 | + print("values for fixed policy:") |
114 | 120 | print_values(V, grid)
|
0 commit comments