DiscreteDP: minor fix in docstring and pep8 compliance #198

Merged 2 commits on Sep 25, 2015

50 changes: 25 additions & 25 deletions quantecon/markov/ddp.py
@@ -1,4 +1,4 @@
"""
r"""
Filename: ddp.py

Author: Daisuke Oyama
@@ -141,7 +141,7 @@ class DiscreteDP(object):

with parameters:

* length L reward vector R,
* length L reward vector `R`,
* L x n transition probability array `Q`,
* discount factor `beta`,
* length L array `s_indices`, and
@@ -451,8 +451,8 @@ def _check_action_feasibility(self):

def RQ_sigma(self, sigma):
"""
Given a policy `sigma`, return the reward vector R_sigma and the
transition probability matrix Q_sigma.
Given a policy `sigma`, return the reward vector `R_sigma` and
the transition probability matrix `Q_sigma`.

Parameters
----------
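For orientation, here is a minimal usage sketch of the state-action pairs formulation described above and of RQ_sigma, using a toy two-state problem. The numbers, the a_indices argument, and the policy sigma are illustrative assumptions, not taken from this diff.

    import numpy as np
    from quantecon.markov.ddp import DiscreteDP

    # Toy problem: states 0 and 1; state 0 admits actions 0 and 1,
    # state 1 admits only action 0, so there are L = 3 state-action pairs.
    R = np.array([5.0, 10.0, -1.0])      # length L reward vector
    Q = np.array([[0.5, 0.5],            # L x n transition probability array
                  [0.0, 1.0],
                  [0.0, 1.0]])
    beta = 0.95                          # discount factor
    s_indices = np.array([0, 0, 1])      # state index of each pair
    a_indices = np.array([0, 1, 0])      # action index of each pair (assumed companion argument)
    ddp = DiscreteDP(R, Q, beta, s_indices, a_indices)

    sigma = np.array([0, 0])             # one action per state
    R_sigma, Q_sigma = ddp.RQ_sigma(sigma)
    # R_sigma is the length-n reward vector and Q_sigma the n x n transition
    # matrix obtained by keeping, for each state, only the action sigma picks.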
@@ -483,7 +483,7 @@ def RQ_sigma(self, sigma):
def bellman_operator(self, v, Tv=None, sigma=None):
"""
The Bellman operator, which computes and returns the updated
value function Tv for a value function v.
value function `Tv` for a value function `v`.

Parameters
----------
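A short sketch of bellman_operator, reusing the toy ddp built above; supplying a preallocated output array is inferred from the optional Tv parameter in the signature.

    v = np.zeros(2)                  # value function guess for the 2-state toy problem
    Tv = ddp.bellman_operator(v)     # returns the updated value function T(v)

    Tv_out = np.empty(2)             # or write the result into a given array
    ddp.bellman_operator(v, Tv=Tv_out)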
@@ -538,7 +538,7 @@ def compute_greedy(self, v, sigma=None):
Value function vector, of length n.

sigma : ndarray(int, ndim=1), optional(default=None)
Optional output array for sigma.
Optional output array for `sigma`.

Returns
-------
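Similarly for compute_greedy, again with the toy ddp from above; the integer dtype of the optional output array is an assumption.

    v = np.zeros(2)
    sigma = ddp.compute_greedy(v)         # v-greedy policy, one action per state

    sigma_out = np.empty(2, dtype=int)    # or fill a preallocated integer array
    ddp.compute_greedy(v, sigma=sigma_out)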
@@ -708,12 +708,12 @@ def value_iteration(self, v_init=None, epsilon=None, max_iter=None):
sigma = self.compute_greedy(v)

res = DPSolveResult(v=v,
sigma=sigma,
num_iter=num_iter,
mc=self.controlled_mc(sigma),
method='value iteration',
epsilon=epsilon,
max_iter=max_iter)
sigma=sigma,
num_iter=num_iter,
mc=self.controlled_mc(sigma),
method='value iteration',
epsilon=epsilon,
max_iter=max_iter)

return res
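As a usage sketch for value_iteration with the toy ddp above (the tolerance and iteration cap are illustrative), the returned object carries the DPSolveResult fields assembled in this hunk:

    res = ddp.value_iteration(v_init=np.zeros(2), epsilon=1e-4, max_iter=500)
    res.v         # approximate optimal value function
    res.sigma     # greedy policy at the final iterate
    res.num_iter  # number of iterations performed
    res.mc        # MarkovChain controlled by res.sigma
    res.method    # 'value iteration'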

@@ -745,11 +745,11 @@ def policy_iteration(self, v_init=None, max_iter=None):
num_iter = i + 1

res = DPSolveResult(v=v_sigma,
sigma=sigma,
num_iter=num_iter,
mc=self.controlled_mc(sigma),
method='policy iteration',
max_iter=max_iter)
sigma=sigma,
num_iter=num_iter,
mc=self.controlled_mc(sigma),
method='policy iteration',
max_iter=max_iter)

return res
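The corresponding sketch for policy_iteration, with the toy ddp as above and an illustrative iteration cap:

    res = ddp.policy_iteration(v_init=np.zeros(2), max_iter=100)
    res.sigma     # policy returned by policy iteration
    res.v         # value function of that policy
    res.method    # 'policy iteration'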

@@ -798,13 +798,13 @@ def midrange(z):
num_iter = i + 1

res = DPSolveResult(v=v,
sigma=sigma,
num_iter=num_iter,
mc=self.controlled_mc(sigma),
method='modified policy iteration',
epsilon=epsilon,
max_iter=max_iter,
k=k)
sigma=sigma,
num_iter=num_iter,
mc=self.controlled_mc(sigma),
method='modified policy iteration',
epsilon=epsilon,
max_iter=max_iter,
k=k)

return res
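A sketch for modified policy iteration with the toy ddp; only the k field appears in this hunk, so the method name modified_policy_iteration and the argument values used here are assumptions.

    res = ddp.modified_policy_iteration(v_init=np.zeros(2), epsilon=1e-4,
                                        max_iter=500, k=20)
    res.k         # number of partial policy-evaluation iterations per step
    res.method    # 'modified policy iteration'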

@@ -820,7 +820,7 @@ def controlled_mc(self, sigma):
Returns
-------
mc : MarkovChain
Controlled Markov Chain.
Controlled Markov chain.

"""
_, Q_sigma = self.RQ_sigma(sigma)
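Finally, a sketch of controlled_mc with the toy ddp and a policy from one of the solvers above; stationary_distributions is a MarkovChain attribute assumed here, not part of this diff.

    res = ddp.policy_iteration(v_init=np.zeros(2), max_iter=100)
    mc = ddp.controlled_mc(res.sigma)   # Markov chain with transition matrix Q_sigma
    mc.stationary_distributions         # long-run behaviour under the chosen policy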