|
| 1 | +""" |
| 2 | +Author: Daisuke Oyama |
| 3 | +
|
| 4 | +Compute mixed Nash equilibria of an N-player normal form game by |
| 5 | +applying the imitation game algorithm by McLennan and Tourky to the best |
| 6 | +response correspondence. |
| 7 | +
|
| 8 | +""" |
| 9 | +import numbers |
| 10 | +import numpy as np |
| 11 | +from ..compute_fp import _compute_fixed_point_ig |
| 12 | +from .normal_form_game import pure2mixed |
| 13 | + |
| 14 | + |
def mclennan_tourky(g, init=None, epsilon=1e-3, max_iter=200,
                    full_output=False):
    r"""
    Find one mixed-action epsilon-Nash equilibrium of an N-player normal
    form game by the fixed point computation algorithm by McLennan and
    Tourky [1]_.

    Parameters
    ----------
    g : NormalFormGame
        NormalFormGame instance.

    init : array_like(int or array_like(float, ndim=1)),
           optional(default=None)
        Initial action profile, an array of N objects, where each object
        must be an integer (pure action) or an array of floats (mixed
        action). If None, default to an array of zeros (the zero-th
        action for each player).

    epsilon : scalar(float), optional(default=1e-3)
        Value of epsilon-optimality.

    max_iter : scalar(int), optional(default=200)
        Maximum number of iterations.

    full_output : bool, optional(default=False)
        If False, only the computed Nash equilibrium is returned. If
        True, the return value is `(NE, res)`, where `NE` is the Nash
        equilibrium and `res` is a `NashResult` object.

    Returns
    -------
    NE : tuple(ndarray(float, ndim=1))
        Tuple of computed Nash equilibrium mixed actions.

    res : NashResult
        Object containing information about the computation. Returned
        only when `full_output` is True. See `NashResult` for details.

    Raises
    ------
    TypeError
        If `g` is not a NormalFormGame or `init` is not array_like.

    ValueError
        If `init` is not of length N.

    Examples
    --------
    Consider the following version of 3-player "anti-coordination" game,
    where action 0 is a safe action which yields payoff 1, while action
    1 yields payoff :math:`v` if no other player plays 1 and payoff 0
    otherwise:

    >>> n = 3
    >>> v = 2
    >>> payoff_array = np.empty((2,)*n)
    >>> payoff_array[0, :] = 1
    >>> payoff_array[1, :] = 0
    >>> payoff_array[1].flat[0] = v
    >>> g = NormalFormGame((Player(payoff_array),)*n)
    >>> print(g)
    3-player NormalFormGame with payoff profile array:
    [[[[ 1.,  1.,  1.],   [ 1.,  1.,  2.]],
      [[ 1.,  2.,  1.],   [ 1.,  0.,  0.]]],

     [[[ 2.,  1.,  1.],   [ 0.,  1.,  0.]],
      [[ 0.,  0.,  1.],   [ 0.,  0.,  0.]]]]

    This game has a unique symmetric Nash equilibrium, where the
    equilibrium action is given by :math:`(p^*, 1-p^*)` with :math:`p^*
    = 1/v^{1/(n-1)}`:

    >>> p_star = 1/(v**(1/(n-1)))
    >>> [p_star, 1 - p_star]
    [0.7071067811865475, 0.29289321881345254]

    Obtain an approximate Nash equilibrium of this game by
    `mclennan_tourky`:

    >>> epsilon = 1e-5  # Value of epsilon-optimality
    >>> NE = mclennan_tourky(g, epsilon=epsilon)
    >>> print(NE[0], NE[1], NE[2], sep='\n')
    [ 0.70710754  0.29289246]
    [ 0.70710754  0.29289246]
    [ 0.70710754  0.29289246]
    >>> g.is_nash(NE, tol=epsilon)
    True

    Additional information is returned if `full_output` is set True:

    >>> NE, res = mclennan_tourky(g, epsilon=epsilon, full_output=True)
    >>> res.converged
    True
    >>> res.num_iter
    18

    References
    ----------
    .. [1] A. McLennan and R. Tourky, "From Imitation Games to
       Kakutani," 2006.

    """
    # Duck-type check: any object exposing `N` is accepted; anything
    # else is reported as a type error.  Catch only AttributeError so
    # that KeyboardInterrupt/SystemExit are not swallowed.
    try:
        N = g.N
    except AttributeError:
        raise TypeError('g must be a NormalFormGame')
    if N < 2:
        raise NotImplementedError('Not implemented for 1-player games')

    if init is None:
        init = (0,) * N
    try:
        init_length = len(init)
    except TypeError:
        raise TypeError('init must be array_like')
    if init_length != N:
        raise ValueError(
            'init must be of length {N}'.format(N=N)
        )

    # indptr[i]:indptr[i+1] delimits player i's slice within a
    # flattened (concatenated) mixed action profile.
    indptr = np.empty(N+1, dtype=int)
    indptr[0] = 0
    indptr[1:] = np.cumsum(g.nums_actions)
    x_init = _flatten_action_profile(init, indptr)

    def is_approx_fp(x):
        # Convergence criterion: stop once `x` is an epsilon-Nash
        # equilibrium of `g`.
        return _is_epsilon_nash(x, g, epsilon, indptr)

    x_star, converged, num_iter = \
        _compute_fixed_point_ig(_best_response_selection, x_init, max_iter,
                                verbose=0, print_skip=1,
                                is_approx_fp=is_approx_fp,
                                g=g, indptr=indptr)
    NE = _get_action_profile(x_star, indptr)

    if not full_output:
        return NE

    res = NashResult(NE=NE,
                     converged=converged,
                     num_iter=num_iter,
                     max_iter=max_iter,
                     init=init,
                     epsilon=epsilon)

    return NE, res
| 152 | + |
| 153 | + |
def _best_response_selection(x, g, indptr=None):
    """
    Selection of the best response correspondence of `g` that selects
    the best response action with the smallest index when there are
    ties, where the input and output are flattened action profiles.

    Parameters
    ----------
    x : array_like(float, ndim=1)
        Array of flattened mixed action profile of length equal to n_0 +
        ... + n_N-1, where `x[indptr[i]:indptr[i+1]]` contains player
        i's mixed action.

    g : NormalFormGame

    indptr : array_like(int, ndim=1), optional(default=None)
        Array of index pointers of length N+1, where `indptr[0] = 0` and
        `indptr[i+1] = indptr[i] + n_i`. Created internally if None.

    Returns
    -------
    out : ndarray(float, ndim=1)
        Array of flattened mixed action profile of length equal to n_0 +
        ... + n_N-1, where `out[indptr[i]:indptr[i+1]]` contains player
        i's mixed action representation of his pure best response.

    """
    N = g.N

    if indptr is None:
        indptr = np.empty(N+1, dtype=int)
        indptr[0] = 0
        indptr[1:] = np.cumsum(g.nums_actions)

    out = np.zeros(indptr[-1])

    for i, player in enumerate(g.players):
        if N == 2:
            # Two players: the single opponent's action is passed as-is.
            opponents = x[indptr[1-i]:indptr[1-i+1]]
        else:
            # N players: tuple of opponents' actions, ordered
            # i+1, ..., N-1, 0, ..., i-1 (cyclic from player i).
            opponents = tuple(
                x[indptr[k]:indptr[k+1]]
                for k in ((i+j) % N for j in range(1, N))
            )
        pure_br = player.best_response(opponents)
        # Encode the pure best response as a unit coordinate vector.
        out[indptr[i]+pure_br] = 1

    return out
| 204 | + |
| 205 | + |
def _is_epsilon_nash(x, g, epsilon, indptr=None):
    """
    Determine whether `x` is an `epsilon`-Nash equilibrium of `g`.

    Parameters
    ----------
    x : array_like(float, ndim=1)
        Array of flattened mixed action profile of length equal to n_0 +
        ... + n_N-1, where `x[indptr[i]:indptr[i+1]]` contains player
        i's mixed action.

    g : NormalFormGame

    epsilon : scalar(float)
        Tolerance passed to `g.is_nash`.

    indptr : array_like(int, ndim=1), optional(default=None)
        Array of index pointers of length N+1, where `indptr[0] = 0` and
        `indptr[i+1] = indptr[i] + n_i`. Created internally if None.

    Returns
    -------
    bool

    """
    if indptr is None:
        indptr = np.empty(g.N+1, dtype=int)
        indptr[0] = 0
        indptr[1:] = np.cumsum(g.nums_actions)

    # Unflatten and delegate the epsilon-optimality check to the game.
    return g.is_nash(_get_action_profile(x, indptr), tol=epsilon)
| 237 | + |
| 238 | + |
def _get_action_profile(x, indptr):
    """
    Split a flattened action profile into a tuple of mixed actions.

    Parameters
    ----------
    x : array_like(float, ndim=1)
        Array of flattened mixed action profile of length equal to n_0 +
        ... + n_N-1, where `x[indptr[i]:indptr[i+1]]` contains player
        i's mixed action.

    indptr : array_like(int, ndim=1)
        Array of index pointers of length N+1, where `indptr[0] = 0` and
        `indptr[i+1] = indptr[i] + n_i`.

    Returns
    -------
    action_profile : tuple(ndarray(float, ndim=1))
        Tuple of N mixed actions, each of length n_i.

    """
    # Consecutive pointer pairs (indptr[i], indptr[i+1]) delimit each
    # player's slice of `x`.
    return tuple(
        x[start:stop] for start, stop in zip(indptr[:-1], indptr[1:])
    )
| 263 | + |
| 264 | + |
def _flatten_action_profile(action_profile, indptr):
    """
    Flatten the given action profile into a single 1-d array.

    Parameters
    ----------
    action_profile : array_like(int or array_like(float, ndim=1))
        Profile of actions of the N players, where each player i's
        action is a pure action (int) or a mixed action (array_like of
        floats of length n_i).

    indptr : array_like(int, ndim=1)
        Array of index pointers of length N+1, where `indptr[0] = 0` and
        `indptr[i+1] = indptr[i] + n_i`.

    Returns
    -------
    out : ndarray(float, ndim=1)
        Array of flattened mixed action profile of length equal to n_0 +
        ... + n_N-1, where `out[indptr[i]:indptr[i+1]]` contains player
        i's mixed action.

    """
    num_players = len(indptr) - 1
    out = np.empty(indptr[-1])

    for i in range(num_players):
        start, stop = indptr[i], indptr[i+1]
        action = action_profile[i]
        if isinstance(action, numbers.Integral):
            # Pure action: expand to the corresponding unit vector.
            out[start:stop] = pure2mixed(stop - start, action)
        else:
            # Already a mixed action; copied into place as-is.
            out[start:stop] = action

    return out
| 300 | + |
| 301 | + |
class NashResult(dict):
    """
    Contain the information about the result of Nash equilibrium
    computation.

    Attributes
    ----------
    NE : tuple(ndarray(float, ndim=1))
        Computed Nash equilibrium.

    converged : bool
        Whether the routine has converged.

    num_iter : int
        Number of iterations.

    max_iter : int
        Maximum number of iterations.

    init : array_like
        Initial action profile used.

    epsilon : float
        Value of epsilon-optimality used.

    """
    # This is sourced from scipy.optimize.OptimizeResult: a dict whose
    # entries are also accessible as attributes.
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __repr__(self):
        if self.keys():
            # Right-align keys to the longest key for a tidy listing.
            m = max(map(len, list(self.keys()))) + 1
            return '\n'.join([k.rjust(m) + ': ' + repr(v)
                              for k, v in sorted(self.items())])
        else:
            return self.__class__.__name__ + "()"

    def __dir__(self):
        # Return a list (not a dict_keys view) so that dir() and tab
        # completion work as expected.
        return list(self.keys())
0 commit comments