Skip to content

Commit f03f83a

Browse files
committed
Implement mclennan_tourky
1 parent 9975afc commit f03f83a

File tree

3 files changed

+417
-0
lines changed

3 files changed

+417
-0
lines changed

quantecon/game_theory/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
from .random import random_game, covariance_game
88
from .support_enumeration import support_enumeration, support_enumeration_gen
99
from .lemke_howson import lemke_howson
10+
from .mclennan_tourky import mclennan_tourky
Lines changed: 344 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,344 @@
1+
"""
2+
Author: Daisuke Oyama
3+
4+
Compute mixed Nash equilibria of an N-player normal form game by
5+
applying the imitation game algorithm by McLennan and Tourky to the best
6+
response correspondence.
7+
8+
"""
9+
import numbers
10+
import numpy as np
11+
from ..compute_fp import _compute_fixed_point_ig
12+
from .normal_form_game import pure2mixed
13+
14+
15+
def mclennan_tourky(g, init=None, epsilon=1e-3, max_iter=200,
                    full_output=False):
    r"""
    Find one mixed-action epsilon-Nash equilibrium of an N-player normal
    form game by the fixed point computation algorithm by McLennan and
    Tourky [1]_.

    Parameters
    ----------
    g : NormalFormGame
        NormalFormGame instance.

    init : array_like(int or array_like(float, ndim=1)),
           optional(default=None)
        Initial action profile, an array of N objects, where each object
        must be an integer (pure action) or an array of floats (mixed
        action). If None, default to an array of zeros (the zero-th
        action for each player).

    epsilon : scalar(float), optional(default=1e-3)
        Value of epsilon-optimality.

    max_iter : scalar(int), optional(default=200)
        Maximum number of iterations.

    full_output : bool, optional(default=False)
        If False, only the computed Nash equilibrium is returned. If
        True, the return value is `(NE, res)`, where `NE` is the Nash
        equilibrium and `res` is a `NashResult` object.

    Returns
    -------
    NE : tuple(ndarray(float, ndim=1))
        Tuple of computed Nash equilibrium mixed actions.

    res : NashResult
        Object containing information about the computation. Returned
        only when `full_output` is True. See `NashResult` for details.

    Examples
    --------
    Consider the following version of 3-player "anti-coordination" game,
    where action 0 is a safe action which yields payoff 1, while action
    1 yields payoff :math:`v` if no other player plays 1 and payoff 0
    otherwise:

    >>> n = 3
    >>> v = 2
    >>> payoff_array = np.empty((2,)*n)
    >>> payoff_array[0, :] = 1
    >>> payoff_array[1, :] = 0
    >>> payoff_array[1].flat[0] = v
    >>> g = NormalFormGame((Player(payoff_array),)*n)
    >>> print(g)
    3-player NormalFormGame with payoff profile array:
    [[[[ 1.,  1.,  1.],  [ 1.,  1.,  2.]],
      [[ 1.,  2.,  1.],  [ 1.,  0.,  0.]]],
    <BLANKLINE>
     [[[ 2.,  1.,  1.],  [ 0.,  1.,  0.]],
      [[ 0.,  0.,  1.],  [ 0.,  0.,  0.]]]]

    This game has a unique symmetric Nash equilibrium, where the
    equilibrium action is given by :math:`(p^*, 1-p^*)` with :math:`p^*
    = 1/v^{1/(n-1)}`:

    >>> p_star = 1/(v**(1/(n-1)))
    >>> [p_star, 1 - p_star]
    [0.7071067811865475, 0.29289321881345254]

    Obtain an approximate Nash equilibrium of this game by
    `mclennan_tourky`:

    >>> epsilon = 1e-5  # Value of epsilon-optimality
    >>> NE = mclennan_tourky(g, epsilon=epsilon)
    >>> print(NE[0], NE[1], NE[2], sep='\n')
    [ 0.70710754  0.29289246]
    [ 0.70710754  0.29289246]
    [ 0.70710754  0.29289246]
    >>> g.is_nash(NE, tol=epsilon)
    True

    Additional information is returned if `full_output` is set True:

    >>> NE, res = mclennan_tourky(g, epsilon=epsilon, full_output=True)
    >>> res.converged
    True
    >>> res.num_iter
    18

    References
    ----------
    .. [1] A. McLennan and R. Tourky, "From Imitation Games to
       Kakutani," 2006.

    """
    # Duck-typed check that `g` looks like a NormalFormGame: only the
    # absence of the `N` attribute is treated as "wrong type".
    try:
        N = g.N
    except AttributeError:
        raise TypeError('g must be a NormalFormGame')
    if N < 2:
        raise NotImplementedError('Not implemented for 1-player games')

    if init is None:
        init = (0,) * N
    try:
        init_length = len(init)
    except TypeError:
        raise TypeError('init must be array_like')
    if init_length != N:
        raise ValueError(
            'init must be of length {N}'.format(N=N)
        )

    # Index pointers into the flattened profile: player i's mixed action
    # occupies x[indptr[i]:indptr[i+1]].
    indptr = np.empty(N+1, dtype=int)
    indptr[0] = 0
    indptr[1:] = np.cumsum(g.nums_actions)
    x_init = _flatten_action_profile(init, indptr)

    # Iterate the best-response selection until an epsilon-Nash
    # equilibrium (an approximate fixed point) is reached.
    is_approx_fp = lambda x: _is_epsilon_nash(x, g, epsilon, indptr)
    x_star, converged, num_iter = \
        _compute_fixed_point_ig(_best_response_selection, x_init, max_iter,
                                verbose=0, print_skip=1,
                                is_approx_fp=is_approx_fp,
                                g=g, indptr=indptr)
    NE = _get_action_profile(x_star, indptr)

    if not full_output:
        return NE

    res = NashResult(NE=NE,
                     converged=converged,
                     num_iter=num_iter,
                     max_iter=max_iter,
                     init=init,
                     epsilon=epsilon)

    return NE, res
152+
153+
154+
def _best_response_selection(x, g, indptr=None):
155+
"""
156+
Selection of the best response correspondence of `g` that selects
157+
the best response action with the smallest index when there are
158+
ties, where the input and output are flattened action profiles.
159+
160+
Parameters
161+
----------
162+
x : array_like(float, ndim=1)
163+
Array of flattened mixed action profile of length equal to n_0 +
164+
... + n_N-1, where `out[indptr[i]:indptr[i+1]]` contains player
165+
i's mixed action.
166+
167+
g : NormalFormGame
168+
169+
indptr : array_like(int, ndim=1), optional(default=None)
170+
Array of index pointers of length N+1, where `indptr[0] = 0` and
171+
`indptr[i+1] = indptr[i] + n_i`. Created internally if None.
172+
173+
Returns
174+
-------
175+
out : ndarray(float, ndim=1)
176+
Array of flattened mixed action profile of length equal to n_0 +
177+
... + n_N-1, where `out[indptr[i]:indptr[i+1]]` contains player
178+
i's mixed action representation of his pure best response.
179+
180+
"""
181+
N = g.N
182+
183+
if indptr is None:
184+
indptr = np.empty(N+1, dtype=int)
185+
indptr[0] = 0
186+
indptr[1:] = np.cumsum(g.nums_actions)
187+
188+
out = np.zeros(indptr[-1])
189+
190+
if N == 2:
191+
for i in range(N):
192+
opponent_action = x[indptr[1-i]:indptr[1-i+1]]
193+
pure_br = g.players[i].best_response(opponent_action)
194+
out[indptr[i]+pure_br] = 1
195+
else:
196+
for i in range(N):
197+
opponent_actions = tuple(
198+
x[indptr[(i+j)%N]:indptr[(i+j)%N+1]] for j in range(1, N)
199+
)
200+
pure_br = g.players[i].best_response(opponent_actions)
201+
out[indptr[i]+pure_br] = 1
202+
203+
return out
204+
205+
206+
def _is_epsilon_nash(x, g, epsilon, indptr=None):
    """
    Determine whether the flattened profile `x` is an `epsilon`-Nash
    equilibrium of `g`.

    Parameters
    ----------
    x : array_like(float, ndim=1)
        Flattened mixed action profile of length n_0 + ... + n_N-1,
        with player i's mixed action in `x[indptr[i]:indptr[i+1]]`.

    g : NormalFormGame

    epsilon : scalar(float)

    indptr : array_like(int, ndim=1), optional(default=None)
        Array of index pointers of length N+1, where `indptr[0] = 0` and
        `indptr[i+1] = indptr[i] + n_i`. Created internally if None.

    Returns
    -------
    bool

    """
    if indptr is None:
        indptr = np.empty(g.N+1, dtype=int)
        indptr[0] = 0
        indptr[1:] = np.cumsum(g.nums_actions)

    # Unflatten and delegate the epsilon-optimality check to the game.
    return g.is_nash(_get_action_profile(x, indptr), tol=epsilon)
237+
238+
239+
def _get_action_profile(x, indptr):
240+
"""
241+
Obtain a tuple of mixed actions from a flattened action profile.
242+
243+
Parameters
244+
----------
245+
x : array_like(float, ndim=1)
246+
Array of flattened mixed action profile of length equal to n_0 +
247+
... + n_N-1, where `out[indptr[i]:indptr[i+1]]` contains player
248+
i's mixed action.
249+
250+
indptr : array_like(int, ndim=1)
251+
Array of index pointers of length N+1, where `indptr[0] = 0` and
252+
`indptr[i+1] = indptr[i] + n_i`.
253+
254+
Returns
255+
-------
256+
action_profile : tuple(ndarray(float, ndim=1))
257+
Tuple of N mixed actions, each of length n_i.
258+
259+
"""
260+
N = len(indptr) - 1
261+
action_profile = tuple(x[indptr[i]:indptr[i+1]] for i in range(N))
262+
return action_profile
263+
264+
265+
def _flatten_action_profile(action_profile, indptr):
266+
"""
267+
Flatten the given action profile.
268+
269+
Parameters
270+
----------
271+
action_profile : array_like(int or array_like(float, ndim=1))
272+
Profile of actions of the N players, where each player i' action
273+
is a pure action (int) or a mixed action (array_like of floats
274+
of length n_i).
275+
276+
indptr : array_like(int, ndim=1)
277+
Array of index pointers of length N+1, where `indptr[0] = 0` and
278+
`indptr[i+1] = indptr[i] + n_i`.
279+
280+
Returns
281+
-------
282+
out : ndarray(float, ndim=1)
283+
Array of flattened mixed action profile of length equal to n_0 +
284+
... + n_N-1, where `out[indptr[i]:indptr[i+1]]` contains player
285+
i's mixed action.
286+
287+
"""
288+
N = len(indptr) - 1
289+
out = np.empty(indptr[-1])
290+
291+
for i in range(N):
292+
if isinstance(action_profile[i], numbers.Integral): # pure action
293+
num_actions = indptr[i+1] - indptr[i]
294+
mixed_action = pure2mixed(num_actions, action_profile[i])
295+
else: # mixed action
296+
mixed_action = action_profile[i]
297+
out[indptr[i]:indptr[i+1]] = mixed_action
298+
299+
return out
300+
301+
302+
class NashResult(dict):
    """
    Contain the information about the result of Nash equilibrium
    computation.

    Attributes
    ----------
    NE : tuple(ndarray(float, ndim=1))
        Computed Nash equilibrium.

    converged : bool
        Whether the routine has converged.

    num_iter : int
        Number of iterations.

    max_iter : int
        Maximum number of iterations.

    init : array_like
        Initial action profile used.

    epsilon : scalar(float)
        Value of epsilon-optimality used.

    """
    # This is sourced from scipy.optimize.OptimizeResult: a dict whose
    # entries are also exposed as attributes.
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __repr__(self):
        if self.keys():
            # Right-align keys to the longest key for a tidy listing.
            m = max(map(len, list(self.keys()))) + 1
            return '\n'.join([k.rjust(m) + ': ' + repr(v)
                              for k, v in sorted(self.items())])
        else:
            return self.__class__.__name__ + "()"

    def __dir__(self):
        # Return a list (not a dict view) so dir()/tab-completion work
        # reliably, matching scipy's OptimizeResult.
        return list(self.keys())

0 commit comments

Comments
 (0)