|
| 1 | +""" |
| 2 | +Author: Daisuke Oyama |
| 3 | +
|
| 4 | +Compute mixed Nash equilibria of an N-player normal form game by |
| 5 | +applying the imitation game algorithm by McLennan and Tourky to the best |
| 6 | +response correspondence. |
| 7 | +
|
| 8 | +""" |
| 9 | +import numbers |
| 10 | +import numpy as np |
| 11 | +from ..compute_fp import _compute_fixed_point_ig |
| 12 | +from .normal_form_game import pure2mixed |
| 13 | + |
| 14 | + |
def mclennan_tourky(g, init=None, epsilon=1e-3, max_iter=200,
                    full_output=False):
    r"""
    Find one mixed-action epsilon-Nash equilibrium of an N-player normal
    form game by the fixed point computation algorithm by McLennan and
    Tourky [1]_.

    Parameters
    ----------
    g : NormalFormGame
        NormalFormGame instance.

    init : array_like(int or array_like(float, ndim=1)),
           optional(default=None)
        Initial action profile, an array of N objects, where each object
        must be an integer (pure action) or an array of floats (mixed
        action). If None, default to an array of zeros (the zero-th
        action for each player).

    epsilon : scalar(float), optional(default=1e-3)
        Value of epsilon-optimality.

    max_iter : scalar(int), optional(default=200)
        Maximum number of iterations.

    full_output : bool, optional(default=False)
        If False, only the computed Nash equilibrium is returned. If
        True, the return value is `(NE, res)`, where `NE` is the Nash
        equilibrium and `res` is a `NashResult` object.

    Returns
    -------
    NE : tuple(ndarray(float, ndim=1))
        Tuple of computed Nash equilibrium mixed actions.

    res : NashResult
        Object containing information about the computation. Returned
        only when `full_output` is True. See `NashResult` for details.

    Raises
    ------
    TypeError
        If `g` is not a NormalFormGame or `init` is not array_like.

    ValueError
        If `init` is not of length N.

    Examples
    --------
    Consider the following version of 3-player "anti-coordination" game,
    where action 0 is a safe action which yields payoff 1, while action
    1 yields payoff :math:`v` if no other player plays 1 and payoff 0
    otherwise:

    >>> n = 3
    >>> v = 2
    >>> payoff_array = np.empty((2,)*n)
    >>> payoff_array[0, :] = 1
    >>> payoff_array[1, :] = 0
    >>> payoff_array[1].flat[0] = v
    >>> g = NormalFormGame((Player(payoff_array),)*n)
    >>> print(g)
    3-player NormalFormGame with payoff profile array:
    [[[[ 1.,  1.,  1.],   [ 1.,  1.,  2.]],
      [[ 1.,  2.,  1.],   [ 1.,  0.,  0.]]],

     [[[ 2.,  1.,  1.],   [ 0.,  1.,  0.]],
      [[ 0.,  0.,  1.],   [ 0.,  0.,  0.]]]]

    This game has a unique symmetric Nash equilibrium, where the
    equilibrium action is given by :math:`(p^*, 1-p^*)` with :math:`p^*
    = 1/v^{1/(n-1)}`:

    >>> p_star = 1/(v**(1/(n-1)))
    >>> [p_star, 1 - p_star]
    [0.7071067811865475, 0.29289321881345254]

    Obtain an approximate Nash equilibrium of this game by
    `mclennan_tourky`:

    >>> epsilon = 1e-5  # Value of epsilon-optimality
    >>> NE = mclennan_tourky(g, epsilon=epsilon)
    >>> print(NE[0], NE[1], NE[2], sep='\n')
    [ 0.70710754  0.29289246]
    [ 0.70710754  0.29289246]
    [ 0.70710754  0.29289246]
    >>> g.is_nash(NE, tol=epsilon)
    True

    Additional information is returned if `full_output` is set True:

    >>> NE, res = mclennan_tourky(g, epsilon=epsilon, full_output=True)
    >>> res.converged
    True
    >>> res.num_iter
    18

    References
    ----------
    .. [1] A. McLennan and R. Tourky, "From Imitation Games to
       Kakutani," 2006.

    """
    # Duck-type check: any object exposing `N` is accepted; anything
    # else is reported as a type error.  Catch only AttributeError so
    # that KeyboardInterrupt/SystemExit are not swallowed.
    try:
        N = g.N
    except AttributeError:
        raise TypeError('g must be a NormalFormGame')
    if N < 2:
        raise NotImplementedError('Not implemented for 1-player games')

    if init is None:
        init = (0,) * N
    try:
        init_length = len(init)
    except TypeError:
        raise TypeError('init must be array_like')
    if init_length != N:
        raise ValueError(
            'init must be of length {N}'.format(N=N)
        )

    # indptr[i]:indptr[i+1] delimits player i's slice within a
    # flattened (concatenated) mixed action profile.
    indptr = np.empty(N+1, dtype=int)
    indptr[0] = 0
    indptr[1:] = np.cumsum(g.nums_actions)
    x_init = _flatten_action_profile(init, indptr)

    def is_approx_fp(x):
        # Convergence criterion: stop once `x` is an epsilon-Nash
        # equilibrium of `g`.
        return _is_epsilon_nash(x, g, epsilon, indptr)

    x_star, converged, num_iter = \
        _compute_fixed_point_ig(_best_response_selection, x_init, max_iter,
                                verbose=0, print_skip=1,
                                is_approx_fp=is_approx_fp,
                                g=g, indptr=indptr)
    NE = _get_action_profile(x_star, indptr)

    if not full_output:
        return NE

    res = NashResult(NE=NE,
                     converged=converged,
                     num_iter=num_iter,
                     max_iter=max_iter,
                     init=init,
                     epsilon=epsilon)

    return NE, res
| 152 | + |
| 153 | + |
def _best_response_selection(x, g, indptr=None):
    """
    Selection of the best response correspondence of `g` that selects
    the best response action with the smallest index when there are
    ties, where the input and output are flattened action profiles.

    Parameters
    ----------
    x : array_like(float, ndim=1)
        Array of flattened mixed action profile of length equal to n_0 +
        ... + n_N-1, where `x[indptr[i]:indptr[i+1]]` contains player
        i's mixed action.

    g : NormalFormGame

    indptr : array_like(int, ndim=1), optional(default=None)
        Array of index pointers of length N+1, where `indptr[0] = 0` and
        `indptr[i+1] = indptr[i] + n_i`. Created internally if None.

    Returns
    -------
    out : ndarray(float, ndim=1)
        Array of flattened mixed action profile of length equal to n_0 +
        ... + n_N-1, where `out[indptr[i]:indptr[i+1]]` contains player
        i's mixed action representation of his pure best response.

    """
    N = g.N

    if indptr is None:
        indptr = np.empty(N+1, dtype=int)
        indptr[0] = 0
        indptr[1:] = np.cumsum(g.nums_actions)

    out = np.zeros(indptr[-1])

    for i, player in enumerate(g.players):
        if N == 2:
            # Two players: the single opponent's action is passed as-is.
            opponents = x[indptr[1-i]:indptr[1-i+1]]
        else:
            # N players: tuple of opponents' actions, ordered
            # i+1, ..., N-1, 0, ..., i-1 (cyclic from player i).
            opponents = tuple(
                x[indptr[k]:indptr[k+1]]
                for k in ((i+j) % N for j in range(1, N))
            )
        pure_br = player.best_response(opponents)
        # Encode the pure best response as a unit coordinate vector.
        out[indptr[i]+pure_br] = 1

    return out
| 204 | + |
| 205 | + |
def _is_epsilon_nash(x, g, epsilon, indptr=None):
    """
    Determine whether `x` is an `epsilon`-Nash equilibrium of `g`.

    Parameters
    ----------
    x : array_like(float, ndim=1)
        Array of flattened mixed action profile of length equal to n_0 +
        ... + n_N-1, where `x[indptr[i]:indptr[i+1]]` contains player
        i's mixed action.

    g : NormalFormGame

    epsilon : scalar(float)
        Tolerance passed to `g.is_nash`.

    indptr : array_like(int, ndim=1), optional(default=None)
        Array of index pointers of length N+1, where `indptr[0] = 0` and
        `indptr[i+1] = indptr[i] + n_i`. Created internally if None.

    Returns
    -------
    bool

    """
    if indptr is None:
        indptr = np.empty(g.N+1, dtype=int)
        indptr[0] = 0
        indptr[1:] = np.cumsum(g.nums_actions)

    # Unflatten and delegate the epsilon-optimality check to the game.
    return g.is_nash(_get_action_profile(x, indptr), tol=epsilon)
| 237 | + |
| 238 | + |
def _get_action_profile(x, indptr):
    """
    Split a flattened action profile into a tuple of mixed actions.

    Parameters
    ----------
    x : array_like(float, ndim=1)
        Array of flattened mixed action profile of length equal to n_0 +
        ... + n_N-1, where `x[indptr[i]:indptr[i+1]]` contains player
        i's mixed action.

    indptr : array_like(int, ndim=1)
        Array of index pointers of length N+1, where `indptr[0] = 0` and
        `indptr[i+1] = indptr[i] + n_i`.

    Returns
    -------
    action_profile : tuple(ndarray(float, ndim=1))
        Tuple of N mixed actions, each of length n_i.

    """
    # Consecutive pointer pairs (indptr[i], indptr[i+1]) delimit each
    # player's slice of `x`.
    return tuple(
        x[start:stop] for start, stop in zip(indptr[:-1], indptr[1:])
    )
| 263 | + |
| 264 | + |
def _flatten_action_profile(action_profile, indptr):
    """
    Flatten the given action profile into a single 1-d array.

    Parameters
    ----------
    action_profile : array_like(int or array_like(float, ndim=1))
        Profile of actions of the N players, where each player i's
        action is a pure action (int) or a mixed action (array_like of
        floats of length n_i).

    indptr : array_like(int, ndim=1)
        Array of index pointers of length N+1, where `indptr[0] = 0` and
        `indptr[i+1] = indptr[i] + n_i`.

    Returns
    -------
    out : ndarray(float, ndim=1)
        Array of flattened mixed action profile of length equal to n_0 +
        ... + n_N-1, where `out[indptr[i]:indptr[i+1]]` contains player
        i's mixed action.

    """
    num_players = len(indptr) - 1
    out = np.empty(indptr[-1])

    for i in range(num_players):
        start, stop = indptr[i], indptr[i+1]
        action = action_profile[i]
        if isinstance(action, numbers.Integral):
            # Pure action: expand to the corresponding unit vector.
            out[start:stop] = pure2mixed(stop - start, action)
        else:
            # Already a mixed action; copied into place as-is.
            out[start:stop] = action

    return out
| 300 | + |
| 301 | + |
class NashResult(dict):
    """
    Contain the information about the result of Nash equilibrium
    computation.

    Attributes
    ----------
    NE : tuple(ndarray(float, ndim=1))
        Computed Nash equilibrium.

    converged : bool
        Whether the routine has converged.

    num_iter : int
        Number of iterations.

    max_iter : int
        Maximum number of iterations.

    init : array_like
        Initial action profile used.

    epsilon : float
        Value of epsilon-optimality used.

    """
    # This is sourced from scipy.optimize.OptimizeResult: a dict whose
    # entries are also accessible as attributes.
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __repr__(self):
        if self.keys():
            # Right-align keys to the longest key for a tidy listing.
            m = max(map(len, list(self.keys()))) + 1
            return '\n'.join([k.rjust(m) + ': ' + repr(v)
                              for k, v in sorted(self.items())])
        else:
            return self.__class__.__name__ + "()"

    def __dir__(self):
        # Return a list (not a dict_keys view) so that dir() and tab
        # completion work as expected.
        return list(self.keys())
0 commit comments