google-deepmind · sriramsowmithri9807 · Jan 24, 2026 · Jan 24, 2026 · Feb 7, 2026 · Feb 17, 2026
diff --git a/open_spiel/integration_tests/playthroughs/python_param_social_dilemma.txt b/open_spiel/integration_tests/playthroughs/python_param_social_dilemma.txt
@@ -0,0 +1,151 @@
+game: python_param_social_dilemma
+
+GameType.chance_mode = ChanceMode.DETERMINISTIC
+GameType.dynamics = Dynamics.SIMULTANEOUS
+GameType.information = Information.PERFECT_INFORMATION
+GameType.long_name = "Python Parameterized Social Dilemma"
+GameType.max_num_players = 10
+GameType.min_num_players = 2
+GameType.parameter_specification = ["dynamic_payoffs", "max_game_length", "num_actions", "payoff_change_prob", "payoff_matrix", "players", "reward_noise_std"]
+GameType.provides_information_state_string = False
+GameType.provides_information_state_tensor = False
+GameType.provides_observation_string = True
+GameType.provides_observation_tensor = False
+GameType.provides_factored_observation_string = False
+GameType.reward_model = RewardModel.REWARDS
+GameType.short_name = "python_param_social_dilemma"
+GameType.utility = Utility.GENERAL_SUM
+
+NumDistinctActions() = 2
+PolicyTensorShape() = [2]
+MaxChanceOutcomes() = 0
+GetParameters() = {dynamic_payoffs=False,max_game_length=10,num_actions=2,payoff_change_prob=0.0,payoff_matrix=default,players=3,reward_noise_std=0.0}
+NumPlayers() = 3
+MinUtility() = 0.0
+MaxUtility() = 33.333
+UtilitySum() = None
+MaxGameLength() = 10
+ToString() = "python_param_social_dilemma(dynamic_payoffs=False,max_game_length=10,num_actions=2,payoff_change_prob=0.0,payoff_matrix=default,players=3,reward_noise_std=0.0)"
+
+# State 0
+# initial
+IsTerminal() = False
+History() = []
+HistoryString() = ""
+IsChanceNode() = False
+IsSimultaneousNode() = True
+CurrentPlayer() = -2
+InformationStateString(0) = "initial"
+InformationStateString(1) = "initial"
+InformationStateString(2) = "initial"
+ObservationString(0) = "initial"
+ObservationString(1) = "initial"
+ObservationString(2) = "initial"
+Rewards() = [0, 0, 0]
+Returns() = [0, 0, 0]
+LegalActions(0) = [0, 1]
+LegalActions(1) = [0, 1]
+LegalActions(2) = [0, 1]
+StringLegalActions(0) = ["C", "D"]
+StringLegalActions(1) = ["C", "D"]
+StringLegalActions(2) = ["C", "D"]
+
+# Apply joint action ["D", "C", "D"]
+actions: [1, 0, 1]
+
+# State 1
+# t0:[D,C,D]
+IsTerminal() = False
+History() = [1, 0, 1]
+HistoryString() = "1, 0, 1"
+IsChanceNode() = False
+IsSimultaneousNode() = True
+CurrentPlayer() = -2
+InformationStateString(0) = "t0:[D,C,D]"
+InformationStateString(1) = "t0:[D,C,D]"
+InformationStateString(2) = "t0:[D,C,D]"
+ObservationString(0) = "t0:[D,C,D]"
+ObservationString(1) = "t0:[D,C,D]"
+ObservationString(2) = "t0:[D,C,D]"
+Rewards() = [1.66667, 1, 1.66667]
+Returns() = [1.66667, 1, 1.66667]
+LegalActions(0) = [0, 1]
+LegalActions(1) = [0, 1]
+LegalActions(2) = [0, 1]
+StringLegalActions(0) = ["C", "D"]
+StringLegalActions(1) = ["C", "D"]
+StringLegalActions(2) = ["C", "D"]
+
+# Apply joint action ["C", "C", "C"]
+actions: [0, 0, 0]
+
+# State 2
+# t0:[D,C,D] t1:[C,C,C]
+IsTerminal() = False
+History() = [1, 0, 1, 0, 0, 0]
+HistoryString() = "1, 0, 1, 0, 0, 0"
+IsChanceNode() = False
+IsSimultaneousNode() = True
+CurrentPlayer() = -2
+InformationStateString(0) = "t0:[D,C,D] t1:[C,C,C]"
+InformationStateString(1) = "t0:[D,C,D] t1:[C,C,C]"
+InformationStateString(2) = "t0:[D,C,D] t1:[C,C,C]"
+ObservationString(0) = "t0:[D,C,D] t1:[C,C,C]"
+ObservationString(1) = "t0:[D,C,D] t1:[C,C,C]"
+ObservationString(2) = "t0:[D,C,D] t1:[C,C,C]"
+Rewards() = [3, 3, 3]
+Returns() = [4.66667, 4, 4.66667]
+LegalActions(0) = [0, 1]
+LegalActions(1) = [0, 1]
+LegalActions(2) = [0, 1]
+StringLegalActions(0) = ["C", "D"]
+StringLegalActions(1) = ["C", "D"]
+StringLegalActions(2) = ["C", "D"]
+
+# Apply joint action ["D", "C", "C"]
+actions: [1, 0, 0]
+
+# State 3
+# Apply joint action ["C", "D", "C"]
+actions: [0, 1, 0]
+
+# State 4
+# Apply joint action ["D", "D", "C"]
+actions: [1, 1, 0]
+
+# State 5
+# Apply joint action ["D", "C", "C"]
+actions: [1, 0, 0]
+
+# State 6
+# Apply joint action ["D", "C", "C"]
+actions: [1, 0, 0]
+
+# State 7
+# Apply joint action ["D", "C", "C"]
+actions: [1, 0, 0]
+
+# State 8
+# Apply joint action ["D", "C", "C"]
+actions: [1, 0, 0]
+
+# State 9
+# Apply joint action ["C", "C", "D"]
+actions: [0, 0, 1]
+
+# State 10
+# t0:[D,C,D] t1:[C,C,C] t2:[D,C,C] t3:[C,D,C] t4:[D,D,C] t5:[D,C,C] t6:[D,C,C] t7:[D,C,C] t8:[D,C,C] t9:[C,C,D]
+IsTerminal() = True
+History() = [1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1]
+HistoryString() = "1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1"
+IsChanceNode() = False
+IsSimultaneousNode() = False
+CurrentPlayer() = -4
+InformationStateString(0) = "t0:[D,C,D] t1:[C,C,C] t2:[D,C,C] t3:[C,D,C] t4:[D,D,C] t5:[D,C,C] t6:[D,C,C] t7:[D,C,C] t8:[D,C,C] t9:[C,C,D]"
+InformationStateString(1) = "t0:[D,C,D] t1:[C,C,C] t2:[D,C,C] t3:[C,D,C] t4:[D,D,C] t5:[D,C,C] t6:[D,C,C] t7:[D,C,C] t8:[D,C,C] t9:[C,C,D]"
+InformationStateString(2) = "t0:[D,C,D] t1:[C,C,C] t2:[D,C,C] t3:[C,D,C] t4:[D,D,C] t5:[D,C,C] t6:[D,C,C] t7:[D,C,C] t8:[D,C,C] t9:[C,C,D]"
+ObservationString(0) = "t0:[D,C,D] t1:[C,C,C] t2:[D,C,C] t3:[C,D,C] t4:[D,D,C] t5:[D,C,C] t6:[D,C,C] t7:[D,C,C] t8:[D,C,C] t9:[C,C,D]"
+ObservationString(1) = "t0:[D,C,D] t1:[C,C,C] t2:[D,C,C] t3:[C,D,C] t4:[D,D,C] t5:[D,C,C] t6:[D,C,C] t7:[D,C,C] t8:[D,C,C] t9:[C,C,D]"
+ObservationString(2) = "t0:[D,C,D] t1:[C,C,C] t2:[D,C,C] t3:[C,D,C] t4:[D,D,C] t5:[D,C,C] t6:[D,C,C] t7:[D,C,C] t8:[D,C,C] t9:[C,C,D]"
+Rewards() = [2, 2, 3.33333]
+Returns() = [27, 21, 21]
diff --git a/open_spiel/python/CMakeLists.txt b/open_spiel/python/CMakeLists.txt
@@ -239,6 +239,8 @@ set(PYTHON_TESTS ${PYTHON_TESTS}
   games/dynamic_routing_utils_test.py
   games/hangman_test.py
   games/liars_poker_test.py
+  games/param_social_dilemma_test.py
+  games/param_social_dilemma_bots_test.py
   games/pokerkit_wrapper_test.py
   games/team_dominoes_test.py
   games/tic_tac_toe_test.py

diff --git a/open_spiel/python/examples/param_social_dilemma_bots_example.py b/open_spiel/python/examples/param_social_dilemma_bots_example.py
@@ -0,0 +1,160 @@
+# Copyright 2019 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Axelrod tournament example for param_social_dilemma.
+
+Runs a two-player round-robin between Axelrod-style bots, then a few fixed three-player match-ups, and prints the returns.
+"""
+
+import numpy as np
+from open_spiel.python.games import param_social_dilemma_bots
+import pyspiel
+
+
+def play_game_with_bots(game, bots):
+    """Play one complete episode of `game`, one bot acting per seat.
+
+    Args:
+        game: Game object; must provide `new_initial_state()`.
+        bots: Sequence of bots, queried in order to build each joint action.
+
+    Returns:
+        The value of `state.returns()` once the state reports terminal.
+    """
+    state = game.new_initial_state()
+
+    # Hand the fresh starting state to every bot before play begins.
+    for participant in bots:
+        participant.restart_at(state)
+
+    while True:
+        if state.is_terminal():
+            return state.returns()
+        # Ask the bots in seat order, then apply their choices as one move.
+        joint_action = []
+        for participant in bots:
+            joint_action.append(participant.step(state))
+        state.apply_actions(joint_action)
+
+
+def _make_bot(bot_class, player_id, num_players):
+    """Instantiate `bot_class` for seat `player_id`.
+
+    AlwaysCooperateBot and AlwaysDefectBot are constructed from a player id
+    alone; every other bot class is additionally given `num_players`. The
+    classes are compared directly rather than through their `__name__`
+    strings, so a rename or typo cannot silently pick the wrong call.
+    """
+    id_only_classes = (
+        param_social_dilemma_bots.AlwaysCooperateBot,
+        param_social_dilemma_bots.AlwaysDefectBot,
+    )
+    if bot_class in id_only_classes:
+        return bot_class(player_id=player_id)
+    return bot_class(player_id=player_id, num_players=num_players)
+
+
+def run_tournament(num_rounds=100):
+    """Run a two-player round-robin over the listed bots and print the results.
+
+    Every ordered pairing (a strategy also meets itself) is played
+    `num_rounds` times on a 2-player game with `max_game_length` 10. The table
+    entry in row i, column j is the mean return of player 0 when strategy i
+    sits in seat 0 and strategy j in seat 1. Strategies are then ranked by
+    their row average, highest first.
+
+    Args:
+        num_rounds: Number of games played per ordered pairing; must be >= 1.
+
+    Raises:
+        ValueError: If `num_rounds` is less than 1, because the per-pairing
+            mean would divide by zero or average over no games at all.
+    """
+    if num_rounds < 1:
+        raise ValueError(f"num_rounds must be at least 1, got {num_rounds}")
+
+    print("=" * 70)
+    print("Axelrod-Style Tournament - Parameterized Social Dilemma")
+    print("=" * 70)
+
+    bot_classes = [
+        ("AlwaysCooperate", param_social_dilemma_bots.AlwaysCooperateBot),
+        ("AlwaysDefect", param_social_dilemma_bots.AlwaysDefectBot),
+        ("TitForTat", param_social_dilemma_bots.TitForTatBot),
+        ("GrimTrigger", param_social_dilemma_bots.GrimTriggerBot),
+        ("Pavlov", param_social_dilemma_bots.PavlovBot),
+        ("TitForTwoTats", param_social_dilemma_bots.TitForTwoTatsBot),
+        ("Gradual", param_social_dilemma_bots.GradualBot),
+    ]
+
+    # Single source of truth for the game size and the bots' `num_players`.
+    num_players = 2
+    num_bots = len(bot_classes)
+    scores = np.zeros((num_bots, num_bots))
+
+    game = pyspiel.load_game("python_param_social_dilemma", {
+        "players": num_players,
+        "max_game_length": 10
+    })
+
+    print(f"\nRunning {num_rounds} rounds for each pairing...")
+    print("-" * 70)
+
+    classes = [bot_class for _, bot_class in bot_classes]
+    for i, row_class in enumerate(classes):
+        for j, col_class in enumerate(classes):
+            total_score = 0
+            for _ in range(num_rounds):
+                # Build new bot objects for every game.
+                bots = [
+                    _make_bot(row_class, 0, num_players),
+                    _make_bot(col_class, 1, num_players),
+                ]
+                returns = play_game_with_bots(game, bots)
+                # Only the seat-0 (row strategy) return is scored.
+                total_score += returns[0]
+
+            scores[i, j] = total_score / num_rounds
+
+    print("\nTournament Results:")
+    print("-" * 70)
+    print(f"{'Strategy':<18}", end="")
+    for name, _ in bot_classes:
+        # Truncated to 12 characters and padded to the 14-wide cell used below.
+        print(f"{name[:12]:<14}", end="")
+    print()
+    print("-" * 70)
+
+    for (name, _), row in zip(bot_classes, scores):
+        print(f"{name:<18}", end="")
+        for cell in row:
+            print(f"{cell:>6.2f}        ", end="")
+        print()
+
+    avg_scores = np.mean(scores, axis=1)
+    print("-" * 70)
+    print("\nAverage Scores:")
+    print("-" * 70)
+    rankings = sorted(enumerate(avg_scores), key=lambda x: x[1], reverse=True)
+
+    for rank, (idx, score) in enumerate(rankings, 1):
+        name = bot_classes[idx][0]
+        print(f"{rank}. {name:<18} {score:>6.2f}")
+
+    print("\n" + "=" * 70)
+
+
+def demonstrate_n_player_tournament():
+    """Play five fixed 3-player line-ups once each and print every return.
+
+    Each scenario pairs a label with three bots, one per seat, on a 3-player
+    game with `max_game_length` 10. For every scenario the per-player return
+    is printed next to the bot's class name with the "Bot" suffix removed.
+    """
+    rule = "=" * 70
+    print("\n\n" + rule)
+    print("N-Player Tournament (3 players)")
+    print(rule)
+
+    game = pyspiel.load_game(
+        "python_param_social_dilemma",
+        {"players": 3, "max_game_length": 10},
+    )
+
+    lib = param_social_dilemma_bots
+    scenarios = [
+        (
+            "All Cooperate",
+            [lib.AlwaysCooperateBot(seat) for seat in range(3)],
+        ),
+        (
+            "All Defect",
+            [lib.AlwaysDefectBot(seat) for seat in range(3)],
+        ),
+        (
+            "Two Cooperators vs One Defector",
+            [
+                lib.AlwaysCooperateBot(0),
+                lib.AlwaysCooperateBot(1),
+                lib.AlwaysDefectBot(2),
+            ],
+        ),
+        (
+            "TitForTat vs AlwaysCooperate vs AlwaysDefect",
+            [
+                lib.TitForTatBot(0, 3),
+                lib.AlwaysCooperateBot(1),
+                lib.AlwaysDefectBot(2),
+            ],
+        ),
+        (
+            "Mixed Strategies",
+            [
+                lib.TitForTatBot(0, 3),
+                lib.GrimTriggerBot(1, 3),
+                lib.PavlovBot(2, 3),
+            ],
+        ),
+    ]
+
+    print("\nScenario Outcomes:")
+    print("-" * 70)
+
+    for label, lineup in scenarios:
+        outcome = play_game_with_bots(game, lineup)
+        print(f"\n{label}:")
+        for seat, ret in enumerate(outcome):
+            # Display name is the bot's class name without the "Bot" suffix.
+            strategy = type(lineup[seat]).__name__.replace("Bot", "")
+            print(f"  Player {seat} ({strategy:<15}): {ret:>6.2f}")
+
+    print("\n" + rule)
+
+
+def main():
+    """Run the two-player tournament, then the three-player scenarios."""
+    run_tournament(100)
+    demonstrate_n_player_tournament()
+
+
+# Run the demos only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()