@@ -85,8 +85,6 @@ def play(player_x: Agent, player_o: Agent):
8585def learn_from_game (args ):
8686 builder = args [0 ]
8787 num_games = args [1 ]
88- index = args [2 ]
89- num_cpus = args [3 ]
9088
9189 td_agent = builder .make ()
9290 player_x = builder .make ()
@@ -100,7 +98,7 @@ def learn_from_game(args):
10098 Mark .O : player_o ,
10199 }
102100
103- for _ in tqdm ( range (num_games ), desc = f"agent: { index } " , total = num_games , position = index % num_cpus ):
101+ for _ in range (num_games ):
104102 player_x .obs = numpy .append (obs , Mark .X )
105103 player_o .obs = numpy .append (obs , Mark .O )
106104
@@ -163,13 +161,10 @@ def learn(builder: AgentBuilder, num_games: int, num_agents: int, policy_filenam
163161 main_agent = builder .make ()
164162 chunksize = math .floor (num_agents / processes )
165163 with multiprocessing .Pool (processes = processes ) as pool :
166- agents = [( builder , num_games , i , processes ) for i in range (num_agents )]
164+ agents = (( builder , num_games ) for _ in range (num_agents ))
167165
168- print ("Playing games..." )
169- agents = pool .map (learn_from_game , iterable = agents , chunksize = chunksize )
170-
171- print ("Merging knowledge..." )
172- for agent in agents :
166+ print ("Learning..." )
167+ for agent in tqdm (pool .imap_unordered (learn_from_game , iterable = agents , chunksize = chunksize ), total = num_agents ):
173168 main_agent .merge (agent )
174169
175170 if policy_filename :
@@ -266,7 +261,6 @@ def main():
266261 transitions = transitions )
267262 learn (builder , num_games = suboptions .num_games , num_agents = suboptions .num_agents ,
268263 policy_filename = suboptions .with_policy )
269- print ("No other options." )
270264
271265
272266if __name__ == "__main__" :