microsoft
diff --git a/‎scripts/tw-make‎
Lines changed: 4 additions & 2 deletions b/‎scripts/tw-make‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎scripts/tw-stats‎
Lines changed: 1 addition & 1 deletion b/‎scripts/tw-stats‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎scripts_dev/benchmark_framework.py‎
Lines changed: 10 additions & 27 deletions b/‎scripts_dev/benchmark_framework.py‎
Lines changed: 10 additions & 27 deletions
diff --git a/‎tests/test_make_game.py‎
Lines changed: 4 additions & 4 deletions b/‎tests/test_make_game.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎tests/test_play_generated_games.py‎
Lines changed: 3 additions & 2 deletions b/‎tests/test_play_generated_games.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎tests/test_textworld.py‎
Lines changed: 1 addition & 1 deletion b/‎tests/test_textworld.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/test_tw_play.py‎ renamed to ‎tests/test_tw-play.py‎
Lines changed: 4 additions & 4 deletions b/‎tests/test_tw_play.py‎ renamed to ‎tests/test_tw-play.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎textworld/agents/walkthrough.py‎
Lines changed: 1 addition & 1 deletion b/‎textworld/agents/walkthrough.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎textworld/envs/glulx/git_glulx_ml.py‎
Lines changed: 25 additions & 13 deletions b/‎textworld/envs/glulx/git_glulx_ml.py‎
Lines changed: 25 additions & 13 deletions
diff --git a/‎textworld/envs/glulx/tests/test_git_glulx_ml.py‎
Lines changed: 27 additions & 12 deletions b/‎textworld/envs/glulx/tests/test_git_glulx_ml.py‎
Lines changed: 27 additions & 12 deletions
@@ -46,6 +46,8 @@ def parse_args():
                                help="Nb. of objects in the world.")
     custom_parser.add_argument("--quest-length", type=int, default=5, metavar="LENGTH",
                                help="Minimum nb. of actions the quest requires to be completed.")
+    custom_parser.add_argument("--quest-breadth", type=int, default=3, metavar="BREADTH",
+                               help="Control how non-linear a quest can be.")
 
     challenge_parser = subparsers.add_parser("challenge", parents=[general_parser],
                                              help='Generate a game for one of the challenges.')
@@ -72,7 +74,7 @@ if __name__ == "__main__":
     }
 
     if args.subcommand == "custom":
-        game_file, game = textworld.make(args.world_size, args.nb_objects, args.quest_length, grammar_flags,
+        game_file, game = textworld.make(args.world_size, args.nb_objects, args.quest_length, args.quest_breadth, grammar_flags,
                                          seed=args.seed, games_dir=args.output)
 
     elif args.subcommand == "challenge":
@@ -87,7 +89,7 @@ if __name__ == "__main__":
 
     print("Game generated: {}".format(game_file))
     if args.verbose:
-        print(game.quests[0].desc)
+        print(game.objective)
 
     if args.view:
         textworld.render.visualize(game, interactive=True)
@@ -38,7 +38,7 @@ if __name__ == "__main__":
             continue
 
         if len(game.quests) > 0:
-            objectives[game_filename] = game.quests[0].desc
+            objectives[game_filename] = game.objective
 
         names |= set(info.name for info in game.infos.values() if info.name is not None)
         game_logger.collect(game)
 
@@ -10,35 +10,14 @@
 from textworld.generator import World
 
 
-def generate_never_ending_game_old(args):
-    g_rng.set_seed(args.seed)
-    msg = "--max-steps {} --nb-objects {} --nb-rooms {} --seed {}"
-    print(msg.format(args.max_steps, args.nb_objects, args.nb_rooms, g_rng.seed))
-    print("Generating game...")
-
-    map_ = textworld.generator.make_map(n_rooms=args.nb_rooms)
-    world = World.from_map(map_)
-    world.set_player_room()
-    world.populate(nb_objects=args.nb_objects)
-    grammar = textworld.generator.make_grammar(flags={"theme": "house"})
-
-    quests = []  # No quest
-    game = textworld.generator.make_game_with(world, quests, grammar)
-
-    game_name = "neverending"
-    game_file = textworld.generator.compile_game(game, game_name, force_recompile=True,
-                                                 games_folder=args.output)
-    return game_file
-
-
 def generate_never_ending_game(args):
     g_rng.set_seed(args.seed)
-    msg = "--max-steps {} --nb-objects {} --nb-rooms {} --quest-length {} --seed {}"
-    print(msg.format(args.max_steps, args.nb_objects, args.nb_rooms, args.quest_length, g_rng.seed))
+    msg = "--max-steps {} --nb-objects {} --nb-rooms {} --quest-length {} --quest-breadth {} --seed {}"
+    print(msg.format(args.max_steps, args.nb_objects, args.nb_rooms, args.quest_length, args.quest_breadth, g_rng.seed))
     print("Generating game...")
 
     grammar_flags = {}
-    game = textworld.generator.make_game(args.nb_rooms, args.nb_objects, args.quest_length, grammar_flags)
+    game = textworld.generator.make_game(args.nb_rooms, args.nb_objects, args.quest_length, args.quest_breadth, grammar_flags)
     if args.no_quest:
         game.quests = []
 
@@ -52,9 +31,11 @@ def benchmark(game_file, args):
     print("Using {}".format(env.__class__.__name__))
 
     if args.mode == "random":
-        agent = textworld.agents.RandomTextAgent()
+        agent = textworld.agents.NaiveAgent()
     elif args.mode == "random-cmd":
         agent = textworld.agents.RandomCommandAgent()
+    elif args.mode == "walkthrough":
+        agent = textworld.agents.WalkthroughAgent()
 
     agent.reset(env)
 
@@ -96,13 +77,15 @@ def parse_args():
                         help="Nb. of rooms in the world. Default: %(default)s")
     parser.add_argument("--nb-objects", type=int, default=50,
                         help="Nb. of objects in the world. Default: %(default)s")
-    parser.add_argument("--quest-length", type=int, default=10,
+    parser.add_argument("--quest-length", type=int, default=5,
                         help="Minimum nb. of actions the quest requires to be completed. Default: %(default)s")
+    parser.add_argument("--quest-breadth", type=int, default=3,
+                        help="Control how non-linear a quest can be. Default: %(default)s")
     parser.add_argument("--max-steps", type=int, default=1000,
                         help="Stop the game after that many steps. Default: %(default)s")
     parser.add_argument("--output", default="./gen_games/",
                         help="Output folder to save generated game files.")
-    parser.add_argument("--mode", default="random-cmd", choices=["random", "random-cmd"])
+    parser.add_argument("--mode", default="random-cmd", choices=["random", "random-cmd", "walkthrough"])
     parser.add_argument("--no-quest", action="store_true")
     parser.add_argument("--compute_intermediate_reward", action="store_true")
     parser.add_argument("--activate_state_tracking", action="store_true")
 
@@ -11,11 +11,11 @@ def test_making_game_with_names_to_exclude():
     g_rng.set_seed(42)
 
     with make_temp_directory(prefix="test_render_wrapper") as tmpdir:
-        game_file1, game1 = textworld.make(2, 20, 3, {"names_to_exclude": []},
+        game_file1, game1 = textworld.make(2, 20, 3, 3, {"names_to_exclude": []},
                                            seed=123, games_dir=tmpdir)
 
         game1_objects_names = [info.name for info in game1.infos.values() if info.name is not None]
-        game_file2, game2 = textworld.make(2, 20, 3, {"names_to_exclude": game1_objects_names},
+        game_file2, game2 = textworld.make(2, 20, 3, 3, {"names_to_exclude": game1_objects_names},
                                            seed=123, games_dir=tmpdir)
         game2_objects_names = [info.name for info in game2.infos.values() if info.name is not None]
         assert len(set(game1_objects_names) & set(game2_objects_names)) == 0
@@ -24,8 +24,8 @@ def test_making_game_with_names_to_exclude():
 def test_making_game_is_reproducible_with_seed():
     grammar_flags = {}
     with make_temp_directory(prefix="test_render_wrapper") as tmpdir:
-        game_file1, game1 = textworld.make(2, 20, 3, grammar_flags, seed=123, games_dir=tmpdir)
-        game_file2, game2 = textworld.make(2, 20, 3, grammar_flags, seed=123, games_dir=tmpdir)
+        game_file1, game1 = textworld.make(2, 20, 3, 3, grammar_flags, seed=123, games_dir=tmpdir)
+        game_file2, game2 = textworld.make(2, 20, 3, 3, grammar_flags, seed=123, games_dir=tmpdir)
         assert game_file1 == game_file2
         assert game1 == game2
         # Make sure they are not the same Python objects.
 
@@ -16,12 +16,13 @@ def test_play_generated_games():
         # Sample game specs.
         world_size = rng.randint(1, 10)
         nb_objects = rng.randint(0, 20)
-        quest_length = rng.randint(1, 10)
+        quest_length = rng.randint(2, 5)
+        quest_breadth = rng.randint(3, 7)
         game_seed = rng.randint(0, 65365)
         grammar_flags = {}  # Default grammar.
 
         with make_temp_directory(prefix="test_play_generated_games") as tmpdir:
-            game_file, game = textworld.make(world_size, nb_objects, quest_length, grammar_flags,
+            game_file, game = textworld.make(world_size, nb_objects, quest_length, quest_breadth, grammar_flags,
                                              seed=game_seed, games_dir=tmpdir)
 
             # Solve the game using WalkthroughAgent.
 
@@ -58,7 +58,7 @@ def test_game_walkthrough_agent(self):
         agent = textworld.agents.WalkthroughAgent()
         env = textworld.start(self.game_file)
         env.activate_state_tracking()
-        commands = self.game.quests[0].commands
+        commands = self.game.main_quest.commands
         agent.reset(env)
         game_state = env.reset()
 
 
@@ -7,9 +7,9 @@
 from textworld.utils import make_temp_directory
 
 
-def test_making_a_custom_game():                                             
-    with make_temp_directory(prefix="test_tw-play") as tmpdir:    
-        game_file, _ = textworld.make(5, 10, 5, {}, seed=1234, games_dir=tmpdir)
+def test_playing_a_game():
+    with make_temp_directory(prefix="test_tw-play") as tmpdir:
+        game_file, _ = textworld.make(5, 10, 5, 4, {}, seed=1234, games_dir=tmpdir)
 
         command = ["tw-play", "--max-steps", "100", "--mode", "random", game_file]
         assert check_call(command) == 0
@@ -18,4 +18,4 @@ def test_making_a_custom_game():
         assert check_call(command) == 0
 
         command = ["tw-play", "--max-steps", "100", "--mode", "walkthrough", game_file]
-        assert check_call(command) == 0
+        assert check_call(command) == 0
@@ -26,7 +26,7 @@ def reset(self, env):
             raise NameError(msg)
 
         # Load command from the generated game.
-        self._commands = iter(env.game.quests[0].commands)
+        self._commands = iter(env.game.main_quest.commands)
 
     def act(self, game_state, reward, done):
         try:
 
@@ -129,10 +129,12 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self._has_won = False
         self._has_lost = False
+        self.has_timeout = False
         self._state_tracking = False
         self._compute_intermediate_reward = False
+        self._max_score = 0
 
-    def init(self, output: str, game=None,
+    def init(self, output: str, game: Game,
              state_tracking: bool = False, compute_intermediate_reward: bool = False):
         """
         Initialize the game state and set tracking parameters.
@@ -149,10 +151,8 @@ def init(self, output: str, game=None,
         self._game_progression = GameProgression(game, track_quests=compute_intermediate_reward)
         self._state_tracking = state_tracking
         self._compute_intermediate_reward = compute_intermediate_reward and len(game.quests) > 0
-
-        self._objective = ""
-        if len(game.quests) > 0:
-            self._objective = game.quests[0].desc
+        self._objective = game.objective
+        self._max_score = sum(quest.reward for quest in game.quests)
 
     def view(self) -> "GlulxGameState":
         """
@@ -177,6 +177,7 @@ def view(self) -> "GlulxGameState":
         game_state._nb_moves = self.nb_moves
         game_state._has_won = self.has_won
         game_state._has_lost = self.has_lost
+        game_state.has_timeout = self.has_timeout
 
         if self._state_tracking:
             game_state._admissible_commands = self.admissible_commands
@@ -199,6 +200,7 @@ def update(self, command: str, output: str) -> "GlulxGameState":
         game_state = super().update(command, output)
         game_state.previous_state = self.view()
         game_state._objective = self.objective
+        game_state._max_score = self.max_score
         game_state._game_progression = self._game_progression
         game_state._state_tracking = self._state_tracking
         game_state._compute_intermediate_reward = self._compute_intermediate_reward
@@ -317,16 +319,21 @@ def intermediate_reward(self):
 
     @property
     def score(self):
-        if self.has_won:
-            return 1
-        elif self.has_lost:
-            return -1
+        if not hasattr(self, "_score"):
+            output = self._raw
+            if not self.game_ended:
+                output = self._env._send("score")
+
+            match = re.search("scored (?P<score>[0-9]+) out of a possible (?P<max_score>[0-9]+),", output)
+            self._score = 0
+            if match:
+                self._score = int(match.groupdict()["score"])
 
-        return 0
+        return self._score
 
     @property
     def max_score(self):
-        return 1
+        return self._max_score
 
     @property
     def has_won(self):
@@ -336,6 +343,11 @@ def has_won(self):
     def has_lost(self):
         return self._has_lost or '*** You lost! ***' in self.feedback
 
+    @property
+    def game_ended(self) -> bool:
+        """ Whether the game is finished or not. """
+        return self.has_won | self.has_lost | self.has_timeout
+
     @property
     def game_infos(self) -> Mapping:
         """ Additional information about the game. """
@@ -439,8 +451,8 @@ def step(self, command: str) -> Tuple[GlulxGameState, float, bool]:
             raise GameNotRunningError()
 
         self.game_state = self.game_state.update(command, output)
-        done = self.game_state.game_ended or not self.game_running
-        return self.game_state, self.game_state.score, done
+        self.game_state.has_timeout = not self.game_running
+        return self.game_state, self.game_state.score, self.game_state.game_ended
 
     def _send(self, command: str) -> Union[str, None]:
         if not self.game_running:
 
@@ -47,7 +47,12 @@ def build_test_game():
     chest.add_property("open")
     R2.add(chest)
 
-    M.set_quest_from_commands(commands)
+    quest1 = M.new_quest_using_commands(commands)
+    quest1.reward = 2
+    quest2 = M.new_quest_using_commands(commands + ["close chest"])
+    quest2.set_winning_conditions([M.new_fact("in", carrot, chest),
+                                   M.new_fact("closed", chest)])
+    M._quests = [quest1, quest2]
     game = M.build()
     return game
 
@@ -128,7 +133,10 @@ def test_inventory(self):
         game_state, _, _ = self.env.step("take carrot")
         game_state, _, _ = self.env.step("go east")
         game_state, _, _ = self.env.step("insert carrot into chest")
-        assert game_state.inventory == ""
+        assert "carrying nothing" in game_state.inventory
+
+        game_state, _, _ = self.env.step("close chest")
+        assert game_state.inventory == ""  # Game has ended
 
     def test_objective(self):
         assert self.game_state.objective.strip() in self.game_state.feedback
@@ -145,16 +153,19 @@ def test_description(self):
 
         # End the game.
         game_state, _, _ = self.env.step("insert carrot into chest")
+        game_state, _, _ = self.env.step("close chest")
         assert game_state.description == ""
 
     def test_score(self):
         assert self.game_state.score == 0
-        assert self.game_state.max_score == 1
+        assert self.game_state.max_score == 3
         game_state, _, _ = self.env.step("go east")
         assert game_state.score == 0
         game_state, _, _ = self.env.step("insert carrot into chest")
-        assert game_state.score == 1
-        assert game_state.max_score == 1
+        assert game_state.score == 2
+        assert game_state.max_score == 3
+        game_state, _, _ = self.env.step("close chest")
+        assert game_state.score == 3
 
     def test_game_ended_when_no_quest(self):
         M = GameMaker()
@@ -184,6 +195,8 @@ def test_has_won(self):
         game_state, _, _ = self.env.step("go east")
         assert not game_state.has_won
         game_state, _, done = self.env.step("insert carrot into chest")
+        assert not game_state.has_won
+        game_state, _, done = self.env.step("close chest")
         assert game_state.has_won
 
     def test_has_lost(self):
@@ -210,31 +223,33 @@ def test_intermediate_reward(self):
         game_state, _, _ = self.env.step("close wooden door")
         assert game_state.intermediate_reward == 0
         game_state, _, done = self.env.step("insert carrot into chest")
+        game_state, _, done = self.env.step("close chest")
         assert done
         assert game_state.has_won
         assert game_state.intermediate_reward == 1
 
     def test_policy_commands(self):
-        assert self.game_state.policy_commands == self.game.quests[0].commands
+        assert self.game_state.policy_commands == self.game.main_quest.commands
 
         game_state, _, _ = self.env.step("drop carrot")
-        expected = ["take carrot"] + self.game.quests[0].commands
+        expected = ["take carrot"] + self.game.main_quest.commands
         assert game_state.policy_commands == expected, game_state.policy_commands
 
         game_state, _, _ = self.env.step("take carrot")
-        expected = self.game.quests[0].commands
+        expected = self.game.main_quest.commands
         assert game_state.policy_commands == expected
 
         game_state, _, _ = self.env.step("go east")
-        expected = self.game.quests[0].commands[1:]
+        expected = self.game.main_quest.commands[1:]
         assert game_state.policy_commands == expected
 
         game_state, _, _ = self.env.step("insert carrot into chest")
+        game_state, _, _ = self.env.step("close chest")
         assert game_state.policy_commands == [], game_state.policy_commands
 
         # Test parallel subquests.
         game_state = self.env.reset()
-        commands = self.game.quests[0].commands
+        commands = self.game.main_quest.commands
         assert game_state.policy_commands == commands
         game_state, _, _ = self.env.step("close wooden door")
         assert game_state.policy_commands == ["open wooden door"] + commands
@@ -248,15 +263,15 @@ def test_policy_commands(self):
 
         # Irreversible action.
         game_state = self.env.reset()
-        assert game_state.policy_commands == self.game.quests[0].commands
+        assert game_state.policy_commands == self.game.main_quest.commands
         game_state, _, done = self.env.step("eat carrot")
         assert done
         assert game_state.has_lost
         assert len(game_state.policy_commands) == 0
 
     def test_admissible_commands(self):
         game_state = self.env.reset()
-        for command in self.game.quests[0].commands:
+        for command in self.game.main_quest.commands:
             assert command in game_state.admissible_commands
             game_state, _, done = self.env.step(command)