Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minesweeper Env #2

Merged
merged 19 commits into from
Dec 26, 2020
Previous commit
Next commit
Add full game test and bug fixes
  • Loading branch information
aokellermann committed Dec 24, 2020
commit 7a4c0523d4ca8408a80ca5b038be162bc2046e89
46 changes: 36 additions & 10 deletions gym_minesweeper/minesweeper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import gym
from gym import spaces
import numpy as np
from gym.utils import seeding

# Board dimensions used when the caller does not pass `board_size` to the
# environment constructor.
DEFAULT_BOARD_SIZE = (16, 30)
# Mine count used when the caller does not pass `num_mines` to the
# environment constructor.
DEFAULT_NUM_MINES = 99
Expand All @@ -21,7 +22,7 @@ def __init__(self, board_size=DEFAULT_BOARD_SIZE, num_mines=DEFAULT_NUM_MINES):
assert np.prod(board_size) >= num_mines
assert len(board_size) == 2
self.board_size, self.num_mines = board_size, num_mines
self.hist, self.board, self._board = None, None, None
self.hist, self.board, self._board, self._rng = None, None, None, None

self.observation_space = spaces.Box(SPACE_MINE, SPACE_MAX + 1, board_size, np.int)
self.action_space = spaces.Discrete(np.prod(board_size))
Expand All @@ -46,7 +47,7 @@ def step(self, action):

target_x, target_y = tuple(action)
assert self._is_valid_space(
target_x, target_y) and self._board[target_x, target_y] < 0, "Invalid action: {}".format(action)
target_x, target_y) and self.board[target_x, target_y] == SPACE_UNKNOWN, "Invalid action: {}".format(action)

# If already cleared, admonish user
if self.board[target_x, target_y] >= 0:
Expand All @@ -60,12 +61,14 @@ def step(self, action):
while mines_placed < self.num_mines:
mine_indices = list(
zip(*
[np.random.randint(0, dim_size, self.num_mines - mines_placed)
[self._rng.randint(0, dim_size, self.num_mines - mines_placed)
for dim_size in self.board_size]))
for i in mine_indices:
if self._board[i] == SPACE_UNKNOWN and i != action:
self._board[i] = SPACE_MINE
mines_placed += 1
if self._board[i] == SPACE_UNKNOWN:
# prohibit mines adjacent or equal to target on first step
if i[0] > target_x + 1 or i[0] < target_x - 1 or i[1] > target_y + 1 or i[1] < target_y - 1:
self._board[i] = SPACE_MINE
mines_placed += 1

# Calculate nearby mines in private board
for x in range(self.board_size[0]):
Expand All @@ -80,9 +83,12 @@ def step(self, action):
if status is None:
return self.board, 5, False, dict()
elif status:
return self.board, 1000, True, {"master": self._board}
# if won, no need to reveal mines
return self.board, 1000, True, dict()
else:
return self.board, -100, True, {"master": self._board}
# if lost, reveal mines
self.board = self._board
return self.board, -100, True, dict()

def reset(self):
"""Resets the environment to an initial state and returns an initial
Expand Down Expand Up @@ -154,14 +160,33 @@ def render(self, mode='human'):
outfile.write('\n')
return outfile

def seed(self, seed=None):
    """Seed the random number generator owned by this environment.

    Note:
        Some environments use multiple pseudorandom number generators.
        Every seed in use is reported so that accidental correlations
        between generators can be ruled out.

    Args:
        seed: value handed to ``seeding.np_random``; ``None`` lets that
            helper choose a seed itself.

    Returns:
        list<bigint>: the seeds used by this env's random number
            generators. The first entry is the "main" seed, i.e. the value
            a reproducer should pass to 'seed'. It usually equals the
            provided 'seed', but not when seed=None, for example.
    """
    generator, main_seed = seeding.np_random(seed)
    # Keep the generator on the instance so later draws use this seed.
    self._rng = generator
    return [main_seed]

def _is_valid_space(self, x, y):
    """Return whether ``(x, y)`` lies inside the board.

    ``x`` is checked against ``self.board_size[0]`` and ``y`` against
    ``self.board_size[1]``; both must be non-negative and strictly below
    their bound.
    """
    x_in_range = 0 <= x < self.board_size[0]
    return x_in_range and 0 <= y < self.board_size[1]

def _num_nearby_mines(self, x, y):
    """Count the mines in the cells surrounding ``(x, y)``.

    Args:
        x: index of the cell along the first board axis.
        y: index of the cell along the second board axis.

    Returns:
        int: how many of the (up to eight) neighbouring cells of the
            private board ``self._board`` hold ``SPACE_MINE``. The cell
            itself is never counted and neighbours that fall outside the
            board are ignored.
    """
    num_mines = 0
    for i in range(x - 1, x + 2):
        for j in range(y - 1, y + 2):
            # Skip only the centre cell. Requiring both coordinates to
            # differ (`x != i and y != j`) would also discard every
            # neighbour sharing a row or column, leaving just diagonals.
            if (i, j) == (x, y):
                continue
            if self._is_valid_space(i, j) and self._board[i, j] == SPACE_MINE:
                num_mines += 1
    return num_mines

Expand All @@ -182,6 +207,7 @@ def get_status(self):
Returns:
status (bool): True if game won, False if game lost, None if game in progress
"""

if np.count_nonzero(self.board == SPACE_MINE):
return False
return True if np.count_nonzero(self.board == SPACE_UNKNOWN) == 0 else None
return True if np.count_nonzero(self.board == SPACE_UNKNOWN) == self.num_mines else None
73 changes: 65 additions & 8 deletions gym_minesweeper/tests/minesweeper_test.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""Dummy."""

from gym_minesweeper import MinesweeperEnv, SPACE_MINE, SPACE_UNKNOWN

import numpy as np
import numpy.testing as npt

from gym_minesweeper import MinesweeperEnv, SPACE_UNKNOWN


def test_no_mines_init():
size = (2, 3)
Expand All @@ -22,10 +21,68 @@ def test_no_mines_step():
board, reward, done, info = ms.step(action)

expected_board = [[0] * size[1]] * size[0]
npt.assert_array_equal(expected_board, ms.board)
npt.assert_array_equal([action], ms.hist)
npt.assert_array_equal(ms.board, expected_board)
npt.assert_array_equal(ms.hist, [action])

npt.assert_array_equal(expected_board, board)
assert 1000 == reward
npt.assert_array_equal(board, expected_board)
assert reward == 1000
assert done
npt.assert_array_equal(expected_board, info["master"])
assert info == dict()


def test_mines_step():
    """Check the first step on a seeded 4x5 board holding three mines.

    The opening move must uncover the expected region, be recorded in the
    history, earn the in-progress reward and leave the game unfinished
    with an empty info dict.
    """
    env = MinesweeperEnv((4, 5), 3)
    env.seed(42069)

    first_move = (0, 0)
    observed, reward, done, info = env.step(first_move)

    expected_board = [
        [0, 1, -1, -1, -1],
        [0, 1, -1, -1, -1],
        [1, 1, -1, -1, -1],
        [-1, -1, -1, -1, -1],
    ]

    # State kept on the environment itself.
    npt.assert_array_equal(env.board, expected_board)
    npt.assert_array_equal(env.hist, [first_move])

    # Values handed back by step().
    npt.assert_array_equal(observed, expected_board)
    assert reward == 5
    assert not done
    assert not info


def assert_game(ms, actions, expected_boards, expected_rewards, expected_dones, expected_infos):
    """Play ``actions`` on ``ms`` in order and verify the result of every step.

    Args:
        ms: environment under test. It must offer ``step(action)`` returning
            ``(board, reward, done, info)`` and expose ``board`` and ``hist``
            attributes.
        actions: moves to play, in order.
        expected_boards: board expected after each move, indexed like
            ``actions``.
        expected_rewards: reward expected from each move.
        expected_dones: ``done`` flag expected from each move.
        expected_infos: info dict expected from each move.

    Raises:
        AssertionError: on the first mismatch; the message contains
            ``"idx: <step index>"`` so the failing move can be identified.
    """
    expected_hist = []
    for i, action in enumerate(actions):
        # One label per step, attached to every check below so that any
        # failure (including the history check) names the offending move.
        step_label = "idx: {}".format(i)

        board, reward, done, info = ms.step(action)

        # The returned board and the board stored on the env must agree.
        npt.assert_array_equal(ms.board, expected_boards[i], step_label)
        npt.assert_array_equal(board, expected_boards[i], step_label)

        expected_hist.append(action)
        npt.assert_array_equal(ms.hist, expected_hist, step_label)

        assert reward == expected_rewards[i], step_label
        assert done == expected_dones[i], step_label
        assert info == expected_infos[i], step_label


def test_win():
    """Play a complete seeded game on a 4x5 board with three mines and win.

    Every move but the last is expected to earn the in-progress reward and
    leave the game running; the final move ends the game with the winning
    reward. The info dict is expected to be empty throughout.
    """
    env = MinesweeperEnv((4, 5), 3)
    env.seed(42069)

    moves = [(0, 0), (3, 3), (0, 3), (1, 2), (0, 4), (1, 4)]
    boards_after_each_move = [
        [[0, 1, -1, -1, -1], [0, 1, -1, -1, -1], [1, 1, -1, -1, -1], [-1, -1, -1, -1, -1]],
        [[0, 1, -1, -1, -1], [0, 1, -1, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
        [[0, 1, -1, 2, -1], [0, 1, -1, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
        [[0, 1, -1, 2, -1], [0, 1, 2, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
        [[0, 1, -1, 2, 1], [0, 1, 2, -1, -1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
        [[0, 1, -1, 2, 1], [0, 1, 2, -1, 1], [1, 1, 1, 1, 1], [-1, 1, 0, 0, 0]],
    ]

    total_steps = len(boards_after_each_move)
    in_progress_steps = total_steps - 1

    rewards = [5] * in_progress_steps + [1000]
    dones = [False] * in_progress_steps + [True]
    infos = [{} for _ in range(total_steps)]

    assert_game(env, moves, boards_after_each_move, rewards, dones, infos)