Skip to content

Commit

Permalink
Minesweeper Env (#2)
Browse files Browse the repository at this point in the history
### Changes
* Implements minesweeper gym env
* Adds unit tests
* Changes to using `virtualenv` over `pipenv` due to [locking taking an eternity](pypa/pipenv#3827)
  • Loading branch information
aokellermann authored Dec 26, 2020
1 parent 97b08cd commit 80008b8
Show file tree
Hide file tree
Showing 13 changed files with 415 additions and 49 deletions.
25 changes: 15 additions & 10 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,31 @@ version: 2
jobs:
build:
docker:
- image: alpine
environment:
LANG: en_us.UTF-8
PIPENV_VENV_IN_PROJECT: true
PIPENV_DEV: true
- image: archlinux:base-devel

steps:
- checkout

- run:
name: Install Required Tools

# 1. Python deps
# 2. python-pillow does not provide a wheel, so it must be built with these deps
# 3. Formatting/linting
command: |
sed -i -e 's/v[[:digit:]]\..*\//edge\//g' /etc/apk/repositories
echo "http://dl-cdn.alpinelinux.org/alpine/edge/testing/" >> /etc/apk/repositories
apk add --no-cache bash py3-pip shellcheck shfmt
pip install pipenv
pacman -Sy --noconfirm \
python python-pip python-virtualenv \
lcms2 libtiff openjpeg2 libimagequant libxcb \
shellcheck shfmt
- run:
name: Set Up Virtualenv
command: pipenv install
command: |
python -m venv venv
echo "source venv/bin/activate" >> $BASH_ENV
source venv/bin/activate
pip install --upgrade pip wheel
pip install -e .[dev]
- run:
name: Format
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/.idea/
/Pipfile.lock
__pycache__
/test_results/
/venv/
*.egg*
19 changes: 0 additions & 19 deletions Pipfile

This file was deleted.

5 changes: 5 additions & 0 deletions gym_minesweeper/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""OpenAI gym environment for minesweeper."""

# Public API of the package; every name is re-exported from gym_minesweeper.minesweeper.
__all__ = [
    'MinesweeperEnv',
    'SPACE_MINE',
    'SPACE_UNKNOWN',
    'REWARD_WIN',
    'REWARD_LOSE',
    'REWARD_CLEAR',
]

from gym_minesweeper.minesweeper import (
    MinesweeperEnv,
    SPACE_MINE,
    SPACE_UNKNOWN,
    REWARD_WIN,
    REWARD_LOSE,
    REWARD_CLEAR,
)
205 changes: 205 additions & 0 deletions gym_minesweeper/minesweeper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
"""OpenAI gym environment for minesweeper."""

import sys
from io import StringIO

import gym
import numpy as np
from gym import spaces
from gym.utils import seeding

# Defaults used by MinesweeperEnv when no arguments are given:
# a (16, 30) board containing 99 mines.
DEFAULT_NUM_MINES = 99
DEFAULT_BOARD_SIZE = (16, 30)

# Cell encodings. Negative values are markers (mine / not yet revealed);
# revealed cells hold their adjacent-mine count, which is at most SPACE_MAX
# because a cell has at most eight neighbors.
SPACE_MINE, SPACE_UNKNOWN = -2, -1
SPACE_MAX = 8

# Reward values for winning, losing, and clearing a space without ending the game.
REWARD_WIN, REWARD_LOSE, REWARD_CLEAR = 1000, -100, 5


# Based on https://github.com/genyrosk/gym-chess/blob/master/gym_chess/envs/chess.py
# pylint: disable=R0902
class MinesweeperEnv(gym.Env):
    """Minesweeper gym environment.

    Two boards of shape ``board_size`` are kept: ``board`` is the public
    observation (unrevealed cells hold ``SPACE_UNKNOWN``) and ``_board`` is the
    private solution (``SPACE_MINE`` or the number of adjacent mines). Mines
    are placed on the first call to ``step`` after a reset, so that the first
    cleared space and its neighbors are never mines. ``hist`` lists the spaces
    explicitly targeted by ``step`` since the last reset.
    """

    metadata = {"render.modes": ["ansi", "human"]}

    def __init__(self, board_size=DEFAULT_BOARD_SIZE, num_mines=DEFAULT_NUM_MINES):
        """Creates the environment and resets it to an empty board.

        Args:
            board_size (tuple): two board dimensions; the first one indexes the lines printed by `render()`
            num_mines (int): number of mines to place on the first step
        """

        assert np.prod(board_size) >= num_mines
        assert len(board_size) == 2
        self.board_size, self.num_mines = board_size, num_mines
        self.hist, self.board, self._board, self._rng = None, None, None, None

        # `np.int` was only an alias of the builtin `int` and has been removed from
        # recent NumPy releases, so the builtin is used directly.
        self.observation_space = spaces.Box(SPACE_MINE, SPACE_MAX + 1, board_size, int)
        self.action_space = spaces.Discrete(np.prod(board_size))
        self.reset()

    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
        Args:
            action (np.array or int): [x, y] coordinate pair of space to clear, where x indexes
                `board_size[0]` and y indexes `board_size[1]`; alternatively a scalar flat
                (row-major) index into the board, as produced by sampling `action_space`
        Returns:
            observation (np.array[np.array]): current board state
            reward (float) : amount of reward returned after previous action
            done (bool): whether the episode has ended, in which case further step() calls will return undefined results
            info (dict): currently contains nothing
        Raises:
            AssertionError: if the targeted space is off the board or already revealed
            ValueError: if, on the first step, `num_mines` mines cannot fit outside the
                protected area around the targeted space
        """

        target_x, target_y = self._parse_action(action)
        assert self._is_clearable_space(target_x, target_y), "Invalid action: {}".format(action)

        # The board is populated lazily on the first step so that the first
        # move never triggers a mine to explode.
        if not self.hist:
            self._populate_board(target_x, target_y)

        self._clear_space(target_x, target_y)

        status = self.get_status()
        if status is None:
            return self.board, REWARD_CLEAR, False, dict()
        if status:
            # if won, no need to reveal mines
            return self.board, REWARD_WIN, True, dict()
        # if lost, reveal everything; hand out a copy so that callers mutating
        # the observation cannot corrupt the private board
        self.board = np.array(self._board)
        return self.board, REWARD_LOSE, True, dict()

    def reset(self):
        """Resets the environment to an initial state and returns an initial
        observation.
        Note that this function does not reset the environment's random
        number generator(s); random variables in the environment's state are
        sampled independently between multiple calls to `reset()`. In other
        words, each call of `reset()` yields an environment suitable for
        a new episode, independent of previous episodes.
        Returns:
            observation (np.array[np.array]): current board state (all unknown)
        """

        self.hist = []
        self._board = np.full(self.board_size, SPACE_UNKNOWN, int)
        self.board = np.array(self._board)
        return self.board

    def render(self, mode='human'):
        """Renders the environment.
        If mode is:
        - human: render to the current display or terminal and
          return nothing. Usually for human consumption.
        - ansi: Return a StringIO.StringIO containing a
          terminal-style text representation. The text may include newlines
          and ANSI escape sequences (e.g. for colors).
        Any other mode is delegated to the base class implementation.
        Args:
            mode (str): the mode to render with
        Returns:
            outfile (StringIO or None): StringIO stream if mode is ansi, otherwise None
        """

        if mode == 'ansi':
            outfile = StringIO()
        elif mode == 'human':
            outfile = sys.stdout
        else:
            # Unsupported mode: let the base class decide instead of trying to
            # write the board to whatever it returns.
            return super().render(mode)

        # Cells are separated by single spaces and rows by newlines, with no
        # trailing separator.
        outfile.write('\n'.join(' '.join(self._cell_symbol(cell) for cell in row) for row in self.board))
        return outfile if mode == 'ansi' else None

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).
        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
                number generators. In this case, the length is 1.
        """

        self._rng, seed = seeding.np_random(seed)
        return [seed]

    def get_status(self):
        """Gets the status of the game.
        Returns:
            status (bool): True if game won, False if game lost, None if game in progress
        """

        # A mine visible on the public board means it was cleared.
        if np.count_nonzero(self.board == SPACE_MINE):
            return False
        # The game is won once the only unrevealed spaces left are the mines.
        if np.count_nonzero(self.board == SPACE_UNKNOWN) == self.num_mines:
            return True
        return None

    @staticmethod
    def _cell_symbol(value):
        """Returns the text used by `render()` for a single board value."""
        if value == SPACE_MINE:
            return 'X'
        if value == SPACE_UNKNOWN:
            return '-'
        return str(value)

    def _parse_action(self, action):
        """Converts an action into an (x, y) pair of board coordinates.

        Scalars are interpreted as flat row-major indices so that samples from
        the `Discrete` action space are usable; anything else is unpacked as a
        coordinate pair.
        """
        if np.ndim(action) == 0:
            flat_index = int(action)
            assert 0 <= flat_index < np.prod(self.board_size), "Invalid action: {}".format(action)
            target_x, target_y = (int(coord) for coord in np.unravel_index(flat_index, self.board_size))
            return target_x, target_y
        target_x, target_y = tuple(action)
        return target_x, target_y

    def _populate_board(self, target_x, target_y):
        """Places the mines on the private board and fills in adjacent-mine counts.

        No mine is placed on the target or on any space adjacent to it.
        """
        # `seed()` may never have been called; fall back to a fresh generator
        # instead of failing on a missing RNG.
        if self._rng is None:
            self.seed()

        rows, cols = self.board_size

        # Size of the protected 3x3 area around the target, clipped to the board.
        safe_rows = min(target_x + 1, rows - 1) - max(target_x - 1, 0) + 1
        safe_cols = min(target_y + 1, cols - 1) - max(target_y - 1, 0) + 1
        if rows * cols - safe_rows * safe_cols < self.num_mines:
            # Without this check the rejection sampling below would never terminate.
            raise ValueError("Cannot place {} mines outside the first move's safe area on a {} board".format(
                self.num_mines, self.board_size))

        # Rejection sampling: draw as many coordinates as there are mines left
        # to place, keep the ones that land on a free, unprotected space, and
        # repeat until every mine is placed.
        mines_placed = 0
        while mines_placed < self.num_mines:
            candidates = zip(
                *[self._rng.randint(0, dim_size, self.num_mines - mines_placed) for dim_size in self.board_size])
            for cell in candidates:
                if self._board[cell] != SPACE_UNKNOWN:
                    continue
                if abs(cell[0] - target_x) > 1 or abs(cell[1] - target_y) > 1:
                    self._board[cell] = SPACE_MINE
                    mines_placed += 1

        # Every space that is not a mine gets its adjacent-mine count.
        for calc_x in range(rows):
            for calc_y in range(cols):
                if self._board[calc_x, calc_y] == SPACE_UNKNOWN:
                    self._board[calc_x, calc_y] = self._num_nearby_mines(calc_x, calc_y)

    def _is_valid_space(self, target_x, target_y):
        """Returns whether (target_x, target_y) lies on the board."""
        return 0 <= target_x < self.board_size[0] and 0 <= target_y < self.board_size[1]

    def _is_clearable_space(self, target_x, target_y):
        """Returns whether (target_x, target_y) lies on the board and is still unrevealed."""
        return self._is_valid_space(target_x, target_y) and self.board[target_x, target_y] == SPACE_UNKNOWN

    def _num_nearby_mines(self, target_x, target_y):
        """Counts the mines on the private board among the up to eight neighbors of a space."""
        num_mines = 0
        for i in range(target_x - 1, target_x + 2):
            for j in range(target_y - 1, target_y + 2):
                if (target_x != i or target_y != j) and self._is_valid_space(i, j) and self._board[i, j] == SPACE_MINE:
                    num_mines += 1
        return num_mines

    def _clear_space(self, target_x, target_y):
        """Reveals a space on the public board, flood-filling outwards from spaces with no adjacent mines.

        Only the explicitly targeted space is recorded in `hist`.
        """
        self.hist.append((target_x, target_y))

        spaces_to_clear = {(target_x, target_y)}
        spaces_cleared = set()
        while spaces_to_clear:
            cur_x, cur_y = spaces_to_clear.pop()
            spaces_cleared.add((cur_x, cur_y))
            self.board[cur_x, cur_y] = self._board[cur_x, cur_y]

            # A space without adjacent mines reveals all of its neighbors too.
            if self.board[cur_x, cur_y] == 0:
                for i in range(cur_x - 1, cur_x + 2):
                    for j in range(cur_y - 1, cur_y + 2):
                        if self._is_valid_space(i, j) and (i, j) not in spaces_cleared:
                            spaces_to_clear.add((i, j))
6 changes: 0 additions & 6 deletions gym_minesweeper/tests/dummy_test.py

This file was deleted.

Loading

0 comments on commit 80008b8

Please sign in to comment.