Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
22a9f79
Update README.md
maxpumperla Dec 8, 2017
fc4aa7f
Update README.md
maxpumperla Dec 8, 2017
ff5550c
Add an extra test and fix some imports.
macfergus Jan 27, 2018
958a0ae
update code for chapter 3
maxpumperla Feb 3, 2018
2e86d76
update main module
maxpumperla Feb 3, 2018
7413a96
Update GameState.apply_move() in goboard_slow.py
tychota Mar 4, 2018
4b5333c
Merge pull request #1 from tychota/patch-1
maxpumperla Mar 5, 2018
b45356a
gitignore
maxpumperla Mar 11, 2018
e0087f4
bulk update
maxpumperla Mar 11, 2018
dfc1753
merge
maxpumperla Mar 11, 2018
49003ec
travis
maxpumperla Mar 11, 2018
4991b29
Update .travis.yml
maxpumperla Mar 11, 2018
71713b3
remove replace test_ with run_network
maxpumperla Mar 11, 2018
26aa8cf
update readme
maxpumperla Mar 11, 2018
7ab98c1
Add note about chapter branches.
macfergus Jun 2, 2018
7eefb7f
Add prompt explaining how to play.
macfergus Jul 15, 2018
227ad91
Add web_random_bot example.
macfergus Jul 19, 2018
ba63d5e
Add larger training set generated at 5000 rounds.
macfergus Jul 21, 2018
a59b973
Update .travis.yml
maxpumperla Aug 27, 2018
7913fbe
Update .travis.yml
maxpumperla Aug 27, 2018
9b5d74c
Update .travis.yml
maxpumperla Aug 27, 2018
b4a486d
Update .travis.yml
maxpumperla Aug 27, 2018
09b1cd1
update succession
maxpumperla Aug 27, 2018
01b9804
Merge branch 'master' of https://github.com/maxpumperla/deep_learning…
maxpumperla Aug 27, 2018
bff1d26
update master
maxpumperla Aug 27, 2018
0046600
Update README.md
maxpumperla Aug 27, 2018
1e0fced
ag test too heavy for travis
maxpumperla Aug 27, 2018
b0cf839
Merge branch 'master' of https://github.com/maxpumperla/deep_learning…
maxpumperla Aug 27, 2018
9776721
Update setup.py
maxpumperla Aug 27, 2018
b077d19
reactivate 2.7
maxpumperla Aug 27, 2018
62de27a
bump version
maxpumperla Aug 27, 2018
b3bbde1
Update README.md
maxpumperla Aug 27, 2018
24acd42
Merge branch 'master' of https://github.com/maxpumperla/deep_learning…
maxpumperla Aug 27, 2018
b19a823
Add interactive demos.
macfergus Nov 13, 2018
7719eab
pr template
maxpumperla Feb 11, 2019
7abf582
Update README.md
maxpumperla Feb 11, 2019
97a0d75
implement: use mnist data checked into git repo
groovescale Feb 21, 2019
cf3b99f
implement: add file from mnist s3 bucket
groovescale Feb 21, 2019
5e49575
fix: on python 2.7, "TypeError: super() argument 1 must be type, not …
groovescale Feb 23, 2019
7841e7d
Merge pull request #18 from jeffhgs/issue_12_v2
maxpumperla Feb 23, 2019
126042e
Correcting _remove_string to fix ko detection
Efaq Mar 11, 2019
17bec25
Merge pull request #22 from Efaq/master
maxpumperla Mar 11, 2019
0797b3e
Specify threaded=False in web server.
macfergus Apr 13, 2019
c1add1f
Update setup.py
maxpumperla Apr 28, 2019
8dcb0f6
Travis CI: The sudo: is deprecated in Travis
cclauss Sep 24, 2019
a4ddff5
Merge pull request #46 from cclauss/patch-1
maxpumperla Sep 24, 2019
4a29235
Create FUNDING.yml
maxpumperla Nov 18, 2019
89c6fb0
fixed typo with using h5 fileobj instead of string
krunt Dec 29, 2019
e7d1468
Merge pull request #48 from krunt/master
maxpumperla Jan 3, 2020
0fc6bda
fixed clear screen issue on windows
hackf5 Apr 3, 2020
bb07cf0
fixed clear screen issue on windows
hackf5 Apr 3, 2020
144e2df
minor changes to keep in line with book.
hackf5 Apr 3, 2020
22abdc5
Merge pull request #59 from hackf5/master
maxpumperla Apr 3, 2020
a17ac36
Fixed bugs in AlphaGoMCTS agent
JingOY0610 Apr 8, 2020
02bbda2
Merge pull request #60 from JingOY0610/JingOY0610-patch-AlphaGoBugFix-2
maxpumperla Apr 9, 2020
6714165
Fix script to generate zobrist hash
SojiroNishimura Jun 20, 2020
7dbefb8
Merge pull request #73 from SojiroNishimura/fix-zobrist-gen
maxpumperla Jun 22, 2020
55cd7ce
Fix blocking in Popen.
macfergus Sep 26, 2019
6148f57
Make pass always a legal move
macfergus Dec 22, 2020
35f983c
Update demo links in README
macfergus May 20, 2021
dc65968
fix typo: np.doc -> np.dot
llimllib Sep 17, 2022
c70cfe4
Merge pull request #105 from llimllib/fix-typo-np-doc
maxpumperla Sep 17, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/FUNDING.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# These are supported funding model platforms

github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
117 changes: 117 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.pyc

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# vim
*.swp

# Autogenerated svgs for formulas
stem*.svg
stem*.png

# Mac OS nonsense
.DS_Store

# Local files at the repository root. A leading "/" anchors a pattern to the
# directory containing this .gitignore; a leading "./" is not understood by
# git and such a pattern never matches anything.
/kgs_index.html
/data

node_modules
24 changes: 24 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Travis CI configuration: builds the package from the code/ directory inside
# a Miniconda environment and runs the test suite with nose.
dist: trusty
language: python
# Python versions to test against (only 3.4 is currently enabled).
python:
# - "2.7" problems with np_utils in keras
- "3.4"
# setup.py lives in code/, so every later step runs from that directory.
before_install:
- cd code
- sudo apt-get install -y python-dev python-pip python-virtualenv gfortran libhdf5-dev pkg-config
# Download the Miniconda installer matching the job's Python major version,
# install it into $HOME/miniconda, create and activate a conda environment
# named "test-environment" with the required packages, then install this
# package into that environment.
# NOTE(review): the "2.7" branch of the `if` below is never taken while 2.7
# stays commented out of the `python:` list above.
install:
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh;
else
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
fi
- bash miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- hash -r
- conda config --set always_yes yes --set changeps1 no
- conda update -q conda
- conda info -a
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py flask nose six tensorflow theano
- source activate test-environment
- python setup.py install
# Run the tests found under the current directory (code/) with coverage
# reporting switched on.
script: nosetests . --with-coverage
7 changes: 7 additions & 0 deletions PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Before submitting a pull request, note that `master` and the respective
chapter branches need to stay in sync with the print version of the book.

- If your PR consists of clarifications, bug fixes, or other vital enhancements, please submit your PR against `master`.
- If your PR has performance improvements, simplifications, added functionality that would alter the main text of the book etc., please open your PR against the `improvements` branch.

Should your current PR mix these types of changes, please consider splitting it accordingly. Thank you!
50 changes: 34 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,36 +1,54 @@
# Deep Learning and the Game of Go
# Deep Learning and the Game of Go [![Build Status](https://travis-ci.org/maxpumperla/deep_learning_and_the_game_of_go.svg?branch=master)](https://travis-ci.org/maxpumperla/deep_learning_and_the_game_of_go)

[Code](https://github.com/maxpumperla/deep_learning_and_the_game_of_go/tree/master/code), sample chapters and notebooks for the book "Deep Learning and the Game of Go" (Manning), available for early access [here](https://www.manning.com/books/deep-learning-and-the-game-of-go).
This repository is first and foremost a comprehensive machine learning framework for the game of Go, focussing on deep learning techniques. What you'll find here is a library that builds up from the game-play basics to very advanced techniques. In particular, you'll find code **ranging from early approaches in game AI, through intermediate techniques using deep learning, to implementations of AlphaGo and AlphaGo Zero - all presented in one common framework**. You can install this library with pip and follow the `examples` in the `code` folder.

```bash
pip install dlgo
```

This repository also contains the [code](https://github.com/maxpumperla/deep_learning_and_the_game_of_go/tree/master/code) and sample chapters for the book "Deep Learning and the Game of Go" (Manning), available for early access [here](https://www.manning.com/books/deep-learning-and-the-game-of-go), which ties into the library and teaches its components bit by bit. If you're following the code samples from the book, check out the branches for
individual chapters.

*Note for contributors*: To ensure the book stays in sync, consider requesting changes and submitting pull requests against the `improvements` branch, instead of `master` (which we keep reserved for bug fixes etc.).

![dl_go_cover](dl_go_cover.jpg)

## Playable demos

The book is all about getting you started to create your own bots. To make the experience more fun and interactive, we built and deployed several bots showcasing the techniques of the respective chapter. So far you can play:

- **Chapter 4** A tiny tree search bot on a 5x5 board [here](https://www.badukai.com/demos/static/play_mcts_55.html).
- **Chapter 7** A full 19x19 bot powered by a deep neural network trained to predict human moves [here](https://www.badukai.com/demos/static/play_predict_19.html)
- **Chapter 9** A bot playing on a 9x9 board, using policy gradients, [here](https://www.badukai.com/demos/static/play_pg_99.html)
- **Chapter 4** A tiny tree search bot on a 5x5 board [here](https://demos.badukai.com/static/play_mcts_55.html).
- **Chapter 7** A full 19x19 bot powered by a deep neural network trained to predict human moves [here](https://demos.badukai.com/static/play_predict_19.html)
- **Chapter 9** A bot playing on a 9x9 board, using policy gradients, [here](https://demos.badukai.com/static/play_pg_99.html)

These demos will be available in the [liveBook](https://www.manning.com/books/deep-learning-and-the-game-of-go) version of the book as well.

<iframe src="https://www.badukai.com/demos/static/play_predict_19.html" height="800" width="800" style="border:2px solid grey; background-color: #f8f8f8;"></iframe>
<iframe src="https://www.badukai.com/demos/static/play_predict_19.html" height="500" width="800" style="border:2px solid grey; background-color: #f8f8f8;"></iframe>

## Table of Contents

1. Towards deep learning: a machine learning introduction
2. Machine learning for go
3. Representing a game of go: a simple search bot
4. Search and the early years of game AI
1. Toward deep learning: a machine learning introduction
2. Go as a machine learning problem
3. Implementing your first Go bot
4. Playing games with tree search
5. Getting started with neural networks
6. Designing a neural network for Go data
7. Learning from data: a deep learning bot
8. Enter deep reinforcement learning
9. Reinforcement learning with the policy gradient algorithm
10. Reinforcement learning with value methods
11. Reinforcement with actor-critic methods
12. AlphaGo: Combining approaches
13. Bots in the wild: deployment and scale-out
8. Deploying bots in the wild
9. Enter deep reinforcement learning
10. Reinforcement learning with policy gradients
11. Reinforcement learning with value methods
12. Reinforcement learning with actor-critic methods
13. AlphaGo: Combining approaches
14. AlphaGoZero and AlphaZero: Combining approaches

Appendices

- A. Mathematical foundations with Python
- B. The backpropagation algorithm
- C. Go programs and servers
- D. Training and deploying bots using Amazon Web Services
- E. Submitting a bot to the Online Go Server (OGS)

![chapter_succession](chapter_succession.png)

Expand Down
Binary file modified chapter_succession.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
67 changes: 67 additions & 0 deletions code/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Adapted from: https://github.com/gw0/docker-keras-jupyter/blob/master/Dockerfile
# docker-debian-cuda - Debian 9 with CUDA Toolkit
#
# Image for the web demo: adds Python 3, TensorFlow and Keras on top of a
# Keras GPU base image, copies this directory into /app, installs
# requirements.txt and starts web_demo.py with the two agent files that are
# shipped under agents/.

FROM gw000/keras:2.1.1-gpu
# NOTE(review): MAINTAINER is deprecated in the Dockerfile reference in favour
# of a LABEL; left unchanged here.
MAINTAINER gw0 [http://gw.tnode.com/] <gw.2017@ena.one>

# install py3-tf-cpu/gpu (Python 3, TensorFlow, CPU/GPU)
# The apt package lists are removed at the end of the same RUN so they do not
# end up in the image layer.
RUN apt-get update -qq \
&& apt-get install --no-install-recommends -y \
# install python 3
python3 \
python3-dev \
python3-pip \
python3-setuptools \
python3-virtualenv \
python3-wheel \
pkg-config \
# requirements for numpy
libopenblas-base \
python3-numpy \
python3-scipy \
# requirements for keras
python3-h5py \
python3-yaml \
python3-pydot \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Build arguments that select which TensorFlow wheel is downloaded; they can
# be overridden with --build-arg. The URL below is assembled from all three
# and always points at the cp35 (CPython 3.5) linux_x86_64 wheel.
ARG TENSORFLOW_VERSION=1.4.0
ARG TENSORFLOW_DEVICE=gpu
ARG TENSORFLOW_APPEND=_gpu
RUN pip3 --no-cache-dir install https://storage.googleapis.com/tensorflow/linux/${TENSORFLOW_DEVICE}/tensorflow${TENSORFLOW_APPEND}-${TENSORFLOW_VERSION}-cp35-cp35m-linux_x86_64.whl


# install Keras for Python 3
# Keras is installed from the git tag named by KERAS_VERSION, without pulling
# in its dependencies (--no-dependencies); KERAS_BACKEND is exported so the
# TensorFlow backend is used at runtime.
ARG KERAS_VERSION=2.1.1
ENV KERAS_BACKEND=tensorflow
RUN pip3 --no-cache-dir install --no-dependencies git+https://github.com/fchollet/keras.git@${KERAS_VERSION}

# install additional debian packages
RUN apt-get update -qq \
&& apt-get install --no-install-recommends -y \
# system tools
less \
procps \
vim-tiny \
# build dependencies
build-essential \
libffi-dev \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Sanity check that TensorFlow imports under python3 (the build fails here if
# it does not), then record the installed Debian and pip3 packages in two
# text files at the image root.
RUN python3 -c "import tensorflow; print(tensorflow.__version__)" \
&& dpkg-query -l > /dpkg-query-l.txt \
&& pip3 freeze > /pip3-freeze.txt

# Copy application to container
# NOTE(review): `mkdir -p app` is relative to the build's current working
# directory; the WORKDIR /app line is what the following steps rely on.
RUN mkdir -p app
WORKDIR /app
COPY . /app

# Install requirements
# NOTE(review): this step calls `pip` while every earlier step calls `pip3`;
# confirm which interpreter `pip` belongs to in the base image.
RUN pip install -r requirements.txt

# Expose default port and start app
# NOTE(review): the entrypoint runs `python`, not `python3`; confirm it is the
# interpreter that has the packages installed above.
EXPOSE 5000
ENTRYPOINT ["python", "web_demo.py", "--bind-address", "0.0.0.0", "--pg-agent", "/app/agents/9x9_from_nothing/round_007.hdf5", "--predict-agent", "/app/agents/betago.hdf5"]
Binary file added code/agents/9x9_from_nothing/round_007.hdf5
Binary file not shown.
Binary file added code/agents/betago.hdf5
Binary file not shown.
47 changes: 47 additions & 0 deletions code/alpha_beta_go.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from six.moves import input

from dlgo import goboard
from dlgo import gotypes
from dlgo import minimax
from dlgo.utils import print_board, print_move, point_from_coords

BOARD_SIZE = 5


# tag::naive-board-heuristic[]
def capture_diff(game_state):
    """Return the stone-count difference from the next player's point of view.

    Every point of the board (rows and columns are numbered from 1) is
    looked up once. The result is ``black - white`` when black is to move
    and the negated difference otherwise.
    """
    board = game_state.board
    occupants = [
        board.get(gotypes.Point(row, col))
        for row in range(1, board.num_rows + 1)
        for col in range(1, board.num_cols + 1)
    ]
    black_total = sum(1 for stone in occupants if stone == gotypes.Player.black)
    white_total = sum(1 for stone in occupants if stone == gotypes.Player.white)
    margin = black_total - white_total
    # Flip the sign when it is not black's turn, so a positive value always
    # favours the player about to move.
    return margin if game_state.next_player == gotypes.Player.black else -1 * margin
# end::naive-board-heuristic[]


def main():
    """Play an interactive game on a BOARD_SIZE board against the alpha-beta bot.

    Black's moves are read from standard input at the ``'-- '`` prompt;
    white's moves are chosen by ``minimax.AlphaBetaAgent`` using
    ``capture_diff`` as its evaluation function. The board is printed before
    every move and the loop stops once the game reports it is over.
    """
    game = goboard.GameState.new_game(BOARD_SIZE)
    # NOTE(review): the first argument (3) is presumably the search depth --
    # confirm against minimax.AlphaBetaAgent.
    bot = minimax.AlphaBetaAgent(3, capture_diff)

    while True:
        if game.is_over():
            break
        print_board(game.board)
        bot_to_move = game.next_player != gotypes.Player.black
        if bot_to_move:
            move = bot.select_move(game)
        else:
            raw_coords = input('-- ').strip()
            move = goboard.Move.play(point_from_coords(raw_coords))
        print_move(game.next_player, move)
        game = game.apply_move(move)


if __name__ == '__main__':
main()
16 changes: 12 additions & 4 deletions code/bot_v_bot.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,32 @@
from __future__ import print_function
# tag::bot_vs_bot[]
from dlgo import agent
from dlgo import goboard
from dlgo import gotypes
from dlgo.utils import print_board, print_move
from dlgo.utils import print_board, print_move, clear_screen
import time


def main():
    """Let two random bots play each other on a 9x9 board, redrawing every turn.

    Each iteration pauses briefly, clears the terminal, prints the current
    board, asks the bot whose turn it is for a move, prints that move and
    applies it exactly once. The loop ends when the game reports it is over.
    """
    board_size = 9
    game = goboard.GameState.new_game(board_size)
    # One bot per colour; each key must appear only once in the literal,
    # otherwise the earlier entries are constructed and silently discarded.
    bots = {
        gotypes.Player.black: agent.naive.RandomBot(),
        gotypes.Player.white: agent.naive.RandomBot(),
    }
    while not game.is_over():
        time.sleep(0.3)  # <1>

        clear_screen()  # <2>
        print_board(game.board)
        bot_move = bots[game.next_player].select_move(game)
        print_move(game.next_player, bot_move)
        # apply_move takes only the move (the state already knows whose turn
        # it is); apply it a single time per turn.
        game = game.apply_move(bot_move)


if __name__ == '__main__':
main()

# <1> We set a sleep timer to 0.3 seconds so that bot moves aren't printed too fast to observe
# <2> Before each move we clear the screen. This way the board is always printed to the same position on the command line.
# end::bot_vs_bot[]
Loading