maxpumperla · fl4691 · Dec 8, 2017 · Dec 8, 2017 · Jan 27, 2018 · Feb 3, 2018
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
@@ -0,0 +1,12 @@
+# These are supported funding model platforms
+
+github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
+patreon: # Replace with a single Patreon username
+open_collective: # Replace with a single Open Collective username
+ko_fi: # Replace with a single Ko-fi username
+tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+liberapay: # Replace with a single Liberapay username
+issuehunt: # Replace with a single IssueHunt username
+otechie: # Replace with a single Otechie username
+custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,117 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+*.pyc
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# dotenv
+.env
+
+# virtualenv
+.venv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# vim
+*.swp
+
+# Autogenerated svgs for formulas
+stem*.svg
+stem*.png
+
+# Mac OS nonsense
+.DS_Store
+
+# Root-level artifacts. gitignore anchors a pattern to this directory with a
+# leading "/"; a "./" prefix is not understood and would match nothing.
+/kgs_index.html
+/data
+
+node_modules
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,24 @@
+dist: trusty
+language: python
+python:
+  # - "2.7" problems with np_utils in keras
+  - "3.4"
+before_install:
+  - cd code
+  - sudo apt-get install -y python-dev python-pip python-virtualenv gfortran libhdf5-dev pkg-config
+install:
+  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
+      wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh;
+    else
+      wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
+    fi
+  - bash miniconda.sh -b -p $HOME/miniconda
+  - export PATH="$HOME/miniconda/bin:$PATH"
+  - hash -r
+  - conda config --set always_yes yes --set changeps1 no
+  - conda update -q conda
+  - conda info -a
+  - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py flask nose six tensorflow theano
+  - source activate test-environment
+  - python setup.py install
+script: nosetests . --with-coverage
diff --git a/PULL_REQUEST_TEMPLATE.md b/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,7 @@
+Before submitting a pull request, note that `master` and the respective
+chapter branches need to stay in sync with the print version of the book.
+
+- If your PR consists of clarifications, bug fixes, or other vital enhancements, please submit your PR against `master`.
+- If your PR has performance improvements, simplifications, added functionality that would alter the main text of the book etc., please open your PR against the `improvements` branch.
+
+Should your current PR mix these types of changes, please consider splitting it accordingly. Thank you!
diff --git a/README.md b/README.md
@@ -1,36 +1,54 @@
-# Deep Learning and the Game of Go
+# Deep Learning and the Game of Go [![Build Status](https://travis-ci.org/maxpumperla/deep_learning_and_the_game_of_go.svg?branch=master)](https://travis-ci.org/maxpumperla/deep_learning_and_the_game_of_go)
 
-[Code](https://github.com/maxpumperla/deep_learning_and_the_game_of_go/tree/master/code), sample chapters and notebooks for the book "Deep Learning and the Game of Go" (Manning), available for early access [here](https://www.manning.com/books/deep-learning-and-the-game-of-go).
+This repository is first and foremost a comprehensive machine learning framework for the game of Go, focussing on deep learning techniques. What you'll find here is a library that builds up from the game-play basics to very advanced techniques. In particular, you find code **for early approaches in game AI, intermediate techniques using deep learning, to implementations of AlphaGo and AlphaGo Zero - all presented in one common framework**. You can install this library with pip and follow the `examples` in the `code` folder.
+
+```bash
+pip install dlgo
+```
+
+On the other hand, this repository at the same time contains [code](https://github.com/maxpumperla/deep_learning_and_the_game_of_go/tree/master/code) and sample chapters for the book "Deep Learning and the Game of Go" (Manning), available for early access [here](https://www.manning.com/books/deep-learning-and-the-game-of-go), which ties into the library and teaches its components bit by bit. If you're following the code samples from the book, check out the branches for
+individual chapters.
+
+*Note for contributors*: To ensure the book stays in sync, consider requesting changes and submitting pull requests against the `improvements` branch, instead of `master` (which we keep reserved for bug fixes etc.).
 
 ![dl_go_cover](dl_go_cover.jpg)
 
 ## Playable demos
 
 The book is all about getting you started to create your own bots. To make the experience more fun and interactive, we built and deployed several bots showcasing the techniques of the respective chapter. So far you can play:
 
-- **Chapter 4** A tiny tree search bot on a 5x5 board [here](https://www.badukai.com/demos/static/play_mcts_55.html).
-- **Chapter 7** A full 19x19 bot powered by a deep neural network trained to predict human moves [here](https://www.badukai.com/demos/static/play_predict_19.html)
-- **Chapter 9** A bot playing on a 9x9 board, using policy gradients, [here](https://www.badukai.com/demos/static/play_pg_99.html) 
+- **Chapter 4** A tiny tree search bot on a 5x5 board [here](https://demos.badukai.com/static/play_mcts_55.html).
+- **Chapter 7** A full 19x19 bot powered by a deep neural network trained to predict human moves [here](https://demos.badukai.com/static/play_predict_19.html)
+- **Chapter 9** A bot playing on a 9x9 board, using policy gradients, [here](https://demos.badukai.com/static/play_pg_99.html)
 
 These demos will be available in the [liveBook](https://www.manning.com/books/deep-learning-and-the-game-of-go) version of the book as well.
 
-<iframe src="https://www.badukai.com/demos/static/play_predict_19.html" height="800" width="800" style="border:2px solid grey; background-color: #f8f8f8;"></iframe>
+<iframe src="https://demos.badukai.com/static/play_predict_19.html" height="500" width="800" style="border:2px solid grey; background-color: #f8f8f8;"></iframe>
 
 ## Table of Contents
 
-1. Towards deep learning: a machine learning introduction
-2. Machine learning for go
-3. Representing a game of go: a simple search bot
-4. Search and the early years of game AI
+1. Toward deep learning: a machine learning introduction
+2. Go as a machine learning problem
+3. Implementing your first Go bot
+4. Playing games with tree search
 5. Getting started with neural networks
 6. Designing a neural network for Go data
 7. Learning from data: a deep learning bot
-8. Enter deep reinforcement learning
-9. Reinforcement learning with the policy gradient algorithm
-10. Reinforcement learning with value methods
-11. Reinforcement with actor-critic methods
-12. AlphaGo: Combining approaches
-13. Bots in the wild: deployment and scale-out
+8. Deploying bots in the wild
+9. Enter deep reinforcement learning
+10. Reinforcement learning with policy gradients
+11. Reinforcement learning with value methods
+12. Reinforcement learning with actor-critic methods
+13. AlphaGo: Combining approaches
+14. AlphaGoZero and AlphaZero: Combining approaches
+
+Appendices
+
+- A. Mathematical foundations with Python
+- B. The backpropagation algorithm
+- C. Go programs and servers
+- D. Training and deploying bots using Amazon Web Services
+- E. Submitting a bot to the Online Go Server (OGS)
 
 ![chapter_succession](chapter_succession.png)
 

diff --git a/chapter_succession.png b/chapter_succession.png
diff --git a/code/Dockerfile b/code/Dockerfile
@@ -0,0 +1,67 @@
+# Adapted from: https://github.com/gw0/docker-keras-jupyter/blob/master/Dockerfile
+# docker-debian-cuda - Debian 9 with CUDA Toolkit
+
+FROM gw000/keras:2.1.1-gpu
+# MAINTAINER is deprecated; the "maintainer" label is the supported replacement.
+LABEL maintainer="gw0 [http://gw.tnode.com/] <gw.2017@ena.one>"
+
+# install py3-tf-cpu/gpu (Python 3, TensorFlow, CPU/GPU)
+RUN apt-get update -qq \
+ && apt-get install --no-install-recommends -y \
+    # install python 3
+    python3 \
+    python3-dev \
+    python3-pip \
+    python3-setuptools \
+    python3-virtualenv \
+    python3-wheel \
+    pkg-config \
+    # requirements for numpy
+    libopenblas-base \
+    python3-numpy \
+    python3-scipy \
+    # requirements for keras
+    python3-h5py \
+    python3-yaml \
+    python3-pydot \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+ARG TENSORFLOW_VERSION=1.4.0
+ARG TENSORFLOW_DEVICE=gpu
+ARG TENSORFLOW_APPEND=_gpu
+RUN pip3 --no-cache-dir install https://storage.googleapis.com/tensorflow/linux/${TENSORFLOW_DEVICE}/tensorflow${TENSORFLOW_APPEND}-${TENSORFLOW_VERSION}-cp35-cp35m-linux_x86_64.whl
+
+
+# install Keras for Python 3
+ARG KERAS_VERSION=2.1.1
+ENV KERAS_BACKEND=tensorflow
+RUN pip3 --no-cache-dir install --no-dependencies git+https://github.com/fchollet/keras.git@${KERAS_VERSION}
+
+# install additional debian packages
+RUN apt-get update -qq \
+ && apt-get install --no-install-recommends -y \
+    # system tools
+    less \
+    procps \
+    vim-tiny \
+    # build dependencies
+    build-essential \
+    libffi-dev \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+ RUN python3 -c "import tensorflow; print(tensorflow.__version__)" \
+ && dpkg-query -l > /dpkg-query-l.txt \
+ && pip3 freeze > /pip3-freeze.txt
+
+ # Copy application to container
+ # (the mkdir path is relative; WORKDIR and COPY below both use the absolute /app)
+ RUN mkdir -p app
+ WORKDIR /app
+ COPY . /app
+
+ # Install requirements
+ # NOTE(review): this step uses `pip` and the entrypoint uses `python`, while the
+ # TensorFlow/Keras layers earlier in this file are installed with `pip3` and checked
+ # with `python3` - confirm which interpreter these resolve to in the base image.
+ RUN pip install -r requirements.txt
+
+ # Expose default port and start app
+ # The web demo is told to bind to 0.0.0.0 and is given two agent files under /app/agents.
+ EXPOSE 5000
+ ENTRYPOINT ["python", "web_demo.py", "--bind-address", "0.0.0.0", "--pg-agent", "/app/agents/9x9_from_nothing/round_007.hdf5", "--predict-agent", "/app/agents/betago.hdf5"]
diff --git a/code/agents/9x9_from_nothing/round_007.hdf5 b/code/agents/9x9_from_nothing/round_007.hdf5
diff --git a/code/agents/betago.hdf5 b/code/agents/betago.hdf5
diff --git a/code/alpha_beta_go.py b/code/alpha_beta_go.py
@@ -0,0 +1,47 @@
+from six.moves import input
+
+from dlgo import goboard
+from dlgo import gotypes
+from dlgo import minimax
+from dlgo.utils import print_board, print_move, point_from_coords
+
+BOARD_SIZE = 5
+
+
+# tag::naive-board-heuristic[]
+def capture_diff(game_state):
+    black_stones = 0
+    white_stones = 0
+    for r in range(1, game_state.board.num_rows + 1):
+        for c in range(1, game_state.board.num_cols + 1):
+            p = gotypes.Point(r, c)
+            color = game_state.board.get(p)
+            if color == gotypes.Player.black:
+                black_stones += 1
+            elif color == gotypes.Player.white:
+                white_stones += 1
+    diff = black_stones - white_stones
+    if game_state.next_player == gotypes.Player.black:
+        return diff
+    return -1 * diff
+# end::naive-board-heuristic[]
+
+
+def main():
+    game = goboard.GameState.new_game(BOARD_SIZE)
+    bot = minimax.AlphaBetaAgent(3, capture_diff)
+
+    while not game.is_over():
+        print_board(game.board)
+        if game.next_player == gotypes.Player.black:
+            human_move = input('-- ')
+            point = point_from_coords(human_move.strip())
+            move = goboard.Move.play(point)
+        else:
+            move = bot.select_move(game)
+        print_move(game.next_player, move)
+        game = game.apply_move(move)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/code/bot_v_bot.py b/code/bot_v_bot.py
@@ -1,24 +1,32 @@
+from __future__ import print_function
 # tag::bot_vs_bot[]
 from dlgo import agent
 from dlgo import goboard
 from dlgo import gotypes
-from dlgo.utils import print_board, print_move
+from dlgo.utils import print_board, print_move, clear_screen
+import time
 
 
 def main():
+    """Let two RandomBot agents play each other on a 9x9 board, printing each move."""
     board_size = 9
     game = goboard.GameState.new_game(board_size)
+    # One agent per colour, looked up by whose turn it is.
     bots = {
-        gotypes.Player.black: agent.RandomBot(),
-        gotypes.Player.white: agent.RandomBot(),
+        gotypes.Player.black: agent.naive.RandomBot(),
+        gotypes.Player.white: agent.naive.RandomBot(),
     }
     while not game.is_over():
+        time.sleep(0.3)  # <1>
+
+        clear_screen()   # <2>
         print_board(game.board)
         bot_move = bots[game.next_player].select_move(game)
         print_move(game.next_player, bot_move)
-        game = game.apply_move(game.next_player, bot_move)
+        # apply_move returns the follow-up state, which replaces the current one.
+        game = game.apply_move(bot_move)
 
 if __name__ == '__main__':
     main()
+
+# <1> We set a sleep timer to 0.3 seconds so that bot moves aren't printed too fast to observe
+# <2> Before each move we clear the screen. This way the board is always printed to the same position on the command line.
 # end::bot_vs_bot[]