Skip to content

Commit

Permalink
CNNPolicy and test file, mostly functional
Browse files Browse the repository at this point in the history
note: something funny happening with network output size
  • Loading branch information
wrongu committed Feb 19, 2016
1 parent 6a70572 commit 28497b7
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 0 deletions.
144 changes: 144 additions & 0 deletions AlphaGo/models/policy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from keras.models import Sequential
from keras.layers import convolutional
from keras.layers.core import Activation, Reshape
import keras.backend as K
from preprocessing import Preprocess

class CNNPolicy(object):
    """Policy that uses a convolutional neural network to evaluate the state
    of the game and compute a probability distribution over the next action.
    """

    def __init__(self, feature_list, **kwargs):
        """Create a policy object that preprocesses according to feature_list
        and uses a neural network specified by keyword arguments (see
        create_network()).

        Any "input_dim" given in kwargs is overridden by the preprocessor's
        output_dim so the network always matches the preprocessed features.
        """
        self.preprocessor = Preprocess(feature_list)
        kwargs["input_dim"] = self.preprocessor.output_dim
        self.model = CNNPolicy.create_network(**kwargs)
        self.forward = self._model_forward()

    def _model_forward(self):
        """Construct a function using the current keras backend that, when
        given a batch of inputs, simply processes them forward and returns
        the output.

        This is as opposed to model.compile(), which takes a loss function
        and training method.
        c.f. https://github.com/fchollet/keras/issues/1426
        """
        model_input = self.model.get_input(train=False)
        # Must be the model's *output* tensor; wiring the input tensor here
        # would make forward() an identity function on the features.
        model_output = self.model.get_output(train=False)
        return K.function([model_input], [model_output])

    def batch_eval_state(self, state_gen, batch=16):
        """Given a stream of states in state_gen, evaluate them in batches
        to make best use of GPU resources.

        Returns: TBD (stream of results? that would break zip().
        streaming pairs of pre-zipped (state, result)?)
        """
        raise NotImplementedError()

    def eval_state(self, state):
        """Given a GameState object, return a list of (action, probability)
        pairs according to the network outputs.

        Each action is an (x, y) tuple taken from state.get_legal_moves().
        """
        tensor = self.preprocessor.state_to_tensor(state)

        # run the tensor through the network
        # NOTE(review): assumes the first element returned by forward() can
        # be indexed as [x, y] -- confirm how state_to_tensor handles the
        # batch dimension.
        network_output = self.forward([tensor])[0]

        # get network activations at legal move locations
        # note: may not be a proper distribution by ignoring illegal moves
        return [((x, y), network_output[x, y])
                for (x, y) in state.get_legal_moves()]

    @staticmethod
    def create_network(**kwargs):
        """Construct a convolutional neural network.

        Keyword Arguments:
        - input_dim: depth of features to be processed by first layer
          (no default)
        - board: width of the go board to be processed (default 19)
        - filters_per_layer: number of filters used on every layer
          (default 128)
        - layers: number of convolutional steps (default 12)
        - filter_width_K: (where K is between 1 and <layers>) width of
          filter on layer K (default 3 except 1st layer which defaults
          to 5). Must be odd.

        Returns: an uncompiled keras Sequential model whose per-sample
        output is board x board, normalized by a softmax taken over all
        board positions.

        Raises:
        - KeyError if input_dim is not supplied
        - ValueError if any filter width is not a positive odd number
        """
        # copy defaults, but override with anything in kwargs
        params = {
            "board": 19,
            "filters_per_layer": 128,
            "layers": 12,
            "filter_width_1": 5,
        }
        params.update(kwargs)

        input_dim = params["input_dim"]
        board = params["board"]

        # Resolve every layer's filter width up front so a bad value is
        # reported before any layer is built. The first layer always exists;
        # layers 2..<layers> use filter_width_K if it is there, otherwise 3.
        filter_widths = [params["filter_width_1"]]
        for i in range(2, params["layers"] + 1):
            filter_widths.append(params.get("filter_width_%d" % i, 3))
        for i, width in enumerate(filter_widths, 1):
            # only odd widths can be padded symmetrically back to board size
            if width < 1 or width % 2 != 1:
                raise ValueError(
                    "filter_width_%d must be a positive odd number, got %r"
                    % (i, width))

        # create the network:
        # a series of zero-paddings followed by convolutions
        # such that the output dimensions are also board x board
        network = Sequential()
        for index, width in enumerate(filter_widths):
            half_width = int(width // 2)
            padding_args = {"padding": (half_width, half_width)}
            if index == 0:
                # only the first layer of a Sequential model declares the
                # input shape: (feature planes, rows, columns)
                padding_args["input_shape"] = (input_dim, board, board)
            network.add(convolutional.ZeroPadding2D(**padding_args))
            # The explicit zero-padding above already compensates for the
            # filter width, so the convolution itself must not pad again
            # ('same' on top of the padding would grow the planes by
            # 2 * half_width per layer).
            network.add(convolutional.Convolution2D(
                nb_filter=params["filters_per_layer"],
                nb_row=width,
                nb_col=width,
                init='uniform',
                activation='relu',
                border_mode='valid'))

        # the last layer maps each <filters_per_layer> feature to a number
        network.add(convolutional.Convolution2D(
            nb_filter=1,
            nb_row=1,
            nb_col=1,
            init='uniform',
            border_mode='same'))
        # Flatten the single output plane before the softmax so that it
        # normalizes over every board position at once; applied to a
        # board x board tensor it would only normalize along the last axis
        # (one distribution per row).
        network.add(Reshape((board * board,)))
        # softmax makes it into a probability distribution
        network.add(Activation('softmax'))
        # reshape output to be board x board
        network.add(Reshape((board, board)))

        return network

    def load_model(self, json_file):
        """Load the architecture specified in json_file into 'self'.
        """
        raise NotImplementedError()

    def save_model(self, json_file):
        """Write the network model and preprocessing features to the
        specified file.
        """
        raise NotImplementedError()

    def load_params(self, h5_file):
        """Load model parameters (weights) in the specified file.
        """
        raise NotImplementedError()

    def save_params(self, h5_file):
        """Save model parameters (weights) to the specified file.
        """
        raise NotImplementedError()
19 changes: 19 additions & 0 deletions tests/test_policy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from AlphaGo.models.policy import CNNPolicy
from AlphaGo.go import GameState
import unittest

class TestCNNPolicy(unittest.TestCase):
    """Smoke tests for building a CNNPolicy and running it forward."""

    def test_default_policy(self):
        """A policy with default network settings evaluates a fresh game."""
        features = ["board", "liberties", "sensibleness", "capture_size"]
        default_policy = CNNPolicy(features)
        # just hope nothing breaks
        default_policy.eval_state(GameState())

    def test_output_size(self):
        """The forward output is board x board for each board size."""
        for size in (19, 13):
            features = ["board", "liberties", "sensibleness", "capture_size"]
            policy = CNNPolicy(features, board=size)
            tensor = policy.preprocessor.state_to_tensor(GameState(size))
            output = policy.forward([tensor])[0]
            self.assertEqual(output.shape, (size, size))

0 comments on commit 28497b7

Please sign in to comment.