Skip to content

Commit

Permalink
Add a neural-network model for learning board-state values, alongside the existing table-based learner
Browse files Browse the repository at this point in the history
  • Loading branch information
shayakbanerjee committed Dec 15, 2017
1 parent ffb5370 commit 151c668
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 10 deletions.
17 changes: 14 additions & 3 deletions game.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
from plotting import drawXYPlotByFactor
import os

LEARNING_FILE = 'ultimate_player_nn1.h5'
WIN_PCT_FILE = 'win_pct_player_1.csv'

class GameSequence(object):
def __init__(self, numberOfGames, player1, player2, BoardClass=TTTBoard, BoardDecisionClass=TTTBoardDecision):
self.player1 = player1
Expand Down Expand Up @@ -41,7 +44,7 @@ def playTTTAndPlotResults():
results = []
numberOfSetsOfGames = 40
for i in range(numberOfSetsOfGames):
games = GameSequence(100, randomPlayer, learningPlayer)
games = GameSequence(100, learningPlayer, randomPlayer)
results.append(games.playGamesAndGetWinPercent())
plotValues = {'X Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[0], results)),
'O Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[1], results)),
Expand All @@ -52,15 +55,23 @@ def playUltimateAndPlotResults():
learningPlayer = RLUTTTPlayer()
randomPlayer = RandomUTTTPlayer()
results = []
numberOfSetsOfGames = 1
numberOfSetsOfGames = 4
if os.path.isfile(LEARNING_FILE):
learningPlayer.loadLearning(LEARNING_FILE)
for i in range(numberOfSetsOfGames):
games = GameSequence(1000, learningPlayer, randomPlayer, BoardClass=UTTTBoard, BoardDecisionClass=UTTTBoardDecision)
games = GameSequence(100, learningPlayer, randomPlayer, BoardClass=UTTTBoard, BoardDecisionClass=UTTTBoardDecision)
results.append(games.playGamesAndGetWinPercent())
learningPlayer.saveLearning(LEARNING_FILE)
plotValues = {'X Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[0], results)),
'O Win Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[1], results)),
'Draw Fraction': zip(range(numberOfSetsOfGames), map(lambda x: x[2], results))}
drawXYPlotByFactor(plotValues, 'Set Number', 'Fraction')

def writeResultsToFile(results):
    """Append one CSV row per result triple to WIN_PCT_FILE.

    Each result is indexable as (x_win_fraction, o_win_fraction, draw_fraction);
    the file is opened in append mode so repeated runs accumulate history.
    """
    rows = ['%s,%s,%s\n' % (result[0], result[1], result[2]) for result in results]
    with open(WIN_PCT_FILE, 'a') as outfile:
        outfile.writelines(rows)

if __name__ == '__main__':
#playTTTAndPlotResults()
playUltimateAndPlotResults()
73 changes: 71 additions & 2 deletions learning.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,24 @@
from board import GridStates, TTTBoardDecision
import json
from keras.models import Sequential, load_model
from keras.layers import Dense
from keras.utils import plot_model
import numpy as np

class TableLearning(object):
class GenericLearning(object):
    """Abstract interface for a learning algorithm used by an RL player.

    Concrete implementations in this file are TableLearning (lookup table)
    and NNUltimateLearning (neural network).
    """

    def getBoardStateValue(self, player, board, boardState):
        # Return the estimated value of boardState from player's perspective.
        raise NotImplementedError

    def learnFromMove(self, player, board, prevBoardState):
        # Update internal estimates after a move taking prevBoardState to
        # the board's current state.
        raise NotImplementedError

    def saveModel(self, filename):
        # Persist learned state to filename.
        # NOTE(review): both visible subclasses implement saveLearning()/
        # loadLearning() rather than saveModel()/loadModel(), so these two
        # interface methods appear to be unimplemented everywhere -- confirm
        # the intended method names.
        raise NotImplementedError

    def loadModel(self, filename):
        # Restore learned state from filename.
        raise NotImplementedError

class TableLearning(GenericLearning):
def __init__(self, DecisionClass=TTTBoardDecision):
self.values = {}
self.DecisionClass = DecisionClass
Expand Down Expand Up @@ -34,4 +51,56 @@ def saveLearning(self, filename):
json.dump(self.values, open(filename,'w'))

def loadLearning(self, filename):
self.values = json.load(open(filename, 'r'))
self.values = json.load(open(filename, 'r'))


class NNUltimateLearning(GenericLearning):
    """Neural-network value learner for Ultimate Tic-Tac-Toe.

    Encodes an 81-cell board state as a numeric vector, maps it through a
    small fully-connected network to a scalar in [0, 1], and updates the
    estimate after every move by bootstrapping from the next state's value
    (TD(0)-style), clamping terminal states to 1.0 (win) or 0.0 (loss).
    """

    # Numeric encoding of a single grid cell for the network input.
    STATE_TO_NUMBER_MAP = {GridStates.EMPTY: 0, GridStates.PLAYER_O: -1, GridStates.PLAYER_X: 1}
    # Step size for the bootstrapped value update in learnFromMove.
    LEARNING_RATE = 0.2

    def __init__(self, DecisionClass=TTTBoardDecision):
        self.DecisionClass = DecisionClass
        self.initializeModel()

    def initializeModel(self):
        """Build and compile the 81-81-1 fully-connected value network."""
        self.model = Sequential()
        self.model.add(Dense(81, input_dim=81, activation='relu'))
        self.model.add(Dense(81, activation='relu'))
        self.model.add(Dense(1, activation='sigmoid'))  # output is a value in [0, 1]
        self.model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
        plot_model(self.model, to_file='model.png')

    def convertBoardStateToInput(self, boardState):
        """Encode the 81-character board state as a (1, 81) numeric array.

        Uses a list comprehension instead of map(): on Python 3, map()
        returns a lazy iterator, so np.asarray([map(...)]) would produce a
        shape-(1,) object array instead of a (1, 81) numeric matrix and
        break model.fit/model.predict. The comprehension is equivalent on
        Python 2 and correct on Python 3.
        """
        return np.asarray([[self.STATE_TO_NUMBER_MAP.get(cell) for cell in boardState]])

    def trainModel(self, boardState, y):
        """Take one gradient step fitting boardState towards target value y."""
        self.model.fit(self.convertBoardStateToInput(boardState), np.asarray([y]), verbose=0)

    def getPrediction(self, boardState):
        """Return the model's value estimate for boardState (a 1-element array)."""
        return self.model.predict(self.convertBoardStateToInput(boardState))[0]

    def getBoardStateValue(self, player, board, boardState): #TODO: Can batch the inputs to do several predictions at once
        """Value of boardState for player, training immediately on terminal states.

        A decided game overrides the prediction with the true outcome
        (1.0 if player won, 0.0 if the opponent won) and fits the model on
        that supervised target; the two branches are mutually exclusive.
        """
        decision = board.getBoardDecision()
        predY = self.getPrediction(boardState)[0]
        if decision == self.DecisionClass.WON_X:
            predY = 1.0 if player == GridStates.PLAYER_X else 0.0
            self.trainModel(boardState, predY)
        elif decision == self.DecisionClass.WON_O:
            predY = 1.0 if player == GridStates.PLAYER_O else 0.0
            self.trainModel(boardState, predY)
        return predY

    def learnFromMove(self, player, board, prevBoardState):
        """Move prevBoardState's value towards the new state's value (TD(0) update)."""
        curBoardState = board.getBoardState()
        curBoardStateValue = self.getBoardStateValue(player, board, curBoardState)
        prevBoardStateValue = self.getPrediction(prevBoardState)
        trainY = prevBoardStateValue + self.LEARNING_RATE * (curBoardStateValue - prevBoardStateValue)
        self.trainModel(prevBoardState, trainY)

    def printValues(self):
        """No-op: per-state values are not enumerable for a neural-network model."""
        pass

    def saveLearning(self, filename):
        """Persist the full keras model (architecture + weights) to an HDF5 file."""
        self.model.save(filename)

    def loadLearning(self, filename):
        """Restore a previously saved keras model from filename."""
        self.model = load_model(filename)
2 changes: 0 additions & 2 deletions player.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from board import TTTBoardDecision, TTTBoard
from learning import TableLearning
from util import Util
import random
from copy import deepcopy

class TTTPlayer(object):
def __init__(self):
Expand Down
6 changes: 3 additions & 3 deletions ultimateplayer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from ultimateboard import UTTTBoardDecision, UTTTBoard
from util import Util
from learning import TableLearning
from learning import TableLearning, NNUltimateLearning
import random

class UTTTPlayer(object):
Expand Down Expand Up @@ -39,7 +38,8 @@ def learnFromMove(self, prevBoardState):

class RLUTTTPlayer(UTTTPlayer):
def __init__(self):
self.learningAlgo = TableLearning(UTTTBoardDecision)
#self.learningAlgo = TableLearning(UTTTBoardDecision)
self.learningAlgo = NNUltimateLearning(UTTTBoardDecision)
super(RLUTTTPlayer, self).__init__()

def printValues(self):
Expand Down

0 comments on commit 151c668

Please sign in to comment.