Commit f5ad67e, committed by Xuan Zuo on Oct 2, 2020 (0 parents).
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing 32 changed files with 1,068,884 additions and 0 deletions.
@@ -0,0 +1,140 @@
""""" | ||
This code is taken from https://github.com/philtabor/Deep-Q-Learning-Paper-To-Code and modified as per our requirement | ||
Shahid Mohammed | ||
""""" | ||
|
||
import numpy as np | ||
import torch.nn as nn | ||
import torch.nn.functional as F | ||
import torch.optim as optim | ||
import torch as T | ||
import random | ||
|
||
#from util import plot_learning_curve | ||
|
||
class LinearDeepQNetwork(nn.Module): | ||
def __init__(self, lr, n_actions, input_dims): | ||
super(LinearDeepQNetwork, self).__init__() | ||
|
||
self.fc1 = nn.Linear(input_dims, 128) | ||
self.fc2 = nn.Linear(128, n_actions) | ||
|
||
self.optimizer = optim.Adam(self.parameters(), lr=lr) | ||
self.loss = nn.MSELoss() | ||
self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu') | ||
self.to(self.device) | ||
|
||
def forward(self, state): | ||
layer1 = F.relu(self.fc1(state)) | ||
actions = self.fc2(layer1) | ||
return actions | ||
|
||
|
||
class Agent(): | ||
def __init__(self, input_dims, n_actions, lr, gamma=0.99, | ||
epsilon=1.0, eps_dec=1e-5, eps_min=0.01): | ||
self.lr = lr | ||
self.input_dims = input_dims | ||
self.n_actions = n_actions | ||
self.gamma = gamma | ||
self.epsilon = epsilon | ||
self.eps_dec = eps_dec | ||
self.eps_min = eps_min | ||
self.action_space = [i for i in range(self.n_actions)] | ||
|
||
self.Q = LinearDeepQNetwork(self.lr, self.n_actions, self.input_dims) | ||
|
||
def choose_action(self, state): | ||
if np.random.random() > self.epsilon: | ||
state1 = T.tensor(state, dtype=T.float).to(self.Q.device) | ||
# state = | ||
actions = self.Q.forward(state1) | ||
action = T.argmax(actions).item() | ||
else: | ||
action = np.random.choice(self.action_space) | ||
|
||
return action | ||
|
||
def decrement_epsilon(self): | ||
self.epsilon = self.epsilon - self.eps_dec \ | ||
if self.epsilon > self.eps_min else self.eps_min | ||
|
||
def learn(self, state, action, reward, state_): | ||
self.Q.optimizer.zero_grad() | ||
states = T.tensor(state, dtype=T.float).to(self.Q.device) | ||
actions = T.tensor(action).to(self.Q.device) | ||
rewards = T.tensor(reward).to(self.Q.device) | ||
states_ = T.tensor(state_, dtype=T.float).to(self.Q.device) | ||
|
||
q_pred = self.Q.forward(states)[actions] | ||
|
||
q_next = self.Q.forward(states_).max() | ||
|
||
q_target = reward + self.gamma*q_next | ||
|
||
loss = self.Q.loss(q_target, q_pred).to(self.Q.device) | ||
loss.backward() | ||
self.Q.optimizer.step() | ||
self.decrement_epsilon() | ||
|
||
def extractData(fileName): | ||
# taken from https://www.tutorialspoint.com/How-to-read-text-file-into-a-list-or-array-with-Python | ||
f = open(fileName, 'r+') | ||
# taken from https://docs.scipy.org/doc/numpy/reference/generated/numpy.fromstring.html | ||
data = [np.fromstring(line, dtype=float, sep=' ') for line in f.readlines()] | ||
|
||
f.close() | ||
return data | ||
|
||
""""" | ||
Random environment for DQN model | ||
""""" | ||
|
||
def getReward(action): | ||
return random.randint(0, 10) | ||
|
||
def getNextState(action, input): | ||
i = random.randint(0, 99) | ||
return input[i] | ||
|
||
""""" | ||
This code for to train the DQN model with our data | ||
""""" | ||
|
||
if __name__ == '__main__': | ||
input = extractData('input.txt') | ||
input1 = np.array(input) | ||
|
||
episodes = 10 | ||
scores = [] | ||
eps_history = [] | ||
|
||
agent = Agent(lr=0.0001, input_dims=7, | ||
n_actions=10) | ||
f = open("results_dqn.txt", "w") | ||
for i in range(episodes): | ||
score = 0 | ||
done = False | ||
initial_state = input1[0] | ||
curr_state = initial_state | ||
j = 0 | ||
while not done: | ||
action = agent.choose_action(initial_state) | ||
next_state = getNextState(action, input1) | ||
|
||
reward = getReward(action) | ||
score += reward | ||
agent.learn(curr_state, action, reward, next_state) | ||
curr_state = next_state | ||
|
||
j += 1 | ||
if j >= 1000 : | ||
done = True | ||
scores.append(score) | ||
eps_history.append(agent.epsilon) | ||
|
||
avg_score = np.mean(scores[-100:]) | ||
f.write("{}{}{}\n".format('episode ', i, 'score %.1f avg score %.1f epsilon %.2f' % | ||
(score, avg_score, agent.epsilon))) | ||
|
||
f.close() |
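The training script reads input.txt, which is not included in this view of the diff. As a rough sketch for local experimentation only, and assuming a format inferred from extractData(), input_dims=7, and getNextState() sampling indices 0-99 (the project's real input data may differ), a compatible synthetic file could be generated like this:

# Hypothetical helper, not part of this commit: writes a synthetic input.txt
# that extractData() above can parse (space-separated floats, 7 features per
# line, 100 lines so getNextState() can index rows 0-99).
import numpy as np

states = np.random.rand(100, 7)
np.savetxt('input.txt', states, fmt='%.6f')   # one state per line, space-delimited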
Binary file not shown.
@@ -0,0 +1,3 @@
torch==1.4.0
numpy==1.16.2
matplotlib==3.0.3
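Assuming this block is the project's requirements file (the filename is not visible in this capture), the pinned versions can be installed with pip install -r requirements.txt before running the scripts above.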
@@ -0,0 +1,10 @@
episode 0 score 4924.0 avg score 4924.0 epsilon 0.99
episode 1 score 5027.0 avg score 4975.5 epsilon 0.98
episode 2 score 5002.0 avg score 4984.3 epsilon 0.97
episode 3 score 4972.0 avg score 4981.2 epsilon 0.96
episode 4 score 5124.0 avg score 5009.8 epsilon 0.95
episode 5 score 5092.0 avg score 5023.5 epsilon 0.94
episode 6 score 4930.0 avg score 5010.1 epsilon 0.93
episode 7 score 4984.0 avg score 5006.9 epsilon 0.92
episode 8 score 4973.0 avg score 5003.1 epsilon 0.91
episode 9 score 4918.0 avg score 4994.6 epsilon 0.90
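These scores are consistent with the random placeholder environment: getReward() returns a uniform integer in [0, 10] (mean 5) regardless of the action, and each episode runs 1,000 steps, so scores clustering near 5,000 are expected and the agent's choices do not yet influence the outcome. The epsilon column drops by 0.01 per episode, matching eps_dec=1e-5 applied once per step over 1,000 steps.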
@@ -0,0 +1,2 @@
# project-p3-Zuox99
# Deep-Reinforcement-Learning-Based-Resource-Provisioning-and-Task-Scheduling-for-Cloud-Service-Provid
@@ -0,0 +1,86 @@
""""" | ||
This code is taken from https://github.com/philtabor/Deep-Q-Learning-Paper-To-Code and modified as per our requirement | ||
Shahid Mohammed Shaikbepari | ||
""""" | ||
|
||
import numpy as np | ||
import torch.nn as nn | ||
import torch.nn.functional as F | ||
import torch.optim as optim | ||
import torch as T | ||
import random | ||
|
||
|
||
#from util import plot_learning_curve | ||
|
||
class LinearDeepQNetwork(nn.Module): | ||
def __init__(self, lr, n_actions, input_dims): | ||
super(LinearDeepQNetwork, self).__init__() | ||
|
||
self.fc1 = nn.Linear(input_dims, 128) | ||
self.fc2 = nn.Linear(128, n_actions) | ||
|
||
self.optimizer = optim.Adam(self.parameters(), lr=lr) | ||
self.loss = nn.MSELoss() | ||
self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu') | ||
self.to(self.device) | ||
#takes the current and returns list of actions | ||
def forward(self, state): | ||
layer1 = F.relu(self.fc1(state)) | ||
actions = self.fc2(layer1) | ||
return actions | ||
|
||
|
||
class Agent(): | ||
def __init__(self, input_dims, n_actions, lr, gamma=0.99, | ||
epsilon=1.0, eps_dec=1e-5, eps_min=0.01): | ||
self.lr = lr | ||
self.input_dims = input_dims | ||
self.n_actions = n_actions | ||
self.gamma = gamma | ||
self.epsilon = epsilon | ||
self.eps_dec = eps_dec | ||
self.eps_min = eps_min | ||
self.action_space = [i for i in range(self.n_actions)] | ||
|
||
self.Q = LinearDeepQNetwork(self.lr, self.n_actions, self.input_dims) | ||
|
||
def choose_action(self, state): | ||
if np.random.random() > self.epsilon: | ||
state1 = T.tensor(state, dtype=T.float).to(self.Q.device) | ||
# state = | ||
actions = self.Q.forward(state1) | ||
action = T.argmax(actions).item() | ||
else: | ||
action = np.random.choice(self.action_space) | ||
|
||
return action | ||
|
||
def decrement_epsilon(self): | ||
self.epsilon = self.epsilon - self.eps_dec \ | ||
if self.epsilon > self.eps_min else self.eps_min | ||
|
||
def learn(self, state, action, reward, state_): | ||
self.Q.optimizer.zero_grad() | ||
states = T.tensor(state, dtype=T.float).to(self.Q.device) | ||
actions = T.tensor(action).to(self.Q.device) | ||
rewards = T.tensor(reward).to(self.Q.device) | ||
states_ = T.tensor(state_, dtype=T.float).to(self.Q.device) | ||
|
||
q_pred = self.Q.forward(states)[actions] | ||
|
||
q_next = self.Q.forward(states_).max() | ||
|
||
q_target = reward + self.gamma*q_next | ||
|
||
loss = self.Q.loss(q_target, q_pred).to(self.Q.device) | ||
loss.backward() | ||
self.Q.optimizer.step() | ||
self.decrement_epsilon() | ||
|
||
def processDQN_stage1(self, initial_state): | ||
action = self.choose_action(initial_state) | ||
return action | ||
def processDQN_stage2(self,initial_state ): | ||
action = self.choose_action(initial_state) | ||
return action |
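No driver code for the two-stage interface appears in this capture, so the following is only a hypothetical usage sketch: the 7-feature state and 10 actions are assumptions carried over from the first script, and the stage labels are guesses based on the repository title rather than anything shown in the diff.

# Hypothetical usage, not part of this commit.
import numpy as np

agent = Agent(lr=0.0001, input_dims=7, n_actions=10)
state = np.random.rand(7)                               # assumed 7-dimensional state
provisioning_action = agent.processDQN_stage1(state)    # stage 1: e.g. resource provisioning
scheduling_action = agent.processDQN_stage2(state)      # stage 2: e.g. task scheduling
print(provisioning_action, scheduling_action)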