Commit f5ad67e, committed by Xuan Zuo on Oct 2, 2020 (0 parents).
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing 32 changed files with 1,068,884 additions and 0 deletions.
@@ -0,0 +1,140 @@
""""" | ||
This code is taken from https://github.com/philtabor/Deep-Q-Learning-Paper-To-Code and modified as per our requirement | ||
Shahid Mohammed | ||
""""" | ||
|
||
import numpy as np | ||
import torch.nn as nn | ||
import torch.nn.functional as F | ||
import torch.optim as optim | ||
import torch as T | ||
import random | ||
|
||
#from util import plot_learning_curve | ||
|
||
class LinearDeepQNetwork(nn.Module): | ||
def __init__(self, lr, n_actions, input_dims): | ||
super(LinearDeepQNetwork, self).__init__() | ||
|
||
self.fc1 = nn.Linear(input_dims, 128) | ||
self.fc2 = nn.Linear(128, n_actions) | ||
|
||
self.optimizer = optim.Adam(self.parameters(), lr=lr) | ||
self.loss = nn.MSELoss() | ||
self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu') | ||
self.to(self.device) | ||
|
||
def forward(self, state): | ||
layer1 = F.relu(self.fc1(state)) | ||
actions = self.fc2(layer1) | ||
return actions | ||
|
||
|
||
class Agent(): | ||
def __init__(self, input_dims, n_actions, lr, gamma=0.99, | ||
epsilon=1.0, eps_dec=1e-5, eps_min=0.01): | ||
self.lr = lr | ||
self.input_dims = input_dims | ||
self.n_actions = n_actions | ||
self.gamma = gamma | ||
self.epsilon = epsilon | ||
self.eps_dec = eps_dec | ||
self.eps_min = eps_min | ||
self.action_space = [i for i in range(self.n_actions)] | ||
|
||
self.Q = LinearDeepQNetwork(self.lr, self.n_actions, self.input_dims) | ||
|
||
def choose_action(self, state): | ||
if np.random.random() > self.epsilon: | ||
state1 = T.tensor(state, dtype=T.float).to(self.Q.device) | ||
# state = | ||
actions = self.Q.forward(state1) | ||
action = T.argmax(actions).item() | ||
else: | ||
action = np.random.choice(self.action_space) | ||
|
||
return action | ||
|
||
def decrement_epsilon(self): | ||
self.epsilon = self.epsilon - self.eps_dec \ | ||
if self.epsilon > self.eps_min else self.eps_min | ||
|
||
def learn(self, state, action, reward, state_): | ||
self.Q.optimizer.zero_grad() | ||
states = T.tensor(state, dtype=T.float).to(self.Q.device) | ||
actions = T.tensor(action).to(self.Q.device) | ||
rewards = T.tensor(reward).to(self.Q.device) | ||
states_ = T.tensor(state_, dtype=T.float).to(self.Q.device) | ||
|
||
q_pred = self.Q.forward(states)[actions] | ||
|
||
q_next = self.Q.forward(states_).max() | ||
|
||
q_target = reward + self.gamma*q_next | ||
|
||
loss = self.Q.loss(q_target, q_pred).to(self.Q.device) | ||
loss.backward() | ||
self.Q.optimizer.step() | ||
self.decrement_epsilon() | ||
|
||
def extractData(fileName): | ||
# taken from https://www.tutorialspoint.com/How-to-read-text-file-into-a-list-or-array-with-Python | ||
f = open(fileName, 'r+') | ||
# taken from https://docs.scipy.org/doc/numpy/reference/generated/numpy.fromstring.html | ||
data = [np.fromstring(line, dtype=float, sep=' ') for line in f.readlines()] | ||
|
||
f.close() | ||
return data | ||
|
||
""""" | ||
Random environment for DQN model | ||
""""" | ||
|
||
def getReward(action): | ||
return random.randint(0, 10) | ||
|
||
def getNextState(action, input): | ||
i = random.randint(0, 99) | ||
return input[i] | ||
|
||
""""" | ||
This code for to train the DQN model with our data | ||
""""" | ||
|
||
if __name__ == '__main__': | ||
input = extractData('input.txt') | ||
input1 = np.array(input) | ||
|
||
episodes = 10 | ||
scores = [] | ||
eps_history = [] | ||
|
||
agent = Agent(lr=0.0001, input_dims=7, | ||
n_actions=10) | ||
f = open("results_dqn.txt", "w") | ||
for i in range(episodes): | ||
score = 0 | ||
done = False | ||
initial_state = input1[0] | ||
curr_state = initial_state | ||
j = 0 | ||
while not done: | ||
action = agent.choose_action(initial_state) | ||
next_state = getNextState(action, input1) | ||
|
||
reward = getReward(action) | ||
score += reward | ||
agent.learn(curr_state, action, reward, next_state) | ||
curr_state = next_state | ||
|
||
j += 1 | ||
if j >= 1000 : | ||
done = True | ||
scores.append(score) | ||
eps_history.append(agent.epsilon) | ||
|
||
avg_score = np.mean(scores[-100:]) | ||
f.write("{}{}{}\n".format('episode ', i, 'score %.1f avg score %.1f epsilon %.2f' % | ||
(score, avg_score, agent.epsilon))) | ||
|
||
f.close() |
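The training script reads input.txt, which is not included in this view of the diff. As a rough sketch for local experimentation only, and assuming a format inferred from extractData(), input_dims=7, and getNextState() sampling indices 0-99 (the project's real input data may differ), a compatible synthetic file could be generated like this:

# Hypothetical helper, not part of this commit: writes a synthetic input.txt
# that extractData() above can parse (space-separated floats, 7 features per
# line, 100 lines so getNextState() can index rows 0-99).
import numpy as np

states = np.random.rand(100, 7)
np.savetxt('input.txt', states, fmt='%.6f')   # one state per line, space-delimited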
Binary file not shown.
@@ -0,0 +1,3 @@
torch==1.4.0
numpy==1.16.2
matplotlib==3.0.3
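Assuming this block is the project's requirements file (the filename is not visible in this capture), the pinned versions can be installed with pip install -r requirements.txt before running the scripts above.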
@@ -0,0 +1,10 @@
episode 0 score 4924.0 avg score 4924.0 epsilon 0.99
episode 1 score 5027.0 avg score 4975.5 epsilon 0.98
episode 2 score 5002.0 avg score 4984.3 epsilon 0.97
episode 3 score 4972.0 avg score 4981.2 epsilon 0.96
episode 4 score 5124.0 avg score 5009.8 epsilon 0.95
episode 5 score 5092.0 avg score 5023.5 epsilon 0.94
episode 6 score 4930.0 avg score 5010.1 epsilon 0.93
episode 7 score 4984.0 avg score 5006.9 epsilon 0.92
episode 8 score 4973.0 avg score 5003.1 epsilon 0.91
episode 9 score 4918.0 avg score 4994.6 epsilon 0.90
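These scores are consistent with the random placeholder environment: getReward() returns a uniform integer in [0, 10] (mean 5) regardless of the action, and each episode runs 1,000 steps, so scores clustering near 5,000 are expected and the agent's choices do not yet influence the outcome. The epsilon column drops by 0.01 per episode, matching eps_dec=1e-5 applied once per step over 1,000 steps.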
@@ -0,0 +1,2 @@
# project-p3-Zuox99
# Deep-Reinforcement-Learning-Based-Resource-Provisioning-and-Task-Scheduling-for-Cloud-Service-Provid
@@ -0,0 +1,86 @@
""""" | ||
This code is taken from https://github.com/philtabor/Deep-Q-Learning-Paper-To-Code and modified as per our requirement | ||
Shahid Mohammed Shaikbepari | ||
""""" | ||
|
||
import numpy as np | ||
import torch.nn as nn | ||
import torch.nn.functional as F | ||
import torch.optim as optim | ||
import torch as T | ||
import random | ||
|
||
|
||
#from util import plot_learning_curve | ||
|
||
class LinearDeepQNetwork(nn.Module): | ||
def __init__(self, lr, n_actions, input_dims): | ||
super(LinearDeepQNetwork, self).__init__() | ||
|
||
self.fc1 = nn.Linear(input_dims, 128) | ||
self.fc2 = nn.Linear(128, n_actions) | ||
|
||
self.optimizer = optim.Adam(self.parameters(), lr=lr) | ||
self.loss = nn.MSELoss() | ||
self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu') | ||
self.to(self.device) | ||
#takes the current and returns list of actions | ||
def forward(self, state): | ||
layer1 = F.relu(self.fc1(state)) | ||
actions = self.fc2(layer1) | ||
return actions | ||
|
||
|
||
class Agent(): | ||
def __init__(self, input_dims, n_actions, lr, gamma=0.99, | ||
epsilon=1.0, eps_dec=1e-5, eps_min=0.01): | ||
self.lr = lr | ||
self.input_dims = input_dims | ||
self.n_actions = n_actions | ||
self.gamma = gamma | ||
self.epsilon = epsilon | ||
self.eps_dec = eps_dec | ||
self.eps_min = eps_min | ||
self.action_space = [i for i in range(self.n_actions)] | ||
|
||
self.Q = LinearDeepQNetwork(self.lr, self.n_actions, self.input_dims) | ||
|
||
def choose_action(self, state): | ||
if np.random.random() > self.epsilon: | ||
state1 = T.tensor(state, dtype=T.float).to(self.Q.device) | ||
# state = | ||
actions = self.Q.forward(state1) | ||
action = T.argmax(actions).item() | ||
else: | ||
action = np.random.choice(self.action_space) | ||
|
||
return action | ||
|
||
def decrement_epsilon(self): | ||
self.epsilon = self.epsilon - self.eps_dec \ | ||
if self.epsilon > self.eps_min else self.eps_min | ||
|
||
def learn(self, state, action, reward, state_): | ||
self.Q.optimizer.zero_grad() | ||
states = T.tensor(state, dtype=T.float).to(self.Q.device) | ||
actions = T.tensor(action).to(self.Q.device) | ||
rewards = T.tensor(reward).to(self.Q.device) | ||
states_ = T.tensor(state_, dtype=T.float).to(self.Q.device) | ||
|
||
q_pred = self.Q.forward(states)[actions] | ||
|
||
q_next = self.Q.forward(states_).max() | ||
|
||
q_target = reward + self.gamma*q_next | ||
|
||
loss = self.Q.loss(q_target, q_pred).to(self.Q.device) | ||
loss.backward() | ||
self.Q.optimizer.step() | ||
self.decrement_epsilon() | ||
|
||
def processDQN_stage1(self, initial_state): | ||
action = self.choose_action(initial_state) | ||
return action | ||
def processDQN_stage2(self,initial_state ): | ||
action = self.choose_action(initial_state) | ||
return action |
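No driver code for the two-stage interface appears in this capture, so the following is only a hypothetical usage sketch: the 7-feature state and 10 actions are assumptions carried over from the first script, and the stage labels are guesses based on the repository title rather than anything shown in the diff.

# Hypothetical usage, not part of this commit.
import numpy as np

agent = Agent(lr=0.0001, input_dims=7, n_actions=10)
state = np.random.rand(7)                               # assumed 7-dimensional state
provisioning_action = agent.processDQN_stage1(state)    # stage 1: e.g. resource provisioning
scheduling_action = agent.processDQN_stage2(state)      # stage 2: e.g. task scheduling
print(provisioning_action, scheduling_action)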