diff --git a/PPO_continuous.py b/PPO_continuous.py
index af292b0..e74139a 100644
--- a/PPO_continuous.py
+++ b/PPO_continuous.py
@@ -61,14 +61,14 @@ def act(self, state, memory):
         return action.detach()
 
     def evaluate(self, state, action):
-        action_mean = torch.squeeze(self.actor(state))
+        action_mean = self.actor(state)
 
         action_var = self.action_var.expand_as(action_mean)
         cov_mat = torch.diag_embed(action_var).to(device)
 
         dist = MultivariateNormal(action_mean, cov_mat)
 
-        action_logprobs = dist.log_prob(torch.squeeze(action))
+        action_logprobs = dist.log_prob(action)
         dist_entropy = dist.entropy()
         state_value = self.critic(state)
 
@@ -109,9 +109,9 @@ def update(self, memory):
         rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-5)
 
         # convert list to tensor
-        old_states = torch.squeeze(torch.stack(memory.states).to(device)).detach()
-        old_actions = torch.squeeze(torch.stack(memory.actions).to(device)).detach()
-        old_logprobs = torch.squeeze(torch.stack(memory.logprobs)).to(device).detach()
+        old_states = torch.squeeze(torch.stack(memory.states).to(device), 1).detach()
+        old_actions = torch.squeeze(torch.stack(memory.actions).to(device), 1).detach()
+        old_logprobs = torch.squeeze(torch.stack(memory.logprobs), 1).to(device).detach()
 
         # Optimize policy for K epochs:
         for _ in range(self.K_epochs):
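
The switch to an explicit `dim=1` in `torch.squeeze` matters when `action_dim == 1`: a bare `squeeze()` removes every size-1 axis, including the action dimension, which the now un-squeezed `evaluate` relies on keeping. A minimal sketch of the shape difference (the 5-step rollout and `action_dim = 1` below are illustrative assumptions, not taken from the patch):

```python
# Sketch only (not part of the patch): compare squeeze() vs squeeze(dim=1)
# on a stack of rollout samples, each stored as a (1, action_dim) tensor.
import torch

action_dim = 1  # assumed 1-D action space to show the failure mode
stacked = torch.stack([torch.zeros(1, action_dim) for _ in range(5)])  # shape (5, 1, 1)

# squeeze() with no dim drops every size-1 axis, including the action axis:
print(torch.squeeze(stacked).shape)     # torch.Size([5])
# squeeze(dim=1) only drops the extra axis introduced by stacking the stored samples:
print(torch.squeeze(stacked, 1).shape)  # torch.Size([5, 1])
```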