Commit 1317cc5

change ix to loc

1 parent 542c810 commit 1317cc5
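Context for the change: pandas deprecated the hybrid `.ix` indexer (since version 0.20, removed in later releases), so every `.ix` lookup in these Q-table scripts is switched to the purely label-based `.loc`. Below is a minimal sketch of the same update pattern with `.loc`; it is not taken from the repository, and the miniature q_table is made up for illustration.

import numpy as np
import pandas as pd

# Hypothetical miniature Q-table: integer state labels as the index,
# action names as the columns.
actions = ['left', 'right']
q_table = pd.DataFrame(np.zeros((3, len(actions))), columns=actions)

s, a, r, alpha, gamma = 0, 'right', 1.0, 0.1, 0.9

# Deprecated form used before this commit (fails on modern pandas):
# q_predict = q_table.ix[s, a]

# Label-based form used after this commit:
q_predict = q_table.loc[s, a]
q_target = r + gamma * q_table.loc[1, :].max()
q_table.loc[s, a] += alpha * (q_target - q_predict)
print(q_table)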

4 files changed: +19 -19 lines changed


contents/1_command_line_reinforcement_learning/treasure_on_right.py

Lines changed: 2 additions & 2 deletions
@@ -86,14 +86,14 @@ def rl():
 
             A = choose_action(S, q_table)
             S_, R = get_env_feedback(S, A)  # take action & get next state and reward
-            q_predict = q_table.ix[S, A]
+            q_predict = q_table.loc[S, A]
             if S_ != 'terminal':
                 q_target = R + GAMMA * q_table.iloc[S_, :].max()  # next state is not terminal
             else:
                 q_target = R  # next state is terminal
                 is_terminated = True  # terminate this episode
 
-            q_table.ix[S, A] += ALPHA * (q_target - q_predict)  # update
+            q_table.loc[S, A] += ALPHA * (q_target - q_predict)  # update
             S = S_  # move to next state
 
             update_env(S, episode, step_counter+1)

contents/2_Q_Learning_maze/RL_brain.py

Lines changed: 4 additions & 4 deletions
@@ -22,7 +22,7 @@ def choose_action(self, observation):
         # action selection
         if np.random.uniform() < self.epsilon:
             # choose best action
-            state_action = self.q_table.ix[observation, :]
+            state_action = self.q_table.loc[observation, :]
             state_action = state_action.reindex(np.random.permutation(state_action.index))  # some actions have same value
             action = state_action.idxmax()
         else:
@@ -32,12 +32,12 @@ def choose_action(self, observation):
 
     def learn(self, s, a, r, s_):
         self.check_state_exist(s_)
-        q_predict = self.q_table.ix[s, a]
+        q_predict = self.q_table.loc[s, a]
         if s_ != 'terminal':
-            q_target = r + self.gamma * self.q_table.ix[s_, :].max()  # next state is not terminal
+            q_target = r + self.gamma * self.q_table.loc[s_, :].max()  # next state is not terminal
         else:
             q_target = r  # next state is terminal
-        self.q_table.ix[s, a] += self.lr * (q_target - q_predict)  # update
+        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)  # update
 
     def check_state_exist(self, state):
         if state not in self.q_table.index:

contents/3_Sarsa_maze/RL_brain.py

Lines changed: 7 additions & 7 deletions
@@ -34,7 +34,7 @@ def choose_action(self, observation):
         # action selection
         if np.random.rand() < self.epsilon:
             # choose best action
-            state_action = self.q_table.ix[observation, :]
+            state_action = self.q_table.loc[observation, :]
             state_action = state_action.reindex(np.random.permutation(state_action.index))  # some actions have same value
             action = state_action.idxmax()
         else:
@@ -53,12 +53,12 @@ def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
 
     def learn(self, s, a, r, s_):
         self.check_state_exist(s_)
-        q_predict = self.q_table.ix[s, a]
+        q_predict = self.q_table.loc[s, a]
         if s_ != 'terminal':
-            q_target = r + self.gamma * self.q_table.ix[s_, :].max()  # next state is not terminal
+            q_target = r + self.gamma * self.q_table.loc[s_, :].max()  # next state is not terminal
         else:
             q_target = r  # next state is terminal
-        self.q_table.ix[s, a] += self.lr * (q_target - q_predict)  # update
+        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)  # update
 
 
 # on-policy
@@ -69,9 +69,9 @@ def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
 
     def learn(self, s, a, r, s_, a_):
         self.check_state_exist(s_)
-        q_predict = self.q_table.ix[s, a]
+        q_predict = self.q_table.loc[s, a]
         if s_ != 'terminal':
-            q_target = r + self.gamma * self.q_table.ix[s_, a_]  # next state is not terminal
+            q_target = r + self.gamma * self.q_table.loc[s_, a_]  # next state is not terminal
         else:
             q_target = r  # next state is terminal
-        self.q_table.ix[s, a] += self.lr * (q_target - q_predict)  # update
+        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)  # update

contents/4_Sarsa_lambda_maze/RL_brain.py

Lines changed: 6 additions & 6 deletions
@@ -34,7 +34,7 @@ def choose_action(self, observation):
         # action selection
         if np.random.rand() < self.epsilon:
             # choose best action
-            state_action = self.q_table.ix[observation, :]
+            state_action = self.q_table.loc[observation, :]
             state_action = state_action.reindex(np.random.permutation(state_action.index))  # some actions have same value
             action = state_action.idxmax()
         else:
@@ -70,21 +70,21 @@ def check_state_exist(self, state):
 
     def learn(self, s, a, r, s_, a_):
         self.check_state_exist(s_)
-        q_predict = self.q_table.ix[s, a]
+        q_predict = self.q_table.loc[s, a]
         if s_ != 'terminal':
-            q_target = r + self.gamma * self.q_table.ix[s_, a_]  # next state is not terminal
+            q_target = r + self.gamma * self.q_table.loc[s_, a_]  # next state is not terminal
         else:
             q_target = r  # next state is terminal
         error = q_target - q_predict
 
         # increase trace amount for visited state-action pair
 
         # Method 1:
-        # self.eligibility_trace.ix[s, a] += 1
+        # self.eligibility_trace.loc[s, a] += 1
 
         # Method 2:
-        self.eligibility_trace.ix[s, :] *= 0
-        self.eligibility_trace.ix[s, a] = 1
+        self.eligibility_trace.loc[s, :] *= 0
+        self.eligibility_trace.loc[s, a] = 1
 
         # Q update
         self.q_table += self.lr * error * self.eligibility_trace
