Commit 1317cc5

change ix to loc

1 parent 542c810 commit 1317cc5
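Context for the change: pandas deprecated the hybrid `.ix` indexer (since version 0.20, removed in later releases), so every `.ix` lookup in these Q-table scripts is switched to the purely label-based `.loc`. Below is a minimal sketch of the same update pattern with `.loc`; it is not taken from the repository, and the miniature q_table is made up for illustration.

import numpy as np
import pandas as pd

# Hypothetical miniature Q-table: integer state labels as the index,
# action names as the columns.
actions = ['left', 'right']
q_table = pd.DataFrame(np.zeros((3, len(actions))), columns=actions)

s, a, r, alpha, gamma = 0, 'right', 1.0, 0.1, 0.9

# Deprecated form used before this commit (fails on modern pandas):
# q_predict = q_table.ix[s, a]

# Label-based form used after this commit:
q_predict = q_table.loc[s, a]
q_target = r + gamma * q_table.loc[1, :].max()
q_table.loc[s, a] += alpha * (q_target - q_predict)
print(q_table)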

4 files changed: +19 -19 lines changed


contents/1_command_line_reinforcement_learning/treasure_on_right.py

Lines changed: 2 additions & 2 deletions
@@ -86,14 +86,14 @@ def rl():
 
             A = choose_action(S, q_table)
             S_, R = get_env_feedback(S, A)  # take action & get next state and reward
-            q_predict = q_table.ix[S, A]
+            q_predict = q_table.loc[S, A]
             if S_ != 'terminal':
                 q_target = R + GAMMA * q_table.iloc[S_, :].max()  # next state is not terminal
             else:
                 q_target = R  # next state is terminal
                 is_terminated = True  # terminate this episode
 
-            q_table.ix[S, A] += ALPHA * (q_target - q_predict)  # update
+            q_table.loc[S, A] += ALPHA * (q_target - q_predict)  # update
             S = S_  # move to next state
 
             update_env(S, episode, step_counter+1)

contents/2_Q_Learning_maze/RL_brain.py

Lines changed: 4 additions & 4 deletions
@@ -22,7 +22,7 @@ def choose_action(self, observation):
         # action selection
         if np.random.uniform() < self.epsilon:
             # choose best action
-            state_action = self.q_table.ix[observation, :]
+            state_action = self.q_table.loc[observation, :]
             state_action = state_action.reindex(np.random.permutation(state_action.index))  # some actions have same value
             action = state_action.idxmax()
         else:
@@ -32,12 +32,12 @@ def choose_action(self, observation):
 
     def learn(self, s, a, r, s_):
         self.check_state_exist(s_)
-        q_predict = self.q_table.ix[s, a]
+        q_predict = self.q_table.loc[s, a]
         if s_ != 'terminal':
-            q_target = r + self.gamma * self.q_table.ix[s_, :].max()  # next state is not terminal
+            q_target = r + self.gamma * self.q_table.loc[s_, :].max()  # next state is not terminal
         else:
             q_target = r  # next state is terminal
-        self.q_table.ix[s, a] += self.lr * (q_target - q_predict)  # update
+        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)  # update
 
     def check_state_exist(self, state):
         if state not in self.q_table.index:

contents/3_Sarsa_maze/RL_brain.py

Lines changed: 7 additions & 7 deletions
@@ -34,7 +34,7 @@ def choose_action(self, observation):
         # action selection
         if np.random.rand() < self.epsilon:
             # choose best action
-            state_action = self.q_table.ix[observation, :]
+            state_action = self.q_table.loc[observation, :]
             state_action = state_action.reindex(np.random.permutation(state_action.index))  # some actions have same value
             action = state_action.idxmax()
         else:
@@ -53,12 +53,12 @@ def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
 
     def learn(self, s, a, r, s_):
         self.check_state_exist(s_)
-        q_predict = self.q_table.ix[s, a]
+        q_predict = self.q_table.loc[s, a]
         if s_ != 'terminal':
-            q_target = r + self.gamma * self.q_table.ix[s_, :].max()  # next state is not terminal
+            q_target = r + self.gamma * self.q_table.loc[s_, :].max()  # next state is not terminal
         else:
             q_target = r  # next state is terminal
-        self.q_table.ix[s, a] += self.lr * (q_target - q_predict)  # update
+        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)  # update
 
 
 # on-policy
@@ -69,9 +69,9 @@ def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
 
     def learn(self, s, a, r, s_, a_):
         self.check_state_exist(s_)
-        q_predict = self.q_table.ix[s, a]
+        q_predict = self.q_table.loc[s, a]
         if s_ != 'terminal':
-            q_target = r + self.gamma * self.q_table.ix[s_, a_]  # next state is not terminal
+            q_target = r + self.gamma * self.q_table.loc[s_, a_]  # next state is not terminal
         else:
             q_target = r  # next state is terminal
-        self.q_table.ix[s, a] += self.lr * (q_target - q_predict)  # update
+        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)  # update

contents/4_Sarsa_lambda_maze/RL_brain.py

Lines changed: 6 additions & 6 deletions
@@ -34,7 +34,7 @@ def choose_action(self, observation):
         # action selection
         if np.random.rand() < self.epsilon:
             # choose best action
-            state_action = self.q_table.ix[observation, :]
+            state_action = self.q_table.loc[observation, :]
             state_action = state_action.reindex(np.random.permutation(state_action.index))  # some actions have same value
             action = state_action.idxmax()
         else:
@@ -70,21 +70,21 @@ def check_state_exist(self, state):
 
     def learn(self, s, a, r, s_, a_):
         self.check_state_exist(s_)
-        q_predict = self.q_table.ix[s, a]
+        q_predict = self.q_table.loc[s, a]
         if s_ != 'terminal':
-            q_target = r + self.gamma * self.q_table.ix[s_, a_]  # next state is not terminal
+            q_target = r + self.gamma * self.q_table.loc[s_, a_]  # next state is not terminal
         else:
             q_target = r  # next state is terminal
         error = q_target - q_predict
 
         # increase trace amount for visited state-action pair
 
         # Method 1:
-        # self.eligibility_trace.ix[s, a] += 1
+        # self.eligibility_trace.loc[s, a] += 1
 
         # Method 2:
-        self.eligibility_trace.ix[s, :] *= 0
-        self.eligibility_trace.ix[s, a] = 1
+        self.eligibility_trace.loc[s, :] *= 0
+        self.eligibility_trace.loc[s, a] = 1
 
         # Q update
         self.q_table += self.lr * error * self.eligibility_trace
