Skip to content

Commit

Permalink
[WIP] merged working version with ongoing changes
Browse files Browse the repository at this point in the history
  • Loading branch information
cipri-tom committed Jan 20, 2017
1 parent 8816467 commit dafa430
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 156 deletions.
58 changes: 29 additions & 29 deletions scripts/mountaincar.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,26 @@ class MountainCar():
For the miniproject, you are not meant to change the default parameters
(mass of the car, etc.)
Usage:
Usage:
>>> mc = MountainCar()
Set the agent to apply a rightward force (positive in x)
>>> mc.apply_force(+1) # the actual value doesn't matter, only the sign
Run an "agent time step" of 1s with 0.01 s integration time step
>>> mc.simulate_timesteps(n = 100, dt = 0.01)
Check the state variables of the agent, and the reward
>>> print mc.x, mc.x_d, mc.R
At some point, one might want to reset the position/speed of the car
>>> mc.reset()
"""

def __init__(self, g = 10.0, d = 100.0, H = 10., m = 10.0,
force_amplitude = 3.0, reward_amplitude = 1.,
def __init__(self, g = 10.0, d = 100.0, H = 10., m = 10.0,
force_amplitude = 3.0, reward_amplitude = 1.,
reward_threshold = 0.0):

# set internal parameters from constructor call
self.g = g # gravitational constant
self.d = d # minima location
Expand All @@ -48,10 +48,10 @@ def reset(self):
"""

# set position to range [-130; -50]
self.x = 80 * np.random.rand() - 130.0
self.x = 80 * np.random.rand() - 130.0
#self.x = -60.0
#print self.x

# set x_dot to range [-5; 5]
self.x_d = 10.0 * np.random.rand() - 5.0
# reset reward
Expand All @@ -64,8 +64,8 @@ def reset(self):
def apply_force(self, direction):
"""Apply a force to the car.
Only three values of force are possible:
right (if direction > 0),
Only three values of force are possible:
right (if direction > 0),
left (direction < 0) or
no force (direction = 0).\
"""
Expand All @@ -75,7 +75,7 @@ def _h(self, x):
"""Return the value of the landscape function h in x.
"""
return (x - self.d)**2 * (x + self.d)**2 / ((self.d**4/self.H)+x**2)

def _h_prime(self, x):
"""Return the value of the first derivative of the landscape function h in x.
"""
Expand All @@ -87,7 +87,7 @@ def _h_second(self, x):
"""
c = self.d**4/self.H
return 2 * (
- 2 * c**2 * (self.d**2 - 3*x**2)
- 2 * c**2 * (self.d**2 - 3*x**2)
+ c * (-self.d**4 + 6*self.d**2 * x**2 + 3*x**4)
+ 3 * self.d**4 * x**2
+ x**6
Expand All @@ -97,15 +97,15 @@ def _energy(self, x, x_d):
"""Return the total energy of the car with variable x and x_d.
"""
# note that v and x dot are not the same: v includes the y direction!
return self.m * (self.g * self._h(x) + 0.5 * (1 + self._h_prime(x)**2) * x_d**2)
return self.m * (self.g * self._h(x) + 0.5 * (1 + self._h_prime(x)**2) * x_d**2)

def simulate_timesteps(self, n = 1, dt = 0.1):
"""Simulate the car dynamics for n timesteps of length dt.
"""

for i in range(n):
self._simulate_single_timestep(dt)

self.t += n*dt

# check for rewards
Expand Down Expand Up @@ -139,34 +139,34 @@ def _get_reward(self):

class MountainCarViewer():
"""Display the state of a MountainCar instance.
Usage:
Usage:
>>> mc = MountainCar()
>>> mv = MountainCarViewer(mc)
Turn matplotlib's "interactive mode" on and create figure
>>> plb.ion()
>>> mv.create_figure(n_steps = 200, max_time = 200)
This forces matplotlib to draw the fig. before the end of execution
>>> plb.draw()
Simulate the MountainCar, visualizing the state
>>> for n in range(200):
>>> mc.simulate_timesteps(100,0.01)
>>> mv.update_figure()
>>> plb.draw()
"""

def __init__(self, mountain_car):
"""Create a viewer bound to an existing MountainCar instance.
Parameters:
mountain_car -- the MountainCar object this viewer keeps a reference to
Raises:
AssertionError if mountain_car is not a MountainCar instance
(only when assertions are enabled, i.e. not under python -O)
"""
# fail early on a wrong argument type instead of at first attribute access
assert isinstance(mountain_car, MountainCar), \
'Argument to MoutainCarViewer() must be a MountainCar instance'
self.mountain_car = mountain_car

def create_figure(self, n_steps, max_time, f = None):
"""Create a figure showing the progression of the car.
Call update_car_state subsequently to update this figure.
Parameters:
Expand All @@ -181,7 +181,7 @@ def create_figure(self, n_steps, max_time, f = None):
else:
self.f = f

# create the to store the arrays
# create the vars to store the arrays
self.times = np.zeros(n_steps + 1)
self.positions = np.zeros((n_steps + 1,2))
self.forces = np.zeros(n_steps + 1)
Expand All @@ -199,14 +199,14 @@ def create_figure(self, n_steps, max_time, f = None):
# create the force plot
self.ax_forces = plb.subplot(2,2,3)
self.h_forces = self._plot_forces()
plb.axis(xmin = 0, xmax = max_time,
plb.axis(xmin = 0, xmax = max_time,
ymin = -1.1 * self.mountain_car.force_amplitude,
ymax = 1.1 * self.mountain_car.force_amplitude)

# create the energy plot
self.ax_energies = plb.subplot(2,2,4)
self.h_energies = self._plot_energy()
plb.axis(xmin = 0, xmax = max_time,
plb.axis(xmin = 0, xmax = max_time,
ymin = 0.0, ymax =1000.)

def update_figure(self):
Expand All @@ -215,7 +215,7 @@ def update_figure(self):
Assumes the figure has already been created with create_figure.
"""

# increment
# increment
self.i += 1
assert self.i < len(self.forces), \
"update_figure was called too many times."
Expand All @@ -241,7 +241,7 @@ def _get_values(self):
def _plot_energy_landscape(self, ax = None):
"""plot the energy landscape for the mountain car in 2D.
Returns the axes instance created. Use plot_energy_landscape to let
Returns the axes instance created. Use plot_energy_landscape to let
the module decide whether you have the right modules for 3D plotting.
"""

Expand All @@ -257,7 +257,7 @@ def _plot_energy_landscape(self, ax = None):
if ax is None:
f = plb.figure()
ax = plb.axes()

C = ax.contourf(X,XD, E,100)
ax.set_xlabel('$x$')
ax.set_ylabel('$\dot x$')
Expand Down Expand Up @@ -286,7 +286,7 @@ def _plot_positions(self, handles = None):
np.atleast_1d(self.positions[self.i,1]),
'o' + color,
markeredgecolor = 'none',
markersize = 9,
markersize = 9,
)[0])
return tuple(handles)
else:
Expand Down
Loading

0 comments on commit dafa430

Please sign in to comment.