Skip to content

Commit 2148675

Browse files
committed
Add solutions
1 parent e3df5f0 commit 2148675

File tree

11 files changed

+190571
-6
lines changed

11 files changed

+190571
-6
lines changed

.DS_Store

0 Bytes
Binary file not shown.

notebooks/lab-8/sl-tpl-ml-part-1.ipynb

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,8 @@
171171
"cell_type": "markdown",
172172
"metadata": {},
173173
"source": [
174-
"### `suggest_movies(user_ratings)`\n",
175-
"Given a user's rating of all of the movies (i.e., a 9125-dimensional vector with entries between 0 and 5, where 0s represent un-rated movies), this function will return the indices of the **top 5 movies** that match with that user. We'll do this using cosine similarity.\n",
174+
"### `suggest_movies(user_ratings, normalized_ratings, n=5)`\n",
175+
"Given a user's rating of all of the movies (i.e., a 9125-dimensional vector with entries between 0 and 5, where 0s represent un-rated movies), this function will return the indices of the **top `n` movies** that match with that user. We'll do this using cosine similarity.\n",
176176
"\n",
177177
"First, we'll compute a `movie_profile` for each user, which will be a 671-dimensional vector that combines the ratings we received as input with the ratings from the rest of the users. We do this by scaling each column of the matrix by the user's rating of that movie and then adding together all of the columns. For example, if the user rated Inside Out as a 4, Frozen 2 as a 3, and didn't rate any other movies, their profile would be:\n",
178178
"\n",
@@ -197,7 +197,7 @@
197197
"\n",
198198
"The cosine similarity between two vectors $x = (x_1, x_2, \\dots, x_n)$ and $y = (y_1, y_2, \\dots, y_n)$ (which both have norm 1) is defined as their dot product, or the sum of element-wise products of their entries: $x_1 y_1 + x_2 y_2 + \\cdots + x_n y_n$. You can think of the cosine similarity as an estimation of the \"closeness\" between the two vectors.\n",
199199
"\n",
200-
"Find the movies that are closest to our `movie_profile`: compute the cosine similarity between the `movie_profile` and each of the columns in our matrix and return the indices of the top 5 movies, in order from most similar to least similar.\n",
200+
"Find the movies that are closest to our `movie_profile`: compute the cosine similarity between the `movie_profile` and each of the columns in our matrix and return the indices of the top `n` movies, in order from most similar to least similar.\n",
201201
"\n",
202202
"*Challenge: Try to implement this function without using any loops, using `numpy` broadcasting.*"
203203
]
@@ -208,7 +208,7 @@
208208
"metadata": {},
209209
"outputs": [],
210210
"source": [
211-
"def suggest_movies(user_ratings):\n",
211+
"def suggest_movies(user_ratings, normalized_ratings, n=5):\n",
212212
" pass\n",
213213
"\n",
214214
"parth_ratings = {\n",
@@ -217,10 +217,10 @@
217217
" 6294: 5 # Harry Potter and the Goblet of Fire\n",
218218
"}\n",
219219
"\n",
220-
"full_ratings = np.array([parth_ratings.get(i, 0) for i in range(ratings.shape[1])])\n",
220+
"full_parth_ratings = np.array([parth_ratings.get(i, 0) for i in range(ratings.shape[1])])\n",
221221
"\n",
222222
"# As a sanity check, we've computed the value we got for the next line:\n",
223-
"suggest_movies(full_ratings) # => array([8911, 6294, 8460, 5399, 8434])"
223+
"suggest_movies(full_parth_ratings, normalized_ratings) # => array([8911, 6294, 8460, 5399, 8434])"
224224
]
225225
},
226226
{

solutions/lab-8/add.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
def add_all(arguments):
    """Return the sum of every item in *arguments* that can be read as a number.

    Each item is converted with ``float()``. Items that cannot be converted
    are skipped rather than aborting the whole sum.

    Args:
        arguments: Iterable of values, typically command-line strings.

    Returns:
        The total as a float; ``0.0`` when nothing was numeric or the
        iterable was empty.
    """
    total = 0.0
    for item in arguments:
        try:
            total += float(item)
        except (TypeError, ValueError):
            # ValueError: a string that is not a number (e.g. "abc").
            # TypeError: an object float() cannot handle at all (e.g. None).
            # Both mean "non-numeric", so both are skipped.
            continue
    return total
9+
10+
def help():
    """Return the two-line usage message for the add.py command-line tool."""
    usage_lines = (
        "Usage: python3 add.py <nums>",
        "Add some numbers together.",
    )
    return "\n".join(usage_lines)
13+
14+
if __name__ == '__main__':
    import sys

    # sys.argv[0] is the script path; everything after it is user input.
    arguments = sys.argv[1:]
    if arguments:
        print(add_all(arguments))
    else:
        # Nothing to add: show the usage text and signal failure to the shell.
        print(help())
        sys.exit(1)

solutions/lab-8/airlines.dat

Lines changed: 6162 additions & 0 deletions
Large diffs are not rendered by default.

solutions/lab-8/airports.dat

Lines changed: 7184 additions & 0 deletions
Large diffs are not rendered by default.

solutions/lab-8/flights.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import argparse
2+
import collections
3+
import csv
4+
5+
# Record types for the three OpenFlights tables. Field order mirrors the
# column order of the corresponding .dat file; the columns themselves are
# described at https://openflights.org/data.html
Airport = collections.namedtuple(
    'Airport',
    'id name city country faa_iata icao lat long alt utc_offset dst tz type source',
)
Airline = collections.namedtuple(
    'Airline',
    'id name alias iata icao callsign country active',
)
Route = collections.namedtuple(
    'Route',
    'airline airline_id source_airport source_airport_id '
    'dest_airport dest_airport_id codeshare stops equipment',
)
9+
10+
def load_data():
    """Load the airport, airline and route tables from the current directory.

    Reads ``airports.dat``, ``airlines.dat`` and ``routes.dat`` as CSV and
    wraps each row in the matching namedtuple. All values stay as the raw
    strings found in the files.

    Returns:
        A tuple ``(airports, airlines, routes)``:

        * ``airports`` -- dict mapping ``Airport.id`` to ``Airport``.
        * ``airlines`` -- dict mapping ``Airline.id`` to ``Airline``.
        * ``routes`` -- nested defaultdict where
          ``routes[source_airport][dest_airport]`` is the list of ``Route``
          rows between the two airports. The keys are the
          ``source_airport`` / ``dest_airport`` fields of each row, not the
          ``*_airport_id`` fields.

    Raises:
        OSError: If one of the data files cannot be opened.
        TypeError: If a row does not have the number of columns its
            namedtuple expects.
    """
    # The files are opened explicitly as UTF-8 (the encoding the OpenFlights
    # data page documents -- confirm if the data source changes) so the
    # result does not depend on the platform's default encoding.
    # newline='' is what the csv module requires so that it can handle
    # line endings and quoted newlines itself.
    with open('airports.dat', encoding='utf-8', newline='') as f:
        airports = {}
        for row in csv.reader(f):
            airport = Airport._make(row)
            airports[airport.id] = airport

    with open('airlines.dat', encoding='utf-8', newline='') as f:
        airlines = {}
        for row in csv.reader(f):
            airline = Airline._make(row)
            airlines[airline.id] = airline

    with open('routes.dat', encoding='utf-8', newline='') as f:
        # Outer level keyed by the source airport field, inner level keyed by
        # the destination airport field; several routes may share a pair.
        routes = collections.defaultdict(lambda: collections.defaultdict(list))
        for row in csv.reader(f):
            route = Route._make(row)
            routes[route.source_airport][route.dest_airport].append(route)

    return airports, airlines, routes
31+
32+
# def get_adjacent_airports(routes, airport):
33+
# return itertools.chain(*(.values() for x in d.values()))
34+
35+
def find_flights(routes, source_airport, destination_airport, max_segments):
    """Yield every itinerary from source to destination, fewest segments first.

    Performs a breadth-first search over *routes*. An itinerary never visits
    the same airport twice.

    Args:
        routes: Mapping from an airport to a collection of the airports
            directly reachable from it. Only the keys of the inner
            collection are used, so the nested dict built by ``load_data``
            works as-is. The mapping is never modified, and airports with
            no outgoing routes may simply be absent.
        source_airport: Airport where every itinerary starts.
        destination_airport: Airport where every itinerary ends.
        max_segments: Maximum number of flights (hops) in one itinerary.

    Yields:
        Lists of airports ``[source_airport, ..., destination_airport]``
        containing at most ``max_segments + 1`` entries.
    """
    if max_segments < 1:
        return

    # Breadth-first search; the pattern follows
    # http://eddmann.com/posts/depth-first-search-and-breadth-first-search-in-python/
    # A deque gives O(1) pops from the front, unlike list.pop(0).
    queue = collections.deque([(source_airport, [source_airport])])
    while queue:
        airport, path = queue.popleft()
        visited = set(path)
        # A path with N airports has N - 1 segments; one more hop makes N.
        # Only queue the extended path if it could itself still be extended.
        can_extend_further = len(path) < max_segments
        # .get() avoids inserting empty entries into a defaultdict and also
        # tolerates plain dicts that lack dead-end airports. Iterating the
        # collection directly keeps the output order stable between runs.
        for next_airport in routes.get(airport, ()):
            if next_airport in visited:
                continue
            if next_airport == destination_airport:
                yield path + [next_airport]
            elif can_extend_further:
                queue.append((next_airport, path + [next_airport]))
48+
49+
def build_parser():
    """Create the command-line parser for the flight finder.

    The parser takes three positional arguments: ``source`` and
    ``destination`` (strings) and ``segments`` (converted to ``int``).
    """
    cli = argparse.ArgumentParser(description='Find flights.')
    positional_specs = (
        ('source', {'help': 'source airport (e.g. SFO)'}),
        ('destination', {'help': 'destination airport (e.g. JFK)'}),
        ('segments', {'type': int, 'help': 'maximum number of segments'}),
    )
    for arg_name, options in positional_specs:
        cli.add_argument(arg_name, **options)
    return cli
55+
56+
if __name__ == '__main__':
    import sys

    parser = build_parser()
    args = parser.parse_args(sys.argv[1:])

    # Only the route table is needed for the search; the airport and airline
    # tables are loaded by load_data() but not used here.
    _airports, _airlines, routes = load_data()
    itineraries = find_flights(routes, args.source, args.destination, args.segments)
    for flight in itineraries:
        # One itinerary per line, e.g. "SFO -> ORD -> JFK".
        print(' -> '.join(flight))

0 commit comments

Comments
 (0)