stanfordpython
diff --git a/‎.DS_Store
0 Bytes b/‎.DS_Store
0 Bytes
diff --git a/‎notebooks/lab-8/sl-tpl-ml-part-1.ipynb
Lines changed: 6 additions & 6 deletions b/‎notebooks/lab-8/sl-tpl-ml-part-1.ipynb
Lines changed: 6 additions & 6 deletions
diff --git a/‎solutions/lab-8/add.py
Lines changed: 20 additions & 0 deletions b/‎solutions/lab-8/add.py
Lines changed: 20 additions & 0 deletions
diff --git a/‎solutions/lab-8/airlines.dat
Lines changed: 6162 additions & 0 deletions b/‎solutions/lab-8/airlines.dat
Lines changed: 6162 additions & 0 deletions
diff --git a/‎solutions/lab-8/airports.dat
Lines changed: 7184 additions & 0 deletions b/‎solutions/lab-8/airports.dat
Lines changed: 7184 additions & 0 deletions
diff --git a/‎solutions/lab-8/flights.py
Lines changed: 63 additions & 0 deletions b/‎solutions/lab-8/flights.py
Lines changed: 63 additions & 0 deletions
@@ -171,8 +171,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### `suggest_movies(user_ratings)`\n",
-    "Given a user's rating of all of the movies (i.e., a 9125-dimensional vector with entries between 0 and 5, where 0s represent un-rated movies), this function will return the indices of the **top 5 movies** that match with that user. We'll do this using cosine similarity.\n",
+    "### `suggest_movies(user_ratings, normalized_ratings, n=5)`\n",
+    "Given a user's rating of all of the movies (i.e., a 9125-dimensional vector with entries between 0 and 5, where 0s represent un-rated movies), this function will return the indices of the **top `n` movies** that match with that user. We'll do this using cosine similarity.\n",
     "\n",
     "First, we'll compute a `movie_profile` for each user, which will be a 671-dimensional vector that combines the ratings we received as input with the ratings from the rest of the users. We do this by scaling each column of the matrix by the user's rating of that movie and then adding together all of the columns. For example, if the user rated Inside Out as a 4, Frozen 2 as a 3, and didn't rate any other movies, their profile would be:\n",
     "\n",
@@ -197,7 +197,7 @@
     "\n",
     "The cosine similarity between two vectors $x = (x_1, x_2, \\dots, x_n)$ and $y = (y_1, y_2, \\dots, y_n)$ (which both have norm 1) is defined as their dot product, or the sum of element-wise products of their entries: $x_1 y_1 + x_2 y_2 + \\cdots + x_n y_n$. You can think of the cosine similarity as an estimation of the \"closeness\" between the two vectors.\n",
     "\n",
-    "Find the movies that are closest to our `movie_profile`: compute the cosine similarity between the `movie_profile` and each of the columns in our matrix and return the indices of the top 5 movies, in order from most similar to least similar.\n",
+    "Find the movies that are closest to our `movie_profile`: compute the cosine similarity between the `movie_profile` and each of the columns in our matrix and return the indices of the top `n` movies, in order from most similar to least similar.\n",
     "\n",
     "*Challenge: Try to implement this function without using any loops, using `numpy` broadcasting.*"
    ]
@@ -208,7 +208,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def suggest_movies(user_ratings):\n",
+    "def suggest_movies(user_ratings, normalized_ratings, n=5):\n",
     "    pass\n",
     "\n",
     "parth_ratings = {\n",
@@ -217,10 +217,10 @@
     "    6294: 5  # Harry Potter and the Goblet of Fire\n",
     "}\n",
     "\n",
-    "full_ratings = np.array([parth_ratings.get(i, 0) for i in range(ratings.shape[1])])\n",
+    "full_parth_ratings = np.array([parth_ratings.get(i, 0) for i in range(ratings.shape[1])])\n",
     "\n",
     "# As a sanity check, we've computed the value we got for the next line:\n",
-    "suggest_movies(full_ratings) # => array([8911, 6294, 8460, 5399, 8434])"
+    "suggest_movies(full_parth_ratings, normalized_ratings) # => array([8911, 6294, 8460, 5399, 8434])"
    ]
   },
   {
 
@@ -0,0 +1,20 @@
+def add_all(arguments):
+    """Add up every entry of ``arguments`` that ``float()`` can parse.
+
+    Entries for which ``float()`` raises ``ValueError`` are ignored.
+    The result is always a float; with nothing to add it is ``0.0``.
+    """
+    running_sum = 0.0
+    for raw in arguments:
+        try:
+            number = float(raw)
+        except ValueError:
+            # Non-numeric entry: leave it out of the sum.
+            continue
+        running_sum += number
+    return running_sum
+
+def help():
+    """Return the two-line usage message for the add.py command line."""
+    usage_lines = (
+        "Usage: python3 add.py <nums>",
+        "    Add some numbers together.",
+    )
+    return "\n".join(usage_lines)
+
+if __name__ == '__main__':
+    import sys
+
+    # Everything after the executable name is a candidate number to add.
+    arguments = sys.argv[1:]
+    if arguments:
+        print(add_all(arguments))
+    else:
+        # Nothing to add: show the usage text and exit with status 1.
+        print(help())
+        sys.exit(1)
@@ -0,0 +1,63 @@
+import argparse
+import collections
+import csv
+
+# Record types for the three OpenFlights tables. The meaning of each field
+# name is described at https://openflights.org/data.html
+Airport = collections.namedtuple('Airport', [
+    'id', 'name', 'city', 'country', 'faa_iata', 'icao', 'lat', 'long',
+    'alt', 'utc_offset', 'dst', 'tz', 'type', 'source',
+])
+Airline = collections.namedtuple('Airline', [
+    'id', 'name', 'alias', 'iata', 'icao', 'callsign', 'country', 'active',
+])
+Route = collections.namedtuple('Route', [
+    'airline', 'airline_id',
+    'source_airport', 'source_airport_id',
+    'dest_airport', 'dest_airport_id',
+    'codeshare', 'stops', 'equipment',
+])
+
+def _read_records(filename, record_type):
+    """Yield one ``record_type`` namedtuple per non-blank CSV row of ``filename``."""
+    # newline='' is what the csv module asks for so that newlines embedded in
+    # quoted fields are parsed correctly. The encoding is pinned rather than
+    # left to the platform default; OpenFlights documents its data files as
+    # UTF-8 (https://openflights.org/data.html).
+    with open(filename, encoding='utf-8', newline='') as f:
+        for row in csv.reader(f):
+            if row:  # csv.reader yields [] for blank lines; skip them
+                yield record_type._make(row)
+
+def load_data():
+    """Load the OpenFlights airport, airline and route tables.
+
+    Reads ``airports.dat``, ``airlines.dat`` and ``routes.dat`` (in that
+    order) from the current working directory. Every field is kept as the
+    string produced by ``csv.reader``; blank lines are skipped.
+
+    Returns:
+        A tuple ``(airports, airlines, routes)``:
+        ``airports`` maps each ``Airport.id`` to its ``Airport`` record,
+        ``airlines`` maps each ``Airline.id`` to its ``Airline`` record, and
+        ``routes[source_airport][dest_airport]`` is the list of ``Route``
+        records between those two airports (a nested ``defaultdict``).
+
+    Raises:
+        OSError: if a data file cannot be opened.
+        TypeError: if a non-blank row has the wrong number of fields.
+    """
+    airports = {
+        airport.id: airport
+        for airport in _read_records('airports.dat', Airport)
+    }
+    airlines = {
+        airline.id: airline
+        for airline in _read_records('airlines.dat', Airline)
+    }
+
+    # Top level is keyed by the route's source_airport field, the next level
+    # by its dest_airport field (the airport codes, not the *_id columns).
+    routes = collections.defaultdict(lambda: collections.defaultdict(list))
+    for route in _read_records('routes.dat', Route):
+        routes[route.source_airport][route.dest_airport].append(route)
+
+    return airports, airlines, routes
+
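+# TODO: unfinished sketch of a helper -- not valid Python as written, and
+# itertools is not imported by this file.
+# def get_adjacent_airports(routes, airport):
+#     return itertools.chain(*(.values() for x in d.values()))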
+
+def find_flights(routes, source_airport, destination_airport, max_segments):
+    """Yield every itinerary of at most ``max_segments`` flights.
+
+    Runs a breadth-first search over ``routes``, so itineraries with fewer
+    segments are yielded before longer ones.
+
+    Args:
+        routes: mapping of source airport -> mapping keyed by the airports
+            directly reachable from it. It is only read, never modified.
+        source_airport: airport to start from.
+        destination_airport: airport to reach.
+        max_segments: maximum number of flights in one itinerary.
+
+    Yields:
+        Lists of airports ``[source_airport, ..., destination_airport]``.
+        No airport appears twice in an itinerary, and the search never
+        continues through the destination.
+    """
+    if max_segments < 1:
+        return
+    # We implement a basic BFS algorithm for following the routes
+    # Taken from http://eddmann.com/posts/depth-first-search-and-breadth-first-search-in-python/
+    # A deque pops from the front in O(1); list.pop(0) is O(n).
+    queue = collections.deque([(source_airport, [source_airport])])
+    while queue:
+        airport, path = queue.popleft()
+        visited = set(path)
+        # .get() avoids inserting empty entries when routes is a defaultdict
+        # and tolerates airports without outgoing routes in a plain dict.
+        # Iterating the mapping itself (rather than a set of strings) keeps
+        # the output order the same from run to run.
+        for next_airport in routes.get(airport, ()):
+            if next_airport in visited:
+                continue
+            if next_airport == destination_airport:
+                yield path + [next_airport]
+            elif len(path) + 1 <= max_segments:
+                # Only queue paths that can still be extended within the limit.
+                queue.append((next_airport, path + [next_airport]))
+
+def build_parser():
+    """Create the command-line parser for the flight finder.
+
+    The parser takes three positional arguments: ``source``,
+    ``destination`` and ``segments`` (converted to ``int``).
+    """
+    cli = argparse.ArgumentParser(description='Find flights.')
+    positionals = (
+        ('source', {'help': 'source airport (e.g. SFO)'}),
+        ('destination', {'help': 'destination airport (e.g. JFK)'}),
+        ('segments', {'type': int, 'help': 'maximum number of segments'}),
+    )
+    for arg_name, options in positionals:
+        cli.add_argument(arg_name, **options)
+    return cli
+
+if __name__ == '__main__':
+    import sys
+
+    # Read source, destination and segment limit from the command line.
+    args = build_parser().parse_args(sys.argv[1:])
+
+    # load_data() returns (airports, airlines, routes); only the routes
+    # table is used by the search.
+    routes = load_data()[2]
+    itineraries = find_flights(routes, args.source, args.destination, args.segments)
+    for flight in itineraries:
+        print(' -> '.join(flight))