Skip to content

Commit f3e7df9

Browse files
committed
Updated sample route data, wait times example
1 parent ec6889b commit f3e7df9

12 files changed

+358
-346
lines changed

1-processing/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ We've developed an algorithm to approximate bus arrival times from the raw geolo
1010

1111
## Running the Example Notebook
1212

13-
The `sample_routes_data_15s.json` file that the example notebook loads is zipped up in `sample_routes_data_15s.rar`, so you'll have to extract it before running the notebook.
13+
The `sample_routes_data_pst_15s.json` file that the example notebook loads is compressed into the `sample_routes_data_pst_15s.rar` archive, so you'll have to extract it before running the notebook.
-441 KB
Binary file not shown.
442 KB
Binary file not shown.
-192 KB
Binary file not shown.
189 KB
Binary file not shown.

1-processing/sample_stop_computation.ipynb

Lines changed: 73 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
"metadata": {},
4646
"outputs": [],
4747
"source": [
48-
"def load_json(filename = 'sample_routes_data_15s.json'):\n",
48+
"def load_json(filename = 'sample_routes_data_pst_15s.json'):\n",
4949
" with open(filename, 'r') as f:\n",
5050
" return json.load(f)"
5151
]
@@ -304,10 +304,10 @@
304304
"name": "stdout",
305305
"output_type": "stream",
306306
"text": [
307-
"Thu Feb 28 09:20:45 PM: Starting with 1.\n",
308-
"Thu Feb 28 09:34:23 PM: Starting with 14.\n",
309-
"Thu Feb 28 09:38:49 PM: skipping buses for 37742 and 14 due to ValueError: Can only compare identically-labeled Series objects\n",
310-
"Thu Feb 28 09:39:14 PM: skipping buses for 7742 and 14 due to ValueError: Can only compare identically-labeled Series objects\n"
307+
"Sun Mar 03 11:58:42 AM: Starting with 14.\n",
308+
"Sun Mar 03 12:00:47 PM: skipping buses for 37742 and 14 due to ValueError: Can only compare identically-labeled Series objects\n",
309+
"Sun Mar 03 12:01:00 PM: skipping buses for 7742 and 14 due to ValueError: Can only compare identically-labeled Series objects\n",
310+
"Sun Mar 03 12:02:47 PM: Starting with 1.\n"
311311
]
312312
}
313313
],
@@ -363,101 +363,101 @@
363363
" </thead>\n",
364364
" <tbody>\n",
365365
" <tr>\n",
366-
" <th>45474</th>\n",
367-
" <td>14___O_F00</td>\n",
368-
" <td>14</td>\n",
369-
" <td>5836</td>\n",
370-
" <td>2018-10-15 14:51:48-08:00</td>\n",
371-
" <td>7261</td>\n",
372-
" </tr>\n",
373-
" <tr>\n",
374-
" <th>37106</th>\n",
375-
" <td>1____I_F00</td>\n",
366+
" <th>79014</th>\n",
367+
" <td>1____O_F00</td>\n",
376368
" <td>1</td>\n",
377-
" <td>4024</td>\n",
378-
" <td>2018-10-15 11:38:45-08:00</td>\n",
379-
" <td>5625</td>\n",
369+
" <td>6311</td>\n",
370+
" <td>2018-10-15 22:54:43-08:00</td>\n",
371+
" <td>5633</td>\n",
380372
" </tr>\n",
381373
" <tr>\n",
382-
" <th>54937</th>\n",
374+
" <th>39052</th>\n",
383375
" <td>1____I_F00</td>\n",
384376
" <td>1</td>\n",
385-
" <td>6296</td>\n",
386-
" <td>2018-10-15 14:55:48-08:00</td>\n",
387-
" <td>5626</td>\n",
377+
" <td>6310</td>\n",
378+
" <td>2018-10-15 12:12:45-08:00</td>\n",
379+
" <td>5596</td>\n",
388380
" </tr>\n",
389381
" <tr>\n",
390-
" <th>51343</th>\n",
382+
" <th>31858</th>\n",
391383
" <td>1____I_F00</td>\n",
392384
" <td>1</td>\n",
393-
" <td>6320</td>\n",
394-
" <td>2018-10-15 14:15:03-08:00</td>\n",
395-
" <td>5633</td>\n",
385+
" <td>3827</td>\n",
386+
" <td>2018-10-15 10:53:29-08:00</td>\n",
387+
" <td>5545</td>\n",
396388
" </tr>\n",
397389
" <tr>\n",
398-
" <th>25377</th>\n",
399-
" <td>1____O_F00</td>\n",
400-
" <td>1</td>\n",
401-
" <td>6295</td>\n",
402-
" <td>2018-10-15 09:25:27-08:00</td>\n",
403-
" <td>5509</td>\n",
390+
" <th>58465</th>\n",
391+
" <td>14___O_F00</td>\n",
392+
" <td>14</td>\n",
393+
" <td>5566</td>\n",
394+
" <td>2018-10-15 18:19:53-08:00</td>\n",
395+
" <td>7228</td>\n",
404396
" </tr>\n",
405397
" <tr>\n",
406-
" <th>47894</th>\n",
398+
" <th>45812</th>\n",
407399
" <td>1____I_F00</td>\n",
408400
" <td>1</td>\n",
409-
" <td>4022</td>\n",
410-
" <td>2018-10-15 13:36:17-08:00</td>\n",
411-
" <td>5566</td>\n",
401+
" <td>4025</td>\n",
402+
" <td>2018-10-15 13:26:32-08:00</td>\n",
403+
" <td>5545</td>\n",
412404
" </tr>\n",
413405
" <tr>\n",
414-
" <th>28874</th>\n",
415-
" <td>1____I_F00</td>\n",
406+
" <th>59382</th>\n",
407+
" <td>14___O_F00</td>\n",
408+
" <td>14</td>\n",
409+
" <td>5536</td>\n",
410+
" <td>2018-10-15 18:34:08-08:00</td>\n",
411+
" <td>7202</td>\n",
412+
" </tr>\n",
413+
" <tr>\n",
414+
" <th>38412</th>\n",
415+
" <td>1____O_F00</td>\n",
416416
" <td>1</td>\n",
417-
" <td>4025</td>\n",
418-
" <td>2018-10-15 10:04:13-08:00</td>\n",
417+
" <td>4015</td>\n",
418+
" <td>2018-10-15 12:05:45-08:00</td>\n",
419419
" <td>5556</td>\n",
420420
" </tr>\n",
421421
" <tr>\n",
422-
" <th>23809</th>\n",
422+
" <th>11200</th>\n",
423423
" <td>1____I_F00</td>\n",
424424
" <td>1</td>\n",
425-
" <td>4021</td>\n",
426-
" <td>2018-10-15 09:07:42-08:00</td>\n",
427-
" <td>5575</td>\n",
425+
" <td>4026</td>\n",
426+
" <td>2018-10-15 07:03:24-08:00</td>\n",
427+
" <td>5553</td>\n",
428428
" </tr>\n",
429429
" <tr>\n",
430-
" <th>35908</th>\n",
431-
" <td>14___I_F00</td>\n",
432-
" <td>14</td>\n",
433-
" <td>5624</td>\n",
434-
" <td>2018-10-15 12:40:16-08:00</td>\n",
435-
" <td>7213</td>\n",
430+
" <th>55589</th>\n",
431+
" <td>1____I_F00</td>\n",
432+
" <td>1</td>\n",
433+
" <td>3846</td>\n",
434+
" <td>2018-10-15 15:16:04-08:00</td>\n",
435+
" <td>5617</td>\n",
436436
" </tr>\n",
437437
" <tr>\n",
438-
" <th>44929</th>\n",
438+
" <th>41420</th>\n",
439439
" <td>1____I_F00</td>\n",
440440
" <td>1</td>\n",
441-
" <td>4277</td>\n",
442-
" <td>2018-10-15 13:02:16-08:00</td>\n",
443-
" <td>5566</td>\n",
441+
" <td>4016</td>\n",
442+
" <td>2018-10-15 12:38:31-08:00</td>\n",
443+
" <td>5553</td>\n",
444444
" </tr>\n",
445445
" </tbody>\n",
446446
"</table>\n",
447447
"</div>"
448448
],
449449
"text/plain": [
450450
" DID ROUTE SID TIME VID\n",
451-
"45474 14___O_F00 14 5836 2018-10-15 14:51:48-08:00 7261\n",
452-
"37106 1____I_F00 1 4024 2018-10-15 11:38:45-08:00 5625\n",
453-
"54937 1____I_F00 1 6296 2018-10-15 14:55:48-08:00 5626\n",
454-
"51343 1____I_F00 1 6320 2018-10-15 14:15:03-08:00 5633\n",
455-
"25377 1____O_F00 1 6295 2018-10-15 09:25:27-08:00 5509\n",
456-
"47894 1____I_F00 1 4022 2018-10-15 13:36:17-08:00 5566\n",
457-
"28874 1____I_F00 1 4025 2018-10-15 10:04:13-08:00 5556\n",
458-
"23809 1____I_F00 1 4021 2018-10-15 09:07:42-08:00 5575\n",
459-
"35908 14___I_F00 14 5624 2018-10-15 12:40:16-08:00 7213\n",
460-
"44929 1____I_F00 1 4277 2018-10-15 13:02:16-08:00 5566"
451+
"79014 1____O_F00 1 6311 2018-10-15 22:54:43-08:00 5633\n",
452+
"39052 1____I_F00 1 6310 2018-10-15 12:12:45-08:00 5596\n",
453+
"31858 1____I_F00 1 3827 2018-10-15 10:53:29-08:00 5545\n",
454+
"58465 14___O_F00 14 5566 2018-10-15 18:19:53-08:00 7228\n",
455+
"45812 1____I_F00 1 4025 2018-10-15 13:26:32-08:00 5545\n",
456+
"59382 14___O_F00 14 5536 2018-10-15 18:34:08-08:00 7202\n",
457+
"38412 1____O_F00 1 4015 2018-10-15 12:05:45-08:00 5556\n",
458+
"11200 1____I_F00 1 4026 2018-10-15 07:03:24-08:00 5553\n",
459+
"55589 1____I_F00 1 3846 2018-10-15 15:16:04-08:00 5617\n",
460+
"41420 1____I_F00 1 4016 2018-10-15 12:38:31-08:00 5553"
461461
]
462462
},
463463
"execution_count": 8,
@@ -489,15 +489,15 @@
489489
"output_type": "stream",
490490
"text": [
491491
"<class 'pandas.core.frame.DataFrame'>\n",
492-
"RangeIndex: 24478 entries, 0 to 24477\n",
492+
"RangeIndex: 24517 entries, 0 to 24516\n",
493493
"Data columns (total 5 columns):\n",
494-
"DID 24478 non-null object\n",
495-
"ROUTE 24478 non-null object\n",
496-
"SID 24478 non-null object\n",
497-
"TIME 24478 non-null object\n",
498-
"VID 24478 non-null object\n",
494+
"DID 24517 non-null object\n",
495+
"ROUTE 24517 non-null object\n",
496+
"SID 24517 non-null object\n",
497+
"TIME 24517 non-null object\n",
498+
"VID 24517 non-null object\n",
499499
"dtypes: object(5)\n",
500-
"memory usage: 8.4 MB\n"
500+
"memory usage: 8.5 MB\n"
501501
]
502502
}
503503
],
@@ -507,11 +507,11 @@
507507
},
508508
{
509509
"cell_type": "code",
510-
"execution_count": 11,
510+
"execution_count": 12,
511511
"metadata": {},
512512
"outputs": [],
513513
"source": [
514-
"with open('sample_routes_stops_15s.json', 'w') as f:\n",
514+
"with open('sample_routes_stops_pst_15s.json', 'w') as f:\n",
515515
" sample_stops.reset_index().to_json(f)"
516516
]
517517
},

2-analysis/README.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,13 @@
1-
hello
1+
# Challenge 2: Data Analysis
2+
3+
## Background/Motivation
4+
5+
With the amount of historical data that we have access to, there are many possible directions for data analysis. Broadly speaking, we're interested in measuring the efficiency of public transit in the city, and how closely it matches expectations (e.g., if a bus is supposed to arrive at a stop every 15 minutes, how often does it actually do so?).
6+
7+
## Challenge
8+
9+
We've provided a small sample of bus arrival data from two routes, and a notebook that produces the average wait times for stops along those routes. Can you produce other useful metrics? Are there any interesting patterns that appear in the data?
10+
11+
## Running the Example Notebook
12+
13+
The `sample_routes_stops_pst_15s.json` file that the example notebook loads is compressed into the `sample_routes_stops_pst_15s.rar` archive, so you'll have to extract it before running the notebook.

2-analysis/sample_routes_data_15s.rar

-441 KB
Binary file not shown.
442 KB
Binary file not shown.
-192 KB
Binary file not shown.

0 commit comments

Comments
 (0)