-
Notifications
You must be signed in to change notification settings - Fork 0
/
compass_card_data.py
69 lines (57 loc) · 2.38 KB
/
compass_card_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from enum import Enum
import pandas
class CardActions(Enum):
    """Enumeration of card action kinds, numbered 1 through 5."""

    # Values are explicit (not auto()) so they stay stable if members move.
    TAP_IN = 1
    TAP_OUT = 2
    TRANSFER = 3
    LOADED = 4
    REFUND = 5
class CompassCardData:
    """Tabular view of a card transaction CSV with bus-stop helpers.

    The CSV is loaded into ``self.data`` (a ``pandas.DataFrame``). Its
    "Transaction" column is split into "Action" and "Location" columns, and
    the columns named in ``COLS_TO_DROP`` are then removed.
    """

    # Dropping columns occurs after parsing the Transaction column, so that
    # column can be dropped as well.
    COLS_TO_DROP = [
        "Transaction",
        "LineItem",
        "LocationDisplay",
        "OrderDate",
        "Payment",
        "OrderNumber",
        "AuthCode",
        "Total",
    ]

    def __init__(self, filepath):
        """Load the CSV at *filepath* and normalise its columns.

        Args:
            filepath: Anything ``pandas.read_csv`` accepts as its first
                argument.

        Raises:
            KeyError: If the CSV lacks a "Transaction" column or any of the
                columns listed in ``COLS_TO_DROP``.
        """
        self.data = pandas.read_csv(filepath)
        self.extract_transaction()
        self.data = self.data.drop(columns=self.COLS_TO_DROP)

    def extract_transaction(self) -> None:
        """Split the "Transaction" column into "Action" and "Location".

        Each transaction string is split at its first " at " separator:

        * Action: the text before the separator. This is the raw label from
          the file; it is not converted to a ``CardActions`` member.
        * Location: the text after the separator (per the original author's
          note, a SkyTrain station name or "Bus Stop xxxxx"). It is the empty
          string when the transaction has no " at " separator.

        Transaction values are expected to be strings.
        """
        actions = []
        locations = []
        for transaction in self.data["Transaction"]:
            # partition() splits only once, so a location that itself contains
            # " at " is kept whole, and a missing separator yields "" instead
            # of raising IndexError.
            action, _, location = transaction.partition(" at ")
            actions.append(action)
            locations.append(location)
        self.data["Action"] = actions
        self.data["Location"] = locations

    def get_bus_trips(self) -> "pandas.DataFrame":
        """Return the rows of ``self.data`` whose location is a bus stop.

        A row qualifies when the first space-separated word of its "Location"
        is "Bus". The result carries an extra float column "Stop" holding the
        last space-separated word of the location (the stop number).

        Returns:
            A new DataFrame; ``self.data`` is left unmodified.
        """
        first_word = self.data["Location"].str.split(" ").str[0]
        # Copy explicitly: adding a column to a filtered slice of self.data
        # would otherwise trigger pandas' SettingWithCopyWarning.
        bus_trips = self.data[first_word == "Bus"].copy()
        bus_trips["Stop"] = (
            bus_trips["Location"].str.split(" ").str[-1].astype(float)
        )
        return bus_trips

    def get_bus_stops_with_frequencies(self) -> "pandas.DataFrame":
        """Count how many bus transactions occurred at each stop.

        Returns:
            A DataFrame with two columns:

            * stop_code: the bus stop code, as an integer
            * frequency: the number of rows in ``self.get_bus_trips()`` with
              that stop code

            Rows are ordered by each stop's first appearance in the data.
        """
        stop_codes = self.get_bus_trips()["Stop"].astype(int)
        # sort=False keeps groups in first-appearance order.
        counts = stop_codes.groupby(stop_codes, sort=False).size()
        return pandas.DataFrame(
            {
                "stop_code": counts.index.tolist(),
                "frequency": counts.tolist(),
            }
        )