-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexplore.py
61 lines (50 loc) · 1.9 KB
/
explore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from pymongo import MongoClient
import pprint
# Connect to the MongoDB server on localhost and select the "test"
# database; every query function below goes through this handle.
client = MongoClient("mongodb://localhost:27017")
db = client["test"]
# Print the "limit" most frequently occurring amenities
def find_amenities(limit):
    """Print the ``limit`` most common ``amenity`` values in austin_texas.

    Documents that have an ``amenity`` field are grouped by its value and
    counted; the groups are sorted by descending count and cut off after
    ``limit`` entries.

    Args:
        limit: Value handed to the pipeline's ``$limit`` stage.

    Returns:
        None. The grouped results are pretty-printed to stdout.
    """
    pipeline = [
        {"$match": {"amenity": {"$exists": 1}}},
        {"$group": {"_id": "$amenity", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
        {"$limit": limit},
    ]
    cursor = db.austin_texas.aggregate(pipeline)

    print("\n- Amenities -")
    pprint.pprint(list(cursor))
# Nothing problematic about amenities
def find_streets():
    """Tally and print the street types found in ``db.sample_20``.

    The street type is the last whitespace-separated word of each
    document's ``address.street`` value (for example ``"Main St"`` gives
    ``"St"``). The tally is stored as one document in ``db.street_types``
    and then pretty-printed under a "- Street Types -" heading.

    Returns:
        None. Results go to the database and to stdout.
    """
    # NOTE(review): this reads from sample_20 while the other reports in
    # this file read from austin_texas -- confirm that is intended.
    query = {"address.street": {"$exists": 1}}
    projection = {"_id": 0, "address.street": 1}
    streets = db.sample_20.find(query, projection)

    street_type_dict = {}
    for street in streets:
        street_name = street.get('address').get('street')
        words = street_name.split()
        if not words:
            # A blank street name has no last word to classify.
            continue
        street_type = words[-1]
        street_type_dict[street_type] = (
            street_type_dict.get(street_type, 0) + 1
        )

    # Store a copy: insert_one() adds an "_id" key to the mapping it is
    # given, which would otherwise show up in the printed tally.
    # NOTE(review): street types such as "St." contain a dot; some
    # MongoDB/PyMongo versions reject dotted field names -- confirm.
    db.street_types.insert_one(dict(street_type_dict))

    print("\n- Street Types -")
    pprint.pprint(street_type_dict)
# A few street types are abbreviations that could be normalized.
# Find all bars with opening hours
def find_bar_hours():
    """Print the name and opening hours of every bar that lists them.

    Runs an aggregation on ``db.austin_texas`` that keeps documents whose
    ``amenity`` is ``"bar"`` and that have an ``opening_hours`` field,
    then reshapes each one to ``{"_id": <name>, "hours": <opening_hours>}``.

    Returns:
        None. The projected documents are pretty-printed to stdout.
    """
    match_stage = {
        "$match": {"amenity": "bar", "opening_hours": {"$exists": 1}}
    }
    project_stage = {
        "$project": {"_id": "$name", "hours": "$opening_hours"}
    }
    cursor = db.austin_texas.aggregate([match_stage, project_stage])

    print("\n- Bar Hours in Austin -")
    pprint.pprint(list(cursor))
if __name__ == "__main__":
    # Only the bar-hours report runs by default; call find_amenities(5)
    # or find_streets() here instead to produce the other reports.
    find_bar_hours()