-
Notifications
You must be signed in to change notification settings - Fork 527
/
Chapter 3 - Advanced SQLAlchemy Queries.py
198 lines (136 loc) · 6.86 KB
/
Chapter 3 - Advanced SQLAlchemy Queries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
#Chapter 3 - Advanced SQLAlchemy Queries
#*******************************************************************************************#
#Connecting to a MySQL Database
# Import create_engine function
from sqlalchemy import create_engine
# Create an engine to the census database
engine = create_engine('mysql+pymysql://'+'student:datacamp'+'@courses.csrrinzqubik.us-east-1.rds.amazonaws.com:3306/'+'census')
# Print the table names
print(engine.table_names())
#*******************************************************************************************#
#Calculating a Difference between Two Columns
# Build query to return state names by population difference from 2008 to 2000: stmt
stmt = select([census.columns.state, (census.columns.pop2008-census.columns.pop2000).label('pop_change')])
# Append group by for the state: stmt
stmt = stmt.group_by(census.columns.state)
# Append order by for pop_change descendingly: stmt
stmt = stmt.order_by(desc('pop_change'))
# Return only 5 results: stmt
stmt = stmt.limit(5)
# Use connection to execute the statement and fetch all results
results = connection.execute(stmt).fetchall()
# Print the state and population change for each record
for result in results:
print('{}:{}'.format(result.state, result.pop_change))
#*******************************************************************************************#
#Determining the Overall Percentage of Females
# import case, cast and Float from sqlalchemy
from sqlalchemy import case, cast, Float
# Build an expression to calculate female population in 2000
female_pop2000 = func.sum(
case([
(census.columns.sex == 'F', census.columns.pop2000)
], else_=0))
# Cast an expression to calculate total population in 2000 to Float
total_pop2000 = cast(func.sum(census.columns.pop2000), Float)
# Build a query to calculate the percentage of females in 2000: stmt
stmt = select([female_pop2000 / total_pop2000 * 100])
# Execute the query and store the scalar result: percent_female
percent_female = connection.execute(stmt).scalar()
# Print the percentage
print(percent_female)
#*******************************************************************************************#
#Automatic Joins with an Established Relationship
# Build a statement to join census and state_fact tables: stmt
stmt = select([census.columns.pop2000, state_fact.columns.abbreviation])
# Execute the statement and get the first result: result
result = connection.execute(stmt).first()
# Loop over the keys in the result object and print the key and value
for key in result.keys():
print(key, getattr(result, key))
#*******************************************************************************************#
#Joins
# Build a statement to select the census and state_fact tables: stmt
stmt = select([census, state_fact])
# Add a select_from clause that wraps a join for the census and state_fact
# tables where the census state column and state_fact name column match
stmt = stmt.select_from(
census.join(state_fact, census.columns.state == state_fact.columns.name))
# Execute the statement and get the first result: result
result = connection.execute(stmt).first()
# Loop over the keys in the result object and print the key and value
for key in result.keys():
print(key, getattr(result, key))
#*******************************************************************************************#
## More Practice with Joins
# Build a statement to select the state, sum of 2008 population and census
# division name: stmt
stmt = select([
census.columns.state,
func.sum(census.columns.pop2008),
state_fact.columns.census_division_name
])
# Append select_from to join the census and state_fact tables by the census state and state_fact name columns
stmt = stmt.select_from(
census.join(state_fact, census.columns.state == state_fact.columns.name)
)
# Append a group by for the state_fact name column
stmt = stmt.group_by(state_fact.columns.name)
# Execute the statement and get the results: results
results = connection.execute(stmt).fetchall()
# Loop over the the results object and print each record.
for record in results:
print(record)
#*******************************************************************************************#
#Using alias to handle same table joined queries
# Make an alias of the employees table: managers
managers = employees.alias()
# Build a query to select manager's and their employees names: stmt
stmt = select(
[managers.columns.name.label('manager'),
employees.columns.name.label('employee')]
)
# Match managers id with employees mgr: stmt
stmt = stmt.where(managers.columns.id == employees.columns.mgr)
# Order the statement by the managers name: stmt
stmt = stmt.order_by(managers.columns.name)
# Execute statement: results
results = connection.execute(stmt).fetchall()
# Print records
for record in results:
print(record)
#*******************************************************************************************#
## Leveraging Functions and Group_bys with Hierarchical Data
# Make an alias of the employees table: managers
managers = employees.alias()
# Build a query to select managers and counts of their employees: stmt
stmt = select([managers.columns.name, func.count(employees.columns.id)])
# Append a where clause that ensures the manager id and employee mgr are equal
stmt = stmt.where(managers.columns.id == employees.columns.mgr)
# Group by Managers Name
stmt = stmt.group_by(managers.columns.name)
# Execute statement: results
results = connection.execute(stmt).fetchall()
# print manager
for record in results:
print(record)
#*******************************************************************************************#
#Working on Blocks of Records
# Start a while loop checking for more results
while more_results:
# Fetch the first 50 results from the ResultProxy: partial_results
partial_results = results_proxy.fetchmany(50)
# if empty list, set more_results to False
if partial_results == []:
more_results = False
# Loop over the fetched records and increment the count for the state
for row in partial_results:
if row.state in state_count:
state_count[row.state] +=1
else:
state_count[row.state] = 1
# Close the ResultProxy, and thus the connection
results_proxy.close()
# Print the count by state
print(state_count)
#*******************************************************************************************#