-
Notifications
You must be signed in to change notification settings - Fork 527
/
Chapter 5 -Putting it all together.py
164 lines (104 loc) · 5.7 KB
/
Chapter 5 -Putting it all together.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#Chapter 5 - Putting it all together
#*******************************************************************************************#
#Setup the Engine and MetaData
# Import create_engine, MetaData
from sqlalchemy import create_engine, MetaData
# Define an engine to connect to chapter5.sqlite: engine
engine = create_engine('sqlite:///chapter5.sqlite')
# Initialize MetaData: metadata
metadata = MetaData()
#*******************************************************************************************#
#Create the Table to the Database
# Import Table, Column, String, and Integer
from sqlalchemy import Table, Column, String, Integer
# Build a census table: census
census = Table('census', metadata,
Column('state', String(30)),
Column('sex', String(1)),
Column('age', Integer()),
Column('pop2000', Integer()),
Column('pop2008', Integer()))
# Create the table in the database
metadata.create_all(engine)
#*******************************************************************************************#
#Reading the Data from the CSV
# Create an empty list: values_list
values_list = []
# Iterate over the rows
for row in csv_reader:
# Create a dictionary with the values
data = {'state': row[0], 'sex': row[1], 'age': row[2], 'pop2000': row[3],
'pop2008': row[4]}
# Append the dictionary to the values list
values_list.append(data)
#*******************************************************************************************#
#Load Data from a list into the Table
# Import insert
from sqlalchemy import insert
# Build insert statement: stmt
stmt = insert(census)
# Use values_list to insert data: results
results = connection.execute(stmt, values_list)
# Print rowcount
print(results.rowcount)
#*******************************************************************************************#
#Build a Query to Determine the Average Age by Population
# Import select
from sqlalchemy import select
# Calculate weighted average age: stmt
stmt = select([census.columns.sex,
(func.sum(census.columns.pop2008 * census.columns.age) /
func.sum(census.columns.pop2008)).label('average_age')
])
# Group by sex
stmt = stmt.group_by(census.columns.sex)
# Execute the query and store the results: results
results = connection.execute(stmt).fetchall()
# Print the average age by sex
for result in results:
print(result.sex, result.average_age)
#*******************************************************************************************#
#Build a Query to Determine the Percentage of Population by Gender and State
# import case, cast and Float from sqlalchemy
from sqlalchemy import case, cast, Float
# Build a query to calculate the percentage of females in 2000: stmt
stmt = select([census.columns.state,
(func.sum(
case([
(census.columns.sex == 'F', census.columns.pop2000)
], else_=0)) /
cast(func.sum(census.columns.pop2000), Float) * 100).label('percent_female')
])
# Group By state
stmt = stmt.group_by(census.columns.state)
# Execute the query and store the results: results
results = connection.execute(stmt).fetchall()
# Print the percentage
for result in results:
print(result.state, result.percent_female)
#*******************************************************************************************#
#Build a Query to Determine the Difference by State from the 2000 and 2008 Censuses
# Build query to return state name and population difference from 2008 to 2000
stmt = select([census.columns.state,
(census.columns.pop2008-census.columns.pop2000).label('pop_change')
])
# Group by State
stmt = stmt.group_by(census.columns.state)
# Order by Population Change
stmt = stmt.order_by(desc('pop_change'))
# Limit to top 10
stmt = stmt.limit(10)
# Use connection to execute the statement and fetch all results
results = connection.execute(stmt).fetchall()
# Print the state and population change for each record
for result in results:
print('{}:{}'.format(result.state, result.pop_change))
#*******************************************************************************************#
#*******************************************************************************************#
#*******************************************************************************************#
#*******************************************************************************************#
#*******************************************************************************************#
#*******************************************************************************************#
#*******************************************************************************************#
#*******************************************************************************************#
#*******************************************************************************************#