-
Notifications
You must be signed in to change notification settings - Fork 0
/
causes.py
136 lines (103 loc) · 3.89 KB
/
causes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import csv
import sys
import matplotlib.pyplot as plt
import numpy
from numpy.core.fromnumeric import size
def add_cat(table, category, ndeaths, nmentions):
    """Accumulate death and mention counts for *category* in *table*.

    *table* maps a category name to a ``(deaths, mentions)`` tuple and is
    updated in place. A category that is not yet present starts at
    ``(0, 0)``. Both counts are passed through ``int()``, so numeric
    strings are accepted; a non-numeric value raises ``ValueError`` and
    leaves *table* unchanged.
    """
    prev_deaths, prev_mentions = table.get(category, (0, 0))
    total_deaths = prev_deaths + int(ndeaths)
    total_mentions = prev_mentions + int(nmentions)
    table[category] = (total_deaths, total_mentions)
def get_data(fname):
    """Parse the CSV file *fname* and return the condition and category data.

    Every non-blank row must have exactly 14 columns, in this order:
    as_of, wk_start, wk_end, group, year, month, state, category,
    condition, icd10, age, ndeaths, mentions, flag.

    A row is aggregated only when all of the following hold:

    * ``age`` is "all ages" (case-insensitive),
    * ``state`` is "United States" (state-level rows are ignored),
    * ``group`` is "By Total" (yearly and monthly rows are ignored),
    * both ``ndeaths`` and ``mentions`` parse as integers (rows with a
      blank or non-numeric count are skipped).

    The header row is dropped by the same filters; no special handling
    is needed for it.

    Returns:
        A ``(conditions, categories)`` pair of dicts. Each maps a name to
        a ``(deaths, mentions)`` tuple of summed counts.

    Raises:
        ValueError: if a non-blank row does not have 14 columns.
    """
    expected_fields = 14
    conditions = {}
    categories = {}

    # newline='' is what the csv module requires for correct quoting
    # behaviour; the encoding is pinned so parsing does not depend on the
    # platform's locale default.
    with open(fname, newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in reader:
            # csv.reader yields [] for a blank line (e.g. a trailing
            # newline at end of file); that is not an error.
            if not row:
                continue
            if len(row) != expected_fields:
                raise ValueError(
                    f"{fname}: line {reader.line_num}: expected "
                    f"{expected_fields} columns, found {len(row)}"
                )

            (_as_of, _wk_start, _wk_end, group, _year, _month, state,
             category, condition, _icd10, age, ndeaths, mentions,
             _flag) = row

            # only consider "all ages"
            if age.lower() != "all ages":
                continue
            # add national totals, disregard state totals
            if state != "United States":
                continue
            # add total totals, disregard yearly and monthly totals
            if group != "By Total":
                continue

            # Skip rows whose counts are blank or otherwise not integers.
            # Both columns are checked here so that a bad mentions value
            # is skipped the same way as a bad deaths value instead of
            # raising later during accumulation.
            try:
                ndeaths = int(ndeaths)
                mentions = int(mentions)
            except ValueError:
                continue

            add_cat(conditions, condition, ndeaths, mentions)
            add_cat(categories, category, ndeaths, mentions)

    return conditions, categories
def make_totals(data, divisor=1):
    """Return a ranked list of total deaths. (chart 1)

    Args:
        data: Mapping of category -> ``(ndeaths, nmentions)``.
        divisor: Value both counts are divided by (e.g. 1000 to report
            thousands). Must be non-zero.

    Returns:
        A list of ``(ndeaths / divisor, (nmentions - ndeaths) / divisor,
        category)`` tuples sorted in descending order, i.e. largest death
        count first, with ties broken by the remaining tuple fields.
    """
    totals = [
        (ndeaths / divisor, (nmentions - ndeaths) / divisor, category)
        for category, (ndeaths, nmentions) in data.items()
    ]
    totals.sort(reverse=True)
    return totals
def make_percents(data):
    """Return a ranked list of COVID percentages. (chart 2)

    Args:
        data: Mapping of category -> ``(ndeaths, nmentions)``.

    Returns:
        A list of ``(percent, category)`` tuples sorted in descending
        order, where ``percent`` is ``100 * ndeaths / nmentions``.
        A category with zero mentions has no defined ratio; it is
        reported as ``0.0`` rather than raising ``ZeroDivisionError``,
        so every category in *data* still appears in the result.
    """
    percents = [
        (100 * ndeaths / nmentions if nmentions else 0.0, category)
        for category, (ndeaths, nmentions) in data.items()
    ]
    percents.sort(reverse=True)
    return percents
def abbrev(val):
    """Format *val* as a short whole-number label.

    *val* is converted with ``float()``, so ints, floats and numeric
    strings are accepted. Values of 1000 or more are divided by 1000 and
    given a ``k`` suffix; smaller values are printed as-is. Both forms
    are rounded to zero decimal places.
    """
    val = float(val)
    # >= (not >) so that exactly 1000 is rendered as "1k", consistent
    # with 1001 and above.
    if val >= 1000:
        return f"{val / 1000:0.0f}k"
    return f"{val:0.0f}"
def plot_deaths(table):
    """Show a horizontal bar chart of the counts in *table*, then exit.

    Args:
        table: Mapping of label -> ``(deaths, mentions)`` counts.

    Rows are drawn top-to-bottom in descending order of their
    ``(deaths, mentions)`` tuple. For each row the mentions bar is drawn
    first and the deaths bar is drawn over it, both scaled to thousands.
    Each row is annotated with ``"<deaths> : <mentions - deaths>"``.

    This function does not return: after the plot window is closed it
    calls ``sys.exit(-1)``.
    """
    # sort rows by deaths (descending)
    ranked = sorted(((counts, label) for label, counts in table.items()),
                    reverse=True)
    labels = [label for _, label in ranked]

    # A signed 64-bit dtype is used so that `mentions - deaths` below
    # cannot wrap around to a huge positive number if a row ever has
    # fewer mentions than deaths. reshape(-1, 2) keeps the array
    # two-dimensional even when the table is empty, so the column slices
    # below remain valid.
    data = numpy.array([counts for counts, _ in ranked],
                       dtype=numpy.int64).reshape(-1, 2)
    deaths = data[:, 0]
    mentions = data[:, 1]

    print("L:", labels)

    plt.rcdefaults()
    fig, ax = plt.subplots(figsize=(11, 5))

    y_pos = numpy.arange(len(labels))
    div = 1000  # bar lengths are plotted in thousands

    # The mentions bar goes first so the (shorter or equal) deaths bar is
    # painted on top of it.
    ax.barh(y_pos, mentions / div, label="Without COVID-19", align='center')
    ax.barh(y_pos, deaths / div, label="With COVID-19", align='center')
    ax.legend(fontsize='small')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(labels)
    ax.invert_yaxis()  # labels read top-to-bottom

    # add labels
    ax.set_title('Death Certificate Comorbidities')
    ax.set_xlabel('Total Mentions (Thousands)')

    # add per-row data labels, placed just past the end of the longer
    # (mentions) bar
    for i, (row_deaths, row_mentions) in enumerate(data):
        lbl = abbrev(row_deaths) + " : " + abbrev(row_mentions - row_deaths)
        ax.text(row_mentions / div + 10, i + 0.25, lbl, size="small")

    # widen the x-axis by 10% so the data labels are not clipped
    x1, x2, y1, y2 = ax.axis()
    ax.axis((x1, x2 * 1.1, y1, y2))
    fig.subplots_adjust(left=0.5, right=0.99)
    plt.show()

    # NOTE(review): this terminates the whole process with a non-zero
    # status even after a successful plot. It is preserved here because
    # removing it would change the script's exit behaviour; confirm
    # whether it is intentional or a leftover debugging stop.
    sys.exit(-1)
if __name__ == "__main__":
    # download causes.csv from
    # https://data.cdc.gov/NCHS/Conditions-Contributing-to-COVID-19-Deaths-by-Stat/hk9y-quqm

    # Fail with a usage message instead of a bare IndexError when the
    # CSV path is missing from the command line.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} causes.csv")

    conditions, categories = get_data(sys.argv[1])

    # plot_example()
    plot_deaths(conditions)

    # TODO: the percentage chart below is not wired up; plot_deaths()
    # exits the process, and no plot_data helper is visible in this file.
    # percents = make_percents(conditions)
    # plot_data(
    # title='Percentage of COVID-19 Deaths, by Comorbidity',
    # xlabel='Case Association (percent)',
    # data=percents)