@@ -13,11 +13,15 @@ Setup
13
13
14
14
You can install the stable version of this package from PyPI:
15
15
16
- >>> pip install epidatpy
16
+ .. code-block :: sh
17
+
18
+ pip install epidatpy
17
19
18
20
Or if you want the development version, install from GitHub:
19
21
20
- >>> pip install - e " git+https://github.com/cmu-delphi/epidatpy.git#egg=epidatpy"
22
+ .. code-block :: sh
23
+
24
+ pip install -e " git+https://github.com/cmu-delphi/epidatpy.git#egg=epidatpy"
21
25
22
26
**API Keys**
23
27
@@ -48,29 +52,38 @@ the location and times of interest.
48
52
49
53
The ``pub_covidcast`` function lets us access the ``covidcast`` endpoint:
50
54
51
- >>> from epidatpy import EpiDataContext, EpiRange
52
- >>> epidata = EpiDataContext(use_cache = True , cache_max_age_days = 1 )
53
- >>> # Obtain the most up-to-date version of the smoothed covid-like illness (CLI)
54
- >>> # signal from the COVID-19 Trends and Impact survey for the US
55
- >>> apicall = epidata.pub_covidcast(
56
- ... data_source = " fb-survey" ,
57
- ... signals = " smoothed_cli" ,
58
- ... geo_type = " nation" ,
59
- ... time_type = " day" ,
60
- ... geo_values = " us" ,
61
- ... time_values = EpiRange(20210405 , 20210410 ))
62
- EpiDataCall(endpoint=covidcast/, params={'data_source': 'fb-survey', 'signals': 'smoothed_cli', 'geo_type': 'nation', 'time_type': 'day', 'geo_values': 'us', 'time_values': '20210405-20210410'})
55
+ .. exec ::
56
+ :context: true
57
+
58
+ from epidatpy import EpiDataContext, EpiRange
59
+ import pandas as pd
60
+
61
+ # Set common options and context
62
+ pd.set_option('display.max_columns', None)
63
+ pd.set_option('display.max_rows', None)
64
+ pd.set_option('display.width', 1000)
65
+
66
+ epidata = EpiDataContext(use_cache=False)
67
+
68
+ # Obtain the most up-to-date version of the smoothed covid-like illness (CLI)
69
+ # signal from the COVID-19 Trends and Impact survey for the US
70
+ apicall = epidata.pub_covidcast(
71
+ data_source = "fb-survey",
72
+ signals = "smoothed_cli",
73
+ geo_type = "nation",
74
+ time_type = "day",
75
+ geo_values = "us",
76
+ time_values = EpiRange(20210405, 20210410))
77
+
78
+ print(apicall)
63
79
64
80
``pub_covidcast`` returns an ``EpiDataCall``, which can be further converted into different output formats - such as a Pandas DataFrame:
65
81
66
- >>> data = apicall.df()
67
- >>> data.head()
68
- source signal geo_type geo_value time_type time_value issue lag value stderr sample_size direction missing_value missing_stderr missing_sample_size
69
- 0 fb-survey smoothed_cli nation us day 2021-04-05 2021-04-10 5 0.675832 0.014826 244046 <NA> 0 0 0
70
- 1 fb-survey smoothed_cli nation us day 2021-04-06 2021-04-11 5 0.690687 0.014998 242979 <NA> 0 0 0
71
- 2 fb-survey smoothed_cli nation us day 2021-04-07 2021-04-12 5 0.690664 0.015023 242153 <NA> 0 0 0
72
- 3 fb-survey smoothed_cli nation us day 2021-04-08 2021-04-13 5 0.706503 0.015236 241380 <NA> 0 0 0
73
- 4 fb-survey smoothed_cli nation us day 2021-04-09 2021-04-14 5 0.724306 0.015466 240256 <NA> 0 0 0
82
+ .. exec ::
83
+ :context: true
84
+
85
+ data = apicall.df()
86
+ print(data.head())
74
87
75
88
Each row represents one observation in the US on one
76
89
day. The geographical abbreviation is given in the ``geo_value`` column, the date in
@@ -85,57 +98,51 @@ entire US, we use the ``geo_type`` argument paired with ``*`` for the
85
98
access data at all locations. Check the help for a given endpoint to see if
86
99
it supports ``*``.)
87
100
88
- >>> apicall = epidata.pub_covidcast(
89
- ... data_source = " fb-survey" ,
90
- ... signals = " smoothed_cli" ,
91
- ... geo_type = " state" ,
92
- ... time_type = " day" ,
93
- ... geo_values = " *" ,
94
- ... time_values = EpiRange(20210405 , 20210410 ))
95
- EpiDataCall(endpoint=covidcast/, params={'data_source': 'fb-survey', 'signals': 'smoothed_cli', 'geo_type': 'state', 'time_type': 'day', 'geo_values': '*', 'time_values': '20210405-20210410'})
96
- >>> apicall.df.head()
97
- source signal geo_type geo_value time_type time_value issue lag value stderr sample_size direction missing_value missing_stderr missing_sample_size
98
- 0 fb-survey smoothed_cli state ak day 2021-04-05 2021-04-10 5 0.736883 0.275805 720.0 <NA> 0 0 0
99
- 1 fb-survey smoothed_cli state al day 2021-04-05 2021-04-10 5 0.796627 0.137734 3332.1117 <NA> 0 0 0
100
- 2 fb-survey smoothed_cli state ar day 2021-04-05 2021-04-10 5 0.561916 0.131108 2354.9911 <NA> 0 0 0
101
- 3 fb-survey smoothed_cli state az day 2021-04-05 2021-04-10 5 0.62283 0.105354 4742.2778 <NA> 0 0 0
102
- 4 fb-survey smoothed_cli state ca day 2021-04-05 2021-04-10 5 0.444169 0.040576 21382.3806 <NA> 0 0 0
101
+ .. exec ::
102
+ :context: true
103
+
104
+ apicall = epidata.pub_covidcast(
105
+ data_source = "fb-survey",
106
+ signals = "smoothed_cli",
107
+ geo_type = "state",
108
+ time_type = "day",
109
+ geo_values = "*",
110
+ time_values = EpiRange(20210405, 20210410))
111
+
112
+ print(apicall)
113
+ print(apicall.df().head())
103
114
104
115
We can fetch a subset of states by listing out the desired locations:
105
116
106
- >>> apicall = epidata.pub_covidcast(
107
- ... data_source = " fb-survey" ,
108
- ... signals = " smoothed_cli" ,
109
- ... geo_type = " state" ,
110
- ... time_type = " day" ,
111
- ... geo_values = " pa,ca,fl" ,
112
- ... time_values = EpiRange(20210405 , 20210410 ))
113
- EpiDataCall(endpoint=covidcast/, params={'data_source': 'fb-survey', 'signals': 'smoothed_cli', 'geo_type': 'state', 'time_type': 'day', 'geo_values': 'pa,ca,fl', 'time_values': '20210405-20210410'})
114
- >>> apicall.df.head()
115
- source signal geo_type geo_value time_type time_value issue lag value stderr sample_size direction missing_value missing_stderr missing_sample_size
116
- 0 fb-survey smoothed_cli state ca day 2021-04-05 2021-04-10 5 0.444169 0.040576 21382.3806 <NA> 0 0 0
117
- 1 fb-survey smoothed_cli state fl day 2021-04-05 2021-04-10 5 0.690415 0.058204 16099.0005 <NA> 0 0 0
118
- 2 fb-survey smoothed_cli state pa day 2021-04-05 2021-04-10 5 0.715758 0.072999 10894.0057 <NA> 0 0 0
119
- 3 fb-survey smoothed_cli state ca day 2021-04-06 2021-04-11 5 0.45604 0.04127 21176.3902 <NA> 0 0 0
120
- 4 fb-survey smoothed_cli state fl day 2021-04-06 2021-04-11 5 0.730692 0.059907 15975.0007 <NA> 0 0 0
117
+ .. exec ::
118
+ :context: true
119
+
120
+ apicall = epidata.pub_covidcast(
121
+ data_source = "fb-survey",
122
+ signals = "smoothed_cli",
123
+ geo_type = "state",
124
+ time_type = "day",
125
+ geo_values = "pa,ca,fl",
126
+ time_values = EpiRange(20210405, 20210410))
127
+
128
+ print(apicall)
129
+ print(apicall.df().head())
121
130
122
131
We can also request data for a single location at a time, via the ``geo_values`` argument.
123
132
124
- >>> apicall = epidata.pub_covidcast(
125
- ... data_source = " fb-survey" ,
126
- ... signals = " smoothed_cli" ,
127
- ... geo_type = " state" ,
128
- ... time_type = " day" ,
129
- ... geo_values = " pa,ca,fl" ,
130
- ... time_values = EpiRange(20210405 , 20210410 ))
131
- EpiDataCall(endpoint=covidcast/, params={'data_source': 'fb-survey', 'signals': 'smoothed_cli', 'geo_type': 'state', 'time_type': 'day', 'geo_values': 'pa', 'time_values': '20210405-20210410'})
132
- >>> apicall.df.head()
133
- source signal geo_type geo_value time_type time_value issue lag value stderr sample_size direction missing_value missing_stderr missing_sample_size
134
- 0 fb-survey smoothed_cli state pa day 2021-04-05 2021-04-10 5 0.715758 0.072999 10894.0057 <NA> 0 0 0
135
- 1 fb-survey smoothed_cli state pa day 2021-04-06 2021-04-11 5 0.69321 0.070869 10862.0055 <NA> 0 0 0
136
- 2 fb-survey smoothed_cli state pa day 2021-04-07 2021-04-12 5 0.685934 0.070654 10790.0054 <NA> 0 0 0
137
- 3 fb-survey smoothed_cli state pa day 2021-04-08 2021-04-13 5 0.681511 0.071394 10731.0044 <NA> 0 0 0
138
- 4 fb-survey smoothed_cli state pa day 2021-04-09 2021-04-14 5 0.709416 0.072162 10590.0049 <NA> 0 0 0
133
+ .. exec ::
134
+ :context: true
135
+
136
+ apicall = epidata.pub_covidcast(
137
+ data_source = "fb-survey",
138
+ signals = "smoothed_cli",
139
+ geo_type = "state",
140
+ time_type = "day",
141
+ geo_values = "pa",
142
+ time_values = EpiRange(20210405, 20210410))
143
+
144
+ print(apicall)
145
+ print(apicall.df().head())
139
146
140
147
Getting versioned data
141
148
----------------------
@@ -145,26 +152,34 @@ and updates, which is particularly useful for accurately backtesting
145
152
forecasting models. To fetch versioned data, we can use the ``as_of``
146
153
argument:
147
154
148
- >>> apicall = epidata.pub_covidcast(
149
- ... data_source = " fb-survey" ,
150
- ... signals = " smoothed_cli" ,
151
- ... geo_type = " state" ,
152
- ... time_type = " day" ,
153
- ... geo_values = " pa,ca,fl" ,
154
- ... time_values = EpiRange(20210405 , 20210410 ),
155
- ... as_of = " 2021-06-01" )
155
+ .. exec ::
156
+ :context: true
157
+
158
+ apicall = epidata.pub_covidcast(
159
+ data_source = "fb-survey",
160
+ signals = "smoothed_cli",
161
+ geo_type = "state",
162
+ time_type = "day",
163
+ geo_values = "pa",
164
+ time_values = EpiRange(20210405, 20210410),
165
+ as_of = "2021-06-01")
166
+
167
+ print(apicall)
168
+ print(apicall.df().head())
156
169
157
170
Plotting
158
171
--------
159
172
160
173
Because the output data is a standard Pandas DataFrame, we can easily plot
161
174
it using any of the available Python libraries:
162
175
163
- >>> data.plot(x = " time_value" , y = " value" , title = " Smoothed CLI from Facebook Survey" , xlabel = " Date" , ylabel = " CLI" )
176
+ .. code-block :: python
177
+
178
+ data.plot(x = " time_value" , y = " value" , title = " Smoothed CLI from Facebook Survey" , xlabel = " Date" , ylabel = " CLI" )
164
179
165
180
.. image :: images/Figure_1.png
166
- :width: 800
167
- :alt: Smoothed CLI from Facebook Survey
181
+ :width: 800
182
+ :alt: Smoothed CLI from Facebook Survey
168
183
169
184
Finding locations of interest
170
185
-----------------------------
0 commit comments