Skip to content

Commit

Permalink
Added variable and cross validation
Browse files Browse the repository at this point in the history
Added cross validation and stay at home orders
  • Loading branch information
nabarunDG committed Apr 21, 2020
1 parent da7329a commit 9c74ac0
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 47 deletions.
Binary file modified analysiset.dta
Binary file not shown.
Binary file added crossvalid.dta
Binary file not shown.
103 changes: 103 additions & 0 deletions crossvalidation.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@

// COMPARE March 1 to April 11 in Google and DL data by county-day


// Import Descartes Labs (DL) county-day mobility data and save it as dl_x_valid
* stringcols(6) forces column 6 to be read as a string
* (presumably the FIPS code, used later as a merge key -- confirm against the CSV header)
clear
import delimited "https://raw.githubusercontent.com/descarteslabs/DL-COVID-19/master/DL-us-mobility-daterow.csv", encoding(ISO-8859-9) stringcols(6)

* Drop state aggregates (rows with an empty admin2)
drop if admin2==""

* Format date: convert the string date to a Stata daily date, keeping the name "date"
gen date2=date(date,"YMD")
format date2 %td
drop date
rename date2 date

* Note data start and end dates for graphs
su date
local latest: disp %td r(max)
di "`latest'"
local earliest: disp %td r(min)
* Macro quoting fixed: the previous "`'earliest'" expanded an empty macro
* and printed the literal text earliest' instead of the date
di "`earliest'"

* Rename variables for consistency
rename admin1 state
rename admin2 county

* Create quintiles of DL mobility (disabled)
/*
xtile temp = last3_index, nq(5)
gen iso5=.
replace iso5=1 if temp==5
replace iso5=2 if temp==4
replace iso5=3 if temp==3
replace iso5=4 if temp==2
replace iso5=5 if temp==1
order iso5, a(last3_index)
la var iso5 "Distancing: Lowest (1) to Highest (5)"
drop temp
*/
save dl_x_valid, replace




// Process Google app check-in data and save it as google_x_valid
* stringcols(1) keeps column 1 as a string; numericcols(5 6 8) forces those columns numeric
* NOTE(review): the path is machine-specific -- confirm before running elsewhere
clear
import delimited "/Users/nabarun/Documents/GitHub/covid/fips-google-mobility-daily-as-of-04-20-20.csv", stringcols(1) numericcols(5 6 8)

* Format date: build a Stata daily date from report_date and move it first
gen date=date(report_date, "YMD")
format date %td
order date, first
drop report_date

* Note data start and end dates for graphs
su date
local latest: disp %td r(max)
di "`latest'"
local earliest: disp %td r(min)
* Macro quoting fixed: the previous "`'earliest'" expanded an empty macro
* and printed the literal text earliest' instead of the date
di "`earliest'"

* Retain latest data (disabled)
*su googledate
* local latest: disp %td r(max)
* di "Keeping only records in Google mobility scrape from `latest'"
* keep if googledate==r(max)


save google_x_valid, replace


// Merge Google (in memory) with Descartes Labs by county-day and compare the two measures
merge 1:1 date fips using dl_x_valid

* Keep only county-days present in both sources
tab _merge
keep if _merge==3
drop _merge country_code admin_level

* Variable cleanup
* Convert any Google percent-change column that was imported as a string to numeric.
* The check must be for STRING variables: destring does nothing to a variable
* that is already numeric, so the previous "confirm numeric" test made this loop a no-op.
* force sets non-numeric entries to missing.
foreach var of varlist retail_and_recreation_percent_ch-residential_percent_change_from_ {
    capture confirm string variable `var'
    if !_rc {
        destring `var', replace force
    }
}

* Retained from the original; a harmless no-op for variables the loop already converted
destring retail_and_recreation_percent_ch grocery_and_pharmacy_percent_cha workplaces_percent_change_from_b, replace force

* Deciles of Google retail/recreation change and of the DL m50 index
xtile retailrec = retail_and_recreation_percent_ch, n(10)
xtile dl10 = m50_index, n(10)

* Correlation between the retail decile and the DL index.
* corr() is an egen function supplied by the egenmore package (ssc install egenmore);
* the command itself is egen, and the two variables are separated by a space.
egen corretail = corr(retailrec m50_index)

* heatplot is a user-written command (ssc install heatplot)
heatplot retailrec dl10

97 changes: 50 additions & 47 deletions data_formatting.do
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@

cd "/Users/nabarun/Documents/GitHub/covid/"

// Import RWJF data

// Import RWJF data: Additional Measures
import excel "/Users/nabarun/Documents/GitHub/covid/2019_County_Health_Rankings_Data_v3.xls", sheet("Additional Measure Data") clear
rename A fips
rename B state
rename C county

rename D lifeexp
la var lifeexp "Life expectancy"
rename J premdeathageadj
rename K premdeathageadj
la var premdeathageadj "Age-Adjusted Premature Mortality"
rename R childmortrate
la var childmortrate "Child Mortality Rate"
Expand All @@ -26,7 +25,7 @@ import excel "/Users/nabarun/Documents/GitHub/covid/2019_County_Health_Rankings_
la var hiv "HIV Prevalence Rate"
rename AQ foodinsec
la var foodinsec "% Food Insecure"
rename AR healthyfoods
rename AS healthyfoods
la var healthyfoods "% Limited access to healthy foods"
* Column AU of the imported sheet becomes drugod; label typo "Durg" corrected
rename AU drugod
la var drugod "Drug overdose mortality rate"
Expand Down Expand Up @@ -66,9 +65,9 @@ import excel "/Users/nabarun/Documents/GitHub/covid/2019_County_Health_Rankings_
la var asian_p "% Asian American"
rename CV pacisl_p
la var pacisl_p "% Pacific Islanders"
rename CW hispanic
rename CX hispanic
la var hispanic "% Hispanic"
rename CY nhw_p
rename CZ nhw_p
la var nhw_p "% Non-Hispanic white"
rename DB notenglishprof
la var notenglishprof "% Not proficient in English"
Expand All @@ -77,13 +76,18 @@ import excel "/Users/nabarun/Documents/GitHub/covid/2019_County_Health_Rankings_
rename DG rural
la var rural "% population rural"

keep fips state county lifeexp premdeathageadj childmortrate infantmort freqphysdist freqmentdist diabetic foodinsec healthyfoods drugod crashdeaths nosleep medianincome schoollunch segregation_bw segregation_wnw homiciderate homeown totalpop youth elderly black_p native_p asian_p pacisl_p hispanic nhw_p female rural

local varsofmine "lifeexp premdeathageadj childmortrate infantmort freqphysdist freqmentdist diabetic foodinsec healthyfoods drugod crashdeaths nosleep medianincome schoollunch segregation_bw segregation_wnw homiciderate homeown totalpop youth elderly black_p native_p asian_p pacisl_p hispanic nhw_p female rural housingburden"

save addlmeasures, replace
keep `varsofmine' fips state county

drop if fips==""
drop if fips=="FIPS"

destring `varsofmine', replace force

save addlmeasures, replace

*/
# Deaths Age-Adjusted Mortality

import excel "2019_County_Health_Rankings_Data_v3.xls", sheet("Ranked Measure Data") allstring clear

Expand Down Expand Up @@ -133,6 +137,9 @@ import excel "2019_County_Health_Rankings_Data_v3.xls", sheet("Ranked Measure Da
rename DQ incomeratio
la var incomeratio "Ratio of household income at 80th% to income at 20th %"
drop DR
rename ER overcrowding
la var overcrowding "% of homes with overcrowding"

rename EU drivealone_p
la var drivealone_p "% of workers who drive alone to work"
drop EV EW EX
Expand All @@ -149,47 +156,12 @@ import excel "2019_County_Health_Rankings_Data_v3.xls", sheet("Ranked Measure Da

rename county countyshort

keep fips state countyshort ypll food physicalinactive exercise uninsured uninsured_p pcp_rate pcp pcp_ratio mhproviders mhproviders_rate fluvaccine income80 income20 incomeratio drivealone_p longcommute_p
keep fips state countyshort ypll food physicalinactive exercise uninsured uninsured_p pcp_rate pcp pcp_ratio mhproviders mhproviders_rate fluvaccine income80 income20 incomeratio drivealone_p longcommute_p overcrowding

distinct fips

save covidchrdetail, replace

// Process Google app check-in data
clear
import delimited "/Users/nabarun/Documents/GitHub/covidnc/data/export-2020-04-05.csv"

drop v1

* Convert proportions to percents
ds, has(type numeric)
foreach var of varlist `r(varlist)' {
replace `var'=`var'*100
}

* Format date and retain latest data
rename subunit_name county
order county, a(report_date)
replace report_date=substr(report_date,1,10)
gen googledate=date(report_date, "YMD")
format googledate %td
order googledate, first
drop report_date

su googledate
local latest: disp %td r(max)
di "Keeping only records in Google mobility scrape from `latest'"
keep if googledate==r(max)

* Rename variables for consistency
rename unit_name state

drop unit*

save google_mobility, replace

distinct state county

// Import Descartes Labs
clear
import delimited "https://raw.githubusercontent.com/descarteslabs/DL-COVID-19/master/DL-us-mobility-daterow.csv", encoding(ISO-8859-9) stringcols(6)
Expand Down Expand Up @@ -254,7 +226,9 @@ import delimited "/Users/nabarun/Documents/GitHub/covidnc/data/export-2020-04-05
clear
use covidchrdetail

merge m:1 fips using sixrankings, keep(1 3)
merge m:1 fips using sixrankings, keep(1 3) nogen

merge 1:1 fips using addlmeasures, keep (1 3) nogen

merge 1:1 fips using dlmobility, keep(1 3) nogen

Expand Down Expand Up @@ -295,6 +269,35 @@ import delimited "/Users/nabarun/Documents/GitHub/covidnc/data/export-2020-04-05
* Generate indicator variables for mobility change
qui: tabulate iso5, generate(levels)

// Create indicator for stay-at-home orders (1=orders, 0=no orders)
* Source: Mervosh et al. https://www.nytimes.com/interactive/2020/us/coronavirus-stay-at-home-order.html

* Default: every county is under an order
gen homeorder=1

* States coded as having no order
replace homeorder=0 if inlist(state, "Arkansas", "Iowa", "North Dakota", "South Dakota", ///
    "Nebraska", "Oklahoma", "Utah", "Wyoming")

* Counties coded back to 1 within those states.
* Oklahoma: Oklahoma (OKC, Edmond), Sequoyah (Sallisaw), Payne (Stillwater), Carter (Ardmore),
*           Cleveland (Norman, Moore), Rogers (Claremore), Tulsa (Tulsa)
replace homeorder=1 if state=="Oklahoma" & inlist(county, "Oklahoma County", "Sequoyah County", ///
    "Payne County", "Carter County", "Cleveland County", "Rogers County", "Tulsa County")
replace homeorder=1 if state=="Utah" & inlist(county, "Davis County", "Salt Lake County", "Summit County")
* Wyoming: Teton (Jackson)
replace homeorder=1 if state=="Wyoming" & county=="Teton County"

la var homeorder "Stay at home order for COVID-19"
note homeorder: From Mervosh et al. https://www.nytimes.com/interactive/2020/us/coronavirus-stay-at-home-order.html
note homeorder: As of April 21, 2020



la var fluvaccine "% Medicare Beneficiaries Getting Flu Vaccine"

Expand Down

0 comments on commit 9c74ac0

Please sign in to comment.