# ******************************************************************************
# Program:            mergeGPS.R
# Purpose:            Explain how to merge an IR file with the GE (points)
#                     file in R
# Author:             Rose Donohue, and Shireen Assaf
# Date last modified: Aug 2, 2023 by Shireen Assaf
#
# Notes: The Nigeria 2018 survey is used for this example.
#
# There are two sections to this code:
#   1. Merging the country GPS locations for each cluster to an IR file
#      (use the same logic for other file types).
#   2. Merging the survey boundaries data at the region level; this section
#      also produces a map.
#
# Section one reads the .dbf file of the cluster GPS (GE) dataset.
# The cluster GPS locations can be downloaded along with the datasets from
# the DHS Program website. These files have 'GE' in the name, for example
# NGGE7BFL.
#
# Section two reads the survey boundaries shapefile (.shp). Keep the other
# files that ship with it (.dbf, .shx, .prj) in the same folder.
# To download the survey boundary data for any survey go to:
#   https://spatialdata.dhsprogram.com/boundaries/
# These files will have the name sdr_subnational_boundaries.
#
# Each section loads its own packages and data so it can be run on its own.
# ******************************************************************************

#### Section one: Merging GPS locations at the cluster level ####

# Load required packages
library(haven)
library(foreign)
library(data.table)
library(here)
here() # check your path, this is where you should have your saved datafiles

# Load the GPS data by reading the GE shapefile's .dbf (attribute) file
gps_dat <- read.dbf(here("Intro_DHSdata_Analysis", "NGGE7BFL.dbf"))
names(gps_dat)

# Load the IR file
IRdata <- read_dta(here("Intro_DHSdata_Analysis", "NGIR7BFL.dta"))

# Rename DHSCLUST in the GE dataset to v001, the cluster ID variable name used
# in the IR dataset, so that both files share the merge key
setnames(gps_dat, "DHSCLUST", "v001")

# Join the GPS points (gps_dat) to the IR file.
# all.x = TRUE keeps every IR record: merge() defaults to an inner join, which
# would silently drop respondents whose cluster has no row in the GE file.
data_merged <- merge(IRdata, gps_dat, by = "v001", all.x = TRUE)


# ******************************************************************************

#### Section two: Merging survey boundaries at the region level ####

# Load required packages
library(haven)
library(foreign)
library(data.table)
library(dplyr)
library(labelled) # used for Haven labelled variable creation
library(sf)       # to read shape file with geometry
library(ggplot2)  # to create map
library(here)
here() # check your path, this is where you should have your saved datafiles

# Load the survey boundaries by reading the shapefile (attributes + geometry)
gps_boundary <- read_sf(
  here("Intro_DHSdata_Analysis", "sdr_subnational_boundaries.shp")
)
names(gps_boundary)

# Rename REGCODE to v024, the region variable name used in the IR dataset,
# so that both datasets share the merge key
setnames(gps_boundary, "REGCODE", "v024")

# Load the IR file
IRdata <- read_dta(here("Intro_DHSdata_Analysis", "NGIR7BFL.dta"))

# We need to collapse the data to the region level before merging.
# Choose an indicator of interest that you would want to use in the merged data.

# For example, modern contraceptive use.
# mcp is 1 when v313 equals 3 and 0 otherwise; wt is the sample weight v005
# rescaled by 1,000,000.
IRdata <- IRdata %>%
  mutate(mcp = ifelse(v313 == 3, 1, 0)) %>%
  set_value_labels(mcp = c(yes = 1, no = 0)) %>%
  set_variable_labels(mcp = "Currently used any modern method") %>%
  mutate(wt = v005 / 1000000)

# Calculate weighted modern contraceptive use for each region.
# weighted.mean() is required here: base mean() has no `weights` argument and
# silently ignores it, which would give unweighted estimates.
# as.numeric() drops the value labels so plain arithmetic is applied to mcp.
region_means <- IRdata %>%
  group_by(v024) %>%
  summarize(mcp_mean = weighted.mean(as.numeric(mcp), wt, na.rm = TRUE))

# Compute percentages
region_means <- region_means %>%
  mutate(mcp_per = 100 * mcp_mean)

# Check the results
print(region_means)

# Merge the shapefile with the data frame of region means
gps_boundary <- left_join(gps_boundary, region_means, by = "v024")


# Create a map of the percentage of modern contraceptive use for each region.
# The label layer gets the data through `data =` and refers to the column by
# name inside aes(); the geometry column is picked up from the sf object.
# A fixed jitter seed makes the label positions identical on every run.
ggplot() +
  geom_sf(data = gps_boundary, aes(fill = mcp_per, geometry = geometry)) +
  scale_fill_gradient(low = "white", high = "blue") +
  geom_sf_label(
    data = gps_boundary,
    aes(label = REGNAME),
    fill = NA,
    label.size = 0,
    position = position_jitter(width = 0, height = 0.5, seed = 123),
    size = 4
  ) +
  theme_minimal()


# ******************************************************************************