Skip to content

Commit c8752c3

Browse files
committed
Create mergeGPS.R
1 parent c69d700 commit c8752c3

File tree

1 file changed

+105
-0
lines changed
  • Intro_DHSdata_Analysis/4_Using_Multiple_Files

1 file changed

+105
-0
lines changed
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# *****************************************************************************************************
2+
# Program: mergeGPS.R
3+
# Purpose: Explain how to merge an IR file with the GC (points) file in R
4+
# Author: Rose Donohue, and Shireen Assaf
5+
# Date last modified: Aug 2, 2023 by Shireen Assaf
6+
#
7+
# Notes: The Nigeria 2018 survey is used for this example.
8+
#
9+
# There are two sections to this code:
10+
# 1. Merging the country GPS locations for each cluster to an IR file (use same logic for other file types)
11+
# 2. Merging the survey boundaries data at the region level, this code will also produce a map.
12+
#
13+
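# For both sections you will need shapefile data: section one reads the .dbf file of the GE shapefile, and section two reads the .shp file of the boundaries shapefile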
14+
# The cluster GPS locations can be downloaded along with the datasets from the DHS Program website.
15+
# These files have 'GE' in the name, for example NGGE7BFL
16+
#
17+
# To download the survey boundary data for any survey go to: https://spatialdata.dhsprogram.com/boundaries/
18+
# These files will have the name sdr_subnational_boundaries
19+
#
20+
# *****************************************************************************************************
21+
22+
#### Section one: Merging GPS locations at the cluster level ####
23+
24+
#load required packages
25+
library(haven)
26+
library(foreign)
27+
library(data.table)
28+
library(here)
29+
here() # check your path, this is where you should have your saved datafiles
30+
31+
# Read the attribute table (.dbf) that ships with the GE cluster shapefile
# and list its column names.
gps_dat <- read.dbf(file = here("Intro_DHSdata_Analysis", "NGGE7BFL.dbf"))
names(gps_dat)

# Read the IR recode file.
IRdata <- read_dta(file = here("Intro_DHSdata_Analysis", "NGIR7BFL.dta"))

# The IR file stores the cluster ID in v001, so give the GE column DHSCLUST
# the same name; the merge below needs a shared key.
setnames(gps_dat, old = "DHSCLUST", new = "v001")

# Attach the GPS attributes (gps_dat) to every IR record, matching on v001.
# merge() defaults to all = FALSE, so only rows whose v001 appears in both
# inputs are kept.
data_merged <- merge(x = IRdata, y = gps_dat, by = "v001")
43+
44+
45+
# *****************************************************************************************************
46+
47+
#### Section two: Merging GPS locations at the region level for survey boundaries ####
48+
49+
#load required packages
50+
library(haven)
51+
library(foreign)
52+
library(data.table)
53+
library(dplyr)
54+
library(labelled) # used for Haven labelled variable creation
55+
library(sf) # to read shape file with geometry
56+
library(ggplot2) # to create map
57+
library(here)
58+
here() # check your path, this is where you should have your saved datafiles
59+
60+
# Read the subnational boundaries shapefile (.shp) with sf so that the
# geometry is kept, then list its column names.
gps_boundary <- read_sf(
  here("Intro_DHSdata_Analysis", "sdr_subnational_boundaries.shp")
)
names(gps_boundary)

# The IR data are summarised by region using v024, so give the boundary
# column REGCODE the same name; the join further down needs a shared key.
setnames(gps_boundary, old = "REGCODE", new = "v024")

# Read the IR recode file.
IRdata <- read_dta(file = here("Intro_DHSdata_Analysis", "NGIR7BFL.dta"))
69+
70+
# We need to collapse the data to the region level before merging.
71+
# Choose an indicator of interest that you would want to use in the merged data.
72+
73+
# Example indicator: modern contraceptive use.
#   mcp - 1 when v313 equals 3, 0 otherwise (NA where v313 is missing)
#   wt  - v005 divided by 1,000,000
# mcp is then given yes/no value labels and a variable label.
IRdata <- IRdata %>%
  mutate(
    mcp = ifelse(v313 == 3, 1, 0),
    wt = v005 / 1000000
  ) %>%
  set_value_labels(mcp = c(yes = 1, no = 0)) %>%
  set_variable_labels(mcp = "Currently used any modern method")
79+
80+
# Calculate weighted modern contraceptive use for each region (v024).
# base::mean() has no `weights` argument: passing one is silently absorbed by
# `...` and the result is an UNWEIGHTED mean. weighted.mean() applies the
# weight (wt) as intended.
# as.numeric() passes plain numbers to weighted.mean(), since mcp carries
# value labels at this point.
# na.rm = TRUE drops records where mcp is missing (together with their weights).
region_means <- IRdata %>%
  group_by(v024) %>%
  summarize(mcp_mean = weighted.mean(as.numeric(mcp), w = wt, na.rm = TRUE))

# Convert the proportion to a percentage
region_means <- region_means %>%
  mutate(mcp_per = 100 * mcp_mean)

# Check the results
print(region_means)
91+
92+
# Attach the regional estimates to the boundary data, matching on v024.
# left_join() keeps every row of gps_boundary; rows with no match in
# region_means get NA for the added columns.
gps_boundary <- gps_boundary %>%
  left_join(region_means, by = "v024")
94+
95+
96+
# Create a map of the percentage of modern contraceptive use for each region.
# The data are supplied once in ggplot() and columns are referenced by bare
# name inside aes(); using gps_boundary$... inside aes() bypasses ggplot2's
# data handling and is discouraged by the ggplot2 documentation.
# Region labels (REGNAME) are jittered vertically by up to 0.5 (in map units)
# to reduce overlap; the jitter is random, so label positions vary between runs.
ggplot(data = gps_boundary) +
  geom_sf(aes(fill = mcp_per, geometry = geometry)) +
  scale_fill_continuous(low = "white", high = "blue") +
  geom_sf_label(
    aes(label = REGNAME, geometry = geometry),
    fill = NA,
    label.size = 0,
    position = position_jitter(width = 0, height = .5),
    size = 4
  ) +
  theme_minimal()
103+
104+
105+
# *****************************************************************************************************

0 commit comments

Comments
 (0)