Skip to content

Commit

Permalink
Change data source folder name
Browse files Browse the repository at this point in the history
  • Loading branch information
fBedecarrats committed Mar 12, 2024
1 parent 07fbf06 commit 84c9683
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 12 deletions.
2 changes: 1 addition & 1 deletion 01-ros-data-catalogue.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ library(haven) # Required for reading STATA files (.dta)
library(labelled) # To work with labelled data from STATA
library(writexl) # Write data frames to Excel format
ros_data_loc <- "data/ROS_data_prepared/"
ros_data_loc <- "data/dta_format/"
# Function to extract variable info for a given year and file
extract_variable_info <- function(year, file) {
Expand Down
2 changes: 1 addition & 1 deletion 02-ros-data-attrition.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ library(labelled) # To work with labelled data from STATA
library(readxl) # Read Excel files into data frames
# Obtain all years from the directory structure
ros_data_loc <- "data/ROS_data_prepared/"
ros_data_loc <- "data/dta_format/"
years <- list.dirs(ros_data_loc, recursive = FALSE, full.names = FALSE)
# Add observatory approximate location
Expand Down
2 changes: 1 addition & 1 deletion 03-ros-data-georeferencing.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ library(gtsummary) # to produce nice summary tables
library(janitor) # to simply add rowsums
# Obtain all years from the directory structure
ros_data_loc <- "data/ROS_data_prepared/"
ros_data_loc <- "data/dta_format/"
years <- list.dirs(ros_data_loc, recursive = FALSE, full.names = FALSE)
```

Expand Down
16 changes: 8 additions & 8 deletions 04-ros-data-preparation.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ First, we start with creating a copy of the original unfiltered and un-anonymize
```{r copy-data}
# Define the paths for the source and target folders
source_folder <- "data/ROS_data_original"
target_folder <- "data/ROS_data_prepared"
target_folder <- "data/dta_format"
# Create the target folder if it does not exist
if (!dir.exists(target_folder)) {
Expand Down Expand Up @@ -83,7 +83,7 @@ For 2015, we have 4 observatories. One that existed in the previous years, Menab

```{r}
# Define the path to the 2015 folder within the target folder
folder_2015 <- "data/ROS_data_prepared/2015"
folder_2015 <- "data/dta_format/2015"
# List all .dta files in the 2015 folder
dta_files <- list.files(folder_2015, pattern = "\\.dta$", full.names = TRUE)
Expand All @@ -92,7 +92,7 @@ dta_files <- list.files(folder_2015, pattern = "\\.dta$", full.names = TRUE)
all_files_filtered <- TRUE
# Get households in selected observatory (Menabe Nord-Est)
hh_to_keep <- read_dta("data/ROS_data_prepared/2015/res_deb.dta") %>%
hh_to_keep <- read_dta("data/dta_format/2015/res_deb.dta") %>%
filter(j0 == 52) %>%
pluck("j5")
Expand Down Expand Up @@ -132,7 +132,7 @@ The main challenge with this procedure is that the names were provided orally by

```{r}
# Usage
ros_data_loc <- "data/ROS_data_prepared/"
ros_data_loc <- "data/dta_format/"
years <- 1995:2015
# Normalizing function as you've provided
Expand Down Expand Up @@ -393,7 +393,7 @@ The Surveyors, supervisors and data entry clerks id numbers are included in the

```{r remove_surveyor_names, eval = FALSE}
for (year in 2011:2015) {
loc <- paste0("data/ROS_data_prepared/", year, "/res_deb.dta")
loc <- paste0("data/dta_format/", year, "/res_deb.dta")
df <- read_dta(loc) %>%
select(-j1_a, -j2_a, -j3_a) %>%
write_dta(loc)
Expand Down Expand Up @@ -427,7 +427,7 @@ if (length(files_with_trimmed_labels) > 0) {
# Define the base path for your folders
base_path <- "data/ROS_data_prepared"
base_path <- "data/dta_format"
# Get the list of yearly folders using base R
year_folders <- list.dirs(base_path, full.names = TRUE, recursive = FALSE)
Expand Down Expand Up @@ -466,8 +466,8 @@ The ROS survey data was originally entered and managed in STATA, which is a prop

```{r convert_data}
# Define source and target directories
source_dir <- "data/ROS_data_prepared"
target_dir <- "data/ROS_data_tsv"
source_dir <- "data/dta_format"
target_dir <- "data/tsv_format"
# Remove the target directory if it exists
if (dir_exists(target_dir)) {
Expand Down
2 changes: 1 addition & 1 deletion index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ library(readxl) # Read Excel files into data frames
library(cowplot) # to combine plots
# Select appropriate folder as data source
data_path <- "data/ROS_data_prepared/"
data_path <- "data/dta_format/"
# Define a function to load and count surveys per observatory for a given year
load_and_count <- function(year, factorize = FALSE) {
Expand Down

0 comments on commit 84c9683

Please sign in to comment.