Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
.Ruserdata
*.DS_Store
docs/
.claude/
6 changes: 4 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ export(age_cat_fun)
export(binge_drinker_fun)
export(bmi_fun)
export(bmi_fun_cat)
export(calculate_pct_time)
export(categorize_pct_time)
export(check_worksheet)
export(diet_score_fun)
export(diet_score_fun_cat)
Expand All @@ -45,6 +47,7 @@ export(food_insecurity_der)
export(if_else2)
export(immigration_fun)
export(is_equal)
export(load_schema)
export(low_drink_long_fun)
export(low_drink_score_fun)
export(low_drink_score_fun1)
Expand All @@ -54,15 +57,14 @@ export(multiple_conditions_fun1)
export(multiple_conditions_fun2)
export(pack_years_fun)
export(pack_years_fun_cat)
export(pct_time_fun)
export(pct_time_fun_cat)
export(rec_with_table)
export(resp_condition_fun1)
export(resp_condition_fun2)
export(resp_condition_fun3)
export(set_data_labels)
export(smoke_simple_fun)
export(time_quit_smoking_fun)
importFrom(dplyr,case_when)
importFrom(dplyr,do)
importFrom(dplyr,rowwise)
importFrom(dplyr,select)
Expand Down
196 changes: 96 additions & 100 deletions R/percent-time-canada.R
Original file line number Diff line number Diff line change
@@ -1,50 +1,43 @@
#' @title Percent time in Canada
#'
#' @description This function creates a derived variable (pct_time_der) that
#' provides an estimated percentage of the time a person's life was spent in
#' Canada.
#'
#' @param DHHGAGE_cont continuous age variable.
#'
#' @param SDCGCBG whether or not someone was born in Canada (1 - born in Canada,
#' 2 - born outside Canada)
#'
#' @param SDCGRES how long someone has lived in Canada. Note: in the PUMF CCHS
#' datasets, this is a categorical variable with two categories (1 - 0-9 years;
#' 2 - 10+ years).
#'
#' @note Since SDCGRES is a categorical variable measuring length of time, we've
#' set midpoints in the function. A respondent identified as being in Canada
#' for 0-9 years is assigned a value of 4.5 years, and someone who has been in
#' Canada for 10 or more years is assigned a value of 15 years.
#'
#' @return Numeric value between 0 and 100 that represents
#' percentage of a respondent's time in Canada
#' @title Calculate percent time in Canada
#'
#' @description Calculates the percentage of a respondent's life spent in
#' Canada. Used by pct_time_der for both PUMF and master databases. The
#' worksheet maps different feeder variables for each database type, but
#' the calculation is the same: for respondents born outside Canada,
#' percent time = years in Canada / age * 100.
#'
#' For PUMF data, the categorical time-in-Canada variable (SDCGRES) is
#' converted to continuous midpoints upstream via the SDCGRES_cont intermediate
#' variable in variable_details.csv (1 = 4.5 years, 2 = 15 years). For master
#' data, continuous years (SDCDRES) are passed directly.
#'
#' @param age continuous age variable.
#' @param born_in_canada whether or not someone was born in Canada
#' (1 - born in Canada, 2 - born outside Canada).
#' @param years_in_canada continuous years in Canada. For PUMF data, this is
#' derived from the categorical SDCGRES via midpoint conversion in
#' variable_details.csv. For master data, this is the actual continuous
#' years from SDCDRES.
#'
#' @return Numeric value between 0 and 100 that represents percentage of a
#' respondent's time in Canada. Returns \code{tagged_na("b")} for invalid or
#' missing inputs.
#'
#' @examples
#' # Using pct_time_fun() to create percent time values between CCHS cycles
#' # pct_time_fun() is specified in variable_details.csv along with the CCHS
#' # variables and cycles included.
#'
#' # To transform pct_time_der across cycles, use rec_with_table() for each CCHS
#' # cycle and specify pct_time_der, along with age (DHHGAGE_cont), whether or
#' # not someone was born in Canada (SDCGCBG), how long someone has lived in
#' # Canada (SDCGRES). Then by using merge_rec_data(), you can combine
#' # pct_time_der across cycles
#'
#' # Using rec_with_table() across CCHS cycles (PUMF)
#' library(cchsflow)
#' pct_time2009_2010 <- rec_with_table(
#' cchs2009_2010_p, c(
#' "DHHGAGE_cont", "SDCGCBG",
#' "SDCGRES", "pct_time_der"
#' "SDCGRES_cont", "pct_time_der"
#' )
#' )
#' head(pct_time2009_2010)
#'
#' pct_time2011_2012 <- rec_with_table(
#' cchs2011_2012_p, c(
#' cchs2011_2012_p, c(
#' "DHHGAGE_cont", "SDCGCBG",
#' "SDCGRES", "pct_time_der"
#' "SDCGRES_cont", "pct_time_der"
#' )
#' )
#' tail(pct_time2011_2012)
Expand All @@ -53,97 +46,100 @@
#' head(combined_pct_time)
#' tail(combined_pct_time)
#'
#' # Using pct_time_fun() to generate a value for percent time spent in Canada
#' # with user-supplied values. Suppose you are 27 years old, were born
#' # outside of Canada, and have been living in Canada for less than 10 years.
#' # Your estimated percent time spent in Canada can be calculated as follows:
#' # Scalar usage (pass continuous years directly)
#' calculate_pct_time(age = 27, born_in_canada = 2, years_in_canada = 4.5)
#'
#' pct_time <- pct_time_fun(DHHGAGE_cont = 27, SDCGCBG = 2, SDCGRES = 1)
#'
#' print(pct_time)
#' # Vector usage
#' calculate_pct_time(
#' age = c(27, 40, 35),
#' born_in_canada = c(2, 1, 2),
#' years_in_canada = c(4.5, 4.5, 15)
#' )
#' @export
pct_time_fun <-
function(DHHGAGE_cont, SDCGCBG, SDCGRES) {
# Estimates the percentage of life spent in Canada from age (DHHGAGE_cont),
# the born-in-Canada flag (SDCGCBG) and the categorical residency length
# (SDCGRES).
# NOTE(review): the plain `if` below needs a length-one condition, so this
# presumably expects scalar inputs -- confirm against is_equal().

# Born in Canada (code 1): the whole life was spent in Canada.
if (is_equal(SDCGCBG, 1)) {
return(100)
}
# Keep a positive age as is; otherwise leave the function with NA(b).
# NOTE(review): return() is passed as an argument here, so the early exit
# relies on if_else2() (defined elsewhere) evaluating that argument only when
# the condition fails -- confirm.
DHHGAGE_cont <- if_else2(DHHGAGE_cont > 0, DHHGAGE_cont,
return(tagged_na("b")))
# Replace the residency category with a midpoint in years: 1 -> 4.5,
# 2 -> 15. Any other code exits with NA(b), via the same return()-as-argument
# pattern as above.
SDCGRES <- if_else2(SDCGRES == 1, 4.5,
if_else2(SDCGRES == 2, 15, return(tagged_na("b"))))

# Born outside Canada (code 2): years in Canada over age, as a percentage.
# Any other birthplace code yields NA(b).
if_else2(SDCGCBG == 2, (SDCGRES / DHHGAGE_cont * 100), tagged_na("b"))
}
calculate_pct_time <- function(age, born_in_canada, years_in_canada) {
  # Percent of life spent in Canada: 100 for respondents born in Canada
  # (code 1); years_in_canada / age * 100 for respondents born outside Canada
  # (code 2) with a positive age and non-missing years; NA(b) otherwise.
  canadian_born <- born_in_canada == 1
  can_compute <- born_in_canada == 2 & age > 0 & !is.na(years_in_canada)

  raw_pct <- dplyr::case_when(
    canadian_born ~ 100,
    can_compute ~ years_in_canada / age * 100,
    TRUE ~ tagged_na("b")
  )

  # Output validation. Missing values are passed through unchanged, as is
  # every value inside [0, 100]. Whatever remains is out of range (for
  # example years_in_canada greater than age) and is recoded to NA(b).
  keep <- is.na(raw_pct) | (raw_pct >= 0 & raw_pct <= 100)
  dplyr::case_when(
    keep ~ raw_pct,
    TRUE ~ tagged_na("b")
  )
}

#' @title Categorical percent time in Canada
#'
#' @description This function creates a categorical derived variable
#' (pct_time_der_cat10) that categorizes the derived percent time in Canada
#' variable (pct_time_der).
#'
#' @details The percent time in Canada provides an estimated percentage of the
#' time a person's life was spent in Canada. The categorical percent time in
#' @title Categorize percent time in Canada
#'
#' @description Categorizes the derived percent time in Canada variable
#' (pct_time_der) into 10 percent intervals for pct_time_der_cat10.
#'
#' @details The percent time in Canada provides an estimated percentage of the
#' time a person's life was spent in Canada. The categorical percent time in
#' Canada divides the continuous value into 10 percent intervals.
#'
#' pct_time_der_cat10 uses the derived variable pct_time_der. pct_time_der uses
#' various variables that have been transformed by cchsflow (see documentation
#' on pct_time_der). In order to categorize percent time in Canada across CCHS
#'
#' pct_time_der_cat10 uses the derived variable pct_time_der. pct_time_der uses
#' various variables that have been transformed by cchsflow (see documentation
#' on pct_time_der). In order to categorize percent time in Canada across CCHS
#' cycles, the variables must be transformed and harmonized.
#'
#' @param pct_time_der derived continuous percent time in Canada.
#' See \code{\link{pct_time_fun}} for documentation on how variable was derived.
#'
#' @return value for categorical percent time in Canada using pct_time_der
#' variable.
#' See \code{\link{calculate_pct_time}} for documentation on how variable was
#' derived.
#'
#' @examples
#' # Using pct_time_fun_cat() to create categorical percent time values
#' # between CCHS cycles.
#' # pct_time_fun_cat() is specified in variable_details.csv along with the CCHS
#' # variables and cycles included.
#'
#' # To transform pct_time_der_cat10 across cycles, use rec_with_table() for
#' # each CCHS cycle.
#' # Since pct_time_der is a derived variable, you will have to specify the
#' # variables that it is derived from.
#' # Then by using merge_rec_data(), you can combine pct_time_der_cat10 across
#' # cycles.
#' @return Character value for categorical percent time in Canada ("1" through
#' "10"), "NA(a)" for not applicable, or "NA(b)" for missing/invalid inputs.
#'
#' @examples
#' # Using rec_with_table() across CCHS cycles
#' library(cchsflow)
#' pct_time_cat2009_2010 <- rec_with_table(
#' cchs2009_2010_p, c(
#' "DHHGAGE_cont", "SDCGCBG",
#' "SDCGRES", "pct_time_der", "pct_time_der_cat10"
#' "SDCGRES_cont", "pct_time_der", "pct_time_der_cat10"
#' )
#' )
#' head(pct_time_cat2009_2010)
#'
#' pct_time_cat2011_2012 <- rec_with_table(
#' cchs2011_2012_p, c(
#' cchs2011_2012_p, c(
#' "DHHGAGE_cont", "SDCGCBG",
#' "SDCGRES", "pct_time_der", "pct_time_der_cat10"
#' "SDCGRES_cont", "pct_time_der", "pct_time_der_cat10"
#' )
#' )
#' tail(pct_time_cat2011_2012)
#'
#' combined_pct_time_cat <- merge_rec_data(pct_time_cat2009_2010,
#' combined_pct_time_cat <- merge_rec_data(pct_time_cat2009_2010,
#' pct_time_cat2011_2012)
#' head(combined_pct_time_cat)
#' tail(combined_pct_time_cat)
#'
#' # Scalar usage
#' categorize_pct_time(55.5)
#'
#' # Vector usage
#' categorize_pct_time(c(5, 25, 55, 85, 100))
#'
#' @export
#'
pct_time_fun_cat <-
function(pct_time_der){
# Maps pct_time_der onto ten 10-point bands through a nested if_else2()
# chain: [0, 10] gives 1, (10, 20] gives 2, and so on up to (90, 100]
# giving 10.
# The bands come back as numbers while the two fall-through codes are the
# strings "NA(a)" (input tagged NA "a") and "NA(b)" (everything else).
# NOTE(review): how if_else2() treats vector or missing conditions is not
# visible here -- confirm against its definition.
if_else2(pct_time_der >= 0 & pct_time_der <= 10, 1,
if_else2(pct_time_der > 10 & pct_time_der <= 20, 2,
if_else2(pct_time_der > 20 & pct_time_der <= 30, 3,
if_else2(pct_time_der > 30 & pct_time_der <= 40, 4,
if_else2(pct_time_der > 40 & pct_time_der <= 50, 5,
if_else2(pct_time_der > 50 & pct_time_der <= 60, 6,
if_else2(pct_time_der > 60 & pct_time_der <= 70, 7,
if_else2(pct_time_der > 70 & pct_time_der <= 80, 8,
if_else2(pct_time_der > 80 & pct_time_der <= 90, 9,
if_else2(pct_time_der > 90 & pct_time_der <= 100, 10,
if_else2(haven::is_tagged_na(pct_time_der, "a"), "NA(a)", "NA(b)")))))))))))
#'
categorize_pct_time <- function(pct_time_der) {
  # Bins pct_time_der into ten 10-point bands labelled "1" to "10".
  # case_when() uses the first matching row, so once "not applicable" and
  # negative values have been dealt with, each band only has to test its
  # upper bound. Missing values match no comparison and fall through to the
  # final "NA(b)" row, as does anything above 100.
  dplyr::case_when(
    haven::is_tagged_na(pct_time_der, "a") ~ "NA(a)",
    pct_time_der < 0 ~ "NA(b)",
    pct_time_der <= 10 ~ "1",
    pct_time_der <= 20 ~ "2",
    pct_time_der <= 30 ~ "3",
    pct_time_der <= 40 ~ "4",
    pct_time_der <= 50 ~ "5",
    pct_time_der <= 60 ~ "6",
    pct_time_der <= 70 ~ "7",
    pct_time_der <= 80 ~ "8",
    pct_time_der <= 90 ~ "9",
    pct_time_der <= 100 ~ "10",
    TRUE ~ "NA(b)"
  )
}
2 changes: 1 addition & 1 deletion R/recode-with-table.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ is_equal <- function(v1, v2) {
#' tail(combined_bmi)
#' @importFrom haven tagged_na
#' @importFrom stringr str_match
#' @importFrom dplyr rowwise select do
#' @importFrom dplyr rowwise select do case_when
#' @importFrom magrittr %>%
#' @export

Expand Down
Loading