library(tidyr)
library(readr)
library(dplyr)
library(stringr)
library(magrittr)
library(glptools)
library(feather)
library(survey)
path <- "data-raw/health/brfss/"
brfss_micro <- read_feather("data-raw/microdata/brfss_micro.feather")
function(df, var, age_var = "age", pop_var = "population"){
df$var <- df[[var]]
df$age_var <- df[[age_var]]
df$pop_var <- df[[pop_var]]
grouping_vars <- c("FIPS", "year", "race", "sex")
# Label age groups
if(age_var == "age"){
age_groups <-
data.frame(
age_var = 0:100,
age_group = c(
0,
rep(1, 4),
rep(2, 10),
rep(3, 10),
rep(4, 10),
rep(5, 10),
rep(6, 10),
rep(7, 10),
rep(8, 10),
rep(9, 10),
rep(10, 16)))
df <- df %>%
left_join(age_groups, by = c("age_var"))
} else if(age_var == "age_10"){
df$age_group <- df$age_var
}
# Summarise variable and population variable by age groups
df <- df %>%
group_by_at(df %cols_in% c("age_group", grouping_vars)) %>%
summarise(
var = sum(var, na.rm = TRUE),
pop_var = sum(pop_var, na.rm = TRUE)) %>%
ungroup()
# Standard population from the CDC: https://wonder.cdc.gov/wonder/help/ucd.html#2000%20Standard%20Population
std_pop <-
data.frame(
age_group = 0:10,
weight = c(
0.013818,
0.055317,
0.145565,
0.138646,
0.135573,
0.162613,
0.134834,
0.087247,
0.066037,
0.044842,
0.015508))
# Subset standardized population to only ages present in the sample,
# standardize to total 1, and join to data frame
std_pop <- std_pop %>%
filter(age_group %in% df$age_group) %>%
mutate(weight = weight / sum(weight))
df <- df %>%
left_join(std_pop, by = "age_group") %>%
group_by_at(df %cols_in% grouping_vars) %>%
summarise(rate = sum(var / pop_var * weight) * 100000) %>%
ungroup()
df[[var]] <- df$rate
df <- df %>% select(-rate)
df
}
# Recode data
brfss_micro %<>%
filter(year > 2002) %>%
mutate(
poor_or_fair = case_when(
genhlth %in% 1:3 ~ F,
genhlth %in% 4:5 ~ T,
genhlth %in% c(7, 9) ~ NA),
physdays = case_when(
physdays == 88 ~ 0,
physdays %in% 1:30 ~ as.double(physdays),
physdays %in% c(77, 99) ~ NA_real_),
mentdays = case_when(
mentdays == 88 ~ 0,
mentdays %in% 1:30 ~ as.double(mentdays),
mentdays %in% c(77, 99) ~ NA_real_),
diabetes = case_when(
diabetes == 1 ~ T,
diabetes %in% 2:4 ~ F,
diabetes %in% c(7, 9) ~ NA),
asthma = case_when(
asthma == 1 ~ T,
asthma == 2 ~ F,
asthma %in% c(7, 9) ~ NA),
pcp = case_when(
pcp %in% 1:2 ~ T,
pcp == 3 ~ F,
pcp %in% c(7, 9) ~ NA))
age_groups <-
data.frame(
age_var = 0:100,
age_group = c(
0,
rep(1, 4),
rep(2, 10),
rep(3, 10),
rep(4, 10),
rep(5, 10),
rep(6, 10),
rep(7, 10),
rep(8, 10),
rep(9, 10),
rep(10, 16)))
age_groups <-
data.frame(
age_var = 18:99,
age_group = c(
rep(1, length(18:44)),
rep(2, length(45:64)),
rep(3, length(65:99))))
std_pop <-
data.frame(
age_group = 1:3,
weight = c(
0.530534557,
0.299194019,
0.170271424))
brfss_micro %<>% left_join(age_groups, by = c("age" = "age_var"))
std_pop <-
data.frame(
age_group = 0:10,
weight = c(
0.013818,
0.055317,
0.145565,
0.138646,
0.135573,
0.162613,
0.134834,
0.087247,
0.066037,
0.044842,
0.015508))
std_pop <- std_pop %>%
filter(age_group %in% brfss_micro$age_group) %>%
mutate(weight = weight / sum(weight))
brfss_prevelance <- brfss_micro %>%
group_by(MSA, year, age_group) %>%
summarise(age_pop = sum(wgt), .groups = "drop") %>%
group_by(MSA, year) %>%
transmute(
age_group,
group_pct = age_pop / sum(age_pop, na.rm = TRUE)) %>%
ungroup() %>%
left_join(std_pop, by = "age_group") %>%
transmute(
MSA, year, age_group,
wgt_adjustment = weight / group_pct)
# test <- brfss_micro %>%
# group_by(MSA, year) %>%
# left_join(brfss_prevelance, by = c("MSA", "year", "age_group")) %>%
# mutate(wgt2 = wgt * wgt_adjustment) %>%
# summarise(
# pcp_raw = weighted.mean(pcp, wgt, na.rm = TRUE),
# pcp_age = weighted.mean(pcp, wgt2, na.rm = TRUE)) %>%
# ungroup()
#
# test %<>%
# left_join(std_pop, by = "age_group") %>%
# group_by(MSA, year) %>%
# summarise(pcp = sum(pcp * weight)) %>%
# ungroup()
demogs <- c("total", "sex", "race")
tictoc::tic()
brfss_msa_1yr <- bind_df(
svy_race_sex(brfss_micro, "poor_or_fair", "proportion", "wgt", "MSA", breakdowns = demogs),
svy_race_sex(brfss_micro, "physdays", "mean", "wgt", "MSA", breakdowns = demogs),
svy_race_sex(brfss_micro, "mentdays", "mean", "wgt", "MSA", breakdowns = demogs),
svy_race_sex(brfss_micro, "diabetes", "proportion", "wgt", "MSA", breakdowns = demogs),
svy_race_sex(brfss_micro, "asthma", "proportion", "wgt", "MSA", breakdowns = demogs),
svy_race_sex(brfss_micro, "pcp", "proportion", "wgt", "MSA", breakdowns = demogs))
tictoc::toc()
write_feather(brfss_msa_1yr, path %p% "brfss_msa_1yr.feather")
# The Greensboro MSA does not have enough results to be included after 2014.
# Model data based off CHR Greensboro data and the Greensboro AHEC
# (contains Greensboro MSA and 5 other counties)
# Also need to model physical and mentally healthy days
#
# greensboro_chr_14 <- read_csv(path %p% "2014.csv")
# greensboro_chr_15 <- read_csv(path %p% "2015.csv")
# greensboro_chr_16 <- read_csv(path %p% "2016.csv")
#
# greensboro_chr_14 %<>%
# filter(
# STATECODE == "37",
# COUNTYCODE %in% c("081", "151", "157")) %>%
# transmute(
# FIPS = paste0(STATECODE, COUNTYCODE),
# year = 2014,
# poor_or_fair = `Poor or fair health Value` * 100,
# physdays = `Poor physical health days Value`,
# mentdays = `Poor mental health days Value`,
# diabetes = `Diabetes Value` * 100)
#
# greensboro_chr_15 %<>%
# filter(`5-Digit FIPS Code` %in% c("37081", "37151", "37157")) %>%
# transmute(
# FIPS = `5-Digit FIPS Code`,
# year = 2015,
# poor_or_fair = as.numeric(`Poor or fair health Value`) * 100,
# physdays = as.numeric(`Poor physical health days Value`),
# mentdays = as.numeric(`Poor mental health days Value`),
# diabetes = as.numeric(`Diabetes Value`) * 100)
#
# greensboro_chr_16 %<>%
# filter(`5-digit FIPS Code` %in% c("37081", "37151", "37157")) %>%
# transmute(
# FIPS = `5-digit FIPS Code`,
# year = 2016,
# poor_or_fair = as.numeric(`Poor or fair health raw value`) * 100,
# physdays = as.numeric(`Poor physical health days raw value`),
# mentdays = as.numeric(`Poor mental health days raw value`),
# diabetes = as.numeric(`Diabetes prevalence raw value`) * 100)
#
# greensboro_chr_17 <- greensboro_chr_16 %>%
# mutate(year = 2017)
#
# greensboro_chr <- bind_rows(greensboro_chr_14, greensboro_chr_15, greensboro_chr_16, greensboro_chr_17) %>%
# mutate(race = "total", sex = "total") %>%
# left_join(glpdata:::population_msa_counties, by = c("FIPS", "year", "sex", "race")) %>%
# select(-race, -sex)
#
# greensboro_chr %<>%
# select(-FIPS) %>%
# group_by(year) %>%
# summarise_all(~weighted.mean(., population)) %>%
# select(-population) %>%
# filter(year > 2014) %>%
# mutate(MSA = 24660, sex = "total", race = "total")
#
# brfss_msa_1yr %<>%
# filter(!(MSA == 24660 & sex == "total" & race == "total" & year >= 2015)) %>%
# bind_rows(greensboro_chr) %>%
brfss_msa_1yr %<>%
organize()
usethis::use_data(brfss_msa_1yr)
rm(brfss_micro, greensboro_chr_14, greensboro_chr_15,
greensboro_chr_16, greensboro_chr_17, greensboro_chr, path)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.