## ----loadlibs, eval = FALSE, message = FALSE, warning = FALSE-----------------
# library("nhanesA")
#
# demoj = nhanes("DEMO_J")
# dim(demoj)
## ----ll_print, echo = FALSE---------------------------------------------------
cat(sprintf("[1] 9254 46"))
## ----merge_example, eval = FALSE----------------------------------------------
# ## merge DEMO_J and BXP_J using SEQN.
# bpxj = nhanes("BPX_J")
# data = merge(demoj, bpxj, by="SEQN")
# dim(data)
## ----merge_print, echo = FALSE------------------------------------------------
cat(sprintf("[1] 8704 66"))
## ----survey, eval = FALSE, warning = FALSE, message = FALSE-------------------
# library("survey")
# nhanesDesign <- svydesign(id = ~SDMVPSU, # Primary Sampling Units (PSU)
# strata = ~SDMVSTRA, # Stratification used in the survey
# weights = ~WTMEC2YR, # Survey weights
# nest = TRUE, # Whether PSUs are nested within strata
# data = data)
#
## ----surveydesign, eval = FALSE, message = FALSE, warning = FALSE-------------
#
# # subset survey design object
# dfsub = subset(nhanesDesign, data$RIDAGEYR>=40)
#
# # subset the original dataset
# datasub = data[data$RIDAGEYR>=40,]
#
## ----ethtables, eval = FALSE, message = FALSE, warning=FALSE------------------
#
# ## mean on total data set
#
# mean(datasub$BPXDI1, na.rm = TRUE)
## ----ethtables_print, echo = FALSE--------------------------------------------
cat(sprintf("[1] 73.04455"))
## ----eth_means, eval = FALSE--------------------------------------------------
# ##split the data by ethnicity and calculate mean of the unweighted data
#
# unweighted_means <- sapply(split(datasub$BPXDI1, datasub$RIDRETH1), mean, na.rm=TRUE)
# unweighted_means
#
## ----eth_means_print, echo = FALSE--------------------------------------------
df <- data.frame(matrix(1,nrow=1,ncol=5))
names(df) <- c(' Mexican American',' Other Hispanic',' Non-Hispanic White',
' Non-Hispanic Black',' Other Race - Including Multi-Racial')
df[1,] <- list('72.41000','72.97611','70.84130','75.71466','74.41311')
print(df,row.names=FALSE)
## ----svyby, eval = FALSE, message = FALSE, warning = FALSE--------------------
# adjmns = svymean(~BPXDI1, dfsub, na.rm=TRUE)
# adjmns
## ----svyby_print, echo = FALSE------------------------------------------------
df <- data.frame(matrix(1,nrow=1,ncol=3))
names(df) <- c('','mean', 'SE')
df[1,] <- list('BPXDI1', '73.237', '0.5597')
print(df,row.names=FALSE)
## ----eth_combined, eval = FALSE-----------------------------------------------
# # By ethnicity
# adjmnsbyEth = svyby(~BPXDI1, ~RIDRETH1, dfsub, svymean, na.rm=TRUE)
#
# weighted_means <- as.numeric(adjmnsbyEth$BPXDI1)
#
# combined_data <- cbind(unweighted_means, weighted_means)
# colnames(combined_data) <- c("Unweighted", "Weighted")
# combined_data
#
## ----eth_combined_print, echo = FALSE-----------------------------------------
df_uw <- data.frame(matrix(1,nrow=5,ncol=3))
names(df_uw) <- list('','Unweighted', 'Weighted')
df_uw[1,] <- list('Mexican American','72.41000','74.03194')
df_uw[2,] <- list('Other Hispanic','72.97611','74.73756')
df_uw[3,] <- list('Non-Hispanic White','70.84130','72.45422')
df_uw[4,] <- list('Non-Hispanic Black','75.71466','75.71874')
df_uw[5,] <- list('Other Race - Including Multi-Racial','74.41311','74.60215')
print(df_uw,row.names=FALSE)
## ----bygender, eval = FALSE, message = FALSE, warning = FALSE-----------------
# # By Gender
# mns = sapply(split(datasub$BPXDI1, datasub$RIAGENDR), mean, na.rm=TRUE)
#
# adjmnsbyGen = svyby(~BPXDI1, ~RIAGENDR, dfsub, svymean, na.rm=TRUE)
#
# combined_data <- cbind(mns, adjmnsbyGen$BPXDI1)
# colnames(combined_data) <- c("Unweighted", "Weighted")
# combined_data
#
## ----bygender_print, echo = FALSE---------------------------------------------
df <- data.frame(matrix(1,nrow=2,ncol=3))
names(df) <- c('','Unweighted', 'Weighted')
df[1,] <- list('Male', '74.67330', '75.31134')
df[2,] <- list('Female', '71.43385', '71.31453')
print(df,row.names=FALSE)
## ----svq1, eval = FALSE, message = FALSE, warning = FALSE---------------------
#
# # For the unweighted data
#
# quantile(datasub$BPXDI1, c(0.25,0.5,.75), na.rm = TRUE)
## ----echo = FALSE-------------------------------------------------------------
df <- data.frame(matrix(1,nrow=1,ncol=3))
names(df) <- c('25%', '50%', '75%')
df[1,] <- list('66', '74', '82')
print(df,row.names=FALSE)
## ----svq2, eval = FALSE-------------------------------------------------------
# # For the survey weighted data
# svyquantile(~BPXDI1, dfsub, quantiles = c(0.25,0.5,0.75), na.rm=TRUE)
## ----svq2_print, message = FALSE, echo = FALSE--------------------------------
df <- data.frame(matrix(1,nrow=9,ncol=1))
names(df) <- '$BPXDI1 '
df[1,] <- ' quantile ci.2.5 ci.97.5 se'
df[2,] <- '0.25 66 66 68 0.4691643'
df[3,] <- '0.5 74 74 76 0.4691643'
df[4,] <- '0.75 80 80 82 0.4691643'
df[5,] <- ''
df[6,] <- 'attr(,"hasci") '
df[7,] <- '[1] TRUE '
df[8,] <- 'attr(,"class") '
df[9,] <- '[1] "newsvyquantile" '
print(df, row.names=FALSE)
## ----svq3, eval = FALSE-------------------------------------------------------
# # By Gender
# svyby(~BPXDI1, ~RIAGENDR, dfsub, svyquantile, quantiles = c(0.5), na.rm=TRUE)
#
## ----svq3_print, echo = FALSE-------------------------------------------------
df <- data.frame(matrix(1,nrow=2,ncol=3))
names(df) <- c('RIAGENDR','BPXDI1','se.BPXDI1')
df[1,] <- list('Male','76','0.9383286')
df[2,] <- list('Female','72','0.4691643')
print(df, row.names=FALSE)
## ----eval = FALSE, message = FALSE, warning = FALSE---------------------------
#
# # For the entire dataset
# svyvar(~BPXDI1, dfsub, quantiles = c(0.25,0.5,0.75), na.rm=TRUE)
## ----echo=FALSE---------------------------------------------------------------
df <- data.frame(matrix(1,nrow=1,ncol=3))
names(df) <- c('', 'variance', 'SE')
df[1,] <- list('BPXDI1', '173.68', '12.279')
print(df,row.names=FALSE)
## ----eval = FALSE, message = FALSE, warning = FALSE---------------------------
# # By ethnicity
# svyby(~BPXDI1, ~RIDRETH1, dfsub, svyvar, na.rm=TRUE)
## ----echo=FALSE---------------------------------------------------------------
df <- data.frame(matrix(1,nrow=5,ncol=3))
names(df) <- c('RIDRETH1','BPXDI1','se')
df[1,] <- list('Mexican American','200.8781','45.43074')
df[2,] <- list('Other Hispanic','160.0157','23.79938')
df[3,] <- list('Non-Hispanic White','168.3504','18.30355')
df[4,] <- list('Non-Hispanic Black','202.3565','37.12593')
df[5,] <- list('Other Race - Including Multi-Racial','156.9968','17.26037')
print(df,row.names=FALSE)
## ----eval = FALSE, message = FALSE, warning = FALSE---------------------------
# # By Gender
# svyby(~BPXDI1, ~RIAGENDR, dfsub, svyvar, na.rm=TRUE)
#
## ----echo=FALSE---------------------------------------------------------------
df <- data.frame(matrix(1,nrow=2,ncol=3))
names(df) <- c('RIAGENDR', 'BPXDI1', 'se')
df[1,] <- list('Male', '141.2704', '10.52796')
df[2,] <- list('Female', '196.1199', '20.65706')
print(df,row.names=FALSE)
## ----eval = FALSE, message = FALSE, warning = FALSE---------------------------
# weighted_model <- svyglm(BPXDI1 ~ RIDAGEYR + RIDRETH1, design = dfsub, family = gaussian())
# summary(weighted_model)
## ----svyglm, echo = FALSE-----------------------------------------------------
cat(sprintf(paste('Call:',
'\n',
'\nsvyglm(formula = BPXDI1 ~ RIDAGEYR + RIDRETH1, design = dfsub,',
'\n family = gaussian())',
'\n',
'\nSurvey design:',
'\nsubset(nhanesDesign, data$RIDAGEYR >= 40)',
'\n',
'\nCoefficients:',
'\n Estimate Std. Error t value',
'\n(Intercept) 90.96516 1.24720 72.935',
'\nRIDAGEYR -0.31522 0.01853 -17.012',
'\nRIDRETH1Other Hispanic 1.43894 1.25518 1.146',
'\nRIDRETH1Non-Hispanic White 0.49810 0.73132 0.681',
'\nRIDRETH1Non-Hispanic Black 2.92332 1.09594 2.667',
'\nRIDRETH1Other Race - Including Multi-Racial 1.38532 0.63780 2.172',
'\n Pr(>|t|)',
'\n(Intercept) 5.73e-15 ***',
'\nRIDAGEYR 1.04e-08 ***',
'\nRIDRETH1Other Hispanic 0.2783',
'\nRIDRETH1Non-Hispanic White 0.5113',
'\nRIDRETH1Non-Hispanic Black 0.0236 *',
'\nRIDRETH1Other Race - Including Multi-Racial 0.0550 .',
'\n---',
"\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1",
'\n',
'\n(Dispersion parameter for gaussian family taken to be 162.6653)',
'\n',
'\nNumber of Fisher Scoring iterations: 2'
)))
## ----fig.width=7, eval = FALSE, message = FALSE, warning = FALSE--------------
# library(ggplot2)
#
# ## recalculating means (same as above)
#
# unweighted_means <- sapply(split(datasub$BPXDI1, datasub$RIDRETH1), mean, na.rm=TRUE)
# weighted_means <- as.numeric(adjmnsbyEth$BPXDI1)
#
#
# plot_data <- data.frame(
# Ethnicity = factor(rep(names(unweighted_means), 2),
# levels = names(unweighted_means)),
# Means = c(unweighted_means, weighted_means),
# Type = factor(c(rep("Unweighted", length(unweighted_means)),
# rep("Weighted", length(weighted_means))),
# levels = c("Unweighted", "Weighted"))
# )
#
# # Creating the plot
# ggplot(plot_data, aes(x = Ethnicity, y = Means, fill = Type)) +
# geom_bar(stat = "identity", position = "dodge", width = 0.6) +
# labs(title = "Comparison of Diastolic Blood Pressure by Ethnicity",
# y = "Mean Diastolic Blood Pressure") +
# scale_x_discrete(labels = c('Mex-Amer','Other Hisp','White', 'Black', 'Other')) +
# scale_fill_manual(values = c("blue", "red")) +
# theme_minimal() +
# theme(legend.title = element_blank())
#
#
## ----bp_by_eth_plot, echo = FALSE, fig.width=7, message = FALSE, warning = FALSE----
unweighted_means <- as.numeric(df_uw$Unweighted)
names(unweighted_means) <- df_uw[[1]]
weighted_means <- as.numeric(df_uw$Weighted)
plot_data <- data.frame(
Ethnicity = factor(rep(names(unweighted_means), 2),
levels = names(unweighted_means)),
Means = c(unweighted_means, weighted_means),
Type = factor(c(rep("Unweighted", length(unweighted_means)),
rep("Weighted", length(weighted_means))),
levels = c("Unweighted", "Weighted"))
)
# Creating the plot
library(ggplot2)
ggplot(plot_data, aes(x = Ethnicity, y = Means, fill = Type)) +
geom_bar(stat = "identity", position = "dodge", width = 0.6) +
labs(title = "Comparison of Diastolic Blood Pressure by Ethnicity",
y = "Mean Diastolic Blood Pressure") +
scale_x_discrete(labels = c('Mex-Amer','Other Hisp','White', 'Black', 'Other')) +
scale_fill_manual(values = c("blue", "red")) +
theme_minimal() +
theme(legend.title = element_blank())
## ----fig.width=7, eval = FALSE, message = FALSE, warning = FALSE--------------
#
# unweighted_model <- glm(BPXDI1 ~ RIDAGEYR + RIDRETH1, data = datasub, family = gaussian())
# weighted_model <- svyglm(BPXDI1 ~ RIDAGEYR + RIDRETH1, design = dfsub, family = gaussian())
#
# # For the unweighted model
# unweighted_summary <- summary(unweighted_model)
# unweighted_coefs <- unweighted_summary$coefficients[, "Estimate"]
# unweighted_se <- unweighted_summary$coefficients[, "Std. Error"]
#
# # For the weighted model
# weighted_summary <- summary(weighted_model)
# weighted_coefs <- as.numeric(weighted_summary$coefficients[, "Estimate"])
# weighted_se <- as.numeric(weighted_summary$coefficients[, "Std. Error"])
#
# comparison <- data.frame(
# Variable = names(unweighted_coefs),
# Unweighted = unweighted_coefs,
# Weighted = as.numeric(weighted_coefs)
# )
#
# print(comparison)
## ----visregcompare, echo = FALSE----------------------------------------------
unwt_coef_names <- c("(Intercept)", "RIDAGEYR", "RIDRETH1Other Hispanic", "RIDRETH1Non-Hispanic White", "RIDRETH1Non-Hispanic Black", "RIDRETH1Other Race - Including Multi-Racial")
unweighted_coefs <- c(92.2848149, -0.3460552, 1.2325637, 0.7553912, 4.2996735, 2.0667520)
weighted_coefs <- c(90.9651590, -0.3152158, 1.4389441, 0.4981020, 2.9233165, 1.3853204)
comparison <- data.frame(
Variable = unwt_coef_names,
Unweighted = unweighted_coefs,
Weighted = as.numeric(weighted_coefs)
)
print(comparison)
## ----visregress, eval = FALSE-------------------------------------------------
# unweighted_df <- data.frame(Variable = names(unweighted_coefs),
# Estimate = unweighted_coefs,
# SE = unweighted_se,
# Type = "Unweighted")
#
# weighted_df <- data.frame(Variable = names(unweighted_coefs),
# Estimate = weighted_coefs,
# SE = weighted_se,
# Type = "Weighted")
#
# plot_data <- rbind(unweighted_df, weighted_df)
#
# ggplot(subset(plot_data, Variable!='(Intercept)'), aes(x = Estimate, y = reorder(Variable, Estimate), color = Type)) +
# geom_point(position = position_dodge(0.5), size = 2.5) +
# geom_errorbarh(aes(xmin = Estimate - SE, xmax = Estimate + SE),
# height = 0.2, position = position_dodge(0.5)) +
# geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
# labs(title = "Comparison of Regression Coefficients",
# x = "Coefficient Value", y = "Predictors") +
# theme_minimal() +
# scale_color_manual(values = c("Unweighted" = "blue", "Weighted" = "red")) +
# theme(legend.title = element_blank())
#
## ----regression_coeff, fig.width=7, echo = FALSE, message = FALSE, warning = FALSE----
unweighted_coefs <- c(92.2848149, -0.3460552, 1.2325637, 0.7553912, 4.2996735, 2.0667520)
weighted_coefs <- c(90.9651590, -0.3152158, 1.4389441, 0.4981020, 2.9233165, 1.3853204)
names(unweighted_coefs) <- c("(Intercept)", "RIDAGEYR", "RIDRETH1Other Hispanic", "RIDRETH1Non-Hispanic White", "RIDRETH1Non-Hispanic Black", "RIDRETH1Other Race - Including Multi-Racial")
unweighted_se <- c(1.36340677, 0.02061825, 1.04001732, 0.79478016, 0.83753015, 0.86954266)
weighted_se <- c(1.24720135, 0.01852892, 1.25517931, 0.73131800, 1.09593669, 0.63780221)
unweighted_df <- data.frame(Variable = names(unweighted_coefs),
Estimate = unweighted_coefs,
SE = unweighted_se,
Type = "Unweighted")
weighted_df <- data.frame(Variable = names(unweighted_coefs),
Estimate = weighted_coefs,
SE = weighted_se,
Type = "Weighted")
plot_data <- rbind(unweighted_df, weighted_df)
ggplot(subset(plot_data, Variable!='(Intercept)'), aes(x = Estimate, y = reorder(Variable, Estimate), color = Type)) +
geom_point(position = position_dodge(0.5), size = 2.5) +
geom_errorbarh(aes(xmin = Estimate - SE, xmax = Estimate + SE),
height = 0.2, position = position_dodge(0.5)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
labs(title = "Comparison of Regression Coefficients",
x = "Coefficient Value", y = "Predictors") +
theme_minimal() +
scale_color_manual(values = c("Unweighted" = "blue", "Weighted" = "red")) +
theme(legend.title = element_blank())
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.