knitr::opts_chunk$set(fig.pos = "H", out.extra = "", message=FALSE, warning=FALSE, echo=FALSE) library(shellpipes) library(glmmTMB) library(dplyr) library(ggplot2) library(vareffects); varefftheme() library(emmeans) library(effects) commandEnvironments() makeGraphics() set.seed(2121) within.category <- TRUE
\section{Continuous predictors}
\subsection{No interaction}
quants <- seq(0, 1, length.out=100) focal_levels <- quantile(sim_df_cni$age, quants) print("Truth:") print(true_prop_cni) compare_cni_plots <- (combinepreds(mod_cni, focal="age" , funs=c("emmeans", "varpred") , at=list(age=focal_levels) , x.var="age" , plotit=TRUE ) + geom_vline(aes(xintercept=mean(focal_levels)), lty=2, colour="grey") ) print(compare_cni_plots)
\subsection{Interaction between non-focal predictors}
focal_levels <- quantile(sim_df_wi_nf$age, quants) print("Truth:") print(true_prop_wi_nf) compare_wi_nf_plots <- (combinepreds(mod_wi_nf, focal="age" , funs=c("emmeans", "varpred") , at=list(age=focal_levels) , x.var="age" , plotit=TRUE ) + geom_vline(aes(xintercept=mean(focal_levels)), lty=2, colour="grey") ) print(compare_wi_nf_plots)
\subsection{Interaction between focal and non-focal predictors}
focal_levels <- quantile(sim_df_wi_f$age, quants) print("Truth:") print(true_prop_wi_f) compare_wi_f_plots <- (combinepreds(mod_wi_f, focal="age" , funs=c("emmeans", "varpred") , at=list(age=focal_levels) , x.var="age" , nesting="wealthindex %in% age" , plotit=TRUE ) + geom_vline(aes(xintercept=mean(focal_levels)), lty=2, colour="grey") ) print(compare_wi_f_plots)
\section{Categorical predictors}
\subsection{No interaction}
compare_catni_plots <- (combinepreds(mod_catni, focal="gender" , funs=c("emmeans", "varpred") , x.var="gender" , within.category=within.category , x.var.factor=TRUE , plotit=TRUE ) + geom_point(data=focal_prop_catni, aes(x=gender, y=hhsize), colour="black") ) print(compare_catni_plots)
\section{Why the differences?}
We actually don't know which method is correct. Let us consider a simple simulation:
## Simulation N <- 100 extraAge <- 0.4 meanAge <- 0.5 sdAge <- 1 prop <- c(0.6, 0.4) betas <- c(1.5, 0.5, 0.5) gender <- sample(c("Female", "Male"), N, replace = TRUE, prob = prop) df <- (data.frame(gender=gender) %>% mutate(muAge=ifelse(gender=="Female", meanAge+extraAge, meanAge) , age=rnorm(N, muAge, sdAge) ) %>% select(-muAge) ) mm <- model.matrix(~gender+age, df) df$hhsize <- rnorm(N, mean=as.vector(mm %*% betas), sd=1) ## Model mod <- lm(hhsize~gender+age, data=df) print(mod)
Let us take a look at the observed marginals:
observed_margins <- (df %>% group_by(gender) %>% summarise_all(mean) ) observed_margins
varpred_pred <- varpred(mod, "gender", within.category=TRUE, returnall=TRUE) varpred_mm <- varpred_pred$raw$model.matrix print(varpred_mm)
Estimates:
print(varpred_pred)
By hand calculation:
varpred_mm %*% coef(mod)
emmeans_grid <- ref_grid(mod) emmeans_mm <- emmeans_grid@grid print(emmeans_mm)
Estimates:
emmeans_pred <- emmeans(emmeans_grid, spec=~gender) print(emmeans_pred)
By hand calculation:
emmeans_mm <- emmeans_pred@linfct print(emmeans_mm) as.matrix(emmeans_mm) %*% coef(mod)
Mathematically, in varpred
\begin{align} \mathbb{E}(Y|X) = \begin{cases} (\beta_0 + \alpha) + \beta_1 \bar{{Age_{M}}},& \text{ if } gender = Male\\ \beta_0 + \beta_1 \bar{{Age_{F}}},& \text{ if } gender = Female\ \end{cases} \end{align}
emmeans
\begin{align} \mathbb{E}(Y|X) = \begin{cases} (\beta_0 + \alpha) + \beta_1 \bar{{Age}},& \text{ if } gender = Male\\ \beta_0 + \beta_1 \bar{{Age}},& \text{ if } gender = Female\ \end{cases} \end{align}
\section{Dealing with categorical iterations}
df <- (expand.grid(services=c("water", "garbage", "toilet") , gender=c("Male", "Female") , statusP=c("Base year", "Not observed", "Unimproved", "Improved") ) %>% mutate(age = rnorm(n()) , status=rbinom(n(), 1, plogis(rnorm(n(), 3, 5))) ) ) head(df)
Model:
toy_mod <- glm(status~services*age , data=df , family="binomial" # , contrasts=list(services="contr.sum") )
\subsection{varpred}
varpred_all_av <- varpred(toy_mod, "services", handle.inter="emmeans", returnall=TRUE) print(varpred_all_av$raw$model.matrix)
varpred_no_av <- varpred(toy_mod, "services", handle.inter="effects", returnall=TRUE) print(varpred_no_av$raw$model.matrix)
\subsection{emmeans}
emmeans_toy <- emmeans(toy_mod, specs=~services, nesting=NULL, weighting="equal") print(emmeans_toy@linfct)
\subsection{Effects}
effects_toy <- Effect("services", toy_mod) print(effects_toy$model.matrix)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.