knitr::opts_chunk$set(fig.pos = "H", out.extra = "", message=FALSE, warning=FALSE, echo=FALSE)

library(shellpipes)
library(glmmTMB)
library(dplyr)
library(ggplot2)
library(vareffects); varefftheme()
library(emmeans)
library(effects)

commandEnvironments()
makeGraphics()

set.seed(2121)

within.category <- TRUE

\section{Continuous predictors}

\subsection{No interaction}

quants <- seq(0, 1, length.out=100)
focal_levels <- quantile(sim_df_cni$age, quants)
print("Truth:")
print(true_prop_cni)
compare_cni_plots <- (combinepreds(mod_cni, focal="age"
        , funs=c("emmeans", "varpred")
        , at=list(age=focal_levels)
        , x.var="age"
        , plotit=TRUE
    )
    + geom_vline(aes(xintercept=mean(focal_levels)), lty=2, colour="grey")
)
print(compare_cni_plots)

\subsection{Interaction between non-focal predictors}

focal_levels <- quantile(sim_df_wi_nf$age, quants)
print("Truth:")
print(true_prop_wi_nf)
compare_wi_nf_plots <- (combinepreds(mod_wi_nf, focal="age"
        , funs=c("emmeans", "varpred")
        , at=list(age=focal_levels)
        , x.var="age"
        , plotit=TRUE
    )
    + geom_vline(aes(xintercept=mean(focal_levels)), lty=2, colour="grey")
)
print(compare_wi_nf_plots)

\subsection{Interaction between focal and non-focal predictors}

focal_levels <- quantile(sim_df_wi_f$age, quants)
print("Truth:")
print(true_prop_wi_f)
compare_wi_f_plots <- (combinepreds(mod_wi_f, focal="age"
        , funs=c("emmeans", "varpred")
        , at=list(age=focal_levels)
        , x.var="age"
        , nesting="wealthindex %in% age"
        , plotit=TRUE
    )
    + geom_vline(aes(xintercept=mean(focal_levels)), lty=2, colour="grey")
)
print(compare_wi_f_plots)

\section{Categorical predictors}

\subsection{No interaction}

compare_catni_plots <- (combinepreds(mod_catni, focal="gender"
        , funs=c("emmeans", "varpred")
        , x.var="gender"
        , within.category=within.category
        , x.var.factor=TRUE
        , plotit=TRUE
    )
    + geom_point(data=focal_prop_catni, aes(x=gender, y=hhsize), colour="black")
)
print(compare_catni_plots)

\section{Why the differences?}

We actually don't know which method is correct. Let us consider a simple simulation:

## Simulation
N <- 100
extraAge <- 0.4
meanAge <- 0.5
sdAge <- 1
prop <- c(0.6, 0.4)
betas <- c(1.5, 0.5, 0.5)
gender <- sample(c("Female", "Male"), N, replace = TRUE, prob = prop)
df <- (data.frame(gender=gender)
    %>% mutate(muAge=ifelse(gender=="Female", meanAge+extraAge, meanAge)
        , age=rnorm(N, muAge, sdAge)
    )
    %>% select(-muAge)
)

mm <- model.matrix(~gender+age, df)
df$hhsize <- rnorm(N, mean=as.vector(mm %*% betas), sd=1)

## Model
mod <- lm(hhsize~gender+age, data=df)
print(mod)

Let us take a look at the observed marginals:

observed_margins <- (df
    %>% group_by(gender)
    %>% summarise_all(mean)
)
observed_margins
varpred_pred <- varpred(mod, "gender", within.category=TRUE, returnall=TRUE)
varpred_mm <- varpred_pred$raw$model.matrix
print(varpred_mm)

Estimates:

print(varpred_pred)

By hand calculation:

varpred_mm %*% coef(mod)
emmeans_grid <- ref_grid(mod)
emmeans_mm <- emmeans_grid@grid
print(emmeans_mm)

Estimates:

emmeans_pred <- emmeans(emmeans_grid, spec=~gender)
print(emmeans_pred)

By hand calculation:

emmeans_mm <- emmeans_pred@linfct
print(emmeans_mm)

as.matrix(emmeans_mm) %*% coef(mod)

Mathematically, in varpred

\begin{align} \mathbb{E}(Y|X) = \begin{cases} (\beta_0 + \alpha) + \beta_1 \bar{{Age_{M}}},& \text{ if } gender = Male\\ \beta_0 + \beta_1 \bar{{Age_{F}}},& \text{ if } gender = Female\ \end{cases} \end{align}

emmeans

\begin{align} \mathbb{E}(Y|X) = \begin{cases} (\beta_0 + \alpha) + \beta_1 \bar{{Age}},& \text{ if } gender = Male\\ \beta_0 + \beta_1 \bar{{Age}},& \text{ if } gender = Female\ \end{cases} \end{align}

\section{Dealing with categorical iterations}

df <- (expand.grid(services=c("water", "garbage", "toilet")
        , gender=c("Male", "Female")
        , statusP=c("Base year", "Not observed", "Unimproved", "Improved")
    )
    %>% mutate(age = rnorm(n())
        , status=rbinom(n(), 1, plogis(rnorm(n(), 3, 5)))
    )
)
head(df)

Model:

toy_mod <- glm(status~services*age
    , data=df
    , family="binomial"
#   , contrasts=list(services="contr.sum")
)

\subsection{varpred}

varpred_all_av <- varpred(toy_mod, "services", handle.inter="emmeans", returnall=TRUE)
print(varpred_all_av$raw$model.matrix)
varpred_no_av <- varpred(toy_mod, "services", handle.inter="effects", returnall=TRUE)
print(varpred_no_av$raw$model.matrix)

\subsection{emmeans}

emmeans_toy <- emmeans(toy_mod, specs=~services, nesting=NULL, weighting="equal")
print(emmeans_toy@linfct)

\subsection{Effects}

effects_toy <- Effect("services", toy_mod)
print(effects_toy$model.matrix)


mac-theobio/effects documentation built on July 6, 2023, 4:19 a.m.