knitr::opts_chunk$set(fig.pos = "H"
    , out.extra = ""
    , message=FALSE
    , warning=FALSE
    , echo=FALSE
    , comment=""
)

library(shellpipes)
library(dplyr)
library(ggplot2)
library(vareffects); varefftheme()
library(emmeans)
library(effects)

commandEnvironments()
makeGraphics()

set.seed(2121)

load("variable_predictions_funs.rda")

cols <- c(Effect="black", varpred="red", emmeans="blue")

\section{Two categorical, single continuous predictors}

\begin{align}\label{sim:lm_no_interaction_cat} \mathrm{hh~size} &= \beta_0 + \beta_{\mathrm{A}}\mathrm{Age}i + \beta{[\mathrm{r}]}\mathrm{Religion_i} + \beta_{[\mathrm{g}]}{\mathrm{Gender}i} + \beta{[\mathrm{rg}]}\mathrm{Gender_i \times Religion_i} + \epsilon_i \nonumber\ \mathrm{Age}i &\sim \mathrm{Normal}(0, 3) \nonumber\ \mathrm{Prob(Gender{i})} &= \begin{cases} 0.4 & \mathrm{i=Female} \nonumber\ 0.6 & \mathrm{i=Male} \nonumber\ \end{cases} \nonumber\ \mathrm{Prob(Religion_{i})} &= \begin{cases} 0.6 & \mathrm{i=Christian} \nonumber\ 0.3 & \mathrm{i=Muslim} \nonumber\ 0.1 & \mathrm{i=Jew} \nonumber\ \end{cases} \nonumber\ \epsilon_i &\sim \mathrm{Normal}(0, 1) \nonumber\ \beta_0 &= 1.5 \nonumber\ \beta_{\mathrm{A}} &= 0.8 \nonumber\ \vdots \nonumber \ i &= 1,\cdots, 100 \end{align}

N <- 100
quant <- seq(0,1,length.out=50)
## Betas
b0 <- 10
b_age <- 0.8
b_jew <- 1
b_muslim <- 0.4
b_male <- 2
b_male_jew <- 0.7
b_male_muslim <- 0.2
betas <- c(b0, b_age, b_jew, b_muslim, b_male, b_male_jew, b_male_muslim)

## Proportions
rel_prob <- c(Christian=0.6, Muslim=0.3, Jew=0.1)
gender_prob <- c(Male=0.6, Female=0.4)

dat <- tibble(age=rnorm(N, 0, 3)
    , rel=sample(c("Christian", "Muslim", "Jew"), size=N, prob=rel_prob, replace=TRUE)
    , gender=sample(c("Male", "Female"), size=N, prob=gender_prob, replace=TRUE)
)
mm <- model.matrix(~1+age+rel*gender, dat)
eta <- as.vector(mm %*% betas)

df <- (dat
    %>% mutate(hhsize=rnorm(N, mean=eta, sd=1))
)
age_at <- quantile(df$age, quant)
head(df)

The model:

depend_eff_mod <- lm(hhsize~rel*gender+age, df)

\newpage

\subsubsection{gender$\star$religion}

depend_eff_pred <- combinepreds(depend_eff_mod, c("emmeans", "varpred", "Effect"), c("gender","rel")
    , x.var = "gender"
    , weights="proportional"
    , x.var.factor = TRUE, plotit = TRUE
)
print(depend_eff_pred 
    + facet_wrap(~rel, scales = "free_y")
    + scale_colour_manual(values=cols)
)

# varpred(depend_eff_mod, "gender")
# varpred(depend_eff_mod, "gender", bias.adjust="population")

\newpage

\subsubsection{Main effect of non-interacting predictor}

depend_eff_age <- combinepreds(depend_eff_mod, c("emmeans", "varpred", "Effect"), "age"
    , x.var = "age"
    , at=list(age=age_at)
    , xlevel=list(age=age_at)
    , nesting=NULL
    , weights="proportional"
    , plotit = TRUE
)
cat("Truth: \n", mean(df$hhsize))
print(depend_eff_age
    + scale_colour_manual(values=cols)
)

\section{Unbalanced data}

N <- 1000
quant <- seq(0,1,length.out=100)
## Betas
b0 <- 10
b_age <- 0.8
b_jew <- 1
b_muslim <- 0.4
b_male <- 2
b_male_jew <- 0.7
b_male_muslim <- 0.2
betas <- c(b0, b_age, b_jew, b_muslim, b_male, b_male_jew, b_male_muslim)

## Proportions
rel_prob <- c(Christian=0.6, Muslim=0.3, Jew=0.1)
gender_prob <- list(Christian=c(0.97, 0.03)
    , Muslim=c(0.01, 0.99)
    , Jew=c(0.5, 0.5)
)

samplefun <- function(prob, n) {
    out <- sample(c("Male", "Female"), size=n, prob=prob, replace=TRUE)
}

dat <- tibble(age=rnorm(N, 0, 3)
    , rel=sample(c("Christian", "Muslim", "Jew"), size=N, prob=rel_prob, replace=TRUE)
)
dat <- (dat
    %>% group_by(rel)
    %>% mutate(gender=ifelse(rel=="Christian"
            , samplefun(gender_prob[["Christian"]], n())
                , ifelse(rel=="Muslim", samplefun(gender_prob[["Muslim"]], n())
                , samplefun(gender_prob[["Jew"]], n())
            )
        )
    )
    %>% ungroup()
)
head(dat)
mm <- model.matrix(~1+age+rel*gender, dat)
eta <- as.vector(mm %*% betas)

unbalanced_df <- (dat
    %>% mutate(hhsize=rnorm(N, mean=eta, sd=1))
)
age_at <- quantile(unbalanced_df$age, quant)
head(unbalanced_df)

The model:

unbalanced_mod <- lm(hhsize~rel*gender+age, unbalanced_df)

\newpage

\subsubsection{gender$\star$religion}

unbalanced_pred <- combinepreds(unbalanced_mod, c("emmeans", "varpred", "Effect"), c("gender","rel")
    , x.var = "gender"
    , weights="proportional"
    , x.var.factor = TRUE, plotit = TRUE
)
print(unbalanced_pred 
    + facet_wrap(~rel, scales = "free_y")
    + scale_colour_manual(values=cols)
)
table(unbalanced_df$rel, unbalanced_df$gender)

\newpage

\subsubsection{Main effect of non-interacting predictor}

We are interested in predicted hhsize at the model center. We can explore this in two different ways:

  1. Center of the focal predictor $$\mathrm{E(hhsize|{\bar{x}{{f}}, \bar{x}{{n}}})}$$

  2. Averaged over levels of focal predictor $$\mathrm{mean ~ E(hhsize_i|{x_{i{f}}, \bar{x}_{{n}}})}$$

A few challenges:

In our simulation example: \begin{align} \mathrm{E(hhsize|age)} &= \mathrm{\beta_0 + \beta_A Age + \beta_J Jew + \beta_M Muslim + \beta_M Male}\nonumber\ &+ \mathrm{\beta_{MJ} Male \times Jew + \beta_{MM} Male \times Muslim} \end{align}

The non-focal categorical interactions can be handled in two ways:

Using Eqn. 2 with approach 1 (center of the focal predictor)


mean_age <- mean(unbalanced_df$age)
mean_hhsize <- mean(unbalanced_df$hhsize)
unbalanced_age <- combinepreds(unbalanced_mod, c("emmeans", "varpred", "Effect")
    , focal = "age"
    , x.var = "age"
    , at=list(age=mean_age)
    , xlevel=list(age=rep(mean_age,5))
    , nesting=NULL
    , weights="proportional"
    , plotit = FALSE
    , ci=FALSE
)
unbalanced_age <- (unbalanced_age
    %>% group_by(model)
    %>% summarize(fit=mean(fit))
    %>% mutate(age=mean_age
        , dev=abs(fit-mean_hhsize)
    )
    %>% data.frame()
)
unbalanced_age

An alternative is using Eqn 2 with approach 2


mean_age <- mean(age_at)
unbalanced_age_levels <- combinepreds(unbalanced_mod, c("emmeans", "varpred", "Effect")
    , focal = "age"
    , x.var = "age"
    , at=list(age=age_at)
    , xlevel=list(age=age_at)
    , nesting=NULL
    , weights="proportional"
    , plotit = FALSE
    , ci=FALSE
)
unbalanced_age_levels <- (unbalanced_age_levels
    %>% group_by(model)
    %>% summarize(fit=mean(fit))
    %>% mutate(age=mean_age
        , dev=abs(fit-mean_hhsize)
    )
    %>% data.frame()
)
unbalanced_age_levels

The later heavily depends on the choice of the levels of the focal predictor



mac-theobio/effects documentation built on July 6, 2023, 4:19 a.m.