#' @title Descriptive and causal variance decompositions
#'
#' @description The ineqx package implements Rosche (202X). [...]
#'
#' @param treat Character string. Treatment variable. Values must be 0/1
#' @param post Character string. Before/after variable. Values must be 0/1
#' @param y Character string. Dependent variable. Variable must be continuous.
#' @param ystat Character string. Either "Var" (default) or "CV2". Choose to analyze (the effect of x on) the variance or squared coefficient of variation.
#' @param group Character string. Variable must be categorical. Grouping variable to decompose variance into within- and between-group components.
#' @param time Character string. c.x with specify penalized splines, i.x will specify dummies. Time variable to analyze change over time.
#' @param weights Character string. Weight variable.
#' @param controls Character vector with additional control variables. E.g. c("c.age", "i.sex", ...)
#' @param decomp Character string. Either "post" (default) or "effect".
#' @param ref Number, vector, or list. Counterfactual reference point. See details.
#' @param AME_mu Dataframe with average marginal effects (Mu)
#' @param AME_sigma Dataframe with average marginal effects (Sigma)
#' @param dat Dataframe
#'
#' @return List with eight elements: "vars", "dMu", "dSigma", "dW", "dB", "dC", "dP", "dT". See details.
#'
#' @examples data(incdat)
#' # Descriptive variance decomposition
#' decomp_descr <- ineqx(y="inc", group="group", time="i.year", ref=1, dat=incdat)
#' plot(decomp_descr, type="dT")
#'
#' # Causal variance decomposition
#' decomp_treat <- ineqx(treat="x", post="t", y="inc", group="group", time="i.year", ref=1, dat=incdat)
#' plot(decomp_treat, type="dT")
#'
#' @export ineqx
#'
#' @author Benjamin Rosche <benjamin.rosche@@gmail.com>
#'
#' @details
#' The main function of \code{ineqx}. [...]
#'
#'
#' \bold{Counterfactual reference point (ref)}
#'
#' \code{ref} can either be a number, c(number, "variable name"), or list(n=c(...), mu=c(...), sigma=c(...), beta=c(...), lambda=c(...)). [...]
#'
#' \bold{Return}
#'
#' ...
ineqx <- function(treat=NULL, post=NULL, y, ystat="Var", group=NULL, time=NULL, weights=NULL, controls=NULL, decomp="post", ref=NULL, AME_mu=NULL, AME_sigma=NULL, dat) {
# dat = dat; treat="treat"; post=NULL; y="inc"; ystat="Var"; group="g"; time=NULL; ref=1; decomp="post"; controls=NULL; weights=NULL; AME_mu=NULL; AME_sigma=NULL
# ---------------------------------------------------------------------------------------------- #
# Dissect input ----
# ---------------------------------------------------------------------------------------------- #
# treat
c(treat, treatcont, treatpoly, notreat) %<-% dissectVar(treat, cicheck="i")
# post
c(post, postcont, postpoly, nopost) %<-% dissectVar(post, cicheck="i")
# y
y <- dissectVar(y, cicheck = "c")[[1]]
# controls
if(!is.null(controls)) {
ctrl_list <- lapply(controls, FUN=function(x) dissectVar(x, cicheck="ci"))
controls <- lapply(ctrl_list, FUN=function(x) x[[1]]) %>% as.character()
}
# group
group <- dissectVar(group, cicheck = "i")[[1]]
# time
c(time, timecont, timepoly, notime) %<-% dissectVar(time, cicheck = "ci")
# Are manual values provided as reference?
refm <- (!is.null(names(ref)) & all(names(ref) %in% c("n", "mu", "sigma", "beta", "lambda")))
# Check variables and values
if(notreat & !nopost) stop("post can only be specified if treat is specified as well.")
if(!notreat) {
if(!as.character(treat) %in% names(dat)) stop(paste0(treat, " not in dataset."))
treat_levels <- dat %>% pull(treat) %>% unique() %>% sort()
if(!0 %in% treat_levels | length(treat_levels)==1) stop(paste0(treat, " must contain 0 (not treated) and at least one other value (treated)."))
if(!nopost & any(!treat_levels %in% c(0,1))) stop("If post is specified, treat values must be 0 (not treated) or 1 (treated).")
if(nopost & length(treat_levels)>2) warning("Treatment effect was calculated as weighted average of ", paste0("0->", treat_levels[treat_levels!=0], sep=" "), " as treat has multiple treatment values.")
}
if(!nopost) {
if(!as.character(post) %in% names(dat)) stop(paste0(post, " not in dataset."))
post_levels <- dat %>% pull(post) %>% unique() %>% sort()
if(!0 %in% post_levels | length(post_levels)==1) stop(paste0(post, " must contain 0 (pre) and at least one other value (post)."))
if(length(post_levels)>2) warning("Treatment effect was calculated as weighted average of ", paste0("0->", post_levels[post_levels!=0], sep=" "), "as post has multiple post-treatment values.")
}
if(!as.character(decomp) %in% c("post", "effect")) stop("decomp must be 'post' or 'effect'.")
if(!as.character(y) %in% names(dat)) stop(paste0(y, " not in dataset."))
if(!as.character(group) %in% names(dat)) stop(paste0(group, " not in dataset."))
if(!is.null(weights)) if(!as.character(weights) %in% names(dat)) stop(paste0(weights, " not in dataset."))
if(!is.null(controls)) if(any(!controls %in% names(dat))) stop(paste0(paste0(controls[!controls %in% names(dat)], collapse = " + "), " not in dataset."))
if(!notime) {
if(!as.character(time) %in% names(dat)) stop(paste0(time, " not in dataset."))
time_levels <- dat %>% pull(time) %>% unique() %>% sort() # time_levels
if(is.null(ref)) stop("Counterfactual reference point must be given.")
if(!refm) if(!ref[1] %in% time_levels) stop(paste0("ref not observed in ", time, "."))
if(refm) if(0 %in% time_levels) stop(paste0(time, "must not contain 0 if manual values are provided for ref."))
}
# Drop rows with missing values and rename variables
dat <-
dat %>%
dplyr::mutate(across(everything(), as.vector)) %>% # remove labels and other attributes
dplyr::select({{ treat }}, {{ post }}, {{ y }}, {{ group }}, {{ time }}, {{ weights }}, all_of(controls)) %>%
tidyr::drop_na() %>%
dplyr::rename(treat={{ treat }}, post={{ post }}, y={{ y }}, w = {{ weights }}, group={{ group }}, time={{ time }})
if(length(controls)>0) dat <- dat %>% dplyr::rename_with(~paste0("c", 1:length(controls)), .cols = all_of(controls))
# Add variable: treat*post
dat <- if(!is.null(post)) dat %>% dplyr::mutate(tp=treat*post) else dat %>% dplyr::mutate(tp=treat)
# Add variable: time=1 if is.null(time)
if(notime) dat <- dat %>% dplyr::mutate(time=1)
# Levels of group and time
group_levels <- dat %>% pull(group) %>% unique() %>% sort()
time_levels <- dat %>% pull(time) %>% unique() %>% sort()
# Weights
if(!is.null(weights)) w <- "w" else w <- NULL
# ---------------------------------------------------------------------------------------------- #
# Create formulas ----
# ---------------------------------------------------------------------------------------------- #
if(!notime) {
form_time <- if(timecont & timepoly==1) " * time" else if(timecont & timepoly>1) paste0(" * bs(time, df=", timepoly, ")") else " * as.factor(time)"
} else form_time <- ""
if(!is.null(controls)) {
form_ctrl <- paste0(lapply(1:length(controls), FUN=function(i) { if(ctrl_list[[i]]$cont & ctrl_list[[i]]$poly==1) paste0("c", i) else if (ctrl_list[[i]]$cont & ctrl_list[[i]]$poly>1) paste0("bs(c", i, ", df=", ctrl_list[[i]]$poly, ")") else paste0("as.factor(c", i, ")") }), collapse = " + ")
form_ctrl <- paste0(" + ", form_ctrl)
} else form_ctrl <- ""
if(!is.null(treat)) form_treat <- paste0(" tp *") else form_treat <- ""
if(!is.null(post)) form_post <- paste0(" + treat * (as.factor(group)", form_time, ") + post") else form_post <- ""
# Is AME_mu and AME_sigma provided? ------------------------------------------------------------ #
if(!notreat & is.null(AME_mu) & is.null(AME_sigma)) {
# -------------------------------------------------------------------------------------------- #
# Variance function regression ----
# -------------------------------------------------------------------------------------------- #
message("Running variance function regression ...")
vfr <- gamlss(formula = as.formula( paste0("y ~", form_treat, " (as.factor(group)", form_time, ")", form_post, form_ctrl) ), # treat post tp group time group#time treat#group treat#time treat#group#time tp#group tp#time tp#group#time
sigma.formula = as.formula( paste0( " ~", form_treat, " (as.factor(group)", form_time, ")", form_post, form_ctrl) ),
weights = w,
data = dat)
# -------------------------------------------------------------------------------------------- #
# Compute average marginal effects ----
# -------------------------------------------------------------------------------------------- #
message("Computing average marginal effects ...")
# 2do: add SE with predict(,se.fit = T)
AME_mu <- suppressWarnings( calcAME(treat="tp", group="group", time="time", what="mu", vfr, dat) )
AME_sigma <- suppressWarnings( calcAME(treat="tp", group="group", time="time", what="sigma", vfr, dat) )
} else if(notreat) {
# If notreat, AME must be empty
AME_mu <- list()
AME_mu[[1]] <-
tibble() %>%
tidyr::expand(time=time_levels, group=group_levels) %>%
dplyr::mutate(effect=0)
AME_mu[[2]] <-
tibble() %>%
tidyr::expand(time=time_levels) %>%
dplyr::mutate(effect=0)
AME_sigma <- AME_mu
} else {
# manual AME_mu + AME_sigma
AME_mu[[1]] <- AME_mu[[1]] %>% dplyr::rename(time = {{ time }}, group = {{ group }})
AME_mu[[2]] <- AME_mu[[2]] %>% dplyr::rename(time = {{ time }})
AME_sigma[[1]] <- AME_sigma[[1]] %>% dplyr::rename(time = {{ time }}, group = {{ group }})
AME_sigma[[2]] <- AME_sigma[[2]] %>% dplyr::rename(time = {{ time }})
}
# ---------------------------------------------------------------------------------------------- #
# Decomposition ----
# ---------------------------------------------------------------------------------------------- #
message("Performing decomposition ...")
## Calculate within/between variance (pre-treatment, post-treatment and their difference = effect) ----
if(!is.null(treat)) {
# Filter to differentiate treat=t/post=NULL and treat=t/post=p
if(nopost) {
f.pre <- "treat==0" # untreated
f.post <- "treat>0" # treated
} else {
f.pre <- "treat>0&tp==0" # treated, pre-treatment
f.post <- "treat>0&tp>0" # treated, post-treatment
}
wibe.pre <- wibe(y="y", group="group", time="time", weights=w, dat=dat %>% dplyr::filter(eval(parse(text=f.pre))))
wibe.post <- wibe(y="y", group="group", time="time", weights=w, dat=dat %>% dplyr::filter(eval(parse(text=f.post))))
wibe.add <- wibe.post[[2]] # will be added to tables in dW, dW etc (=wibe.post[[2]])
} else {
f.post <- TRUE # no filter
wibe.pre <- wibe(y="y", group="group", time="time", weights=w, dat=dat)
wibe.post <- wibe(y="y", group="group", time="time", weights=w, dat=dat)
wibe.add <- wibe.post[[2]]
}
# If effect decomposition is desired, the difference of post and pre must be calculated
if(!notreat & decomp=="effect") {
if(ystat=="Var") {
wibe.add <-
wibe.pre[[1]] %>% dplyr::select(time, group, n, mu, sigma2) %>%
inner_join(wibe.post[[1]] %>% dplyr::select(time, group, n, mu, sigma2), by=c("group","time"), suffix = c(".pre", ".post")) %>%
group_by(time) %>%
dplyr::mutate(pi.post=n.post/sum(n.post)) %>%
dplyr::summarize(
VarW=sum(pi.post*(sigma2.post-sigma2.pre)),
VarB=sum(pi.post*((mu.post-sum(pi.post*mu.post))^2-(mu.pre-sum(pi.post*mu.pre))^2))
) %>%
ungroup() %>%
dplyr::mutate(VarT=VarW+VarB) %>%
dplyr::select(time, VarW, VarB, VarT)
# correct!
}
if(ystat=="CV2") {
wibe.add <-
wibe.pre[[1]] %>% dplyr::select(time, group, n, mu, sigma2) %>%
inner_join(wibe.post[[1]] %>% dplyr::select(time, group, n, mu, sigma2), by=c("group","time"), suffix = c(".pre", ".post")) %>%
group_by(time) %>%
dplyr::mutate(pi.post=n.post/sum(n.post), pi.pre=n.pre/sum(n.pre)) %>%
dplyr::summarize(
CV2W=sum(pi.post*(sigma2.post-sigma2.pre))/sum(pi.post^2*(mu.post^2-mu.pre^2)),
CV2B=sum(pi.post*((mu.post-sum(pi.post*mu.post))^2-(mu.pre-sum(pi.post*mu.pre))^2))/sum(pi.post^2*(mu.post^2-mu.pre)^2),
) %>%
ungroup() %>%
dplyr::mutate(CV2T=CV2W+CV2B) %>%
dplyr::select(time, CV2W, CV2B, CV2T)
# incorrect! NEEDS TO BE FIXED
}
}
## Gather information on t0 and t1 ---------------------------------------------------------------
wibe.pre_postn <-
wibe.pre[[1]] %>%
dplyr::select(-n) %>%
inner_join(
dat %>% dplyr::filter(eval(parse(text=f.post))) %>% group_by(time, group) %>% dplyr::summarise(n=n()), # post-treatment n
by=c("time", "group"))
# > pre-treatment mu + sigma because AME_mu and AME_sigma calculate the treatment effect on pre-treatment mu + beta
# > post-treatment n because only those who get the treatment will affect inequality
dat01 <- calc01(group="group", time="time", ref=ref, wibe.pre=wibe.pre_postn, AME_mu=AME_mu, AME_sigma=AME_sigma, notime=notime, dat=dat)
# If manual references are provided, the implied inequality must be calculated
if(refm) {
ref <- 0
dat0 <- dat01 %>% dplyr::filter(time==0) %>% dplyr::select(time, group, ends_with("1"))
if(ystat=="Var") {
wibe.add <-
wibe.add %>%
add_row(
tibble(
time=0,
VarW=VarW(dat0$n1, dat0$sigma1+dat0$lambda1),
VarB=VarB(dat0$n1, dat0$mu1+dat0$beta1),
VarT=VarW+VarB
)) %>%
arrange(time)
}
if(ystat=="CV2") {
wibe.add <-
wibe.add %>%
add_row(
tibble(
time=0,
CV2W=CV2W(dat0$n1, dat0$mu1+dat0$beta1, dat0$sigma1+dat0$lambda1),
CV2B=CV2B(dat0$n1, dat0$mu1+dat0$beta1),
CV2T=CV2W+CV2B
)) %>%
arrange(time)
}
}
## Calculate impact ------------------------------------------------------------------------------
# Within
deltaW <-
deltaWB(ystat=ystat, wb="w", decomp=decomp, notreat=notreat, dat01=dat01) %>%
purrr::map(
.f = function(x) {
x %>%
left_join(wibe.add %>% dplyr::select(time, paste0(ystat, "W"), paste0(ystat, "T")), by=c("time")) %>%
ungroup()
})
# Between
deltaB <-
deltaWB(ystat=ystat, wb="b", decomp=decomp, notreat=notreat, dat01=dat01) %>%
purrr::map(
.f = function(x) {
x %>%
left_join(wibe.add %>% dplyr::select(time, paste0(ystat, "B"), paste0(ystat, "T")), by=c("time")) %>%
ungroup()
})
# Compositional
deltaC <- deltaC(notreat, deltaW, deltaB)
# Pre-treatment
if(!is.null(treat)) {
deltaP <- deltaP(notreat, deltaW, deltaB)
} else {
deltaP <- NULL
}
# Total
deltaT <- deltaT(notreat, deltaW, deltaB)
if(ystat=="Var") deltaT[[1]] <- deltaT[[1]] %>% dplyr::mutate(dVarT=VarT-VarT[time == ref[1]])
if(ystat=="CV2") deltaT[[1]] <- deltaT[[1]] %>% dplyr::mutate(dCV2T=CV2T-CV2T[time == ref[1]])
# ---------------------------------------------------------------------------------------------- #
# Rename variables again and return output ----
# ---------------------------------------------------------------------------------------------- #
# Function to rename variables within list
rnm <- function(input, time, group) {
if(!is.null(input)) {
if(!is.null(group)) input[[1]] <- input[[1]] %>% dplyr::rename(!!enquo(group) := group)
if(!is.null(time)) input[[1]] <- input[[1]] %>% dplyr::rename(!!enquo(time) := time)
if(!is.null(time)) input[[2]] <- input[[2]] %>% dplyr::rename(!!enquo(time) := time)
} else {
input <- NULL
}
return(input)
}
AME_mu <- rnm(AME_mu, {{ time }}, {{ group }})
AME_sigma <- rnm(AME_sigma, {{ time }}, {{ group }})
deltaW <- rnm(deltaW, {{ time }}, {{ group }})
deltaB <- rnm(deltaB, {{ time }}, {{ group }})
deltaC <- rnm(deltaC, {{ time }}, {{ group }})
deltaP <- rnm(deltaP, {{ time }}, {{ group }})
deltaT <- rnm(deltaT, {{ time }}, NULL)
# Return
vars <- rlang::enexprs(treat, post, y, group, time, ystat, decomp, ref) %>% as.character()
names(vars) <- c("treat", "post", "y", "group", "time", "ystat", "decomp", "ref")
out <- list("vars"=vars, "dMu"=AME_mu, "dSigma"=AME_sigma, "dW"=deltaW, "dB"=deltaB, "dC"=deltaC, "dP"=deltaP, "dT"=deltaT)
class(out) <- "ineqx"
message("Done.")
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.