## TODO: Think about mediated effect - should the effects be calculated as they are now
## where the main variable of interest doesn't change but all the subsequent variables
## do? Should it be calculated using the final model, or a model regressing the dependent
## variable on everything from the block of interest forward, or maybe all past blocks?
pGumbel <- function (q, mu = 0, sigma = 1){
stopifnot(sigma > 0)
exp(-exp(-((q - mu)/sigma)))
}
#' Draw coefficients from posterior distribution
#'
#' Draw coefficients from their posterior distributions implied by the model.
#' @param obj A model object - currently supported models are `lm`, `glm`, `polr`, `multinom`, `svyglm`, `svyolr` and `svymultinom`.
#' @param ... Other arguments to be passed down, currently not implemented.
#'
#' @export
draw_coefs <- function(obj, ...){
UseMethod("draw_coefs")
}
#' @method draw_coefs lm
#' @export
draw_coefs.lm <- function(obj, ...){
out <- MASS::mvrnorm(1, coef(obj), vcov(obj))
out <- matrix(out, nrow=1)
out
}
#' @method draw_coefs glm
#' @export
draw_coefs.glm <- function(obj, ...){
out <- MASS::mvrnorm(1, coef(obj), vcov(obj))
out <- matrix(out, nrow=1)
out
}
#' @method draw_coefs svyglm
#' @export
draw_coefs.svyglm <- function(obj, ...){
out <- MASS::mvrnorm(1, coef(obj), vcov(obj))
out <- matrix(out, nrow=1)
out
}
#' @method draw_coefs multinom
#' @export
draw_coefs.multinom <- function(obj, ...){
b <- coef(obj)
ncb <- ncol(b)
v <- vcov(obj)
v <- cbind(matrix(0, nrow=nrow(v), ncol=ncol(b)), v)
v <- rbind(matrix(0, nrow=ncol(b), ncol = ncol(v)), v)
b <- rbind(0, b)
b <- c(t(b))
out <- MASS::mvrnorm(1, b, v)
out <- matrix(out, nrow=1)
matrix(out[1,], ncol=ncb, byrow=TRUE)
}
#' @method draw_coefs polr
#' @export
draw_coefs.polr <- function(obj, ...){
nb <- length(coef(obj))
nz <- length(obj$zeta) + 2
b <- c(-coef(obj), obj$zeta)
v <- vcov(obj)
out <- MASS::mvrnorm(1, b, v)
out <- matrix(out, nrow=1)
while(any(diff(out[1,(nb+1):ncol(out)]) >= 0)){
out <- MASS::mvrnorm(1, b, v)
out <- matrix(out, nrow=1)
}
mat <- matrix(rep(out[1, 1:nb], nz), ncol=nz, nrow=nb)
mat <- rbind(c(-Inf, out[1,(nb+1):ncol(out)], Inf), mat)
t(mat)
}
#' @method draw_coefs svyolr
#' @export
draw_coefs.svyolr <- function(obj, ...){
b <- coef(obj)
zetas <- which(grepl("\\|", names(b)))
betas <- setdiff(1:length(b), zetas)
nb <- length(betas)
nz <- length(zetas) + 2
v <- vcov(obj)
out <- MASS::mvrnorm(1, b, v)
out <- matrix(out, nrow=1)
while(any(diff(out[1,(nb+1):ncol(out)]) >= 0)){
out <- MASS::mvrnorm(1, b, v)
out <- matrix(out, nrow=1)
}
mat <- matrix(rep(out[1, 1:nb], nz), ncol=nz, nrow=nb)
mat <- rbind(c(-Inf, out[1,(nb+1):ncol(out)], Inf), mat)
t(mat)
}
#' @method draw_coefs svrepstatmisc
#' @export
draw_coefs.svrepstatmisc <- function(obj, ...){
b <- coef(obj)
ncb <- length(b)/length(grep("\\(Intercept\\)", names(b)))
v <- vcov(obj)
v <- cbind(matrix(0, nrow=nrow(v), ncol=ncb), v)
v <- rbind(matrix(0, nrow=ncb, ncol = ncol(v)), v)
b <- c(rep(0, ncb), b)
out <- MASS::mvrnorm(1, b, v)
out <- matrix(out, nrow=1)
matrix(out[1,], ncol=ncb, byrow=TRUE)
}
#' Set-up for Drawing Values
#'
#' The `prob` function sets up values that will be past `draw_vals`. For linear models and
#' gaussian GLMs, these are simply predicted values. For non-linear models, these are inverse-link
#' transformed linear predictor values (e.g., predicted probabilities).
#' @param obj A model object - currently supported models are `lm`, `glm`, `polr`, `multinom`, `svyglm`, `svyolr` and `svymultinom`.
#' @param b A list of model coefficients generated by `draw_coefs()`.
#' @param data A data frame that will be used to generate predictions.
#' @param ... Other arguments to be passed down, currently none implemented.
#'
#' @importFrom stats plogis pnorm
#' @export
prob <- function(obj, b, data, ...){
UseMethod("prob")
}
#' @method prob polr
#' @export
prob.polr <- function(obj, b, data, ...){
pfun <- switch(obj$method, logistic = plogis, probit = pnorm)
X <- model.matrix(obj, data=data)
q <- pfun(X %*% t(b))
D <- matrix(0, ncol = ncol(q)-1, nrow=ncol(q))
for(j in 1:ncol(D)){
D[j,j] <- -1
D[(j+1),j] <- 1
}
q %*% D
}
#' @method prob svyolr
#' @export
prob.svyolr <- function(obj, b, data, ...){
pfun <- switch(obj$method, logistic = plogis, probit = pnorm)
X <- model.matrix(formula(obj), data=data$variables)
q <- pfun(X %*% t(b))
D <- matrix(0, ncol = ncol(q)-1, nrow=ncol(q))
for(j in 1:ncol(D)){
D[j,j] <- -1
D[(j+1),j] <- 1
}
q %*% D
}
#' @method prob lm
#' @export
prob.lm <- function(obj, b, data, ...){
X <- model.matrix(obj, data=data)
X %*% t(b)
}
#' @method prob glm
#' @export
prob.glm <- function(obj, b, data, ...){
X <- model.matrix(obj, data=data)
obj$family$linkinv(X %*% t(b))
}
#' @method prob svyglm
#' @export
prob.svyglm <- function(obj, b, data, ...){
X <- model.matrix(obj, data=data$variables)
obj$family$linkinv(X %*% t(b))
}
#' @method prob multinom
#' @export
prob.multinom <- function(obj, b, data, ...){
X <- model.matrix(formula(obj), data=data)
q <- exp(X %*% t(b))
prop.table(q, 1)
}
#' @method prob svrepstatmisc
#' @export
prob.svrepstatmisc <- function(obj, b, data, ...){
X <- model.matrix(attr(obj, "formula"), data=data$variables)
q <- exp(X %*% t(b))
prop.table(q, 1)
}
## TODO: finish draw_val for polr, svyolr, multinom, svymultinom and svyglm
#' Take a random draw from a variable variable
#' @param obj Model object of class `lm`, `glm`, `polr` or `multinom`
#' @param probs A list that was produced by the `prob()` function.
#' @param e matrix or vector of errors to add to the predicted values to generate the draw in an `lm` or gaussian `glm`.
#' @param expected Logical indicating whether the function should return a draw from the distribution (`FALSE`) or the expected value (`TRUE`, default).
#' @param ... Other arguments to be passed down, currently not implemented.
#'
#' @importFrom MASS mvrnorm
#' @importFrom stats model.matrix coef vcov rmultinom rbinom rnorm plogis family formula pcauchy pnorm
#' @export
draw_val <- function(obj,
probs,
e=NULL,
expected=TRUE,
...){
UseMethod("draw_val")
}
#' @method draw_val lm
#' @export
draw_val.lm <- function(obj,
probs,
e = NULL,
expected=TRUE,
...){
if(!expected){
if(is.null(e)){
sigma <- summary(obj)$sigma
rnorm(length(probs), probs, sigma)
}else{
probs + e
}
}else{
probs
}
}
#' @method draw_val glm
#' @export
draw_val.glm <- function(obj,
probs,
e=NULL,
expected=TRUE,
...){
if(!obj$family$family %in% c("binomial", "gaussian"))stop("Currently only gaussian and binomial GLMs are implemented.\n")
if(!expected){
if(family(obj)$family == "gaussian"){
if(is.null(e)){
sigma <- summary(obj)$dispersion
rnorm(length(probs), probs, sigma)
}else{
probs + e
}
}else{
rbinom(length(probs), 1, probs)
}
}else{
probs
}
}
#'@method draw_val svyglm
#'@export
draw_val.svyglm <- function(obj,
probs,
e=NULL,
expected=TRUE,
...){
if(!obj$family$family %in% c("binomial", "gaussian"))stop("Currently only gaussian and binomial GLMs are implemented.\n")
if(!expected){
if(obj$family$family == "gaussian"){
if(is.null(e)){
sigma <- c(summary(obj)$dispersion)
rnorm(length(probs), probs, sigma)
}else{
probs + e
}
}else{
rbinom(length(probs), 1, probs)
}
}else{
probs
}
}
#'@method draw_val polr
#'@export
draw_val.polr <- function(obj,
probs,
e=NULL,
expected=TRUE,
...){
if(!expected){
fac <- apply(probs, 1, function(z)which.max(rmultinom(1, 1, z)))
fac <- factor(fac, levels=1:length(obj$lev), labels=obj$lev)
unname(fac)
}else{
probs
}
}
#'@method draw_val svyolr
#'@export
draw_val.svyolr <- function(obj, probs, e=NULL, expected=TRUE, ...){
draw_val.polr(obj, probs, e, expected, ...)
}
#'@method draw_val multinom
#'@export
draw_val.multinom <- function(obj,
probs,
e=NULL,
expected=TRUE,
...){
if(!expected){
fac <- apply(probs, 1, function(z)which.max(rmultinom(1, 1, z)))
fac <- factor(fac, levels=1:length(obj$lev), labels=obj$lev)
unname(fac)
}else{
probs
}
}
#'@method draw_val svrepstatmisc
#'@export
draw_val.svrepstatmisc <- function(obj,
probs,
e=NULL,
expected=TRUE,
...){
if(!expected){
fac <- apply(probs, 1, function(z)which.max(rmultinom(1, 1, z)))
fac <- factor(fac, levels=1:length(attr(obj, "lev")), labels=attr(obj, "lev"))
unname(fac)
}else{
probs
}
}
#' Find type of variable
#'
#' Find the variable type to identify the appropriate model for the variable
#'
#' @param x a vector of values to be evaluated
#' @param ... other arguments to be passed down, currently not implemented.
#'
#' @importFrom stats na.omit
#' @export
find_type <- function(x, ...){
if(is.numeric(x) & length(unique(na.omit(x))) > 2){
type <- "lm"
}
if(length(unique(na.omit(x))) == 2 & !is.character(x)){
type <- "glm"
}
if(is.factor(x)){
if(is.ordered(x)){
type <- "polr"
}else{
type <- "multinom"
}
}
type
}
#' Utility function to find upstream variables
#'
#' Finds variables in prior blocks.
#' @param blocks A list where each element identifies a block in the funnel of causality.
#' @param varname The name of the focal variable.
#' @param ... Other arguments to be passed down; currently not implemented.
#' @export
upstream_vars <- function(blocks, varname, ...){
inbl <- sapply(blocks, \(x)any(grepl(varname, x)))
ind <- min(which(inbl))
c(unlist(blocks[1:(ind-1)]))
}
#' Utility function to find downstream variables
#'
#' Finds variables in subsequent blocks.
#' @param blocks A list where each element identifies a block in the funnel of causality.
#' @param varname The name of the focal variable.
#' @param ... Other arguments to be passed down; currently not implemented.
#' @export
downstream_vars <- function(blocks, varname, ...){
inbl <- sapply(blocks, \(x)any(grepl(varname, x)))
ind <- min(which(inbl))
if(ind == length(blocks)){
NULL
}else{
c(unlist(blocks[(ind+1):length(blocks)]))
}
}
#' Specify a Vote Path Analysis
#'
#' The vote path model estimates independent models that would otherwise use a block-recursive
#' structure. Each model in each block is fit independently using the appropriate error distribution.
#'
#' @param blocks A list in which each element contains a vector of variable names for variables that will represent
#' that block's effects. The list may be named or not.
#' @param models If `NULL`, the algorithm will use `find_type()` to find the appropriate model. This assumes that variables
#' that should be estimated using ordinal regression models are ordered factors. Otherwise, models can be a named vector of
#' model types for some or all of the variables included in the model.
#' @param data A data frame containing all of the variables in the `blocks` list.
#' @param ... Other arguments to be passed down.
#'
#' @importFrom stats lm glm family binomial reformulate
#' @importFrom MASS polr
#' @importFrom nnet multinom
#' @importFrom survey svyglm svyolr svydesign as.svrepdesign
#'
#' @export
vote_path <- function(blocks,
data,
models = NULL,
...){
orig_bl <- blocks
blocks <- unname(blocks)
if(!inherits(data, "survey.design")){
types <- sapply(c(unlist(unname(blocks))), function(nm)find_type(data[[nm]]))
}else{
types <- sapply(c(unlist(unname(blocks))), function(nm)find_type(data$variables[[nm]]))
}
if(inherits(data, "survey.design")){
if(any(types == "lm")){
types[which(types == "lm")] <- "svyglm_lm"
}
if(any(types == "glm")){
types[which(types == "glm")] <- "svyglm_bin"
}
if(any(types == "polr")){
types[which(types == "polr")] <- "svyolr"
}
if(any(types == "multinom")){
types[which(types == "multinom")] <- "svymultinom"
}
}
if(any(types == "svymultinom")){
if(!requireNamespace("svrepmisc", quietly=TRUE)){
stop("Must install and load svrepmisc with e.g., remotes::install_github('carlganz/svrepmisc')\n")
}
}
allvars <- c(unlist(unname(blocks)))
dv <- allvars[length(allvars)]
if(!is.null(models)){
types[match(names(models), names(types))] <- models
}
depvars <- c(unlist(blocks[2:length(blocks)]))
res <- vector(mode="list", length=length(depvars))
names(res) <- depvars
for(i in 1:length(depvars)){
preds <- upstream_vars(blocks, depvars[i])
form <- reformulate(preds, response=depvars[i])
if(!inherits(data, "survey.design")){
arglist <- list(formula = form, data=data)
}else{
arglist <- list(formula = form, design=data)
}
if(types[depvars[i]] %in% c("polr")){
arglist$Hess <- TRUE
}
if(types[depvars[i]] %in% c("multinom", "svymultinom")){
arglist$maxit <- 250
arglist$Hess <- TRUE
arglist$trace <- FALSE
if(types[depvars[i]] == "svymultinom"){
arglist$design <- as.svrepdesign(arglist$design)
}
}
if(types[depvars[i]] == "glm"){
arglist$family <- binomial
}
if(types[depvars[i]] == "svyglm_lm"){
types[depvars[i]] <- "svyglm"
}
if(types[depvars[i]] == "svyglm_bin"){
types[depvars[i]] <- "svyglm"
arglist$family <- binomial
}
m <- do.call(types[depvars[i]], arglist)
if(types[depvars[i]] == "svymultinom"){
attr(m, "formula") <- arglist$formula
attr(m, "lev") <- levels(data$variables[[depvars[i]]])
}
res[[i]] <- m
}
out <- list(models = res, blocks = orig_bl)
class(out) <- "votepath"
out
}
#' Simulate Variable Effect
#'
#' Simulate a variable's effect in a vote path analysis.
#'
#' @param obj An object of class `votepath`
#' @param data A data frame that contains all of the variables form the analysis.
#' @param varname The name of a variable whose effect will be evaluated.
#' @param diffchange The amount to change `varname`. Changes will be `x-.5*diffchange` and `x+.5*diffchange`.
#' For categorical variables, the first and last categories will be chosen. The `diffchange` parameter
#' defines the change so long as `vals=NULL`. If `vals` is not `NULL`, then those values will be used for everyone.
#' @param vals A vector of length 2 giving the values that will be used to evaluate the effect size. This will override
#' `diffchange`. The values must be of the same class as the variable being changed. For example, if the variable being
#' changed is a factor, the `vals` vector also has to be a factor with the same levels as the variable in `varname`.
#' @param R Number of simulations to be conducted.
#' @param ... Other arguments to be passed down.
#'
#' @importFrom progress progress_bar
#' @importFrom stats sd
#' @export
sim_effect <- function(obj,
data,
varname,
diffchange=c("unit", "sd"),
vals=NULL,
R=100,
...){
mods <- obj$models
blocks <- obj$blocks
dv <- blocks[[length(blocks)]]
out_i <- out_d <- out_t <- out_br <- NULL
pb <- progress_bar$new(total = R)
if(!is.null(vals) & length(vals) != 2)stop("vals must be a vector of length 2\n")
if(!is.null(vals) & inherits(data[[varname]], "factor") & !inherits(vals, "factor"))stop("vals must have the same class as varname\n")
if(is.factor(data[[varname]]) & is.null(vals)){
levs <- levels(data[[varname]])
vals <- factor(c(1,length(levs)),
levels=1:length(levs),
labels=levs)
}
downvars <- downstream_vars(blocks, varname)
te <- de <- NULL
for(r in 1:R){
new_0 <- new_1 <- data
if(is.null(vals)){
delta <- ifelse(diffchange == "sd", sd(data[[varname]], na.rm=TRUE), 1)
new_0[[varname]] <- new_0[[varname]] - .5*delta
new_1[[varname]] <- new_1[[varname]] + .5*delta
}else{
new_0[[varname]] <- vals[1]
new_1[[varname]] <- vals[2]
}
b_direct <- draw_coefs(mods[[length(mods)]], R=1)
p0_direct <- prob(mods[[length(mods)]], b_direct, new_0)
p1_direct <- prob(mods[[length(mods)]], b_direct, new_1)
if(inherits(mods[[length(mods)]], "lm") & !inherits(mods[[length(mods)]], "glm")){
e_final <- rnorm(nrow(new_0), 0, summary(mods[[length(mods)]])$sigma)
}else{
e_final <- NULL
}
if(inherits(mods[[length(mods)]], "glm")){
if(mods[[length(mods)]]$family$family == "gaussian"){
e_final <- rnorm(nrow(new_0), 0, summary(mods[[length(mods)]])$dispersion)
}
}
d0_direct <- draw_val(mods[[length(mods)]], p0_direct, e=e_final)
d1_direct <- draw_val(mods[[length(mods)]], p1_direct, e=e_final)
if(!is.matrix(d0_direct) & !is.factor(d0_direct)){
d0_direct <- matrix(d0_direct, ncol=1)
d1_direct <- matrix(d1_direct, ncol=1)
}
if(is.factor(d0_direct)){
tab0 <- table(d0_direct)/sum(table(d0_direct))
tab1 <- table(d1_direct)/sum(table(d1_direct))
}
if(is.matrix(d0_direct)){
de <- rbind(de, colMeans(d1_direct-d0_direct))
}else{
de <- rbind(de, tab1-tab0)
}
if(length(downvars) > 0){
for(i in 1:length(downvars)){
b <- draw_coefs(mods[[downvars[i]]], R=1)
p0 <- prob(mods[[downvars[i]]], b, new_0)
p1 <- prob(mods[[downvars[i]]], b, new_1)
if(inherits(mods[[downvars[i]]], "lm") & !inherits(mods[[downvars[i]]], "glm")){
e <- rnorm(nrow(new_0), 0, summary(mods[[downvars[i]]])$sigma)
}else{
e <- NULL
}
if(inherits(mods[[downvars[i]]], "glm")){
if(mods[[downvars[i]]]$family$family == "gaussian"){
e <- rnorm(nrow(new_0), 0, summary(mods[[downvars[i]]])$dispersion)
}
}
new_0[[downvars[i]]] <- draw_val(mods[[downvars[i]]], p0, e=e, expected=FALSE)
new_1[[downvars[i]]] <- draw_val(mods[[downvars[i]]], p1, e=e, expected=FALSE)
}
}
p0_final <- prob(mods[[length(mods)]], b_direct, new_0)
p1_final <- prob(mods[[length(mods)]], b_direct, new_1)
tmp0 <- draw_val(mods[[length(mods)]], p0_final, e=e_final)
tmp1 <- draw_val(mods[[length(mods)]], p1_final, e=e_final)
if(!is.matrix(tmp0) & !is.factor(tmp0)){
tmp0 <- matrix(tmp0, ncol=1)
tmp1 <- matrix(tmp1, ncol=1)
}
if(is.factor(tmp0)){
tab0 <- table(tmp0)/sum(table(tmp0))
tab1 <- table(tmp1)/sum(table(tmp1))
}
if(is.matrix(tmp0)){
te <- rbind(te, colMeans(tmp1-tmp0))
}else{
te <- rbind(te, tab1-tab0)
}
pb$tick()
}
ie <- te-de
if("lev" %in% names(obj$models[[length(obj$models)]])){
cnms <- obj$models[[length(obj$models)]]$lev
colnames(de) <- colnames(te) <- colnames(ie) <- cnms
}
res <- list(total = te, direct= de, indirect=ie)
class(res) <- "simeff"
return(res)
}
##' Summary method for VotePath Simulated Effects
##'
##' @description summary method for objects of class \code{simeff}
##' @param object Object of class \code{simeff}
##' @param ... Other arguments, currently unimplemented
##' @param conf.level Level at which to make the confidence interval
##'
##' @importFrom tibble as_tibble
##' @importFrom magrittr `%>%`
##' @importFrom dplyr mutate bind_rows
##' @importFrom stats quantile
##'
##' @export
##'
##' @method summary simeff
summary.simeff <- function(object, ..., conf.level=.95){
a1 <- (1-conf.level)/2
a2 <- 1-a1
total_sum = apply(object$total, 2, function(x)c(mean(x), unname(quantile(x, c(a1, a2)))))
rownames(total_sum) <- c("Mean", "Lower", "Upper")
total_sum <- as_tibble(t(total_sum), rownames="DV") %>%
mutate(effect = "Total (calculated)")
direct_sum = apply(object$direct, 2, function(x)c(mean(x), unname(quantile(x, c(a1, a2)))))
rownames(direct_sum) <- c("Mean", "Lower", "Upper")
direct_sum <- as_tibble(t(direct_sum), rownames="DV") %>%
mutate(effect = "Direct (calculated)")
indirect_sum_infer = apply(object$indirect, 2, function(x)c(mean(x), unname(quantile(x, c(a1, a2)))))
rownames(indirect_sum_infer) <- c("Mean", "Lower", "Upper")
indirect_sum_infer <- as_tibble(t(indirect_sum_infer), rownames="DV") %>%
mutate(effect = "Indirect (inferred)")
res <- list(
total_c = total_sum,
direct_c = direct_sum,
indirect_i = indirect_sum_infer)
class(res) <- "summary.simeff"
res
}
#' Print method for summary.simeff objects.
#'
#' Print method for the output from summary.simeff.
#'
#' @param x Object of class `summary.simeff`.
#' @param ... Other arguments to be passed down, currently unimplemeneted.
#'
#' @method print summary.simeff
#' @export
print.summary.simeff <- function(x,
...){
cat("Total Effect\n")
print(x$total_c)
cat("Direct Effect\n")
print(x$direct_c)
cat("Indirect Effect\n")
print(x$indirect_i)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.