Nothing
#' A function to estimate conditional expected values and higher order moments
#'
#' This function fits a glm regression specified by the user to
#' estimate conditional moments.
#'
#'
#'
#' @param treat is the name of the treatment variable contained in \code{data}.
#' @param treat_formula an object of class "formula" (or one that can be coerced to that class)
#' that regresses \code{treat} on a linear combination of \code{X}:
#' a symbolic description of the model to be fitted.
#' @param data is a dataframe containing \code{Y}, \code{treat}, and \code{X}.
#' @param treat_mod a description of the error distribution
#' to be used in the model for treatment. Options include:
#' \code{"Normal"} for normal model,
#' \code{"LogNormal"} for lognormal model,
#' \code{"Sqrt"} for square-root transformation
#' to a normal treatment,
#' \code{"Poisson"} for Poisson model,
#' \code{"NegBinom"} for negative binomial model,
#' \code{"Gamma"} for gamma model.
#' @param link_function is either "log", "inverse", or "identity" for the "Gamma" \code{treat_mod}.
#'
#' @param ... additional arguments to be passed to the low level treatment regression fitting functions.
#'
#' @return
#' \code{t_mod} returns a list containing the following elements:
#' \item{T_data}{a dataframe containing estimated treatment, estimated treatment squared,
#' estimated treatment cube, estimated treatment quartic, and estimated gps.}
#' \item{T_result}{the result of the treatment model fit.}
#'
#' @references Schafer, J.L., Galagate, D.L. (2015). Causal inference with a
#' continuous treatment and outcome: alternative estimators for parametric
#' dose-response models. \emph{Manuscript in preparation}.
#'
#' @seealso \code{\link{ismw_est}}, \code{\link{reg_est}},
#' \code{\link{wtrg_est}}, \code{\link{aipwee_est}}, etc. for other estimates.
#'
#'
#' \code{\link{overlap_fun}} to prepare the \code{data} for use in the different estimates.
#'
#' @examples
#'
#' ## Example from Schafer (2015).
#'
#' example_data <- sim_data
#'
#' t_mod_list <- t_mod(treat = T,
#' treat_formula = T ~ B.1 + B.2 + B.3 + B.4 + B.5 + B.6 + B.7 + B.8,
#' data = example_data,
#' treat_mod = "Normal")
#'
#' cond_exp_data <- t_mod_list$T_data
#'
#' full_data <- cbind(example_data, cond_exp_data)
#'
#' rm(example_data, t_mod_list, cond_exp_data, full_data)
#'
#'
#' @usage
#'
#' t_mod(treat,
#' treat_formula,
#' data,
#' treat_mod,
#' link_function,
#' ...)
#'
#' @export
#'
t_mod <- function(treat,
treat_formula,
data,
treat_mod,
link_function,
...){
# treat is the name of the treatment variable
# treat_formula is the formula for the treatment model
# data will contain all the data: X, treat
# treat_mod is th treatment model to fit
# link_function is the link function used, if needed
# The outcome is a list of 2 objects:
# (1) a data.frame containing the estimated conditional higher order moments.
# (2) the result from the treament model fit.
#save input
tempcall <- match.call()
#some basic input checks
if (!("treat" %in% names(tempcall))) stop("No treat variable specified")
if (!("treat_formula" %in% names(tempcall))) stop("No treat_formula model specified")
if (!("data" %in% names(tempcall))) stop("No data specified")
if (!("treat_mod" %in% names(tempcall)) | ("treat_mod" %in% names(tempcall) & !(tempcall$treat_mod %in% c("NegBinom", "Poisson", "Gamma", "LogNormal", "Sqrt", "Normal")))) stop("No valid family specified (\"NegBinom\", \"Poisson\", \"Gamma\", \"Log\", \"Sqrt\", \"Normal\")")
if (tempcall$treat_mod == "Gamma") {if(!(tempcall$link_function %in% c("log", "inverse"))) stop("No valid link function specified for family = Gamma (\"log\", \"inverse\")")}
if (tempcall$treat_mod == "binomial") {if(!(tempcall$link_function %in% c("logit", "probit", "cauchit", "log", "cloglog"))) stop("No valid link function specified for family = binomial (\"logit\", \"probit\", \"cauchit\", \"log\", \"cloglog\")")}
if (tempcall$treat_mod == "ordinal" ) {if(!(tempcall$link_function %in% c("logit", "probit", "cauchit", "cloglog"))) stop("No valid link function specified for family = ordinal (\"logit\", \"probit\", \"cauchit\", \"cloglog\")")}
#make new dataframe for newly computed variables, to prevent variable name conflicts
tempdat <- data.frame(
treat = data[,as.character(tempcall$treat)]
)
samp_dat <- data
# make a formula for the treatment model
# formula_t <- eval(parse(text = paste(deparse(tempcall$treat, width.cutoff = 500), deparse(tempcall$treat_formula, width.cutoff = 500), sep = "")))
formula_t <- tempcall$treat_formula
if (treat_mod == "NegBinom"){
result <- MASS::glm.nb(formula_t,
data = samp_dat,
...)
cond_mean <- result$fitted.values
cond_var <- cond_mean + cond_mean^2/result$theta
prob_nb_est <- (cond_var - cond_mean) / cond_var
pp <- (cond_var^2 - cond_mean) / cond_var^2
rr <- cond_mean^2 / (cond_var^2 - cond_mean)
# Need to get estimates of higher moments of NegBinom distributed RV using the MGF
# fill in the formulas....
est_treat <- pp * rr /(1 - pp)
est_treat_sq <- pp * rr * (pp * rr + 1) / (pp - 1)^2
est_treat_cube <- - pp * rr * (pp^2 * rr^2 + 3 * pp * rr + pp + 1) / (pp - 1)^3
est_treat_quartic <- (pp^4 * rr^4 + 6 * pp^3 * rr^3 + pp^2 * (4 * pp + 7) * rr^2 + pp * (pp^2 + 4 * pp + 1) * rr) / (pp - 1)^4
gps <- dnbinom(x = tempdat$treat,
size = result$theta,
mu = result$fitted.values,
log = FALSE)
t_mod_data <- data.frame(cbind(est_treat,
est_treat_sq,
est_treat_cube,
est_treat_quartic,
gps))
} else if (treat_mod == "Poisson"){
result <- glm(formula_t,
family = "poisson",
data = samp_dat,
...)
cond_mean <- result$fitted.values
# Need to get estimates of higher moments of gamma distributed RV using the MGF
est_treat <- cond_mean
est_treat_sq <- cond_mean^2 + cond_mean
est_treat_cube <- cond_mean^3 + 3*cond_mean^2 + cond_mean
est_treat_quartic <- cond_mean^4 + 6*cond_mean^2 + 7*cond_mean^2 + cond_mean
gps <- dpois(x = tempdat$treat,
lambda = cond_mean)
t_mod_data <- data.frame(cbind(est_treat,
est_treat_sq,
est_treat_cube,
est_treat_quartic,
gps))
} else if (treat_mod == "Gamma"){
result <- glm(formula_t,
family = Gamma(link = link_function),
data = samp_dat,
... )
est_treat <- result$fitted
shape_gamma <- as.numeric(MASS::gamma.shape(result)[1])
theta_given_X <- result$fitted.values/shape_gamma
GPS_gamma <- dgamma(tempdat$treat,
shape = shape_gamma,
scale = theta_given_X)
# Need to get estimates of higher moments of gamma distributed RV using the MGF
est_treat <- shape_gamma * theta_given_X
est_treat_sq <- theta_given_X^2 * shape_gamma * (shape_gamma + 1)
est_treat_cube <- theta_given_X^3 * shape_gamma * (shape_gamma + 1) * (shape_gamma + 2)
est_treat_quartic <- theta_given_X^4 * shape_gamma * (shape_gamma + 1) * (shape_gamma + 2) * (shape_gamma + 3)
gps <- dgamma(tempdat$treat,
shape = shape_gamma,
scale = theta_given_X)
t_mod_data <- data.frame(cbind(est_treat,
est_treat_sq,
est_treat_cube,
est_treat_quartic,
gps))
} else if (treat_mod == "LogNormal"){
samp_dat[, as.character(tempcall$treat)] <- log(samp_dat[, as.character(tempcall$treat)])
result <- lm(formula_t,
data = samp_dat,
... )
est_log_treat <- result$fitted
sigma_est <- summary(result)$sigma
# moment function calculator for lognormal RV
# mu is the mean from normal and sigma is from the normal density
moment_fun <- function(mu,
sigma,
k){
return(exp(k * mu + k^2 * sigma^2 / 2) )
}
# Need to get estimates of higher moments of normally distributed RV using the MGF
est_treat <- moment_fun(est_log_treat,
sigma_est,
1)
est_treat_sq <- moment_fun(est_log_treat,
sigma_est,
2)
est_treat_cube <- moment_fun(est_log_treat,
sigma_est,
3)
est_treat_quartic <- moment_fun(est_log_treat,
sigma_est,
4)
gps <- dnorm((samp_dat[, as.character(tempcall$treat)] - result$fitted)/summary(result)$sigma,
mean = 0,
sd = 1 )
t_mod_data <- data.frame(cbind(est_treat,
est_treat_sq,
est_treat_cube,
est_treat_quartic,
gps))
} else if (treat_mod == "Sqrt"){
samp_dat[, as.character(tempcall$treat)] <- sqrt(samp_dat[, as.character(tempcall$treat)])
result <- lm(formula_t,
data = samp_dat,
... )
est_sqrt_treat <- result$fitted
sigma_est <- summary(result)$sigma
# Need to get estimates of higher moments of normally distributed RV using the MGF
est_treat <- result$fitted^2 +
var(result$fitted)
est_treat_sq <- result$fitted^4 +
6 * result$fitted^2 * var(result$fitted) +
3 * var(result$fitted)^2
est_treat_cube <- result$fitted^6 +
15 * result$fitted^4 * var(result$fitted) +
45 * result$fitted^2 * var(result$fitted)^2 +
15 * var(result$fitted)^3
est_treat_quartic <- result$fitted^8 +
28 * result$fitted^6 * var(result$fitted) +
210 * result$fitted^4 * var(result$fitted)^2 +
420 * result$fitted^2 * var(result$fitted)^3 +
105 * var(result$fitted)^4
gps <- dnorm((samp_dat[, as.character(tempcall$treat)] - result$fitted)/summary(result)$sigma,
mean = 0,
sd = 1 )
t_mod_data <- data.frame(cbind(est_treat,
est_treat_sq,
est_treat_cube,
est_treat_quartic,
gps))
} else if (treat_mod == "Normal"){
result <- lm(formula_t,
data = samp_dat,
... )
est_treat <- result$fitted
sigma_est <- summary(result)$sigma
# Need to get estimates of higher moments of normally distributed RV using the MGF
est_treat <- result$fitted
est_treat_sq <- result$fitted^2 + var(result$fitted)
est_treat_cube <- result$fitted^3 + 3 * result$fitted * var(result$fitted)
est_treat_quartic <- result$fitted^4 + 6 * result$fitted^2 * var(result$fitted) + 3 * var(result$fitted)^2
gps <- dnorm(tempdat$treat,
mean = est_treat,
sd = summary(result)$sigma )
t_mod_data <- data.frame(cbind(est_treat,
est_treat_sq,
est_treat_cube,
est_treat_quartic,
gps))
} else {
stop("No valid treatment model chosen. Please try again.")
}
return(list(T_data = t_mod_data, T_result = result))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.