#' Compute the Statistical Mode of a Vector
#' @aliases Mode mode
#' @param x a vector of numeric, factor, or ordered values
#' @return the statistical mode of the vector. If more than one mode exists,
#' the last one in the factor order is arbitrarily chosen (by design)
#' @export
#' @author Christopher Gandrud and Matt Owen
Mode <- function (x) {
# build a table of values of x
tab <- table(as.factor(x))
# find the mode, if there is more than one arbitrarily pick the last
max_tab <- names(which(tab == max(tab)))
v <- max_tab[length(max_tab)]
# if it came in as a factor, we need to re-cast it as a factor, with the same exact levels
if (is.factor(x))
return(factor(v, levels = levels(x)))
# re-cast as any other data-type
as(v, class(x))
}
## Zelig 3 and 4 backward compatibility
## This enables backward compatibility, but results in a warning when library attached
# mode <- Mode
#' Compute the Statistical Median of a Vector
#' @param x a vector of numeric or ordered values
#' @param na.rm ignored
#' @return the median of the vector
#' @export
#' @author Matt Owen
Median <- function (x, na.rm=NULL) {
v <- ifelse(is.numeric(x),
median(x),
levels(x)[ceiling(median(as.numeric(x)))]
)
if (is.ordered(x))
v <- factor(v, levels(x))
v
}
#' Create a table, but ensure that the correct
#' columns exist. In particular, this allows for
#' entires with zero as a value, which is not
#' the default for standard tables
#' @param x a vector
#' @param levels a vector of levels
#' @param ... parameters for table
#' @return a table
#' @author Matt Owen
table.levels <- function (x, levels, ...) {
# if levels are not explicitly set, then
# search inside of x
if (missing(levels)) {
levels <- attr(x, 'levels')
table(factor(x, levels=levels), ...)
}
# otherwise just do the normal thing
else {
table(factor(x, levels=levels), ...)
}
}
#' Compute central tendancy as approrpriate to data type
#' @param val a vector of values
#' @return a mean (if numeric) or a median (if ordered) or mode (otherwise)
#' @export
avg <- function(val) {
if (is.numeric(val))
mean(val)
else if (is.ordered(val))
Median(val)
else
Mode(val)
}
#' Set new value of a factor variable, checking for existing levels
#' @param fv factor variable
#' @param v value
#' @return a factor variable with a value \code{val} and the same levels
#' @keywords internal
setfactor <- function (fv, v) {
lev <- levels(fv)
if (!v %in% lev)
stop("Wrong factor")
return(factor(v, levels = lev))
}
#' Set new value of a variable as approrpriate to data type
#' @param val old value
#' @param newval new value
#' @return a variable of the same type with a value \code{val}
#' @keywords internal
setval <- function(val, newval) {
if (is.numeric(val))
newval
else if (is.ordered(val))
newval
else if (is.logical(val))
newval
else {
lev <- levels(val)
if (!newval %in% lev)
stop("Wrong factor", call. = FALSE)
return(factor(newval, levels = lev))
}
}
#' Calculate the reduced dataset to be used in \code{\link{setx}}
#'
#' #' This method is used internally
#'
#' @param dataset Zelig object data, possibly split to deal with \code{by}
#' argument
#' @param s list of variables and their tentative \code{setx} values
#' @param formula a simplified version of the Zelig object formula (typically
#' with 1 on the lhs)
#' @param data Zelig object data
#' @param avg function of data transformations
#' @return a list of all the model variables either at their central tendancy or
#' their \code{setx} value
#'
#' @keywords internal
#' @author Christine Choirat and Christopher Gandrud
#' @export
reduce = function(dataset, s, formula, data, avg = avg) {
pred <- try(terms(fit <- lm(formula, data), "predvars"), silent = TRUE)
if ("try-error" %in% class(pred)) # exp and weibull
pred <- try(terms(fit <- survreg(formula, data), "predvars"),
silent = TRUE)
dataset <- model.frame(fit)
ldata <- lapply(dataset, avg)
if (length(s) > 0) {
n <- union(as.character(attr(pred, "predvars"))[-1], names(dataset))
if (is.list(s[[1]])) s <- s[[1]]
m <- match(names(s), n)
ma <- m[!is.na(m)]
if (!all(complete.cases(m))) {
w <- paste("Variable '", names(s[is.na(m)]), "' not in data set.\n",
sep = "")
stop(w, call. = FALSE)
}
for (i in seq(n[ma])) {
ldata[n[ma]][i][[1]] <- setval(dataset[n[ma]][i][[1]],
s[n[ma]][i][[1]])
}
}
return(ldata)
}
#' Create QI summary matrix
#' @param qi quantity of interest in the discrete case
#' @return a formatted qi
#' @keywords internal
#' @author Christine Choirat
statmat <- function(qi) {
if (!is.matrix(qi))
qi <- as.matrix(qi, ncol = 1)
m <- t(apply(qi, 2, quantile, c(.5, .025, .975), na.rm = TRUE))
n <- matrix(apply(qi, 2, mean, na.rm = TRUE))
colnames(n) <- "mean"
o <- matrix(apply(qi, 2, sd, na.rm = TRUE))
colnames(o) <- "sd"
p <- cbind(n, o, m)
return(p)
}
#' Describe Here
#' @param qi quantity of interest in the discrete case
#' @param num number of simulations
#' @return a formatted quantity of interest
#' @keywords internal
#' @author Christine Choirat
statlevel <- function(qi, num) {
if (is.matrix(qi)){
#m <- t(apply(qi, 2, table)) / num
all.levels <- levels(qi)
m <- t(apply(qi, 2, function(x)
table(factor(x, levels=all.levels)))) / num
} else {
m <- table(qi) / num
}
return(m)
}
#' Pass Quantities of Interest to Appropriate Summary Function
#'
#' @param qi quantity of interest (e.g., estimated value or predicted value)
#' @param num number of simulations
#' @return a formatted qi
#' @keywords internal
#' @author Christine Choirat
stat <- function(qi, num) {
if (is.null(attr(qi, "levels")))
return(statmat(qi))
else
return(statlevel(qi, num))
}
#' Generate Formulae that Consider Clustering
#'
#' This method is used internally by the "Zelig" Package to interpret
#' clustering in GEE models.
#' @param formula a formula object
#' @param cluster a vector
#' @return a formula object describing clustering
cluster.formula <- function (formula, cluster) {
# Convert LHS of formula to a string
lhs <- deparse(formula[[2]])
cluster.part <- if (is.null(cluster))
# NULL values require
sprintf("cluster(1:nrow(%s))", lhs)
else
# Otherwise we trust user input
sprintf("cluster(%s)", cluster)
update(formula, paste(". ~ .", cluster.part, sep = " + "))
}
#' Zelig Copy of plyr::mutate to avoid namespace conflict with dplyr
#'
#' @source Hadley Wickham (2011). The Split-Apply-Combine Strategy for Data
#' Analysis. Journal of Statistical Software, 40(1), 1-29. URL
#' \url{http://www.jstatsoft.org/v40/i01/}.
#' @keywords internal
zelig_mutate <- function (.data, ...)
{
stopifnot(is.data.frame(.data) || is.list(.data) || is.environment(.data))
cols <- as.list(substitute(list(...))[-1])
cols <- cols[names(cols) != ""]
for (col in names(cols)) {
.data[[col]] <- eval(cols[[col]], .data, parent.frame())
}
.data
}
#' Convenience function for setrange and setrange1
#'
#' @param x data passed to setrange or setrange1
#' @keywords internal
expand_grid_setrange <- function(x) {
# m <- expand.grid(x)
set_lengths <- unlist(lapply(x, length))
unique_set_lengths <- unique(as.vector(set_lengths))
m <- data.frame()
for (i in unique_set_lengths) {
temp_df <- data.frame(row.names = 1:i)
for (u in 1:length(x)) {
if (length(x[[u]]) == i) {
temp_df <- cbind(temp_df, x[[u]])
names(temp_df)[ncol(temp_df)] <- names(x)[u]
}
}
if (nrow(m) == 0) m <- temp_df
else m <- merge(m, temp_df)
}
if (nrow(m) == 1)
warning('Only one fitted observation provided to setrange.\nConsider using setx instead.',
call. = FALSE)
return(m)
}
#' Bundle Multiply Imputed Data Sets into an Object for Zelig
#'
#' This object prepares multiply imputed data sets so they can be used by
#' \code{zelig}.
#' @note This function creates a list of \code{data.frame} objects, which
#' resembles the storage of imputed data sets in the \code{amelia} object.
#' @param ... a set of \code{data.frame}'s or a single list of \code{data.frame}'s
#' @return an \code{mi} object composed of a list of data frames.
#'
#' @author Matt Owen, James Honaker, and Christopher Gandrud
#'
#' @examples
#' # create datasets
#' n <- 100
#' x1 <- runif(n)
#' x2 <- runif(n)
#' y <- rnorm(n)
#' data.1 <- data.frame(y = y, x = x1)
#' data.2 <- data.frame(y = y, x = x2)
#'
#' # merge datasets into one object as if imputed datasets
#'
#' mi.out <- to_zelig_mi(data.1, data.2)
#'
#' # pass object in place of data argument
#' z.out <- zelig(y ~ x, model = "ls", data = mi.out)
#' @export
to_zelig_mi <- function (...) {
# Get arguments as list
imputations <- list(...)
# If user passes a list of data.frames rather than several data.frames as separate arguments
if((class(imputations[[1]]) == 'list') & (length(imputations) == 1)){
imputations = imputations[[1]]
}
# Labelling
names(imputations) <- paste0("imp", 1:length(imputations))
# Ensure that everything is a data.frame
for (k in length(imputations):1) {
if (!is.data.frame(imputations[[k]])){
imputations[[k]] <- NULL
warning("Item ", k, " of the provided objects is not a data.frame and will be ignored.\n")
}
}
if(length(imputations) < 1){
stop("The resulting object contains no data.frames, and as such is not a valid multiple imputation object.",
call. = FALSE)
}
if(length(imputations) < 2){
stop("The resulting object contains only one data.frame, and as such is not a valid multiple imputation object.",
call. = FALSE)
}
class(imputations) <-c("mi", "list")
return(imputations)
}
#' Enables backwards compatability for preparing non-amelia imputed data sets
#' for \code{zelig}.
#'
#' See \code{\link{to_zelig_mi}}
#'
#' @param ... a set of \code{data.frame}'s
#' @return an \code{mi} object composed of a list of data frames.
mi <- to_zelig_mi
#' Conduct variable transformations called inside a \code{zelig} call
#'
#' @param formula model formulae
#' @param data data frame used in \code{formula}
#' @param FUN character string of the transformation function. Currently
#' supports \code{factor} and \code{log}.
#' @param check logical whether to just check if a formula contains an
#' internally called transformation and return \code{TRUE} or \code{FALSE}
#' @param f_out logical whether to return the converted formula
#' @param d_out logical whether to return the converted data frame. Note:
#' \code{f_out} must be missing
#'
#' @author Christopher Gandrud
#' @keywords internal
transformer <- function(formula, data, FUN = 'log', check, f_out, d_out) {
if (!missing(data)) {
if (is.data.frame(data))
is_df <- TRUE
else if (!is.data.frame(data) & is.list(data))
is_df <- FALSE
else
stop('data must be either a data.frame or a list', call. = FALSE)
}
if (FUN == 'as.factor') FUN_temp <- 'as\\.factor'
else FUN_temp <- FUN
FUN_str <- sprintf('%s.*\\(', FUN_temp)
f <- as.character(formula)[3]
f_split <- unlist(strsplit(f, split = '\\+'))
to_transform <- grep(pattern = FUN_str, f_split)
if (!missing(check)) {
if (length(to_transform) > 0) return(TRUE)
else return(FALSE)
}
if (length(to_transform) > 0) {
to_transform_raw <- trimws(f_split[to_transform])
if (FUN == 'factor')
to_transform_raw <- gsub('^as\\.', '', to_transform_raw)
to_transform_plain_args <- gsub(FUN_str, '', to_transform_raw)
to_transform_plain <- gsub(',\\(.*)', '', to_transform_plain_args)
to_transform_plain <- gsub('\\)', '', to_transform_plain)
to_transform_plain <- trimws(gsub(',.*', '', to_transform_plain))
if (is_df)
not_in_data <- !all(to_transform_plain %in% names(data))
else if (!isTRUE(is_df))
not_in_data <- !all(to_transform_plain %in% names(data[[1]]))
if (not_in_data) stop('Unable to find variable to transform.')
if (!missing(f_out)) {
f_split[to_transform] <- to_transform_plain
rhs <- paste(f_split, collapse = ' + ')
lhs <- gsub('\\(\\)', '', formula[2])
f_new <- paste(lhs, '~', rhs)
f_out <- as.Formula(f_new)
return(f_out)
}
else if (!missing(d_out)) {
transformer_fun <- trimws(gsub('\\(.*', '', to_transform_raw))
transformer_args_str <- gsub('\\)', '', to_transform_plain_args)
transformer_args_list <- list()
for (i in seq_along(transformer_args_str)) {
args_temp <- unlist(strsplit(gsub(' ', '' ,
transformer_args_str[i]), ','))
if (is_df)
args_temp[1] <- sprintf('data[, "%s"]', args_temp[1])
else if (!isTRUE(is_df))
args_temp[1] <- sprintf('data[[h]][, "%s"]', args_temp[1])
arg_names <- gsub('\\=.*', '', args_temp)
arg_names[1] <- 'x'
args_temp <- gsub('.*\\=', '', args_temp)
args_temp_list <- list()
if (is_df) {
for (u in seq_along(args_temp))
args_temp_list[[u]] <- eval(parse(text = args_temp[u]))
}
else if (!isTRUE(is_df)) {
for (h in seq_along(data)) {
temp_list <- list()
for (u in seq_along(args_temp)) {
temp_list[[u]] <- eval(parse(text = args_temp[u]))
names(temp_list)[u] <- arg_names[u]
}
args_temp_list[[h]] <- temp_list
}
}
if (is_df) {
names(args_temp_list) <- arg_names
data[, to_transform_plain[i]] <- do.call(
what = transformer_fun[i],
args = args_temp_list)
}
else if (!isTRUE(is_df)) {
for (j in seq_along(data)) {
data[[j]][, to_transform_plain[i]] <- do.call(
what = transformer_fun[i],
args = args_temp_list[[j]])
}
}
}
return(data)
}
}
else if (length(to_transform) == 0) {
if (!missing(f_out)) return(formula)
else if (d_out) return(data)
}
}
#' Remove package names from fitted model object calls.
#'
#' Enables \code{\link{from_zelig_model}} output to work with stargazer.
#' @param x a fitted model object result
#' @keywords internal
strip_package_name <- function(x) {
if ("vglm" %in% class(x)) # maybe generalise to all s4?
call_temp <- gsub('^.*(?=(::))', '', x@call[1], perl = TRUE)
else
call_temp <- gsub('^.*(?=(::))', '', x$call[1], perl = TRUE)
call_temp <- gsub('::', '', call_temp, perl = TRUE)
if ("vglm" %in% class(x))
x@call[1] <- as.call(list(as.symbol(call_temp)))
else
x$call[1] <- as.call(list(as.symbol(call_temp)))
return(x)
}
#' Extract p-values from a fitted model object
#' @param x a fitted Zelig object
#' @keywords internal
p_pull <- function(x) {
if ("vglm" %in% class(x)) { # maybe generalise to all s4?
p_values <- summary(x)@coef3[, 'Pr(>|z|)']
}
else {
p_values <- summary(x)$coefficients
if ('Pr(>|t|)' %in% colnames(p_values)) {
p_values <- p_values[, 'Pr(>|t|)']
} else {
p_values <- p_values[, 'Pr(>|z|)']
}
}
return(p_values)
}
#' Extract standard errors from a fitted model object
#' @param x a fitted Zelig object
#' @keywords internal
se_pull <- function(x) {
if ("vglm" %in% class(x)) # maybe generalise to all s4?
se <- summary(x)@coef3[, "Std. Error"]
else
se <- summary(x)$coefficients[, "Std. Error"]
return(se)
}
#' Drop intercept columns or values from a data frame or named vector,
#' respectively
#'
#' @param x a data frame or named vector
#' @keywords internal
rm_intercept <- function(x) {
intercept_names <- c('(Intercept)', 'X.Intercept.', '(Intercept).*')
names_x <- names(x)
if (any(intercept_names %in% names(x))) {
keep <- !(names(x) %in% intercept_names)
if (is.data.frame(x))
x <- data.frame(x[, names_x[keep]])
else if (is.vector(x))
x <- x[keep]
names(x) <- names_x[keep]
}
return(x)
}
#' Combines estimated coefficients and associated statistics
#' from models estimated with multiply imputed data sets or bootstrapped
#'
#' @param obj a zelig object with an estimated model
#' @param out_type either \code{"matrix"} or \code{"list"} specifying
#' whether the results should be returned as a matrix or a list.
#' @param bagging logical whether or not to bag the bootstrapped coefficients
#' @param messages logical whether or not to return messages for what is being
#' returned
#'
#' @return If the model uses multiply imputed or bootstrapped data then a
#' matrix (default) or list of combined coefficients (\code{coef}), standard
#' errors (\code{se}), z values (\code{zvalue}), p-values (\code{p}) is
#' returned. Rubin's Rules are used to combine output from multiply imputed
#' data. An error is returned if no imputations were included or there wasn't
#' bootstrapping. Please use \code{get_coef}, \code{get_se}, and
#' \code{get_pvalue} methods instead in cases where there are no imputations or
#' bootstrap.
#'
#' @examples
#' set.seed(123)
#'
#' ## Multiple imputation example
#' # Create fake imputed data
#' n <- 100
#' x1 <- runif(n)
#' x2 <- runif(n)
#' y <- rnorm(n)
#' data.1 <- data.frame(y = y, x = x1)
#' data.2 <- data.frame(y = y, x = x2)
#'
#' # Estimate model
#' mi.out <- to_zelig_mi(data.1, data.2)
#' z.out.mi <- zelig(y ~ x, model = "ls", data = mi.out)
#'
#' # Combine and extract coefficients and standard errors
#' combine_coef_se(z.out.mi)
#'
#' ## Bootstrap example
#' z.out.boot <- zelig(y ~ x, model = "ls", data = data.1, bootstrap = 20)
#' combine_coef_se(z.out.boot)
#'
#' @author Christopher Gandrud and James Honaker
#' @source Partially based on \code{\link{mi.meld}} from Amelia.
#'
#' @export
combine_coef_se <- function(obj, out_type = 'matrix', bagging = FALSE,
messages = TRUE)
{
is_zelig(obj)
is_uninitializedField(obj$zelig.out)
if (!(out_type %in% c('matrix', 'list')))
stop('out_type must be either "matrix" or "list"', call. = FALSE)
if (obj$mi || obj$bootstrap) {
coeflist <- obj$get_coef()
vcovlist <- obj$get_vcov()
coef_names <- names(coeflist[[1]])
am.m <- length(coeflist)
if (obj$bootstrap & !obj$mi) am.m <- am.m - 1
am.k <- length(coeflist[[1]])
if (obj$bootstrap & !obj$mi)
q <- matrix(unlist(coeflist[-(am.m + 1)]), nrow = am.m,
ncol = am.k, byrow = TRUE)
else if (obj$mi) {
q <- matrix(unlist(coeflist), nrow = am.m, ncol = am.k,
byrow = TRUE)
se <- matrix(NA, nrow = am.m, ncol = am.k)
for(i in 1:am.m){
se[i, ] <- sqrt(diag(vcovlist[[i]]))
}
}
ones <- matrix(1, nrow = 1, ncol = am.m)
comb_q <- (ones %*% q)/am.m
if (obj$mi) ave.se2 <- (ones %*% (se^2)) / am.m
diff <- q - matrix(1, nrow = am.m, ncol = 1) %*% comb_q
sq2 <- (ones %*% (diff^2))/(am.m - 1)
if (obj$mi) {
if (messages) message('Combining imputations. . .')
comb_se <- sqrt(ave.se2 + sq2 * (1 + 1/am.m))
coef <- as.vector(comb_q)
se <- as.vector(comb_se)
}
else if (obj$bootstrap & !obj$mi) {
if (messages) message('Combining bootstraps . . .')
comb_se <- sqrt(sq2 * (1 + 1/am.m))
if (bagging) {
coef <- as.vector(comb_q)
} else {
coef <- coeflist[[am.m + 1]]
}
se <- as.vector(comb_se)
}
zvalue <- coef / se
pr_z <- 2 * (1 - pnorm(abs(zvalue)))
if (out_type == 'matrix') {
out <- cbind(coef, se, zvalue, pr_z)
colnames(out) <- c("Estimate", "Std.Error", "z value", "Pr(>|z|)")
rownames(out) <- coef_names
}
else if (out_type == 'list') {
out <- list(coef = coef, se = se, zvalue = zvalue, p = pr_z)
for (i in seq(out)) names(out[[i]]) <- coef_names
}
return(out)
}
else if (!(obj$mi || obj$bootstrap)) {
message('No multiply imputed or bootstrapped estimates found.\nReturning untransformed list of coefficients and standard errors.')
out <- list(coef = coef(obj),
se = get_se(obj),
pvalue = get_pvalue(obj)
)
return(out)
}
}
#' Find vcov for GEE models
#'
#' @param obj a \code{geeglm} class object.
vcov_gee <- function(obj) {
if (!("geeglm" %in% class(obj)))
stop('Not a geeglm class object', call. = FALSE)
out <- obj$geese$vbeta
return(out)
}
#' Find vcov for quantile regression models
#'
#' @param obj a \code{rq} class object.
vcov_rq <- function(obj) {
if (!("rq" %in% class(obj)))
stop('Not an rq class object', call. = FALSE)
out <- summary(obj, cov = TRUE)$cov
return(out)
}
#' Find odds ratios for coefficients and standard errors
#' for glm.summary class objects
#'
#' @param obj a \code{glm.summary} class object
#' @param label_mod_coef character string for how to modify the coefficient
#' label.
#' @param label_mod_se character string for how to modify the standard error
#' label.
or_summary <- function(obj, label_mod_coef = "(OR)",
label_mod_se = "(OR)"){
if (class(obj) != "summary.glm")
stop("obj must be of summary.glm class.",
call. = FALSE)
obj$coefficients[, 1] <- exp(obj$coefficients[, 1])
var_diag = diag(vcov(obj))
obj$coefficients[, 2] <- sqrt(obj$coefficients[, 1] ^ 2 * var_diag)
colnames(obj$coefficients)[c(1, 2)] <- paste(
colnames(obj$coefficients)[c(1, 2)],
c(label_mod_coef, label_mod_se))
return(obj)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.