# Copyright (C) 2017 Institute for Defense Analyses
#
# This file is part of ciTools.
#
# ciTools is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ciTools is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ciTools. If not, see <http://www.gnu.org/licenses/>.
#' Response Probabilities for Generalized Linear Models
#'
#' This is the method \code{add_probs} uses if the model fit is an
#' object of class \code{glm}. Probabilities are determined through
#' simulation, using the same method as \code{add_pi.glm}. Currently,
#' only logistic, Poisson, Quasipoisson, and Gamma models are
#' supported.
#'
#' Any of the five comparisons may be made for a Poisson,
#' quasipoisson, or Gamma model: \code{comparison = "<"}, \code{">"},
#' \code{"="}, \code{"<="}, or \code{">="}. For logistic regression,
#' the comparison statement must be equivalent to \eqn{Pr(Y|x = 0)} or
#' \eqn{Pr(Y|x = 1)}.
#'
#' If \code{add_probs} is called on a Poisson, quasiPoisson or Gamma
#' model, a simulation is performed using \code{arm::sim}.
#'
#' If \code{add_probs} is called on a logistic model, the fitted
#' probabilities are used directly (no simulation is required).
#'
#' If \code{add_probs} is called on a Gaussian GLM, the returned
#' probabilities are identical to those given by
#' \code{add_probs.lm}. In this case, the comparisons \code{<} and
#' \code{<=} are identical (likewise for \code{>} and \code{>=}). If
#' the comparison \code{=} is used in the Gaussian GLM, an informative
#' error is returned.
#'
#' @param df A data frame of new data.
#' @param fit An object of class \code{glm}. Predictions are made with
#' this object.
#' @param q A double. A quantile of the response distribution.
#' @param name \code{NULL} or a string. If \code{NULL}, probabilities
#' automatically will be named by \code{add_probs()}, otherwise,
#' the probabilities will be named \code{name} in the returned
#' dataframe.
#' @param yhatName A string. Name of the vector of predictions.
#' @param comparison A character vector of length one. If
#' \code{comparison = "<"}, then \eqn{Pr(Y|X < q)} is
#' calculated. Any comparison is allowed in Poisson regression,
#' but only certain comparisons may be made in Logistic
#' regression. See the Details section.
#' @param nSims A positive integer. Controls the number of simulated
#' draws to make if the model is Poisson.
#' @param ... Additional arguments.
#'
#' @return A dataframe, \code{df}, with predicted values and
#' probabilities attached.
#'
#' @seealso \code{\link{add_ci.glm}} for confidence intervals for
#' \code{glm} objects, \code{\link{add_pi.glm}} for prediction
#' intervals of \code{glm} objects, and
#' \code{\link{add_quantile.glm}} for response quantiles of
#' \code{glm} objects.
#'
#' @examples
#' # Fit a Poisson model
#' fit <- glm(dist ~ speed, data = cars, family = "poisson")
#'
#' # Determine the probability that a new dist is less than 20, given
#' # the Poisson model.
#' add_probs(cars, fit, q = 20)
#'
#' # Determine the probability that a new dist is greater than 20,
#' # given the Poisson model.
#' add_probs(cars, fit, q = 30, comparison = ">")
#'
#' # Determine the probability that a new dist is greater than or
#' # equal to 20, given the Poisson model.
#' add_probs(cars, fit, q = 30, comparison = ">=")
#'
#' # Fit a logistic model
#' fit2 <- glm(I(dist > 30) ~ speed, data = cars, family = "binomial")
#' add_probs(cars, fit2, q = 0, comparison = "=")
#' add_probs(cars, fit2, q = 1, comparison = "=")
#'
#' @export
add_probs.glm <- function(df, fit, q, name = NULL, yhatName = "pred",
comparison = "<", nSims = 2000, ...){
if (is.null(name) & (comparison == "<"))
name <- paste("prob_less_than", q, sep="")
if (is.null(name) & (comparison == ">"))
name <- paste("prob_greater_than", q, sep="")
if (is.null(name) & (comparison == "<="))
name <- paste("prob_less_than_or_equal_to", q, sep="")
if (is.null(name) & (comparison == ">="))
name <- paste("prob_greater_than_or_equal_to", q, sep="")
if (is.null(name) & (comparison == "="))
name <- paste("prob_equal_to", q, sep="")
if ((name %in% colnames(df)))
warning ("These probabilities may have already been appended to your dataframe. Overwriting.")
if (fit$family$family %in% c("poisson", "quasipoisson"))
warning("The response is not continuous, so estimated probabilities are only approximate")
if (fit$family$family == "binomial"){
if(max(fit$prior.weights) == 1){ #distinguish between Bernoulli and binomial regression
warning("Equivalent to Pr(Y = 0) (or Pr(Y = 1) if comparison = '>' is specified)")
probs_logistic(df, fit, q, name, yhatName, comparison)
} else {
warning("Treating weights as indicating the number of trials for a
binomial regression where the response is the proportion of successes")
sim_probs_other(df, fit, q, name, yhatName, nSims, comparison)
}
} else if (fit$family$family == "gaussian"){
probs_gaussian(df, fit, q, name, yhatName, comparison)
} else if (fit$family$family %in% c("poisson", "qausipoisson", "Gamma")){
sim_probs_other(df, fit, q, name, yhatName, nSims, comparison)
} else {
stop("Unsupported family")
}
}
probs_gaussian <- function(df, fit, q, name, yhatName, comparison){
sigma_sq <- summary(fit)$dispersion
inverselink <- fit$family$linkinv
out <- predict(fit, newdata = df, se.fit = TRUE)
se_terms <- out$se.fit
se_global <- sqrt(sigma_sq + se_terms^2)
t_quantile <- (q - inverselink(out$fit)) / se_global
if (comparison %in% c("<", "<="))
t_prob <- pt(q = t_quantile, df = fit$df.residual)
if (comparison %in% c(">", ">="))
t_prob <- 1 - pt(q = t_quantile, df = fit$df.residual)
if (comparison == "=")
stop("The response is assumed to be continuous -- the probability of this event is 0")
if(is.null(df[[yhatName]]))
df[[yhatName]] <- inverselink(out$fit)
df[[name]] <- t_prob
data.frame(df)
}
probs_logistic <- function(df, fit, q, name, yhatName, comparison){
out <- predict(fit, newdata = df, type = "response")
if (((comparison == "=") && (q == 0)) || ((comparison == "<") && (q < 1) && (q > 0)))
probs <- 1 - out
else
probs <- out
if(is.null(df[[yhatName]]))
df[[yhatName]] <- out
df[[name]] <- probs
data.frame(df)
}
sim_probs_other <- function(df, fit, q, name, yhatName, nSims, comparison){
out <- predict(fit, newdata = df, type = "response")
sim_response <- get_sim_response(df = df, fit = fit, nSims = nSims)
probs <- apply(sim_response, 1, FUN = calc_prob, quant = q, comparison = comparison)
if(fit$family$family == "binomial"){
out <- out * fit$prior.weights
warning("For binomial models, add_probs's column of fitted values refelct E(Y|X) rather than typical default for logistic regression, pHat")
}
if(is.null(df[[yhatName]]))
df[[yhatName]] <- out
df[[name]] <- probs
data.frame(df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.