# R/tbr_binom.R In tbrf: Time-Based Rolling Functions

#### Documented in binom_citbr_binomtbr_binom_window

#' Time-Based Rolling Binomial Probability
#'
#' Produces a a rolling time-window based vector of binomial probability and
#' confidence intervals.
#' @param .tbl dataframe with two variables.
#' @param x indicates the variable column containing "success" and "failure"
#'   observations coded as 1 or 0.
#' @param tcolumn indicates the variable column containing Date or Date-Time
#'   values.
#' @param unit character, one of "years", "months", "weeks", "days", "hours",
#'   "minutes", "seconds"
#' @param n numeric, describing the length of the time window in the selected
#'   units.
#' @param alpha numeric, probability of a type 1 error, so confidence
#'   coefficient = 1-alpha
#'
#' @import dplyr
#' @import rlang
#' @importFrom purrr map
#' @importFrom tidyr unnest
#' @return tibble with binomial point estimate and confidence intervals.
#' @export
#' @examples
#' ## Generate Sample Data
#' df <- tibble::tibble(
#' date = sample(seq(as.Date('2000-01-01'), as.Date('2015/12/30'), by = "day"), 100),
#' value = rbinom(100, 1, 0.25)
#' )
#'
#' ## Run Function
#' tbr_binom(df, x = value,
#' tcolumn = date, unit = "years", n = 5,
#' alpha = 0.1)
tbr_binom <- function(.tbl, x, tcolumn, unit = "years", n, alpha = 0.05) {

.tbl <- .tbl %>%
arrange(!! rlang::enquo(tcolumn)) %>%
mutate("temp" := purrr::map(row_number(),
~tbr_binom_window(x = !! rlang::enquo(x), #column that indicates success/failure
tcolumn = !! rlang::enquo(tcolumn), #posix formatted time column
unit = unit,
n = n,
alpha = alpha,
i = .x))) %>%
tidyr::unnest(.data$temp) .tbl <- tibble::as_tibble(.tbl) return(.tbl) } #' Binomial test based on time window #' #' @param x column containing "success" and "failure" observations as 0 or 1 #' @param tcolumn formatted time column #' @param unit character, one of "years", "months", "weeks", "days", "hours", #' "minutes", "seconds" #' @param n numeric, describing the length of the time window. #' @param i rows #' @param alpha numeric, probability of a type 1 error, so confidence #' coefficient = 1-alpha #' #' @return list #' @keywords internal tbr_binom_window <- function(x, tcolumn, unit = "years", n, i, alpha) { # checks for valid unit values u <- (c("years", "months", "weeks", "days", "hours", "minutes", "seconds")) if (!unit %in% u) { stop("unit must be one of ", paste(u, collapse = ", ")) } # creates a time-based window window <- open_window(x, tcolumn, unit = unit, n, i) df <- tibble(window) %>% summarise(n = n(), successes = as.integer(sum(window))) # calculates the binomial test with confidence intervals results <- binom_ci(x = df$successes, n = df\$n, alpha = alpha, return.df = TRUE)

return(results)
}

# Binomial CI -------------------------------------------------------------

## Copyright (C) 2001 Frank E Harrell Jr
##
## This program is free software; you can redistribute it and/or modify it
## Free Software Foundation; either version 2, or (at your option) any
## later version.
##
## These functions are distributed in the hope that they will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## The text of the GNU General Public License, version 2, is available
## as http://www.gnu.org/copyleft or by writing to the Free Software
## Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##

#' Confidence Intervals for Binomial Probabilities
#'
#' An implementation of the \code{binconf} function in Frank
#' Harrell's Hmisc package. Produces 1-alpha confidence intervals for binomial
#' probabilities.
#' @param x vector containing the number of "successes" for binomial variates.
#' @param n vector containing the numbers of corresponding observations.
#' @param alpha probability of a type I error, so confidence coefficient =
#'   1-alpha.
#' @param method character string specifying which method to use. The "exact"
#'   method uses the F distribution to compute exact (based on the binomial cdf)
#'   intervals; the "wilson" interval is score-test-based; and the "asymptotic"
#'   is the text-book, asymptotic normal interval. Following Agresti and Coull,
#'   the Wilson interval is to be preferred and so is the default.
#' @param return.df logical flag to indicate that a data frame rather than a
#'   matrix be returned.
#'
#' @importFrom stats qf qnorm
#' @export
#' @author Frank Harrell, modified by Michael Schramm
#' @keywords internal
#' @references A. Agresti and B.A. Coull, Approximate is better than "exact" for
#'   interval estimation of binomial proportions, \emph{American Statistician,}
#'   \bold{52}:119--126, 1998.
#'
#'   R.G. Newcombe, Logit confidence intervals and the inverse sinh
#'   transformation, \emph{American Statistician,} \bold{55}:200--202, 2001.
#'
#'   L.D. Brown, T.T. Cai and A. DasGupta, Interval estimation for a binomial
#'   proportion (with discussion), \emph{Statistical Science,}
#'   \bold{16}:101--133, 2001.
#' @examples binom_ci(46,50,method="wilson")

binom_ci <- function(x, n, alpha = 0.05,
method = c("wilson","exact","asymptotic"),
return.df = FALSE)
{
## ..modifications for printing and the addition of a
##   method argument and the asymptotic interval
##   and to accept vector arguments were

method <- match.arg(method)
bc <- function(x, n, alpha, method)
{
nu1 <- 2 * (n - x + 1)
nu2 <- 2 * x
ll <- if (x > 0)
x/(x + qf(1 - alpha/2, nu1, nu2) * (n - x + 1))
else
0

nu1p <- nu2 + 2
nu2p <- nu1 - 2
pp <- if (x < n)
qf(1 - alpha/2, nu1p, nu2p)
else
1

ul <- ((x + 1) * pp)/(n - x + (x + 1) * pp)
zcrit <-  -qnorm(alpha/2)
z2 <- zcrit * zcrit
p <- x/n
cl <- (p + z2/2/n + c(-1, 1) * zcrit *
sqrt((p * (1 - p) + z2/4/n)/n))/(1 + z2/n)

if (x == 1)
cl <-  -log(1 - alpha)/n

if (x == (n - 1))
cl <- 1 + log(1 - alpha)/n

asymp.lcl <- x/n - qnorm(1 - alpha/2) *
sqrt(((x/n) * (1 - x/n))/n)

asymp.ucl <- x/n + qnorm(1 - alpha/2) * sqrt(((x/n) * (1 - x/n)
)/n)
res <- rbind(c(ll, ul), cl, c(asymp.lcl, asymp.ucl))
res <- cbind(rep(x/n, 3), res)

##dimnames(res) <- list(c("Exact", "Wilson", "Asymptotic"), c(
## "Point Estimate", "Lower", "Upper"))
switch(method,
wilson =     res[2,  ],
exact =      res[1,  ],
asymptotic = res[3,  ])
}

if ((length(x) != length(n)) & length(x) == 1)
x <- rep(x, length(n))
if ((length(x) != length(n)) & length(n) == 1)
n <- rep(n, length(x))

mat <- matrix(ncol = 3, nrow = length(x))
for (i in 1:length(x))
mat[i,  ] <- bc(x[i], n[i], alpha = alpha, method = method)

dimnames(mat) <- list(rep("", dim(mat)),
c("PointEst", "Lower", "Upper"))

if (return.df)
mat <- as.data.frame(mat, row.names = NULL)

mat
}


## Try the tbrf package in your browser

Any scripts or data that you put into this service are public.

tbrf documentation built on April 14, 2020, 7:10 p.m.