Nothing
#' Group-Independence Test Function
#'
#' This function takes in two variables of equal length, the first of which is a categorical variable, and performs a test of independence between them. It returns a character string with the results of that test for putting in a table.
#'
#' In an attempt (and perhaps an encouragement) to use this function in weird ways, and because it's not really expected to be used directly, input is not sanitized. Have fun!
#'
#' @param x A categorical variable.
#' @param y A variable to test for independence with \code{x}. This can be a factor or numeric variable. If you want a numeric variable treated as categorical, convert to a factor first.
#' @param w A vector of weights to pass to the appropriate test.
#' @param factor.test Used when \code{y} is a factor, a function that takes \code{x} and \code{y} as its first arguments and returns a list with three arguments: (1) The name of the test for printing, (2) the test statistic, and (3) the p-value. Defaults to a Chi-squared test if there are no weights, or a design-based F statistic (Rao & Scott Aadjustment, see \code{survey::svychisq}) with weights, which requires that the \code{survey} package be installed. WARNING: the Chi-squared test's assumptions fail with small sample sizes. This function will be attempted for all non-numeric \code{y}.
#' @param numeric.test Used when \code{y} is numeric, a function that takes \code{x} and \code{y} as its first arguments and returns a list with three arguments: (1) The name of the test for printing, (2) the test statistic, and (3) the p-value. Defaults to a group differences F test. If you only have two groups and would prefer an absolute t-statistic to an F-statistic, pass \code{vtable:::groupt.it}.
#' @param star.cutoffs A numeric vector indicating the p-value cutoffs to use for reporting significance stars. Defaults to \code{c(.01,.05,.1)}. If you don't want stars, remove them from the \code{format} argument.
#' @param star.markers A character vector indicating the symbols to use to indicate significance cutoffs associated with \code{star.cuoffs}. Defaults to \code{c('***','**','*')}. If you don't want stars, remove them from the \code{format} argument.
#' @param digits Number of digits after the decimal to round the test statistic and p-value to.
#' @param fixed.digits \code{FALSE} will cut off trailing \code{0}s when rounding. \code{TRUE} retains them. Defaults to \code{FALSE}.
#' @param format The way in which the four elements returned by (or calculated after) the test - \code{{name}}, \code{{stat}}, \code{{pval}}, and \code{{stars}} - will be arranged in the string output. Note that the default \code{'{name}={stat}{stars}'} does not contain the p-value, and also does not contain superscript for the stars since it doesn't know what markup language you're aiming for. For LaTeX you may prefer \code{'{name}$={stat}^{{stars}}$'}, and for HTML \code{'{name}={stat}<sup>{stars}</sup>'}.
#' @param opts The options listed above, entered in named-list format.
#' @examples
#'
#' data(mtcars)
#' independence.test(mtcars$cyl,mtcars$mpg)
#'
#' @export independence.test
independence.test <- function(x,y,w=NA,
factor.test = NA,
numeric.test = NA,
star.cutoffs = c(.01,.05,.1),
star.markers = c('***','**','*'),
digits = 3,
fixed.digits = FALSE,
format = '{name}={stat}{stars}',
opts = list()) {
#Bring in opts
list2env(opts,envir=environment())
#Are we using factor.test or numeric.test
cla <- is.numeric(y)
# Backwards consistency
if (length(w) == 1) {
if (is.na(w)) {
w <- NULL
}
}
#Fill in defaults
if (identical(factor.test,NA)) {
factor.test <- chisq.it
}
if (identical(numeric.test,NA)) {
numeric.test <- groupf.it
}
if (cla) {
result <- numeric.test(x,y,w)
} else {
result <- factor.test(x,y,w)
}
#Get stars
#Order smallest to biggest
star.markers <- star.markers[order(star.cutoffs)]
star.cutoffs <- star.cutoffs[order(star.cutoffs)]
#Find the first value that qualifies
underneath <- result[[3]] < star.cutoffs
stars <- star.markers[underneath][1]
stars <- ifelse(is.na(stars),'',stars)
#Rounding
#First, check if we're going to get a 0, so we can set that separate
is.zero <- result[[3]] < 10^(-digits)
if (fixed.digits) {
result[[2]] <- format(result[[2]],digits=digits,nsmall=digits)
result[[3]] <- format(result[[3]],digits=digits,nsmall=digits)
} else {
result[[2]] <- round(result[[2]],digits=digits)
result[[3]] <- round(result[[3]],digits=digits)
}
if (is.zero) {
result[[3]] <- paste0('<',10^(-digits))
}
#And format the result
printout <- format
#Fill in our four things
printout <- gsub('\\{name\\}',result[[1]],printout)
printout <- gsub('\\{stat\\}',result[[2]],printout)
printout <- gsub('\\{pval\\}',result[[3]],printout)
printout <- gsub('\\{stars\\}',stars,printout)
return(printout)
}
groupt.it <- function(x, y, w = NULL) {
if (length(unique(x)) > 2) {
stop('groupt.it cannot be used with more than two groups.')
}
return(groupf.it(x=x, y=y, w=w, t = TRUE))
}
# Internal chi-square and group-F tests that return things in independence.test format
chisq.it <- function(x,y,w=NULL) {
if (is.null(w)) {
suppressWarnings(result <- stats::chisq.test(x,y))
return(list(
'X2',
unname(result$statistic),
result$p.value
))
} else {
# Create survey design
d <- data.frame(x = x, y = y, w = w)
errmess <- try(sdes <- survey::svydesign(~1, data = d, weights = ~w))
if (grepl('Error in loadNamespace',errmess[1])) {
stop('Using weights with group.test = TRUE and factor variables requires the survey package. install.packages("survey")')
}
ftest <- survey::svychisq(~x+y, sdes)
return(list(
'F',
unname(ftest$statistic),
unname(ftest$p.value)
))
}
}
groupf.it <- function(x,y,w=NULL, t = FALSE) {
result <- stats::anova(stats::lm(y~factor(x),weights = w))
statname <- 'F'
stat <- result$`F value`[1]
if (t) {
statname <- 't'
stat <- sqrt(stat)
}
return(list(
statname,
stat,
result$`Pr(>F)`[1]
))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.