Nothing
#' @title Cramer's V for chi-square goodness-of-fit tests
#'
#' @description Calculates Cramer's V for a vector of counts and expected
#' counts; confidence intervals by bootstrap.
#'
#' @param x A vector of observed counts.
#' @param p A vector of expected or default probabilities.
#' @param ci If \code{TRUE}, returns confidence intervals by bootstrap.
#' May be slow.
#' @param conf The level for the confidence interval.
#' @param type The type of confidence interval to use.
#' Can be any of "\code{norm}", "\code{basic}",
#' "\code{perc}", or "\code{bca}".
#' Passed to \code{boot.ci}.
#' @param R The number of replications to use for bootstrap.
#' @param histogram If \code{TRUE}, produces a histogram of bootstrapped values.
#' @param digits The number of significant digits in the output.
#' @param reportIncomplete If \code{FALSE} (the default),
#' \code{NA} will be reported in cases where there
#' are instances of the calculation of the statistic
#' failing during the bootstrap procedure.
#' @param verbose If \code{TRUE}, prints additional statistics.
#' @param ... Additional arguments passed to \code{chisq.test}.
#'
#' @details This modification of Cramer's V
#' could be used to indicate an effect size
#' in cases where a chi-square goodness-of-fit test might be used.
#' It indicates the degree of deviation of observed counts
#' from the expected probabilities.
#'
#' In the case of equally-distributed expected frequencies,
#' Cramer's V will be equal to 1 when all counts are in one category,
#' and it will be equal to 0 when the counts are equally distributed
#' across categories.
#' This does not hold if the expected frequencies are not
#' equally-distributed.
#'
#' Because V is always positive,
#' if \code{type="perc"},
#' the confidence interval will
#' never cross zero, and should not
#' be used for statistical inference.
#' However, if \code{type="norm"}, the confidence interval
#' may cross zero.
#'
#' When V is close to 0 or 1,
#' or with small counts,
#' the confidence intervals
#' determined by this
#' method may not be reliable, or the procedure may fail.
#'
#' In addition, the function will not return a confidence
#' interval if there are zeros in any cell.
#'
#' @author Salvatore Mangiafico, \email{mangiafico@njaes.rutgers.edu}
#'
#' @references \url{https://rcompanion.org/handbook/H_03.html}
#'
#' @seealso \code{\link{cramerV}}
#'
#' @concept effect size
#' @concept Cramer's V
#' @concept chi square test
#' @concept confidence interval
#'
#' @return A single statistic, Cramer's V.
#' Or a small data frame consisting of Cramer's V,
#' and the lower and upper confidence limits.
#'
#' @examples
#' ### Equal probabilities example
#' ### From https://rcompanion.org/handbook/H_03.html
#' nail.color = c("Red", "None", "White", "Green", "Purple", "Blue")
#' observed = c( 19, 3, 1, 1, 2, 2 )
#' expected = c( 1/6, 1/6, 1/6, 1/6, 1/6, 1/6 )
#' chisq.test(x = observed, p = expected)
#' cramerVFit(x = observed, p = expected)
#'
#' ### Unequal probabilities example
#' ### From https://rcompanion.org/handbook/H_03.html
#' race = c("White", "Black", "American Indian", "Asian", "Pacific Islander",
#' "Two or more races")
#' observed = c(20, 9, 9, 1, 1, 1)
#' expected = c(0.775, 0.132, 0.012, 0.054, 0.002, 0.025)
#' chisq.test(x = observed, p = expected)
#' cramerVFit(x = observed, p = expected)
#'
#' ### Examples of perfect and zero fits
#' cramerVFit(c(100, 0, 0, 0, 0))
#' cramerVFit(c(10, 10, 10, 10, 10))
#'
#' @importFrom stats chisq.test
#' @importFrom boot boot boot.ci
#'
#' @export
cramerVFit = function(x, p=rep(1/length(x), length(x)),
ci=FALSE, conf=0.95, type="perc",
R=1000, histogram=FALSE, digits=4,
reportIncomplete=FALSE,
verbose=FALSE, ...) {
CV=NULL
N = sum(x)
Chi.sq = suppressWarnings(chisq.test(x=x, p=p, ...)$statistic)
K = length(x)
CV = sqrt(Chi.sq/N/(K-1))
if(verbose){
cat("\n")
cat("Number of cells =", signif(K, digits=digits))
cat("\n")
cat("N =", signif(N, digits=digits))
cat("\n")
cat("Chi-squared =", signif(Chi.sq, digits=digits))
cat("\n")
cat("V =", signif(CV, digits=digits))
cat("\n")
cat("\n")
}
CV = signif(as.numeric(CV), digits=digits)
if(is.nan(CV) & ci==TRUE){
return(data.frame(Cramer.V=CV, lower.ci=NA, upper.ci=NA))}
if(any(x==0) & ci==TRUE){
return(data.frame(Cramer.V=CV, lower.ci=NA, upper.ci=NA))}
if(ci==TRUE){
Counts = as.data.frame(x)
Long=data.frame(x = rep(row.names(Counts), Counts$x))
rownames(Long) = seq(1:nrow(Long))
L1 = length(x)
Function = function(input, index){
Input = input[index,]
NOTEQUAL=0
if(length(unique(Input)) != L1){NOTEQUAL=1}
if(NOTEQUAL==1){FLAG=1; return(c(NA,FLAG))}
if(NOTEQUAL==0){
Obs = as.vector(table(Input))
CV = NULL
N = sum(Obs)
Chi.sq = suppressWarnings(chisq.test(x=Obs, p=p, ...)$statistic)
K = length(Obs)
CV = sqrt(Chi.sq/N/(K-1))
CV = signif(as.numeric(CV), digits=digits)
FLAG = 0
return(c(CV,FLAG))}
}
Boot = boot(Long, Function, R=R)
BCI = boot.ci(Boot, conf=conf, type=type)
if(type=="norm") {CI1=BCI$normal[2]; CI2=BCI$normal[3]}
if(type=="basic"){CI1=BCI$basic[4]; CI2=BCI$basic[5]}
if(type=="perc") {CI1=BCI$percent[4]; CI2=BCI$percent[5]}
if(type=="bca") {CI1=BCI$bca[4]; CI2=BCI$bca[5]}
if(sum(Boot$t[,2])>0 & reportIncomplete==FALSE) {CI1=NA; CI2=NA}
CI1=signif(CI1, digits=digits)
CI2=signif(CI2, digits=digits)
if(histogram==TRUE){hist(Boot$t[,1], col = "darkgray",
main="", xlab="V")}
}
if(ci==FALSE){names(CV)="Cramer V"; return(CV)}
if(ci==TRUE){return(data.frame(Cramer.V=CV, lower.ci=CI1, upper.ci=CI2))}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.