Nothing
#' @title Test of percentiles by permutation test
#'
#' @description Conducts a permutation test to compare two groups for medians,
#' percentiles, or proportion below a threshold value.
#'
#' @param formula A formula indicating the response variable and
#' the independent variable. e.g. y ~ group.
#' @param data The data frame to use.
#' @param x If no formula is given, the response variable for one group.
#' @param y The response variable for the other group.
#' @param test The statistic to compare between groups. Can be
#' \code{"median"}, \code{"percentile"}, \code{"iqr"},
#' \code{"proportion"},
#' \code{"mean"}, or \code{"variance"}.
#' @param tau If \code{"percentile"} is chosen as the \code{test},
#' \code{tau} indicates the percentile to test. Expressed
#' as a quantile. That is, 0.5 indicates a test for medians.
#' 0.75 indicates a test for 75th percentiles.
#' @param type The \code{type} value passed to the \code{quantile} function.
#' @param threshold If \code{"proportion"} is chosen as the \code{test},
#' \code{threshold} indicates the value of the dependent variable
#' to use as the threshold. For example, to test if there is a
#' different in the proportion of observations below $10,000,
#' \code{threshold = 10000} would be used.
#' @param comparison If \code{"proportion"} is chosen as the \code{test},
#' \code{comparison} indicates the inequality to use. Options are
#' \code{"<"}, \code{"<="}, \code{">"}, \code{">="}, or , \code{"=="}
#' @param r The number of replicates in the permutation test.
#' @param digits The number of significant digits in the output.
#' @param progress If \code{TRUE}, prints a dot for every 1 percent of
#' progress while conducting the test.
#'
#' @details The function will test for a difference in medians, percentiles,
#' interquartile ranges, proportion of observations above or below
#' some threshold value,
#' means, or variances between two groups
#' by permutation test.
#'
#' The permutation test simply permutes the observed values over the
#' two groups and counts how often the calculated statistic is
#' at least as extreme as the original observed statistic.
#'
#' The input should include either \code{formula} and \code{data};
#' or \code{x} and \code{y}.
#'
#' The function removes cases with NA in any of the variables.
#'
#' If the independent variable has more than two groups,
#' only the first two levels of the factor variable will be used.
#'
#' The p-value returned is a two-sided test.
#'
#' @note The parsing of the formula is simplistic.
#' The first variable on the
#' left side is used as the measurement variable.
#' The first variable on the
#' right side is used for the independent variable.
#'
#' @author Salvatore Mangiafico, \email{mangiafico@njaes.rutgers.edu}
#'
#' @references \url{https://rcompanion.org/handbook/F_15.html}
#'
#' @concept percentile
#' @concept median
#' @concept quantile
#' @concept permutation test
#'
#' @return A list of three data frames with
#' the data used, a summary for each group,
#' and the p-value from the test.
#'
#' @examples
#' data(BrendonSmall)
#' percentileTest(Sodium ~ Instructor,
#' data=BrendonSmall,
#' test="median")
#'
#' percentileTest(Sodium ~ Instructor,
#' data=BrendonSmall,
#' test="percentile",
#' tau = 0.75)
#'
#' percentileTest(Sodium ~ Instructor,
#' data=BrendonSmall,
#' test="proportion",
#' threshold = 1300)
#'
#' @importFrom utils tail
#' @importFrom stats median var quantile IQR complete.cases
#'
#' @export
percentileTest =
function(formula=NULL, data=NULL,
x=NULL, y=NULL,
test="median", tau=0.5, type=7,
threshold = NA, comparison = "<",
r=1000, digits=4, progress="TRUE")
{
if(is.null(formula)){
xy = c(x,y)
xname = paste(as.character(substitute(x)), collapse=" ")
yname = paste(as.character(substitute(y)), collapse=" ")
g = factor(c(rep(xname, length(x)), rep(yname, length(y))))
}
if(!is.null(formula)){
xy = eval(parse(text=paste0("data","$",all.vars(formula[[2]])[1])))
g = eval(parse(text=paste0("data","$",all.vars(formula[[3]])[1])))
if(!is.factor(g)){g=factor(g)}
}
Complete = complete.cases(xy, g)
xy = xy[Complete]
g = g[Complete]
g = droplevels(g)
x = xy[g == levels(g)[1]]
y = xy[g == levels(g)[2]]
xy = c(x, y)
PropA = NA
PropB = NA
if(test=="mean"){ Diff = abs(mean(x) - mean(y))}
if(test=="median"){ Diff = abs(median(x) - median(y))}
if(test=="variance"){ Diff = abs(var(x) - var(y))}
if(test=="percentile"){Diff = abs(quantile(x, probs=tau, type=type) -
quantile(y, probs=tau, type=type))}
if(test=="iqr"){ Diff = abs(IQR(x, type=type) -
IQR(y, type=type))}
if(test=="proportion"){
if(is.na(threshold)){threshold = median(c(x,y))}
SumA = sum(eval(parse(text=paste0("x", comparison, threshold))))
SumB = sum(eval(parse(text=paste0("y", comparison, threshold))))
PropA = SumA / length(x)
PropB = SumB / length(y)
Diff = abs(PropA - PropB)
}
Count = 0
if (progress){tick=r/100}
for(i in 1:r){
S = sample(xy, replace=FALSE)
Sx = S[g == levels(g)[1]]
Sy = S[g == levels(g)[2]]
if(test=="mean"){ Sdiff = abs(mean(Sx) - mean(Sy))}
if(test=="median"){ Sdiff = abs(median(Sx) - median(Sy))}
if(test=="variance"){ Sdiff = abs(var(Sx) - var(Sy))}
if(test=="percentile"){Sdiff = abs(quantile(Sx, probs=tau, type=type) -
quantile(Sy, probs=tau, type=type))}
if(test=="iqr"){ Sdiff = abs(IQR(Sx, type=type) -
IQR(Sy, type=type))}
if(test=="proportion"){
SA = sum(eval(parse(text=paste0("Sx", comparison, threshold))))
SB = sum(eval(parse(text=paste0("Sy", comparison, threshold))))
PA = SA / length(Sx)
PB = SB / length(Sy)
Sdiff = abs(PA -PB)
}
if(Sdiff >= Diff){Count=Count+1}
if(progress){if(i%%tick==0){cat(".")}}
}
if(progress){cat("\n"); cat("\n")}
P.value = Count/r
Z = data.frame(
Statistic = rep("NA", 2),
n = rep(as.numeric(NA), 2),
mean = rep(as.numeric(NA), 2),
sd = rep(as.numeric(NA), 2),
min = rep(as.numeric(NA), 2),
p25 = rep(as.numeric(NA), 2),
median = rep(as.numeric(NA), 2),
p75 = rep(as.numeric(NA), 2),
max = rep(as.numeric(NA), 2),
iqr = rep(as.numeric(NA), 2),
comparison = rep("NA", 2),
threshold = rep(as.numeric(NA), 2),
proportion = rep(as.numeric(NA), 2),
stringsAsFactors=FALSE)
Z[1,1] = levels(g)[1]
Z[1,2] = signif(length(x),digits)
Z[1,3] = signif(mean(x),digits)
Z[1,4] = signif(sd(x),digits)
Z[1,5] = signif(min(x),digits)
Z[1,6] = signif(quantile(x,0.25),digits)
Z[1,7] = signif(median(x),digits)
Z[1,8] = signif(quantile(x,0.75),digits)
Z[1,9] = signif(max(x),digits)
Z[1,10] = signif(IQR(x),digits)
Z[1,11] = comparison
Z[1,12] = signif(threshold, digits)
Z[1,13] = signif(PropA, digits)
Z[2,1] = levels(g)[2]
Z[2,2] = signif(length(y),digits)
Z[2,3] = signif(mean(y),digits)
Z[2,4] = signif(sd(y),digits)
Z[2,5] = signif(min(y),digits)
Z[2,6] = signif(quantile(y,0.25),digits)
Z[2,7] = signif(median(y),digits)
Z[2,8] = signif(quantile(y,0.75),digits)
Z[2,9] = signif(max(y),digits)
Z[2,10] = signif(IQR(y),digits)
Z[2,11] = comparison
Z[2,12] = signif(threshold, digits)
Z[2,13] = signif(PropB, digits)
colnames(Z)[1]=""
if(test!="proportion"){Z = Z[, !names(Z) %in%
c('comparison', 'threshold', 'proportion')]}
U = data.frame(
Statistic = rep("NA", 1),
p.value = rep(as.numeric(NA), 1),
stringsAsFactors=FALSE)
U[1,1] = "p-value"
U[1,2] = signif(P.value, digits)
colnames(U)[1]=""
V = data.frame(
Formula = "NA",
Data = "NA",
Test = "NA",
tau = as.numeric(NA),
stringsAsFactors=FALSE)
if(!is.null(formula)){V[1,1] = deparse(formula)}
if(!is.null(formula)){V[1,2] = as.character(substitute(data))}
V[1,3] = test
V[1,4] = tau
if(is.null(formula)){V[1,1] = xname
V[1,2] = yname
colnames(V)[1:2]=c("x", "y")
}
if(test!="percentile"){V = V[, !names(V) %in% c('tau')]}
W = list(Test=V,
Summary=Z,
Result=U)
return(W)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.