globalVariables(c("stats", "aggregate", "pchisq", "rchisq", "rnorm", "sd"))
# Miller test for raw data ----------------------------
#' Asymptotic test for the equality of coefficients of variation from k populations, using measurement data
#'
#' Test for k samples (k sample populations with unequal sized) from
#' Feltz CJ, Miller GE (1996) An asymptotic test for the equality of coefficients
#' of variation from k population. Stat Med 15:647–658
#'
#' @param x a numeric vector containing individual measurement values
#' @param y a vector of any type containing a grouping variable
#' @param seed optional, an integer that is the starting point used in the generation of a sequence of random numbers. Include a seed if you want reproducible output.
#'
#' @return a list with the test statistic and p-value
#' @export
#'
#' @examples
#'
#' y <- unlist(lapply(letters[1:5], function(i) rep(i, 20)))
#' x <- rnorm(100)
#'
#' asymptotic_test(x, y)
#'
asymptotic_test <-
function(x, y, seed){
## set the seed to ensure reproducibiilty
if (!missing(seed))
set.seed(seed)
if (!is.numeric(x) && (!is.numeric(y) | !is.character(y) | !is.integer(y))) {
warning("x is not numeric or y is not numeric, character or integer: returning NA")
return(NA_real_)
}
if (anyNA(x)) {
warning("x cannot contain any NA values: returning NA")
return(NA_real_)
}
if (anyNA(y)) {
warning("y cannot contain any NA values: returning NA")
return(NA_real_)
}
# k is the number of groups
k <- length(unique(y))
# j is an index referring to the group number
# n_j is the sample size of the jth group
n_j <- data.frame(table(y))$Freq
# s_j is the sd of the jth population
s_j <- aggregate(x, by = list(y), FUN = sd)[2]
# x_j is the mean of the jth population
x_j <- aggregate(x, by = list(y), FUN = mean)[2]
m_j <- n_j - 1
D <- (sum(m_j * (s_j/x_j))) / sum(m_j)
D_AD <- (sum(m_j * (s_j/x_j - D)^2 )) / ( D^2 * (0.5 + D^2) )
# D_AD distributes as a central chi-sq random variable with k - 1 degrees of freedom
p_value <- pchisq(D_AD,
k - 1,
lower.tail = FALSE)
return(list(D_AD = D_AD,
p_value = p_value))
}
# Miller test for summary data ------------------------
#' Asymptotic test for the equality of coefficients of variation from k populations, using summary statistics when raw measurement data are not available.
#'
#' Test for k samples (k sample populations with unequal sized) from
#' Feltz CJ, Miller GE (1996) An asymptotic test for the equality of coefficients
#' of variation from k population. Stat Med 15:647–658
#'
#' @param k a numeric vector the number of groups
#' @param n a numeric vector the numer of measurements in each group
#' @param s a numeric vector the standard deviation of each group
#' @param x a numeric vector the mean of each group
#' @param seed optional, an integer that is the starting point used in the generation of a sequence of random numbers. Include a seed if you want reproducible output.
#'
#' @return a list with the test statistic and p-value
#' @export
#'
#' @examples
#'
#' # Summary stats from Feltz and Miller 1996
#'
#' miller <- data.frame(test = c('ELISA', 'WEHI', '`Viral inhibition`'),
#' Mean = c(6.8, 8.5, 6.0),
#' CV = c(0.090, 0.462, 0.340),
#' N = c(5, 5, 5))
#' # compute SD from mean and cv
#' miller$SD <- with(miller, CV * Mean)
#'
#' asymptotic_test2(k = nrow(miller), n = miller$N, s = miller$SD, x = miller$Mean)
#'
asymptotic_test2 <-
function(k, n, s, x, seed){
## set the seed to ensure reproducibiilty
if (!missing(seed))
set.seed(seed)
if (!is.integer(k) && !is.numeric(k) && !is.integer(n) && !is.numeric(n) && !is.numeric(s) && !is.numeric(x) ){
warning("input values are not numeric: returning NA")
return(NA_real_)
}
# k is the number of groups
k <- k
# j is an index referring to the group number
# n_j is the sample size of the jth group
n_j <- n
# s_j is the sd of the jth population
s_j <- s
# x_j is the mean of the jth population
x_j <- x
m_j <- n_j - 1
D <- (sum(m_j * (s_j/x_j))) / sum(m_j)
D_AD <- (sum(m_j * (s_j/x_j - D)^2 )) / ( D^2 * (0.5 + D^2) )
# D_AD distributes as a central chi-sq random variable with k - 1 degrees of freedom
p_value <- pchisq(D_AD,
k - 1,
lower.tail = FALSE)
return(list(D_AD = D_AD,
p_value = p_value))
}
# Krishnamoorthy test for raw data --------------------
#' Modified signed-likelihood ratio test (SLRT) for equality of CVs, using measurement data
#' @references \url{http://link.springer.com/article/10.1007/s00180-013-0445-2}
#' Krishnamoorthy, K. & Lee, M. Comput Stat (2014) 29: 215. doi:10.1007/s00180-013-0445-2
#'
#'
#' @param nr numeric vector length one, number of simulation runs, default is 1e3
#' @param x a numeric vector containing individual measurement values
#' @param y a vector of any type containing a grouping variable
#' @param seed optional, an integer that is the starting point used in the generation of a sequence of random numbers. Include a seed if you want reproducible output.
#'
#' @return a list with the test statistic and p-value
#' @export
#'
#' @examples
#'
#' x <- rnorm(100)
#' y <- unlist(lapply(letters[1:5], function(i) rep(i, 20)))
#'
#' mslr_test(nr = 1e3, x, y)
#'
#'
mslr_test <-
function(nr = 1e3, x, y, seed){
## set the seed to ensure reproducibiilty
if (!missing(seed))
set.seed(seed)
if (!is.numeric(x) && !is.numeric(y) && !is.character(y)) {
warning("x is not numeric or y is not numeric or character: returning NA")
return(NA_real_)
}
if (anyNA(x)) {
warning("x cannot contain any NA values: returning NA")
return(NA_real_)
}
if (anyNA(y)) {
warning("y cannot contain any NA values: returning NA")
return(NA_real_)
}
# j is an index referring to the group number
# n is the sample size of the jth group
n <- data.frame(table(y))$Freq
# s is the sd of the jth population
s <- aggregate(x, by = list(y), FUN = sd)$x
# x_j is the mean of the jth population
x <- aggregate(x, by = list(y), FUN = mean)$x
k <- length(x)
gv <- as.vector(nr)
df <- n - 1
xst0 <- LRT_STAT(n, x, s)
uh0 <- xst0[1:k]
tauh0 <- xst0[k+1]; stat0 <- xst0[k+2]
sh0 <- tauh0*uh0
se0 <- tauh0*uh0/sqrt(n)
# PB estimates of the mean and SD of the LRT
for(ii in 1:nr){
z <- rnorm(k)
x <- uh0 + z*se0
ch <- rchisq(k,df)
s <- sh0*sqrt(ch/df)
xst <- LRT_STAT(n,x,s)
gv[ii] <- xst[k+2]
}
am <- mean(gv); sd <- sd(gv)
# end PB estimates
statm <- sqrt(2.0*(k-1))*(stat0-am)/sd+(k-1)
pval <- 1.0-pchisq(statm,k-1)
return(list(MSLRT = statm,
p_value = pval))
}
#' # Modified signed-likelihood ratio test (SLRT) for equality of CVs, using summary statistics when raw measurement data are not available.
#'
#'
# Krishnamoorthy, K. & Lee, M. Comput Stat (2014) 29: 215. doi:10.1007/s00180-013-0445-2
#' @references \url{http://link.springer.com/article/10.1007/s00180-013-0445-2}
#
#'
#' @param nr numeric vector lenght one, number of simulation runs
#' @param n a numeric vector, the number of observations in each group
#' @param x a numeric vector, the mean of each group
#' @param s a numeric vector, the standard deviation of each group
#' @param seed optional, an integer that is the starting point used in the generation of a sequence of random numbers. Include a seed if you want reproducible output.
#'
#' @return a list with the test statistic and p-value
#' @export
#'
#' @examples
#'
#' # Summary stats from Feltz and Miller 1996
#'
#' miller <- data.frame(test = c('ELISA', 'WEHI', '`Viral inhibition`'),
#' Mean = c(6.8, 8.5, 6.0),
#' CV = c(0.090, 0.462, 0.340),
#' N = c(5, 5, 5))
#' # compute SD from mean and cv
#' miller$SD <- with(miller, CV * Mean)
#'
#' mslr_test2(nr = 1e3, n = miller$N, s = miller$SD, x = miller$Mean)
#'
#'
mslr_test2 <-
function(nr, n, x, s, seed){
## set the seed to ensure reproducibiilty
if (!missing(seed))
set.seed(seed)
if (!is.numeric(nr) &&!is.numeric(n) && !is.integer(n) && !is.numeric(s) && !is.numeric(x)) {
warning("input values are not numeric: returning NA")
return(NA_real_)
}
k <- length(x)
gv <- as.vector(nr)
df <- n - 1
xst0 <- LRT_STAT(n, x, s)
uh0 <- xst0[1:k]
tauh0 <- xst0[k+1]; stat0 <- xst0[k+2]
sh0 <- tauh0*uh0
se0 <- tauh0*uh0/sqrt(n)
# PB estimates of the mean and SD of the LRT
for(ii in 1:nr){
z <- rnorm(k)
x <- uh0 + z*se0
ch <- rchisq(k,df)
s <- sh0*sqrt(ch/df)
xst <- LRT_STAT(n,x,s)
gv[ii] <- xst[k+2]
}
am <- mean(gv); sd <- sd(gv)
# end PB estimates
statm <- sqrt(2.0*(k-1))*(stat0-am)/sd+(k-1)
pval <- 1.0-pchisq(statm,k-1)
return(list(MSLRT = statm,
p_value = pval))
}
#' LRT_STAT, required by mlrt_test
#'
#' @param n ... as above
#' @param x ...
#' @param s ...
#' @param seed optional, an integer that is the starting point used in the generation of a sequence of random numbers. Include a seed if you want reproducible output.
#'
#' @return xx
#'
LRT_STAT <- function(n, x, s, seed){
## set the seed to ensure reproducibiilty
if (!missing(seed))
set.seed(seed)
k <- length(x)
df <- n-1
ssq <- s**2
vsq <- df*ssq/n
v <- sqrt(vsq)
sn <- sum(n)
#MLES
tau0 <- sum(n*vsq/x**2)/sn
l <- 1
repeat{
uh <- (-x+sqrt(x**2+4.0*tau0*(vsq+x**2)))/2.0/tau0
tau <- sum(n*(vsq+(x-uh)**2)/uh**2)/sn
if(abs(tau-tau0) <= 1.0e-7 || l > 30) break
l <- l + 1
tau0 <- tau
}
tauh <- sqrt(tau)
#END MLES
elf <- 0.0; clf <- 0.0
for(j in 1:k){
clf <- clf-n[j]*log(tauh*uh[j])-(n[j]*(vsq[j]+(x[j]-uh[j])**2))/(2.0*tauh**2*uh[j]**2)
elf <- elf-n[j]*log(v[j])-n[j]/2.0
}
stat <- 2.0*(elf-clf)
return(c(uh,tauh,stat))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.