Nothing
#' Cochran's Q test
#'
#' Conduct Cochran's Q test assuming equal columns proportions for matched binary
#' responses versus the alternative hypothesis of unequal column proportions.
#' @name cochranQ
#' @usage cochranQ(X, quiet = FALSE, digits = getOption("digits"))
#' @param X matrix of \eqn{I} assessors (rows) and \eqn{J} products (columns)
#' where values are \code{0} (not checked) or \code{1} (checked)
#' @param quiet if \code{FALSE} (default) then it prints information related to
#' the test; if \code{TRUE} it returns only the test statistic (\code{Q})
#' @param digits for rounding
#' @return Cochran's Q test results (statistic, degrees of freedom, p-value)
#' @export
#' @details
#' Method returns test statistic, degrees of freedom, and p value from Cochran's
#' Q test.
#'
#' @encoding UTF-8
#' @seealso \code{\link[cata]{mcnemarQ}}
#' @author J.C. Castura
#' @references
#' Cochran, W.G. (1950). The comparison of percentages in matched samples.
#' \emph{Biometrika}, 37, 256-266, \doi{10.2307/2332378}
#'
#' Meyners, M., Castura, J.C., & Carr, B.T. (2013). Existing and
#' new approaches for the analysis of CATA data. \emph{Food Quality and Preference},
#' 30, 309-319, \doi{10.1016/j.foodqual.2013.06.010}
#' @examples
#' # Cochran's Q test on the first 50 consumers on the first attribute ("Fresh")
#' cochranQ(bread$cata[1:50, , 1], digits=3)
#'
#' # Same, returning only test statistics for the first 4 attributes
#' t(res <- apply(bread$cata[1:50, , 1:4], 3, cochranQ, quiet=TRUE, digits=3))
cochranQ <- function(X, quiet = FALSE, digits = getOption("digits")){
if(is.vector(X)){
X <- matrix(X, nrow = 1)
}
X <- matrix(as.integer(X), nrow = nrow(X), ncol = ncol(X), dimnames=dimnames(X))
Jn <- ncol(X)
X <- X[which((rowSums(X) < Jn) & (rowSums(X) > 0)), , drop=FALSE]
if(sum(X) == 0){
if(!quiet){
print("No variability in X")
}
return(c(Q = NA, df = Jn - 1, p.value = NA, effect.size = NA))
}
Jn <- ncol(X)
# Cochran's Q test
numQ <- Jn * (Jn-1) * (sum(colSums(X)^2) - (sum(X)^2)/Jn)
denomQ <- Jn * sum(X) - sum(rowSums(X)^2)
Q = numQ / denomQ
# chance-corrected effect size
# In <- nrow(X) # blocks
# if(In == 1){
# udelta <- delta <- 0
# R = NA
# } else { # this condition is needed to calculate delta
# Ipairs <- utils::combn(In, 2)
# dnum <- sum(apply(Ipairs, 2, function(pindx) {
# sum(abs(X[pindx[1], ] - X[pindx[2], ]))
# }))
# delta <- dnum/(Jn * ncol(Ipairs))
# p_i <- apply(X, 1, mean)
# udelta <- 1/ncol(Ipairs) * ((sum(p_i)*(In - sum(p_i))) - sum((p_i*(1-p_i))))
# R = 1 - delta/udelta
# }
# report output
o <- c(Q = round(Q, digits = digits),
df = Jn - 1,
p.value = round(stats::pchisq(Q, Jn - 1, lower.tail = FALSE),
digits = digits))#, effect.size=round(R, digits = digits))
if(!quiet){
cat(paste(c("",
"Cochran's Q test",
"----------------",
"H0: Citation rates equal for all products (columns)",
"H1: Citation rates not equal for all products",
""),
collapse = '\n'))
print(o[1:3])
# cat(paste(c("",
# "Chance-corrected effect size: ", o["effect.size"], ""),
# collapse = '\n'))
}
invisible(o)
}
#' McNemar's test
#'
#' Pairwise tests are conducted using the two-tailed binomial test. These tests
#' can be conducted after Cochran's Q test.
#' @name mcnemarQ
#' @usage mcnemarQ(X, quiet = FALSE, digits = getOption("digits"))
#' @param X matrix of \eqn{I} assessors (rows) and \eqn{J} products (columns)
#' where values are \code{0} (not checked) or \code{1} (checked)
#' @param quiet if \code{FALSE} (default) then it prints information related to
#' the test; if \code{TRUE} it returns only the test statistic (\code{Q})
#' @param digits for rounding
#' @return Test results for all McNemar pairwise tests conducted via the
#' binomial test
#' @export
#' @encoding UTF-8
#' @seealso \code{\link[cata]{cochranQ}}
#' @author J.C. Castura
#' @references
#' Cochran, W.G. (1950). The comparison of percentages in matched samples.
#' \emph{Biometrika}, 37, 256-266. \doi{10.2307/2332378}
#'
#' McNemar, Q. (1947). Note on the sampling error of the difference between
#' correlated proportions or percentages. \emph{Psychometrika}, 12(2), 153-157.
#' \doi{10.1007/BF02295996}
#'
#' Meyners, M., Castura, J.C., & Carr, B.T. (2013). Existing and
#' new approaches for the analysis of CATA data. \emph{Food Quality and
#' Preference}, 30, 309-319, \doi{10.1016/j.foodqual.2013.06.010}
#'
#' @examples
#' # McNemar's exact pairwise test for all product pairs
#' # on the first 50 consumers and the first attribute ("Fresh")
#' mcnemarQ(bread$cata[1:50, , 1])
#'
#' # Same, returning only results for the first 4 attributes
#' (res <- apply(bread$cata[1:50, , 1:4], 3, mcnemarQ, quiet=TRUE, simplify=FALSE))
mcnemarQ <- function(X, quiet = FALSE, digits = getOption("digits")){
if(is.vector(X)){
if(!quiet){
print("X must be a matrix with at least 2 rows and 2 columns")
}
return(NA)
}
X <- matrix(as.integer(X), nrow = nrow(X), ncol = ncol(X), dimnames=dimnames(X))
Jn <- ncol(X)
X <- X[which((rowSums(X) < Jn) & (rowSums(X) > 0)), , drop=FALSE] # this also deals with missings!
if(sum(X) == 0){
if(!quiet){
print("No variability in X")
}
return(NA)
}
res <- matrix(NA, nrow = choose(Jn, 2), ncol = 6,
dimnames = list(NULL, c("index.1", "index.2", "b", "c",
"proportion", "p.value")))
res[,1:2] <- t(utils::combn(Jn, 2))
if(Jn == 2){
res[,3:4] <- colSums(X)
res[,5] <- res[,3] / sum(res[,3:4])
} else {
res[,3:4] <- apply(barray(X), 2:3, sum)
res[,5] <- res[,3] / rowSums(res[,3:4])
}
res[,6] <- mapply(function(x, y){
2*sum(stats::dbinom(0:min(x, sum(x,y)-x), size = sum(x,y),
prob = .5))}, res[,3], res[,4])
res[,6] <- mapply(min, res[,6], 1)
if(!quiet){
cat(paste(c("",
"McNemar's pairwise test",
"-----------------------", "",
"H0: Citation rates equal for both products",
"H1: Citation rates not equal for both products",
"", ""),
collapse = '\n'))
res.print <- res
res.print[,5:6] <- round(res.print[,5:6], digits = digits)
print(as.data.frame(res.print, row.names = ""))
}
invisible(res)
}
#' Convert 3d array of CATA data to 4d array of CATA differences
#'
#' Converts a three-dimensional array (\eqn{I} assessors, \eqn{J}
#' products, \eqn{M} attributes) to a four-dimensional array of product
#' comparisons (\eqn{I} assessors, \eqn{J(J-1)/2}
#' product comparisons, two outcomes (of type \code{b} or \code{c}), \eqn{M}
#' attributes)
#'
#' @name barray
#' @usage barray(X, values = "bc", type.in = "binary", type.out = "binary")
#' @param X three-dimensional array (\eqn{I} assessors, \eqn{J}
#' products, \eqn{M} attributes) where values are \code{0} (not checked)
#' or \code{1} (checked)
#' @param values \code{"bc"} (default) returns two outcomes: \code{b} and
#' \code{c}; otherwise \code{"abcd"} returns four outcomes: \code{a}, \code{b},
#' \code{c}, \code{d}.
#' @param type.in type of data submitted; default (\code{binary}) may be set to
#' \code{ordinal} or \code{scale}.
#' @param type.out currently only \code{binary} is implemented
#' @return A four-dimensional array of product comparisons having \eqn{I}
#' assessors, \eqn{J(J-1)/2} product comparisons, outcomes (see \code{values}
#' parameter), \eqn{M} attributes
#' @export
#' @encoding UTF-8
#' @author J.C. Castura
#' @references Castura, J.C., Meyners, M., Varela, P., & Næs, T. (2022).
#' Clustering consumers based on product discrimination in check-all-that-apply
#' (CATA) data. \emph{Food Quality and Preference}, 104564.
#' \doi{10.1016/j.foodqual.2022.104564}.
#'
#' @examples
#' # Get the 4d array of CATA differences for the first 8 consumers
#' b <- barray(bread$cata[1:8,,])
barray <- function(X, values = "bc", type.in = "binary", type.out = "binary"){
abcd_1attribute <- function(X, type.in = "binary"){
.abcd_1pair <- function(x, y, type.in = "binary"){
x <- c(x) # product 'x' (1 response per consumer)
y <- c(y) # product 'y' (1 response per consumer)
if(type.in == "binary"){
return(list(a = sum(x+y == 2), b = sum(x-y == 1),
c = sum(x-y == -1), d = sum(x+y == 0)))
}
if(type.in %in% c("scale", "ordinal")){
return(list(a = sum(all(x == y, x != 0)),
b = sum(x > y),
c = sum(x < y),
d = sum(all(x == y, x == 0))))
}
}
if(is.vector(X)){
X <- matrix(X, nrow = 1)
} else {
X <- as.matrix(X) # assessors x products
}
pair.items <- utils::combn(ncol(X),2)
this.pair <- matrix(0, nrow = ncol(pair.items), ncol = 4,
dimnames = list(NULL, letters[1:4]))
for(i in 1:ncol(pair.items)){
this.pair[i, ] <- unlist(.abcd_1pair(X[,pair.items[1,i]],
X[,pair.items[2,i]], type.in = type.in))
}
return(this.pair)
}
nI <- dim(X)[1]
nJ <- dim(X)[2]
if(length(dim(X))==2){
oX <- X
X <- array(NA, c(nI, nJ, 1), dimnames = list(dimnames(X)[[1]],
dimnames(X)[[2]], 1))
X[,,1] <- oX
}
nM <- dim(X)[3]
out <- array(0, c(nI, nJ*(nJ-1)/2, 4, nM),
dimnames = list(dimnames(X)[[1]],
apply(t(utils::combn(nJ,2)), 1, paste0, collapse = "_"),
letters[1:4],
dimnames(X)[[3]]#colnames(X[1,,])
))
for (i in 1:nI){
for(m in 1:nM){
this.data <- X[i,,m]
out[i,,1:4,m] <- abcd_1attribute(this.data, type.in = type.in)
}
}
if (values == "bc") {
return(out[,, 2:3,, drop=FALSE])
} else {
if (values == "abcd") {
return(out)
} else {
return(print(paste("Invalid option: values =", values), quote = FALSE))
}
}
}
#' Converts 3d array of CATA data to a wide 2d matrix format
#'
#' Converts a three-dimensional array (\eqn{I} assessors, \eqn{J}
#' products, \eqn{M} attributes) to a two-dimensional matrix
#' (\eqn{J} products, (\eqn{I} assessors, \eqn{M} attributes))
#'
#' @name toWideMatrix
#' @usage toWideMatrix(X)
#' @param X three-dimensional array (\eqn{I} assessors, \eqn{J}
#' products, \eqn{M} attributes) where values are \code{0} (not checked)
#' or \code{1} (checked)
#' @return A matrix with \code{J} products in rows and \eqn{I}
#' assessors \eqn{\times M} attributes in columns
#' @export
#' @encoding UTF-8
#' @author J.C. Castura
#' @examples
#' # convert CATA results from the first 8 consumers and the first 4 attributes
#' # to a wide matrix
#' toWideMatrix(bread$cata[1:8,,1:4])
toWideMatrix <- function(X){
out <- X[1,,]
for(i in 2:dim(X)[[1L]]){
out <- cbind(out, X[i,,])
}
return(out)
}
#' Converts 3d array of CATA data to a tall 2d matrix format
#'
#' Converts a three-dimensional array (\eqn{I} assessors, \eqn{J}
#' products, \eqn{M} attributes) to a two-dimensional matrix with
#' (\eqn{I} assessors, \eqn{J} products) rows and (\eqn{M}
#' attributes) columns, optionally preceded by two columns of row headers.
#'
#' @name toMatrix
#' @usage toMatrix(X, header.rows = TRUE, oneI = FALSE, oneM = FALSE)
#' @param X three-dimensional array (\eqn{I} assessors, \eqn{J}
#' products, \eqn{M} attributes) where values are \code{0} (not checked)
#' or \code{1} (checked)
#' @param header.rows \code{TRUE} (default) includes row headers; set to
#' \code{FALSE} to exclude these headers
#' @param oneI indicates whether calculation is for one assessor (default:
#' \code{FALSE})
#' @param oneM indicates whether calculation is for one attribute (default:
#' \code{FALSE})
#' @return A matrix with \eqn{I} assessors \eqn{\times J} products in rows
#' and \eqn{M} attributes in columns (preceded by 2 columns)
#' of headers if \code{header.rows = TRUE}
#' @export
#' @encoding UTF-8
#' @examples
#' # convert CATA results from the first 8 consumers and the first 4 attributes
#' # to a tall matrix
#' toMatrix(bread$cata[1:8,,1:4])
toMatrix <- function(X, header.rows = TRUE, oneI = FALSE, oneM = FALSE){
if(any(oneI, oneM)) {
if(all(oneI, oneM)){
dims <- c(1, length(X), 1)
} else {
dims <- dim(X) # 2D
if(oneI){
dims <- c(1, dims) # nI was 1
}
if(oneM){
dims <- c(dims, 1) # nM was 1
}
}
X <- array(X, dim = dims)
}
if(length(dim(X)) == 2){
X <- array(X, c(dim(X), 1), dimnames = list(
dimnames(X)[[1]], dimnames(X)[[2]], "response"))
}
dims <- dim(X)
if(length(dims) != 3){
return("Function is to convert an array to a matrix")
}
nI <- dims[1]
nJ <- dims[2]
nM <- dims[3]
if(is.null(dimnames(X)[[1]])[1]) dimnames(X)[[1]] <- 1:nI
if(is.null(dimnames(X)[[2]])[1]) dimnames(X)[[2]] <- 1:nJ
if(is.null(dimnames(X)[[3]])[1]) dimnames(X)[[3]] <- 1:nM
this.rownames <- paste0(rep(dimnames(X)[[1]], each = nJ), "_",
rep(dimnames(X)[[2]], times = nI))
Xout <- matrix(NA, ncol = dim(X)[[3]], nrow = prod(dim(X)[-3]),
dimnames = list(this.rownames, dimnames(X)[[3]]))
for(cc in 1:nM){
Xout[,cc] <- c(aperm(X[,,cc], 2:1))
}
if(header.rows){
Xout <- data.frame(subject = as.factor(rep(dimnames(X)[[1]], each = nJ)),
product = as.factor(rep(dimnames(X)[[2]], times = nI)),
Xout)
}
return(Xout)
}
#' Calculate \eqn{RV} Coefficient
#'
#' Calculate \eqn{RV} coefficient
#' @name rv.coef
#' @usage rv.coef(X, Y, method = 1)
#' @param X input matrix (same dimensions as \code{Y})
#' @param Y input matrix (same dimensions as \code{X})
#' @param method \code{1} (default) and \code{2} give identical \eqn{RV} coefficients
#' @return \eqn{RV} coefficient
#' @export
#' @encoding UTF-8
#' @author J.C. Castura
#' @references Robert, P., & Escoufier, Y. (1976). A unifying tool for linear
#' multivariate statistical methods: the RV-coefficient. \emph{Journal of the Royal
#' Statistical Society: Series C (Applied Statistics)}, 25, 257-265.
#' \doi{10.2307/2347233}
#' @examples
#' # Generate some data
#' set.seed(123)
#' X <- matrix(rnorm(8), nrow = 4)
#' Y <- matrix(rnorm(8), nrow = 4)
#'
#' # get the RV coefficient
#' rv.coef(X, Y)
rv.coef <- function(X, Y, method = 1){
tr <- function(X){
sum(diag(X), na.rm = TRUE)
}
X <- sweep(X, 2, colMeans(X))#apply(X, 2, mean), "-")
Y <- sweep(Y, 2, colMeans(Y))#apply(Y, 2, mean), "-")
XX <- tcrossprod(X,X)
YY <- tcrossprod(Y,Y)
if(method==1){
out <- tr(XX %*% YY) /
sqrt(tr(XX %*% XX) %*% tr(YY %*% YY) )
}
if(method==2){
out <- sum(c(XX)*c(YY), na.rm=TRUE) /
sqrt(sum(XX^2, na.rm=TRUE)*sum(YY^2, na.rm=TRUE))
}
return(c(out))
}
#' Salton's cosine measure
#'
#' Calculate Salton's cosine measure
#' @name salton
#' @usage salton(X, Y)
#' @param X input matrix (same dimensions as \code{Y})
#' @param Y input matrix (same dimensions as \code{X})
#' @return Salton's cosine measure
#' @export
#' @encoding UTF-8
#' @author J.C. Castura
#' @references Salton, G., & McGill, M.J. (1983). \emph{Introduction to Modern
#' Information Retrieval}. Toronto: McGraw-Hill.
#' @examples
#' # Generate some data
#' set.seed(123)
#' X <- matrix(rnorm(8), nrow = 4)
#' Y <- matrix(rnorm(8), nrow = 4)
#'
#' # get Salton's cosine measure
#' salton(X, Y)
salton <- function(X, Y){
out <- sum(c(X)*c(Y)) / sqrt(sum(X^2)*sum(Y^2))
return(out)
}
#' Apply top-k box coding to scale data
#'
#' Apply top-k box coding to scale data. Using defaults give top-2 box (T2B) coding.
#' @name code.topk
#' @usage code.topk(X, zero.below = 8, one.above = 7)
#' @param X input matrix
#' @param zero.below default is \code{8}; values below this numeric threshold
#' will be coded \code{0}; use \code{NULL} if there is no such threshold
#' @param one.above default is \code{7}; values above this numeric threshold
#' will be coded \code{1}; use \code{NULL} if there is no such threshold
#' @return matrix \code{X} with top-k coding applied
#' @export
#' @encoding UTF-8
#' @author J.C. Castura
#' @references Castura, J.C., Meyners, M., Pohjanheimo, T., Varela, P., & Næs, T. (2023).
#' An approach for clustering consumers by their top-box and top-choice responses.
#' \emph{Journal of Sensory Studies}, e12860. \doi{10.1111/joss.12860}
#' @examples
#' # Generate some data
#' set.seed(123)
#' X <- matrix(sample(1:9, 100, replace = TRUE), nrow = 5)
#'
#' # apply top-2 box (T2B) coding
#' code.topk(X, zero.below = 8, one.above = 7)
code.topk <- function(X, zero.below = 8, one.above = 7){
Y <- X
if(!is.null(zero.below) && !is.null(one.above)){
if(zero.below != one.above + 1){
return(
print("zero.below should be equal to one.above+1 if both specified"))
}
}
if(!is.null(zero.below)){
Y[Y < zero.below] <- 0
}
if(!is.null(one.above)){
Y[Y > one.above] <- 1
}
return(matrix(as.integer(Y), nrow = nrow(Y), ncol = ncol(Y), dimnames = dimnames(Y)) )
#return(Y)
}
#' Apply top-c choices coding to a vector of scale data from a respondent
#'
#' Apply top-c choices coding to a vector of scale data from a respondent
#' @name topc
#' @usage topc(x, c = 2, coding = "B")
#' @param x input matrix
#' @param c number of top choices considered to be 'success'; other choices are
#' considered to be 'failure' and are coded \code{0}
#' @param coding \code{"B"} (default) codes all successes as \code{1};
#' \code{"N"} codes all successes with their numeric coding
#' @return matrix \code{X} with top-k coding applied
#' @export
#' @encoding UTF-8
#' @author J.C. Castura
#' @references Castura, J.C., Meyners, M., Pohjanheimo, T., Varela, P., & Næs, T. (2023).
#' An approach for clustering consumers by their top-box and top-choice responses.
#' \emph{Journal of Sensory Studies}, e12860. \doi{10.1111/joss.12860}
#' @examples
#' # Generate some data
#' set.seed(123)
#' X <- matrix(sample(1:9, 100, replace = TRUE), nrow = 5)
#'
#' # apply top-2 choice (T2C) coding
#' apply(X, 1, topc)
topc <- function(x, c = 2, coding = "B"){
#anything with a value of 0 is special, so give in the max value
y <- max(x)-x+1
ranky <- rank(y, ties.method = "average")
ranky.u <- sort(unique(ranky))
it <- 0
indx <- c()
nc <- 0
while(nc < c){
it <- it + 1
indx <- c(indx, which(ranky == ranky.u[it]))
nc <- length(indx)
}
out <- x*0
if(coding %in% "B"){
out[indx] <- 1
}
if(coding %in% "N"){
out[indx] <- x[indx]
}
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.