#' Permutation Test of Heterogeneity
#'
#' Tests whether whether participants (items) are homogeneous without assuming
#' item (participant) homogeneity.
#'
#' @param data matrix or data frame with three columns: person code/index, item
#' label, response category. Can also be the path to a .csv file with
#' frequencies (comma-separated; first line defines category labels)
#' @param tree a list that defines which categories belong to the same
#' multinomial distribution (i.e., the the same MPT tree). For instance:
#' \code{tree = list(tree.old = c("hit","cr"), tree.new = c("fa","miss"))}.
#' Category labels must match the values of the third column of \code{data}
#' @param source whether to test for \code{"person"} or \code{"item"}
#' homogeneity
#' @param rep number of permutations to be sampled
#' @param nCPU number of CPUs used for parallel Monte Carlo sampling of
#' permutations
#'
#' @details If an item/person has zero frequencies on all categories in an MPT
#' tree, these zeros are neglected when computing mean frequencies per column.
#' As an example, consider a simple recognition test with a fixed assignments of
#' words to the learn/test list. In such an experiment, all learned words will
#' result in hits or misses (i.e., the MPT tree of old items), whereas new words
#' are always false alarms/correct rejections and thus belong to the MPT tree of
#' new items (this is not necessarily the case if words are assigned randomly).
#'
#' Note that the test does still assume independence of observations. However,
#' it does not require item homogeneity when testing participant heterogeneity
#' (in contrast to the chi-square test: \code{\link{testHetChi}}).
#' @seealso \code{\link{testHetChi}}, \code{\link{plotFreq}}
#' @author Daniel W. Heck
#' @references Smith, J. B., & Batchelder, W. H. (2008). Assessing individual
#' differences in categorical data. Psychonomic Bulletin & Review, 15,
#' 713-731. \doi{10.3758/PBR.15.4.713}
#'
#' @examples
#' # generate homogeneous data
#' # (N=15 participants, M=30 items)
#' data <- data.frame(
#' id = rep(1:15, each = 30),
#' item = rep(1:30, 15)
#' )
#' data$cat <- sample(c("h", "cr", "m", "fa"), 15 * 30,
#' replace = TRUE,
#' prob = c(.7, .3, .4, .6)
#' )
#' head(data)
#' tree <- list(
#' old = c("h", "m"),
#' new = c("fa", "cr")
#' )
#'
#' # test participant homogeneity:
#' tmp <- testHetPerm(data, tree, rep = 200, nCPU = 1)
#' tmp[2:3]
#' @export
testHetPerm <- function(
data,
tree,
source = "person",
rep = 1000,
nCPU = 4
) {
# some made up data for a simple memory recognition experiment:
# data <- data.frame(id = c(1,1,1,1,1,1,1,3,3,3,3,3,3,3,3),
# item = c(1,2,3,4,5,1,2,3,4,5,1,2,3,4,5),
# cat = c("h","cr","h","m","fa",
# "h","cr","fa","fa","m",
# "m","cr","h","fa","h"))
# tree <- list(old = c("h","m"),
# new = c("fa", "cr"))
if (is.character(data)) {
data <- read.csv(file = data)
}
data <- as.data.frame(data)
if (ncol(data) != 3) {
stop("'data' must have three columns (person code, item label, response category).")
}
################### NOT ELEGANT
if (source == "item") {
data <- data[, c(2, 1, 3)]
}
persons <- as.vector(unlist(data[, 1]))
items <- as.vector(unlist(data[, 2]))
cats <- as.vector(unlist(data[, 3]))
# # number of participants/items:
N <- length(unique(persons))
if (missing(tree)) {
warning(
"It is assumed that all columns of 'freq' stem from a single multinomial distribution",
"\n (i.e., from a single MPT tree)"
)
tree <- list(single = unique(cats))
}
if (is.null(names(tree))) {
names(tree) <- paste0("t.", 1:length(tree))
}
# number of categories per tree:
K <- sapply(tree, length)
if (sum(K) != length(unique(cats))) {
stop("Number of category labels in 'data' does not match number of labels in 'tree'.")
}
################# observed test statistic
tree.labels <- rep(names(tree), sapply(tree, length))
names(tree.labels) <- unlist(tree)
chi.obs <- testHetChi(table(persons, cats),
tree = tree.labels[sort(unique(cats))]
)$chisq
tree.vec <- tree.labels[data[, 3]]
################# sampled test statistic
single.rep <- function(i, data, tree.vec, tree.labels) {
# for each person: permute categorical responses:
cat.perm <- ave(data[, 3], # categorical responses
data[, 2], tree.vec, # group by (A) items (B) MPT tree
FUN = sample
) # permutation
testHetChi(table(data[, 1], cat.perm),
tree = tree.labels[sort(unique(data[, 3]))]
)$chisq
}
# test:
# cbind(data, replicate(3, ave(data[,3], data[,2], tree.vec, FUN = sample)))
if (nCPU == 1) {
chi.samp <- sapply(1:rep, single.rep,
data = data, tree.vec = tree.vec,
tree.labels = tree.labels
)
} else {
cl <- makeCluster(nCPU)
chi.samp <- parSapply(cl, 1:rep, single.rep,
data = data, tree.vec = tree.vec,
tree.labels = tree.labels
)
stopCluster(cl)
}
list(
chi.samp = chi.samp,
chi.obs = chi.obs,
prob = mean(chi.samp > chi.obs)
)
}
# # The R code below tests for subject variability
# reps<-10000 #number of permutations
# subjs<-29 #number of participants in data set
# items<-50 #number of items in data set
# cate<-4 #number of response categories
# permutedata <-matrix(0,subjs,items)
# contingency<-matrix(0,subjs,cate)
# x <- matrix(round(runif(items*subjs, -.49, 4.49)),subjs,items)
#
#
# #initiate variable to hold contingency tables from permuted data sets
# for (i in 1:reps){
# for (ii in 1:items) { #randomly permutes each row of data matrix D
# permutedata[,ii]<-x[sample(subjs,subjs),ii]
# }
# for (cc in 1:cate) { #converts data matrix to Nx2 contingency table
# contingency[cc,]<-apply(permutedata==cc,1,sum)
# #calculated permuted contingency table
# }
# #any test of interest can then be run on the permuted contingency table
# var(colSums(contingency))
# }
#
#
# contingency=zeros(subjs, cate);
# %initiate variable to hold contingency tables from permuted data sets
# for i=1:reps
# for ii=1:subjs %randomly permutes each row of data matrix D
# permuteddata(ii,:)=datamatrix(ii,randperm(items));
# end
# for cc=1:cate %converts data matrix to Nx2 contingency table
# contingency(:,cc)=sum(permuteddata==cc)';
# end
# %the statistical test of interest
# end
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.