#This is a package for the Values experiments.
#' New combinations Function
#'
#' This function creates a vector of all the combinations of itemList taken minNumPerSide at a time to maxNumPerSide at a time.
#' @param numItems the total number of items used for the combinations.
#' @param minNumPerSide the minimum number of items in a combination. DEFAULT = 1.
#' @param maxNumPerSide the maxmimun number of items in a combination. DEFAULT = 1.
#' @keywords combinations values
#' @return a vector of all the combinations (by item number)
#' @export
#' @examples ch.combnVector (5, 1,3)
ch.combnVector <- function(numItems, minNumPerSide = 1, maxNumPerSide = 1, addSelfMatch = TRUE) {
numSideSeq <- seq(minNumPerSide,maxNumPerSide,1)
df.combns <- NULL
for(i in numSideSeq) {
#get combination
tmpC <- t(combn(numItems, i))
#add self matching conditions for item combinations greater than 1
if(addSelfMatch) {
if(i > 1) {
selfMatch <- array(rep(1:numItems,i), dim=c(numItems,i))
tmpC <- rbind(tmpC, selfMatch)
}
}
#fill extra columns with NA and make into a dataframe
columnsNeeded <- maxNumPerSide - ncol(tmpC)
if(columnsNeeded > 0) {
for (j in 1:columnsNeeded) {
extraNA <- array(NA,dim=c(nrow(tmpC),columnsNeeded))
}
tmpC <- data.frame (tmpC,extraNA)
} else {
tmpC <- data.frame (tmpC)
}
#rename columns to generic
for(j in 1:ncol(tmpC)) {
colnames(tmpC)[j] <- paste("I", j, sep="")
}
#combine into common dataframe
if (is.null(df.combns)) {
df.combns <- tmpC
} else {
df.combns <- rbind(df.combns, tmpC)
}
}
return(df.combns)
}
#' Bootstrap the overlap of two distributions Function
#'
#' This function does a bootstrap to calculate the percentage of samples that Distribution X is greater than Distribution Y.
#' @param xDist Distribution X.
#' @param yDist Distribution Y.
#' @param numRuns the number of runs in the bootstrap. DEFAULT = 100.
#' @keywords distribution overlap bootstrap values
#' @return the average percentage of samples that xDist was greater than yDist, the sd of the average percentage, Overlap = a transformation of average percentage so 1=complete overlap and 0 = no overlap, direction = 1 if xDist > yDist; else -1
#' @export
#' @examples ch.distOverlap (xValue, yValue,100)
ch.distOverlap <- function(xDist, yDist, numRuns) {
percentage <- vector(mode="numeric", length = numRuns)
for(j in 1:numRuns){
samplesize <- (length(xDist)+length(yDist))/2
rx <-sample(xDist, size=samplesize, replace=TRUE, prob=NULL)
ry <-sample(yDist, size=samplesize, replace=TRUE, prob=NULL)
winner <-ifelse(rx>ry, 1, 0)
percentage[j] <-sum(winner)/length(winner)
}
averageP <-mean(percentage)
sdP <-sd(percentage)
overlap <-1-abs(((averageP-0.5)/0.5))
direction <-ifelse(averageP>=0.5,1,-1)
return(c(percent = averageP, sd = sdP, overlap = overlap, direction = direction))
}
#' For each in a set of combinations, run ch.distOverlap Function
#'
#' This function runs ch.distOverlap for all the combinations of distributions identified in df.combns.
#' @param values a vector of the values in the distributions.
#' @param items a vector that identifies the item that each value is linked to.
#' @param itemSet a vector of items that you want to run the bootstraps for.
#' @param df.combns a vector of combinations that identifies the items (by index number of itemSet) whose values should be combined and compared against every other combination in this vector.
#' @param numRuns the number of runs to do in the bootstrap. DEFAULT = 1000
#' @param outFile the name of a text file that will be output as the bootstrap runs. This is filled incrementally just in case the program dies. DEFAULT = NULL - will not write a file
#' @keywords distribution overlap bootstrap values
#' @return a dataframe with the overlap statistics for each set of distributions compared. See ch.distOverlap
#' @export
#' @examples ch.batchOverlap (values= data1$responses, items = data1$items,itemSet = itemSet,df.combns = df.combns, numRuns=1000, outFile ="out.txt" )
ch.batchOverlap <- function(values, items, itemSet, df.combns, numRuns=1000, outFile = NULL) {
data <- data.frame(values, items)
numCombinations <- nrow(df.combns)
maxNumPerSide <- ncol(df.combns)
totalNumOverlaps <- (numCombinations^2- numCombinations)/2
averageP <- vector(mode="numeric", length = totalNumOverlaps)
sdP <- vector(mode="numeric", length = totalNumOverlaps)
overlap <- vector(mode="numeric", length = totalNumOverlaps)
direction <- vector(mode="numeric", length = totalNumOverlaps)
itemAarray <- array("XX", dim = c(totalNumOverlaps,maxNumPerSide))
itemBarray <- array("XX", dim = c(totalNumOverlaps,maxNumPerSide))
i <- 1
for(l in 1:(numCombinations - 1)){
print(paste(l, "of", (numCombinations - 1), sep=" "))
for(k in l:(numCombinations - 1)){
xValue <- NULL
yValue <- NULL
#get the raw data for each item and append it into a single vector (like a single distribution)
for(m in 1:maxNumPerSide) {
itemAarray[i,m] <- toString(itemSet[df.combns[l,m]])
itemBarray[i,m] <- toString(itemSet[df.combns[k+1,m]])
if (!is.na(itemAarray[i,m])) {
xTmp <- data$values[data$items==itemAarray[i,m]]
if(is.null(xValue)) {
xValue <- xTmp
} else {
xValue <- append(xValue,xTmp)
}
}
if (!is.na(itemBarray[i,m])) {
yTmp <- data$values[data$items==itemBarray[i,m]]
if(is.null(yValue)) {
yValue <- yTmp
} else {
yValue <- append(yValue,yTmp)
}
}
}
#run bootstrap to get overlap information
pOut <- ch.distOverlap(xValue,yValue,numRuns)
averageP[i] <-pOut["percent"]
sdP[i] <-pOut["sd"]
overlap[i] <-pOut["overlap"]
direction[i] <-pOut["direction"]
i=i+1
}
#put data in dataframe
alldat <-data.frame(itemAarray[1:(i-1),], itemBarray[1:(i-1),], averageP = averageP[1:(i-1)],sdp = sdP[1:(i-1)], overlap = overlap[1:(i-1)],direction = direction[1:(i-1)] )
#rename columns
for(nps in 1:maxNumPerSide) {
colnames(alldat)[nps] <- paste("IA", nps, sep="")
colnames(alldat)[nps+maxNumPerSide] <- paste("IB", nps, sep="")
}
if(!is.null(outFile)) {
write.table(alldat, file=outFile, quote=F, sep="\t", row.names = F, append=F)
}
}
return(alldat)
}
#' Extends ch.batchOverlap to two separate X and Y datasets Function
#'
#' This function runs ch.distOverlap for all the combinations of distributions identified in df.combns.
#' @param xValues a vector of the values in the X Distributions.
#' @param xItems a vector that identifies the item that each xValue is linked to.
#' @param yValues a vector of the values in the Y Distributions.
#' @param yItems a vector that identifies the item that each yValue is linked to.
#' @param itemSet a vector of items that you want to run the bootstraps for.
#' @param df.combns a vector of combinations that identifies the items (by index number of itemSet) whose values should be combined and compared against every other combination in this vector.
#' @param numRuns the number of runs to do in the bootstrap. DEFAULT = 1000
#' @param outFile the name of a text file that will be output as the bootstrap runs. This is filled incrementally just in case the program dies. DEFAULT = NULL - will not write a file
#' @keywords distribution overlap bootstrap values
#' @return a dataframe with the overlap statistics for each set of distributions compared. See ch.distOverlap
#' @export
#' @examples ch.batchOverlapXY (data1$responses, data1$items,data2$responses, data2$items,itemSet,df.combns, "out.txt" )
ch.batchOverlapXY <- function(xValues, xItems, yValues, yItems, itemSet, df.combns, numRuns = 1000, outFile = NULL) {
xData <- data.frame(values = xValues, items = xItems)
yData <- data.frame(values = yValues, items = yItems)
numCombinations <- nrow(df.combns)
maxNumPerSide <- ncol(df.combns)
totalNumOverlaps <- (numCombinations^2- numCombinations)/2
averageP <- vector(mode="numeric", length = totalNumOverlaps)
sdP <- vector(mode="numeric", length = totalNumOverlaps)
overlap <- vector(mode="numeric", length = totalNumOverlaps)
direction <- vector(mode="numeric", length = totalNumOverlaps)
itemAarray <- array("XX", dim = c(totalNumOverlaps,maxNumPerSide))
itemBarray <- array("XX", dim = c(totalNumOverlaps,maxNumPerSide))
i <- 1
for(l in 1:(numCombinations - 1)){
print(paste(l, "of", (numCombinations - 1), sep=" "))
for(k in l:(numCombinations - 1)){
xValue <- NULL
yValue <- NULL
#get the raw data for each item and append it into a single vector (like a single distribution)
for(m in 1:maxNumPerSide) {
itemAarray[i,m] <- toString(itemSet[df.combns[l,m]])
itemBarray[i,m] <- toString(itemSet[df.combns[k+1,m]])
if (!is.na(itemAarray[i,m])) {
xTmp <- xData$values[xData$items==itemAarray[i,m]]
if(is.null(xValue)) {
xValue <- xTmp
} else {
xValue <- append(xValue,xTmp)
}
}
if (!is.na(itemBarray[i,m])) {
yTmp <- yData$values[yData$items==itemBarray[i,m]]
if(is.null(yValue)) {
yValue <- yTmp
} else {
yValue <- append(yValue,yTmp)
}
}
}
#run bootstrap to get overlap information
pOut <- ch.distOverlap(xValue,yValue,numRuns)
averageP[i] <-pOut["percent"]
sdP[i] <-pOut["sd"]
overlap[i] <-pOut["overlap"]
direction[i] <-pOut["direction"]
i=i+1
}
#put data in dataframe
alldat <-data.frame(itemAarray[1:(i-1),], itemBarray[1:(i-1),], averageP = averageP[1:(i-1)],sdp = sdP[1:(i-1)], overlap = overlap[1:(i-1)],direction = direction[1:(i-1)] )
#rename columns
for(nps in 1:maxNumPerSide) {
colnames(alldat)[nps] <- paste("IA", nps, sep="")
colnames(alldat)[nps+maxNumPerSide] <- paste("IB", nps, sep="")
}
if(!is.null(outFile)) {
write.table(alldat, file=outFile, quote=F, sep="\t", row.names = F, append=F)
}
}
return(alldat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.