Nothing
#------------------------------------------------------------------------------
#' modifiedRecursive trimming procedure.
#'
#' \code{modifiedRecursive} takes a data frame of RT data and returns trimmed
#' RT data. Within each participant-by-condition cell, RTs that lie more than a
#' criterion number of standard deviations from the cell mean are removed, with
#' the criterion changing as more trials are removed, as described in
#' Van Selst & Jolicoeur (1994).
#'
#' @param data A data frame with columns containing: participant identification
#' number ('pptVar'); condition identification, if applicable ('condVar');
#' response time data ('rtVar'); and accuracy ('accVar'). The RT can be in
#' seconds (e.g., 0.654) or milliseconds (e.g., 654). Typically, "condition"
#' will consist of strings. Accuracy must be coded as 1 for correct and 0 for
#' error responses.
#' @param minRT The lower criterion for acceptable response time. Must be in
#' the same form as the rt column in the data frame (e.g., in seconds OR
#' milliseconds). Only RTs strictly above this value are kept before
#' proceeding with SD trimming.
#' @param pptVar The quoted name of the column in the data that identifies
#' participants.
#' @param condVar The quoted name of the column in the data that includes the
#' conditions.
#' @param rtVar The quoted name of the column in the data containing reaction
#' times.
#' @param accVar The quoted name of the column in the data containing accuracy,
#' coded as 0 or 1 for incorrect and correct trial, respectively.
#' @param omitErrors If set to TRUE, error trials will be removed before
#' conducting the trimming procedure. Final data returned will not be
#' influenced by errors in this case. Trials with missing accuracy are treated
#' as not correct and are removed as well.
#' @param returnType Request nature of returned data. "raw" returns trial-
#' level data excluding trimmed data; "mean" returns mean response times per
#' participant for each experimental condition identified; "median" returns
#' median response times per participant for each experimental condition
#' identified. Any other value is an error.
#' @param digits How many decimal places to round to after trimming? Only used
#' for the "mean" and "median" return types.
#'
#' @return For \code{returnType = "mean"} or \code{"median"}: a data frame with
#' one row per participant, a first column named \code{participant}, and one
#' column per condition holding the rounded trimmed mean/median. For
#' \code{returnType = "raw"}: the trial-level rows that survived trimming,
#' stacked participant by participant and condition by condition.
#'
#' @references Van Selst, M. & Jolicoeur, P. (1994). A solution to the effect
#' of sample size on outlier elimination. \emph{Quarterly Journal of Experimental
#' Psychology, 47} (A), 631-650.
#'
#'
#' @examples
#' # load the example data that ships with trimr
#' data(exampleData)
#'
#' # perform the trimming, returning mean RT
#' trimmedData <- modifiedRecursive(data = exampleData, minRT = 150,
#' returnType = "mean")
#'
#' @importFrom stats sd
#' @importFrom dplyr %>%
#' @importFrom dplyr row_number
#' @export
modifiedRecursive <- function(data,
                              minRT,
                              pptVar = "participant",
                              condVar = "condition",
                              rtVar = "rt",
                              accVar = "accuracy",
                              omitErrors = TRUE,
                              returnType = "mean",
                              digits = 3) {

  # an unknown returnType used to fall through every branch and silently
  # return NULL; fail loudly instead so typos are caught
  if(!is.character(returnType) || length(returnType) != 1 ||
     !(returnType %in% c("raw", "mean", "median"))){
    stop("returnType must be one of \"raw\", \"mean\", or \"median\"",
         call. = FALSE)
  }

  # remove errors if the user has asked for it. which() drops rows whose
  # accuracy is NA instead of turning them into all-NA rows.
  if(omitErrors == TRUE){
    trimmedData <- data[which(data[[accVar]] == 1), ]
  } else {
    trimmedData <- data
  }

  # get the list of participant numbers (taken from the full data so that
  # every participant appears in the output)
  participant <- unique(data[[pptVar]])

  # get the list of experimental conditions. `[[` is used (rather than
  # `data[, condVar]`) so that a plain vector is returned for tibbles too.
  conditionList <- unique(data[[condVar]])

  # remove RTs that are not above the minimum (rows with a missing RT are
  # dropped as well)
  trimmedData <- trimmedData[which(trimmedData[[rtVar]] > minRT), ]

  #-------
  ### returnType == mean or median (identical apart from the statistic,
  ### which modifiedRecursiveTrim selects from returnType)
  if(returnType == "mean" || returnType == "median"){

    # ready the final data set: one row per participant, one column per
    # condition
    finalData <- as.data.frame(matrix(0, nrow = length(participant),
                                      ncol = length(conditionList)))

    # give the columns the condition names
    colnames(finalData) <- as.character(conditionList)

    # add the participant column (it becomes column 1, so condition j lives
    # in column j + 1)
    finalData <- cbind(participant, finalData)

    # loop over all subjects
    for(i in seq_along(participant)){

      # loop over all conditions
      for(j in seq_along(conditionList)){

        # locate the trials of this participant in this condition
        inCell <- which(trimmedData[[pptVar]] == participant[i] &
                          trimmedData[[condVar]] == conditionList[j])
        cellRT <- trimmedData[[rtVar]][inCell]

        # trim, summarise, round, and store
        finalData[i, j + 1] <-
          round(modifiedRecursiveTrim(cellRT, returnType = returnType),
                digits = digits)
      }
    }

    return(finalData)
  } # end of returnType = "mean" / "median" section

  #-------
  ### returnType == raw

  # ready the final data set: an empty container carrying the column names
  finalData <- matrix(0, nrow = 0, ncol = length(colnames(trimmedData)))
  colnames(finalData) <- colnames(trimmedData)

  # loop over all subjects
  for(i in seq_along(participant)){

    # loop over each condition
    for(j in seq_along(conditionList)){

      # get the relevant data
      inCell <- which(trimmedData[[pptVar]] == participant[i] &
                        trimmedData[[condVar]] == conditionList[j])
      tempData <- trimmedData[inCell, ]

      # do the trimming
      tempData <- modifiedRecursiveTrim(tempData, rtVar = rtVar,
                                        returnType = returnType)

      # update the final data frame.
      # NOTE(review): modifiedRecursiveTrim returns NA (not a data frame)
      # for a cell with exactly two trials, which rbind() appends as a row
      # of NAs -- confirm whether that is intended.
      finalData <- rbind(finalData, tempData)
    }
  }

  return(finalData)
}
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
### The function that actually does the trimming
#
# Applies the modified recursive procedure (Van Selst & Jolicoeur, 1994) to the
# RTs of one participant-by-condition cell.
#
# Arguments:
#   data        For returnType "mean" / "median": a numeric vector of RTs.
#               For returnType "raw": a data frame whose RTs are in the
#               column named by `rtVar`.
#   rtVar       Name of the RT column; only used when returnType is "raw".
#   returnType  "mean", "median", or "raw".
#
# Value:
#   "mean" / "median": the mean / median of the RTs that survive trimming.
#   "raw": the rows of `data` that survive trimming.
#   In all three modes NA is returned when exactly two RTs remain.
#   Any other returnType yields NULL.
modifiedRecursiveTrim <- function(data, rtVar = "rt", returnType = "mean"){

  # load the linear interpolation data file (hidden from user)
  criterion <- linearInterpolation

  #-------
  ### returnType == mean or median: `data` is a vector of RTs
  if(returnType == "mean" || returnType == "median"){

    keptRT <- data[modifiedRecursiveKeepIndex(data, criterion)]

    # If there are only two RTs, then return NA
    if(length(keptRT) == 2){
      return(NA)
    }

    if(returnType == "mean"){
      return(mean(keptRT))
    }
    return(stats::median(keptRT))
  }

  #-------
  ### returnType == raw: `data` is a data frame of trials
  if(returnType == "raw"){

    keep <- modifiedRecursiveKeepIndex(data[[rtVar]], criterion)

    # If there are only two RTs, then return NA
    if(length(keep) == 2){
      return(NA)
    }

    # nothing was trimmed: hand the data back untouched
    if(length(keep) == nrow(data)){
      return(data)
    }

    # base indexing by the surviving positions, so every mode removes exactly
    # the same trials
    return(data[keep, , drop = FALSE])
  }
} # end of function

# Helper: run the modified recursive loop on a numeric vector of RTs and return
# the (ascending) positions of the RTs that survive.
#
# `criterion` is the lookup table with columns `sampleSize` and
# `modifiedRecursive` (the SD multiplier for that sample size). Each sample
# size encountered (capped at 100) is assumed to be present in the table.
#
# Trials are always removed by position, one maximum and/or one minimum per
# pass. Removing by value would discard every tied RT at once and, for a cell
# in which all RTs are equal, leave nothing to compute a mean and SD from.
modifiedRecursiveKeepIndex <- function(rts, criterion){

  keep <- seq_along(rts)

  repeat{

    # the data needs to have more than 2 RTs in order to function.
    # (thanks to Ayala Allon for noticing this.)
    if(length(keep) <= 2){
      break
    }

    current <- rts[keep]

    ### get the parameters for the moving criterion
    # if the sample size is greater than 100, use SDs for N = 100
    sampleSize <- min(length(current), 100)

    # look up the SD to use for the current sampleSize
    rowNumber <- which(criterion$sampleSize == sampleSize)
    stDev <- criterion$modifiedRecursive[rowNumber]

    ### now do the removal of trials
    # positions of the largest & smallest RT (first one in case of ties)
    maxPos <- which.max(current)
    minPos <- which.min(current)

    # temporarily remove the single largest RT, then find the mean, SD, &
    # cutoffs of what is left
    tempData <- current[-maxPos]
    curMean <- mean(tempData)
    curSD <- stats::sd(tempData)
    maxCutoff <- curMean + (stDev * curSD)
    minCutoff <- curMean - (stDev * curSD)

    # collect the positions to remove on this pass; both are expressed
    # relative to `current`, so removing them together cannot shift indices
    toDrop <- integer(0)

    # if there is a data point above the cutoff, then remove it
    if(current[maxPos] > maxCutoff){
      toDrop <- c(toDrop, maxPos)
    }

    # if there is a data point below the cutoff, then remove it
    if(current[minPos] < minCutoff){
      toDrop <- c(toDrop, minPos)
    }

    # when there are no trials removed on the current iteration, break the
    # loop
    if(length(toDrop) == 0){
      break
    }

    keep <- keep[-unique(toDrop)]

    # alternatively, stop the loop when the data set hits 4 RTs left (as in
    # vanSelst & Jolicoeur, 1994)
    if(length(keep) < 5){
      break
    }
  } # end of repeat loop

  keep
}
#------------------------------------------------------------------------------
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.