# Nothing
#------------------------------------------------------------------------------
#' RT trimming with standard deviation criterion
#'
#' \code{sdTrim} takes a data frame of RT data and returns trimmed RT
#' data that fall below a set criterion (based on standard deviations
#' above a particular mean). The criterion can be based on the mean of the
#' whole set of data, based on the mean per experimental condition, based on
#' the mean per participant, or based on the mean of each participant in each
#' experimental condition.
#'
#' By passing a data frame containing raw response time data, together with
#' trimming criteria, the function will return trimmed data, either in the form
#' of trial-level data or in the form of means/medians for each subject &
#' condition.
#'
#' @param data A data frame with columns containing: participant identification
#' number ('pptVar'); condition identification, if applicable ('condVar');
#' response time data ('rtVar'); and accuracy ('accVar'). The RT can be in
#' seconds (e.g., 0.654) or milliseconds (e.g., 654). Typically, "condition"
#' will consist of strings. Accuracy must be coded as 1 for correct and 0 for
#' error responses.
#' @param minRT The lower criteria for acceptable response time. Must be in
#' the same form as rt column in data frame (e.g., in seconds OR milliseconds).
#' All RTs below this value are removed before proceeding with SD trimming.
#' @param sd The upper criteria for standard deviation cut-off.
#' @param pptVar The quoted name of the column in the data that identifies
#' participants.
#' @param condVar The quoted name of the column in the data that includes the
#' conditions.
#' @param rtVar The quoted name of the column in the data containing reaction
#' times.
#' @param accVar The quoted name of the column in the data containing accuracy,
#' coded as 0 or 1 for incorrect and correct trial, respectively.
#' @param perCondition Set to TRUE if the user wishes the trimming to occur per
#' condition of the experimental design.
#' @param perParticipant Set to TRUE if the user wishes the trimming to occur
#' per participant.
#' @param omitErrors If set to TRUE, error trials will be removed before
#' conducting trimming procedure. Final data returned will not be influenced
#' by errors in this case.
#' @param returnType Request nature of returned data. "raw" returns trial-
#' level data excluding trimmed data; "mean" returns mean response times per
#' participant for each experimental condition identified; "median" returns
#' median response times per participant for each experimental condition
#' identified.
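#' @param digits How many decimal places to round to after trimming?
#' @return A data frame. For \code{returnType = "raw"} it holds the
#' trial-level rows that survive trimming. For \code{"mean"} or \code{"median"}
#' it holds one row per participant: a \code{participant} column followed by
#' one column per condition containing the rounded mean/median RT.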
#' @examples
#' # load the example data that ships with trimr
#' data(exampleData)
#'
#' # perform the trimming with SD trimming per condition, returning mean RT
#' trimmedData <- sdTrim(data = exampleData, minRT = 150, sd = 2.5,
#' perCondition = TRUE, perParticipant = FALSE, returnType = "mean")
#'
#' @importFrom stats median sd
#'
#' @export
sdTrim <- function(data,
                   minRT,
                   sd,
                   pptVar = "participant",
                   condVar = "condition",
                   rtVar = "rt",
                   accVar = "accuracy",
                   perCondition = TRUE,
                   perParticipant = TRUE,
                   omitErrors = TRUE,
                   returnType = "mean",
                   digits = 3) {
  # Pipeline:
  #   1. optionally drop error trials (accuracy != 1);
  #   2. keep only RTs strictly above minRT;
  #   3. within each trimming group (whole data set, per condition, per
  #      participant, or per participant x condition) keep only RTs strictly
  #      below mean + sd * SD of that group;
  #   4. return the surviving trials ("raw") or a participant x condition
  #      table of rounded means / medians.

  # An unsupported returnType used to fall through every branch and return
  # NULL silently; fail loudly instead.
  if (length(returnType) != 1L ||
      !(returnType %in% c("raw", "mean", "median"))) {
    stop("returnType must be one of \"raw\", \"mean\", or \"median\"",
         call. = FALSE)
  }

  # The `sd` argument shares its name with stats::sd(); keep the multiplier
  # under its own name and always call the function as stats::sd().
  stDev <- sd

  # `== TRUE` (rather than isTRUE()) so that 1/0 flags keep working.
  byCondition <- perCondition == TRUE
  byParticipant <- perParticipant == TRUE

  # Participants and conditions are read from the *untrimmed* data so that
  # every participant/condition gets a cell in the summary table even when
  # all of its trials are removed. `[[` (not `[, ]`) also works for tibbles.
  participant <- unique(data[[pptVar]])
  conditionList <- unique(data[[condVar]])

  # Iterate factors by label, which is what a `for` loop over them would do.
  loopValues <- function(x) if (is.factor(x)) as.character(x) else x
  pptValues <- loopValues(participant)
  condValues <- loopValues(conditionList)

  # remove errors if the user has asked for it
  if (omitErrors == TRUE) {
    trimmedData <- data[data[[accVar]] == 1, ]
  } else {
    trimmedData <- data
  }

  # absolute lower bound on RT
  trimmedData <- trimmedData[trimmedData[[rtVar]] > minRT, ]

  # Helper: drop every trial of `d` whose RT is not below
  # mean(RT) + stDev * SD(RT), both computed on `d` itself.
  applyCutoff <- function(d) {
    rts <- d[[rtVar]]
    cutoff <- mean(rts) + (stDev * stats::sd(rts))
    d[d[[rtVar]] < cutoff, ]
  }

  if (!byCondition && !byParticipant) {
    # one cutoff for the whole data set; row order is left untouched
    trimmedData <- applyCutoff(trimmedData)
  } else {
    pptCol <- trimmedData[[pptVar]]
    condCol <- trimmedData[[condVar]]

    # One logical row mask per trimming group. The order of the masks fixes
    # the row order of the "raw" output: by condition, by participant, or by
    # participant and then condition.
    if (byCondition && byParticipant) {
      groupMasks <- unlist(
        lapply(pptValues, function(p) {
          lapply(condValues, function(cond) condCol == cond & pptCol == p)
        }),
        recursive = FALSE
      )
    } else if (byCondition) {
      groupMasks <- lapply(condValues, function(cond) condCol == cond)
    } else {
      groupMasks <- lapply(pptValues, function(p) pptCol == p)
    }

    # trim each group on its own cutoff, then stack the pieces in one go
    # (instead of growing a data frame with rbind() inside a loop)
    pieces <- lapply(groupMasks, function(keep) {
      applyCutoff(trimmedData[keep, ])
    })
    trimmedData <- do.call(rbind, pieces)
  }

  # trial-level data requested: nothing more to do
  if (returnType == "raw") {
    return(trimmedData)
  }

  # Summary table: rows = participants, columns = conditions. This now also
  # covers "median" with perCondition = TRUE & perParticipant = FALSE, a
  # combination that previously had no code path and returned NULL.
  average <- if (returnType == "mean") mean else stats::median

  rtCol <- trimmedData[[rtVar]]
  pptCol <- trimmedData[[pptVar]]
  condCol <- trimmedData[[condVar]]

  cellStats <- matrix(0, nrow = length(pptValues), ncol = length(condValues))
  for (j in seq_along(condValues)) {
    # narrow to the condition first, then to the participant
    inCondition <- condCol == condValues[j]
    condRts <- rtCol[inCondition]
    condPpts <- pptCol[inCondition]
    for (i in seq_along(pptValues)) {
      cellRts <- condRts[condPpts == pptValues[i]]
      # an empty cell yields NaN (mean) or NA (median)
      cellStats[i, j] <- round(average(cellRts), digits = digits)
    }
  }

  finalData <- as.data.frame(cellStats)
  # give the columns the condition names
  colnames(finalData) <- conditionList
  # add the participant column (named "participant" via the variable name)
  finalData <- cbind(participant, finalData)

  # NOTE: historically only the "mean" table of the two per-participant modes
  # was passed through data.frame(), which sanitises column names (e.g. a
  # condition called "1" becomes "X1"). Kept so existing callers see the
  # same column names as before.
  if (returnType == "mean" && byParticipant) {
    finalData <- data.frame(finalData)
  }

  finalData
} # end of function
#------------------------------------------------------------------------------
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.