#' Filter reaction time values with threshold values and/or standard
#' deviations from the mean by condition
#'
#' A function to remove outliers from a given data frame. Based on the
#' outliers function of the present package: filtRT applies the
#' outliers function to the whole data set or to each subject and/or
#' condition.
#' Data can be excluded based on minimal/maximal values (thresholds)
#' and/or based on a given number of standard deviations from the mean.
#' Outliers are excluded either from all values or within each
#' subject and condition.
#' Returns a data frame of filtered data as well as the percentage
#' of data filtered.
#'
#' @author
#' Guillaume T. Vallet \email{gtvallet@@gmail.com}, University of Montreal (Canada);
#'
#' Benoit A. Riou \email{riouba@@gmail.com}, Lyon2 University (France)
#'
#' @param dt A data frame in the long format (one row per record).
#' @param RT A string giving the name of the column that holds the data to filter.
#' Defaults to 'RT'.
#' @param vars A vector of column names indicating the variables
#' to use to filter the data by condition, usually subjects and at least
#' one independent variable. Defaults to NULL, in which case the data
#' are filtered as a whole.
#' @param fpass A vector of length two with the minimal and maximal accepted
#' values to use for a first filtering of the reaction times.
#' To filter only the lowest or highest values, indicate NA as value.
#' For instance c(100, NA) will only remove RT < 100ms.
#' Defaults to NULL.
#' @param sdv A number indicating how many standard deviations should
#' be used to filter the data. Defaults to NULL.
#' @return Returns a list with two elements: \code{Data_Filtered}, a data
#' frame of filtered data, and \code{Nb_Data_Filtered}, a data frame of the
#' number of data excluded and its relative percentage per condition.
#' @keywords reaction times, filter, outliers
#' @export
#' @seealso \code{\link{outliers}}
#' @examples
#' # Generate fake data with a subject number in the first column, a fake experimental condition
#' # in the second column and fake reaction times in the third column
#' df = rbind(data.frame(Subj=1, Cond="Test", RT=rnorm(25, mean=550, sd=48)),
#' data.frame(Subj=1, Cond="Control", RT=rnorm(25, mean=680, sd=62)),
#' data.frame(Subj=2, Cond="Test", RT=rnorm(25, mean=585, sd=54)),
#' data.frame(Subj=2, Cond="Control", RT=rnorm(25, mean=720, sd=59)))
#'
#' # Adding extreme values
#' df[75,3] = df[5,3]+300
#' df[25,3] = df[5,3]+500
#' df[79,3] = df[19,3]-350
#' df[33,3] = df[33,3]+420
#' df[40,3] = df[40,3]-520
#' df[27,3] = df[27,3]-350
#' df[9,3] = 50
#' df[86,3] = 4250
#' df[65,3] = 99
#' df[3,3] = 1999
#'
#' # Filter with low and high thresholds and with 3 standard deviations
#' # by subject and condition
#' filtRT(df, RT='RT', vars=c('Subj', 'Cond'), fpass=c(100,1000), sdv=3)
#'
#' # Filter with only a low threshold and with 2 standard deviations by subject
#' filtRT(df, RT='RT', vars='Subj', fpass=c(100, NA), sdv=2)
#'
filtRT = function(dt, RT='RT', vars=NULL, fpass=NULL, sdv=NULL){
    # GT Vallet -- Lyon 2 University
    # 2013/07/01 -- v01
    # 2014/05/01 -- v02 Adding vars as an option.
    #                   Filtering only lowest or highest RT value.
    #                   Adding the percentage of data filtered.
    # 2014/11/28 -- v03 Remove Plyr functions to avoid conflicts with dplyr
    #
    # Apply outliers() to `dt`, either once on the whole data set (no
    # grouping variable) or separately within each combination of the
    # `vars` columns, then stack the per-group results.
    # Returns a list with two data frames: `Data_Filtered` (first element
    # of each outliers() result) and `Nb_Data_Filtered` (second element).

    # No grouping variable (NULL or zero-length vector): a single pass over
    # the whole data set. by() cannot be used here, it rejects an empty
    # list of indices.
    if( length(vars) == 0 ){
        dt.fil = outliers(dt, fpass, RT, sdv)
        return(list(Data_Filtered=dt.fil[[1]], Nb_Data_Filtered=dt.fil[[2]]))
    }

    # Fail early and name the offending columns instead of surfacing a
    # generic subsetting error. Only checked for names; positional
    # indices are passed through unchanged.
    if( is.character(vars) ){
        unknown = setdiff(vars, colnames(dt))
        if( length(unknown) > 0 ){
            stop("Unknown column(s) in 'vars': ",
                 paste(unknown, collapse=", "), call.=FALSE)
        }
    }

    # One factor per grouping variable. `[[` is used for data frames
    # because dt[, x] stays a one-column table for tibbles, which factor()
    # cannot turn into a grouping vector; other inputs (e.g. a matrix)
    # keep the dt[, x] extraction.
    factors = lapply(vars, function(x){
        factor(if( is.data.frame(dt) ) dt[[x]] else dt[, x])
    })
    names(factors) = vars

    # Run outliers() on every combination of the grouping factors.
    dt.fil = by(dt, factors, function(x) outliers(x, fpass, RT, sdv, tokeep=vars))

    # Each cell holds a two-element result; empty combinations are NULL and
    # are silently dropped by rbind.
    dt.filtered = as.data.frame(do.call('rbind', lapply(dt.fil, FUN = `[[`, 1)))
    filtered = as.data.frame(do.call('rbind', lapply(dt.fil, FUN = `[[`, 2)))
    return(list(Data_Filtered=dt.filtered, Nb_Data_Filtered=filtered))
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.