R/cdfFunctions.R

Defines functions cdf cdfBinsize binsizeCDFs cdfProportions getModelCDFs

Documented in cdf

###
# functions relating to cumulative distribution functions (CDFs)


#------------------------------------------------------------------------------
#' Find cumulative distribution function (CDF) values for a single condition
#'
#' \code{cdf} takes a data frame for a single experimental condition and
#' returns a vector of requested CDF values.
#'
#' The function only deals with one experimental condition. There is another
#' function (\code{cdfAll}) which will return CDFs for all experimental
#' conditions. If there is more than one subject in the data frame being
#' passed to this function, the function first finds the CDF values for each
#' subject, and then takes the average for each quantile. This average is then
#' returned to the user.
#'
#' @param data A data frame containing the data to be passed to the function.
#' At the very least, the data frame must contain columns named "accuracy"
#' logging the accuracy (1 for correct, 0 for error) and "rt" containing the
#' response time data. If the user wishes to find the average CDFs across
#' multiple subjects, then another column must be included ("subject") with
#' numbers identifying unique subjects. See \code{?exampleData} for a data
#' frame formatted correctly.
#'
#' @param quantiles The quantile values to be found by the function. By
#' default, the function finds the .1, .3, .5, .7, and .9 CDF values.
#'
#' @param correctTrials If set to 1, the function will find the CDFs of
#' correct trials. Set to 2 to find the CDFs of error trials. Set to 3 to find
#' CDFs of ALL trials. Note, though, that CDFs of error trials may be less
#' accurate due to usually-low number of error trials.
#'
#' @param multipleSubjects Inform the function whether the data frame contains
#' data from multiple subjects. If set to TRUE, the function returns the
#' average CDF values across all subjects. If set to FALSE, the function
#' assumes all data being passed is just from one subject.
#'
#'
#' @examples
#' ### example of multiple subjects and default quantile values
#'
#' # only select the congruent data from the example data set
#' data <- subset(exampleData, exampleData$congruency == "congruent")
#'
#' # get the CDFs
#' getCDF <- cdf(data)
#'
#' ### example of single subject and different quantile values
#'
#' # only select subject 1 from the example data. Also, select only the
#' # "absent" condition and incongruent trials. This is an example when working
#' # with multiple conditions (besides target congruency).
#' data <- subset(exampleData, exampleData$subject == 1 &
#'     exampleData$condition == "absent" &
#'     exampleData$congruency == "incongruent")
#'
#' # set new quantile values
#' newQuantiles <- c(.1, .2, .3, .4, .5, .6, .7, .8,  .9)
#'
#' # get the CDFs
#' getCDF <- cdf(data, quantiles = newQuantiles, multipleSubjects = FALSE)
#'

#' @export
cdf <- function(data, quantiles = c(.1, .3, .5, .7, .9),
                correctTrials = 1, multipleSubjects = TRUE){

  # Returns the response-time values (column "rt") at the requested quantiles.
  #   data:             data frame with "accuracy" and "rt" columns, plus a
  #                     "subject" column when multipleSubjects is TRUE.
  #   quantiles:        probabilities passed to quantile().
  #   correctTrials:    1 = trials with accuracy 1, 2 = trials with accuracy 0,
  #                     3 = all trials.
  #   multipleSubjects: if TRUE, quantiles are found per subject and averaged
  #                     across subjects; if FALSE, data is treated as one
  #                     subject.
  # Value: a numeric vector with one entry per requested quantile.

  # Keep only the trials requested via correctTrials. The filter is applied to
  # whatever trials are passed in, so that in the multi-subject case it
  # operates on a single subject's trials and not on the pooled data.
  selectTrials <- function(trials){
    if(correctTrials == 1){
      return(trials[which(trials$accuracy == 1), ])
    }
    if(correctTrials == 2){
      return(trials[which(trials$accuracy == 0), ])
    }
    if(correctTrials == 3){
      return(trials)
    }
    stop("correctTrials must be 1 (correct), 2 (error), or 3 (all trials)",
         call. = FALSE)
  }

  # perform the simple operation of calculating CDFs if only one subject
  if(!multipleSubjects){
    return(as.numeric(quantile(selectTrials(data)$rt, quantiles)))
  }

  # if there are multiple subjects, find average CDF across these subjects
  subs <- unique(data$subject)

  # without any subject there is nothing to average over
  if(length(subs) == 0){
    stop("data must contain a non-empty \"subject\" column when ",
         "multipleSubjects = TRUE", call. = FALSE)
  }

  # n*m matrix where rows (n) reflect quantiles and columns (m) are subjects
  cdfData <- matrix(0, nrow = length(quantiles), ncol = length(subs))

  # loop over all subjects, find their CDFs, and place in cdfData matrix
  for(i in seq_along(subs)){

    # restrict to the current subject first, then to the requested trials
    subjectData <- data[which(data$subject == subs[i]), ]
    cdfData[, i] <- quantile(selectTrials(subjectData)$rt, quantiles)

  }

  # average each quantile (row) across subjects and return to the user
  apply(cdfData, 1, mean)

}
#------------------------------------------------------------------------------



#------------------------------------------------------------------------------
# Given a set of quantiles for CDFs, return the proportion of data within each
# bin. For example, the CDFs c(.1, .3, .5, .7, .9) have proportions of
# c(.1, .2, .2, .2, .2, .1). This is required because the model will try to
# predict response times which match the proportions in the human data.

#' @export
cdfBinsize <- function(cdfs){

  # Converts cumulative quantile values into the proportion of data falling in
  # each bin.
  #   cdfs: numeric vector of cumulative proportions, e.g.
  #         c(.1, .3, .5, .7, .9).
  # Value: numeric vector of length(cdfs) + 1 holding the bin proportions,
  #        e.g. c(.1, .2, .2, .2, .2, .1). An empty cdfs gives a single bin
  #        with proportion 1.

  # Pad with the implicit boundaries 0 and 1; successive differences are then
  # the first bin (cdfs[1] - 0), the intermediate bins, and the final bin
  # (1 - last cdf). Names are dropped so the result is a plain numeric vector.
  diff(c(0, unname(cdfs), 1))

} # end of function
#------------------------------------------------------------------------------


#------------------------------------------------------------------------------
# The opposite of cdfBinsize. Given a set of proportions, work out the CDFs
# For example, the proportions c(.1, .2, .2, .2, .2, .1) have CDFs of
# c(.1, .3, .5, .7, .9).
#'@export
binsizeCDFs <- function(proportions){

  # Converts bin proportions into cumulative values.
  #   proportions: numeric vector of bin proportions, e.g.
  #                c(.1, .2, .2, .2, .2, .1).
  # Value: numeric vector of length(proportions) - 1 holding the running
  #        totals, e.g. c(.1, .3, .5, .7, .9). The final bin is excluded
  #        because its running total only closes the distribution.

  nBins <- length(proportions)

  # fewer than two bins means there is no cut-point between bins
  if(nBins < 2){
    return(numeric(0))
  }

  # running total of the proportions, dropping the last (closing) value
  as.numeric(cumsum(proportions))[seq_len(nBins - 1)]
}
#------------------------------------------------------------------------------



#------------------------------------------------------------------------------
# Calculate the proportion of correct responses in each cdf bin for a given
# condition. Takes two parameters: data (the data), and correctProportions; the
# latter is a vector of proportions for each bin if accuracy were 100%.
#
# This function finds the accuracy for each subject (if applicable) and
# multiplies the correctProportions vector by this value. This gives the
# proportion in each bin for each subject. Then, the function returns the
# average proportion in each bin.

#'@export
cdfProportions <- function(data, correctProportions, multipleSubjects = TRUE){

  # Scales a vector of bin proportions by observed accuracy.
  #   data:               data frame with an "accuracy" column, plus a
  #                       "subject" column when multipleSubjects is TRUE.
  #   correctProportions: numeric vector of bin proportions to be scaled.
  #   multipleSubjects:   if TRUE, the scaling is done per subject and the
  #                       scaled proportions are averaged across subjects; if
  #                       FALSE, data is treated as one subject.
  # Value: numeric vector the same length as correctProportions.

  # if there is only one subject, then find the overall proportions and return
  # them (i.e., there is no loop or averaging)
  if(!multipleSubjects){

    # accuracy is the sum of the accuracy column over the number of trials
    accuracy <- sum(data$accuracy) / nrow(data)

    # scale the proportions by the accuracy and return to user
    return(correctProportions * accuracy)
  }

  # what are the unique subject numbers?
  subs <- unique(data$subject)

  # without any subject there is nothing to average over
  if(length(subs) == 0){
    stop("data must contain a non-empty \"subject\" column when ",
         "multipleSubjects = TRUE", call. = FALSE)
  }

  # one row per subject, one column per bin
  allProportions <- matrix(0, nrow = length(subs),
                           ncol = length(correctProportions))

  # Loop over each subject
  for(i in seq_along(subs)){

    # accuracy values for the current subject's trials
    subjectAccuracy <- data$accuracy[which(data$subject == subs[i])]

    # scale the proportions by this subject's accuracy, and store the result
    allProportions[i, ] <- correctProportions *
      (sum(subjectAccuracy) / length(subjectAccuracy))

  }

  # average each bin (column) across subjects and return to the user
  apply(allProportions, 2, mean)

} # end of function
#------------------------------------------------------------------------------





#------------------------------------------------------------------------------

#'@export
# Get model proportions from human CDFs (the RTs, not proportions).
# Returns proportions

getModelCDFs <- function(modelData, cdfs){

  # For each cutoff in cdfs, finds the proportion of retained model trials
  # whose first-column value is at or below that cutoff.
  #   modelData: two-dimensional object; rows whose second column equals 1 are
  #              retained, and the first column of those rows is compared
  #              against the cutoffs.
  #   cdfs:      numeric vector of cutoff values.
  # Value: numeric vector the same length as cdfs.

  # only select the correct trials (second column equal to 1); which() drops
  # rows where the comparison is NA
  correctValues <- modelData[, 1][which(modelData[, 2] == 1)]

  # the denominator is the number of correct trials, not all trials
  nCorrect <- length(correctValues)

  # for each cutoff, count the correct trials at or below it; values that
  # cannot be compared (NA) are not counted
  vapply(cdfs, function(cutoff){
    sum(correctValues <= cutoff, na.rm = TRUE) / nCorrect
  }, numeric(1), USE.NAMES = FALSE)

}
#------------------------------------------------------------------------------
JimGrange/flankr2 documentation built on Oct. 30, 2019, 7:44 p.m.