R/gaussian_scaled.R

#Calculate the underlying continuous Gaussian, not discretized. But how high should it be?

#Given a dataset of 100 points. Take the Gaussian density, area under the curve is 1,
#and make the area under the curve = 100?
#But wait a second - the curve goes forever, but I'm only showing -17 to 17.
#No, but each of those 100 points is actually a grainsize portion of the curve.
#So, calculate the area of the Gaussian density for each bin. Then multiply by a normalizing
#factor so that the total area of all the bins equals the number of datapoints.
#This is neglecting that we're talking about a truncated Gaussian, and
#  possiby tapered by frequency of possible SPEs

areaOfGaussianBin<- function(binStart,binWidth,latency,precision) {
  #Calculate area under the unit curve for that bin
  area <- pnorm(binStart+binWidth,latency,precision) - pnorm(binStart,latency,precision)
  return (area)
}

#' Calculate the mixture model's Gaussian, scaled to the number of datapoints in the data
#'
#' Used in plot_hist_with_fit etc.
#'
#'
#' @param efficacy proportion of trials drawn from the gaussian distribution
#' @param latency center of gaussian relative to SPE=0
#' @param precision sigma of gaussian
#' @param numObservations number of trials, to help scale height
#' @param minSPE minimum possible serial position error
#' @param maxSPE maximum possible serial position error
#' @param grain how finely to plot gaussian over the histogram, scaling affected by grain
#'
#' @export
#'
#' @examples
#' df <-  subset(P2E2pilot,subject=="CB" & target==1 & condition==1)
#' calc_curves_dataframe(df, -11, 11, 16)
#'
gaussian_scaled<- function(efficacy,latency,precision,numObservations,minSPE,maxSPE,grain) {
  #At each minSPE:grain:maxSPE value, calculate Gaussian at
  #  height appropriate for plotting with the histogram of the SPE data

  #Calculate area under the unit Gaussian curve for each bin

  #Calculate beginning x value of each bin, e.g. -0.5 for 0 if grain =1
  binStarts= seq(minSPE-grain/2, maxSPE-grain/2, grain)
  #Calculate area under the unit Gaussian for each bin. This corresponds
  #Send each SPE individually to the areaOfGaussianBin function with additional parameters
  binAreasGaussian<- sapply(binStarts, areaOfGaussianBin,   grain,latency,precision)

  #Now we have the probability of each bin, but they don't sum to 1 because it wasn't at truncated Gaussian
  #We can now accomplish truncation of the Gaussian by summing the area of all the bins and dividing
  #that into each bin, to normalize it so that the total of all the bins =1.
  probEachBin<- binAreasGaussian / sum(binAreasGaussian)
  #An alternative, arguably better way to do it would be to assume that anything in the Gaussian tails
  # beyond the  bins on either end ends up in those end bins.

  #Now there's the matter of the actual dataset.
  #The area under the histogram does not sum to 1, it instead sums to the number of trials.
  #So we need to achieve the same thing for our theoretical Gaussian curve, except that
  #our Gaussian curve is responsibe for only efficacy proportion of the trials.

  #numObservations reflects grain of 1. But if using finer grain to plot, need to scale up
  numTheoreticalObservations <- numObservations * 1/grain

  gaussianThis<- probEachBin * numTheoreticalObservations * efficacy

  #Slide it into a dataframe with the bins indicated
  gaussianThis<-data.frame(x=seq(minSPE,maxSPE,grain), gaussianFreq=gaussianThis)

  #Note: not tapered

  return(gaussianThis)
}


#Below is mistaken old way that used plain density without integrating the area of each bin
gaussianScaledForDataOld<- function(efficacy,latency,precision,numObservations,minSPE,maxSPE,grain) {
  #Return the correct heights of the Gaussian at each minSPE:grain:maxSPE at
  # the right height for plotting with the histogram of the SPE data
  domain<-seq(minSPE,maxSPE,grain)
  gaussianThis<- dnorm(domain,latency,precision)
  #Calculate points at appropriate height for this data
  #print(paste0("lengthSPE=",length(SPE)))
  gaussianThis<- gaussianThis * efficacy * numObservations
  gaussianThis<-data.frame(x=domain, gaussianFreq=gaussianThis)
  return(gaussianThis)
}


#' Calculate fine-grained Gaussian from parameters dataframe
#'
#'
#' @param df data.frame with columns (of only length 1) efficacy, latency, precision
#' @param minSPE smallest possible serial position error
#' @param maxSPE largest possible serial position error
#' @param grain grain, which is width of bins for which will calculate the area in the Gaussian
#'
#' @export
#'
#' @examples
#'  \dontrun{
#' df <-  subset(P2E2pilot,subject=="CB" & target==1 & condition==1)
#' plot_hist_with_fit(df, -11, 11, df$targetSP, 16, TRUE, TRUE, TRUE)
#'  }
#'
#Modify gaussian_scaled to take parameter values from a dataframe.
#Recall that need to calculate Gaussian heights at this finer grain for each condition
#Each call to gaussianScaledForData returns a larger dataframe than was sent.
# To do that with dplyr, need to be part of dataframe

gaussian_scaled_from_df<- function(df,minSPE,maxSPE,grain) {
  #Should be sent a one-line dataframe with efficacy,latency,precision
  #Want to expand that into entire curve, with many different SPE values
  #nPerCond is number of observations per condition
  stopifnot( "nPerCond" %in% colnames(df) )
  curve<- gaussian_scaled(df$efficacy,df$latency,df$precision,df$nPerCond,minSPE,maxSPE,grain)

  #To merge the parameter estimates with every line of this set of x,gassianFreq points for the curve:
  # merge(iris, data.frame(time=1:10), by=NULL) https://stackoverflow.com/questions/11693599/alternative-to-expand-grid-for-data-frames
  withParams<- merge(df, curve, by=NULL)
  return(withParams)
}

# Example call:
#gaussianFine<- estimates_M %>% group_by_at(.vars = group_vars) %>% do(
#  gaussianScaledFromDataframe(.,minSPE,maxSPE,grain) )
alexholcombe/mixRSVP documentation built on June 7, 2019, 3:50 p.m.