R/likelihood.R

#library(signal) # Provides interp1 function
# interp1 <- function(x, v, xq)
# {
#   cat("x ", x, " v ", v, " xq ", xq, "\n")
#   x2 <- x[order(x)]
#   v2 <- v[order(x)]
#
#   vq <- rep(0, length(xq))
#   for (idx in 1:length(xq)) {
#     i <- 1
#     while (xq[idx] >= x2[i]) i = i + 1
#
#     # Now x[i - 1] < xq and x[i] >= xq
#     m <- (v2[i] - v2[i - 1]) / (x2[i] - x2[i - 1])
#     vq[idx] <- v2[i - 1] + m * (xq[idx] - x2[i - 1])
#   }
#
#   return(vq)
# }

#' Calculate area under a particular domain of a Gaussian distribution
#'
#' @param binStart x value of bin start
#' @param binWidth bin width
#' @param latency mean of Gaussian
#' @param precision sigma of Gaussian
#' @importFrom stats dnorm pnorm runif
#' @return Area under the bin for a Gaussian distribution with the parameters mean=latency, sigma=precision
areaUnderGaussianBin<- function(binStart,binWidth,latency,precision) {
  #Calculate area under the unit curve for that bin
  #Do it by determining area of Gaussian up to top of bin and subtracting area up to bottom of bin

  if (precision < 0) {
    #Don't know why this happens even when the lower bound on precision is specified as zero and
    # the method is set to L-BFGS-B, which is supposed to obey bounds very well.
    # Will try method bobyqa, which is the other one that is supposed to obey bounds.
    warning("You sent me a precision value < 0. That is just crazy.")
  }
  areaToTopOfBin <- pnorm(binStart+binWidth,latency,precision)

  #I'm seeing a lot of: <simpleWarning in pnorm(binStart, latency, precision): NaNs produced>
  #happening as a result of "Check null warning no longer occurs" test in test-fit_model.R
  areaToBottomOfBin <- tryCatch( pnorm(binStart,latency,precision),
                                 error=function(e) e,
                                 warning=function(w) return(list(pnorm(binStart,latency,precision),w))
                       )
  #If there was a warning, the handler above returned a list whose first element is the answer
  # and whose second element is the warning
  if( is.list(areaToBottomOfBin) && is(areaToBottomOfBin[[2]],"warning") ) {
    print(paste("Got a warning as a result of calling pnorm with:",binStart,latency,precision))
  }
  if( is.nan(areaToBottomOfBin[[1]]) ) {
    print(paste("Got NaN as a result of calling pnorm with:",binStart,latency,precision))
    #A problem is that we get NaN from a call like pnorm(13,1,-9e-05), because precision can't be negative
  }
  areaToBottom<- areaToBottomOfBin[[1]]

  areaOfBin <- tryCatch(  areaToTopOfBin - areaToBottom,
            error = function(e) e )
  if( is(areaOfBin,"error") ) {
    print("no")
  }
  return (areaOfBin)
}
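
# A minimal illustrative sketch of what areaUnderGaussianBin computes (values are arbitrary):
# the probability mass of the unit-width bin centred on SPE = 0, for latency 1 and precision 1, is
#   areaUnderGaussianBin(binStart = -0.5, binWidth = 1, latency = 1, precision = 1)
# which equals pnorm(0.5, 1, 1) - pnorm(-0.5, 1, 1), roughly 0.24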

areaUnderGaussianBinEachObservation<- function(SPEs, mu, sigma) {
 binWidth<-1
 binStarts <- SPEs - binWidth/2
 ans<- sapply(binStarts, areaUnderGaussianBin, binWidth, mu, sigma)
 return (ans)
}
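
# Illustrative usage (values assumed): one unit-width bin area per observed SPE,
# e.g. the area from 0.5 to 1.5 for an SPE of 1
#   areaUnderGaussianBinEachObservation(SPEs = c(-1, 0, 1, 2), mu = 1, sigma = 1)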

areaUnderGaussianBinTapered<- function(binStart,binWidth,latency,precision,guessingDistribution,minSPE,maxSPE) {
  #Calculate area under the unit curve for that bin

  area<- areaUnderGaussianBin(binStart,binWidth,latency,precision)

  binCenter <- round(binStart+binWidth/2)
  #Which bin index is this? So can look up in guessingDistribution
  binIndex<- binCenter - minSPE + 1

  #Taper the Gaussian area by the guessing distribution. The guessing distribution could be a large number but
  # everything will be normalized at the end
  area<- area * guessingDistribution[binIndex]

  return (area)
}
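
# Worked index example (illustrative values): with minSPE = -17, the bin centred on SPE = 0
# has binIndex = 0 - (-17) + 1 = 18, so it is tapered by guessingDistribution[18]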

areaUnderTruncatedGaussianTapered<- function(latency,precision,guessingDistribution,minSPE,maxSPE) {
  #Calculate total area of the truncated and tapered Gaussian
  binStarts<- seq(minSPE-1/2, maxSPE-1/2, 1)
  #Send each bin individually to areaUnderGaussianBinTapered, with additional parameters
  binAreasGaussian<-sapply(binStarts, areaUnderGaussianBinTapered, 1,latency,precision,guessingDistribution,minSPE,maxSPE)

  totalArea<- sum(binAreasGaussian)

  return (totalArea)
}
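
# Sketch of the normalisation this supports (assumed values): with a flat guessing distribution
# of 1s over minSPE = -3 to maxSPE = 3,
#   areaUnderTruncatedGaussianTapered(latency = 0, precision = 1, rep(1, 7), minSPE = -3, maxSPE = 3)
# is close to 1, because nearly all of a standard Gaussian lies within [-3.5, 3.5]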

areaUnderGaussianBinEachObservationTapered<- function(SPEs, mu, sigma, guessingDistribution,minSPE,maxSPE) {

 #Taper the Gaussian curve based on the guessingDistribution
 SPEsBinStarts <- SPEs - .5

 #Proportional to the probability of each bin, tapered. Will need to be normalized by total tapered area
 areasTapered<- sapply(SPEsBinStarts, areaUnderGaussianBinTapered, 1, mu, sigma, guessingDistribution,minSPE,maxSPE)
 return (areasTapered)
}


likelihood_mixture <- function(x,p,mu,sigma,minSPE,maxSPE,guessingDistribution) {
  #Calculate the likelihood of each data point (each SPE observed - each x)
  #The data is discrete SPEs, but the model is a continuous Gaussian.
  #The probability of an individual SPE is not the height of the normal at the bin's center,
  #but arguably is the average height of the curve across the whole bin, which here is captured
  #by integrating the area under the curve for the bin domain.
  #E.g., for an SPE of 0, integrate from -.5 to +.5. See goodbournMatlabLikelihoodVersusR.png for a picture
  gaussianComponentProbs<- areaUnderGaussianBinEachObservationTapered(x,mu,sigma,guessingDistribution,minSPE,maxSPE)
  gaussianComponentProbs<- unlist(gaussianComponentProbs) #ended up as list rather than vector with Cheryl's data
  #Now we have the probability of each observation, except they're not really probabilities
  #because the density they were based on doesn't sum to 1 because it wasn't a Gaussian
  #truncated to the domain of possible events. Plus it was tapered by the guessingDistribution

  #Make legit probabilities and compensate for  truncation of the Gaussian by summing the area of all the bins
  #and dividing that into each bin, to normalize it so that the total of all the bins =1.
  totalAreaUnderTruncatedGaussianTapered<-
              areaUnderTruncatedGaussianTapered(mu, sigma, guessingDistribution,minSPE,maxSPE)

  gaussianComponentProbs<- gaussianComponentProbs / totalAreaUnderTruncatedGaussianTapered
  #(An alternative, arguably better way to do it would be to assume that anything in the Gaussian tails
  # beyond the  bins on either end ends up in those end bins, rather than effectively redistributing the
  # tails across the whole thing.)

  #Calculate the likelihood of each observation according to the guessing component of the mixture
  #Convert SPEs to bin indices, so we can look them up in guessingDistribution
  distroIndices<- x - minSPE + 1 #So minSPE becomes 1
  guessingComponentProbs<- guessingDistribution[distroIndices]
  #Make it a probability
  guessingComponentProbs<- guessingComponentProbs / sum(guessingDistribution)

  #Do the mixture: deliver the probability of each observation reflecting both components.
  pGaussianComponent <- p
  pGuessingComponent <- 1-p

  gaussianContrib <- pGaussianComponent * gaussianComponentProbs
  guessingContrib <- pGuessingComponent * guessingComponentProbs

  finalProbs<- gaussianContrib + guessingContrib

  #This code from Pat looks like it deals with the situation of normResult and uniResult being
  # different shapes (a row versus a column) but I don't know how that can happen. And the conditional doesn't
  # seem to test for that
  # if (sum(length(gaussianContrib)==length(guessingContrib))!=2){ #This doesn't seem to make sense
  #   msg=paste0('Looks like one of these is the transposed shape of the other, str(gaussianContrib)=',
  #              str(gaussianContrib),' str(guessingContrib)=',str(guessingContrib) )
  #   warning(msg)
  #   finalProbs <- gaussianContrib + t(guessingContrib)
  # }
  return(finalProbs)
}
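
# Illustrative call (all values assumed): probability of a few observed SPEs under a mixture
# with efficacy 0.8, latency 1, precision 1, and a flat guessing distribution over the 42 SPEs
# from minSPE = -17 to maxSPE = 24
#   likelihood_mixture(x = c(0, 1, 5), p = 0.8, mu = 1, sigma = 1,
#                      minSPE = -17, maxSPE = 24, guessingDistribution = rep(1, 42))
# returns one probability per element of x; they need not sum to 1 because x covers
# only a subset of the possible SPEs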


likelihood_mixture_MATLAB <- function(x,p,mu,sigma,minSPE,maxSPE,guessingDistribution){
  #Calculate the likelihood of each data point (each SPE observed) according to the mixture model.
  #Port of Patrick's MATLAB way of doing it. His function was originally called pdf_Mixture_Single, to contrast with the Dual version used in the AB analysis

    xDomain <- minSPE:maxSPE

    #First we calculate normalizing factors.
    #Ideally we'd be working with probability density functions, which are guaranteed to sum to 1.
    #Instead we're using a guessingDistribution that was not programmed to be a unit density,
    # and we're using the Gaussian, which is a density function, in an unusual way:
    # it's truncated to xDomain, and it's discretized using the height of the density at each bin
    # rather than the area under the Gaussian for the whole bin,
    # so even if it weren't truncated, it wouldn't add up to 1.
    pseudo_normal <- dnorm(xDomain,mu,sigma)*guessingDistribution
    #cat("pseudo_normal ", pseudo_normal, "\n")

    #normalising factors
    normFactor_uniform <- sum(guessingDistribution)
    normFactor_normal <- sum(pseudo_normal)

    #How could these ever be zero? If mu was out of range?
    if (normFactor_uniform  == 0 || is.nan(normFactor_uniform)){
        normFactor_uniform <- 10^-8
    }

    if (normFactor_normal == 0 || is.nan(normFactor_normal)){
        normFactor_normal <- 10^-8
    }

    #For all SPEs, determine the height of the guessingDistribution
    #Use interpolation in case there is a rounding problem? Alex doesn't understand why interp1 is used
    uniResultTemp <- signal::interp1(xDomain, guessingDistribution, x)
    uniResultTemp[is.na(uniResultTemp)] <- 0
    #I guess Pat called it uni because this is like the one-component model of only guessing occurring?

    #Multiply Gaussian density by guessing density
    #I'm thinking Pat did this to window the Gaussian by the tapering of possible SPEs. This isn't
    # quite the right thing to do but is probably close enough because the tails are so light in those
    # extreme (affected by tapering) SPEs.
    normResultTemp <- dnorm(x,mu,sigma) * uniResultTemp

    #I think this code turns each curve height at each SPE into a legit probability by dividing by
    # the total of the curve heights.
    uniResultTemp <- uniResultTemp/normFactor_uniform
    normResultTemp <- normResultTemp/normFactor_normal

    #Do the mixture: deliver the probability of each observation reflecting both components.
    propNorm <- p
    propUniform <- 1-p

    normResult <- propNorm * normResultTemp
    uniResult <-  propUniform * uniResultTemp

    #This code looks like it deals with the situation of normResult and uniResult being
    # different shapes (a row versus a column), but I don't know how that can happen.
    if (length(normResult) == length(uniResult)){
        result <- normResult+uniResult
    } else {
        result <- normResult+t(uniResult)
    }

    #xIndex = x-min(xDomain)+1;
    #results = tempResult(xIndex);
    # cat("tempResult ", tempResult, "\n")
    #tempResult <- tempResult[1]
    return(result)
}


logLik_conditionGivenParams<- function(df, numItemsInStream, params) {
  #Note that the likelihood depends on the number of observations, so it'd be unfair to compare the
  # fit across datasets of different sizes. One could divide by the number of observations to get an
  # average log likelihood, but probabilities probably don't work that way.
  possibleTargetSP<- sort(unique(df$targetSP))
  minTargetSP <- min(possibleTargetSP)
  maxTargetSP <- max(possibleTargetSP)
  minSPE <- 1 - maxTargetSP
  maxSPE <- numItemsInStream - minTargetSP
  #calculate the guessing distribution, empirically (based on actual targetSP)
  pseudoUniform <- createGuessingDistribution(minSPE,maxSPE,df$targetSP,numItemsInStream)
  p<- params[[1]]
  mu<- params[[2]]
  sigma<- params[[3]]
  likelihoodEachObservation <- likelihood_mixture(df$SPE, p, mu, sigma, minSPE,maxSPE, pseudoUniform)
  # Sometimes likelihood_mixture returns 0, and the log of 0 is -Inf. So we add
  # 1e-8 to make the value we return finite. This allows optim() to successfully
  # optimise the function.
  return(sum(log(likelihoodEachObservation + 1e-8)))
}
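
# Sketch of how this log likelihood might be maximised (starting values and bounds assumed,
# not the package's actual fitting code):
#   negLogLik <- function(params) -logLik_conditionGivenParams(df, numItemsInStream, params)
#   fit <- optim(par = c(0.8, 1, 1), fn = negLogLik, method = "L-BFGS-B",
#                lower = c(0, -4, 0.1), upper = c(1, 4, 4))
# optim() minimises, hence the negation; the lower bound keeps precision positive, which is
# why the precision < 0 warning in areaUnderGaussianBin is surprising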

################
likelihoodOneConditionForDplyr<- function(df,numItemsInStream) {
  params<- df[1,c("efficacy","latency","precision")]
  l<- logLik_conditionGivenParams(df, numItemsInStream, params)
  return(data.frame(val=l))
}

logLikGuessing <- function(df, numItemsInStream)
{
  #Calculate the log likelihood of the pure guessing model. Likelihood means probability of the data.
  #The guessing model is that 100% of responses are drawn from the pseudoUniform distribution.
  #An alternative way to calculate this would be to set efficacy to zero; I should try that in the test
  # (see the sketch after this function).

  possibleTargetSP<- sort(unique(df$targetSP))
  minTargetSP <- min(possibleTargetSP)
  maxTargetSP <- max(possibleTargetSP)
  minSPE <- 1 - maxTargetSP
  maxSPE <- numItemsInStream - minTargetSP

  guessingDistribution <- createGuessingDistribution(minSPE,maxSPE,df$targetSP,numItemsInStream)
  #This guessing distribution is actually not scaled to be a proper probability distribution, so to get probability
  #divide by total.
  guessingProbDistribution <- guessingDistribution / sum(guessingDistribution)

  #Calculate the likelihood of each observation according to the guessing component of the mixture
  #Convert SPEs to indices, so can look up in guessingDistribution
  #the smallest possible SPE has index 1.
  distroIndices<- df$SPE - minSPE + 1 #So minSPE becomes 1
  guessingComponentProbs<- guessingProbDistribution[distroIndices]

  # A probability of 0 would give a log of -Inf, so we add 1e-8 to keep the value we return finite,
  # just as in logLik_conditionGivenParams. I don't know that guessing probabilities ever get that small,
  # so we may not need it here, but because we're going to compare the two models, we include it for consistency.
  logLikelihoods<- log(guessingComponentProbs + 1e-8)
  #Really the probability of the dataset is the product of all the probabilities, prod(guessingComponentProbs + 1e-8),
  # but that risks underflowing to a number too small for the computer to represent. So we take advantage of
  # the log property that the log of a product is the sum of the logs; the sum below is equivalent to
  # log(prod(guessingComponentProbs + 1e-8)).
  return(sum(logLikelihoods))
}
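
# Hedged consistency check suggested by the comment above (data objects assumed to exist):
# with efficacy set to 0 the mixture reduces to pure guessing, so
#   logLik_conditionGivenParams(df, numItemsInStream, params = list(0, 1, 1))
# should essentially match logLikGuessing(df, numItemsInStream)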