# tests/testthat/test-model-comparison.R
# Package: alexholcombe/mixRSVP — mixture modeling of temporal errors in RSVP data

# Declares the testthat context for this file's tests.
context("test-model-comparison.R")

# To run manually: test_file("tests/testthat/test-model-comparison.R")

test_that("simplest ground truth", {
  # Not testing any functions in model-comparison; instead verifying my
  # understanding of the chi-square likelihood ratio test that is programmed
  # into likelihoodRatioTest.
  #
  # Based on:
  # https://stats.stackexchange.com/questions/155474/r-why-does-lrtest-not-match-anovatest-lrt

  set.seed(1)  # Reproducibility
  n <- 100
  x <- (1:n) / n
  b <- 0.1  # intercept
  noise <- runif(n, min = -0.5, max = 0.5)

  # Make y depend on x, with intercept b plus uniform noise
  y <- b + 1 * x + noise
  mydata <- data.frame(y, x, b)

  # plot(x, y)  # visual check only; disabled so the automated test has no side effects

  # Nested pair of models differing only in the intercept term
  base <- lm(y ~ 0 + x, data = mydata)  # regression with no intercept
  full <- lm(y ~ x, data = mydata)      # regression with intercept

  # Homebrew log-likelihood ratio test. Note that it is the positive log
  # likelihood (not the negative) that is used, and the "ratio" becomes a
  # difference because of the log.
  like.diff <- logLik(full) - logLik(base)
  # base - full residual df equals the difference in number of free
  # parameters: both df include the number of data points, which cancels out.
  df.diff <- base$df.residual - full$df.residual

  # Lower p-value means the base (reduced) model is less likely.
  p <- stats::pchisq(as.numeric(like.diff) * 2, df = df.diff, lower.tail = FALSE)

  # .027 is the p-value obtained when this was first programmed
  expect_equal(p, .027, tolerance = .001)

  # When df.diff is greater (more parameters in the full model), the p-value
  # should be higher, because the full model has more parameters and so
  # should be penalized.
  df.diff <- df.diff + 1
  p2 <- stats::pchisq(as.numeric(like.diff) * 2, df = df.diff, lower.tail = FALSE)
  expect_true(p2 - p > 0)
})

test_that("fake guessing observer fails likelihood ratio test", {
  # Set up a fake observer who guesses on every trial
  df <- backwards2_E1
  numItemsInStream <- length(df$letterSeq[1, ])
  df <- subset(df, subject == "AA" & condition == 2 & target == 1)
  possibleTargetSP <- sort(unique(df$targetSP))
  minTargetSP <- min(possibleTargetSP)
  maxTargetSP <- max(possibleTargetSP)
  minSPE <- 1 - maxTargetSP
  maxSPE <- numItemsInStream - minTargetSP
  bounds <- parameterBounds()
  nTrials <- nrow(df)
  set.seed(1)  # Reproducibility

  # Create the guesser: use the guessing distribution itself as the source of
  # SPEs, sampling with replacement.
  # https://stackoverflow.com/a/46590082/302378
  guessingDistribution <- createGuessingDistribution(minSPE, maxSPE, df$targetSP, numItemsInStream)
  guesses <- sample(minSPE:maxSPE, nTrials, replace = TRUE, prob = guessingDistribution)
  # hist(guesses, breaks = seq(minSPE, maxSPE))
  dGuesser <- df
  dGuesser$SPE <- guesses

  nGuessing <- -logLikGuessing(dGuesser, numItemsInStream)

  # fit <- fitOneCondition(dGuesser, numItemsInStream, bounds, nReplicates = 1)
  # The fit above yielded efficacy = 1e-05, latency = -2.86, precision = 2.92,
  # val = 343.5077; hard-coded here so the test is fast and deterministic.
  nMixture <- 343.5077

  # Check whether the mixture fit significantly better than the guessing
  # distribution. Pass log likelihoods, not negative log likelihoods.
  p <- likelihoodRatioTest(-nMixture, -nGuessing, 3)

  # p-value should be essentially 1 because the data come from the reduced
  # (guessing-only) model.
  expect_equal(p, 1, tolerance = 1e-03)
})

test_that("hybrid observer", {
  # Set up a fake observer who guesses on every trial
  df <- backwards2_E1
  numItemsInStream <- length(df$letterSeq[1, ])
  df <- subset(df, subject == "AA" & condition == 2 & target == 1)
  possibleTargetSP <- sort(unique(df$targetSP))
  minTargetSP <- min(possibleTargetSP)
  maxTargetSP <- max(possibleTargetSP)
  minSPE <- 1 - maxTargetSP
  maxSPE <- numItemsInStream - minTargetSP
  bounds <- parameterBounds()
  set.seed(1)  # Reproducibility
  nTrials <- nrow(df)

  # Create the guesser: use the guessing distribution itself as the source of
  # SPEs, sampling with replacement.
  # https://stackoverflow.com/a/46590082/302378
  guessingDistribution <- createGuessingDistribution(minSPE, maxSPE, df$targetSP, numItemsInStream)
  guesses <- sample(minSPE:maxSPE, nTrials, replace = TRUE, prob = guessingDistribution)
  # hist(guesses, breaks = seq(minSPE, maxSPE))
  guesser <- df
  guesser$SPE <- guesses

  # FIRST CHECK: observer who guesses on every trial
  loser <- guesser
  # hist(loser$SPE, breaks = seq(minSPE, maxSPE, by = 1))

  nGuessing <- -logLikGuessing(loser, numItemsInStream)

  fit <- fitOneCondition(loser, numItemsInStream, bounds, nReplicates = 1)
  # efficacy comes out around .18
  nMixture <- fit$val

  # Check whether the mixture fit significantly better than the guessing
  # distribution (pass log likelihoods, not negative log likelihoods).
  p <- likelihoodRatioTest(-nMixture, -nGuessing, 3)
  # This example fails to reject the null, as it should, because the
  # participant is a pure guesser.
  expect_equal(p, .21, tolerance = 1e-01)

  # SECOND CHECK: observer whose data is not 100% guesses — mix 10 real
  # trials in with the guessing trials.
  loser <- rbind(guesser, df[1:10, ])
  # hist(loser$SPE, breaks = seq(minSPE, maxSPE, by = 1))

  nGuessing <- -logLikGuessing(loser, numItemsInStream)

  fit <- fitOneCondition(loser, numItemsInStream, bounds, nReplicates = 1)
  nMixture <- fit$val

  p <- likelihoodRatioTest(-nMixture, -nGuessing, 3)

  # Including just 10 observations from a not particularly good participant,
  # out of ~110 trials total, already nudges p toward significance.
  expect_equal(p, .07766, tolerance = 1e-04)
})
# Scrape footer (not code): alexholcombe/mixRSVP documentation built on Aug. 30, 2018, 5:43 a.m.