In aforren1/multimles: Maximum Likelihood Examples For BLAM

(note to self: run rmarkdown::render('vignettes/MLEs.Rmd') to make html available)

knitr::opts_chunk$set(fig.width=7, fig.height=6, 
                      warning=FALSE, message=FALSE)
library(svglite)
knitr::opts_chunk$set(
  dev = "svglite",
  fig.ext = ".svg"
)
#fix text though, svglite flubs it up

First, we'll load some of the necessary packages:

# Some of these can be pruned, they're placeholders for ideas
library(lme4)
library(bbmle)
library(optimx)
library(plotly)
library(psyphy)
library(lattice)
library(ggplot2)
library(dplyr)
library(nlme)
library(zoo)
library(boot)
library(tidyr)
theme_set(theme_gray(base_size = 8))

For example, to read in the csv containing the data for the free RT...

free_rt <- read.csv(system.file("extdata", "bigrt.csv", package = 'multimles'),
                    header = FALSE,
                    col.names = c('id', 'RT', 'reachDir', 'hit'))

and for the forced RT,

forced_rt <- read.csv(system.file("extdata", "bigtr.csv", package = 'multimles'), 
                      header = FALSE,
                      col.names = c('id', 'RT', 'reachDir', 'hit'))

To clean up the data a bit, try things like

    free_rt <- free_rt[complete.cases(free_rt),]
    free_rt <- free_rt[!(free_rt$RT > 0.5),]
    free_rt <- free_rt[!(free_rt$RT < 0),]
    # Should have 1860 obs. left over (check with str())

Or (in dplyr syntax):

free_rt <- free_rt %>% filter(!is.nan(reachDir), RT < 0.5, RT > 0)
forced_rt <- forced_rt %>% filter(!is.nan(reachDir), RT < 0.5, RT > 0)

Alternatives are left as an exercise to the reader.

Plots

First, a little bit more pushing around...

free_rt <- free_rt %>% arrange(RT) %>% 
  group_by(id) %>% mutate(rollrt = rollmean(RT, 10, na.pad = TRUE),
                          rollhit = rollmean(hit, 10, na.pad = TRUE))
forced_rt <- forced_rt %>% arrange(RT) %>% 
  group_by(id) %>% mutate(rollrt = rollmean(RT, 15, na.pad = TRUE),
                          rollhit = rollmean(hit, 15, na.pad = TRUE))

And plots:

ggplot(forced_rt, aes(rollrt, rollhit,colour='Forced RT')) + 
    geom_line(size = 1) + 
    geom_line(data = free_rt, aes(rollrt, rollhit, colour = 'Free RT'),
              size = 1) +
    facet_wrap(~id)

Maximum Likelihood Estimation

There were eight targets in this experiment. The objective function in MATLAB is specified as:

LL = @(params) -sum(hit.*log((1/8+asymptErr*normcdf(RT,params(1),params(2))*7/8)) + (1-hit).*log(1-(1/8+asymptErr*normcdf(RT,params(1),params(2))*7/8)));

Where asymptErr was the upper asymptote to the probit curve (starting value of 0.9), params(1) was the mean (start 0.3), and params(2) was the sd (start 0.1).

A way to do it in R is with the bbmle package, a wrapper around the mle? function plus a number of very useful helper functions.

llsig <- function(mu, sigma, asymptErr, rt, hit, Ntargs){
    p1 <- log(1/Ntargs + asymptErr * pnorm(rt, mu, sigma)*(1 - 1/Ntargs))
    p2 <- log(1 - (1/Ntargs + asymptErr * pnorm(rt, mu, sigma) * (1 - 1/Ntargs)))
    -sum(hit %*% p1 + (1 - hit) %*% p2)
}

start_vals <- list(mu = 0.3, sigma = 0.1)
subdat <- dplyr::filter(forced_rt, id == 5)
mod1 <- mle2(llsig,
            start = start_vals, 
            method = "BFGS", 
            optimizer = "optim",
            data = list(rt = subdat$RT, hit = subdat$hit),
            fixed = list(Ntargs = 8, asymptErr = 0.9))

It takes some work to get answers out of mle2, and the optimizers in R tend not to be as robust as those in MATLAB (at least I've heard/experienced).

One option is to go the hierarchical/multilevel/mixed model route, which can save us an awful lot of parameters to estimate, plus make interpretation of individual vs. group performance quite a bit cleaner.

In nlme, here's a (mostly) parameterized model (random asymptote, mean, and sd, but no correlation in the random effects. The fully parameterized model was tending toward overparameterization, as we got an error suggesting the approx. Hessian of the deviance was not positive definite):

mnlme <- nlme(hit ~ 1/8 + 7/8 * asym * pnorm(RT, mu, sigma), 
              data = forced_rt,
           fixed = asym + mu + sigma ~ 1, 
           random = pdDiag(asym + mu + sigma ~ 1), 
           start = c(asym = .9, mu = .25, sigma = .04), 
           groups = ~id) 
#consider adding a function to describe within-group variance,
#like weights = varExp(form = ~RT). The model fits better and sort of
#makes sense, though I haven't wrapped my head around what it *means*.
#It does make sense to have unequal variance (large at low RT, small at high)
summary(mnlme)

Plus coef():

coef(mnlme) # What we would sort of get if we fit each individual

We'll see how those line up with the rolling average (interactively!):

forced_rt$pred <- predict(mnlme)
forced_rt$pred0 <- predict(mnlme, level = 0)
temp <- ggplot(forced_rt, aes(rollrt, rollhit)) + 
    geom_line(aes(colour = 'Rolling mean'), size = 1) + 
    geom_line(aes(y = pred, colour = 'Predicted from nlme'), size = 1) +
    geom_point(aes(y = ifelse(hit == 0, hit - 0.05, hit + 0.05), colour = 'Raw data'),
               shape = 108, size = 2) +
    facet_wrap(~id)
temp
#ggplotly(temp) # really wonky for now, just show static plot

ggplot(forced_rt, aes(RT, pred0)) + 
    geom_line(aes(y = pred, colour = 'indiv_pred', group = id), 
              size = 1, alpha = .5) + 
    geom_line(aes(colour = 'Pop pred'), size = 2)

It works fine, conditional on starting values being relatively close to the solution.

Free Reaction Time

We'll try a random intercept model first, and use parametric bootstrap for inference.

# this is a mess, redo
# mlme <- lmer(formula = RT ~ 1 + (1 | id), data = free_rt) # Use REML: does this count as "small"?
# m2 <- lmer(formula = RT ~ (1|trial) + (1 | id), data = free_rt, REML = FALSE)
# boots <- bootMer(m2, FUN = function(x) 
#   predict(x, newdata = data.frame(id = unique(free_rt$id))), 
#   seed = 1, nsim = 10000)
# temp <- data.frame(boots$t)
# names(temp) <- dimnames(boots$t)[[2]]
# temp <- gather(temp, id, val) # from tidyr
# temp2 <- data.frame(xx = boots$t0, id = unique(free_rt$id))
# 
# ggplot(free_rt, aes(x = RT)) + 
#     geom_histogram(aes(fill = 'raw', y = ..count../sum(..count..)), # rescale counts
#                    bins = 20, alpha = .7) + 
#     geom_histogram(data = temp, aes(x = val, y = ..count../sum(..count..),
#                                     fill = 'pred'), bins = 20, alpha = .7) + 
#     geom_vline(data = temp2, aes(xintercept = xx), size = 1, colour = 'black',
#                linetype = 'longdash') +
#     facet_wrap(~id)

This seems to give reasonable estimates. There may be some bias between the original prediction and the median of the parametric bootstrap, but the magnitude of that bias is relatively small.

#print(boots)

Template Model Builder Example

# https://github.com/kaskr/adcomp/wiki/Documentation
# See https://github.com/kaskr/adcomp/wiki/Tutorial
# Also see https://groups.nceas.ucsb.edu/non-linear-modeling/projects
# writeLines(con = file("temp.cpp"), 
# c('
# #include <TMB.hpp>
# 
# template<class Type>
# Type objective_function<Type>::operator() ()
# {
#     DATA_INTEGER(ntarg);    // number of targets
#     DATA_VECTOR(hit);       // Obs ervations
#     DATA_VECTOR(rt);        // Reaction time
#     DATA_FACTOR(id);        // individuals
#     DATA_FACTOR(ngroup);    // nobs per individual
#     DATA_INTEGER(nsub)      // num of inidividuals
#     PARAMETER_VECTOR(betas); // mu, sigma, asym
# //    PARAMETER_VECTOR(log_sig_re); // random effects
# //    PARAMETER_VECTOR(log_sig_re_u);
# //    PARAMETER_MATRIX(sig_re_u);
#     PARAMETER(log_sig1);
#     PARAMETER(log_sig2);
#     PARAMETER(log_sig3);
#     PARAMETER(log_sig_u1);
#     PARAMETER(log_sig_u2);
#     PARAMETER(log_sig_u3);
#     PARAMETER_VECTOR(sig_re_u1);
#     PARAMETER_VECTOR(sig_re_u2);
#     PARAMETER_VECTOR(sig_re_u3);
#     PARAMETER_VECTOR(redund);
# 
# 
#     Type sig1 = exp(log_sig1);
#     Type sig2 = exp(log_sig2);
#     Type sig3 = exp(log_sig3);
# 
#     Type sig1_u = exp(log_sig_u1);
#     Type sig2_u = exp(log_sig_u2);
#     Type sig3_u = exp(log_sig_u3);
# 
#     ADREPORT(sig1);
#     ADREPORT(sig2);
#     ADREPORT(sig3);
#     ADREPORT(sig1_u);
#     ADREPORT(sig2_u);
#     ADREPORT(sig3_u);
# 
#      using namespace density;
#      int i,j,ii; // i for id, j for observation, ii for ?
# 
#      Type g=0;
#      ii = 0;
#      for (i = 0; i < nsub; i++) {
#          Type u1 = sig_re_u1[i+nsub];
#          Type u2 = sig_re_u2[i+nsub];
#          Type u3 = sig_re_u3[i+nsub];
#          g -= -(log_sig_u1);
#          g -= -.5*pow(u1/sig1_u, 2);
#          g -= -(log_sig_u2);
#          g -= -.5*pow(u2/sig2_u, 2);
#          g -= -(log_sig_u3);
#          g -= -.5*pow(u3/sig3_u, 2);
#          vector<Type> a(3); 
#          a[0] = betas[0] + u1; // theyre awfully small anyway
#          a[1] = betas[1] + u2;
#          a[2] = betas[2] + u3;
#          
#          Type tmp;
#          Type f;
#          // hasnt been un-oranged yet, and Im a bit lost
#          for(j=0;j<ngroup(i);j++)
#          {
#              f = (1/ntarg)+(1-1/ntarg)*a[2]*pnorm(rt[ii],a[0],a[1]);
#              tmp = (hit[ii] - f)/sig1;
#              g -= -log_sig1 -log_sig2 -log_sig3 - 0.5*tmp*tmp;
#              ii++;
#          }
# 
#      }
# 
# return g;
# }
# 
# ')); closeAllConnections()
# 
# dat <- list(rt = forced_rt$RT, hit = forced_rt$hit, 
#             ntarg = 8, ids = unique(forced_rt$id),
#             nsub = length(unique(forced_rt$id)),
#             ngroup = c(table(forced_rt$id))) # remove NaNs!
# 
# obj <- MakeADFun(data = dat,
#                  parameters = list(
#                    betas = c(0, 0, 0),
#                    log_sig_re = rep(0.5, 3),
#                    log_sig_re_u = rep(1, 3),
#                    sig_re_u = matrix(0, nrow = dat$ids, ncol = 3)
#                  ),
#                  random = c('sig_re_u'))