R/in_sample_elpd_loo.R

#'@title Estimate delta elpd for pairs of models
#'@param dat Data
#'@param fits List of models. Should either be output of cause_grid_adapt2_v7 or "0" to reperesent the null model
#'@param pval_thresh P-value threshold for data if desired
#'@param nsamps Number of samples to take from the posterior
#'@export
in_sample_elpd_loo <- function(dat, fits, pval_thresh=Inf, nsamps=1000){

    stopifnot(all(c("beta_hat_1", "seb1", "beta_hat_2", "seb2") %in% names(dat)))
    stopifnot(length(fits) >=2)
    #For each data point in, calculate log likelihood under posterior
    k <- length(fits)
    mods <- expand.grid(m1 = 1:k, m2=1:k)
    mods <- mods %>% filter(m1 < m2) %>%
            mutate(delta_elpd = NA, se_delta_elpd = NA)

    if(!is.null(names(fits))){
      mods$model1 <- names(fits)[mods$m1]
      mods$model2 <- names(fits)[mods$m2]
    }else{
      mods$model1 <- m1
      mods$model2 <- m2
    }
    dat <- dat %>%
           mutate(pval_1 = 2*pnorm(abs(beta_hat_1/seb1), lower.tail=FALSE)) %>%
           filter(pval_1 <= pval_thresh)


    lls <- lapply(fits, FUN=function(fit){
                     if(is.null(fit$post)){
                       llmat <- ll_v7_loo(0, 0, 0,
                                             fit$rho, fit$mix_grid$S1, fit$mix_grid$S2, fit$mix_grid$pi,
                                             dat$beta_hat_1, dat$beta_hat_2, dat$seb1, dat$seb2)
                       return(llmat)
                     }
                     samps <-samp_from_grid(fit$post, c("q", "gamma", "eta"), nsamps)
                     llmat <- t(ll_v7_loo(samps$gamma, samps$eta, samps$q,
					                                fit$rho,
                                          fit$mix_grid$S1, fit$mix_grid$S2, fit$mix_grid$pi,
                                          dat$beta_hat_1, dat$beta_hat_2, dat$seb1, dat$seb2))
                     return(llmat)
                })

    loos <- lapply(lls, function(llmat){
      if(ncol(llmat) == 1) return(NULL)
      loo(llmat, r_eff = rep(1, nrow(dat)))
    })

    for(j in seq(nrow(mods))){
      i1 <- mods$m1[j]
      i2 <- mods$m2[j]
      if(is.null(loos[[i1]])){
        diff <- lls[[i1]] - loos[[i2]]$pointwise[,1]
        est <- sum(diff)
        se <- sd(diff)*sqrt(nrow(dat))
      }else if(is.null(loos[i2])){
        diff <- loos[[i1]]$pointwise[,1] - lls[[i2]]
        est <- sum(diff)
        se <- sd(diff)*sqrt(nrow(dat))
      }else{
        comp <- compare(loos[[i2]], loos[[i1]])
        est <- as.numeric(comp["elpd_diff"])
        se <- as.numeric(comp["se"])
      }
      mods$delta_elpd[j] <- est
      mods$se_delta_elpd[j] <- se
    }
    mods <- mods %>% mutate(z = delta_elpd/se_delta_elpd) %>%
            select(model1, model2, delta_elpd, se_delta_elpd, z)
    return(mods)
}
jean997/sherlockAsh documentation built on May 18, 2019, 11:45 p.m.