R/trans_newdata.R

#' @title Transformations for prediction
#' @description Does data transformation on the newdata for prediction and flags shrinkage estimates
#' @param object A merMod-class object
#' @param dat A new \code{data.frame} models above for which transformations are desired
#' @param title A logical scalar denoting if the level-2 grouping variable is at the 'title' level. Defaults to FALSE
#' @param level3 A logical scalar denoting if the models are 3-level models. Defaults to FALSE
#' @return A transformed \code{data.frame}.
trans_newdata <- function(dat, object, title= FALSE, level3= FALSE) {
  require(lme4)
  mf <- model.frame(object)
  
  if (level3 == FALSE) {
    # 01. Remove obs from newdata with new FE levels in format2  
    dat <- transform(dat, format2= factor(format2, levels= unique(mf$format2)))
    
    # 02. Flag new RE levels / shrinkage estimators
    if (title == TRUE) {
      newRE <- unique(dat$title)[ which(!(unique(dat$title) %in% unique(mf$title)))]
      dat$shrinkage <- ifelse(dat$title %in% newRE, TRUE, FALSE)
    } else {
      newRE <- unique(dat$title_format2)[ which(!(unique(dat$title_format2) %in% unique(mf$title_format2)))]
      dat$shrinkage <- ifelse(dat$title %in% newRE, TRUE, FALSE)
    }
    return(dat)
  } else {
    # 01. Remove obs from newdata with new FE levels in format2  
    fe.names <- names(fixef(object))
    dat.levels <- names(table(interaction(paste0("geography", dat$geography), 
                                          paste0("format2", dat$format2))))
    dat$l3 <- interaction(paste0("geography", dat$geography), paste0("format2", dat$format2))
    dat.levels <- gsub("[.]", ":", dat.levels)
    dat.levels2 <- dat.levels[!(dat.levels %in% fe.names)] # extract problematic levels
    dat.levels2 <- gsub("[:]",".", dat.levels2)
    
    dat <- dat[!(dat$l3 %in% dat.levels2),]
    
    # 02. Flag new RE levels / shrinkage estimators
    dat.list <- list(dat= split(dat, factor(dat$geography)), mf=split(mf, factor(mf$geography)))
    
    if (title == TRUE) {
      for (j in 1:length(dat.list[[1]])) {
        nm <- names(dat.list$dat)[[j]]
        newRE <- unique(dat.list$dat[[j]]$title[ which(!unique(dat.list$dat[[j]]$title %in% 
                                                        unique(dat.list$mf[[nm]]$title)))])
        dat.list$dat[[j]]$shrinkage <- ifelse(dat.list$dat[[j]]$title %in% newRE, TRUE, FALSE)
      }
    } else {
      for (j in 1:length(dat.list[[1]])) {
        nm <- names(dat.list$dat)[[j]]
        newRE <- unique(dat.list$dat[[j]]$title_format2[ which(!unique(dat.list$dat[[j]]$title_format2 %in% 
                                                         unique(dat.list$mf[[nm]]$title_format2)))])
        dat.list$dat[[j]]$shrinkage <- ifelse(dat.list$dat[[j]]$title_format2 %in% newRE, TRUE, FALSE)
      }
    }
    
    dat <- data.frame(do.call("rbind", dat.list[[1]]))
    return(dat)
  }
}
alexWhitworth/concord documentation built on May 11, 2019, 11:25 p.m.