R/analyze.R

# Fit an lmer model with an equation and data, return list of the model, and convergence diagnostics (with the equation as the name)
fitLMERsingle <- function(eq, data){
  oldWarn <- getOption("warn") # save the old warning state
  options(warn = -1) # ignore all warnings

  fitOut <- lme4::lmer(stats::as.formula(eq), data)
  convLme4 <- fitOut@optinfo$conv$lme4
  if(is.list(convLme4) & length(convLme4) != 0){
    # check if the lme4 convergence list has any contents
    warnCode <- convLme4$code
    warnMessages <- convLme4$messages
  } else {
    # if the lme4 convergence list has no contents, setup success variables
    warnCode <- 0
    warnMessages <- ""
  }

  options(warn = oldWarn) # ignore all warnings
  return(list(modelObject=fitOut, converged={warnCode==0}, convWarnCode=warnCode, convWarnMsgs=warnMessages))
}

# Vectorized version of fitLMERsingle
fitLMER <- Vectorize(fitLMERsingle, vectorize.args = "eq", SIMPLIFY = FALSE)

# generate equations (formulas) with two predictors from maximal to simpler
# this is depricated, because eqsGen is a better use of modelMetadata.json
eqsGen2preds <- function(outcome, predictor1, predictor2, grouping1 = "obsisSubj"){
  eqs <- character()
  eqs <- append(eqs, paste0(outcome, "~", predictor1, "*", predictor2, "+", "(", "1+", predictor1, "*", predictor2, "|", grouping1, ")"))
  eqs <- append(eqs, paste0(outcome, "~", predictor1, "*", predictor2, "+", "(", "1+", predictor1, "+", predictor2, "|", grouping1, ")"))
  eqs <- append(eqs, paste0(outcome, "~", predictor1, "+", predictor2, "+", "(", "1+", predictor1, "+", predictor2, "|", grouping1, ")"))
  return(eqs)
}

# generate equations from variables to use and formulas (both of which come from modelMetadata.json)
eqsGen <- Vectorize(function(variablesToUse, formula){
  with(variablesToUse, eval(parse(text=formula)))
}, vectorize.args = "formula", USE.NAMES = TRUE)


#' Fit the right models based on the name of the dataset being passed
#'
#' @param dataSet this is the data set to use. Exampls: "action", "estimation"
#' @param analysis this is the analysis of data that is going to be analyzed from \code{data}. Examples: "maxGrip.stickAsContinuous", "meanGrip.stickAsContinuous" The name of this option determines which model will be fit.
#' @param data a data object as generated by \code{\link{readExtractedMocapData}}
#' @param modelMd a modelMetadata object, by default it uses the \code{modelMetadata} that comes with the package.
#' @return a list of fit models
#'
fitModels <- function(dataSet, analysis, data, modelMd){

  # check if the analysis of analysis is one of the known ones.
  # update this message if the models by analysis is moved elsewhere.
  if(!{analysis %in% names(modelMd$models$analyses)}){
    stop("Error, the analysis ", analysis, " can't be found in the analyses specifications that are available. Please update the modelMetadata object or modelMetadata.json", sep="")
  }

  modelsOut <- fitLMER(eqsGen(modelMd$models$analyses[[analysis]]$variablesToUse,
                                    modelMd$models$modelStructures)
                       , data=data[[dataSet]]$data)

  return(modelsOut)
}

#' Pick one model as the best
#'
#' \code{findTheBestModel} finds the first (by default) model in a list that does not have convergence errors. As the formula lists are configured now, this will return the most complex model (that does not have convergence errors). This does not run any model comparison, it just finds the first (or last if specified with option \code{last=TRUE})
#'
#' @param models A list of models, as generated by \code{\link{fitModels}}
#' @param last A logical, should the last model be picked? By default this is \code{FALSE}, which means the first model will be picked.
#' @return Returns a list of length two, the first, named \code{chosenModel} is the model chosen, \code{allModels} has all of the models, and is a copy of \code{models}
#'
findTheBestModel <- function(models, last = FALSE){

  # check that at least one model converged?

  if(last){
    modName <- names(which.max(which(unlist(lapply(models, function(x) x$converged)))))
  } else {
    modName <- names(which.min(which(unlist(lapply(models, function(x) x$converged)))))
  }
  bestModel <- list(models[[modName]])
  names(bestModel) <- modName

  return(list("bestModel" = bestModel, "allModels" = models))
}


#' Model all data in a data object
#'
#' takes data, and returns an object that includes the original data as well as fit models.
#'
#' @param data A data object, as generated by \code{\link{readExtractedMocapData}}
#' @param refitModels logical, if there are models that are already fit, should they be refit? Default: \code{FALSE}
#' @param modelMd a modelMetadata object. If you have loaded a different modelMetadata object that you would like to use, place it here. The default is `modelMetadata` which comes with the package.
#' @param ... options to pass to \code{\link{fitModels}} (or \code{\link{findTheBestModel}}, if reprogrammed adjusted)
#' @return Returns a data object (with all of the same data as the data object given in the \code{data} argument) with all of the models that were fit included.
#'
#' @export
modelAllData <- function(data, refitModels = FALSE, modelMd = modelMetadata, ...){
  data <- checkData(data, modelMd = modelMd)

  dataSets <- names(data)

  # exclude full data if present
  dataSets <- dataSets[dataSets!="fullData"]

  for (dataSet in dataSets){
    if(refitModels | !"analyses" %in% names(data[[dataSet]])) {
      # only fit if refitModels is TRUE, or if there are no analyses already.
      # this should be changed to also check analysesToRun probably.
      analyses <- data[[dataSet]]$analysesToRun
      if(length(analyses)<1){
        warning("There are no analyses in analysesToRun for the dataSet ", dataSet, " so no analyses were run for that data set.", sep="")
      } else{
        data[[dataSet]][["analyses"]] <- list()
      }
      for(analysis in analyses){
        models <- fitModels(dataSet=dataSet, analysis=analysis, data=data, modelMd = modelMd)
        modelsProced <- findTheBestModel(models, ...)
        data[[dataSet]][["analyses"]][[analysis]] <- modelsProced
      }
    }
  }
  return(data)
}
jonkeane/mocapGrip documentation built on May 19, 2019, 7:30 p.m.