pmsimstats: Personalized medicine simulated statistics

Documented in lme_analysis

#' LME Analysis
#'
#' \code{lme_analysis} analyzes the data from a single simulated trial
#'
#' Analyzes the simulated data (including all paths) from a single simulated
#' trial. Most often called by \code{generateSimualtedResults}. See vignettes
#' for additional information.
#'
#' @param trialdesign_set The set of all pathways for the clinical trial
#' @param dat A dat file as generated by \link{generateData}
#' @param op an options file with the following components:
#'   \itemize{
#'     \item{\code{op$useDE=TRUE}}{  Binary: should the LME model use expectnacy
#'       information if available?}
#'     \item{\code{op$t_random_slope=FALSE}}{  Binary: should the LME model use
#'       random slopes, as well as random intercepts, for participants?}
#'     \item{\code{op$full_model_out=FALSE}}{  Binary: if false, will output model
#'       parameters as a row of numeric values for simulation processing (most common);
#'       if true, will output an array that allows you to examine the results of a single
#'       analysis in more detail}
#'     \item{\code{op$simplecarryover=FALSE}}{  Binary: if true will add "time since
#'       drug" as a free parameter in the model}
#'     \item{\code{op$carryover_t1half=0}}{  If non-zero, will build in an exponential
#'       falloff in the "on drug or not" term in the analysis (Db), using this as the
#'       half-life.}
#'   }
#' @return Generally returns a one row data table with components:
#'   \itemize{
#'     \item{\code{out$beta}}{  The coefficient corresponding to an interaction
#'       between biomarker and outcome}
#'     \item{\code{out$betaSE}}{  Standard error of the above coefficient}
#'     \item{\code{out$p}}{  p-value conveying the statistical significance of the coefficient}
#'     \item{\code{out$issingular}}{  Was the model fit flagged as singular?}
#'     \item{\code{out$warning}}{  Any warnings from \code{fitMsgs}}
#'   }
#'   However, if you have full_model_out set to TRUE, you will instead get an array with components:
#'   \itemize{
#'   \item{\code{form}}{  The model fit by lmer}
#'   \item{\code{fit}}{  The output of lmer}
#'   \item{\code{datamerged}}{  The data in the form it was provided to lmer}
#'   \item{\code{stdout}}{  The standard one row data table that's usually returned when
#'      full_mode_out is set to FALSE, with beta, betaSE, etc}
#'   }
#'
#' @examples
#' # See vignettes 1 & 2 for examples of how to call from the function generateSimualtedResults.R
#' # See vignette 3 for an example of how to apply lme_analysis to actual clincal trial results
#' @export

lme_analysis<-function(trialdesign_set,dat,op){

  if(missing(op)){
    op$useDE<-TRUE
    op$t_random_slope<-FALSE
    op$full_model_out<-FALSE
    op$carryover_t1half <- 0
    op$simplecarryover <- FALSE
    op$carryover_scalefactor=1
  }else if(!("simplecarryover"%in%names(op))){
    op$simplecarryover=FALSE
  }
  if(!("carryover_t1half"%in%names(op))){
    op$carryover_t1half=0
    op$carryover_scalefactor=1
  }
  if((op$carryover_t1half>0)&(op$simplecarryover==TRUE)){
    error("Don't know what it means to have two different carryover models, so disallowing simplecarryover and a half-life based carryover at the same time for now.")
  }

  n_groups<-length(trialdesign_set)
  datout<-list(rep(NA),n_groups)
  lastptID<-0
  for(g in 1:n_groups){
    trialdesign<-trialdesign_set[[g]]
    datSingle<-dat[path==g]

    # I. Turn data into this format

    # Format trialdesign:
    # Add a BL row
    trialdesign<-rbind(list(timeptnames="BL",t_wk=0,e=0,tod=0,tsd=0,tpb=0),trialdesign)
    # For minimization of coding erors, t_wk is length of the block, not time since
    # start, so create a "t" that is just true time with bl as 0:
    trialdesign[,t:=as.integer(NA)]
    trialdesign$t<-cumulative(trialdesign$t_wk)

    # Format dat:
    # Make pt data dt that just has the numbers we actually want
    timeptnames=union(trialdesign$timeptnames,"BL") # makes sure don't end up with it twice
    evalstring<-paste("data<-datSingle[,.(ptID,bm,",
                      paste(timeptnames,sep="",collapse=","),")]",sep="")
    eval(parse(text=evalstring))

    # Make sure the ptID is unique even if we're merging across groups:
    data$ptID<-data$ptID+lastptID
    lastptID<-max(data$ptID)

    # Turn it long-form, then merge in the trialdesign data
    data.m1<-melt(data,id.vars=c("ptID","bm"),measure.vars=trialdesign$timeptnames,
                  variable.name="timeptnames",value.name="Sx",na.rm=FALSE)
    data.m2<-merge(data.m1,
                   trialdesign[,.(timeptnames,t,De=e,Db=(tod>0),tsd)],
                   by="timeptnames",all=TRUE)

    # Add a scaler version of Db - Dbc, for continuous - to allow for
    # more complex carryover modeling
    data.m2[,Dbc:=as.numeric(NA)]
    data.m2[Db==TRUE,Dbc:=1]
    data.m2[Db==FALSE,Dbc:=((1/2)^(op$carryover_scalefactor*tsd/op$carryover_t1half))]

    datout[[g]]<-data.m2
  }

  datamerged<-datout[[1]]
  if(n_groups>1){for(g in 2:n_groups){datamerged<-rbind(datamerged,datout[[g]])}}

  # Tests we'll use to sort out what model to use:
  # 1) Test whether can possibly include the expectancy-related factor:
  varInExp<-length(unique(trialdesign$e[2:length(trialdesign$e)]))
  # 2) Test whether can use Db vs need to use t for the interaction term:
  datamerged[t>0,meanDb:=mean(Db),by=ptID]
  datamerged[t>0,DbVar:=((meanDb!=0)&(meanDb!=1)),by=ptID]
  varInDb<-(sum(datamerged[t>0]$DbVar==TRUE)>0)
  if(!varInDb){
    # Only analyze folks who are ever on drug, in this case:
    iEverOnDrug<-unique(datamerged[meanDb==1]$ptID)
    datamerged<-datamerged[ptID%in%iEverOnDrug]
  }
  # 3) Test whether we can use the carryover term, ie, tsd is sometimes non-zero
  varintsd=(nrow(datamerged[tsd!=0])>0)

  # Use these to pick a model
  modelbase="Sx~bm+t"
  if(varInDb){
    modelbase=paste0(modelbase,"+Dbc+bm*Dbc")
  }else{
    modelbase=paste0(modelbase,"+bm*t")
  }
  if((varInExp>1)&(op$useDE==TRUE)){
    modelbase=paste0(modelbase,"+De")
  }
  if(op$simplecarryover&varintsd){
    modelbase=paste0(modelbase,"+tsd")
  }
  if(op$t_random_slope){
    modelbase=paste0(modelbase,"+(1+t|ptID)")
  }else{
    modelbase=paste0(modelbase,"+(1|ptID)")
  }
  # now knit together and evaluate - poor programming form!
  eval(parse(text=paste0("form<-",modelbase)))
  # if(varInDb){
  #   if(op$t_random_slope){
  #     if((varInExp>1)&(op$useDE==TRUE)){
  #       form<-Sx~bm+De+Db+t+bm*Db+(1+t|ptID)
  #     } else {
  #       form<-Sx~bm+Db+t+bm*Db+(1+t|ptID)
  #     }
  #   }else{
  #     if((varInExp>1)&(op$useDE==TRUE)){
  #       form<-Sx~bm+De+Db+t+bm*Db+(1|ptID)
  #     } else {
  #       form<-Sx~bm+Db+t+bm*Db+(1|ptID)
  #     }
  #   }
  # }else{
  #   if(op$t_random_slope){
  #     if((varInExp>1)&(op$useDE==TRUE)){
  #       form<-Sx~bm+De+t+bm*t+(1+t|ptID)
  #     } else {
  #       form<-Sx~bm+t+bm*t+(1+t|ptID)
  #     }
  #   }else{
  #     if((varInExp>1)&(op$useDE==TRUE)){
  #       form<-Sx~bm+De+t+bm*t+(1|ptID)
  #     } else {
  #       form<-Sx~bm+t+bm*t+(1|ptID)
  #     }
  #   }
  # }

  # Run (should insert tryCatch here, but not working!)
  fit<-lmer(form,data=datamerged)
  holdWarning<-summary(fit)$fitMsgs
  if(length(holdWarning)==0) holdWarning<-as.character(NA)

  # set issingular flag off, turn on if get singular warning
  issingular<-FALSE
  if(length(summary(fit)$optinfo$conv$lme4$messages)>0){
    if(summary(fit)$optinfo$conv$lme4$messages[[1]]=="boundary (singular) fit: see ?isSingular"){
      issingular<-TRUE
    }
  }

  # Package output
  c<-summary(fit)$coefficients
  if(varInDb){
    p<-c['bm:Dbc','Pr(>|t|)']
    beta<-c['bm:Dbc','Estimate']
    betaSE<-c['bm:Dbc','Std. Error']
  }else{
    p<-c['bm:t','Pr(>|t|)']
    beta<-c['bm:t','Estimate']
    betaSE<-c['bm:t','Std. Error']
  }

  # pvalue plan from http://mindingthebrain.blogspot.in/2014/02/three-ways-to-get-parameter-specific-p.html
  # this is the medium-conservative option
  out<-data.table(beta=beta,betaSE=betaSE,p=p,issingular=issingular,warning=holdWarning)

  # If full model output reqeusted, will repackage a bit:
  if(op$full_model_out){
    out<-list(form=form,fit=fit,datamerged=datamerged,stdout=out)
  }

  return(out)
}