R/ctKalman.R

Defines functions plot.ctKalmanDF ctKalman ctKalmanTIP

Documented in ctKalman plot.ctKalmanDF

ctKalmanTIP <- function(sf,tipreds='all',subject=1,timestep='auto',plot=TRUE,returndat=FALSE,...){
  if(tipreds[1] %in% 'all') tipreds <- sf$ctstanmodel$TIpredNames
  if(length(subject) > 1) stop('>1 subject!')
  
  sdat <- standatact_specificsubjects(standata = sf$standata,subjects = subject)
  sdat$tipredsdata[,sf$ctstanmodel$TIpredNames] <- 0 #set all tipreds to zero
  
  #create datalong structure
  dat <- data.frame(id=sdat$subject,time=sdat$time)
  datti <- suppressWarnings(merge(dat,data.frame(id=sdat$subject,time=sdat$time,sdat$tipredsdata),all=TRUE))
  datti <- datti[order(datti$id,datti$time),]
  colnames(datti)[1:2] <- c(sf$ctstanmodelbase$subjectIDname,sf$ctstanmodelbase$timeName)
  colnames(dat)[1:2] <- c(sf$ctstanmodelbase$subjectIDname,sf$ctstanmodelbase$timeName)
  addm <- matrix(NA,nrow=nrow(dat),ncol=length(sf$ctstanmodel$manifestNames))
  colnames(addm) <- sf$ctstanmodel$manifestNames
  tdpreds <- sdat$tdpreds
  colnames(tdpreds) <- sf$ctstanmodel$TDpredNames
  dat<-cbind(datti,addm,tdpreds)
  # dat <- merge(datti,addm)
  
  tisd <- apply(sf$standata$tipredsdata,2,sd,na.rm=TRUE)
  timu <- apply(sf$standata$tipredsdata,2,mean,na.rm=TRUE)
  
  newdat=dat
  for(tip in 1:length(tipreds)){
    for(direction in c(-1,1)){
      tdat <- newdat
      tdat[[sf$ctstanmodelbase$subjectIDname]] <- paste0(tipreds[tip],
        ifelse(direction==1,' high',' low'))
      tdat[,tipreds[tip]] <- tisd[tip] * direction
      #add to full dat
      dat <- rbind(dat,tdat)
    }
  }
  dat[[sf$ctstanmodelbase$subjectIDname]][dat[[sf$ctstanmodelbase$subjectIDname]] %in% 1] <- 0
  sf$standata <- suppressMessages(ctStanData(sf$ctstanmodel,dat,optimize=TRUE))
  k=ctKalman(fit = sf,subjects=1:sf$standata$nsubjects,realid=TRUE,timestep=timestep)
  k=plot.ctKalmanDF(k,plot=FALSE,...)
  # k$labels$colour[1] <- 'Covariate'
  k$data$`Direction` <- factor(ifelse(grepl(' high$',k$data$Subject),'+ 1','- 1'))
  k$data$Subject <- gsub(' high','',k$data$Subject)
  k$data$Subject <- gsub(' low','',k$data$Subject)
  kdat <- k$data
  colnames(kdat)[colnames(kdat)%in%'Subject'] <- 'Covariate'
  kdat$Covariate <- factor(kdat$Covariate)
  
  Covariate = Direction = Time = Value = Variable = NULL #global vars nonsense
  
  if(returndat) return(kdat) else{
    k=ggplot(data = kdat,mapping = aes(y=Value,x=Time,linetype=Direction,colour=Covariate))+
      geom_line(size=1)+
      scale_colour_manual(values=c(1,2:length(unique(kdat$Covariate))),
        labels=c('No covariate',levels(kdat$Covariate)[-1]))+
      facet_wrap(vars(Variable),scales = 'free_y')+theme_bw()
    return(k)
  }
}

#' ctKalman 
#'
#' Outputs predicted, updated, and smoothed estimates of manifest indicators and latent states, 
#' with covariances, for specific subjects from data fit with \code{\link{ctStanFit}}, 
#' based on either the mode (if optimized) or mean (if sampled) of parameter distribution.
#' 
#' @param fit fit object as generated by \code{\link{ctStanFit}}.
#' @param timerange Either 'asdata' to just use the observed data range, or a numeric vector of length 2 denoting start and end of time range, 
#' allowing for estimates outside the range of observed data. Ranges smaller than the observed data
#' are ignored.
#' @param timestep Either 'asdata' to just use the observed data 
#' (which also requires 'asdata' for timerange) or a positive numeric value
#' indicating the time step to use for interpolating values. Lower values give a more accurate / smooth representation,
#' but take a little more time to calculate. 
#' @param subjects vector of integers denoting which subjects (from 1 to N) to plot predictions for. 
#' @param removeObs Logical or integer. If TRUE, observations (but not covariates)
#' are set to NA, so only expectations based on parameters and covariates are returned. If a positive integer N, 
#' every N observations are retained while others are set NA for computing model expectations -- useful for observing prediction performance
#' forward further in time than one observation.
#' @param standardisederrors if TRUE, also include standardised error output (based on covariance
#' per time point).
#' @param plot Logical. If TRUE, plots output instead of returning it. 
#' See \code{\link{plot.ctKalmanDF}} 
#' (Stan based fit) for the possible arguments.
#' @param realid use original (not necessarily integer sequence) subject id's? Otherwise use integers 1:N.
#' @param ... additional arguments to pass to \code{\link{plot.ctKalmanDF}}.
#' @return Returns a list containing matrix objects etaprior, etaupd, etasmooth, y, yprior, 
#' yupd, ysmooth, prederror, time, loglik,  with values for each time point in each row. 
#' eta refers to latent states and y to manifest indicators - y itself is thus just 
#' the input data. 
#' Covariance matrices etapriorcov, etaupdcov, etasmoothcov, ypriorcov, yupdcov, ysmoothcov,  
#' are returned in a row * column * time array. 
#' Some outputs are unavailable for ctStan fits at present.
#' If plot=TRUE, nothing is returned but a plot is generated.
#' @examples
#' \donttest{
#' 
#' #Basic
#' ctKalman(ctstantestfit, timerange=c(0,60), plot=TRUE)
#' 
#' #Multiple subjects, y and yprior, showing plot arguments
#' plot1<-ctKalman(ctstantestfit, timerange=c(0,60), timestep=.1, plot=TRUE,
#'   subjects=2:3, 
#'   kalmanvec=c('y','yprior'),
#'   errorvec=c(NA,'ypriorcov')) #'auto' would also have achieved this
#'   
#'  #modify plot as per normal with ggplot
#'  print(plot1+ggplot2::coord_cartesian(xlim=c(0,10)))
#'  
#'  #or generate custom plot from scratch:#'  
#'  k=ctKalman(ctstantestfit, timerange=c(0,60), timestep=.1, subjects=2:3)
#'  library(ggplot2)
#'  ggplot(k[k$Element %in% 'yprior',],
#'    aes(x=Time, y=value,colour=Subject,linetype=Row)) +
#'    geom_line() +
#'    theme_bw()
#'
#'  }
#' @export

ctKalman<-function(fit, timerange='asdata', timestep='auto',
  subjects=fit$setup$idmap[1,1], removeObs = FALSE, plot=FALSE, 
  standardisederrors=TRUE,realid=TRUE,...){
  
  
  if('ctsemFit' %in% class(fit)) stop('This function is no longer supported with ctsemOMX, try ctsem')
  if(!'ctStanFit' %in% class(fit)) stop('fit object is not from ctStanFit!')
  
  
  # get subjects ------------------------------------------------------------
  idmap <- fit$setup$idmap #store now because we may reduce it
  subjectsarg <- subjects
  if(realid) subjects <- idmap[which(idmap[,1] %in% subjects),2]
  
  if(length(subjects) == 0){
    if(all(!is.na(as.integer(subjectsarg)))){
      subjects <- as.integer(subjectsarg)
      warning('Specified subjects not found in original id set -- assuming integers correspond to internal integer mapping. Consider setting realid=FALSE')
    } else stop('Specified subjects not found in original id set, and (some) are not integers...')
  }
  subjects <- sort(subjects) #in case not entered in ascending order
  
  out <- ctStanKalman(fit,pointest=length(fit$stanfit$stanfit@sim)==0, removeObs=removeObs, subjects=subjects,timestep = timestep,
    collapsefunc=mean, indvarstates = FALSE,standardisederrors = standardisederrors) #extract state predictions
  # browser()
  out <- meltkalman(out)
  if(realid){
    out$Subject <- factor(idmap[
      match(out$Subject,idmap[,2]),1])
  }
  # out=out[!(out$Subject %in% subjects) %in% FALSE,]
  
  
  if(plot) {
    plot(x=out,subjects=subjects,...)
  } else return(out)
}




#' Plots Kalman filter output from ctKalman.
#'
#' @param x Output from \code{\link{ctKalman}}. In general it is easier to call 
#' \code{\link{ctKalman}} directly with the \code{plot=TRUE} argument, which calls this function.
#' @param subjects vector of integers denoting which subjects (from 1 to N) to plot predictions for. 
#' @param kalmanvec string vector of names of any elements of the output you wish to plot, 
#' the defaults of 'y' and 'ysmooth' plot the original data, 'y', 
#' and the estimates of the 'true' value of y given all data. Replacing 'y' by 'eta' will 
#' plot latent states instead (though 'eta' alone does not exist) and replacing 'smooth' 
#' with 'upd' or 'prior' respectively plots updated (conditional on all data up to current time point)
#' or prior (conditional on all previous data) estimates.
#' @param errorvec vector of names indicating which kalmanvec elements to plot uncertainty bands for. 
#' 'auto' plots all possible.
#' @param elementNames if NA, all relevant object elements are included -- e.g. if yprior is in the kalmanvec
#' argument, all manifest variables are plotted, and likewise for latent states if etasmooth was specified.
#' Alternatively, a character vector specifying the manifest and latent names to plot explicitly can be specified.
#' @param plot if FALSE, plots are not generated and the ggplot object is simply returned invisibly.
#' @param errormultiply Numeric denoting the multiplication factor of the std deviation of errorvec objects. 
#' Defaults to 1.96, for 95\% intervals.
#' @param polygonsteps Number of steps to use for uncertainty band shading. 
#' @param polygonalpha Numeric for the opacity of the uncertainty region.
#' @param facets when multiple subjects are included in multivariate plots, the default is to facet plots 
#' by variable type. This can be set to NA for no facets, or \code{ggplot2::vars(Subject)} for facetting by subject.
#' @param ... not used.
#' @return A ggplot2 object. Side effect -- Generates plots.
#' @method plot ctKalmanDF
#' @export plot.ctKalmanDF
#' @export
#' @examples
#' ### Get output from ctKalman
#' x<-ctKalman(ctstantestfit,subjects=2,timestep=.01)
#' 
#' ### Plot with plot.ctKalmanDF
#' plot(x, subjects=2)
#' 
#' ###Single step procedure:
#' ctKalman(ctstantestfit,subjects=2,
#'   kalmanvec=c('y','yprior'),
#'   elementNames=c('Y1','Y2'), 
#'   plot=TRUE,timestep=.01)
plot.ctKalmanDF<-function(x, subjects=unique(x$Subject), kalmanvec=c('y','yprior'),
  errorvec='auto', errormultiply=1.96,plot=TRUE,elementNames=NA,
  polygonsteps=10,polygonalpha=.1,
  facets=vars(Variable),
  ...){
  
  kdf <- (x)
  
  if(!'ctKalmanDF' %in% class(kdf)) stop('not a ctKalmanDF object')
  
  if(1==99) Time <- Value <- Subject <- Row <- Variable <- Element <- NULL
  colnames(kdf)[colnames(kdf) %in% 'Row'] <- "Variable"
  colnames(kdf)[colnames(kdf) %in% 'value'] <- "Value"
  
  if(any(!is.na(elementNames))) kdf <- subset(kdf,Variable %in% elementNames)
  
  klines <- kalmanvec[grep('(prior)|(upd)|(smooth)',kalmanvec)]
  if(all(errorvec %in% 'auto')) errorvec <- klines
  errorvec <- errorvec[grep('(prior)|(upd)|(smooth)',errorvec)]
  # kpoints<- kalmanvec[-grep('(prior)|(upd)|(smooth)',kalmanvec)]
  colvec=ifelse(length(subjects) > 1, 'Subject', 'Variable')
  ltyvec <- setNames( rep(0,length(kalmanvec)),kalmanvec)
  ltyvec[kalmanvec %in% klines] = setNames(1:length(klines),klines)
  # if(length(kalmanvec) > length(klines)) ltyvec <- 
  #   c(setNames(rep(0,length(kalmanvec)-length(klines)),kalmanvec[!kalmanvec %in% klines]),
  #     ltyvec)
  shapevec<-ltyvec
  shapevec[shapevec %in% 0] <- 1
  shapevec[shapevec>0] <- NA
  shapevec[ltyvec %in% 0] <- 19
  # 
  d<-subset(kdf,Element %in% kalmanvec)
  
  g <- ggplot(d,
    aes_string(x='Time',y='Value',colour=colvec,linetype='Element',shape='Element')) +
    scale_linetype_manual(breaks=names(ltyvec),values=ltyvec)+
    guides(linetype='none',shape='none')+
    scale_shape_manual(breaks=names(shapevec),values=shapevec)
  # labs(linetype='Element',shape='Element',colour='Element',fill='Element')+
  # guides(fill=FALSE)
  # 
  if(length(unique(ltyvec[!is.na(ltyvec)]))<1) g<-g+guides(linetype='none')
  # if(length(unique(shapevec[!is.na(shapevec)]))<1) 
  # g<-g+ guides(shape=FALSE)
  if(!is.na(facets[1]) && length(subjects) > 1 && length(unique(subset(kdf,Element %in% kalmanvec)$Variable)) > 1){
    g <- g+ facet_wrap(facets,scales = 'free') 
  } 
  g <- g + ylab(ifelse(length(unique(d$Variable))==1, unique(d$Variable),'Variable'))
  
  polygonsteps <- polygonsteps + 1
  polysteps <- seq(errormultiply,0,-errormultiply/(polygonsteps+1))[c(-polygonsteps+1)]
  if(any(!is.na(errorvec))){
    for(si in polysteps){
      # alphasum <- alphasum + polygonalpha/polygonsteps
      
      d2 <- subset(d,Element %in% errorvec)
      d2$sd <- d2$sd *si
      
      if(length(subjects) ==1){
        g<- g+ 
          geom_ribbon(data=d2,aes(ymin=(Value-sd),x=Time,
            ymax=(Value+sd),fill=(Variable)),
            alpha=ifelse(si== polysteps[1],.05,polygonalpha/polygonsteps),
            inherit.aes = FALSE,linetype=0)
        if(si== polysteps[1]) g <- g + 
            geom_line(data=d2,aes(y=(Value-sd),colour=Variable),linetype='dotted',alpha=.4) + 
            geom_line(data=d2,aes(y=(Value+sd),colour=Variable),linetype='dotted',alpha=.4)
      } else {
        g <- g+ 
          geom_ribbon(data=d2,aes(ymin=(Value-sd),x=Time,
            ymax=(Value+sd),fill=(Subject)),inherit.aes = FALSE,
            alpha=polygonalpha/polygonsteps,linetype=0)
        if(si== polysteps[1]) g <- g + 
            geom_line(data=d2,aes(y=(Value-sd),colour=Subject),linetype='dotted',alpha=.7) + 
            geom_line(data=d2,aes(y=(Value+sd),colour=Subject),linetype='dotted',alpha=.7)
      }
    } 
  }
  g <- g + 
    geom_line()+
    geom_point()+
    theme_minimal()+
    guides(fill='none')
  
  # if(plot) suppressWarnings(print(g))
  return(g)
  
}

Try the ctsem package in your browser

Any scripts or data that you put into this service are public.

ctsem documentation built on Nov. 2, 2023, 6:03 p.m.