R/eventData.R

# This file contains the public functions associated with eventData object
# in the predict from data part of the package. Note the estimateAccrualParameter
# function can be found in the accrual.R file.

##' @include fromDataSimParam.R eventPrediction_package.R
NULL

##' @import survival methods
NULL

##' @importFrom  mvtnorm rmvnorm
NULL

##' @importFrom  scales date_format
NULL

# Function that will check the validity of EventData object
# when constructed
# @param object EventData object
checkEventData <- function(object){
  errors <- ""
  
  required.colnames <- c("has.event", "rand.date", "withdrawn", "subject", "time","censored.at.follow.up")
  
  data <- object@subject.data
  
  if(nrow(data)>0){
  
    if(any(!data$has.event %in% c(0,1))) errors <- paste(errors,"has.event must be 1 or 0" )
    if(any(!data$withdrawn %in% c(0,1))) errors <- paste(errors,"withdrawn must be 1 or 0" )
    if(any(!data$censored.at.follow.up %in% c(0,1))) errors <- paste(errors,"censored.at.follow.up must be 1 or 0" )

    if(any(!required.colnames %in% colnames(data))) errors <- paste(errors,"invalid column names")
  
    
    if(class(data$rand.date)!="Date") errors <- paste(errors,"Invalid rand.date")
    if(class(data$time)!="numeric") {
      errors <- paste(errors,"Times must be numeric")
    }
    else{
      if(all(data$time==0)) errors <- paste(errors,"time value incorrect all are 0!")  
      if(any(data$time < 0 | is.na(data$time))) {
        errors <- paste(errors,"subjects cannot have non-positive time on trial. Please check subjects ",
                      paste(data[data$time <= 0 | is.na(data$time),]$subject,collapse=", "))
      
      }
    }
  
    if(any(duplicated(data$subject))) errors <- paste(errors,"subject ID must be unique")
  
    if(all(data$has.event==0)){
      warning("No events have occurred - a model cannot be fit")
    }
    if(all(data$has.event==1)) errors <- paste(errors,"all events have occurred!")
  
    idx <- which(data$has.event==1& data$withdrawn==1)
    if(length(idx)){
      errors <- paste(errors,"subjects cannot be both withdrawn and have an event. Please check subjects ",
                    paste(data$subject[idx],collapse=", "))
    }
  
  }
  
  if(!is.numeric(object@followup)||length(object@followup)!= 1|| object@followup <= 0){
    errors <- paste(errors,"Invalid followup argument")
  }
  
  if(errors == "") TRUE else errors
}
  
##' Class representing data to use for new predictions
##' @slot subject.data a data frame with 6 columns
##' "subject", "rand.date", "has.event", "withdrawn", "censored.at.follow.up" and "time" and "site" and "event.type"
##' see vignette and \code{EventData} for further details. 
##' @slot followup A numeric value for the fixed follow up period a subject
##' is followed. This is in days. If there is no fixed followup period then 
##' Inf should be used
##' @export
setClass("EventData", 
          slots= list(subject.data = "data.frame",
                      followup="numeric"),
          validity = checkEventData)


##' Constructor for event data object
##' 
##' All dates must be in one of the following formats:
##' YYYY-MM-DD, DD/MM/YY or DD Month YYYY
##'  
##' 
##' @param data A data frame 
##' @param subject string, the column name of subject identifiers
##' @param rand.date string, the column name of randomization dates
##' @param has.event string, the column name of whether a subject had an event
##' (1) or not (0) 
##' @param withdrawn string, the column name of whether a subject has withdrawn
##' (1) or not (0)  
##' @param time Either a string, the column name of time each subject has been on the study
##' or a list with elements with (some of) the following named elements.
##' \code{last.date}, \code{event.date}, \code{prog.date}, \code{dth.date} and \code{withdrawn.date}
##' In this case the package will attempt to derive the time column 
##' See the vignette and then eventPrediction:::AddTimeColumn for further details
##' @param site optional column for subject site
##' @param event.type optional column for the event type (e.g. unstable angina) if not included then `Had Event' will be used 
##' @param remove.0.time logical, if TRUE then all subjects with time = NA or 0 are removed from the
##' data set and not included in the object. If FALSE then they are included in the simulation (but not in the model fitting)
##' @param followup A numeric value for the fixed follow up period a subject
##' is followed. This is in days. If there is no fixed followup period then 
##' Inf should be used
##' @return An \code{EventData} object
##' @export
EventData <- function(data,subject,rand.date,has.event,withdrawn,time,site=NULL,event.type=NULL,remove.0.time=FALSE,followup=Inf){
  
  #set time argument
  arg <- if(!is.list(time))  time else NULL
   
  #check columns are in data frame 
  for(x in c(subject,rand.date,has.event,withdrawn,arg)){
    if(!is.null(x) && !x %in% colnames(data)){
      stop(paste("Column name",x,"not found in data frame"))
    }
  }
  
  if(!is.numeric(followup) || length(followup)> 1 || followup <= 0){
    stop("Invalid followup argument")
  }
  
  #validate the input columns, deriving time from other columns if needed
  data[,rand.date] <- if(nrow(data)>0) FixDates(data[,rand.date]) else data[,rand.date]
  time <- if(!is.list(time))  data[,time] else AddTimeColumn(data,rand.date,has.event,withdrawn,subject,time)
  site <- if(is.null(site)) rep(NA,nrow(data)) else data[,site] 
  
  if(is.null(event.type)) 
    event.type <- ifelse(data[,has.event],"Has Event",factor(NA))
  else{
    event.type <- factor(data[,event.type])
    if("" %in% levels(event.type)){
      event.type[event.type==""] <- factor(NA)
      event.type <- droplevels(event.type)
    }
  }
  
  #unvalidated data frame
  subject.data <- data.frame(
    subject=data[,subject],
    rand.date=data[,rand.date],
    time=time,
    has.event=data[,has.event],
    withdrawn=data[,withdrawn],
    site=site,
    event.type=event.type
  )
  
     
  keep <- which(as.character(subject.data$subject) > "" & !is.na(subject.data$rand.date))
  if(length(keep) != nrow(subject.data)){
    warning(paste("Subjects",paste(subject.data[setdiff(1:nrow(subject.data),keep),subject],collapse=", "),
                  "have been removed due to invalid rand.date or missing subject ID."))
  }
  
  subject.data <- subject.data[keep,]
  
  subject.data <- warnNAtime(remove.0.time,subject.data)
  
  subject.data <- IncludeFollowUp(subject.data,followup)
  
  #finally run some validation checks which change the input data and output warnings
  validation.checks <- function(subject.data,idx,warning.message,fn){
    if(length(idx)>0){
      warning(paste("Subjects",paste(subject.data[idx,subject],collapse=", "),warning.message))
      subject.data <- fn(subject.data,idx)
    }
    subject.data
  }
  
  subject.data <- validation.checks(subject.data,
                    idx=which(subject.data$has.event==1 & is.na(subject.data$event.type)),
                    warning.message="have an event and no event type, their event type is set to 'Had Event'",
                    fn=function(subject.data,idx){
                      if(!"Had Event" %in% levels(subject.data$event.type)){
                        levels(subject.data$event.type) <- c(levels(subject.data$event.type),"Had Event")
                      }
                      subject.data$event.type[idx] <- "Had Event"
                      return(subject.data)
                    })
  
  subject.data <- validation.checks(subject.data,
                    idx=which(subject.data$has.event!=1 & !is.na(subject.data$event.type)),
                    warning.message="have an event type but did not have an event",
                    fn=function(subject.data,idx){
                      subject.data$event.type[idx] <- factor(NA)
                      subject.data$event.type <- droplevels(subject.data$event.type)
                      return(subject.data)
                    })
  
  subject.data <- validation.checks(subject.data,
                    idx=which(subject.data$has.event==1& subject.data$withdrawn==1),
                    warning.message="have an event and are withdrawn, assuming they have an event at the given time",
                    fn=function(subject.data,idx){
                      subject.data$withdrawn[idx] <- 0
                      return(subject.data)
                    })

  #further validation occurs in the validity function of the class 
  return(new("EventData", subject.data = subject.data, followup=followup))
}



##' @name show
##' @rdname show-methods
##' @aliases show,EventData-method
##' @export
setMethod("show",
          "EventData",
    function(object) {
         cat("EventData, use object@subject.data$param to access individual columns:\n")
         cat(str(object@subject.data))
     })


##' @name summary
##' @rdname summary-methods
##' @aliases summary,EventData-method
##' @export
setMethod("summary","EventData",
  function(object){
    df <- object@subject.data 
    daysinyear <- standarddaysinyear()
    
    if(nrow(df)>0){
    
      cat(paste("Number of subjects:",nrow(df),"\n"))
      cat(paste("Number of events:",sum(df$has.event==1),"\n"))
    
      if(sum(df$has.event==1) != 0 && length(levels(df$event.type)) != 1){
        tab <- table(df$event.type)
        lapply(names(tab),function(y){cat(paste(" Of type ",y,": ",tab[y],"\n",sep=""))})
      }
    
      cat(paste("Number of withdrawn:", sum(df$withdrawn==1),"\n"))
      cat(paste("First subject randomized:",as.Date(min(df$rand.date),origin="1970-01-01"),"\n"))
      cat(paste("Last subject randomized:",as.Date(max(df$rand.date),origin="1970-01-01"),"\n"))
    
      if(sum(object@subject.data$has.event)>0){    
        cat(paste("First Event:",as.Date(min(LastDate(df[df$has.event==1,]),na.rm=TRUE),origin="1970-01-01"),"\n"))
        cat(paste("Last Event:",as.Date(max(LastDate(df[df$has.event==1,]),na.rm=TRUE),origin="1970-01-01"),"\n"))
      }
      
      av <- average.rec(nrow(df),as.numeric(min(df$rand.date)),as.numeric(max(df$rand.date))) 
      cat(paste("Average recruitment (subjects/day):",roundForceOutputZeros(av,2),"\n"))
    
      if(!is.infinite(object@followup)){
        cat(paste("Subjects followed for ",round(object@followup), " days (",
                round(object@followup/daysinyear,2)," years)\n",sep=""))
      
        cat(paste("Number of subjects censored at end of follow up period:",
                  sum(object@subject.data$censored.at.follow.up),"\n"))
      
      }
    }
    else{
      cat("Empty data frame!\n")
    }
})



##' Fit a survival model to an EventData object
##' @rdname fit-methods
##' @name fit
##' @param object an EventData object
##' @param ... Additional arguments to the function
##' @param dist character, either weibull or loglogistic, which model
##' is to be fit 
##' @return an EventModel object
##' @export
if (!isGeneric("fit")){
  setGeneric("fit", function(object,...) standardGeneric("fit"))
}



##' @name fit
##' @rdname fit-methods
##' @aliases fit,EventData-method
##' @export
setMethod("fit","EventData",function(object,dist="weibull"){
  
  if(!dist %in% c("weibull","loglogistic")){
    stop("dist must be weibull or loglogistic")
  }
  
  if(nrow(object@subject.data)==0)stop("Empty data frame!") 
  if(sum(object@subject.data$has.event)==0){
    stop("Cannot fit a model to a dataset with no events")
  }
  
  #subjects with time = 0 are set to NA for the model fitting
  #so they are ignored
  indat <- object@subject.data
  indat$time <- ifelse(indat$time==0,NA,indat$time)
  
  model<-survreg(Surv(time, has.event) ~ 1, data=indat, dist=dist, y = TRUE)
  
  new("EventModel",model=model,event.data=object,
      simParams=FromDataParam(object=model,type=dist))
})



##' @rdname plot-methods
##' @name plot
##' @aliases plot,EventData,missing-method
##' @export
setMethod( "plot",
  signature( x="EventData", y="missing" ),
  function(x, xlab="log(t)", ylab="log(-log(S(t)))", main="", ...) {
    
    if(nrow(x@subject.data)==0)stop("Empty data frame!")
    if(sum(x@subject.data$has.event)==0){
      stop("Cannot fit a model to a dataset with no events")
    }
    
    model <- survfit(Surv(time, has.event) ~ 1, data=x@subject.data,...)
      
    res <- data.frame(t=model$time, s=model$surv)
    res <- res[res$t>0 & res$s>0 & res$s<1,]
    
    df <- data.frame(x=log(res$t),y=log(-log(res$s)))
    
    p <- ggplot(df, aes_string(x="x", y="y")) + geom_point() +
      stat_smooth(method="lm", se=FALSE, color="red", size=1 ) +
      xlab(xlab) + ylab(ylab) +
      ggtitle(main) + theme_bw() +
      theme(panel.grid.minor = element_line(colour="gray", size=0.5))
    p
  }
)



##' Method to create new \code{EventData} object only including recruitment times
##' @param object The  object from which to create the new \code{EventData} object
##' @rdname OnlyUseRecTimes-methods
##' @return An \code{EventData} object with time=0 and no events or withdrawn subjects
##' @name OnlyUseRecTimes
##' @export
setGeneric("OnlyUseRecTimes",function(object) standardGeneric("OnlyUseRecTimes"))

##' @rdname OnlyUseRecTimes-methods
##' @name OnlyUseRecTimes
##' @aliases OnlyUseRecTimes,EventData-method
##' @export
setMethod("OnlyUseRecTimes","EventData",function(object){
  object@subject.data$withdrawn <- 0
  object@subject.data$has.event <- 0
  object@subject.data$censored.at.follow.up <- 0
  object@subject.data$time <- 0
  object@subject.data$event.type <- factor(NA)
  object@subject.data$event.type <- droplevels(object@subject.data$event.type)
  object
})


##' Cut the Event Data
##' 
##' Create a new EventData from an existing one which shows how the
##' data would have looked on a given date (assuming no lag in reporting of 
##' events and if necessary subjects have been censored at cutoff point).
##' If subject was censored before the cut date then their censored date remains
##' unchanged
##' @param object The EventData object
##' @param date The date to cut the data at
##' @rdname CutData-methods
##' @name CutData
##' @return \code{EventData} object with the data censored at the appropriate point
##' @export
setGeneric("CutData",function(object,date) standardGeneric("CutData"))


##' @rdname CutData-methods
##' @name CutData
##' @aliases CutData,EventData-method
##' @export
setMethod("CutData","EventData",function(object,date){

  cut.date <- FixDates(date)
  
  subject.data <- object@subject.data
  
  #only include subjects who have been randomized by this time
  subject.data <- subject.data[subject.data$rand.date<=cut.date,]
  if(nrow(subject.data)==0){
    stop("Cut date before first subject randomization")
  }
  
  censored.times <- cut.date - subject.data$rand.date + 1
  idx <- which(censored.times < subject.data$time)
  
  if(length(idx)==0&&nrow(subject.data)==nrow(object@subject.data)){
    warning("Cut date is too late (no ongoing subjects at this time) and so has no effect")
  }
  
  
  
  subject.data$time <- pmin(censored.times,subject.data$time)
  subject.data$has.event[idx] <- 0
  subject.data$event.type[idx] <- factor(NA)
  subject.data$event.type <- droplevels(subject.data$event.type)
  subject.data$withdrawn[idx] <- 0
  
  EventData(
    data=subject.data,
    subject="subject",
    rand.date="rand.date",
    has.event="has.event",
    withdrawn="withdrawn",
    time="time",
    site="site",
    event.type="event.type",
    followup=object@followup
  )
  
})

##' This function calculates the event type for pfs data
##' 
##' This function is called before creating the EventData object 
##' @param has.event A vector or whether subjects had event 
##' @param prog.date A vector of dates (or strings in the standard eventPrediction allowed formats)
##' of progression.date or blank if unknown 
##' @param dth.date A vector of dates (or strings in the standard eventPrediction allowed formats)
##' of death date or blank if unknown
##' @return A vector with NA if subject does not have event, 
##' "Death" if dth.date <= prog.date or prog.date blank,
##' "Progression (not death)" if prog.date < dth.date or dth.date blank
##' "Progression (unknown if death)" if both prog.date and dth.date are blank
##' @export
CalculateProgEventTypes <- function(has.event,prog.date,dth.date){
  #called before creating EventDate object so extra validation needed  
  
  if(any(!has.event %in% c(0,1))){
    stop("Invalid has.event argument")
  }
  
  prog.date <- FixDates(prog.date)
  dth.date <- FixDates(dth.date)
  
  if(!(length(prog.date)==length(dth.date) && length(prog.date) ==length(has.event))){
    stop("Invalid lengths of arguments")
  }
  
  unlist(lapply(seq_along(has.event),function(x){
    if(!has.event[x]) return(NA)
    
    dth <- dth.date[x]
    prog <- prog.date[x]
    
    if(!is.na(prog) && !is.na(dth)){
      if(dth<=prog) return("Death")  
      return("Progression (not death)")
    }  
    
    
    #just dth return dth
    if(!is.na(dth)) return("Death")
    
    #just prog return prog
    if(!is.na(prog)) return("Progression (not death)")
    
    #Otherwise are relying on lastDate so do not know which
    return("Progression (unknown if death)")
    
  }))
  
  
}


##' Create an \code{EventData} object with no subjects
##' 
##' @param followup The time subjects are followed until censoring  
##' @export
EmptyEventData <- function(followup=Inf){
  
  if(length(followup) > 1 || !is.numeric(followup) || followup < 0){
    stop("Invalid followup argument")
  }
  
  d <- data.frame(subject=character(0),
                  randDate=numeric(0),
                  has.event=numeric(0),
                  withdrawn=numeric(0),
                  time=numeric(0))
  
  #convert to an empty EventData object
   EventData(data=d,
             subject="subject",
             rand.date="randDate",
             has.event="has.event",
             withdrawn="withdrawn",
             time="time",
             followup=followup)
}

##' Calculate the number of days at risk
##' 
##' For \code{EventData} object this is the number of days
##' at risk of the input data. For \code{SingleSimDetails} object
##' this is the median numberof days at risk in the simulation set at the given time
##' 
##' @param object The object to calculate the days at risk  
##' @param ... Additional arguments passed to the function
##' @param times A vector of dates for calculating cutting the simulated date at in order to
##' calculate the number of days at risk by this point.
##' @name CalculateDaysAtRisk
##' @rdname CalculateDaysAtRisk-methods
##' @export
setGeneric("CalculateDaysAtRisk",function(object,...)standardGeneric("CalculateDaysAtRisk"))


##' @rdname CalculateDaysAtRisk-methods
##' @name CalculateDaysAtRisk
##' @aliases CalculateDaysAtRisk,EventData-method
##' @export
setMethod("CalculateDaysAtRisk","EventData",
          function(object){
            return(sum(object@subject.data$time))            
          }
)

##' Makes a barplot with the number of events over time
##' 
##' @param object The object to create the event versus time plot 
##' @param ... Additional arguments passed to the function
##' @param timeunit The resoloution on the x-axis (month, weeks or quarter)
##' @name EventVsTimePlot
##' @rdname EventVsTimePlot-methods
##' @export
setGeneric("EventVsTimePlot",function(object,timeunit="Months",...)standardGeneric("EventVsTimePlot"))


##' @rdname EventVsTimePlot-methods
##' @name EventVsTimePlot
##' @aliases EventVsTimePlot,EventData-method
##' @export
setMethod("EventVsTimePlot","EventData",
          function( object, timeunit="month" ){
            my.data <- object@subject.data
            my.data$lastdate <- LastDate( my.data )
            my.data$has.event <- as.integer( my.data$has.event )
            my.data$mytimeunit <- as.Date( cut( my.data$lastdate, 
                                                breaks = timeunit ) )
            
            mybreaks <- if( timeunit == "quarter" ) "3 months" else "1 month" 
            
            ggplot( data = my.data, aes_string( x="mytimeunit", y="has.event" )) +
              stat_summary( fun.y = sum, geom = "bar") + 
              scale_x_date(
                date_labels = "%Y-%b",
                date_breaks = mybreaks ) + 
              theme_bw() + 
              theme( axis.text.x = element_text(angle = 45, hjust = 1) ) +
              xlab( "" ) +
              ylab( "Observed events" )
          }
)
scientific-computing-solutions/eventPrediction documentation built on May 29, 2019, 3:44 p.m.