R/fromDataInternal.R

#This file contains the private functions
#for the predict from data part of the package
#apart from those used within the simulate function
#see fromDataSimInternal.R. For functions called by 
#AddTimeColumn see timeInternal.R

##'@include common.R simQOutput.R timeInternal.R
NULL


# Deal with subjects who have time = 0 or NA when creating 
# the EventData object
# 
# @param remove.0.time logical if TRUE then all subjects with time = NA or 0 are removed from the
# data set and not included in the object. If FALSE then they are included in the simulation 
# (but not in the model fitting)
# @param subject.data A data frame to be included in the subject.data slot 
# of the EventData object
# @return subject.data with appropriate subjects removed/time set to 0
warnNAtime <- function(remove.0.time,subject.data){
  
  if(!is.logical(remove.0.time) || length(remove.0.time) > 1){
    stop("Invalid remove.0.time argument")
  }
  
  index <- which(subject.data$time==0 | is.na(subject.data$time))
  if(length(index)==0){
    return(subject.data)
  }
  
  
  if(remove.0.time){
    warning(paste("Subjects",paste(subject.data[index,]$subject,collapse=", "),"have time=NA or 0",
                  "and have been removed from the data set."))
    subject.data <- subject.data[!1:nrow(subject.data) %in% index,] 
  }
  else{
    warning(paste("Subjects", paste(subject.data[index,]$subject, collapse=", "),
                  "have time=NA or 0, their time has been set to 0 and",
                  "they will not be used when fitting the model but will have event times simulated",
                  "when performing the predictions unless withdrawn is set to 1."))
    subject.data$time[is.na(subject.data$time)] <- 0
  }
  
  subject.data
}

# Get the multiplicative factor to convert units to
# days for the KM-plot
# @param units "Days", "Months" or "Years"
# @param daysinyear The number of days in a year (e.g. 365 or 365.25)
# @return 1, daysinyear/12 or daysinyear respectively
GetScaleForKM <- function(units,daysinyear){
  if(!units %in% c("Days","Months","Years")){
    stop("Invalid units argument")
  }
  
  xscale <- 1
  if(units == "Months") xscale <- daysinyear/12
  if(units == "Years") xscale <- daysinyear
  
  return(xscale)
}

# Function which inputs a vector of dates/strings
# and outputs a vector of dates
# 
# If S3 dates are input they are returned unchanged.
# If character strings are input then they must all
# have the same format either "YYYY-MM-DD", "DD/MM/YYYY"
# or "DD Month YYYY"
# 
# @param vec The vector of characters
# @return a vector of Dates.
# If conversion fails an error is thrown
FixDates <- function(vec){
     
   if(all(is.na(vec))) return(as.Date(vec))
   if(class(vec)=="Date") return(vec)
   
   vec[vec==""] <- NA
   formats <- c("%d/%m/%Y","%d %b %Y","%Y-%m-%d")
       
   ans <- lapply(formats,function(x){
     as.Date(vec,format=x)
   })
   
   which_date <- unlist(lapply(1:3,function(x){
     sum(is.na(ans[[x]]))==sum(is.na(vec))
   }))
   
   if(sum(which_date)!= 1){
     stop("Error reading date format it should be of the form
        YYYY-MM-DD, DD/MM/YYYY or DD Month YYYY ") 
   }
   
  ans <- ans[[which(which_date,TRUE)]]
  years <- as.character((ans),format="%Y") 
  years <- years[!is.na(years)]
   
  if(any(as.numeric(years) < 1000) ){
    stop("All years must have 4 digits, e.g. 01/01/2015 not 01/01/15")
  } 
   
  return(ans) 
}


# Set the censored.at.follow.up column for EventData@@subject.data
# @param subject.data An EventData@@subject.data data frame without the
# censored.at.follow.up column
# @param followup The follow period (in days)
# @return subject.data with additional censored.at.follow.up column
# Warnings will be output if dates have to be changed due to invalid data 
# for example if event occurs after followup period 
IncludeFollowUp <- function(subject.data,followup){
  
  subject.data$censored.at.follow.up <- as.numeric(subject.data$time > followup)
  subject.data$time <-ifelse(subject.data$censored.at.follow.up==1,followup,subject.data$time)
  
  
  resetToFollowUp <- function(data,idx,warning.msg){
    if(length(idx)>0){
      data$withdrawn[idx] <- 0
      data$has.event[idx] <- 0
      data$event.type[idx] <- NA
      warning(paste("Subjects",paste(data[idx,"subject"],collapse=", "),warning.msg))
    }
    data
  }
  
  subject.data <- resetToFollowUp(data=subject.data,
                   idx=which(subject.data$withdrawn==1 & subject.data$censored.at.follow.up==1),
                   warning.msg="have withdrawn dates after followup period and so have been censored at followup")
  
  resetToFollowUp(data=subject.data,
                 idx=which(subject.data$has.event==1 & subject.data$censored.at.follow.up==1),
                 warning.msg="had event after followup period and so have been censored at followup instead of having an event")

}


# Create an empty data frame for 
# event.pred.data or time.pred.data
# slots of the \code{WeibullResults} class
# @return This empty data frame
EmptyPredictionDF <- function(){
  
  data.frame(time=as.Date(character()),
                 event=numeric(),
                 CI_low=numeric(),
                 CI_high=numeric(),
                 daysatrisk=numeric())
}


# Create the text for summarizing a \code{FromDataResults object}
# @param object A \code{FromDataResults} object
# @param round.method See \code{eventPrediction:::myRound.Date}
# @param text.width numeric The width of the text to be returned. See \code{base::strwrap}
# @param show.predictions Logical if TRUE then include the time.pred.data and
# event.pred.data information in the text 
# @param show.at.risk Output the median number of at risk years
# @return A string (which can be output using cat)
getFromDataResultsSummaryText <- function(object,round.method="None",text.width=60,show.predictions=TRUE,show.at.risk=TRUE){
  
  daysinyear <- standarddaysinyear()
  
  data <- object@event.data
  indat <- data@subject.data
  indat$event.date <- ifelse(indat$has.event, LastDate(indat),NA)
  
  N.subjects <- length(indat$rand.date) #subjects in data set
  
  title <- ""
  if(N.subjects!=0){
    last.date <- if(any(!is.na(indat$event.date))) as.Date(max(indat$event.date, na.rm=TRUE),origin="1970-01-01")
                 else "NA"
    title <- paste0(N.subjects, " patients recruited where last patient in on ", max(indat$rand.date))
    
    if(class(last.date)=="Date"){
      title <- paste0(title, " and last event observed at ", last.date,
              " (",sum( indat$has.event, na.rm=TRUE), " events). ")
    }
    else{
      title <- paste0(title," and no events observed. ")
    }
  }
    
    
  if(object@Naccrual > 0){
      title <- paste0(title, 
                      " Out of ", N.subjects+object@Naccrual, " patients ",  object@Naccrual, 
                      " were simulated using ", object@accrualGenerator@text," ")
  }
  
  
  title <- paste(title, "Using ", object@simParams@type ," survival model. ",sep="")
  
  NA.note <- ""
  
  if(nrow(object@event.pred.data)!=0 && show.predictions){
    df <- object@event.pred.data 
    ans <- lapply(1:nrow(df),function(x){
      target <- myRound.Date(c(df[x,"CI_low"],df[x,"time"],df[x,"CI_high"]),round.method)
      
      retVal <- paste0("The time at which ", df[x,"event"]," events have occurred is predicted to be ",
             target[2], " [", target[1], ", ", target[3], "]" )
    
      if(show.at.risk){
        retVal <- paste(retVal," with an expected ",round(df[x,"daysatrisk"]/daysinyear,2)," years of subjects at risk at this time",sep="")
      }  
      paste(retVal,". ",sep="")
    }) 
  
    if(any(object@event.pred.data$time==as.Date(Inf,origin="1970-01-01"))){
      NA.note <- paste("An expected time of NA implies that,",
                       "in fewer than 50% of simulations the given number of events occurred.")
    }else if(any(object@event.pred.data$CI_high==as.Date(Inf,origin="1970-01-01"))){
      NA.note <- paste("A CI value of NA implies that, due to subject dropout, ",
                       "in fewer than ",100*(1-object@limit),
                        "% of the simulations the given number of events occurred.", sep="")
    }
    
    title <- paste(title,paste(ans,collapse=""),sep="")
  }
  
  if(nrow(object@time.pred.data)!=0 && show.predictions){
    df <- object@time.pred.data 
    ans <- lapply(1:nrow(df),function(x){
      retVal <- paste0("On ", df[x,"time"]," the predicted number of events is ",
             df[x,"event"], " [", df[x,"CI_low"], ", ", df[x,"CI_high"], "]" )
    
      if(show.at.risk){
        retVal <- paste(retVal," with an expected ",round(df[x,"daysatrisk"]/daysinyear,2)," years of subjects at risk",sep="")
      }  
      paste(retVal,". ",sep="")  
    }) 
    
    title <- paste(title,paste(ans,collapse=""),sep="")
  }
  
  title <- paste(title,NA.note,sep="")
  
  return(AddLineBreaks(title,text.width=text.width))
}


# Round dates
# 
# @param mydates a vector of 3 dates, the
# lower CI value, the median and the upper CI value
# @param round.method character, if "toMonths" then 
# the following procedure is applied:
# lower CI: take the month of the date 15 days earlier \cr
# median: take the month of the given date \cr
# upper CI: take the month of the date 15 days later \cr 
# otherwise the dates are unchanged
# @return The rounded dates
myRound.Date <- function(mydates,round.method){
 
  if(round.method=="toMonths"){
     c(format( mydates[1] - 15, format="%b %Y" ),
       format( mydates[2], format="%b %Y"),
       format( mydates[3] + 15, format="%b %Y" ))
  }
  else{
    mydates
  }
}


# Calculate the last date on study for subjects
# 
# Code: \code{ifelse(time==0,rand.date,time+rand.date-1)}
# 
# @param time A vector of times on study
# @param rand.date A vector of randomization dates
# @return A vector of dates
internal.Date <- function(time,rand.date){
   as.Date(ifelse(time==0,rand.date,time+rand.date-1),origin="1970-01-01")
}

# Calculate the last date on study for subjects
#
# The last date is the date of censoring or date of withdrawal
# or date of event for subjects
# @param indat  A data frame e.g. EventData@@subject.data
# @return A vector of dates
LastDate <- function(indat){
  internal.Date(indat$time,indat$rand.date)
}



# Create a data frame which contains only subjects
# who have been censored before a given date
# @param data A data frame e.g. EventData@@subject.data
# @param censor.date The given censored cutoff date
# @return The subset of \code{data} which is censored (i.e. does not
# have an event and is not withdrawn) before the given date
GetLaggedSubjects <- function(data,censor.date){
  data <- data[LastDate(data) < censor.date,]
  data[data$has.event==0 & data$withdrawn==0 & data$censored.at.follow.up==0,]
}



# Function to derive the time on study for subjects given 
# columns such as dth.date, event.date etc.
# 
# See the vignette for the logic used in calculating the time. 
# Note the changing of times so they are <= followup period is
# carried out in a subsequent function
# 
# see timeInternal.R
#
# @param data A data frame 
# @param rand.date string, the column name of randomization dates
# @param has.event string, the column name of whether a subject had an event
# (1) or not (0) 
# @param withdrawn string, the column name of whether a subject has withdrawn
# (1) or not (0)  
# @param subject, the column name of the subject ID
# @param time.list A list of column names used to calculate the time on study for example
# \code{time=list(last.date="lastDate",dth.date="dthDate", prog.date="progDate",
# withdrawn.date="withdrawnDate")}
# @return A vector containing time on study
AddTimeColumn <- function(data,rand.date,has.event,withdrawn,subject,time.list){
  
  allowed.colnames <- c("dth.date","event.date","prog.date","withdrawn.date","last.date")
  validate.time.list.arguments(data,rand.date,has.event,withdrawn,time.list,allowed.colnames)
  
  #create the date objects
  for(x in allowed.colnames){
    if(!is.null(time.list[[x]])) data[,x] <- FixDates(data[,time.list[[x]]]) 
  }
  
  #check prog is <= dth if they exist
  if("dth.date" %in% names(time.list) && "prog.date" %in% names(time.list)){
    idx <- which(!is.na(data[,"dth.date"]) & 
                 !is.na(data[,"prog.date"]) & 
                   data[,"dth.date"] < data[,"prog.date"])
    if(length(idx)>0){
      warning(paste("Subjects",paste(data[idx,subject] ,collapse=", ")  ,
                    "have progression date after death date. This is invalid and should be fixed"))
    }
  }
  
  #This vector will be populated with subjects time on study and returned 
  ans <- rep(NA,nrow(data))
  
  #first deal with subjects who are censored
  are.censored <- which(!data[,has.event] & !data[,withdrawn])
  ans <- Time.Deal.With.Censored(ans,data,rand.date,are.censored,time.list,allowed.colnames[1:4],subject)
  
  
  #next deal with subjects who have event
  subs.had.event <- which(data[,has.event]==1)
  ans <- Time.Deal.With.Had.Event(ans,data,rand.date,subs.had.event,time.list,allowed.colnames[1:3],subject,has.event)
  
  #finally subjects who have withdrawn
  has.withdrawn <- which(data[,withdrawn]==1 & data[,has.event]==0)
  ans <- Time.Deal.With.Withdrawn(ans,data,rand.date,has.withdrawn,time.list,withdrawn,subject,has.event)
  
    
  #output warning if subject has withdranw date but have not been withdrawn
  if(!is.null(time.list[["withdrawn.date"]])){
    r <- which(any(!is.na(data[,"withdrawn.date"]) & data[,withdrawn]==0))
    if(any(r)){
      warning(paste("Subjects",paste(data[r,subject],collapse=", "), "have not withdrawn yet have a withdrawn date.",
              "The withdrawn dates are ignored"))
    }
  }

  #if we have not been able to calculate a time then output
  #an error if had event or warning if censored/withdrawn
  if(any(is.na(ans))){
    r <- which(is.na(ans))
    if(any(data[r,has.event]==1)){
      s <- intersect(which(data[,has.event]==1), r)
      stop(paste("Subjects",paste(data[s,subject],collapse=", "), "have an event but no time on study can be calculated.",
                 "Is there any missing data in your data set?"))
    }
    
    warning(paste("Subjects",paste(data[r,subject],collapse=", "),
                  "are censored/withdrawn subject(s) and no time on study can be calculated.",
                  "For these subjects the time on study is set to 0. Is there any missing data",
                  "in your data set?"))
  }  
  
  return(as.numeric(ans))
      
} 


# Maximum Likelihood estimate of uniform
# accrual parameter k
# 
# @param B recruitment period
# @param s.i a vector of recruitment
# times (numeric, time since start of recruitment period)
# @return estimate of k = 1/(logB- mean(log(s.i)))
MLestimateK <- function(B,s.i){
  if(any(s.i<=0) || B <= 0 || any(s.i > B)) stop("Invalid arguments in MLestimateK")
  
  s <- mean(log(s.i))
  return(1/(log(B)-s))
}


# Calculate the average recruitment rate (subjects/day)
# @param N number of subjects recruited
# @param first.date the date of the first recruitment (numeric)
# @param last.date the date of the last recruitment (numeric)
# @return The average recruitment
average.rec <- function(N,first.date,last.date){ 
  N/(last.date-first.date+1)
}
scientific-computing-solutions/eventPrediction documentation built on May 29, 2019, 3:44 p.m.