R/baseline.R

#' Sort a clinical trial dataset
#' 
#' Sort a clinical trial dataset on one or two variables in it
#' 
#' @param data The data.frame to be sorted
#' @param id The name of the first sorting variable. Defaults to 'pid'
#' @param visit The name of the second sorting variable. Defaults to 'visitnum'
#' @return The input data.frame, resorted.
#' @export sortCTdata
sortCTdata <- function(data, id="pid", visit="visit"){
  # Sort the data on visit then pid
  wh <- try(data[order(data[, visit]), ], silent=TRUE)
  if (class(wh) != "try-error") data <- wh
  data <- data[order(data[, id]), ]
  invisible(data)
}

#' Get logical vector indicating if a value is a baseline value
#' 
#' Split data by subject, test and visit to compute a logical vector indicating if values are baseline values
#' 
#' @param val A vector of values of visit numbers
#' @param base The value of val if the visit is a baseline visit
#' @return A logical vector
#' @note If all values of val < base, the function attempts to return TRUE for the nearest value of val < base.
#'       Perhaps it should more properly return NAs
getLogicalBaseline <- function(val, base){
  # Need to account for subjects with no baseline visit.
  # Take the visit previous to baseline, if any
  # Return object needs to be a logical vector flagging:
  #   The baseline value if it exists
  #   Otherwise, the nearest pre-baseline value if that exists
  #   Otherwise the first value
  res <- val == base # Ok if a baseline visit exists
  if (sum(res) == 0){
    res <- val < base
    last.res <- sum(res) # Assumes already sorted in visit order
    res <- rep(FALSE, length(res))
    if (last.res > 0)
      res[last.res] <- TRUE
    else
      res[1] <- TRUE
  } # Close if (sum(res) == 0)
  
  res
}

#' Create flag variable to indicate baseline values
#' 
#' Create flag variable to indicate baseline values
#' 
#' @param data A data.frame, usually vital signs or lab data
#' @param flag The name of the column that ought to contain the flags
#' @param id The unique patient identifier
#' @param test The name of the column that identifies the test being performed
#' @param baseflag The value of flag for the baseline values. Defaults to 1
#' @param visit The name of the visit variable in data
#' @param basevisit The value of visit for the baseline values. Defaults to 0
#' @return A vector of 0s and 1s, 1s indicating the baseline values
#' @export getBaselineFlag
getBaselineFlag <- function(data, flag, id, test, visit="visit", baseflag=1, basevisit=0){
  # Create flag vector of 0s and 1s, or coerce existing flag vectors to 0s and 1s

  # Check if flag exists, create empty column if not
  flag <- try(data[, flag], silent=TRUE)
  if (class(flag) == "try-error")
    flag <- rep("", nrow(data))
  
  # Check if existing flag is empty
  if (all(is.na(flag)) | all(flag == "")){
    wh <- paste(data[, id], data[, test])
    sdata <- split(data[, visit], wh)
    flag <- lapply(sdata, getLogicalBaseline, base=basevisit)
    flag <- as.numeric(unsplit(flag, wh))
  }
  else # Otherwise replace incorrect missing values with 0s
    flag[flag != baseflag] <- 0
  
  flag
}


#' Add baseline data to a dataset
#' 
#' Add a column of baseline values to a dataset, given information on the baseline visit
#' 
#' @param data A data.frame to have baseline values added
#' @param id The name of the unique subject identifier variable. Defaults to 'subject'
#' @param baseline The name of the baseline column to be created
#' @param flag The name of the baseline flag variable in the data, if it exists
#' @param baseflag The value that flag takes if the value is a baseline value. Defaults to 1
#' @param visit The name of the variable identifying visits. Defaults to 'visit'
#' @param basevisit The value that visit has when it is a baseline visit. Defaults to 0
#' @param test The name of the variable that identifies the test being performed. Deafults to 'test'
#' @param value The name of the variable containing the values of the test result. Defaults to 'value'
#' @param keepFlag Whether to keep the column of baseline flags. Defaults to \code{keepFlag= FALSE}.
#' @return A data.frame similar to the input data.frame, but with a column of baseline values called 'baseline' that
#'         contains the baseline values for each element of test
#' @details The function will remove any rows with missing values in the \code{value}
#'   column. It tries to be sensible and make use of baseline flags or values if they already
#'   exist.
#' @export makeBaselines
makeBaselines <- function(data, id="subject", baseline="baseline", flag="baselineFlag", baseflag=1,
                          visit="visit", basevisit=0, test="test", value="value", keepFlag=FALSE){
  data <- data[order(data[, test]), ]
  data <- sortCTdata(data, id=id, visit=visit)

  nr <- nrow(data)
  data <- data[!is.na(data[, value]), ]
  if (nrow(data) < nr) warning("Rows with missing values in 'value' have been dropped")
  
  data[, flag] <- getBaselineFlag(data, flag, id, test, visit, baseflag, basevisit)

  fun <- function(d, id, values){
    flag <- d[, flag]

    b <- d[as.logical(flag), values]
    N <- rle(as.character(d[, id]))$lengths

    res <- try(rep(b, N), silent=TRUE)
    if (class(res) == "try-error")
      res <- rep(NA, nrow(d))
    d[, baseline] <- res
    d
  }

  # Need to loop on the values of test
  sdata <- split(data, data[, test])

  res <- lapply(sdata, fun, id=id, values=value)
  res <- do.call("rbind", res)

  res <- sortCTdata(res, id=id, visit=visit)
  rownames(res) <- 1:nrow(res)

  if (!keepFlag) res <- res[, names(res) != flag]

  invisible(res)
}
harrysouthworth/dmhelp documentation built on May 17, 2019, 3:02 p.m.