R/otherFun.R

#   Build and Reload Package:  'Ctrl + Shift + B'
#   Check Package:             'Ctrl + Shift + E'
#   Test Package:              'Ctrl + Shift + T'
#' @export

############################# little functions #################################
## add.row
add.row <- function(df, new.row) {
  # This function is used to add a new row to the end of a data.frame.
  # Each element of the new row has the same class as the column it belongs to
  # However, factor will be convert to character firstly
  # Args:
  #   df: The data.frame to be added to
  #   new.row: a vector of the new values added to the end of df.
  #            The length of the new.row should be equal to number of df columns
  # Return:
  #   df with new row added to the end. The classes of each column is preserved
  f <- sapply(df, is.factor)
  df[f] <- lapply(df[f], as.character)
  
  df.class <- lapply(df, class)
  new.row <- as.list(new.row)
  if(length(df.class) != length(new.row)) {
    stop('Length does not match number of columns')
  }
  
  new.row.list <- lapply(1:length(new.row), function(x) {
    new.class <- as(new.row[[x]], df.class[[x]])
  })
  
  df[nrow(df) + 1, ] <- new.row.list
  
  return(df)
}
# df <- data.frame(x = c('A', 'B', 'C'), y = 1:3, z = 4:6, stringsAsFactors = F)
# df <- data.frame(x = c('A', 'B', 'C'), y = 1:3, z = 4:6)
# new.row <- c('Total', colSums(df[, -1]))
# add.row(df = df, new.row = new.row)

## paste.mat
#' @export
# paste elements to a matrix, and return a matrix with the same dims
paste.mat <- function(x, ...) {
  # This function is used to paste matrix/data.frame, and return a matrix with
  # the same dim as x.
  # Arg:
  #   x: the first matrix/data.frame, which is used to determine the dimension
  #   ...: any other elements that are being pasted to the x, it can be a
  #        single value, a vector, and a matrix. (data.frame must be converted
  #        to matrix before being passed into the function)
  # Return:
  #   a matrix with the same dimension as x, column name reserved.
  if(is.data.frame(x)) x <- as.matrix(x)
  nrow.x <- nrow(x)
  value <- paste0(x, ...)
  out.mat <- matrix(value, nrow = nrow.x)
  colnames(out.mat) <- colnames(x)
  return(out.mat)
}
# df <- data.frame(a=1:3, b = 4:6, c = 7:9)
# paste.mat(df, '(', as.matrix(df), ')')
# paste.mat(df, '_', 11:19)

# ifelse prevent data type change
#' @export
ifelse.safe <- function(cond, yes, no) {
  # The ifelse function may change the returned data format (e.g., date -> num)
  # this simple function can prevent the change
  # reference to:
  # http://stackoverflow.com/questions/6668963/how-to-prevent-ifelse-from-turning-
  # date-objects-into-numeric-objects
  structure(ifelse(cond, yes, no), class = class(no))
}

## KM curve
#' @export
km.curve <- function(data, time, status, x, plot = TRUE) {
  dt.km.obo <- lapply(x, function(xx) {
    sf.x <- survfit(as.formula(paste0('Surv(', time, ',', status, ') ~', xx)),
      data = data)
    df.x <- data.frame(time = sf.x$time, surv = sf.x$surv, variable = xx,
      group = rep(gsub(paste0(xx, '='), '', names(sf.x$strata)), sf.x$strata))
  })
  
  dt.km.obo <- do.call(rbind, dt.km.obo)
  
  out <- by(data = dt.km.obo, INDICES = dt.km.obo$variable, FUN = function(m) {
    m <- droplevels(m)
    m <- ggplot(m, aes(x = time, y = surv, color = group)) +
      geom_line() +
      scale_color_discrete(name = '') +
      labs(x = NULL, y = NULL) +
      facet_wrap(~variable, nrow = 4, scales = 'free') +
      theme_simple(plot.margin=unit(c(.5, 0, .5, .5), "cm"))
  })
  
  if(plot == TRUE) do.call(grid.arrange, out)
  return(dt.km.obo)
}
# sf <- km.curve(data = dt.conv, time = 'Conversion_Time_Months',
#   status = 'Conversion_Status', x = col.x[1:6], plot = TRUE)

## calculate the survival probability at each time for each record one by one
#' @export
survexp.obo <- function(data, ratetable, ...) {
  # calculate the survival table for each record one by one
  # Args:
  #    the same as the survexp model, except that the formula is not required
  # Return:
  #    suvival function at each time step, for each record
  
  id = split(1:nrow(data), cut(1:nrow(data), ceiling(nrow(data) / 5000)))
  
  pred.obo <- lapply(id, function(x) {
    pred <- survexp(~ ID, ratetable = ratetable,
      data = data.frame(data[x, ], ID = x), ...)
    t(pred$surv)
  })
  
  pred.all <- do.call(rbind, pred.obo)
}
# rs <- survexp.obo(data = dt.conv.test, ratetable = cox.conv.train



## beatufied correlation plot
#' @export
corrplot.beautify <- function(cor.mat) {
  # The layout and fonts of the default corrplot output doesn't look good
  # this function is used to beautify the corrplot
  # Arg:
  #    cor.mat: correlation matrix generated by cor() function
  par(cex = .8)
  corrplot(round(cor.mat, 2), type = 'lower', tl.srt = 15, addCoef.col = "black",
    cl.cex = 1.5, tl.cex = 1.2, tl.col = 'black', mar = c(0, 0, 0, 0),
    col=colorRampPalette(c("blue","white","red"))(200))
  par(cex = 1)
}
JianhuaHuang/streamlineR documentation built on May 7, 2019, 10:40 a.m.