R/km.R

Defines functions km km_legend

Documented in km km_legend

#' Make a legend command suitable for km
#'
#' Make a legend command suitable for km
#'
#' @param x same as \code{legend}'s x
#' @param y same as \code{legend}'s y
#' @param levels factor levels
#' @param colors colors
#' @param lty lty
#' @param lwd lwd
#' @param title legend title
#' @export
km_legend <- function(x, y = NULL,
                      levels = NULL,
                      colors = NULL,
                      lty = 'solid',
                      lwd = 1,
                      title = NULL){
    x <- deparse(x)
    y <- deparse(y)
    lev <- deparse(levels)
    cols <- deparse(colors)
    lt <- deparse(lty)
    lw <- deparse(lwd)
    titl <- deparse(title)

    paste0("legend(",
           "x = ", x, ", ",
           "y = ", y, ", ",
           "title = ", titl, ", ",
           "bg = \"white\", ",
           "legend = ", lev, ", ",
           "col = ", cols, ", ",
           "lty = ", lt, ", ",
           "lwd = ", lw, ")")

}

#' Plots an 'enhanced' Kaplan-Meier plot, with base graphics package.
#' 
#' 
#' This function plots a Kaplan-Meier plot.
#' 
#' @param time survival time variable
#' @param status survival indicator variable
#' @param strata Stratifying variable (optional)
#' @param plot plot (default = TRUE) or only return estimates?
#' @param time_unit Time unit of x axis
#' @param time_by Time step x axis (in days)
#' @param quantile_probs quantile calculated (default to 0.5, aka median)
#' @param main Graph main title
#' @param ylab Y-axis label.
#' @param xlab X-axis label. If NULL a suitable default based on
#'     time_unit will be provided
#' @param xlim X-axis limit. If NULL a suitable default based on
#'     time_unit will be provided
#' @param ylim Y-axis limit. Default to c(0,1) be provided
#' @param reverse plot cumulative events
#' @param mark_censored mark censored observation
#' @param pch_censored pch used for censored observation
#' @param conf_int character. Can be 'default', 'none' or 'lines' or
#'     'shades'
#' @param conf_int_alpha a base level for alpha if \code{conf_int =
#'     'alpha'} (splitted by number of groups)
#' @param test tests: 'none' = don't plot tests, 'logr' = log-rank
#'     test, 'hr' = hazard ratio, 'both' = log-rank test and hazard
#'     ratio
#' @param cex_test cex parameter for test string
#' @param plot_grid plot grid
#' @param plot_n_at_risk Logical value: plot number at risk?
#' @param cex_n_at_risk cex of number at risk
#' @param legend_cmd Graph command to add legend, as string
#' @param ... Further \code{\link{lines.survfit}} parameters
#' @return The function plot the graph and return a list with
#'     Laplan-Meier statistics
#' @details The function make the hypothesis that times are measured
#'     in days; in not leave time_unit not specified
#' @examples
#' library(survival)
#' km(time = aml$time, status = aml$status, strata = aml$x)
#' @export
km <- function(time = NULL,
               status = NULL,
               ## NULL = non stratified, otherwise stratifying variable
               strata = NULL,
               ## plot anything,
               plot = TRUE,
               ## Time unit x axis
               time_unit = c('days', 'weeks', 'months', 'years'),
               ## Time step x axis (in days)
               time_by = NULL,
               ## quantiles calculated
               quantile_probs = 0.5,
               ## Main title
               main = '',
               ## Y axis label
               ylab = NULL,
               ## X axis nbel
               xlab = NULL,
               ## Y axis limits
               ylim = c(0L, 1L),
               ## X axis limits
               xlim = NULL,
               ## plot cumulative events?
               reverse = FALSE,
               ## mark censored observation
               mark_censored = TRUE,
               ## pch used for censored observation
               pch_censored = "'",
               ## Plot Confidence interval
               conf_int = c('default', 'none', 'lines', 'shades'),
               conf_int_alpha = 0.8,
               ## Test: none = don't plot tests, logr = logranktest,
               ##       hr = hazratio, both = both
               test = c('logr', 'hr', 'both', 'none'),
               cex_test = par("cex") * 0.8,
               ## plot grid
               plot_grid = TRUE,
               ## Plot number ad risk in the km
               plot_n_at_risk = TRUE,
               cex_n_at_risk = par("cex") * 0.8,
               ## Graph command to add legend, as string
               legend_cmd = NULL,
               ## Further lines.survfit params
               ...
               )
{

    ## TODO
    ## - permettere al plot di incastrarsi in un mfrow specificato a monte
    ## - inserire la possibilita' di visualizzare la mediana 
    ##   di sopravvivenza
    ## - se il plot non ha titolo e non ha test ottimizzare lo spazio
    ##   superiore eliminandolo
    ## - sistemare il settore sinistro del grafico per permettere che 
    ##   stringhe molto lunghe (il nome dei gruppi, se lunghi) 
    ##   non vengano tagliate negli at risk 
    ## - inserire il parametro data come specificabile,
    ##   per prendere da un data.frame??
   
    ## time, status: check existence
    if (is.null(status)) stop("'status' needed.")
    if (is.null(time)) stop("'time' needed.")

    ## strata
    if (!is.null(strata) && !is.factor(strata))
        strata <- factor(strata)
    
    ## time_by: NULL or numeric
    if(! (is.null(time_by) || is.numeric(time_by)))
        stop("'time_by' must be NULL or numeric")

    ## xlab, ylab: NULL or character
    if (! (is.null(ylab) || is.character(ylab)))
        stop('ylab must be NULL or character')
    if (! (is.null(xlab) || is.character(xlab)))
        stop('xlab must be NULL or character')
        
    ## time_unit, main, test: character
    if (! is.character(time_unit)) stop('time_unit must be character')
    if (! is.character(main)) stop('main must be character')
    if (! is.character(test)) stop('test must be character')
    
    ## plot_n_at_risk: logical
    if (! is.logical(plot_n_at_risk)) stop("'plot_n_at_risk' must be logical")

    ## conf_int: NULL or logical
    conf_int <- match.arg(conf_int)
    
    ## xlim: NULL or numeric (of length 2)
    if (! (is.null(xlim) || (is.numeric(xlim) && (2 == length(xlim)))))
        stop("'xlim' must be NULL or numeric vector of 2 elements")

    ## xlim: NULL or numeric (of length 2)
    if (! (is.null(ylim) || (is.numeric(ylim) && (2 == length(ylim)))))
        stop("'ylim' must be NULL or numeric vector of 2 elements")
    
    ## Argument matching, ... 'handling'
    time_unit <- match.arg(time_unit)
    test <- match.arg(test)
    dots <- list(...)
    
    ## Time divisor for days for the plot axis
    time_divisor <-
        (time_unit %in% 'days'   *   1)    + 
        (time_unit %in% 'weeks'  *   7)    + 
        (time_unit %in% 'months' *  30.43) + 
        (time_unit %in% 'years'  * 365.25)
    
    ## Default xlab and ylab if NULL is provided
    if (is.null(xlab)) xlab <- paste(toupper(substring(time_unit, 1, 1)),
                                     substring(time_unit, 2),
                                     sep = '')
    if (is.null(ylab)) ylab <- 'Probability'

    ## Default time_by if NULL is provided
    if (is.null(time_by)) {
        time_by <-
            (time_unit %in% 'days'   *  30)  + 
            (time_unit %in% 'weeks'  *   4)  + 
            (time_unit %in% 'months' *  12)  + 
            (time_unit %in% 'years'  *   1)
    }
        
    ## Check if it's a univariate or stratified plot; setup the
    ## estimates dataset and parameter defaults accordingly
    if (is.null(strata)) {
        db <- data.frame(time = time, status = status)
        db <- lbmisc::NA_remove(db)
        mod_formula <- survival::Surv(time, status) ~ 1
        univariate <- TRUE
        n_stratas <- 1
        strata_labels <- 'All '
        if ('default' %in% conf_int) conf_int <- 'lines'
    } else {
        db <- data.frame(time = time, status = status, strata = strata)
        db <- lbmisc::NA_remove(db)
        mod_formula <- survival::Surv(time, status) ~ strata
        univariate <- FALSE
        n_stratas <- nlevels(strata)
        strata_labels <- levels(strata)
        if ('default' %in% conf_int) conf_int <- 'none'
        if ((n_stratas != 2) & (test == 'hr')) {
            warning(paste0('HR can be plotted only with 2 groups. ',
                           'Changing to Log-rank tests'))
            test <- 'logr'
        }
    }

    ## -------------------------------------
    ## Estimates (km, curve comparison, cox)
    ## -------------------------------------

    ## Kaplan-Meyer survival estimate
    fit <- survival::survfit(mod_formula, data = db)
    sfit <- summary(fit)

    ## times for plot ticks, n at risk, summaries and so on
    ## x has to be based on time_by, if specified
    ## if not specified make 4 step
    times <- if (is.null(time_by)) seq(0, max(fit$time), length = 4)
             else seq(0, max(fit$time), by = time_by * time_divisor)
    
    ## Quantiles
    quantiles <- quantile(fit, probs = quantile_probs)
    quantiles <- do.call(cbind, quantiles)
    quantiles <- quantiles / time_divisor
    quantiles <- as.data.frame(quantiles)
    names(quantiles) <- c(if (quantile_probs == 0.5) "median" else "estimate",
                          "lower",
                          "upper")

    ## Summary estimates for presentation purposes
    est_times <- seq(from = 0L, to = max(times), by = time_divisor)
    sfit2 <- summary(fit, times = est_times)
    ret_sfit <- if (univariate)
                    with(sfit2,
                         data.frame('time' = round(time/time_divisor),
                                    'Estimate' = surv,
                                    'Lower'    = lower,
                                    'Upper'    = upper))
                else with(sfit2,
                          data.frame('time' = time / time_divisor,
                                     'group' = gsub('strata=', '', strata),
                                     'Estimate' =  surv,
                                     'Lower'    = lower,
                                     'Upper'    = upper))
    names(ret_sfit)[1] <- time_unit

    ## Tests
    if( !univariate ) {
        ## Log-rank test
        logr <- survival::survdiff(mod_formula, data = db)
        logr$df <- n_stratas - 1
        logr$p <- stats::pchisq(q = logr$chisq, df = logr$df,
                                lower.tail = FALSE)
        logr_string <- sprintf('Log-rank Test=%.2f, df=%d, p%s',	
                               logr$chisq, 
                               logr$df, 
                               lbmisc::pretty_pval(logr$p, equal = TRUE))
        ## Cox Model (and his summary)
        cox <- survival::coxph(mod_formula, data = db)
        scox <- summary(cox)
        scox_coefs <- stats::coefficients(scox)
        hr_string  <- sprintf('HR=%.2f (95%% CI, %.2f-%.2f, p%s)',
                              scox_coefs[2],
                              scox$conf.int[3],
                              scox$conf.int[4],
                              lbmisc::pretty_pval(scox_coefs[5], equal = TRUE))
        both_string <- paste(logr_string, hr_string, sep = ' - ')
	
        ## Choose which stat to print in the graph
        test_string <- switch(test,
                              logr = logr_string,
                              hr = hr_string,
                              both= both_string) 
    }

    ## obtain data for confidence intervals
    CI <- data.frame((unclass(sfit))[c('time', 'lower', 'upper')])
    CI_spl <- if (univariate) list('All' = CI)
              else  split(CI, f = sfit$strata)

    ## ------------
    ## Plot section
    ## ------------

    if (plot){

        ## reset default graphical parameters on exit
        old_par <- graphics::par(no.readonly = TRUE)
        on.exit(graphics::par(old_par))
    
        ## Color set-up: if given, otherwise set it black
        strata_col <- if ('col' %in% names(dots)) dots$col
                      else rep('black', n_stratas)

        ## Se si desidera inserire la tabella dei number at risk
        ## occorre impostare il margine inferiore, prevedendo un tot
        ## di righe opportune (determinate dal numero degli strati) 
        if (plot_n_at_risk) graphics::par('oma' = c(n_stratas + 0.1, 0, 0, 0))

        ## xlim definition
        if (is.null(xlim)) {
            xlim_inf <-  -(max(fit$time)/15)
            xlim_sup <- max(fit$time) + (- xlim_inf/3)
        } else {
            xlim_inf <- xlim[1]
            xlim_sup <- xlim[2]
        }
                
        ## Main plotting section
        graphics::plot(NA, NA,
                       xlim = c(xlim_inf, xlim_sup),
                       ylim = ylim,
                       axes = FALSE,
                       ylab = ylab,
                       xlab = xlab,
                       main = main)
        graphics::axis(2, las = 1)
        graphics::axis(1, at = times, labels = times/time_divisor)
        if (plot_grid)
            lbmisc::add_grid(at_x = times, at_y = graphics::axTicks(2))
        graphics::box()
        ## main line and confidence intervals
        if (reverse) {
            lines_fun <- 'event'
            switch(conf_int,
                   none  = graphics::lines(fit,
                                           fun = lines_fun,
                                           conf.int = FALSE,
                                           ...),
                   lines = graphics::lines(fit,
                                           fun = lines_fun,
                                           conf.int = TRUE,
                                           ...))
        } else {
            switch(conf_int,
                   none  = graphics::lines(fit,
                                           conf.int = FALSE,
                                           mark.time = mark_censored,
                                           pch = pch_censored,
                                           ...),
                   lines = graphics::lines(fit,
                                           conf.int = TRUE,
                                           mark.time = mark_censored,
                                           pch = pch_censored,
                                           ...),
                   shades = {
                       ## http://stackoverflow.com/questions/18584815/
                       mapply(FUN = function(ci, cols){
                           alpha <- conf_int_alpha/n_stratas
                           ## if it's a base color add alpha, otherwise if
                           ## it is already a hexadecimal, leave it
                           ## unchanged
                           col <- if (cols %in% grDevices::colors()) 
                                      lbmisc::col2hex(cols, alpha = alpha)
                                  else
                                      cols
                           graphics::polygon(
                                         c(ci$time,  rev(ci$time)),
                                         c(ci$lower, rev(ci$upper)),
                                         col = col,
                                         border = FALSE)},
                           CI_spl,
                           strata_col)
                       graphics::lines(fit,
                                       conf.int = FALSE,
                                       pch = pch_censored,
                                       mark.time = mark_censored,
                                       ...)
                   })
        }
        
        ## Add legend
        if (!is.null(legend_cmd)) {
            eval(parse(text = legend_cmd))
        }

        ## Add stat string to title
        if (!univariate && (test %in% c('logr','hr','both') )) {
            graphics::mtext(test_string, line = 0.2,
                            cex = cex_test,
                            family = 'sans', font = 3)
        }

        ## Add number at risk
        if (plot_n_at_risk) {

            ## Print header
            graphics::mtext('At risk', side = 1, line = 4, adj = 1,
                            cex = cex_n_at_risk,
                            at = xlim_inf, font = 2)

            ## Utilizzo axis per plottare gli a rischio negli strati
            ## (la linea utilizzabile in presenza di titolo di asse
            ## delle x e' dalla 4 in poi: la 4 e' per il titolo, dalla 5
            ## in poi per i dati
            
            my_time <- summary(fit, times = times, extend = TRUE)$time
            n_risk <- summary(fit, times = times, extend = TRUE)$n.risk
            if (univariate) {
                strata <- rep(strata_labels, length(my_time))
            } else {
                strata <- summary(fit, times = times, extend = TRUE)$strata
            }
            
            risk_data <- data.frame(strata = strata,
                                    time = my_time,
                                    n_risk = n_risk)
            if (!univariate) {
                levels(risk_data$strata) <- sub('(^strata=)(.*$)',
                                                '\\2', 
                                                levels(risk_data$strata))
            }
            
            ## Lo split pone i dati nell'ordine della lista
            ## nell'ordine dei dati, quindi per coerenza con l'ordine
            ## dei colori e' necessario riordinare la lista
            
            spl_risk_data <- split( risk_data, risk_data$strata)
            spl_risk_data <- spl_risk_data[ strata_labels ]

            for(label in names(spl_risk_data) ) {
                prog <- which( names(spl_risk_data) %in% label ) 
                group_line_lab <- 4 + prog
                group_line_num <- group_line_lab - 1
                group_col <- strata_col[prog]
                ## plot label del gruppo
                graphics::mtext(label,
                                side = 1,
                                cex = cex_n_at_risk,
                                line = group_line_lab,
                                at = xlim_inf,
                                adj = 1,
                                col = group_col)
                ## plot dati per ogni time_by
                graphics::axis(1,
                               at = times,
                               cex.axis = cex_n_at_risk,
                               labels = spl_risk_data[[label]]$n_risk,
                               line = group_line_num,
                               tick = FALSE,
                               col.axis = group_col)
            }

        }

    }
    ## Return Stats wheter or not plot has been done
    if (univariate) {
        invisible(list('km' = fit,
                       'quantiles' = quantiles,
                       'estimates' = ret_sfit))
    } else {
        invisible(list('km' = fit,
                       'quantiles' = quantiles,
                       'estimates' = ret_sfit,
                       'logrank' = logr,
                       'cox' = cox,
                       'scox' = scox))
    }
    
}
lbraglia/lbsurv documentation built on June 19, 2022, 11:13 p.m.