Basic Usage
In survivALL: Continuous Biomarker Assessment by Exhaustive Survival Analysis

knitr::opts_chunk$set(collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE, fig.align = 'center', results = 'asis', fig.show = 'hold', fig.width = 7, fig.height = 5)

Quick example using NKI breast cancer data

note: this data is pre-subsetted to only include patients with complete distant metastasis information (e.dmfs & t.dmfs)

library(survivALL)
library(Biobase)
library(ggplot2)

data(nki_subset)

Example : Plotting

We use a continuous measure, here a vector of gene expression, to re-order our survival data and then compute hazard ratios and pvalues for all points of separation

xpr_vec <- exprs(nki_subset)["NM_020974", ] #expression vector for SCUBE2 (anti-correlated with proliferation)

plotALL(
        measure = xpr_vec, #expression data
        srv = pData(nki_subset), #survival information
        time = "t.dmfs", #time-to-outcome
        event = "e.dmfs", #outcome type
        bs_dfr = c(), #thresholding data would go here
        measure_name = "SCUBE2", #our gene's name
        title = "SCUBE2 prognostic capacity in a mixed\npopulation of invasive breast cancer samples", #plot title
        )

Note that we can add additional elements using standard ggplot2 syntax. Here we add a horizontal indicator of the most significant point of separation

a_random_x_axis_value <- 123

plotALL(measure = xpr_vec, 
        srv = pData(nki_subset), 
        time = "t.dmfs", 
        event = "e.dmfs", 
        bs_dfr = c(),
        measure_name = "SCUBE2", 
        title = "SCUBE2 prognostic capacity in a mixed\npopulation of invasive breast cancer samples") + 
    geom_vline(xintercept = a_random_x_axis_value, linetype = 5)

Plotting multiple genes simultaneously

We first organise our measure data, our expression vectors for three genes of interest SCUBE2, FOS and ERBB2 before applying each in a loop, specifying a common and sensible y-axis range using ggplot2 conventions. (To choose the limits we produce the plots first, select a rational range by eye and then recompute with the newly specified limits). We then combine the figures using the cowplot::plot_grid() function.

geneset <- data.frame(refseq_id = c("NM_020974", "NM_002051", "NM_004448"), hgnc_id = c("SCUBE2", "GATA3", "ERBB2"), stringsAsFactors = FALSE)

xpr_lst <- lapply(geneset$refseq_id, function(id){
                exprs(nki_subset)[id,]
        })
names(xpr_lst) <- geneset$hgnc_id

plot_lst <- lapply(geneset$hgnc_id, function(id){
                       plotALL(
                               measure = xpr_lst[[id]], #expression data
                               srv = pData(nki_subset), #survival information
                               time = "t.dmfs", #time-to-outcome
                               event = "e.dmfs", #outcome type
                               bs_dfr = c(), #thresholding data 
                               measure_name = id, #our gene's name
                               title = id #plot title
                               ) + 
                           ylim(-2.5, 2.5)  
        })

cowplot::plot_grid(plotlist = plot_lst, nrow = 1)

p <- cowplot::plot_grid(plotlist = plot_lst, nrow = 1)
print(p)

Example : Returning a dataframe

Alternatively, we can return only the computed statistics as a dataframe for further calculations, comparisons and manipulations

survivall_out <- survivALL(
                           measure = xpr_vec, #expression data
                           srv = pData(nki_subset), #survival information
                           time = "t.dmfs", #time-to-outcome
                           event = "e.dmfs", #outcome type
                           bs_dfr = c(), #thresholding data
                           measure_name = "SCUBE2" #our gene's name
                           )

head(survivall_out)

library(pander)
library(magrittr)
head(survivall_out) %>% pandoc.table()

Analysing and displaying multiple genes

We can return the results for multiple genes as a single dataframe simply by row-binding the results. Organised in this way we can plot multiple hazard ratio distributions as a single figure

survivall_lst <- lapply(geneset$hgnc_id, function(id){
                            survivALL(
                                      measure = xpr_lst[[id]], #expression data
                                      srv = pData(nki_subset), #survival information
                                      time = "t.dmfs", #time-to-outcome
                                      event = "e.dmfs", #outcome type
                                      bs_dfr = c(), #thresholding data
                                      measure_name = id #our gene's name
                                      )
                           })

survivall_dfr <- do.call(rbind, survivall_lst)

ggplot(survivall_dfr, aes(x = index, y = HR, colour = name)) + 
    geom_hline(yintercept = 0, linetype = 3) + 
    geom_point() + 
    ylim(-2.5, 2.5) + 
    ggthemes::theme_pander()