R/analysis.R

Defines functions diffAnalyses singleDiffAnalyses createSparklines createDensitySparklines leveneTest plotDistribution plotPointsStyle transformData transformValues transformOptions createEventPlotting filterGroups basicStats eventPlotOptions spearman fisher kruskal fligner levene ttest wilcox prepareEventPlotOptions optimalPSIcutoff optimalSurvivalCutoff testSurvivalCutoff labelBasedOnCutoff testSurvival processSurvival plotSurvivalCurves survdiff.survTerms survfit.survTerms processSurvTerms updateClinicalParams getColumnsTime getAttributesTime processSurvData getPSIperPatient getValuePerPatient getClinicalDataForSurvival patientMultiMatchWarning analysesServer analysesUI selectizeGeneInput missingDataGuide missingDataModal

Documented in analysesServer analysesUI basicStats createDensitySparklines createEventPlotting createSparklines diffAnalyses eventPlotOptions filterGroups fisher fligner getAttributesTime getClinicalDataForSurvival getColumnsTime getPSIperPatient getValuePerPatient kruskal labelBasedOnCutoff levene leveneTest missingDataGuide missingDataModal optimalPSIcutoff optimalSurvivalCutoff patientMultiMatchWarning plotDistribution plotPointsStyle plotSurvivalCurves prepareEventPlotOptions processSurvData processSurvival processSurvTerms selectizeGeneInput singleDiffAnalyses spearman survdiff.survTerms survfit.survTerms testSurvival testSurvivalCutoff transformData transformOptions transformValues ttest updateClinicalParams wilcox

#' @include app.R
NULL

#' Missing information modal template
#'  
#' @param session Shiny session
#' @param dataType Character: type of data missing
#' @param buttonId Character: identifier of button to take user to load missing 
#' data
#' 
#' @examples
#' \dontrun{
#'  session <- session$ns
#'  buttonInput <- "takeMeThere"
#'  buttonId <- ns(buttonInput)
#'  dataType <- "Inclusion levels"
#'  missingDataModal(session, buttonId, dataType)
#'  observeEvent(input[[buttonInput]], missingDataGuide(dataType))
#' }
#' @return NULL (this function is used to modify the Shiny session's state)
missingDataModal <- function(session, dataType, buttonId) {
    template <- function(buttonLabel) {
        errorModal(
            session, paste("Load", tolower(dataType)),
            "This analysis requires", tolower(dataType), "to proceed.",
            footer=actionButton(buttonId, buttonLabel, "data-dismiss"="modal",
                                class="btn-danger"))
    }
    
    switch(dataType,
           "Inclusion levels"=template("Load or calculate"),
           template("Load"))
}

#' @rdname missingDataModal
missingDataGuide <- function(dataType) {
    js <- loadRequiredData(dataType)
    runjs(js)
}

#' Create input to select a gene
#' 
#' @param id Character: identifier
#' @inheritParams shiny::selectizeInput
#' 
#' @return HTML elements
selectizeGeneInput <- function(id, label="Gene", choices=NULL, multiple=FALSE) {
    onFocus  <- NULL
    onChange <- NULL
    if (!multiple) {
        onFocus  <- I(sprintf(
            'function() { $("#%s")[0].selectize.clear(); }', id))
        onChange <- I(sprintf(
            'function(value) { $("#%s")[0].selectize.blur(); }', id))
    }
    
    selectizeInput(
        id, label, width="100%", multiple=multiple,
        choices=c("Type to search for a gene..."="", choices), 
        options=list(onFocus=onFocus, onChange=onChange, 
                     plugins=list('remove_button', 'drag_drop')))
}

#' @rdname appUI
#' 
#' @param id Character: identifier
#' @param tab Function to process HTML elements
#' 
#' @importFrom shinyBS bsTooltip
#' @importFrom shiny NS div icon fluidRow column tags
analysesUI <- function(id, tab) { 
    ns <- NS(id)
    uiList <- getUiFunctions(
        ns, "analysis", 
        priority=paste0(c("dimReduction", "diffSplicing", "diffExpression", 
                          "survival", "info"), "UI"))
    
    # Load available analyses
    analyses <- tagList()
    for (k in seq(uiList)) {
        ui <- uiList[[k]]
        if ( is(ui, "shiny.tag.list") ) {
            tabName  <- attr(ui, "name")
            analyses <- c(analyses, list(tabPanel(tabName, ui)))
        } else {
            headerName <- attr(ui, "name")
            analyses   <- c(analyses, list(headerName))
            for (each in seq(ui)) {
                name     <- attr(ui[[each]], "name")
                analyses <- c(analyses, list(tabPanel(name, ui[each])))
            }
        }
        analyses <- c(analyses, list("----")) # Add a divider after each section
    }
    analyses <- analyses[-length(analyses)]
    do.call(tab, c(list(icon="flask", title="Analyses", menu=TRUE), analyses))
}

#' @rdname appServer
#' 
#' @importFrom shiny observe observeEvent
#' @importFrom shinyjs hide show
analysesServer <- function(input, output, session) {
    # Run server logic from the scripts
    getServerFunctions("analysis", 
                       priority=paste0(c("dimReduction", "diffSplicing", 
                                         "diffExpression", "survival", "info"),
                                       "Server"))
}


# Survival analyses helper functions --------------------------------------

#' Helper text to explain what happens when a patient matches multiple samples
#' when performing survival analysis
#' 
#' @return Character
patientMultiMatchWarning <- function() {
    paste("While stratifying patients for survival analysis, patients",
          "with multipe samples are assigned the average value of their",
          "corresponding samples. However, for patients with both disease",
          "and normal samples, it may be inappropriate to include the",
          "values of their normal samples for survival analysis.")
}

#' Retrieve clinical data based on attributes required for survival analysis
#' 
#' @param ... Character: names of columns to retrieve
#' @param formulaStr Character: right-side of the formula for survival analysis
#' 
#' @return Filtered clinical data
getClinicalDataForSurvival <- function(..., formulaStr=NULL) {
    cols <- unlist(list(...))
    if (!is.null(formulaStr) && formulaStr != "") {
        form <- formula(paste("1 ~", formulaStr))
        cols <- c(cols, all.vars(form))
    }
    clinical <- getClinicalData(cols)
    return(clinical)
}

#' Assign average sample values to their corresponding patients
#' 
#' @param data One-row data frame/matrix or vector: values per sample for a 
#' single gene
#' @param match Matrix: match between samples and patients
#' @param clinical Data frame or matrix: clinical dataset (only required if the
#' \code{patients} argument is not handed)
#' @param patients Character: patient identifiers (only required if the
#' \code{clinical} argument is not handed)
#' @param samples Character: samples to use when assigning values per patient 
#' (if NULL, all samples will be used)
#' 
#' @return Values per patient
#' @export
getValuePerPatient <- function(data, match, clinical=NULL, patients=NULL,
                               samples=NULL) {
    hasOneRow     <- !is.null(nrow(data)) && nrow(data) == 1
    isNamedVector <- is.vector(data) && !is.null(names(data))
    if (!hasOneRow && !isNamedVector)
        stop("Data needs to either have only one row or be a vector with",
             "sample identifiers as names.")
    
    if (is.null(clinical) && is.null(patients))
        stop("You cannot leave both 'clinical' and 'patients' arguments ",
             "as NULL.")
    else if (is.null(patients))
        patients <- rownames(clinical)
    
    if (!is.numeric(data)) {
        ns   <- names(data)
        data <- as.numeric(data)
        names(data) <- ns
    }
    
    # Filter by samples to use
    if (!is.null(samples)) match <- match[names(match) %in% samples]
    match <- match[!is.na(match)]
    
    # For each patient, assign the average value of its respective samples
    res <- sapply(split(data[names(match)], match), mean, na.rm=TRUE)
    return(res)
}

#' @rdname getValuePerPatient
#' @export
getValuePerSubject <- getValuePerPatient

#' @rdname getValuePerPatient
#' @export
assignValuePerPatient <- getValuePerPatient

#' @rdname getValuePerPatient
#' @export
assignValuePerSubject <- getValuePerPatient

#' @rdname getValuePerPatient
#' @param psi Data frame or matrix: values per sample
#' @param ... Deprecated arguments
#' @export
getPSIperPatient <- function(psi, match, clinical=NULL, patients=NULL, ...) {
    .Deprecated("getValuePerPatient")
    getValuePerPatient(psi, match, clinical, patients)
}

#' Process survival data to calculate survival curves
#' 
#' @inheritParams getAttributesTime
#' @param group Character: group relative to each patient
#' @param clinical Data frame: clinical data
#' @param survTime \code{survTime} object: Times to follow up, time start, time 
#' stop and event (optional)
#' 
#' @details The event time will only be used to determine whether the event has
#' occurred (1) or not (0) in case of missing values.
#' 
#' If \code{survTime} is NULL, the survival times will be fetch from the
#' clinical dataset according to the names given in \code{timeStart},
#' \code{timeStop}, \code{event} and \code{followup}. This can became quite slow
#' when using the function in a for loop. If these variables are constant, 
#' consider running the function \code{\link{getAttributesTime}} to retrieve the
#' time of such columns once and hand the result to the \code{survTime} argument
#' of this function.
#' 
#' @return Data frame with terms needed to calculate survival curves
processSurvData <- function(event, timeStart, timeStop, followup, group, 
                            clinical, survTime=NULL) {
    if ( is.null(survTime) ) {
        survTime <- getAttributesTime(clinical, event, timeStart, timeStop,
                                      followup)
    }
    
    # Create new time using the starting time replacing the NAs with
    # days to last follow up
    nas <- is.na(survTime$start)
    survTime$time <- survTime$start
    survTime$time[nas] <- survTime$followup[nas]
    
    # Indicate event of interest and groups
    survTime$event <- ifelse(!is.na(survTime$event), 1, 0)
    if (!is.null(names(group)))
        survTime[names(group), "groups"] <- group
    else
        survTime$groups <- group
    
    if (!is.null(timeStop)) {
        # Create new time using the ending time replacing the NAs
        # with days to last follow up
        nas <- is.na(survTime$stop)
        survTime$time2 <- survTime$stop
        survTime$time2[nas] <- survTime$followup[nas]
    }
    return(survTime)
}

#' Retrieve the time for given columns in a clinical dataset
#' 
#' @param clinical Data frame: clinical data
#' @param event Character: name of column containing time of the event of
#' interest
#' @param timeStart Character: name of column containing starting time of the
#' interval or follow up time
#' @param timeStop Character: name of column containing ending time of the 
#' interval (only relevant for interval censoring)
#' @param followup Character: name of column containing follow up time
#' 
#' @return Data frame containing the time for the given columns
#' 
#' @export
#' 
#' @examples 
#' df <- data.frame(followup=c(200, 300, 400), death=c(NA, 300, NA))
#' rownames(df) <- paste("patient", 1:3)
#' getAttributesTime(df, event="death", timeStart="death", followup="followup")
getAttributesTime <- function(clinical, event, timeStart, timeStop=NULL,
                              followup="days_to_last_followup") {
    cols <- c(followup=followup, start=timeStart, stop=timeStop, event=event)
    
    # Retrive time for given attributes
    timePerPatient <- function(col, clinical) {
        cols <- grep(col, colnames(clinical), value=TRUE)
        row  <- apply(clinical[cols], 1, function(i)
            if(!all(is.na(i))) max(as.numeric(i), na.rm = TRUE) else NA)
        return(row)
    }
    survTime <- lapply(cols, timePerPatient, clinical)
    
    survTime <- as.data.frame(survTime)
    class(survTime) <- c("data.frame", "survTime")
    return(survTime)
}

#' @rdname getAttributesTime
#' @export
getColumnsTime <- function(clinical, event, timeStart, timeStop=NULL,
                           followup="days_to_last_followup") {
    .Deprecated("getAttributesTime")
    getAttributesTime(clinical=clinical, event=event, timeStart=timeStart, 
                      timeStop=timeStop, followup=followup)
}

#' Update available clinical attributes when the clinical data changes
#' 
#' @param session Shiny session
#' @param attrs Character: patient attributes
#' 
#' @importFrom shiny observe updateSelectizeInput
#' @return NULL (this function is used to modify the Shiny session's state)
updateClinicalParams <- function(session, attrs) {
    if (!is.null(attrs)) {
        # Allow the user to select any "days_to" attribute available
        daysTo <- grep("days_to_", attrs, value=TRUE, fixed=TRUE)
        subDaysTo <- gsub(".*(days_to_.*)", "\\1", daysTo)
        choices <- unique(subDaysTo)
        names(choices) <- gsub("_", " ", choices, fixed=TRUE)
        names(choices) <- capitalize(names(choices))
        
        # Update choices for starting or follow up time
        updateSelectizeInput(
            session, "timeStart", choices=list(
                "Select a clinical attribute"="",
                "Suggested attributes"=choices,
                "All clinical attributes"=attrs),
            selected="days_to_death")
        
        # Update choices for ending time
        updateSelectizeInput(
            session, "timeStop", choices=list(
                "Select a clinical attribute"="",
                "Suggested attributes"=choices,
                "All clinical attributes"=attrs))
        
        # Update choices for events of interest
        names(choices) <- gsub("Days to ", "", names(choices), fixed=TRUE)
        names(choices) <- capitalize(names(choices))
        updateSelectizeInput(
            session, "event", choices=list(
                "Select a clinical attribute"="",
                "Suggested events"=choices,
                "All clinical attributes"=attrs),
            selected="days_to_death")
    } else {
        choices <- c("No clinical data loaded"="")
        updateSelectizeInput(session, "timeStart", choices=choices)
        updateSelectizeInput(session, "timeStop",  choices=choices)
        updateSelectizeInput(session, "event",     choices=choices)
    }
}

#' Process survival curves terms to calculate survival curves
#'
#' @inheritParams processSurvData
#' @param censoring Character: censor using "left", "right", "interval" or
#' "interval2"
#' @param scale Character: rescale the survival time to "days", "weeks",
#' "months" or "years"
#' @param formulaStr Character: formula to use
#' @param coxph Boolean: fit a Cox proportional hazards regression model? FALSE 
#' by default
#' @param survTime survTime object: times to follow up, time start, time stop
#' and event (optional)
#' 
#' @importFrom stats formula
#' @importFrom survival coxph Surv
#'
#' @details 
#' If \code{survTime} is NULL, the survival times will be fetch from the
#' clinical dataset according to the names given in \code{timeStart},
#' \code{timeStop}, \code{event} and \code{followup}. This can became quite slow
#' when using the function in a for loop. If these variables are constant, 
#' consider running the function \code{\link{getAttributesTime}} to retrieve the
#' time of such columns once and hand the result to the \code{survTime} argument
#' of this function.
#'
#' @return A list with a \code{formula} object and a data frame with terms
#' needed to calculate survival curves
#' @export
#' 
#' @examples 
#' clinical <- read.table(text = "2549   NA ii  female
#'                                 840   NA i   female
#'                                  NA 1204 iv    male
#'                                  NA  383 iv  female
#'                                1293   NA iii   male
#'                                  NA 1355 ii    male")
#' names(clinical) <- c("patient.days_to_last_followup", 
#'                      "patient.days_to_death",
#'                      "patient.stage_event.pathologic_stage",
#'                      "patient.gender")
#' timeStart  <- "days_to_death"
#' event      <- "days_to_death"
#' formulaStr <- "patient.stage_event.pathologic_stage + patient.gender"
#' survTerms  <- processSurvTerms(clinical, censoring="right", event, timeStart,
#'                                formulaStr=formulaStr)
processSurvTerms <- function(clinical, censoring, event, timeStart, 
                             timeStop=NULL, group=NULL, formulaStr=NULL, 
                             coxph=FALSE, scale="days",
                             followup="days_to_last_followup", survTime=NULL) {
    # Ignore timeStop if interval-censoring is not selected
    if (!grepl("interval", censoring, fixed=TRUE) || timeStop == "") 
        timeStop <- NULL
    
    # Check if using or not interval-censored data
    formulaSurv <- ifelse(is.null(timeStop),
                          "Surv(time/%s, event, type=censoring) ~", 
                          "Surv(time/%s, time2, event, type=censoring) ~")
    scaleStr <- scale
    scale <- switch(scaleStr, days=1, weeks=7, months=30.42, years=365.25)
    formulaSurv <- sprintf(formulaSurv, scale)
    
    survData <- processSurvData(event, timeStart, timeStop, followup, group, 
                                clinical, survTime)
    
    # Estimate survival curves by groups or using formula
    if (formulaStr == "" || is.null(formulaStr)) {
        formulaTerms <- "groups"
    } else {
        formulaTerms <- formulaStr
        factor <- sapply(clinical, is.factor)
        clinical[factor] <- lapply(clinical[factor], as.character)
        survData <- cbind(survData, clinical)
    }
    
    form <- formula(paste(formulaSurv, formulaTerms))
    
    if (coxph) {
        res <- coxph(form, data=survData)
        
        if (!is.null(res$xlevels$groups)) {
            # Correct group names
            name <- res$xlevels$groups[-1]
            names(res$means) <- name
            names(res$coefficients)  <- name
            # names(res$wald.test)  <- name
        }
    } else {
        res <- list(form=form, survTime=survData)
    }
    
    res$scale <- scaleStr
    class(res) <- c("survTerms", class(res))
    return(res)
}

#' Compute estimates of survival curves
#' 
#' @param survTerms \code{survTerms} object: processed survival terms
#' @inheritDotParams survival::survfit.formula -formula -data
#' 
#' @importFrom survival survfit
#' @method survfit survTerms
#' 
#' @return \code{survfit} object. See \code{survfit.object} for details. Methods
#' defined for survfit objects are \code{print}, \code{plot}, \code{lines}, and 
#' \code{points}.
#' @export survfit.survTerms
#'
#' @examples 
#' clinical <- read.table(text = "2549   NA ii  female
#'                                 840   NA i   female
#'                                  NA 1204 iv    male
#'                                  NA  383 iv  female
#'                                1293   NA iii   male
#'                                  NA 1355 ii    male")
#' names(clinical) <- c("patient.days_to_last_followup", 
#'                      "patient.days_to_death",
#'                      "patient.stage_event.pathologic_stage",
#'                      "patient.gender")
#' timeStart  <- "days_to_death"
#' event      <- "days_to_death"
#' formulaStr <- "patient.stage_event.pathologic_stage + patient.gender"
#' survTerms  <- processSurvTerms(clinical, censoring="right", event, timeStart,
#'                                formulaStr=formulaStr)
#' require("survival")
#' survfit(survTerms)
survfit.survTerms <- function(survTerms, ...) {
    res <- survfit(survTerms$form, data=survTerms$survTime, ...)
    res$scale <- survTerms$scale
    
    # Correct group names
    groups <- deparse(survTerms$form[[3]])
    if (!is.null(res$strata) && groups == "groups") {
        name <- paste0("^", groups, "=")
        names(res$strata) <- gsub(name, "", names(res$strata))
    }
    return(res)
}

#' Test differences between survival curves
#' 
#' @param survTerms survTerms object: processed survival terms
#' @inheritDotParams survival::survdiff -formula -data
#' 
#' @importFrom survival survdiff
#' 
#' @return an object of class "survfit". See survfit.object for details. Methods
#' defined for survfit objects are print, plot, lines, and points.
#' @export
#'
#' @examples
#' clinical <- read.table(text = "2549   NA ii  female
#'                                 840   NA i   female
#'                                  NA 1204 iv    male
#'                                  NA  383 iv  female
#'                                1293   NA iii   male
#'                                  NA 1355 ii    male")
#' names(clinical) <- c("patient.days_to_last_followup", 
#'                      "patient.days_to_death",
#'                      "patient.stage_event.pathologic_stage",
#'                      "patient.gender")
#' timeStart  <- "days_to_death"
#' event      <- "days_to_death"
#' formulaStr <- "patient.stage_event.pathologic_stage + patient.gender"
#' survTerms  <- processSurvTerms(clinical, censoring="right", event, timeStart,
#'                                formulaStr=formulaStr)
#' survdiff.survTerms(survTerms)
survdiff.survTerms <- function(survTerms, ...) {
    survdiff(survTerms$form, data=survTerms$survTime, ...)
}

#' Plot survival curves
#' 
#' @param surv Survival object
#' @param interval Boolean: show interval ranges? FALSE by default
#' @param mark Boolean: mark times? TRUE by default
#' @param title Character: plot title
#' @param pvalue Numeric: p-value of the survival curves
#' @param scale Character: time scale; default is "days"
#' @param auto Boolean: return the plot automatically prepared (TRUE) or only
#' the bare minimum (FALSE)? TRUE by default
#' 
#' @importFrom shiny tags br
#' 
#' @return Plot of survival curves
#' @export
#' @examples 
#' require("survival")
#' fit <- survfit(Surv(time, status) ~ x, data = aml)
#' plotSurvivalCurves(fit)
plotSurvivalCurves <- function(surv, mark=TRUE, interval=FALSE, pvalue=NULL, 
                               title="Survival analysis", scale=NULL,
                               auto=TRUE) {
    hc <- hchart(surv, ranges=interval, markTimes=mark)
    if (auto) {
        if (is.null(scale)) {
            if (is.null(surv$scale))
                scale <- "days"
            else
                scale <- surv$scale
        }
        
        hc <- hc %>%
            hc_chart(zoomType="xy") %>%
            hc_title(text=title) %>%
            hc_yAxis(title=list(text="Survival proportion"),
                     crosshair=TRUE) %>%
            hc_xAxis(title=list(text=paste("Time in", scale)),
                     crosshair=TRUE) %>%
            hc_tooltip(
                headerFormat = paste(
                    tags$small("{point.x:.3f}", scale), br(),
                    span(style="color:{point.color}", "\u25CF "),
                    tags$b("{series.name}"), br()),
                pointFormat = paste(
                    "Survival proportion: {point.y:.3f}", br(),
                    "Records: {series.options.records}", br(),
                    "Events: {series.options.events}", br(),
                    "Median: {series.options.median}")) %>%
            hc_plotOptions(series=list(stickyTracking=FALSE))
    }
    
    if (!is.null(pvalue))
        hc <- hc_subtitle(hc, text=paste("log-rank p-value:", pvalue))
    return(hc)
}

#' Check if survival analyses successfully completed or returned errors
#' 
#' @param session Shiny session
#' @inheritDotParams processSurvTerms
#' 
#' @importFrom shiny tags
#' @return List with survival analysis results
processSurvival <- function(session, ...) {
    # Calculate survival curves
    survTerms <- tryCatch(processSurvTerms(...), error=return)
    if ("simpleError" %in% class(survTerms)) {
        if (survTerms[[1]] == paste("contrasts can be applied only to",
                                    "factors with 2 or more levels")) {
            errorModal(session, "Formula error",
                       "Cox models can only be applied to 2 or more groups.")
        } else {
            errorModal(session, "Formula error",
                       "Maybe you misplaced a ", tags$kbd("+"), ", ",
                       tags$kbd(":"), " or ", tags$kbd("*"), "?", br(),
                       br(), "The following error was raised:", br(), 
                       tags$code(survTerms$message))
        }
        return(NULL)
    }
    return(survTerms)
}

#' Test the survival difference between groups of patients
#' 
#' @inheritParams survdiff.survTerms
#' @inheritDotParams survival::survdiff -formula -data
#' 
#' @note Instead of raising errors, an \code{NA} is returned
#' 
#' @return p-value of the survival difference or \code{NA}
#' @export
#' 
#' @examples 
#' require("survival")
#' data <- aml
#' timeStart  <- "event"
#' event      <- "event"
#' followup   <- "time"
#' data$event <- NA
#' data$event[aml$status == 1] <- aml$time[aml$status == 1]
#' censoring  <- "right"
#' formulaStr <- "x"
#' survTerms <- processSurvTerms(data, censoring=censoring, event=event, 
#'                               timeStart=timeStart, followup=followup,
#'                               formulaStr=formulaStr)
#' testSurvival(survTerms)
testSurvival <- function (survTerms, ...) {
    # If there's an error with survdiff, return NA
    pvalue <- tryCatch({
        # Test the difference between survival curves
        diff <- survdiff.survTerms(survTerms, ...)
        
        # Calculate p-value with given significant digits
        pvalue <- 1 - pchisq(diff$chisq, length(diff$n) - 1)
        return(as.numeric(signifDigits(pvalue)))
    }, error = function(e) NA)
    return(pvalue)
}

#' Label groups based on a given cutoff
#' 
#' @param data Numeric: test data
#' @param cutoff Numeric: test cutoff
#' @param label Character: label to prefix group names (NULL by default)
#' @param gte Boolean: test with greater than or equal to cutoff (TRUE) or use
#' less than or equal to cutoff (FALSE)? TRUE by default
#' 
#' @return Labelled groups
#' @export
#' 
#' @examples
#' labelBasedOnCutoff(data=c(1, 0, 0, 1, 0, 1), cutoff=0.5)
#' 
#' labelBasedOnCutoff(data=c(1, 0, 0, 1, 0, 1), cutoff=0.5, "Ratio")
#'                    
#' # Use "greater than" instead of "greater than or equal to"
#' labelBasedOnCutoff(data=c(1, 0, 0, 0.5, 0, 1), cutoff=0.5, gte=FALSE)
labelBasedOnCutoff <- function (data, cutoff, label=NULL, gte=TRUE) {
    len <- length(data)
    group <- rep(NA, len)
    
    if (gte) {
        comp <- `>=`
        str1 <- "&gt;="
        str2 <- "&lt;"
    } else {
        comp <- `>`
        str1 <- "&gt;"
        str2 <- "&lt;="
    }
    group <- comp(data, cutoff)
    
    # Assign a value based on the inclusion levels cutoff
    if (is.null(label)) {
        group[group == "TRUE"]  <- paste(str1, cutoff)
        group[group == "FALSE"] <- paste(str2, cutoff)
    } else {
        group[group == "TRUE"]  <- paste(label, str1, cutoff)
        group[group == "FALSE"] <- paste(label, str2, cutoff)
    }
    
    length(group) <- len
    return(group)
}

#' Test the survival difference between two survival groups given a cutoff
#' 
#' @inheritParams processSurvTerms
#' @param cutoff Numeric: Cutoff of interest
#' @param data Numeric: elements of interest to test against the cutoff
#' @param filter Boolean or numeric: elements to use (all by default)
#' @inheritDotParams processSurvTerms -group -clinical
#' @param session Shiny session
#' @param survivalInfo Boolean: return extra survival information
#' 
#' @importFrom survival survdiff
#' @return p-value of the survival difference
testSurvivalCutoff <- function(cutoff, data, filter=TRUE, clinical, ...,
                               session=NULL, survivalInfo=FALSE) {
    group <- labelBasedOnCutoff(data, cutoff, label="Inclusion levels")
    
    # Calculate survival curves
    if (!is.null(session)) {
        survTerms <- processSurvival(session, group=group, clinical=clinical,
                                     ...)
        if (is.null(survTerms)) return(NULL)
    } else {
        survTerms <- tryCatch(processSurvTerms(group=group, clinical=clinical, 
                                               ...), 
                              error=return)
        if ("simpleError" %in% class(survTerms)) return(NA)
    }
    
    pvalue <- testSurvival(survTerms)
    if (is.na(pvalue)) pvalue <- 1
    if (survivalInfo) attr(pvalue, "info") <- survfit(survTerms)
    return(pvalue)
}

#' Calculate optimal data cutoff that best separates survival curves
#' 
#' Uses \code{stats::optim} with the Brent method to test multiple cutoffs and
#' to find the minimum log-rank p-value.
#'
#' @inheritParams processSurvTerms
#' @inheritParams testSurvivalCutoff
#' @param data Numeric: data values
#' @param session Shiny session (only used for the visual interface)
#' @param lower,upper Bounds in which to search (if NULL, they will be 
#' automatically set to 0 and 1 if all data values are within that interval;
#' otherwise, they will be set to the minimum and maximum values of data)
#' 
#' @return List containg the optimal cutoff (\code{par}) and the corresponding 
#' p-value (\code{value})
#' @export
#' 
#' @examples 
#' clinical <- read.table(text = "2549   NA ii  female
#'                                 840   NA i   female
#'                                  NA 1204 iv    male
#'                                  NA  383 iv  female
#'                                1293   NA iii   male
#'                                  NA 1355 ii    male")
#' names(clinical) <- c("patient.days_to_last_followup", 
#'                      "patient.days_to_death",
#'                      "patient.stage_event.pathologic_stage",
#'                      "patient.gender")
#' timeStart  <- "days_to_death"
#' event      <- "days_to_death"
#' 
#' psi <- c(0.1, 0.2, 0.9, 1, 0.2, 0.6)
#' opt <- optimalSurvivalCutoff(clinical, psi, "right", event, timeStart)
optimalSurvivalCutoff <- function(clinical, data, censoring, event, timeStart, 
                                  timeStop=NULL, 
                                  followup="days_to_last_followup",
                                  session=NULL, filter=TRUE, survTime=NULL, 
                                  lower=NULL, upper=NULL) {
    if (is.null(lower) && is.null(upper)) {
        # Search between min and max of data
        lower <- min(data, na.rm=TRUE)
        upper <- max(data, na.rm=TRUE)
        
        if (lower >= 0 && upper <= 1) {
            # Search between 0 and 1 (if data values are within that interval)
            lower <- 0
            upper <- 1
        } else if (lower >= upper) {
            upper <- lower + 1
        }
    }
    
    if ( is.null(survTime) )
        survTime <- getAttributesTime(clinical, event, timeStart, timeStop,
                                      followup)
    
    # Supress warnings from failed calculations while optimising
    opt <- suppressWarnings(
        optim(0, testSurvivalCutoff, data=data, filter=filter, 
              clinical=clinical, censoring=censoring, timeStart=timeStart,
              timeStop=timeStop, event=event, followup=followup, 
              survTime=survTime, session=session,
              # Method and parameters interval
              method="Brent", lower=lower, upper=upper))
    return(opt)
}

#' @rdname optimalSurvivalCutoff
#' @param psi Numeric: PSI values to test against the cutoff
#' @export
optimalPSIcutoff <- function(clinical, psi, censoring, event, timeStart, 
                             timeStop=NULL, followup="days_to_last_followup",
                             session=NULL, filter=TRUE, survTime=NULL) {
    .Deprecated("optimalSurvivalCutoff")
    
    optimalSurvivalCutoff(clinical=clinical, data=psi, censoring=censoring, 
                          event=event, timeStart=timeStart, timeStop=timeStop, 
                          followup=followup, session=session, filter=filter, 
                          survTime=survTime, lower=0, upper=1)
}

# Differential analyses helper functions -----------------------------------

#' Prepare event plot options
#' 
#' @param id Character: identifier
#' @param ns Namespace identifier
#' @param labelsPanel Tab panel containing options to label points
#' 
#' @return HTML elements
prepareEventPlotOptions <- function(id, ns, labelsPanel=NULL) {
    xAxisPanel <- tabPanel(
        "X axis",
        selectizeInput(ns("xAxis"), "Select X axis", choices=NULL, 
                       width="100%"),
        selectizeInput(ns("xTransform"), "Data transformation of X values",
                       transformOptions("x"), width="100%"),
        bsCollapse(
            bsCollapsePanel(
                list(icon("thumb-tack"), "Highlight points based on X values"),
                value="xAxisHighlightPanel",
                checkboxInput(ns("xHighlight"), width="100%",
                              "Highlight points based on X values"),
                uiOutput(ns("xHighlightValues")))))
    
    yAxisPanel <- tabPanel(
        "Y axis",
        selectizeInput(ns("yAxis"), "Select Y axis", choices=NULL, 
                       width="100%"),
        selectizeInput(ns("yTransform"), "Data transformation of Y values",
                       transformOptions("y"), width="100%"),
        bsCollapse(
            bsCollapsePanel(
                list(icon("thumb-tack"), "Highlight points based on Y values"),
                value="xAxisHighlightPanel",
                checkboxInput(ns("yHighlight"), width="100%",
                              "Highlight points based on Y values"),
                uiOutput(ns("yHighlightValues")))))
    
    plotStyle <- navbarMenu(
        "Plot style",
        tabPanel("Base points",
                 plotPointsStyle(
                     ns, "base", "Base points",
                     help="These are points not highlighted or selected.",
                     size=2, colour="gray", alpha=0.3)),
        tabPanel("Highlighted points",
                 plotPointsStyle(
                     ns, "highlighted", "Highlighted points",
                     help="Highlight points in the X and Y axes options.",
                     size=3, colour="orange", alpha=0.5)),
        tabPanel("Selected in the table",
                 plotPointsStyle(
                     ns, "selected", "Selected in the table",
                     help=paste("Click in a row of the table to emphasise the",
                                "respective point in the plot."),
                     size=8, colour="blue", alpha=0.5)),
        tabPanel("Labels",
                 plotPointsStyle(
                     ns, "labelled", "Labels", 
                     help="Modify the style of labels",
                     size=4, colour="red", alpha=1)))
    
    div(id=id, tabsetPanel(xAxisPanel, yAxisPanel, labelsPanel, plotStyle))
}

#' Perform Wilcoxon analysis and return interface to show the results
#' 
#' @inheritParams plotDistribution
#' @param stat Data frame or matrix: values of the analyses to be performed (if
#' NULL, the analyses will be performed)
#' 
#' @importFrom shiny tagList tags h4 br
#' @importFrom stats wilcox.test
#' @importFrom R.utils capitalize
#' 
#' @return HTML elements
wilcox <- function(data, groups, stat=NULL) {
    warn <- NULL
    group <- unique(groups)
    len <- length(group)
    
    p.value <- NULL
    if (!is.null(stat)) {
        method      <- stat$`Wilcoxon method`
        statistic   <- stat$`Wilcoxon statistic`
        p.value     <- stat$`Wilcoxon p-value`
        null.value  <- stat$`Wilcoxon null value`
        alternative <- stat$`Wilcoxon alternative`
    }
    
    if (len != 2) {
        return(tagList(h4("Wilcoxon test"),
                       "Can only perform this test on 2 groups."))
    } else if (!is.null(p.value)) {
        adjusted <- grep("Wilcoxon p-value \\(.* adjusted\\)", colnames(stat), 
                         value=TRUE)
        if (length(adjusted) != 0) {
            adjustMethod <- gsub(".*\\((.* adjusted)\\).*", "\\1", adjusted)
            adjusted <- stat[ , adjusted]
            label <- paste0("p-value (", adjustMethod, "): ")
            adjusted <- tagList(tags$b(label), signifDigits(adjusted), br())
        } else {
            adjusted <- NULL
        }
    } else {
        dataA <- data[groups == group[1]]
        dataB <- data[groups == group[2]]
        stat <- tryCatch(list(stat=wilcox.test(dataA, dataB)), 
                         warning=function(w)
                             return(list(stat=wilcox.test(dataA, dataB),
                                         warning=w)))
        
        if ("warning" %in% names(stat))
            warn <- tags$div(class="alert alert-warning", role="alert",
                             capitalize(stat$warning$message))
        
        method      <- stat$stat$method
        statistic   <- stat$stat$statistic
        p.value     <- stat$stat$p.value
        adjusted    <- NULL
        null.value  <- stat$stat$null.value
        alternative <- stat$stat$alternative
    }
    
    tagList(
        h4(method), warn,
        tags$b("Test value: "), roundDigits(statistic), br(),
        tags$b("Location parameter: "), null.value, br(),
        tags$b("Alternative hypothesis: "), alternative,
        div(style="text-align:right",
            tags$b("p-value: "), signifDigits(p.value), br(), adjusted))
}

#' Perform unpaired t-test analysis and return interface to show the results
#' @inheritParams wilcox
#' @param stat Data frame or matrix: values of the analyses to be performed (if
#' NULL, the analyses will be performed)
#' 
#' @importFrom shiny tagList tags h4 br
#' @importFrom stats t.test
#' @importFrom R.utils capitalize
#' 
#' @return HTML elements
ttest <- function(data, groups, stat=NULL) {
    warn <- NULL
    group <- unique(groups)
    len <- length(group)
    
    p.value <- NULL
    if (!is.null(stat)) {
        method      <- stat$`T-test method`
        statistic   <- stat$`T-test statistic`
        p.value     <- stat$`T-test p-value`
        null.value  <- stat$`T-test null value`
        alternative <- stat$`T-test alternative`
        parameter   <- stat$`T-test parameter`
        int1        <- stat$`T-test conf int1`
        int2        <- stat$`T-test conf int2`
    }
    
    if (len != 2) {
        return(tagList(h4("Unpaired t-test"),
                       "Can only perform this test on 2 groups."))
    } else if (!is.null(p.value)) {
        adjusted <- grep("T-test p-value \\(.* adjusted\\)", colnames(stat), 
                         value=TRUE)
        if (length(adjusted) != 0) {
            adjustMethod <- gsub(".*\\((.* adjusted)\\).*", "\\1", adjusted)
            adjusted <- stat[ , adjusted]
            label <- paste0("p-value (", adjustMethod, "): ")
            adjusted <- tagList(tags$b(label), signifDigits(adjusted), br())
        } else {
            adjusted <- NULL
        }
    } else {
        dataA <- data[groups == group[1]]
        dataB <- data[groups == group[2]]
        stat <- tryCatch(list(stat=t.test(dataA, dataB)), 
                         warning=function(w)
                             return(list(stat=t.test(dataA, dataB),
                                         warning=w)),
                         error=return)
        if (is(stat, "error")) {
            message <- stat$message
            check <- "not enough '%s' observations"
            checkX <- sprintf(check, "x")
            checkY <- sprintf(check, "y")
            
            fewObservations <- function(name)
                tagList("Not enough observations in group", tags$b(name),
                        "to perform this statistical test.")
            
            if (message == checkX)
                message <- fewObservations(group[1])
            else if (message == checkY)
                message <- fewObservations(group[2])
            else
                message <- capitalize(message)
            
            error <- tagList(h4("t-test"), tags$div(class="alert alert-danger",
                                                    role="alert", message))
            return(error)
        }
        
        if ("warning" %in% names(stat))
            warn <- tags$div(class="alert alert-warning", role="alert",
                             capitalize(stat$warning$message))
        
        method      <- stat$stat$method
        statistic   <- stat$stat$statistic
        p.value     <- stat$stat$p.value
        adjusted    <- NULL
        null.value  <- stat$stat$null.value
        alternative <- stat$stat$alternative
        parameter   <- stat$stat$parameter
        int1        <- stat$stat$conf.int[[1]]
        int2        <- stat$stat$conf.int[[2]]
    }
    
    tagList(
        h4(method), warn,
        tags$b("Test value: "), roundDigits(statistic), br(),
        tags$b("Test parameter: "), parameter, br(),
        tags$b("Difference in means: "), null.value, br(),
        tags$b("Alternative hypothesis: "), alternative, br(),
        tags$b("95\u0025 confidence interval: "), roundDigits(int1),
        roundDigits(int2),
        div(style="text-align:right",
            tags$b("p-value: "), signifDigits(p.value), br(), adjusted))
}


#' Perform Levene's test and return interface to show the results
#' @inheritParams wilcox
#' @importFrom shiny tagList tags h4 br
#' @return HTML elements
levene <- function(data, groups, stat=NULL) {
    p.value <- NULL
    if (!is.null(stat)) {
        statistic <- stat$`Levene statistic`
        p.value   <- stat$`Levene p-value`
        non.bootstrap.p.value <- stat$`Levene non bootstrap p-value`
    }
    
    len <- length(unique(groups))
    if (len < 2) {
        return(tagList(h4("Levene's Test for Homogeneity of Variances"),
                       "Can only perform this test on 2 or more groups."))
    } else if (!is.null(p.value)) {
        adjusted <- grep("Levene .*p-value \\(.* adjusted\\)", colnames(stat), 
                         value=TRUE)
        if (length(adjusted) == 2) {
            adjustMethod <- gsub(".*\\((.* adjusted)\\).*", "\\1", adjusted)
            adjusted <- stat[ , adjusted]
            label1 <- paste0("p-value (", adjustMethod[[1]], "): ")
            label2 <- paste0("p-value without bootstrap (", adjustMethod[[2]],
                             "): ")
            adjustedNonBootstrap <- tagList(br(), tags$b(label2), 
                                            signifDigits(adjusted[[1]]), br())
            adjusted <- tagList(tags$b(label1), signifDigits(adjusted[[2]]), 
                                br())
        } else if (length(adjusted) == 1) {
            adjustMethod <- gsub(".*\\((.* adjusted)\\).*", "\\1", adjusted)
            adjusted <- stat[ , adjusted]
            label <- paste0("p-value (", adjustMethod, "): ")
            adjusted <- tagList(tags$b(label), signifDigits(adjusted), br())
        } else {
            adjusted <- NULL
            adjustedNonBootstrap <- NULL
        }
    } else {
        nas <- is.na(data)
        stat <- leveneTest(data[!nas], factor(groups[!nas]))
        statistic <- stat$statistic
        p.value   <- stat$p.value
        adjusted  <- NULL
        non.bootstrap.p.value <- stat$non.bootstrap.p.value
        adjustedNonBootstrap  <- NULL
    }
    
    if (!is.null(non.bootstrap.p.value)) {
        nonBootstrap <- tagList(tags$b("p-value without bootstrap: "), 
                                signifDigits(non.bootstrap.p.value), 
                                adjustedNonBootstrap)
    } else {
        nonBootstrap <- NULL
    }
    
    tagList(
        h4("Levene's Test for Homogeneity of Variance"),
        tags$b("Test value: "), roundDigits(statistic), br(),
        div(style="text-align:right",
            tags$b("p-value: "), signifDigits(p.value), br(), adjusted,
            nonBootstrap))
}

#' Perform Fligner-Killeen test and return interface to show the results
#' @inheritParams wilcox
#' @importFrom shiny tagList tags h4 br
#' @importFrom stats fligner.test
#' @return HTML elements
fligner <- function(data, groups, stat=NULL) {
    len <- length(unique(groups))
    
    p.value <- NULL
    if (!is.null(stat)) {
        statistic <- stat$`Fligner-Killeen statistic`
        p.value   <- stat$`Fligner-Killeen p-value`
        parameter <- stat$`Fligner-Killeen parameter`
    }
    
    if (len < 2) {
        return(tagList(h4("Fligner-Killeen Test for Homogeneity of Variances"),
                       "Can only perform this test on 2 or more groups."))
    } else if (!is.null(p.value)) {
        adjusted <- grep("Fligner-Killeen .*p-value \\(.* adjusted\\)", 
                         colnames(stat), value=TRUE)
        if (length(adjusted) != 0) {
            adjustMethod <- gsub(".*\\((.* adjusted)\\).*", "\\1", adjusted)
            adjusted <- stat[ , adjusted]
            label <- paste0("p-value (", adjustMethod, "): ")
            adjusted <- tagList(tags$b(label), signifDigits(adjusted), br())
        } else {
            adjusted <- NULL
        }
    } else {
        nas <- is.na(data)
        stat <- fligner.test(data[!nas], factor(groups[!nas]))
        if (is.infinite(stat$statistic)) {
            statistic <- NaN
            p.value   <- NA
            parameter <- NaN
        } else {
            statistic <- stat$statistic
            p.value   <- stat$p.value
            parameter <- stat$parameter
        }
        adjusted <- NULL
    }
    
    tagList(
        h4("Fligner-Killeen's Test for Homogeneity of Variance"),
        tags$b("Test value: "), roundDigits(statistic), br(),
        tags$b("Test parameter: "), parameter, br(),
        div(style="text-align:right",
            tags$b("p-value: "), signifDigits(p.value), br(), adjusted))
}

#' Perform Kruskal's test and return interface to show the results
#' @inheritParams wilcox
#' @importFrom shiny tagList tags h4 br
#' @importFrom stats kruskal.test
#' @return HTML elements
kruskal <- function(data, groups, stat=NULL) {
    len <- length(unique(groups))
    
    p.value <- NULL
    if (!is.null(stat)) {
        method    <- stat$`Kruskal method`
        statistic <- stat$`Kruskal statistic`
        p.value   <- stat$`Kruskal p-value`
        parameter <- stat$`Kruskal parameter`
    }
    
    if (len < 2) {
        return(tagList(h4("Kruskal test"),
                       "Can only perform this test on 2 or more groups."))
    } else if (!is.null(p.value)) {
        adjusted <- grep("Kruskal p-value \\(.* adjusted\\)", colnames(stat), 
                         value=TRUE)
        if (length(adjusted) != 0) {
            adjustMethod <- gsub(".*\\((.* adjusted)\\).*", "\\1", adjusted)
            adjusted <- stat[ , adjusted]
            label <- paste0("p-value (", adjustMethod, "): ")
            adjusted <- tagList(tags$b(label), signifDigits(adjusted), br())
        } else {
            adjusted <- NULL
        }
    } else {
        stat      <- kruskal.test(data, factor(groups))
        method    <- stat$method
        statistic <- stat$statistic
        p.value   <- stat$p.value
        parameter <- stat$parameter
        adjusted  <- NULL
    }
    
    tagList(h4(method),
            tags$b("Test value \u03C7\u00B2: "), roundDigits(statistic), br(),
            tags$b("Degrees of freedom: "), parameter,
            div(style="text-align:right",
                tags$b("p-value: "), signifDigits(p.value), br(), adjusted))
}

#' Perform Fisher's exact test and return interface to show the results
#' @inheritParams wilcox
#' 
#' @importFrom shiny tagList tags h4 br
#' @importFrom stats fisher.test
#' @importFrom R.utils evalWithTimeout
#' 
#' @return HTML elements
fisher <- function(data, groups) {
    stat <- try(evalWithTimeout(
        fisher.test(data, factor(groups)),
        timeout = 1,
        onTimeout = "error"))
    
    if (class(stat) != "try-error") {
        tagList(
            h4(stat$method),
            tags$b("p-value: "), stat$p.value, br(),
            tags$b("Alternative hypothesis: "), stat$alternative
        )
    } else {
        tagList(
            h4("Fisher's Exact Test for Count Data"),
            "This test took too much to complete!"
        )
    }
}

#' Perform Spearman's test and return interface to show the results
#' @inheritParams wilcox
#' @importFrom shiny tagList tags h4 br
#' @importFrom stats var cor
#' @return HTML elements
spearman <- function(data, groups) {
    group <- unique(groups)
    len <- length(group)
    
    if (len != 2) {
        tagList(
            h4("Spearman's correlation"),
            "Can only perform this test on 2 groups.")
    } else {
        var <- var(data[groups == group[1]], data[groups == group[2]])
        cor <- cor(data[groups == group[1]], data[groups == group[2]])
        
        tagList(
            h4("Spearman's correlation"),
            tags$b("Variance: "), var, br(),
            tags$b("Correlation: "), cor)
    }
}

#' Options for event plotting
#' 
#' @param session Shiny session
#' @param df Data frame
#' @param xAxis Character: currently selected variable for the X axis
#' @param yAxis Character: currently selected variable for the Y axis
#' @param labelSortBy Character: currently selected variable for the selectize
#' element to sort differentially analysis
#' 
#' @return HTML elements
eventPlotOptions <- function(session, df, xAxis, yAxis, labelSortBy) {
    # Only allow to select numeric columns    
    cols <- colnames(df)
    type <- sapply(cols, function(i) class(df[[i]]))
    numericCols <- cols[type == "numeric"]
    
    if (!is.null(numericCols) && length(numericCols) > 0) {
        if (is.null(xAxis) || identical(xAxis, "") || !xAxis %in% numericCols) {
            # Default option for X axis
            deltaMedian <- "\u2206 Median"    # PSI
            logFC       <- "log2 Fold-Change" # Gene expression
            if (logFC %in% numericCols)
                xSelected <- logFC
            else if (deltaMedian %in% numericCols)
                xSelected <- deltaMedian
            else
                xSelected <- NULL
            
            updateSelectizeInput(session, "xAxis", choices=numericCols,
                                 selected=xSelected)
        } else {
            updateSelectizeInput(session, "xAxis", choices=numericCols,
                                 selected=xAxis)
        }
        
        if (is.null(yAxis) || identical(yAxis, "") || !yAxis %in% numericCols) {
            # Default option for Y axis
            pValue <- grepl("p-value", numericCols)
            bStat  <- "B-statistics" # Gene expression
            if (bStat %in% numericCols)
                ySelected <- bStat
            else if (any(pValue))
                ySelected <- numericCols[pValue][1]
            else if (length(numericCols) >= 2)
                ySelected <- numericCols[[2]]
            else
                ySelected <- NULL
            
            updateSelectizeInput(session, "yAxis", choices=numericCols,
                                 selected=ySelected)
        } else {
            updateSelectizeInput(session, "yAxis", choices=numericCols,
                                 selected=yAxis)
        }
        
        if (is.null(labelSortBy) || identical(labelSortBy, "") || 
            !labelSortBy %in% numericCols) {
            # Default option for sorting differentially analysis
            pValue <- grepl("p-value", numericCols)
            bStat  <- "B-statistics" # Gene expression
            if (bStat %in% numericCols)
                labelSelected <- bStat
            else if (any(pValue))
                labelSelected <- numericCols[pValue][1]
            else if (length(numericCols) >= 2)
                labelSelected <- numericCols[[2]]
            else
                labelSelected <- NULL
            
            updateSelectizeInput(session, "labelSortBy", choices=numericCols,
                                 selected=labelSelected)
        } else {
            updateSelectizeInput(session, "labelSortBy", choices=numericCols,
                                 selected=labelSortBy)
        }
    } else {
        updateSelectizeInput(session, "xAxis", choices=character(0))
        updateSelectizeInput(session, "yAxis", choices=character(0))
        updateSelectizeInput(session, "labelSortBy", choices=character(0))
    }
}

#' Basic statistics performed on data
#' 
#' Variance and median of each group. If data has 2 groups, also calculates the
#' delta variance and delta median.
#' 
#' @inheritParams plotDistribution
#' @importFrom shiny tagList br h4
#' 
#' @return HTML elements
basicStats <- function(data, groups) {
    data <- lapply(unique(groups), function(g) data[groups == g])
    
    len <- length(unique(groups))
    vari <- vapply(data, var, numeric(1), na.rm = TRUE)
    medi <- vapply(data, median, numeric(1), na.rm = TRUE)
    
    if (len == 2) {
        deltaMedian <- tagList(tags$b("|\u0394 Median|: "), 
                               roundDigits(abs(medi[2] - medi[1])), br())
        deltaVar <- tagList(tags$b("|\u0394 Variance|: "), 
                            roundDigits(abs(vari[2] - vari[1])), br())
    } else {
        deltaMedian <- NULL
        deltaVar <- NULL
    }
    
    avgMedian <- roundDigits( mean(medi) )
    avgVar <- roundDigits( mean(vari) )
    ui <- tagList(hr(), h4("Basic statistics"),
                  tags$b("Average median: "), avgMedian, br(), deltaMedian,
                  tags$b("Average variance: "), avgVar, br(), deltaVar)
    return(ui)
}

#' Filter groups with less data points than the threshold
#' 
#' Groups containing a number of non-missing values less than the threshold are
#' discarded.
#' 
#' @param vector Unnamed elements
#' @param group Character: group of the elements
#' @param threshold Integer: number of valid non-missing values by group
#' 
#' @return Named vector with filtered elements from valid groups. The group of 
#' the respective element is given in the name.
#' @export
#' @examples 
#' # Removes groups with less than two elements
#' filterGroups(1:4, c("A", "B", "B", "D"), threshold=2)
filterGroups <- function(vector, group, threshold=1) {
    names(vector) <- group
    vector <- lapply(unique(group), function(t) {
        vector <- vector[group == t]
        if ( sum(!is.na(vector)) >= threshold )
            return(vector)
    })
    return(unlist(vector))
}


#' Create plot for events
#' 
#' @param df Data frame
#' @param x Character: name of the variable used for the X axis
#' @param y Character: name of the variable used for the Y axis
#' @param params List of parameters to pass to \code{\link[ggplot2]{geom_point}}
#' related to most points
#' @param highlightX Integer: region of points in X axis to highlight
#' @param highlightY Integer: region of points in Y axis to highlight
#' @param highlightParams List of parameters to pass to
#' \code{\link[ggplot2]{geom_point}} related to highlighted points
#' @param selected Integer: index of rows/points to be coloured
#' @param selectedParams List of parameters to pass to 
#' \code{\link[ggplot2]{geom_point}} related to selected points
#' @param labelled Integer: index of rows/points to be labelled
#' @param labelledParams List of parameters to pass to 
#' \code{\link[ggrepel]{geom_label_repel}} related to labelled points
#' @param xlim Numeric: limits of X axis
#' @param ylim Numeric: limits of Y axis
#' 
#' @importFrom ggplot2 ggplot aes_string geom_point theme_light coord_cartesian 
#' unit
#' @importFrom ggrepel geom_label_repel
#' 
#' @return List containing HTML elements and highlighted points
createEventPlotting <- function(df, x, y, params, highlightX, highlightY,
                                highlightParams, selected, selectedParams,
                                labelled, labelledParams, xlim, ylim) {
    aes <- aes_string(paste0("`", x, "`"), paste0("`", y, "`"))
    
    # Get points highlighted in X and Y that were not selected
    if (!is.null(highlightX)) {
        highlightedX <- findInterval(df[[x]], highlightX, left.open=FALSE,
                                     rightmost.closed=TRUE) == 1
        if (attr(highlightX, "inverted")) highlightedX <- !highlightedX
        highlightedX <- which(highlightedX)
    } else {
        highlightedX <- seq(nrow(df))
    }
    
    if (!is.null(highlightY)) {
        highlightedY <- findInterval(df[[y]], highlightY, left.open=FALSE,
                                     rightmost.closed=TRUE) == 1
        if (attr(highlightY, "inverted")) highlightedY <- !highlightedY
        highlightedY <- which(highlightedY)
    } else {
        highlightedY <- seq(nrow(df))
    }
    
    if ( is.null(highlightX) && is.null(highlightY) ) {
        highlighted <- NULL
    } else {
        highlighted <- intersect(highlightedX, highlightedY)
    }
    
    # Render remaining points
    plotted <- union(selected, highlighted)
    if (!is.null(plotted))
        remaining <- df[-plotted, ]
    else
        remaining <- df
    plot <- ggplot() + do.call("geom_point", c(
        list(data=remaining, aes, na.rm=TRUE), params))
    
    # Render highlighted points
    plot <- plot + do.call("geom_point", c(
        list(data=df[setdiff(highlighted, selected), ], aes, na.rm=TRUE), 
        highlightParams))
    
    # Render selected points
    plot <- plot + do.call("geom_point", c(
        list(data=df[selected, ], aes, na.rm=TRUE), selectedParams))
    
    # Label points
    aesMod <- aes
    aesMod$label <- parse(text="`Names`")[[1]]
    
    modNames <- rownames(df)
    if ( isTRUE(attr(labelled, "displayOnlyGene")) )
        modNames <- gsub(".*_(.*)$", "\\1", modNames)
    
    mod <- cbind(Names=modNames, df)
    plot <- plot + do.call("geom_label_repel", c(
        list(data=mod[labelled, ], aesMod, na.rm=TRUE), labelledParams))
    
    plot <- plot + coord_cartesian(xlim=xlim, ylim=ylim) + theme_light(16)
    return(list(plot=list(plot), highlighted=highlighted))
}

#' Show variable transformation(s)
#' 
#' @param label Character: label to display
#' @param type Character: show the variable transformation for the chosen type;
#' NULL (by default) to show all variable transformations
#' 
#' @return Character labelling variable transformation(s)
transformOptions <- function(label, type=NULL) {
    transform <- c("No transformation"="no",
                   "|%s|"="abs",
                   "-%s"="inv",
                   "log10(|%s|)"="log10abs",
                   "-log10(|%s|)"="-log10abs")
    names(transform) <- sprintf(names(transform), label)
    
    if (!is.null(type)) {
        show <- names(transform)
        show[[1]] <- label
        show <- show[match(type, transform)]
        return(show)
    } else {
        return(transform)
    }
}

#' Transform values as per a given type of transformation
#' 
#' @param val Integer: values to transform
#' @param type Character: type of transformation
#' @param avoidZero Boolean: add the smallest non-zero number available to zero
#' values; avoids returning infinity values during Log transformation (which are
#' not plotted); useful for preserving p-values of 0, for instance; TRUE by
#' default
#' 
#' @return Integer containing transformed values
transformValues <- function(val, type, avoidZero=TRUE) {
    # Remove NAs
    if (avoidZero) {
        zeroes <- val == 0 & !is.na(val)
        val[zeroes] <- val[zeroes] + .Machine$double.xmin
    }
    
    trans <- suppressWarnings(
        switch(type,
               "no"=val,
               "abs"=abs(val),
               "inv"=-val,
               "log10abs"=log10(abs(val)),
               "-log10abs"=-log10(abs(val))))
    return(trans)
}

#' Transform data in data frame
#' 
#' @param input Shiny input
#' @param df Data frame
#' @param x Character: column name
#' @param y Character: column name
#' 
#' @return Data frame with transformed data in new columns and respective name
#' of created columns
transformData <- function(input, df, x, y) {
    xTrans <- input$xTransform
    xLabel <- transformOptions(x, xTrans)
    if (!x %in% colnames(df)) return(NULL)
    df[[xLabel]] <- transformValues(df[[x]], xTrans)
    
    yTrans <- input$yTransform
    yLabel <- transformOptions(y, yTrans)
    if (!y %in% colnames(df)) return(NULL)
    df[[yLabel]] <- transformValues(df[[y]], yTrans)
    
    return(list(data=df, xLabel=xLabel, yLabel=yLabel))
}

#' Interface to modify the style of the plot points
#' 
#' @param ns Namespace function
#' @param id Character: identifier
#' @param description Character: display text for user
#' @param help Character: extra text to help the user
#' @param colour Character: default colour ("black" by default)
#' @param size Integer: default size (2 by default)
#' @param alpha Numeric: default transparency value; (opaque by default)
#' 
#' @importFrom shiny tagList h4 helpText sliderInput
#' @importFrom colourpicker colourInput
#' 
#' @return HTML elements
plotPointsStyle <- function(ns, id, description, help=NULL, size=2,
                            colour="black", alpha=1.0) {
    id2 <- function(att) ns(paste0(id, att))
    
    colourSelector <- colourInput(id2("Colour"), "Colour", value=colour)
    colourSelector[[2]][["style"]] <- "width: 100%;"
    
    tagList(
        h4(description),
        if (!is.null(help)) helpText(help),
        sliderInput(id2("Size"), "Size", min=1, max=10, step=1, value=size,
                    width="100%"),
        colourSelector,
        sliderInput(id2("Alpha"), "Opacity", min=0, max=1, step=0.01,
                    value=alpha, width="100%")
    )
}

#' Plot distribution through a density plot
#' 
#' The tooltip shows the median, variance, max, min and number of non-NA samples
#' of each data series.
#' 
#' @param data Numeric, data frame or matrix: data for one gene or alternative 
#' splicing event
#' @param groups List of characters (list of groups containing data identifiers)
#' or character vector (group of each value in \code{data})
#' @param rug Boolean: include rug plot to better visualise data distribution
#' @param vLine Boolean: include vertical plot lines to indicate the mean and
#' median of each group even when those groups are omitted
#' @param ... Extra parameters passed to \code{density} to create the kernel
#' density estimates
#' @param title Character: plot title
#' @param psi Boolean: are data composed of PSI values?
#' 
#' @importFrom highcharter highchart hc_chart hc_xAxis hc_plotOptions hc_tooltip
#' JS
#' @importFrom stats median var density
#' 
#' @return Highcharter object with density plot
#' @export
#' @examples
#' data <- sample(20, rep=TRUE)/20
#' groups <- c(rep("A", 10), rep("B", 10))
#' plotDistribution(data, groups)
plotDistribution <- function(data, groups="All samples", rug=TRUE, vLine=TRUE, 
                             ..., title=NULL, psi=TRUE) {
    if (psi) {
        xMin <- 0
        xMax <- 1
        xLabel <- "Distribution of PSI values"
        id <- "Inclusion level: "
    } else {
        xMin <- NULL
        xMax <- NULL
        xLabel <- "Distribution of gene expression"
        id <- "Gene expression: "
    }
    
    # Include X-axis zoom and hide markers
    hc <- highchart() %>%
        hc_chart(zoomType="x") %>%
        hc_xAxis(min=xMin, max=xMax, title=list(text=xLabel)) %>%
        hc_plotOptions(series = list(fillOpacity=0.3,
                                     marker=list(enabled=FALSE))) %>%
        hc_tooltip(
            headerFormat = paste(span(style="color:{point.color}", "\u25CF "),
                                 tags$b("{series.name}"), br()),
            pointFormat = paste(
                id, "{point.x:.2f}", br(),
                "Number of samples: {series.options.samples}", br(),
                "Median: {series.options.median}", br(),
                "Variance: {series.options.var}", br(),
                "Range: {series.options.min} - {series.options.max}")) %>%
        export_highcharts()
    
    if (!is.null(title)) hc <- hc %>% hc_title(text=title)
    
    if (is.list(groups)) 
        ns <- names(groups)
    else
        ns <- groups
    
    count <- 0
    plotLines <- list()
    for (group in unique(ns)) {
        if (is.list(groups))
            filter <- groups[[group]]
        else
            filter <- groups == group
        
        if (is.vector(data))
            row <- as.numeric(data[filter])
        else
            row <- as.numeric(data[ , filter])
        
        med  <- roundDigits(median(row, na.rm=TRUE))
        vari <- roundDigits(var(row, na.rm=TRUE))
        max  <- roundDigits(max(row, na.rm=TRUE))
        min  <- roundDigits(min(row, na.rm=TRUE))
        samples <- sum(!is.na(row))
        
        colour <- unname(attr(groups, "Colour")[group])
        if (is.null(colour))
            colour <- JS("Highcharts.getOptions().colors[", count, "]")
        
        # Calculate the density of inclusion levels for each sample group
        den <- tryCatch(density(row, na.rm=TRUE, ...), error=return,
                        warning=return)
        if (is(den, "error") || is(den, "warning")) {
            hc <- hc %>% hc_add_series(NULL)
        } else {
            hc <- hc %>% hc_add_series(den, type="area", name=group, median=med,
                                       var=vari, samples=samples, max=max, 
                                       color=colour, min=min)
        }
        # Rug plot
        if (rug) {
            hc <- hc_scatter(
                hc, row, rep(0, length(row)), name=group, marker=list(
                    enabled=TRUE, symbol="circle", radius=4,
                    fillColor=paste0(colour, "60")), # Add opacity
                median=med, var=vari, samples=samples, max=max, min=min)
        }
        # Save plot line with information
        if (vLine) {
            plotLines[[count + 1]] <- list(
                label = list(text = paste("Median:", med, "/ Variance:", vari)),
                # Colour the same as the series
                color=colour, dashStyle="shortdash", width=2, value=med, 
                zIndex=7)
        }
        count <- count + 1
    }
    
    # Add plotLines with information
    if (vLine) hc <- hc %>% hc_xAxis(plotLines = plotLines)
    return(hc)
}

#' @rdname plotDistribution
plotDensity <- plotDistribution

#' Levene's test
#' 
#' Performs a Levene's test to assess the equality of variances
#' 
#' @details The implementation of this function is based on
#' \code{car:::leveneTest.default} with a more standard result.
#' 
#' @inheritParams stats::kruskal.test
#' @param centers Function used to calculate how much values spread 
#' (\code{median} by default; another common function used is \code{mean})
#' 
#' @importFrom stats complete.cases anova median lm
#' 
#' @return A list with class \code{"htest"} containing the following components:
#' \item{statistic}{the value of the test statistic with a name describing it.}
#' \item{p.value}{the p-value for the test.}
#' \item{method}{the type of test applied.}
#' \item{data.name}{a character string giving the names of the data.}
#' 
#' @examples 
#' 
#' vals <- sample(30, replace=TRUE)
#' group <- lapply(list("A", "B", "C"), rep, 10)
#' group <- unlist(group)
#' psichomics:::leveneTest(vals, group)
#' 
#' ## Using Levene's test based on the mean
#' psichomics:::leveneTest(vals, group, mean)
leveneTest <- function(x, g, centers=median) {
    dname <- paste(deparse(substitute(x)), "and", deparse(substitute(g)))
    
    # Remove missing values
    noNAs <- complete.cases(x, g)
    x <- x[noNAs]
    g <- g[noNAs]
    
    # Convert groups to factors
    g <- factor(g)
    
    res <- vapply(split(x, g, drop=TRUE), centers, numeric(1))
    spread <- abs(x - res[g])
    
    # Analysis of variance (ANOVA)
    var <- anova(lm(spread ~ g))
    statistic <- var$`F value`[1]
    pval <- var$`Pr(>F)`[1]
    
    centers <- deparse(substitute(centers))
    rval <- list(statistic=c("W"=statistic), p.value=pval, data.name=dname,
                 method=paste0("Levene's test (using the ", centers, ")"))
    class(rval) <- "htest"
    return(rval)
}

#' Create density sparklines for inclusion levels
#' 
#' @importFrom highcharter highchart hc_credits hc_tooltip hc_chart hc_title
#' hc_xAxis hc_yAxis hc_exporting hc_legend hc_plotOptions
#' @importFrom shiny tags
#' 
#' @inherit createSparklines
#' @param areSplicingEvents Boolean: are these splicing events (TRUE) or gene 
#' expression (FALSE)?
createDensitySparklines <- function(data, events, areSplicingEvents=TRUE, 
                                    groups=NULL, geneExpr=NULL) {
    if (areSplicingEvents) {
        minX <- 0
        maxX <- 1
        id   <- "Inclusion levels"
        FUN  <- "showDiffSplicing"
    } else {
        minX <- NULL
        maxX <- NULL
        id   <- "Gene expression"
        FUN  <- "showDiffExpression"
    }
    
    hc <- highchart() %>%
        hc_tooltip(
            hideDelay=0, shared=TRUE, valueDecimals=getPrecision(),
            headerFormat=paste0(tags$small(paste0(id, ": {point.x:.2f}")),
                                tags$br()),
            pointFormat=paste(span(style="color:{point.color}", "\u25CF "),
                              tags$b("{series.name}"), br())) %>%
        hc_chart(width=120, height=20, backgroundColor="", type="areaspline", 
                 margin=c(2, 0, 2, 0), style=list(overflow='visible')) %>%
        hc_title(text="") %>%
        hc_xAxis(visible=FALSE) %>%
        hc_credits(enabled=FALSE) %>%
        hc_yAxis(endOnTick=FALSE, startOnTick=FALSE, visible=FALSE) %>%
        hc_exporting(enabled=FALSE) %>%
        hc_legend(enabled=FALSE) %>%
        hc_plotOptions(series=list(cursor="non", animation=FALSE, lineWidth=1,
                                   marker=list(radius=1), fillOpacity=0.25))
    
    if (!is.null(minX) || !is.null(maxX))
        hc <- hc %>% hc_xAxis(min=minX, max=maxX)
    createSparklines(hc, data, events, FUN=FUN, groups=groups, 
                     geneExpr=geneExpr)
}

#' Create sparkline charts to be used in a data table
#' 
#' @param hc Highcharts object
#' @param data Character: HTML-formatted data series of interest
#' @param events Character: event identifiers
#' @param FUN Character: JavaScript function to execute when clicking on a chart
#' @param groups Character: name of the groups used for differential analyses
#' @param geneExpr Character: name of the gene expression dataset
#' 
#' @importFrom jsonlite toJSON
#' 
#' @return HTML element with sparkline data
createSparklines <- function(hc, data, events, FUN, groups=NULL,
                             geneExpr=NULL) {
    hc <- as.character(toJSON(hc$x$hc_opts, auto_unbox=TRUE))
    hc <- substr(hc, 1, nchar(hc)-1)
    
    if (!is.null(groups) && !identical(groups, ""))
        groups <- toJSarray(groups)
    else
        groups <- "null"
    
    if (is.null(geneExpr) || geneExpr == "")
        geneExpr <- ")"
    else
        geneExpr <- sprintf(", \'%s\')", geneExpr)
    
    json <- paste0(hc, ',"series":', data, "}")
    sparklines <- sprintf(
        paste('<sparkline onclick="%s(\'%s\', %s%s"',
              'style="cursor:pointer;" data-sparkline=\'%s\'/>'), 
        FUN, events, groups, geneExpr, json)
    return(sparklines)
}

#' Perform statistical analysis on a given splicing event
#' 
#' Perform statistical analyses on a given vector containing elements from
#' different groups
#' 
#' @details 
#' The following statistical analyses may be performed by including the 
#' respective string in the \code{analysis} argument:
#' \itemize{
#'      \item{\code{ttest} - Unpaired t-test (2 groups)}
#'      \item{\code{wilcoxRankSum} - Wilcoxon Rank Sum test (2 groups)}
#'      \item{\code{kruskal} - Kruskal test (2 or more groups)}
#'      \item{\code{levene} - Levene's test (2 or more groups)}
#'      \item{\code{fligner} - Fligner-Killeen test (2 or more groups)}
#' }
#' 
#' @param vector Numeric
#' @param group Character: group of each element in the vector
#' @param threshold Integer: minimum number of data points to perform analysis
#' in a group (default is 1)
#' @param analyses Character: analyses to perform (see "Details")
#' @param step Numeric: number of events before the progress bar is updated
#' (a bigger number allows for a faster execution)
#' 
#' @importFrom stats kruskal.test median wilcox.test t.test var density
#' @importFrom methods is
#' 
#' @return A row from a data frame with the results
singleDiffAnalyses <- function(vector, group, threshold=1, step=100,
                               analyses=c("wilcoxRankSum", "ttest", "kruskal",
                                          "levene", "fligner")) {
    colour  <- attr(group, "Colour")
    series  <- split(vector, group)
    samples <- vapply(series, function(i) sum(!is.na(i)), integer(1))
    valid   <- names(series)[samples >= threshold]
    inGroup <- group %in% valid
    group   <- group[inGroup]
    vector  <- vector[inGroup]
    len     <- length(valid)
    
    # Variance and median
    med <- lapply(series, median, na.rm=TRUE)
    var <- lapply(series, var, na.rm=TRUE)
    
    # Improve display of group names
    names(samples) <- paste0("(", names(samples), ")")
    names(med) <- paste0("(", names(med), ")")
    names(var) <- paste0("(", names(var), ")")
    
    # Unpaired t-test (2 groups)
    ttest <- NULL
    if (any("ttest" == analyses) && len == 2 && all(samples > 1)) {
        typeOne <- group == valid[1]
        ttest <- tryCatch(t.test(vector[typeOne], vector[!typeOne]), 
                          error=return)
        if (is(ttest, "error")) ttest <- NULL
    }
    
    # Wilcoxon test (2 groups)
    wilcox <- NULL
    if (any("wilcoxRankSum" == analyses) && len == 2) {
        # Wilcoxon rank sum test (Mann Whitney U test)
        typeOne <- group == valid[1]
        wilcox  <- suppressWarnings(
            tryCatch(wilcox.test(vector[typeOne], vector[!typeOne]),
                     error=return))
        if (is(wilcox, "error")) wilcox <- NULL
        # } else if (any("wilcoxSignedRank" == analyses) && len == 1) {
        #     # Wilcoxon signed rank test
        #     wilcox <- suppressWarnings(wilcox.test(vector))
    }
    
    # Kruskal-Wallis test (2 or more groups)
    kruskal <- NULL
    if (any("kruskal" == analyses) && len >= 2) {
        kruskal <- tryCatch(kruskal.test(vector, group), error=return)
        if (is(kruskal, "error")) kruskal <- NULL
    }
    
    # Levene's test (2 or more groups)
    levene <- NULL
    if (any("levene" == analyses) && len >= 2) {
        levene <- suppressWarnings(
            tryCatch(leveneTest(vector, group), error=return))
        if (is(levene, "error")) levene <- NULL
    }
    
    # Fligner-Killeen test (2 or more groups)
    fligner <- NULL
    if (any("fligner" == analyses) && len >= 2) {
        fligner <- suppressWarnings(
            tryCatch(fligner.test(vector, group), error=return))
        if (is(fligner, "error") || is.infinite(fligner$statistic))
            fligner <- NULL
    }
    
    # Density sparklines
    sparkline <- NULL
    if (any("density" == analyses)) {
        data         <- NULL
        validSeries  <- series[valid]
        groupsColour <- NULL
        for (each in seq(validSeries)) {
            group <- validSeries[[each]]
            # Calculate data density for each sample group with a greatly 
            # reduced number of points for faster execution
            den <- tryCatch(density(group, n=10, na.rm=TRUE), error=return,
                            warning=return)
            if (is(den, "error") || is(den, "warning"))
                data <- c(data, "")
            else
                data <- c(data, paste(sprintf('{"x":%s,"y":%s}', den$x, den$y),
                                      collapse=","))
            
            if (!is.null(colour))
                groupsColour <- c(groupsColour, 
                                  unname(colour[names(validSeries)[[each]]]))
        }
        if (!is.null(colour)) {
            sparkline <- paste(
                sprintf('{"name":"%s", "data":[%s], "color":"%s"}', 
                        names(validSeries), data, groupsColour), collapse=",")
        } else {
            sparkline <- paste(
                sprintf('{"name":"%s", "data":[%s]}', 
                        names(validSeries), data), collapse=",")
        }
        sparkline <- paste("[", sparkline, "]")
    }
    
    vector <- c("Distribution"=sparkline,
                "Survival by PSI cutoff"=NA,
                "Optimal PSI cutoff"=as.numeric(NA),
                "Log-rank p-value"=as.numeric(NA),
                Samples=samples, "T-test"=ttest,
                Wilcoxon=wilcox, Kruskal=kruskal, Levene=levene, 
                "Fligner-Killeen"=fligner, Variance=var, Median=med)
    vector <- vector[!vapply(vector, is.null, logical(1))] # Remove NULL
    return(vector)
}

#' Perform statistical analyses
#' 
#' @param data Data frame or matrix: gene expression or alternative splicing 
#' quantification
#' @param groups Named list of characters (containing elements belonging to each
#' group) or character vector (containing the group of each individual sample);
#' if NULL, sample types are used instead when available, e.g. normal, tumour 
#' and metastasis
#' @param analyses Character: statistical tests to perform (see Details)
#' @param pvalueAdjust Character: method used to adjust p-values (see Details)
#' @param geneExpr Character: name of the gene expression dataset (only required
#' for density sparklines available in the interactive mode)
#' @param psi Data frame or matrix: alternative splicing quantification (defunct
#' argument, use \code{data} instead)
#' 
#' @importFrom plyr rbind.fill
#' @importFrom fastmatch fmatch
#' @importFrom stats p.adjust
#' 
#' @details 
#' The following statistical analyses may be performed by including the 
#' respective string in the \code{analysis} argument:
#' \itemize{
#'      \item{\code{ttest} - Unpaired t-test (2 groups)}
#'      \item{\code{wilcoxRankSum} - Wilcoxon Rank Sum test (2 groups)}
#'      \item{\code{kruskal} - Kruskal test (2 or more groups)}
#'      \item{\code{levene} - Levene's test (2 or more groups)}
#'      \item{\code{fligner} - Fligner-Killeen test (2 or more groups)}
#'      \item{\code{density} - Sample distribution per group (only usable 
#'      through the visual interface)}
#' }
#' 
#' The following methods for p-value adjustment are supported by using the 
#' respective string in the \code{pvalueAdjust} argument:
#' \itemize{
#'      \item{\code{none}: do not adjust p-values}
#'      \item{\code{BH}: Benjamini-Hochberg's method (false discovery rate)}
#'      \item{\code{BY}: Benjamini-Yekutieli's method (false discovery rate)}
#'      \item{\code{bonferroni}: Bonferroni correction (family-wise error rate)}
#'      \item{\code{holm}: Holm's method (family-wise error rate)}
#'      \item{\code{hochberg}: Hochberg's method (family-wise error rate)}
#'      \item{\code{hommel}: Hommel's method (family-wise error rate)}
#' }
#' 
#' @return Table of statistical analyses
#' @export
#' @examples 
#' # Calculate PSI for skipped exon (SE) and mutually exclusive (MXE) events
#' eventType <- c("SE", "MXE")
#' annot <- readFile("ex_splicing_annotation.RDS")
#' junctionQuant <- readFile("ex_junctionQuant.RDS")
#' 
#' psi <- quantifySplicing(annot, junctionQuant, eventType=c("SE", "MXE"))
#' group <- c(rep("Normal", 3), rep("Tumour", 3))
#' diffAnalyses(psi, group)
diffAnalyses <- function(data, groups=NULL, 
                         analyses=c("wilcoxRankSum", "ttest", "kruskal",
                                    "levene", "fligner"),
                         pvalueAdjust="BH", geneExpr=NULL, psi=NULL) {
    if (!is.null(psi)) {
        warning("The argument 'psi' is deprecated: use 'data' instead.")
        data <- psi
    }
    
    # cl <- parallel::makeCluster(getOption("cl.cores", getCores()))
    step <- 50 # Avoid updating progress too frequently
    updateProgress("Performing statistical analysis", 
                   divisions=5 + round(nrow(data)/step))
    time <- Sys.time()
    
    if (is.null(groups)) {
        ids    <- names(data)
        groups <- parseSampleGroups(ids)
    } else if (is.list(groups)) {
        data   <- data[ , unlist(groups)]
        
        colour <- attr(groups, "Colour")
        groups <- rep(names(groups), sapply(groups, length))
        attr(groups, "Colour") <- colour
    }
    originalGroups <- unique(groups)
    if (identical(originalGroups, "All samples")) originalGroups <- NULL
    
    # Prepare groups with respective colours
    colour <- attr(groups, "Colour")
    groups <- factor(groups)
    if ( !is.null(colour) ) attr(groups, "Colour") <- colour
    
    count <- 0
    stats <- apply(data, 1, function(...) {
        count <<- count + 1
        if (count %% step == 0)
            updateProgress("Performing statistical analysis", console=FALSE)
        return(singleDiffAnalyses(...))
    }, groups, threshold=1, step=step, analyses=analyses)
    display(Sys.time() - time)
    
    # Check the column names of the different columns
    ns <- lapply(stats, names)
    uniq <- unique(ns)
    match <- fmatch(ns, uniq)
    
    updateProgress("Preparing data")
    time <- Sys.time()
    
    # Convert list of lists to data frame
    ll <- lapply(stats, function(i) lapply(i, unname))
    ll <- lapply(ll, unlist)
    ldf <- lapply(seq_along(uniq), function(k) {
        elems <- match == k
        df2   <- t(as.data.frame(ll[elems]))
        cols  <- colnames(df2)
        
        df2           <- data.frame(df2)
        colnames(df2) <- cols
        rownames(df2) <- names(stats)[elems]
        return(df2)
    })
    df <- do.call(rbind.fill, ldf)
    rownames(df) <- unlist(lapply(ldf, rownames))
    
    # Convert numeric columns to numeric
    num <- suppressWarnings(apply(df, 2, as.numeric))
    if (!is.matrix(num)) {
        num <- t(as.matrix(num))
        rownames(num) <- rownames(df)
    }
    numericCols <- colSums(is.na(num)) != nrow(num)
    df[ , numericCols] <- num[ , numericCols]
    
    # Convert integer columns to integer
    if (any(numericCols)) {
        int <- apply(df[ , numericCols, drop=FALSE], 2, function(i) 
            all(is.whole(i), na.rm=TRUE))
        intCols <- numericCols
        intCols[numericCols] <- int
        if (any(intCols))
            df[ , intCols] <- apply(df[ , intCols, drop=FALSE], 2, as.integer)
    }
    display(Sys.time() - time)
    
    # Calculate delta variance and delta median if there are only 2 groups
    deltaVar <- df[, grepl("Variance", colnames(df)), drop=FALSE]
    if (ncol(deltaVar) == 2) {
        updateProgress("Calculating delta variance and median")
        time <- Sys.time()
        deltaVar <- deltaVar[, 2] - deltaVar[, 1]
        deltaMed <- df[, grepl("Median", colnames(df))]
        deltaMed <- deltaMed[, 2] - deltaMed[, 1]
        df <- cbind(df, "\u2206 Variance"=deltaVar, "\u2206 Median"=deltaMed)
        display(Sys.time() - time)
    }
    
    if (any(pvalueAdjust == c("BH", "BY", "bonferroni", "holm", "hochberg",
                              "hommel"))) {
        updateProgress("Adjusting p-values", detail=pvalueAdjust)
        
        cols   <- grep("p.value", colnames(df))[-1]
        if (length(cols > 0)) {
            time <- Sys.time()
            pvalue <- df[cols]
            adjust <- apply(pvalue, 2, p.adjust, pvalueAdjust)
            names  <- paste0(colnames(pvalue), " (", pvalueAdjust, " adjusted)")
            
            if (!is.matrix(adjust)) adjust <- t(as.matrix(adjust))
            colnames(adjust) <- names
            
            # Place the adjusted p-values next to the respective p-values
            len <- ncol(df)
            order <- seq(len)
            for (i in seq_along(cols)) 
                order <- append(order, len+i, after=which(order == cols[i]))
            df <- cbind(df, adjust)[order]
            display(Sys.time() - time)
        }
    }
    
    if ( areSplicingEvents(rownames(df)) ) {
        # Add splicing event information
        updateProgress("Including splicing event information")
        info <- suppressWarnings(parseSplicingEvent(rownames(df), pretty=TRUE))
        
        # Prepare presentation of multigenes
        multigene <- lapply(info$gene, length) > 1
        infoGene <- info$gene
        infoGene[multigene] <- lapply(infoGene[multigene], paste, collapse="/")
        
        df <- cbind("Event type"=info$type, "Chromosome"=info$chrom,
                    "Strand"=info$strand, "Gene"=unlist(infoGene), df)
    }
    
    if (any("density" == analyses)) {
        updateProgress("Preparing density plots")
        time <- Sys.time()
        
        df[ , "Distribution"] <- createDensitySparklines(
            df[ , "Distribution"], rownames(df), 
            areSplicingEvents(rownames(df)), groups=originalGroups, 
            geneExpr=geneExpr)
        name <- ifelse(areSplicingEvents(rownames(df)),
                       "PSI.distribution", "GE.distribution")
        colnames(df)[match("Distribution", colnames(df))] <- name
        display(Sys.time() - time)
    }
    
    # Properly set column names
    col <- colnames(df)
    col <- gsub(".", " ", col, fixed=TRUE)
    col <- gsub("p value", "p-value", col, fixed=TRUE)
    colnames(df) <- col
    
    # parallel::stopCluster(cl)
    return(df)
}

#' @rdname diffAnalyses
diffAnalysis <- diffAnalyses

attr(analysesUI, "loader") <- "app"
attr(analysesServer, "loader") <- "app"
nuno-agostinho/psichomics documentation built on Feb. 15, 2018, 7:49 a.m.