R/data_geNormalisationFiltering.R
In psichomics: Graphical Interface for Alternative Splicing Quantification, Analysis and Visualisation

Documented in convertGeneIdentifiers filterGeneExpr geNormalisationFilteringInterface geNormalisationFilteringServer geNormalisationFilteringUI loadGeneExpressionSet normaliseGeneExpression plotGeneExprPerSample plotLibrarySize

geneExprFilteringSetting <- function(ns, id, min=0, max=100, step=1, ...,
                                     label=id) {
    psiFilteringSetting(ns, id, min=min, max=max, step=step, ..., label=label)
}

#' Interface to normalise and filter gene expression
#'
#' @param ns Namespace function
#'
#' @importFrom shiny numericInput div column fluidRow tags helpText
#' selectizeInput checkboxInput
#' @importFrom shinyjs hidden
#'
#' @return HTML elements
#' @keywords internal
geNormalisationFilteringInterface <- function(ns) {
    filters <- div(
        id=ns("filteringInterface"),
        geneExprFilteringSetting(ns, "Mean", check=c(TRUE, FALSE)),
        geneExprFilteringSetting(ns, "Variance", check=c(TRUE, FALSE)),
        fluidRow(
            numericInputWithCheckbox(ns, "minCounts",
                                     div("Counts >=", icon("question-circle")),
                                     min=0, max=100, value=10, check=TRUE),
            numericInputWithCheckbox(ns, "minTotalCounts",
                                     div("Total counts >=",
                                         icon("question-circle")),
                                     min=0, max=100, value=15, check=TRUE),
            bsTooltip(ns("elementMinCounts"), placement="top",
                      options=list(container="body"),
                      paste("Minimum counts in a worthwhile number of samples:",
                            "for more information, check documentation for",
                            tags$code("edgeR::filterByExpr()"))),
            bsTooltip(ns("elementMinTotalCounts"), placement="top",
                      options=list(container="body"),
                      "Minimum total counts across all samples")))

    filteringAssistant <- NULL
    # filteringAssistant <- div(
    #     id=ns("assistantInterface"),
    #     hr(), h4("Filtering assistant"),
    #     selectizeInput(ns("assistantPlot"), "Plot type", width="100%",
    #                    c("Boxplot of the mean expression"="mean",
    #                      "Boxplot of the variance expression"="var")),
    #     highchartOutput(ns("filteringAssistant"), height="150px")
    # )

    options <- div(
        id=ns("options"),
        selectizeInput(ns("geneExpr"), "Gene expression dataset", width="100%",
                       choices=NULL),
        bsCollapse(
            bsCollapsePanel(
                tagList(icon("vial"), "Sample filtering",
                        contextUI(ns("sampleFilterText"))),
                value="Sample filtering",
                selectizeInput(ns("sampleFilter"), "Samples to discard",
                               multiple=TRUE, width="100%",
                               choices=character(0))),
            bsCollapsePanel(
                tagList(icon("dna"), "Gene filtering",
                        contextUI(ns("filterText"))),
                value="Filtering", filters, filteringAssistant),
            bsCollapsePanel(
                tagList(icon("balance-scale"), "Normalisation",
                        contextUI(ns("normalisationText"))),
                value="Normalisation",
                helpText("Scale raw library sizes using",
                         tags$code("edgeR::calcNormFactors()"), ", unless",
                         tags$code("quantile"), "is selected."),
                selectizeInput(
                    ns("normalisation"), "Normalisation method", width="100%",
                    c("Weighted trimmed mean of M-values (TMM)"="TMM",
                              "Relative log expression (RLE)"="RLE",
                              "Upper-quartile normalisation"="upperquartile",
                              "No normalisation"="none",
                              "Quantile"="quantile"),
                    options=list(render=I('{ option: renderGEnormOptions }'))),
                conditionalPanel(
                    sprintf("input[id='%s'] == '%s'", ns("normalisation"),
                            "upperquartile"),
                    sliderInput(ns("upperquartilePercentile"), width="100%",
                                paste("Percentile of the counts used to",
                                      "calculate scale factors"),
                                min=0, max=1, value=0.75, step=0.01)),
                checkboxInput(ns("voom"), width="100%",
                              "Perform mean-variance modelling using voom"),
                helpText("If library sizes are very different,",
                         tags$code("limma::voom()"), "should be more powerful",
                         "and preferred.")),
            bsCollapsePanel(
                tagList(icon("retweet"), "CPM and log2 transformation",
                        contextUI(ns("logTransformText"))),
                value="Log-transformation",
                helpText("Compute log2-transformed counts per million",
                         "(log2CPM) using", tags$code("edgeR::cpm()"), "(or",
                         tags$code("limma::voom()"), ", if selected)."),
                numericInput(
                    ns("priorCount"), value=0.25, step=0.25, width="100%",
                    paste("Average count to add to each observation to avoid",
                          "zeroes after log-transformation")),
                helpText())),
        checkboxInput(
            ns("convertToGeneSymbol"), width="100%",
            paste("Replace unambiguous ENSEMBL gene identifiers with their",
                  "gene symbols"), value=TRUE))

    tagList(
        uiOutput(ns("modal")),
        errorDialog("Gene expression not loaded.",
                    id=ns("missingData"), style="margin: 10px;"),
        hidden(options),
        actionButton(ns("loadGeneExpr"), "Load from file..."),
        disabled(processButton(ns("processGeneExpr"),
                               "Filter and normalise gene expression")))
}

#' @rdname appUI
geNormalisationFilteringUI <- function(id, panel) {
    ns <- NS(id)
    title <- "Gene expression filtering and normalisation"
    panel(style="success", title=tagList(icon("cogs"), title),
          value=title, geNormalisationFilteringInterface(ns))
}

#' Filter and normalise gene expression
#'
#' @description
#' Gene expression is filtered and normalised in the following steps:
#'
#' \itemize{
#' \item{Filter gene expression;}
#' \item{Normalise gene expression with \code{\link[edgeR]{calcNormFactors}};}
#' \item{If \code{performVoom = FALSE}, compute counts per million (CPM) using
#' \code{\link[edgeR]{cpm}} and log2-transform values if
#' \code{log2transform = TRUE};}
#' \item{If \code{performVoom = TRUE}, use \code{\link[limma]{voom}} to compute
#' log2-CPM, quantile-normalise (if \code{method = "quantile"}) and estimate
#' mean-variance relationship to calculate observation-level weights.}
#' }
#'
#' @param geneExpr Matrix or data frame: gene expression
#' @param geneFilter Boolean: filtered genes (if \code{NULL}, skip filtering)
#' @param method Character: normalisation method, including \code{TMM},
#' \code{RLE}, \code{upperquartile}, \code{none} or \code{quantile} (see
#' Details)
#' @inheritParams edgeR::calcNormFactors
#' @param log2transform Boolean: perform log2-transformation?
#' @param priorCount Average count to add to each observation to avoid zeroes
#' after log-transformation
#' @param performVoom Boolean: perform mean-variance modelling
#' (using \code{\link[limma]{voom}})?
#'
#' @details \code{edgeR::calcNormFactors} will be used to normalise gene
#' expression if \code{method} is \code{TMM}, \code{RLE}, \code{upperquartile}
#' or \code{none}. If \code{performVoom = TRUE}, \code{\link[limma]{voom}} will
#' only normalise if \code{method = "quantile"}.
#'
#' Available normalisation methods:
#' \itemize{
#' \item{\code{TMM} is recommended for most RNA-seq data where more than half of
#' the genes are believed not differentially expressed between any pair of
#' samples;}
#' \item{\code{RLE} calculates the median library from the geometric mean of all
#' columns and the median ratio of each sample to the median library is taken as
#' the scale factor;}
#' \item{\code{upperquartile} calculates the scale factors from a given quantile
#' of the counts for each library, after removing genes with zero counts in all
#' libraries;}
#' \item{\code{quantile} forces the entire empirical distribution of each
#' column to be identical (only performed if \code{performVoom = TRUE}).}
#' }
#'
#' @importFrom edgeR DGEList [.DGEList calcNormFactors cpm
#' @importFrom limma voom
#'
#' @family functions for gene expression pre-processing
#' @return Filtered and normalised gene expression
#' @export
#'
#' @examples
#' geneExpr <- readFile("ex_gene_expression.RDS")
#' normaliseGeneExpression(geneExpr)
normaliseGeneExpression <- function(geneExpr, geneFilter=NULL, method="TMM",
                                    p=0.75, log2transform=TRUE,
                                    priorCount=0.25, performVoom=FALSE) {
    updateProgress("Processing gene expression", divisions=3)

    updateProgress("Filtering gene expression")
    geneFilterSettings <- attr(geneFilter, "settings")
    if (is.null(geneFilter)) geneFilter <- TRUE
    else if (!any(geneFilter)) return(NULL)

    originalGeneExpr <- geneExpr
    geneExpr <- DGEList(geneExpr)
    geneExprNorm <- geneExpr[geneFilter, , keep.lib.sizes=TRUE]

    updateProgress("Normalising gene expression")
    if (!performVoom && method == "quantile") method <- "none"
    if (method != "quantile") {
        geneExprNorm <- calcNormFactors(geneExprNorm, method=method, p=p)
    }

    if (!performVoom) {
        geneExprNorm <- cpm(geneExprNorm, log=log2transform,
                            prior.count=priorCount)

        avgCountPerObservationText <- priorCount
        names(avgCountPerObservationText) <- paste("Average count added per",
                                                   "observation")
    } else {
        norm <- if (method == "quantile") "quantile" else "none"
        log2transform <- TRUE
        geneExprNorm  <- voom(geneExprNorm, normalize.method=norm)

        avgCountPerObservationText <- NULL
    }

    updateProgress("Preparing gene expression data")
    if (!is(geneExprNorm, "EList")) geneExprNorm <- data.frame(geneExprNorm)
    colnames(geneExprNorm) <- colnames(geneExpr)

    geneExprNorm <- inheritAttrs(geneExprNorm, originalGeneExpr)
    if (is(geneExprNorm, "EList"))
        geneExprNorm$E <- inheritAttrs(geneExprNorm$E, originalGeneExpr)
    attr(geneExprNorm, "filename") <- NULL

    attr(geneExprNorm, "settings") <- c(
        "Original gene expression (file)"=attr(originalGeneExpr, "filename"),
        "Original gene expression (label)"=attr(originalGeneExpr, "label"),
        geneFilterSettings,
        "Normalisation method"=method,
        "Mean-variance modelling (voom)"=if (performVoom) "Yes" else "No",
        "Log2-transformation"=log2transform,
        avgCountPerObservationText)
    return(geneExprNorm)
}

#' @rdname normaliseGeneExpression
#' @export
normalizeGeneExpression <- normaliseGeneExpression

#' Set of functions to load splicing quantification
#'
#' @inherit geNormalisationFilteringServer
#'
#' @importFrom shiny tags
#'
#' @keywords internal
loadGeneExpressionSet <- function(session, input, output) {
    ns <- session$ns

    # Show modal for loading gene expression data
    observeEvent(input$loadGeneExpr, {
        infoModal(
            session, "Load gene expression data",
            geneExprFileInput(ns("customGeneExpr")),
            uiOutput(ns("alertGeneExpr")),
            footer=processButton(ns("loadCustomGE"), "Load quantification"))
    })

    observeEvent(input$loadGeneExpr, {
        prepareFileBrowser(session, input, "customGeneExpr")
    }, once=TRUE)

    # Load gene expression
    loadGeneExpression <- reactive({
        time <- startProcess("loadGeneExpr")

        updateProgress("Wait a moment", divisions=2)
        updateProgress("Loading gene expression")

        allFormats <- loadFileFormats()
        formats <- allFormats[sapply(allFormats, "[[",
                                     "dataType") == "Gene expression"]

        geneExpr <- tryCatch(
            loadFile(input$customGeneExpr, formats, multiple=TRUE),
            warning=return, error=return)
        if (is(geneExpr, "error")) {
            if (geneExpr$message == paste("'file' must be a character string",
                                          "or connection"))
                errorAlert(session, title="No file provided",
                           "Please provide a file", alertId="alertGeneExpr",
                           caller="Gene expression normalisation and filtering")
            else
                errorAlert(session, title="An error was raised",
                           geneExpr$message, alertId="alertGeneExpr",
                           caller="Gene expression normalisation and filtering")
        } else if (is(geneExpr, "warning")) {
            warningAlert(session, title="A warning was raised",
                         geneExpr$message, alertId="alertGeneExpr",
                         caller="Gene expression normalisation and filtering")
        } else {
            removeAlert(output, "alertGeneExpr")

            prepareGeneExpr <- function(geneExpr, set=FALSE) {
                tablename <- attr(geneExpr, "tablename")
                if (is.null(tablename)) tablename <- "Gene expression"
                if (set) {
                    name <- renameDuplicated(tablename,
                                             names(getCategoryData()))
                    setDataTable(name, geneExpr)
                } else {
                    res <- list(geneExpr)
                    names(res) <- tablename
                    return(res)
                }
            }

            if ( is.null(getData()) ) {
                name <- file_path_sans_ext( basename(input$customGeneExpr) )
                name <- gsub(" Gene expression.*$", "", name)
                if (name == "") name <- "Unnamed"

                if (is.data.frame(geneExpr)) {
                    geneExpr <- prepareGeneExpr(geneExpr)
                } else {
                    geneExpr <- unlist(lapply(geneExpr, prepareGeneExpr),
                                       recursive=FALSE)
                }
                data <- setNames(list(geneExpr), name)
                data <- processDatasetNames(data)
                setData(data)
                setCategory(name)

                samples <- colnames(geneExpr)
                parsed <- parseTCGAsampleInfo(samples)
                if ( !is.null(parsed) ) setSampleInfo(parsed)
            } else if (is.data.frame(geneExpr)) {
                prepareGeneExpr(geneExpr, set=TRUE)
            } else if (is.list(geneExpr)) {
                lapply(geneExpr, prepareGeneExpr, set=TRUE)
            }
            removeModal()
        }
        endProcess("loadGeneExpr", time)
    })
    observeEvent(input$loadCustomGE, loadGeneExpression())
}

#' Convert gene identifiers
#'
#' @param annotation \code{OrgDb}: genome wide annotation for an organism, e.g.
#' \code{org.Hs.eg.db}
#' @param genes Character: genes to be converted
#' @param key Character: type of identifier used, e.g. \code{ENSEMBL}; read
#' \code{?AnnotationDbi::columns}
#' @param target Character: type of identifier to convert to; read
#' \code{?AnnotationDbi::columns}
#' @param ignoreDuplicatedTargets Boolean: if \code{TRUE}, identifiers that
#' share targets with other identifiers will not be converted
#'
#' @importFrom AnnotationDbi select
#' @importFrom data.table data.table
#' @importFrom org.Hs.eg.db org.Hs.eg.db
#'
#' @family functions for gene expression pre-processing
#' @return Character vector of the respective targets of gene identifiers. The
#' previous identifiers remain other identifiers have the same target (in case
#' \code{ignoreDuplicatedTargets = TRUE}) or if no target was found.
#' @export
#'
#' @examples
#' if ( require("org.Hs.eg.db") ) {
#'     columns(org.Hs.eg.db)
#'
#'     genes <- c("ENSG00000012048", "ENSG00000083093", "ENSG00000141510",
#'                "ENSG00000051180")
#'     convertGeneIdentifiers(org.Hs.eg.db, genes,
#'                            key="ENSEMBL", target="SYMBOL")
#' }
convertGeneIdentifiers <- function(annotation, genes, key="ENSEMBL",
                                   target="SYMBOL",
                                   ignoreDuplicatedTargets=TRUE) {
    stopifnot(is(annotation, "OrgDb"))

    if (key == "ENSEMBL") {
        # Remove ENSEMBL identifiers
        genesClean <- gsub("\\..*", "", genes)
        # Keep version for gene identifier containing the string "PAR_Y"
        par_y <- grep("PAR", genes)
        genesClean[par_y] <- genes[par_y]
    } else {
        genesClean <- genes
    }

    match <- tryCatch(
        suppressMessages(select(annotation, genesClean, target, key)),
        error=return)

    if (is(match, "error")) return(setNames(genes, genes))
    match <- data.table(match, key=key)

    # Ignore missing values
    match <- match[!is.na(match[[target]]), ]

    # Collapse genes with more than one matching target
    colnames(match)[2] <- "target"
    collapsed <- match[
        , list(target=paste(unique(target), collapse="/")), by=key]

    if (ignoreDuplicatedTargets) {
        # Ignore genes sharing the same target
        geneTargets <- collapsed[["target"]]
        collapsed   <- collapsed[
            !geneTargets %in% unique(geneTargets[duplicated(geneTargets)]), ]
    }

    # Replace identifiers by their matching targets (if possible)
    converted <- collapsed[["target"]][match(genesClean, collapsed[[key]])]
    genes[!is.na(converted)] <- converted[!is.na(converted)]
    names(genes) <- genesClean
    return(genes)
}

#' Filter genes based on their expression
#'
#' Uses \code{\link[edgeR]{filterByExpr}} to determine genes with sufficiently
#' large counts to retain for statistical analysis.
#'
#' @param geneExpr Data frame or matrix: gene expression
#' @param minMean Numeric: minimum of read count mean per gene
#' @param maxMean Numeric: maximum of read count mean per gene
#' @param minVar Numeric: minimum of read count variance per gene
#' @param maxVar Numeric: maximum of read count variance per gene
#' @param minCounts Numeric: minimum number of read counts per gene for a
#' worthwhile number of samples (check \code{\link[edgeR]{filterByExpr}} for
#' more information)
#' @param minTotalCounts Numeric: minimum total number of read counts per gene
#'
#' @importFrom edgeR filterByExpr
#'
#' @family functions for gene expression pre-processing
#' @return Boolean vector indicating which genes have sufficiently large counts
#' @export
#'
#' @examples
#' geneExpr <- readFile("ex_gene_expression.RDS")
#'
#' # Add some genes with low expression
#' geneExpr <- rbind(geneExpr,
#'                   lowReadGene1=c(rep(4:5, 10)),
#'                   lowReadGene2=c(rep(5:1, 10)),
#'                   lowReadGene3=c(rep(10:1, 10)),
#'                   lowReadGene4=c(rep(7:8, 10)))
#'
#' # Filter out genes with low reads across samples
#' geneExpr[filterGeneExpr(geneExpr), ]
filterGeneExpr <- function(geneExpr, minMean=0, maxMean=Inf, minVar=0,
                           maxVar=Inf, minCounts=10, minTotalCounts=15) {
    if (is.na(minMean))        minMean <- -Inf
    if (is.na(maxMean))        maxMean <- Inf
    if (is.na(minVar))         minVar <- -Inf
    if (is.na(maxVar))         maxVar <- Inf
    if (is.na(minCounts))      minCounts <- 0
    if (is.na(minTotalCounts)) minTotalCounts <- 0

    geneExprMean <- customRowMeans(geneExpr, fast=TRUE)
    geneExprVar  <- customRowVars(geneExpr, fast=TRUE)

    varMeanFilter <- geneExprMean >= minMean & geneExprMean <= maxMean &
        geneExprVar >= minVar & geneExprVar <= maxVar

    lowCountFilter <- suppressMessages(
        filterByExpr(geneExpr[varMeanFilter, ],
                     min.count=minCounts,
                     min.total.count=minTotalCounts))
    filteredGenes <- varMeanFilter
    filteredGenes[names(lowCountFilter[!lowCountFilter])] <- FALSE

    attr(filteredGenes, "settings") <- c(
        "Mean >="=minMean, "Mean <="=maxMean,
        "Variance >="=minVar, "Variance <="=maxVar,
        "Counts for at least some samples >="=minCounts,
        "Total counts across samples >="=minTotalCounts)
    return(filteredGenes)
}

#' Plot distribution of gene expression per sample
#'
#' @param geneExpr Data frame or matrix: gene expression
#' @inheritDotParams renderBoxplot
#'
#' @importFrom highcharter %>% hc_yAxis
#'
#' @family functions for gene expression pre-processing
#' @return Gene expression distribution plots
#' @export
#'
#' @examples
#' df <- data.frame(geneA=c(2, 4, 5),
#'                  geneB=c(20, 3, 5),
#'                  geneC=c(5, 10, 21))
#' colnames(df) <- paste("Sample", 1:3)
#' plotGeneExprPerSample(df)
plotGeneExprPerSample <- function(geneExpr, ...) {
    if (is(geneExpr, "EList")) geneExpr <- geneExpr$E
    renderBoxplot(geneExpr, ...) %>%
        hc_yAxis(title=list(text="Gene expression"))
}

#' Plot library size
#'
#' @param data Data frame or matrix: gene expression
#' @param log10 Boolean: log10-transform \code{data}?
#' @param title Character: plot title
#' @param subtitle Character: plot subtitle
#' @param colour Character: data colour
#'
#' @family functions for gene expression pre-processing
#' @return Library size distribution
#' @export
#'
#' @examples
#' df <- data.frame(geneA=c(2, 4, 5),
#'                  geneB=c(20, 3, 5),
#'                  geneC=c(5, 10, 21))
#' colnames(df) <- paste("Sample", 1:3)
#' plotLibrarySize(df)
plotLibrarySize <- function(
    data, log10=TRUE,
    title="Library size distribution across samples",
    subtitle="Library size: total number of mapped reads",
    colour="orange") {

    table <- colSums(data)
    if (log10) {
        table <- log10(table)
        xAxisLabel <- "log10(Library sizes)"
    } else {
        xAxisLabel <- "Library sizes"
    }
    yAxisLabel <- "Density"

    groups <- "All samples"
    attr(groups, "Colour") <- c("All samples"=colour)
    plot <- plotDistribution(table, groups,
                             rugLabels=TRUE, vLine=FALSE, legend=FALSE,
                             title=title, valueLabel="log10(library size)") %>%
        hc_xAxis(title=list(text=xAxisLabel)) %>%
        hc_yAxis(title=list(text=yAxisLabel)) %>%
        hc_subtitle(text=paste(subtitle))
    return(plot)
}

#' Sum columns using an \code{\link{EList-class}} object
#' @inheritParams base::colSums
#'
#' @return Numeric vector with the sum of the columns
#' @export
setMethod("colSums", signature="EList", function(x, na.rm=FALSE, dims=1) {
    colSums(x$E, na.rm=na.rm, dims=dims)
})

#' @rdname appServer
#'
#' @importFrom shiny reactive observeEvent helpText removeModal
#' updateNumericInput
#' @importFrom tools file_path_sans_ext
#' @importFrom shinyjs enable disable hide show
#' @importFrom data.table fread
#' @importFrom highcharter hc_plotOptions hc_xAxis hc_chart
#'
#' @keywords internal
geNormalisationFilteringServer <- function(input, output, session) {
    ns <- session$ns
    observeEvent(input$missing, missingDataGuide("Gene expression"))

    # Warn user if gene expression is not loaded
    observe({
        if (is.null(getGeneExpression())) {
            hide("options")
            disable("processGeneExpr")
            show("missingData")
        } else {
            show("options")
            enable("processGeneExpr")
            hide("missingData")
        }
    })

    # Update available gene expression data according to loaded files
    observe({
        geneExpr <- getGeneExpression()
        if (!is.null(geneExpr)) {
            updateSelectizeInput(session, "geneExpr",
                                 choices=c(names(geneExpr),
                                           "Select gene expression data"=""))
        } else {
            updateSelectizeInput(
                session, "geneExpr",
                choices=c("No gene expression data loaded"=""))
        }
    })

    getFilter <- reactive({
        geneExpr <- isolate(input$geneExpr)
        if (is.null(geneExpr) || geneExpr == "") return(NULL)
        geneExpr <- isolate(getGeneExpression(geneExpr))

        checkSetting <- function(id) {
            if (startsWith(id, "min")) {
                res <- -Inf
            } else if (startsWith(id, "max")) {
                res <- Inf
            } else {
                res <- 0
            }

            enabled <- input[[paste0("enable", capitalize(id))]]
            if (isTRUE(enabled)) res <- input[[id]]
            return(res)
        }
        minMean        <- checkSetting("minMean")
        maxMean        <- checkSetting("maxMean")
        minVariance    <- checkSetting("minVariance")
        maxVariance    <- checkSetting("maxVariance")
        minCounts      <- checkSetting("minCounts")
        minTotalCounts <- checkSetting("minTotalCounts")

        sampleFilter   <- input$sampleFilter
        if (!is.null(sampleFilter) && sampleFilter != "") {
            samplesToKeep <- !colnames(geneExpr) %in% sampleFilter
            geneExpr <- geneExpr[ , samplesToKeep]
        }
        filtered <- filterGeneExpr(geneExpr, minMean, maxMean, minVariance,
                                   maxVariance, minCounts, minTotalCounts)
        return(filtered)
    })

    # Update sample filtering options
    observeEvent(input$geneExpr, {
        geneExpr <- isolate(input$geneExpr)
        if (is.null(geneExpr) || geneExpr == "") return(NULL)
        geneExpr <- isolate(getGeneExpression(geneExpr))

        updateSelectizeInput(
            session, "sampleFilter", server=TRUE,
            choices=colnames(geneExpr),
            options=list(placeholder="Select samples to discard",
                         plugins=list("remove_button")))
    })

    # Update filtering options based on selected gene expression data
    observeEvent(input$geneExpr, {
        geneExpr <- isolate(input$geneExpr)
        if (is.null(geneExpr) || geneExpr == "") return(NULL)
        geneExpr     <- isolate(getGeneExpression(geneExpr))

        sampleFilter <- isolate(input$sampleFilter)
        if (!is.null(sampleFilter) && sampleFilter != "") {
            samplesToKeep <- !colnames(geneExpr) %in% sampleFilter
            geneExpr <- geneExpr[ , samplesToKeep]
        }

        # Update mean range
        geneExprMean <- customRowMeans(geneExpr, fast=TRUE)
        maxMean      <- ceiling(max(geneExprMean, na.rm=TRUE))
        updateNumericInput(session, "minMean", max=maxMean)
        updateNumericInput(session, "maxMean", max=maxMean, value=maxMean)

        # Update variance range
        geneExprVar <- customRowVars(geneExpr, fast=TRUE)
        maxVar      <- ceiling(max(geneExprVar, na.rm=TRUE))
        updateNumericInput(session, "minVar", max=maxVar)
        updateNumericInput(session, "maxVar", max=maxVar, value=maxVar)

        # output$filteringAssistant <- renderHighchart({
        #     type <- input$assistantPlot
        #     if (type == "") return(NULL)
        #
        #     filter <- getFilter()
        #     if (type == "mean") {
        #         arg <- geneExprMean[filter]
        #         description <- "Mean per gene"
        #     } else if (type == "var") {
        #         arg <- geneExprVar[filter]
        #         description <- "Variance per gene"
        #     }
        #
        #     hc <- tryCatch({
        #         hcboxplot(arg)  %>%
        #             hc_chart(zoomType="y") %>%
        #             hc_tooltip(valueDecimals=2, followPointer=TRUE) %>%
        #             hc_xAxis(visible=FALSE) %>%
        #             hc_yAxis(title=list(text=description)) %>%
        #             hc_plotOptions(series=list(animation=FALSE))
        #     }, error=return, warning=return)
        #
        #     if (is(hc, "error") || is(hc, "warning"))
        #         return(NULL)
        #     else
        #         return(hc)
        # })
    })

    # Filter and normalise gene expression
    observeEvent(input$processGeneExpr, {
        time <- startProcess("processGeneExpr")

        isolate({
            geneExpr            <- getGeneExpression(input$geneExpr)
            method              <- input$normalisation
            percentile          <- input$upperquartilePercentile
            sampleFilter        <- input$sampleFilter
            priorCount          <- input$priorCount
            voom                <- input$voom
            convertToGeneSymbol <- input$convertToGeneSymbol
        })

        # Filter samples
        if (!is.null(sampleFilter) && sampleFilter != "") {
            samplesToKeep <- !colnames(geneExpr) %in% sampleFilter
            geneExpr <- geneExpr[ , samplesToKeep]
            sampleFilterText <- paste(sampleFilter, collapse=", ")
        } else {
            sampleFilterText <- "None"
        }
        sampleFilterSettings <- c("Discarded samples"=sampleFilterText)

        # Filter and normalise
        geneFilter <- getFilter()
        attr(geneExpr, "label") <- isolate(input$geneExpr)
        geneExprNorm <- normaliseGeneExpression(
            geneExpr, geneFilter, method, percentile, log2transform=TRUE,
            priorCount, performVoom=voom)

        # Convert ENSEMBL gene id to gene symbols
        if (convertToGeneSymbol) {
            rownames(geneExprNorm) <- convertGeneIdentifiers(
                org.Hs.eg.db, rownames(geneExprNorm))
            convertToGeneSymbolText <- "Yes"
        } else {
            convertToGeneSymbolText <- "No"
        }
        names(convertToGeneSymbolText) <- paste(
            "Replace unambiguous ENSEMBL gene identifiers with their gene",
            "symbols")

        # Prepare attributes
        description <- attr(geneExprNorm, "description")
        if (!is.null(description)) {
            description <- gsub(" \\(normalised\\)$", "", description)
            description <- paste(description, "(normalised)")
        } else {
            description <- "Gene expression (normalised)"
        }
        geneExprNorm <- addObjectAttrs(
            geneExprNorm,
            "settings"=c(attr(geneExprNorm, "settings"),
                         sampleFilterSettings, convertToGeneSymbolText),
            "icon"=list(symbol="cogs", colour="green"),
            "description"=description,
            "dataType"="Gene expression")

        if (is(geneExprNorm, "EList")) {
            geneExprNorm$E <- inheritAttrs(geneExprNorm$E, geneExprNorm)
        }
        setNormalisedGeneExpression(geneExprNorm)
        endProcess("processGeneExpr", time=time)
    })
    loadGeneExpressionSet(session, input, output)

    # Toggle filtering options
    toggleGEsetting <- function(id) {
        checkbox <- paste0("enable", capitalize(id))
        observe(toggleState(id, input[[checkbox]]))
    }
    toggleGEsetting("minMean")
    toggleGEsetting("maxMean")
    toggleGEsetting("minVariance")
    toggleGEsetting("maxVariance")
    toggleGEsetting("minCounts")
    toggleGEsetting("minTotalCounts")

    # Update context
    output$sampleFilterText <- renderText({
        sampleFilter <- input$sampleFilter
        if (is.null(sampleFilter) || sampleFilter == "") {
            text <- "No samples to discard"
        } else {
            len  <- length(sampleFilter)
            text <- sprintf("%s sample%s to discard",
                            len, ifelse(len == 1, "", "s"))
        }
        return(text)
    })

    output$filterText <- renderText({
        geneExpr <- input$geneExpr
        if (is.null(geneExpr) || geneExpr == "") return(NULL)
        geneExpr <- isolate(getGeneExpression(geneExpr))

        filter <- sum(getFilter())
        total  <- nrow(geneExpr)
        ratio  <- filter/total * 100

        msg <- sprintf("%s genes of %s (%s%%)", filter, total, round(ratio))
        return(msg)
    })

    output$normalisationText <- renderText({
        text <- input$normalisation
        if (text == "upperquartile") {
            text <- sprintf("%s (%s)", text, input$upperquartilePercentile)
        }
        if (input$voom) text <- paste(text, "+ voom")
        return(text)
    })

    output$logTransformText <- renderText({
        paste("Prior count:", input$priorCount)
    })
}

attr(geNormalisationFilteringUI, "loader") <- "data"
attr(geNormalisationFilteringServer, "loader") <- "data"
Any scripts or data that you put into this service are public.
psichomics documentation built on Nov. 8, 2020, 5:44 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
psichomics
Graphical Interface for Alternative Splicing Quantification, Analysis and Visualisation

R/data_geNormalisationFiltering.R
In psichomics: Graphical Interface for Alternative Splicing Quantification, Analysis and Visualisation

Defines functions geNormalisationFilteringServer plotLibrarySize plotGeneExprPerSample filterGeneExpr convertGeneIdentifiers loadGeneExpressionSet normaliseGeneExpression geNormalisationFilteringUI geNormalisationFilteringInterface geneExprFilteringSetting

Documented in convertGeneIdentifiers filterGeneExpr geNormalisationFilteringInterface geNormalisationFilteringServer geNormalisationFilteringUI loadGeneExpressionSet normaliseGeneExpression plotGeneExprPerSample plotLibrarySize

Try the psichomics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

psichomics Graphical Interface for Alternative Splicing Quantification, Analysis and Visualisation

R/data_geNormalisationFiltering.R In psichomics: Graphical Interface for Alternative Splicing Quantification, Analysis and Visualisation

Defines functions geNormalisationFilteringServer plotLibrarySize plotGeneExprPerSample filterGeneExpr convertGeneIdentifiers loadGeneExpressionSet normaliseGeneExpression geNormalisationFilteringUI geNormalisationFilteringInterface geneExprFilteringSetting

Documented in convertGeneIdentifiers filterGeneExpr geNormalisationFilteringInterface geNormalisationFilteringServer geNormalisationFilteringUI loadGeneExpressionSet normaliseGeneExpression plotGeneExprPerSample plotLibrarySize

Try the psichomics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

psichomics
Graphical Interface for Alternative Splicing Quantification, Analysis and Visualisation

R/data_geNormalisationFiltering.R
In psichomics: Graphical Interface for Alternative Splicing Quantification, Analysis and Visualisation