R/NanoStringExperiment-munge.R

#' Append metadata to assay in long format
#' 
#' Convert assay DataFrame into long format and append
#' labels and meta to the new DataFrame
#' 
#' @param data NanoStringExperiment object
#' @param ... additional parameters
#' 
#' @return matrix of results in long format
#' 
#' @examples
#' data(exampleNSEData)
#' head(munge(testExp, mapping = ~`cell_line` + exprs))
#' 
#' @export
setGeneric("munge", signature = "data", 
    function(data, ...) standardGeneric("munge"))

#' Append metadata to assay in long format
#' 
#' Convert assay DataFrame into long format and append
#' labels and meta to the new DataFrame
#' 
#' @param data NanoStringExperiment object
#' @param mapping formula to map data by
#' @param extradata additional data to append
#' @param ... additional parameters
#' @param elt expression matrix for assay element \code{"exprs"}
#' 
#' @importFrom NanoStringNCTools signatureScores
#' 
#' @return matrix of results in long format
#' 
#' @examples
#' data(exampleNSEData)
#' head(munge(testExp, mapping = ~`cell_line` + exprs))
#' 
#' @export
setMethod("munge", "NanoStringExperiment", 
    function(data, 
             mapping = update(design(data), exprs ~ .), 
             extradata = NULL, 
             elt = "exprs", 
             ...) {
        mapping <- try(mapping, silent = TRUE)
        if (inherits(mapping, "try-error")) {
            stop("\"mapping\" argument is missing")
        }
        if (inherits(mapping, "formula")) {
            vars <- all.vars(mapping)
        } else if (is.list(mapping)) {
            vars <- unique(unlist(lapply(mapping, all.vars), 
                use.names = FALSE))
        }
        hasGeneMatrix <- "GeneMatrix" %in% vars
        hasSignatureMatrix <- "SignatureMatrix" %in% vars
        if (hasGeneMatrix || hasSignatureMatrix) {
            sampleLabels <- dimnames(data)[[2L]]
            df <- DataFrame(SampleName = sampleLabels, 
                colData(data), check.names = FALSE)
            if (!is.null(extradata) && any(vars %in% colnames(extradata))) {
                df <- cbind(df, extradata)
            }
            if (hasGeneMatrix) {
                mat <- assayDataElement(data, elt)
                if ("GeneName" %in% colnames(rowData(data))) {
                    rownames(mat) <- rowData(data)[["GeneName"]]
                }
                df[["GeneMatrix"]] <- t(mat)
            }
            if (hasSignatureMatrix) {
                mat <- signatureScores(data, elt)
                colnames(mat) <- sampleLabels
                df[["SignatureMatrix"]] <- t(mat)
            }
            if (!all(vars %in% colnames(df))) {
                stop("\"mapping\" contains undefined variables")
            }
            return(as(df[vars], "DataFrame"))
        }
        hasFeatureVars <- any(vars %in% colnames(rowData(data)))
        hasSampleVars <- any(vars %in% colnames(colData(data)))
        hasLog2Summaries <- any(vars %in% rownames(.summaryMetadata[["log2"]]))
        hasSummaries <- any(vars %in% rownames(.summaryMetadata[["moments"]]))
        hasQuantiles <- 
            any(vars %in% rownames(.summaryMetadata[["quantiles"]]))
        if (hasQuantiles && !hasLog2Summaries) {
            hasSummaries <- TRUE
        }
        hasAggregates <- hasLog2Summaries || hasSummaries
        hasAssayDataElts <- any(vars %in% assayNames(data))
        useSignatures <- "SignatureName" %in% vars
        if (hasAggregates) {
            hasFeatureVars <- hasFeatureVars || ("FeatureName" %in% vars)
            hasSampleVars <- hasSampleVars || ("SampleName" %in% vars)
            if (hasAssayDataElts) {
                stop("\"mapping\" argument cannot contain ", 
                    "both aggregates and disaggregates")
            }
            if (hasFeatureVars && hasSampleVars) {
                stop("\"mapping\" argument cannot aggregate ",
                    "using both feature and sample variables")
            }
            if (useSignatures && hasFeatureVars) {
                stop("\"mapping\" argument cannot aggregate ",
                    "using both signatures and feature variables")
            }
            if (useSignatures && hasSampleVars) {
                stop("\"mapping\" argument cannot aggregate ",
                    "using both signatures and sample variables")
            }
            if (!hasFeatureVars && !hasSampleVars && !useSignatures) {
                stop("\"mapping\" argument contains an ambiguous aggregation")
            }
        }
        if (hasLog2Summaries && hasSummaries) {
            stop("\"mapping\" argument cannot use ",
                "both log2 and linear aggregations")
        }
        if (useSignatures && hasFeatureVars) {
            stop("\"mapping\" argument cannot use ",
                "both signatures and feature variables")
        }
        if ((!hasAggregates && !hasAssayDataElts) && 
            (hasFeatureVars || useSignatures) && hasSampleVars) {
            stop("\"mapping\" argument contains ",
                "an ambiguous variable selection")
        }
        if (hasAssayDataElts) {
            assayDataElts <- intersect(vars, assayNames(data))
            if (useSignatures) {
                df <- lapply(assayDataElts, function(elt) {
                    as.vector(signatureScores(data, elt))
                })
                names(df) <- assayDataElts
                df <- DataFrame(
                    SignatureName = 
                        rep.int(names(signatures(data)), ncol(data)), 
                    SampleName = 
                        rep(sampleNames(data), 
                            each = length(signatures(data))), 
                    df, check.names = FALSE)
            }
            else {
                df <- lapply(assayDataElts, function(elt) {
                    as.vector(assay(data, elt))
                })
                names(df) <- assayDataElts
                df <- DataFrame(
                    FeatureName = rep.int(dimnames(data)[[1L]], ncol(data)), 
                    SampleName = rep(dimnames(data)[[2L]], nrow(data)), 
                    df, check.names = FALSE)
            }
        }
        else if (hasAggregates) {
            if (useSignatures) {
                df <- summary(data, MARGIN = 1L, log2scale = hasLog2Summaries, 
                    elt = elt, signatureScores = TRUE)
                df <- df[, intersect(vars, colnames(df)), drop = FALSE]
                df <- copyRowNames(df, "SignatureName")
            }
            else {
                MARGIN <- 1L + hasSampleVars
                df <- summary(data, MARGIN = MARGIN, 
                    log2scale = hasLog2Summaries, elt = elt)
                df <- df[, intersect(vars, colnames(df)), drop = FALSE]
                if (MARGIN == 1L) {
                    df <- copyRowNames(df, "FeatureName")
                }
                else {
                    df <- copyRowNames(df, "SampleName")
                }
            }
        }
        else {
            df <- NULL
        }
        if (hasFeatureVars) {
            fvars <- intersect(vars, colnames(rowData(data)))
            fdf <- rowData(data)[, fvars, drop = FALSE]
            if (is.null(df)) {
                df <- copyRowNames(fdf, "FeatureName")
            }
            else {
                df <- cbind(df, fdf[df[["FeatureName"]], , drop = FALSE])
            }
        }
        if (useSignatures && is.null(df)) {
            df <- DataFrame(SignatureName = names(signatures(data)))
        }
        if (hasSampleVars) {
            svars <- intersect(vars, colnames(colData(data)))
            sdf <- colData(data)[, svars, drop = FALSE]
            if (is.null(df)) {
                df <- copyRowNames(sdf, "SampleName")
            }
            else {
                df <- cbind(df, sdf[df[["SampleName"]], , drop = FALSE])
            }
        }
        if (!is.null(extradata)) {
            matchFeatureNames <- 
                identical(rownames(extradata), featureNames(data))
            matchSampleNames <- 
                identical(rownames(extradata), sampleNames(data))
            if (!matchFeatureNames && !matchSampleNames) {
                stop("\"extradata\" 'rownames' do not match ",
                    "'featureNames' or 'sampleNames'")
            }
            evars <- intersect(vars, colnames(extradata))
            edf <- extradata[, evars, drop = FALSE]
            if (matchFeatureNames) {
                if (is.null(df)) {
                    df <- copyRowNames(edf, "FeatureName")
                }
                else {
                    df <- cbind(df, edf[df[["FeatureName"]], , drop = FALSE])
                }
            }
            else {
                if (is.null(df)) {
                    df <- copyRowNames(edf, "SampleName")
                }
                else {
                    df <- cbind(df, edf[df[["SampleName"]], , drop = FALSE])
                }
            }
        }
        if (!all(vars %in% colnames(df))) {
            print(all(vars %in% colnames(df)))
            print(unlist(colnames(df)))
            print(vars)
            stop("\"mapping\" contains undefined variables")
        }
        rownames(df) <- NULL
        df
})

#' Copy row names
#' 
#' Non-exported helper function.
#' Adds keys to data frame.
#' 
#' @noRd
copyRowNames <- function(df, key) {
    rn <- DataFrame(rownames(df))
    colnames(rn) <- key
    cbind(rn, df)
}
Nanostring-Biostats/NanoStringExperiment documentation built on April 28, 2022, 6:24 a.m.