R/makeSummarizedExperimentFromExpressionSet.R

Defines functions makeSummarizedExperimentFromExpressionSet geneRangeMapper probeRangeMapper naiveRangeMapper .from_DataFrame_to_AnnotatedDataFrame .from_AnnotatedDataFrame_to_DataFrame .from_GRangesList_to_FeatureData .from_GRanges_to_FeatureData .from_rowRanges_to_FeatureData

Documented in geneRangeMapper makeSummarizedExperimentFromExpressionSet naiveRangeMapper probeRangeMapper

##
## makeSummarizedExperimentFromExpressionSet

## coercion

.from_rowRanges_to_FeatureData <- function(from)
{
    if (is(from, "GRanges")) {
        fd <- .from_GRanges_to_FeatureData(from)
    } else if (is(from, "GRangesList")) {
        fd <- .from_GRangesList_to_FeatureData(from)
    } else {
        stop("class ", sQuote(class(from)),
             " is not a supported type for rowRanges coercion")
    }
    featureNames(fd) <- names(from)
    fd
}

.from_GRanges_to_FeatureData <- function(from)
{
    data <- as.data.frame(from)

    ## the first mcols are automatically included in the data.frame from
    ## as.data.frame, the secondary mcols holds the metadata for the first
    ## metadata columns.
    metaData <- mcols(mcols(from, use.names=FALSE), use.names=FALSE)
    if (is.null(metaData)) {
        metaData <- as.data.frame(matrix(ncol=0, nrow=NCOL(data)))
    } else {
        metaData <- as.data.frame(metaData)
    }
    AnnotatedDataFrame(data, metaData)
}
.from_GRangesList_to_FeatureData <- function(from)
{
    data <- as.data.frame(mcols(from, use.names=FALSE))

    ## the first mcols are automatically included in the data.frame from
    ## as.data.frame, the secondary mcols holds the metadata for the first
    ## metadata columns.
    metaData <- mcols(mcols(from, use.names=FALSE), use.names=FALSE)
    if (is.null(metaData)) {
        metaData <- as.data.frame(matrix(ncol=0, nrow=NCOL(data)))
    } else {
        metaData <- as.data.frame(metaData)
    }
    AnnotatedDataFrame(data, metaData)
}

.from_AnnotatedDataFrame_to_DataFrame <- function(from)
{
    df <- DataFrame(pData(from), row.names=rownames(from))
    mcols(df) <- DataFrame(varMetadata(from))
    df
}

.from_DataFrame_to_AnnotatedDataFrame <- function(df)
{
    data <- as(df, "data.frame")
    metaData <- mcols(df, use.names=FALSE)
    if (is.null(metaData)) {
        metaData <- as.data.frame(matrix(ncol=0, nrow=NCOL(data)))
    } else {
        metaData <- as(metaData, "data.frame")
    }
    AnnotatedDataFrame(data, metaData)
}

## If the ExpressionSet has featureData with range information make
## GRanges out of that, otherwise make an empty GRangesList with names
## from the featureNames
naiveRangeMapper <- function(from)
{
    nms <- featureNames(from)
    res <- tryCatch({
        makeGRangesFromDataFrame(pData(featureData(from)),
                                 keep.extra.columns = TRUE)
    }, error = function(e) {
        res <- relist(GRanges(), vector("list", length=length(nms)))
        mcols(res) <- .from_AnnotatedDataFrame_to_DataFrame(featureData(from))
        res
    })
    names(res) <- nms
    res
}

# Simple ProbeId to Range mapper
# Probes with multiple ranges are dropped
# The sign of the chromosome location is assumed to contain the strand
# information
probeRangeMapper <- function(from)
{
    annotation <- annotation(from)
    if (identical(annotation, character(0))) {
        return(naiveRangeMapper(from))
    }
    if (requireNamespace("annotate", quietly = TRUE)) {
        annotationPackage <- annotate::annPkgName(annotation)
        test <- require(annotationPackage, character.only = TRUE,
                        quietly = TRUE)
        if (test) {
            db <- get(annotationPackage, envir = asNamespace(annotationPackage))
            pid <- featureNames(from)
            locs <- AnnotationDbi::select(
                db, pid, columns = c("CHR", "CHRLOC", "CHRLOCEND"))
            locs <- na.omit(locs)
            dups <- duplicated(locs$PROBEID)
            if (any(dups)) {
                locs <- locs[!dups, , drop = FALSE]
            }
            strand <- ifelse(locs$CHRLOC > 0, "+", "-")
            res <- GRanges(seqnames = locs$CHR,
                           ranges = IRanges(abs(locs$CHRLOC),
                                            abs(locs$CHRLOCEND)),
                           strand = strand)
            names(res) <- locs$PROBEID

            if (NROW(res) < length(pid)) {
                warning(length(pid) - NROW(res),
                        " probes could not be mapped.", call. = FALSE)
            }
            res
        } else {
            stop("Failed to load ", sQuote(annotationPackage), " package",
                 call. = FALSE)
        }
    } else {
        stop("Failed to load annotate package", call. = FALSE)
    }
}

# Simple ProbeId to Gene mapper
# Is there a way to get the txDb given the annotation package?
geneRangeMapper <- function(txDbPackage, key = "ENTREZID")
{
    function(from) {
        annotation <- annotation(from)
        if (identical(annotation, character(0))) {
            return(naiveRangeMapper(from))
        }
        if (requireNamespace("annotate", quietly = TRUE)) {
            annotationPackage <- annotate::annPkgName(annotation)
            test <- require(annotationPackage, character.only = TRUE,
                            quietly = TRUE)
            if (test) {
                db <- get(annotationPackage,
                          envir = asNamespace(annotationPackage))
                pid <- featureNames(from)
                probeIdToGeneId <-
                    AnnotationDbi::mapIds(db, pid, key, "PROBEID")
                geneIdToProbeId <-
                    setNames(names(probeIdToGeneId), probeIdToGeneId)

                if (requireNamespace(txDbPackage, quietly = TRUE)) {
                    txDb <- get(txDbPackage, envir = asNamespace(txDbPackage))
                    genes <- GenomicFeatures::genes(txDb)
                    probesWithAMatch <-
                        probeIdToGeneId[probeIdToGeneId %in% names(genes)]
                    res <- genes[probesWithAMatch]
                    names(res) <- geneIdToProbeId[names(res)]
                    if (NROW(res) < length(pid)) {
                        warning(length(pid) - NROW(res),
                                " probes could not be mapped.", call. = FALSE)
                    }
                    res
                } else {
                    stop("Failed to load ", sQuote(txDbPackage), " package",
                         call. = FALSE)
                }

            } else {
                stop("Failed to load ", sQuote(annotationPackage), " package",
                     call. = FALSE)
            }
        } else {
            stop("Failed to load annotate package", call. = FALSE)
        }
    }
}

makeSummarizedExperimentFromExpressionSet <-
    function(from, mapFun = naiveRangeMapper, ...)
{
    mapFun <- match.fun(mapFun)
    rowRanges <- mapFun(from, ...)
    matches <- match(names(rowRanges),
                     featureNames(from),
                     nomatch = 0)
    from <- from[matches, drop = FALSE]
    assays <- as.list(assayData(from))
    colData <- .from_AnnotatedDataFrame_to_DataFrame(phenoData(from))
    metadata <- SimpleList(
        experimentData = experimentData(from),
        annotation = annotation(from),
        protocolData = protocolData(from)
    )

    SummarizedExperiment(
        assays = assays,
        rowRanges = rowRanges,
        colData = colData,
        metadata = metadata
    )
}

setAs("ExpressionSet", "RangedSummarizedExperiment", function(from)
{
    makeSummarizedExperimentFromExpressionSet(from)
})

setAs("ExpressionSet", "SummarizedExperiment", function(from)
{
    as(makeSummarizedExperimentFromExpressionSet(from), "SummarizedExperiment")
})

setAs("RangedSummarizedExperiment", "ExpressionSet",
      function(from)
{
    assayData <- list2env(as.list(assays(from)))

    numAssays <- length(assayData)

    if (numAssays == 0) {
        assayData$exprs <- new("matrix")
    } else if (!"exprs" %in% ls(assayData)) {
        ## if there isn't an exprs assay we need to pick one as exprs,
        ## so rename the first element exprs and issue a warning.
        exprs <- ls(assayData)[[1]]
        warning("No assay named ", sQuote("exprs"), " found, renaming ",
                exprs, " to ", sQuote("exprs"), ".")
        assayData[["exprs"]] <- assayData[[exprs]]
        rm(list=exprs, envir=assayData)
    }
    lockEnvironment(assayData, bindings = TRUE)

    featureData <- .from_rowRanges_to_FeatureData(rowRanges(from))
    phenoData <- .from_DataFrame_to_AnnotatedDataFrame(colData(from))

    metadata <- metadata(from)

    experimentData <- if (!is.null(metadata$experimentData)) {
        metadata$experimentData
    } else {
        MIAME()
    }

    annotation <- if (!is.null(metadata$annotation)) {
        metadata$annotation
    } else {
        character()
    }

    protocolData <- if (!is.null(metadata$protocolData)) {
        metadata$protocolData
    } else {
        annotatedDataFrameFrom(assayData, byrow=FALSE)
    }

    ExpressionSet(assayData,
                  phenoData = phenoData,
                  featureData = featureData,
                  experimentData = experimentData,
                  annotation = annotation,
                  protocolData = protocolData
                  )
})

setAs("SummarizedExperiment", "ExpressionSet", function(from)
    as(as(from, "RangedSummarizedExperiment"), "ExpressionSet")
)
Bioconductor/SummarizedExperiment documentation built on Nov. 4, 2022, 11:59 a.m.