R/mCSEAIntegrate.R

Defines functions .integrateCore mCSEAIntegrate

Documented in mCSEAIntegrate

#' Integrate methylation and expression
#'
#' Uses mCSEA methylation analysis results and expression values to search for
#' significant correlations between DMRs methylation and close genes expression.
#'
#' @param mCSEAResults The object generated by mCSEATest function
#' @param exprData A matrix or data frame with genes in rows and samples in
#' columns. A SummarizedExperiment object can be used too
#' @param regionType The region types to be represented. Must be one or more of
#' "promoters", "genes", "CGI" and "custom"
#' @param geneIDs Gene identifiers used in exprData. One of "SYMBOL",
#' "ENSEMBL", "ENTREZID", "GENEID", "REFSEQ" or "UNIGENE"
#' @param dmrName The DMR of interest to correlate with expression (e.g. gene
#' name, CGI name...). If NULL (default), all DMRs with P-Value < pcutoff are
#' selected
#' @param pcutoff P-Value threshold to select DMRs if dmrName = NULL
#' @param minCor Correlation threshold to output the results
#' @param minP Correlation P-Value threshold to output the results
#' @param makePlot If TRUE, generate corelation and save them in the folder
#' specified by folder parameter
#' @param folder Directory to save the correlation plots if makePlot = TRUE
#' @param nproc Number of processors to be used
#'
#' @return A data.frame with the integration results.
#'
#' @author Jordi Martorell Marugán, \email{jordi.martorell@@genyo.es}
#'
#' @seealso \code{\link{rankProbes}}, \code{\link{mCSEATest}}
#'
#' @examples
#' data(precomputedmCSEA)
#' data(exprTest)
#'
#' resultsInt <- mCSEAIntegrate(myResults, exprTest, "promoters", "ENSEMBL",
#'                             "GATA2", makePlot = FALSE)
#'
#' resultsInt
#'
#' @export

mCSEAIntegrate <- function(mCSEAResults, exprData,
                            regionType = c("promoters", "genes", "CGI",
                                            "custom"),
                            geneIDs = "SYMBOL",
                            dmrName = NULL,
                            pcutoff = 0.05,
                            minCor = 0.5, minP = 0.05, makePlot = TRUE,
                            folder = ".", nproc = 1){

    regionType <- match.arg(regionType, choices=c("promoters", "genes", "CGI",
                                                    "custom"), several.ok=TRUE)

    geneIDs <- match.arg(geneIDs, choices=c("SYMBOL", "ENSEMBL",
                                            "ENTREZID", "GENEID",
                                            "REFSEQ", "UNIGENE"))

    pheno <- mCSEAResults[["pheno"]]
    methData <- mCSEAResults[["methData"]]
    platform <- mCSEAResults[["platform"]]

    # Remove genes with Standar Deviation = 0
    ngenesPre <- nrow(exprData)
    exprData <- exprData[apply(exprData, 1, sd) != 0,]
    ngenesPost <- nrow(exprData)
    
    message(ngenesPre -  ngenesPost, " genes removed from exprData due to Standar Deviation = 0")

    # Check input objects

    if (any(class(mCSEAResults) != "list" | length(mCSEAResults) < 5)){
        stop("mCSEAResults must be the object generated
            by mCSEATest function")
    }

    if (!any(class(exprData) == "data.frame" | class(exprData) == "matrix" |
            class(exprData) == "SummarizedExperiment" |
            class(exprData) == "RangedSummarizedExperiment")){
        stop("exprData must be a data frame, a matrix or a SummarizedExperiment
            object")
    }

    if (class(geneIDs) != "character"){
        stop("geneIDs must be a character object")
    }

    if (class(dmrName) != "character" & !is.null(dmrName)){
        stop("dmrName must be a character object or NULL")
    }

    if (any(class(pcutoff) != "numeric" | pcutoff < 0)){
        stop("pcutoff must be a positive number")
    }

    if (any(class(minCor) != "numeric" | minCor < 0)){
        stop("minCor must be a positive number")
    }

    if (any(class(minP) != "numeric" | minP < 0)){
        stop("minP must be a positive number")
    }

    if (class(makePlot) != "logical"){
        stop("makePlot must be a logical object (TRUE7FALSE)")
    }

    if (class(folder) != "character"){
        stop("folder must be a character object")
    }

    if (!file.exists(folder)){
        dir.create(folder)
    }

    if (any(class(nproc) != "numeric" | nproc < 0)){
        stop("nproc must be a positive number")
    }

    if ("SummarizedExperiment" %in% is(exprData) |
        "RangedSummarizedExperiment" %in% is(exprData)){
        exprData <- SummarizedExperiment::assay(exprData)
    }

    if (!identical(colnames(methData), colnames(exprData))) {
        if (length(setdiff(colnames(methData), colnames(exprData))) == 0 &&
            length(setdiff(colnames(exprData), colnames(methData))) == 0) {
            exprData <- exprData[,colnames(methData)]
        }

        else {
            stop("Sample labels of methData and exprData must be the same")
        }
    }


    if (length(levels(pheno[,1])) != 2){
        stop("Integration can only be performed with a 2 groups comparison")
    }

    for (reg in regionType) {
        if (is.null(mCSEAResults[[reg]])){
            warning("mCSEAResults does not contain ", reg, " regionType. ",
                    "Skipping it.")
            regionType = regionType[!regionType %in% reg]
        }
    }

    if (length(regionType) == 0) {
        stop("None of the specified regionTypes in mCSEAResults")
    }


    intResults <- data.frame()

    exprData <- as.data.frame(exprData)

    rangeGenes <- suppressMessages(GenomicFeatures::genes(
        Homo.sapiens::Homo.sapiens, columns = c("CDSCHROM", geneIDs)))

    for (reg in regionType) {

        message("Integrating ", reg, " methylation with gene expression")

        sigRegions <- mCSEAResults[[reg]]
        sigRegions <- sigRegions[sigRegions[, "pval"] < pcutoff,]

        if (is.null(dmrName)) {
            region2 <- rownames(sigRegions)
        }
        else {
            region2 <- dmrName
        }

        corCpGs <- strsplit(sigRegions[region2,"leadingEdge"], ", ")
        names(corCpGs) <- region2

        if (platform == "450k") {
            annot <- mCSEAdata::annot450K
        }
        else {
            annot <- mCSEAdata::annotEPIC
        }

        cl <- parallel::makeCluster(nproc)

        parallel::clusterExport(cl=cl,
                                varlist=c("region2", "methData", "exprData",
                                            "pheno", "geneIDs",
                                            "minCor", "minP", "reg", "corCpGs",
                                            "annot", "rangeGenes", "folder",
                                            "makePlot", ".integrateCore"),
                                envir=environment())

        indInt <- parallel::parLapply(cl, region2,
                            fun = function(x) {
                                .integrateCore(x, methData=methData,
                                                exprData=exprData, pheno=pheno,
                                                geneIDs=geneIDs,
                                                minCor=minCor, minP=minP,
                                                regionType=reg, corCpGs=corCpGs,
                                                annot=annot,
                                                rangeGenes=rangeGenes,
                                                folder=folder,
                                                makePlot=makePlot)})
        


        parallel::stopCluster(cl)
        
		intResults <- rbind(intResults, do.call("rbind", indInt))
    }

    intResults <- intResults[order(abs(intResults[["Correlation"]]),
                                    decreasing=TRUE),]
    intResults[["adjPValue"]] <- p.adjust(intResults[["PValue"]],
                                            method = "fdr")
    rownames(intResults) <- seq_len(nrow(intResults))

    return(intResults)

}

.integrateCore <- function(indRegion, methData, exprData, pheno, geneIDs,
                            minCor, minP, regionType, corCpGs, annot,
                            rangeGenes, folder, makePlot){
	
    corCpGs <- corCpGs[[indRegion]]

    pheno <- pheno[,1]

    if (length(corCpGs) > 1) {
        meth1 <- colMeans(methData[corCpGs, pheno == levels(pheno)[1]], 
                            na.rm = TRUE)
        meth2 <- colMeans(methData[corCpGs, pheno == levels(pheno)[2]], 
                            na.rm = TRUE)
	}
	else {
		meth1 <- unlist(methData[corCpGs, pheno == levels(pheno)[1]])
		meth2 <- unlist(methData[corCpGs, pheno == levels(pheno)[2]])
	}
    meth <- c(meth1, meth2)


    annotCpGs <- annot[corCpGs]

    nearGenes <- IRanges::subsetByOverlaps(rangeGenes, annotCpGs, maxgap = 1500)

    for (gene in unlist(S4Vectors::mcols(nearGenes)[,geneIDs])){

        if (gene %in% rownames(exprData)){

            feat <- gen <- correlations <- pvals <- c()

            expr1 <- colMeans(exprData[gene, pheno == levels(pheno)[1]],
                                na.rm = TRUE)
            expr2 <- colMeans(exprData[gene, pheno == levels(pheno)[2]],
                                na.rm = TRUE)
            expr <- c(expr1, expr2)

            corResult <- try(cor.test(meth, expr))

            interesting <- TRUE

            if (class(corResult) == "try-error") {
            	interesting <- FALSE
            }

            else if (regionType == "promoters" & corResult[["estimate"]] > 0) {
                interesting <- FALSE
            }

            else if (regionType == "genes" & corResult[["estimate"]] < 0) {
                interesting <- FALSE
            }

            if (corResult[["p.value"]] < minP &
                abs(corResult[["estimate"]]) > minCor &
                interesting){


                feat <- c(feat, indRegion)
                gen <- c(gen, gene)
                correlations <- c(correlations, corResult[["estimate"]])
                pvals <- c(pvals, corResult[["p.value"]])

                xLim <- c(min(expr1, expr2), max(expr1, expr2))
                yLim <- c(min(meth1, meth2), max(meth1, meth2))

                if (makePlot){

                    if (regionType == "promoters"){
                        mainTitle <- paste0(indRegion,
                                            " promoter methylation vs. ",
                                            gene, " expression\nCorrelation = ",
                                            round(corResult[["estimate"]], 2),
                                            "   P-value = ",
                                            signif(corResult[["p.value"]], 2))
                    }
                    else if (regionType == "genes") {
                        mainTitle <- paste0(indRegion, " body methylation vs. ",
                                            gene, " expression\nCorrelation = ",
                                            round(corResult[["estimate"]], 2),
                                            "   P-value = ",
                                            signif(corResult[["p.value"]], 2))
                    }

                    else if (regionType == "CGI"){
                        mainTitle <- paste0(indRegion, " CGI methylation vs. ",
                                            gene, " expression\nCorrelation = ",
                                            round(corResult[["estimate"]], 2),
                                            "   P-value = ",
                                            signif(corResult[["p.value"]], 2))
                    }

                    else {
                        mainTitle <- paste0(indRegion, " methylation vs. ",
                                            gene, " expression\nCorrelation = ",
                                            round(corResult[["estimate"]], 2),
                                            "   P-value = ",
                                            signif(corResult[["p.value"]], 2))
                    }

                    pdf(paste0(folder, "/", indRegion, "_", gene, "_",
                                regionType, ".pdf"))

                    plot(expr1, meth1, xlim=xLim, ylim=yLim, main=mainTitle,
                        xlab="Expression", ylab="Methylation", pch=16,
                        col="darkred")
                    par(new=TRUE)
                    plot(expr2, meth2, xlim=xLim, ylim=yLim, xaxt="n",
                        yaxt="n", xlab="", ylab="", pch=16, col="dodgerblue3")

                    if (corResult[["estimate"]] < 0){

                        legend("topright", legend=levels(pheno), pch=16,
                                col=c("darkred", "dodgerblue3"))
                    }

                    else {
                        legend("bottomright", legend=levels(pheno), pch=16,
                                col=c("darkred", "dodgerblue3"))
                    }

                    abline(lm(meth ~ expr), lty=2)

                    dev.off()
                }
            }
        }
    }

    if (exists("feat") && length(feat) > 0) {
        out <- data.frame(Feature=feat,
                            regionType=regionType,
                            Gene=gen, Correlation=correlations,
                            PValue=pvals)
        return(out)
    }
}

Try the mCSEA package in your browser

Any scripts or data that you put into this service are public.

mCSEA documentation built on Nov. 8, 2020, 5:37 p.m.