R/mirnaFuns.R
In affycoretools: Functions useful for those doing repetitive analyses with Affymetrix GeneChips

Documented in makeHmap mirna2mrna

########################################
##
## Copyright 2012 James W. MacDonald
##
##
## mirnaFuns.R - Functions for miRNA data, primarily in the context of
##               existing mRNA data.
##
##
## 2012-04-09 First pass at functions
##
###########################################

convertIDs <- function(ids, from = TRUE){
    if(from) {
        ids <- gsub("_st|_x_st|_s_st|_at", "", ids)
        ids <- gsub("-star", "*", ids)
    }else{
        ids <- gsub("\\*", "-star", ids)
        ids <- paste(ids, "_st", sep = "")
    }
    ids
}

allToMat <- function(object){
    if(is.matrix(object)){
        return(object)
    } else {
        if(is(object, "ExpressionSet")){
            return(exprs(object))
        } else {
            if(is.data.frame(object)){
                return(as.matrix(object))
            } else {
                stop(paste(ls()[1], "should be a matrix, ExpressionSet or data.frame"),
                     call. = FALSE)
            }
        }
    }
}



#' A function to map miRNA to mRNA.
#' 
#' This function is intended use when there are miRNA and mRNA data for the
#' same subjects, and the goal is to detect mRNAs that appear to be targeted by
#' the miRNA.
#' 
#' This function is intended to take a vector of miRNA IDs that are
#' significantly differentially expressed in a given experiment and then map
#' those IDs to putative mRNA transcripts that the miRNAs are supposed to
#' target. The mRNA transcript IDs are then mapped to chip-specific probeset
#' IDs, which are then subsetted to only include those probesets that were also
#' significantly differentially expressed.
#' 
#' The output from this function is intended as input for
#' \code{\link{makeHmap}}.
#' 
#' @param miRNAids A character vector of miRNA IDs. Currently only supports
#' Affymetrix platform.
#' @param miRNAannot Character. The filename (including path if not in working
#' directory) for the file containing miRNA to mRNA mappings.
#' @param mRNAids A character vector of mRNA IDs. Currently only supports
#' Affymetrix platform.
#' @param orgPkg Character. The Bioconductor organism package (e.g.,
#' org.Hs.eg.db) to be used for mapping.
#' @param chipPkg Character. The Bioconductor chip-specific package (e.g.,
#' hgu133plus2.db) to be used for mapping.
#' @param sanger Boolean. Is the miRNAannot file a Sanger miRBase targets file?
#' These can be downloaded from
#' http://www.ebi.ac.uk/enright-srv/microcosm/cgi-bin/targets/v5/download.pl
#' @param miRNAcol Numeric. If using a Sanger miRBase targets file, leave
#' \code{NULL}. Otherwise, use this to indicate which column of the miRNAannot
#' file contains miRNA IDs.
#' @param mRNAcol Numeric. If using Sanger miRBase targets file, leave
#' \code{NULL}. Otherwise, use this to indicate which column of the miRNAannot
#' file contains mRNA IDs.
#' @param transType Character. Designates the type of transcript ID for mRNA
#' supplied by the miRNAannot file. If using the Sanger miRBase files, this is
#' ensembl. Other choices include refseq and accnum.
#' @return A list with names that correspond to each significant miRNA, and the
#' mRNA probeset IDs that are targeted by that miRNA.
#' @author James W. MacDonald
#' @seealso makeHmap
#' @keywords manip
#' @export mirna2mrna
mirna2mrna <- function(miRNAids, miRNAannot, mRNAids, orgPkg, chipPkg, sanger = TRUE,
                       miRNAcol = NULL, mRNAcol = NULL, transType = "ensembl"){
    transType <- match.arg(transType, c("ensembl","refseq","accnum"))
    if(!sanger && (is.null(miRNAcol) || is.null(mRNAcol)))
        stop(paste("If the annotation data are not Sanger microcosm target data\n",
                   "then you must supply the correct column number for miRNA and mRNA data.\n\n",
                   sep = ""), call. = FALSE)
    if(sanger){
        if(is.null(miRNAcol)) miRNAcol <- 2
        if(is.null(mRNAcol)) mRNAcol <- 12
    }
    ## tab <- switch(transType,
    ##               ensembl = "ensembl_trans",
    ##               "refseq")

    miRNAids <- convertIDs(miRNAids)
    annot <- read.table(miRNAannot, sep = "\t", stringsAsFactors = FALSE)
    
    annotlst <- tapply(1:nrow(annot), annot[,miRNAcol], function(x) annot[x,mRNAcol])
    ## for each miRNA get corresponding transcript IDs
    prblst <- annotlst[names(annotlst) %in% miRNAids]

    ## convert to probeIDs
    require(chipPkg, quietly = TRUE, character.only = TRUE)
    require(orgPkg, quietly = TRUE, character.only = TRUE)
    orgDBloc <- system.file("extdata", sub("db","sqlite", orgPkg),
                            package = orgPkg)
    attachSQL <- paste("attach '", orgDBloc, "' as orgDB;", sep = "")
    con <- dbconn(get(paste(sub("\\.db", "", chipPkg), "ENTREZID", sep = "")))
    dbGetQuery(con, attachSQL)

    makeSql <- function(ids, transType){
        sql <- switch(transType,
                      ensembl = paste("select distinct probe_id from probes ",
                      "inner join orgDB.genes as gi on probes.gene_id=gi.gene_id ",
                      "inner join orgDB.ensembl_trans as et on gi._id=et._id ",
                      "where et.trans_id in ('", paste(ids, collapse = "','"), "');", sep = ""),
                      paste("select distinct probe_id from probes ",
                            "inner join orgDB.genes as gi on probes.gene_id=gi.gene_id ",
                            "inner join orgDB.refseq as et on gi._id=et._id ",
                            "where et.accession in ('", paste(ids, collapse = "','"), "');", sep = ""))
        return(sql)
    }
    
    mrnaprbs <- lapply(prblst, function(x) dbGetQuery(con, makeSql(x, transType))[,1])
    intprbs <- lapply(mrnaprbs, function(x) mRNAids[mRNAids %in% x])
    intprbs <- intprbs[sapply(intprbs, length) > 0]
    dbGetQuery(con, "detach orgDB;")
    intprbs
}



#' A function to create a heatmap-like object or matrix of correlations between
#' miRNA and mRNA data.
#' 
#' This function is intended for use when both miRNA and mRNA data are
#' available for the same samples. In this situation it may be advantageous to
#' compute correlations between the two RNA types, in order to detect mRNA
#' transcripts that are targeted by miRNA.
#' 
#' As noted above, this function is intended to generate output from
#' simultaneous analyses of miRNA and mRNA data for the same samples, the goal
#' being either a heatmap like plot of correlations, or the data (or both).
#' 
#' If creating a plot, note that if the number of significant mRNA probes is
#' large, the resulting heatmap will have many rows and will not plot correctly
#' on the usual graphics device within R. In order to visualize, it is almost
#' always better to output as a pdf. In addition, the dimensions of this pdf
#' will have to be adjusted so the row names for the heatmap will be legible.
#' As an example, a heatmap with 10 miRNA transcripts and 100 mRNA transcripts
#' will likely need a pdf with a width argument of 6 and a height argument of
#' 25 or 30. It may require some experimentation to get the correct arguments
#' to the \code{pdf} function.
#' 
#' Also please note that this function by necessity outputs rectangular data.
#' However, there will be many instances in which a given miRNA isn't thought
#' to target a particular mRNA. Whenever this occurs, the heatmap will have a
#' white cell, and the output data for that combination will be NA.
#' 
#' @param mRNAdat An \code{ExpressionSet}, \code{data.frame} or \code{matrix}
#' of mRNA expression values. The row.names for these data should correspond to
#' the manufacturer's probe ID. Currently, the only manufacturer supported is
#' Affymetrix.
#' @param miRNAdat An \code{ExpressionSet}, \code{data.frame} or \code{matrix}
#' of mRNA expression values. The row.names for these data should correspond to
#' the manufacturer's probe ID. Currently, the only manufacturer supported is
#' Affymetrix.
#' @param mRNAlst A \code{list} of mRNA probe IDs where the names of each list
#' item are mirBase miRNA IDs. Usually this will be the output from
#' \code{\link{mirna2mrna}}.
#' @param mRNAvec A numeric vector used to subset or reorder the mRNA data, by
#' column. If \code{NULL}, this will simply be 1:ncol(mRNAdat).
#' @param miRNAvec A numeric vector used to subset or reorder the miRNA data,
#' by column. If \code{NULL}, this will simply be 1:ncol(miRNAdat).
#' @param chipPkg Character. The name of the chip-specific annotation package
#' (e.g., "hgu133plus2.db").
#' @param header Character. The plot title if a heatmap is output.
#' @param plot Boolean. Should a heatmap be generated?
#' @param out Boolean. Should the matrix of correlation coefficients be output?
#' @param lhei From \code{\link[gplots]{heatmap.2}}. This controls the ratio of the heatmap
#' to the key size. If there are very few mRNAs being plotted, you may need to change to
#' something like (0.5, 5).
#' @param margins From \code{\link[gplots]{heatmap.2}}. This controls the right and bottom
#' margins, respectively. Increase either value if names get cut off.
#' @return This function will output a numeric matrix if the 'out' argument is
#' \code{TRUE}.
#' @author James W. MacDonald
#' @seealso mirna2mrna
#' @keywords manip
#' @export makeHmap
makeHmap <- function(mRNAdat, miRNAdat, mRNAlst, mRNAvec = NULL, miRNAvec = NULL, chipPkg,
                     header, plot = TRUE, out = TRUE, lhei = c(0.05, 0.95), margins = c(5,8)){
    mRNAdat <- allToMat(mRNAdat)
    miRNAdat <- allToMat(miRNAdat)
    if(is.null(mRNAvec)) mRNAvec <- seq_len(ncol(mRNAdat))
    if(is.null(miRNAvec)) miRNAvec <- seq_len(ncol(miRNAdat))
    rn <- unique(do.call("c", mRNAlst))
    cn <- gsub("\\.probe_id", "", names(mRNAlst))
    if(!all(rn %in% row.names(mRNAdat)))
        warning(paste("Not all mRNA probes being tested are found in\n",
                      "the row.names of the mRNA data.\n\n", sep = ""), call. = FALSE)
    
    mat <- matrix(NA, nrow = length(rn), ncol = length(cn))
    row.names(mat) <- rn
    colnames(mat) <- cn
    cn2 <- convertIDs(cn, FALSE)
    ## have to account for probesets that start with v11_
    bads <- cn2[!cn2 %in% row.names(miRNAdat)]
    if(length(bads) > 0){
        ind <- which(cn2 %in% bads)
        for(i in seq(along = bads))
            cn2[ind[i]] <- grep(cn2[ind[i]], row.names(miRNAdat), value = TRUE)
    }
    if(!all(cn2 %in% row.names(miRNAdat)))
        warning(paste("Not all miRNA probes being tested are found in\n",
                      "the row.names of the miRNA data.\n\n", sep = ""), call. = FALSE)
    
    getCor <- function(miRNA, mRNAlst.itm){
        sapply(seq(along = mRNAlst.itm), function(x) cor(miRNAdat[miRNA,miRNAvec],
                   mRNAdat[mRNAlst.itm[x],mRNAvec]))
    }
    for(i in seq(along = cn2))
        mat[mRNAlst[[i]], cn[i]] <- getCor(cn2[i], mRNAlst[[i]])
    
    rn2 <- mapIds(get(chipPkg), rn, "SYMBOL", "PROBEID", multiVals = "first")
    rn2 <- ifelse(is.na(rn2), rn, rn2)
    row.names(mat) <- rn2
    ## alphabetize matrix
    ord <- order(row.names(mat))
    mat <- mat[ord,,drop = FALSE]
    if(plot){
        heatmap.2(mat, Rowv = FALSE, Colv = FALSE, dendrogram = "none",
                  trace = "none", density.info = "none", lhei = lhei,
                  cexCol = 0.6, colsep = 1:ncol(mat), rowsep = 1:nrow(mat),
                  sepcolor = "lightgrey", main = header, margins = margins)
    }
    if(out){
        mat <- data.frame(mRNA.ID = rn[ord], mat, row.names = NULL)
        return(mat)
    }
}

Any scripts or data that you put into this service are public.

affycoretools documentation built on Nov. 8, 2020, 6 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

affycoretools
Functions useful for those doing repetitive analyses with Affymetrix GeneChips

R/mirnaFuns.R
In affycoretools: Functions useful for those doing repetitive analyses with Affymetrix GeneChips

Defines functions makeHmap mirna2mrna allToMat convertIDs

Documented in makeHmap mirna2mrna

Try the affycoretools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

affycoretools Functions useful for those doing repetitive analyses with Affymetrix GeneChips

R/mirnaFuns.R In affycoretools: Functions useful for those doing repetitive analyses with Affymetrix GeneChips

Defines functions makeHmap mirna2mrna allToMat convertIDs

Documented in makeHmap mirna2mrna

Try the affycoretools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

affycoretools
Functions useful for those doing repetitive analyses with Affymetrix GeneChips

R/mirnaFuns.R
In affycoretools: Functions useful for those doing repetitive analyses with Affymetrix GeneChips