R/DEG_list_helper.R

Defines functions drawVol drawMA getOverlapDF geneCount removeAmbigDEGs extractGeneList ent2sym

Documented in drawMA drawVol ent2sym extractGeneList geneCount getOverlapDF removeAmbigDEGs

#' Gene ID mapper for entrez and symbol
#'
#' Map Entrez ID to HGNC symbol and vice versa.
#' Uses LazygeneInfo if no gene ID information is provided at geneMap.
#' geneMap, if provided, should be a data frame with at least two columns named 'hgnc_symbol' and 'entrez'.
#' ent2sym is meant for quick ID conversion and uses match, which does not handle many-to-one or many-to-many relationships.
#'
#' @param genes genes either in Entrez or Symbol (human)
#' @param geneMap gene mapper data frame. Uses LazygeneInfo if null.
#' @return genes in wanted IDs
#' @examples
#' ent2sym("TP53")
#' @export

ent2sym <- function(genes, geneMap = NULL) {
    if (is.null(geneMap)) geneMap <- Lazy2::LazygeneInfo
    genes <- as.character(genes)

    if (all(grepl("^[0-9]+$", genes[which(!is.na(genes) & genes != "")]))) {
        out <- geneMap$hgnc_symbol[match(genes, geneMap$entrez)]
    } else {
        out <- geneMap$entrez[match(genes, geneMap$hgnc_symbol)]
    }
    return(as.character(out))
}



#' Gene list extraction from result
#'
#' Get DEG list from list of results generated by limma or DESeq2.
#'
#' @param resultsLS list of result data frames. (limma results default)
#' @param fco fold change cut-offs (NOT LOG). Used to filter logFC column.
#' @param qco adjusted p value cut-offs. Used to filter adj.P.Val column.
#' @param cnt DEG count constrains (not used).
#' @param remove_ambi Remove genes both in up and down?
#' @return Nested list of DEGs
#' @export

extractGeneList <- function(resultsLS, fco, qco, cnt = NULL, remove_ambi = FALSE) {
    ## Add logFC, p-value, adjusted p-value column detector ##
    ##
    ##
    ##

    if (length(qco) != 1 & length(qco) != length(resultsLS)) stop("qco length should be either 1 or same as resultsLS.")
    if (length(fco) != 1 & length(fco) != length(resultsLS)) stop("fco length should be either 1 or same as resultsLS.")

    if (length(qco) == 1) qco <- structure(rep(qco, length(resultsLS)), names = names(resultsLS))
    if (length(fco) == 1) fco <- structure(rep(fco, length(resultsLS)), names = names(resultsLS))

    geneList <- list(up = list(), dn = list(), to = list())
    for (aid in names(resultsLS)) {
        resultDF.f <- resultsLS[[aid]]
        resultDF.f <- resultDF.f[which(!is.na(resultDF.f$entGene)), ]

        geneList$up[[aid]] <- with(resultDF.f, unique(entGene[which(adj.P.Val < qco[aid] & logFC >= log2(fco[aid]))]))
        geneList$dn[[aid]] <- with(resultDF.f, unique(entGene[which(adj.P.Val < qco[aid] & logFC <= -log2(fco[aid]))]))
        geneList$to[[aid]] <- unique(resultDF.f$entGene)
    }

    ## Remove ambiguous option
    if (remove_ambi) geneList <- removeAmbigDEGs(geneList)

    return(geneList)
}



#' Remove ambiguous DEGs
#'
#' Remove DEGs if they're both in Down and Up.
#' @param geneList gene list
#' @return gene list with ambiguous genes removed
#' @examples
#' \dontrun{
#' geneList <- removeAmbigDEGs(geneList)
#' }
#' @export

removeAmbigDEGs <- function(geneList) {
    ambi <- map2(geneList$up, geneList$dn, function(x, y) intersect(x, y))
    geneList$up <- map2(geneList$up, ambi, function(x, y) setdiff(x, y))
    geneList$dn <- map2(geneList$dn, ambi, function(x, y) setdiff(x, y))
    return(geneList)
}



#' Count number of genes in geneList
#'
#' Lazy function for gene number for geneList
#' @param geneList Nested DEG list. See example for gene list structure.
#' @examples
#' set.seed(1234)
#' geneList <- list(
#'     up = list(A = sample(letters, 10), B = sample(letters, 5), C = sample(letters, 4)),
#'     dn = list(A = sample(letters, 6), B = sample(letters, 15), C = sample(letters, 7)),
#'     to = list(A = letters, B = letters, C = letters)
#' )
#' geneCount(geneList)
#' @export

geneCount <- function(geneList) {
    sapply(geneList, function(ls) sapply(ls, length))
}



#' Get pairwise overlap significance for a list
#'
#' Calculate genelist set overlap (replaces pairsOverlap).
#' Metric : hypergeometric test p value, enrichment factor, tanimoto coef.
#' @param gls Unnested list of genes
#' @param tgls List of gene space for each entry in gls
#' @param unique_combn Only return unique combination (no replicates) Default FALSE, will be set to TRUE in later versions
#' @return dataframe
#' @examples
#' \dontrun{
#' pairdf <- getOverlapDF(geneList$up, geneList$to)
#' pairm <- reshape2::acast(pairdf, ix1 ~ ix2, value.var = "ef")
#' }
#' @export

getOverlapDF <- function(gls, tgls, unique_combn = FALSE) {
    if (unique_combn) {
        pairdf <- data.frame(t(combn(names(gls), 2)))
        colnames(pairdf) <- c("Var1", "Var2")
    } else {
        pairdf <- expand.grid(names(gls), names(gls), stringsAsFactors = FALSE)
        pairdf <- pairdf[order(factor(pairdf$Var1, levels = names(gls))), ]
    }

    LS <- apply(pairdf, 1, function(v) {
        ix1 <- as.character(v[1])
        ix2 <- as.character(v[2])
        gspace <- intersect(tgls[[ix1]], tgls[[ix2]])
        setA <- intersect(gls[[ix1]], gspace)
        setB <- intersect(gls[[ix2]], gspace)
        hgeo <- hypergeoTest(setA, setB, gspace)
        eff <- getEnrichmentFactor(setA, setB, gspace, 2)
        tan <- tanimotoCoef(setA, setB)
        data.frame(ix1 = ix1, ix2 = ix2, hgeo = hgeo, ef = eff, tan = tan)
    })
    pairdf <- do.call(rbind, LS)
}



#' Draw MA, volcano plot
#'
#' Draw MA plot from resultDF generated from limma (data.frame format). DESeq2 results should have column names matching limma.
#' @param resultDF result dataframe generated from limma or DEseq2. If DESeq2, column name should be 'adj.P.Val','logFC','AveExpr'
#' @param qco adjusted p value cut off
#' @param fco fold change cut off
#' @param ttl_pre main title prefix
#' @param xlim x-axis limit
#' @param ylim y-axis limit
#' @return plot
#' @examples
#' \dontrun{
#' drawMA(resultDF, qco = 0.1, fco = 2.0, ttl_pre = "title")
#' }
#' @export

drawMA <- function(resultDF, qco, fco, ttl_pre, ylim = NULL) {
    ui <- which(resultDF$adj.P.Val < qco & resultDF$logFC > log2(fco))
    di <- which(resultDF$adj.P.Val < qco & resultDF$logFC < -log2(fco))
    gcnt <- paste("up:", length(ui), "dn:", length(di))
    if (is.null(ylim)) {
        ylim <- c(-max(abs(resultDF$logFC), na.rm = TRUE), max(abs(resultDF$logFC), na.rm = TRUE))
    }
    mtitle <- paste(ttl_pre, "fc", fco, "qv", qco, gcnt)

    # MA
    plot(logFC ~ AveExpr, data = resultDF, pch = 20, main = mtitle, cex = 0.05, ylim = ylim)
    points(logFC ~ AveExpr, data = resultDF[c(ui, di), ], pch = 20, col = "red", cex = 0.25)
    abline(h = 0, col = "blue", lty = 2)
}

#' @describeIn drawMA
#' Draw volcano plot
#' @export

drawVol <- function(resultDF, qco, fco, ttl_pre, xlim = NULL) {
    ui <- which(resultDF$adj.P.Val < qco & resultDF$logFC > log2(fco))
    di <- which(resultDF$adj.P.Val < qco & resultDF$logFC < -log2(fco))
    gcnt <- paste("up:", length(ui), "dn:", length(di))
    if (is.null(xlim)) {
        xlim <- c(-max(abs(resultDF$logFC), na.rm = TRUE), max(abs(resultDF$logFC), na.rm = TRUE))
    }
    mtitle <- paste(ttl_pre, "fc", fco, "qv", qco, gcnt)

    # volcano
    plot(-log10(adj.P.Val) ~ logFC, data = resultDF, pch = 20, main = mtitle, cex = 0.05, xlim = xlim)
    points(-log10(adj.P.Val) ~ logFC, data = resultDF[c(ui, di), ], pch = 20, col = "red", cex = 0.25)
    abline(v = 0, col = "blue", lty = 2)
}
LittleHeronCodes/Lazy2 documentation built on April 20, 2024, 11:24 p.m.