#' Gene ID mapper for entrez and symbol
#'
#' Map Entrez ID to HGNC symbol and vice versa.
#' Uses LazygeneInfo if no gene ID information is provided at geneMap.
#' geneMap, if provided, should be a data frame with at least two columns named 'hgnc_symbol' and 'entrez'.
#' ent2sym is meant for quick ID conversion and uses match, which does not handle many-to-one or many-to-many relationships.
#'
#' @param genes genes either in Entrez or Symbol (human)
#' @param geneMap gene mapper data frame. Uses LazygeneInfo if null.
#' @return genes in wanted IDs
#' @examples
#' ent2sym("TP53")
#' @export
ent2sym <- function(genes, geneMap = NULL) {
if (is.null(geneMap)) geneMap <- Lazy2::LazygeneInfo
genes <- as.character(genes)
if (all(grepl("^[0-9]+$", genes[which(!is.na(genes) & genes != "")]))) {
out <- geneMap$hgnc_symbol[match(genes, geneMap$entrez)]
} else {
out <- geneMap$entrez[match(genes, geneMap$hgnc_symbol)]
}
return(as.character(out))
}
#' Gene list extraction from result
#'
#' Get DEG list from list of results generated by limma or DESeq2.
#'
#' @param resultsLS list of result data frames. (limma results default)
#' @param fco fold change cut-offs (NOT LOG). Used to filter logFC column.
#' @param qco adjusted p value cut-offs. Used to filter adj.P.Val column.
#' @param cnt DEG count constrains (not used).
#' @param remove_ambi Remove genes both in up and down?
#' @return Nested list of DEGs
#' @export
extractGeneList <- function(resultsLS, fco, qco, cnt = NULL, remove_ambi = FALSE) {
## Add logFC, p-value, adjusted p-value column detector ##
##
##
##
if (length(qco) != 1 & length(qco) != length(resultsLS)) stop("qco length should be either 1 or same as resultsLS.")
if (length(fco) != 1 & length(fco) != length(resultsLS)) stop("fco length should be either 1 or same as resultsLS.")
if (length(qco) == 1) qco <- structure(rep(qco, length(resultsLS)), names = names(resultsLS))
if (length(fco) == 1) fco <- structure(rep(fco, length(resultsLS)), names = names(resultsLS))
geneList <- list(up = list(), dn = list(), to = list())
for (aid in names(resultsLS)) {
resultDF.f <- resultsLS[[aid]]
resultDF.f <- resultDF.f[which(!is.na(resultDF.f$entGene)), ]
geneList$up[[aid]] <- with(resultDF.f, unique(entGene[which(adj.P.Val < qco[aid] & logFC >= log2(fco[aid]))]))
geneList$dn[[aid]] <- with(resultDF.f, unique(entGene[which(adj.P.Val < qco[aid] & logFC <= -log2(fco[aid]))]))
geneList$to[[aid]] <- unique(resultDF.f$entGene)
}
## Remove ambiguous option
if (remove_ambi) geneList <- removeAmbigDEGs(geneList)
return(geneList)
}
#' Remove ambiguous DEGs
#'
#' Remove DEGs if they're both in Down and Up.
#' @param geneList gene list
#' @return gene list with ambiguous genes removed
#' @examples
#' \dontrun{
#' geneList <- removeAmbigDEGs(geneList)
#' }
#' @export
removeAmbigDEGs <- function(geneList) {
ambi <- map2(geneList$up, geneList$dn, function(x, y) intersect(x, y))
geneList$up <- map2(geneList$up, ambi, function(x, y) setdiff(x, y))
geneList$dn <- map2(geneList$dn, ambi, function(x, y) setdiff(x, y))
return(geneList)
}
#' Count number of genes in geneList
#'
#' Lazy function for gene number for geneList
#' @param geneList Nested DEG list. See example for gene list structure.
#' @examples
#' set.seed(1234)
#' geneList <- list(
#' up = list(A = sample(letters, 10), B = sample(letters, 5), C = sample(letters, 4)),
#' dn = list(A = sample(letters, 6), B = sample(letters, 15), C = sample(letters, 7)),
#' to = list(A = letters, B = letters, C = letters)
#' )
#' geneCount(geneList)
#' @export
geneCount <- function(geneList) {
sapply(geneList, function(ls) sapply(ls, length))
}
#' Get pairwise overlap significance for a list
#'
#' Calculate genelist set overlap (replaces pairsOverlap).
#' Metric : hypergeometric test p value, enrichment factor, tanimoto coef.
#' @param gls Unnested list of genes
#' @param tgls List of gene space for each entry in gls
#' @param unique_combn Only return unique combination (no replicates) Default FALSE, will be set to TRUE in later versions
#' @return dataframe
#' @examples
#' \dontrun{
#' pairdf <- getOverlapDF(geneList$up, geneList$to)
#' pairm <- reshape2::acast(pairdf, ix1 ~ ix2, value.var = "ef")
#' }
#' @export
getOverlapDF <- function(gls, tgls, unique_combn = FALSE) {
if (unique_combn) {
pairdf <- data.frame(t(combn(names(gls), 2)))
colnames(pairdf) <- c("Var1", "Var2")
} else {
pairdf <- expand.grid(names(gls), names(gls), stringsAsFactors = FALSE)
pairdf <- pairdf[order(factor(pairdf$Var1, levels = names(gls))), ]
}
LS <- apply(pairdf, 1, function(v) {
ix1 <- as.character(v[1])
ix2 <- as.character(v[2])
gspace <- intersect(tgls[[ix1]], tgls[[ix2]])
setA <- intersect(gls[[ix1]], gspace)
setB <- intersect(gls[[ix2]], gspace)
hgeo <- hypergeoTest(setA, setB, gspace)
eff <- getEnrichmentFactor(setA, setB, gspace, 2)
tan <- tanimotoCoef(setA, setB)
data.frame(ix1 = ix1, ix2 = ix2, hgeo = hgeo, ef = eff, tan = tan)
})
pairdf <- do.call(rbind, LS)
}
#' Draw MA, volcano plot
#'
#' Draw MA plot from resultDF generated from limma (data.frame format). DESeq2 results should have column names matching limma.
#' @param resultDF result dataframe generated from limma or DEseq2. If DESeq2, column name should be 'adj.P.Val','logFC','AveExpr'
#' @param qco adjusted p value cut off
#' @param fco fold change cut off
#' @param ttl_pre main title prefix
#' @param xlim x-axis limit
#' @param ylim y-axis limit
#' @return plot
#' @examples
#' \dontrun{
#' drawMA(resultDF, qco = 0.1, fco = 2.0, ttl_pre = "title")
#' }
#' @export
drawMA <- function(resultDF, qco, fco, ttl_pre, ylim = NULL) {
ui <- which(resultDF$adj.P.Val < qco & resultDF$logFC > log2(fco))
di <- which(resultDF$adj.P.Val < qco & resultDF$logFC < -log2(fco))
gcnt <- paste("up:", length(ui), "dn:", length(di))
if (is.null(ylim)) {
ylim <- c(-max(abs(resultDF$logFC), na.rm = TRUE), max(abs(resultDF$logFC), na.rm = TRUE))
}
mtitle <- paste(ttl_pre, "fc", fco, "qv", qco, gcnt)
# MA
plot(logFC ~ AveExpr, data = resultDF, pch = 20, main = mtitle, cex = 0.05, ylim = ylim)
points(logFC ~ AveExpr, data = resultDF[c(ui, di), ], pch = 20, col = "red", cex = 0.25)
abline(h = 0, col = "blue", lty = 2)
}
#' @describeIn drawMA
#' Draw volcano plot
#' @export
drawVol <- function(resultDF, qco, fco, ttl_pre, xlim = NULL) {
ui <- which(resultDF$adj.P.Val < qco & resultDF$logFC > log2(fco))
di <- which(resultDF$adj.P.Val < qco & resultDF$logFC < -log2(fco))
gcnt <- paste("up:", length(ui), "dn:", length(di))
if (is.null(xlim)) {
xlim <- c(-max(abs(resultDF$logFC), na.rm = TRUE), max(abs(resultDF$logFC), na.rm = TRUE))
}
mtitle <- paste(ttl_pre, "fc", fco, "qv", qco, gcnt)
# volcano
plot(-log10(adj.P.Val) ~ logFC, data = resultDF, pch = 20, main = mtitle, cex = 0.05, xlim = xlim)
points(-log10(adj.P.Val) ~ logFC, data = resultDF[c(ui, di), ], pch = 20, col = "red", cex = 0.25)
abline(v = 0, col = "blue", lty = 2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.