#' @title DAVID Gene Ontology Wrapper
#'
#' @description DAVID Gene Ontology Wrapper
#' @param davidWS A DAVIDWebService class object.
#' @param input_data data.frame with LFC values in column 1, and FDR values in column 2
#' @param min_lfc minimum value of the absolute value of the LFC required to be significant
#' @param list_name A string that will be used to identify the submitted list.
#' @param FA_cats A vector of strings indicating which annotation categories to search for. Use getAllAnnotationCategoryNames() to see a list of all possible annotation categories.
#' @param split_at a character at which annotation term strings will be split. If NULL, strings will not be split.
#' @import limma
#' @import stringi
#' @import RDAVIDWebService
#' @import org.Hs.eg.db
#' @import AnnotationDbi
#' @import GOplot
#' @import dplyr
#' @export
#' @examples
#' run_DAVID(davidWS, deg_list, list_name, FA_cats, split_at)
run_DAVID <- function(davidWS, input_data, min_lfc, list_name, FA_cats, split_at) {
# Convert gene symbols into Ensembl IDs -------------------------------------
degs <- subset(
input_data,
abs(input_data[, 1]) >= min_lfc & input_data[, 2] <= 0.05 &
!is.na(input_data[, 1]) & !is.na(input_data[, 2])
)
if (nrow(degs) < 1) {
print("No DEGs detected")
return(NULL)
}
gs_data <- data.frame(rownames(degs), degs)
colnames(gs_data) <- c("Gene_Symbol", "logFC", "padj")
deg_list <- AnnotationDbi::mapIds(
org.Hs.eg.db, rownames(degs), keytype = "SYMBOL", column = "ENSEMBL"
)
ensembl_IDs <- data.frame(deg_list)
ensembl_IDs <- data.frame(rownames(ensembl_IDs), ensembl_IDs)
colnames(ensembl_IDs) <- c("Gene_Symbol", "Ensembl_ID")
ensembl_IDs <- dplyr::left_join(ensembl_IDs, gs_data)
# Add gene list if needed then select correct gene list ---------------------
gene_lists <- getGeneListNames(davidWS)
list_position <- 0
if (length(gene_lists) > 0) {
for (x in 1:length(gene_lists)) {
if (gene_lists[x] == list_name) list_position <- x
}
}
if (list_position == 0) {
addList(
davidWS, deg_list, idType = "ENSEMBL_GENE_ID",
listName = list_name, listType = "Gene"
)
gene_lists <- getGeneListNames(davidWS)
} else setCurrentGeneListPosition(davidWS, list_position)
print("Running DAVID analysis on gene list:")
print(gene_lists[getCurrentGeneListPosition(davidWS)])
# Run GO analysis -----------------------------------------------------------
setAnnotationCategories(davidWS, FA_cats)
FA_chart <- getFunctionalAnnotationChart(davidWS)
if (nrow(FA_chart) < 1) return(NULL)
# Convert Ensembl IDs back into gene symbols --------------------------------
gene_list <- data.frame(FA_chart$Genes)
for (r in 1:nrow(gene_list)) {
gene_names <- limma::strsplit2(gene_list[r, 1], ", ")[1, ]
for (n in 1:length(gene_names)) {
ID_sub <- subset(ensembl_IDs, ensembl_IDs$Ensembl_ID == gene_names[n])
gene_names[n] <- ID_sub$Gene_Symbol[1]
}
gene_names <- stri_join_list(list(gene_names), sep = ", ")
gene_list[r, 1] <- gene_names
}
# Split "Term" column into 2 if applicable & put relevant data in a table ---
if (is.null(split_at)) {
FA_sub <- data.frame(
FA_chart$Category, FA_chart$Term, FA_chart$Count, FA_chart$X.,
gene_list, FA_chart$Fold.Enrichment, FA_chart$FDR/100
)
colnames(FA_sub) <- c(
"Annotation_Category", "Term", "Count",
"Percent_of_input_list", "Genes", "Fold.Enrichment", "FDR"
)
return(FA_sub)
} else {
split_IDs <- limma::strsplit2(FA_chart$Term, split_at)
if (ncol(split_IDs) > 2) {
for (x in nrow(split_IDs)) {
new_cell <- split_IDs[x, 2]
for (y in 3:ncol(split_IDs)) {
if (split_IDs[x, y] != "") {
new_cell <- stri_join(c(new_cell, split_IDs[x, y]), sep = ":")
}
}
split_IDs[x, 2] <- new_cell
}
split_IDs <- split_IDs[, 1:2]
}
FA_sub <- data.frame(
FA_chart$Category, split_IDs, FA_chart$Count, FA_chart$X.,
gene_list, FA_chart$Fold.Enrichment, FA_chart$FDR/100
)
colnames(FA_sub) <- c(
"Annotation_Category", "ID", "Term", "Count",
"Percent_of_input_list", "Genes", "Fold.Enrichment", "FDR"
)
terms_data <- data.frame(FA_sub[, c(1:3, 8, 6)])
colnames(terms_data) <- c(
"category", "ID", "term", "adj_pval", "genes"
)
}
# Calculate Z-scores for each row -------------------------------------------
genes_data <- data.frame(ensembl_IDs$Gene_Symbol, ensembl_IDs$logFC)
colnames(genes_data) <- c("ID", "logFC")
genes_data <- subset(genes_data, !is.na(genes_data$logFC))
cd <- GOplot::circle_dat(terms_data, genes_data)
z_scores <- cd[, c(2, 8)]
z_scores <- z_scores[!duplicated(z_scores), ]
z_scores <- dplyr::left_join(FA_sub, z_scores, by = "ID")
# Return a list with all of the results -------------------------------------
list(
DAVID_FA = z_scores,
GOplot_results = cd,
ensembl_conversion = ensembl_IDs
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.