#' Read in DESeq data from TSV file
#'
#' @param path Path to DESeq data
#' @param filter_invalid Remove entry if log2Foldchange, pvalue and padj is NA
#' @export
#' @import readr magrittr
#' @return Tibble of DESeq data
read_deseq <- function(path, filter_invalid = TRUE) {
read_tsv(path) %>%
rename(EnsemblGeneID = id) %>%
filter(!is.na(log2FoldChange) & !is.na(pvalue) & !is.na(padj)) %>%
return()
}
#' Attach biomart variables to deseq data
#'
#' @import dplyr biomaRt
#' @export
#' @param dat Tibble to attach biomart results to.
#' @param join_key_dat Join key on the dataset
#' @param join_key_bm Join key on biomart
#' @param bmart_dataset biomart dataset to be used. Defaults to mmusculus_gene_ensembl
#' @param extra_attributes Any extra attributes to attach besides the gene name
#' @return Tibble with attached biomart variables
attach_biomart <- function(
dat,
join_key_dat = "EnsemblGeneID",
join_key_bm = "ensembl_gene_id",
bmart_dataset = "mmusculus_gene_ensembl",
extra_attributes = c()
) {
dat <- getBM(
filters = join_key_bm,
attributes = c(join_key_bm, "external_gene_name", extra_attributes),
values = dat[join_key_dat][[1]],
mart = useMart("ensembl", dataset=bmart_dataset),
uniqueRows = TRUE,
verbose = FALSE
) %>%
inner_join(dat, ., by = setNames(join_key_bm, join_key_dat)) %>%
rename("GeneName" = "external_gene_name")
}
#' Add boolean 'Significant' variable to deseq dataset
#'
#' @param deseq_dat Tibble of DESeq data
#' @param padj_min Double of minimum adjusted pvalue
#' @export
#' @import dplyr
#' @return Tibble of entries satisfying significance filter
set_significant <- function(deseq_dat, padj_min = 0.05) {
deseq_dat %>%
mutate(Significant = padj <= padj_min) %>%
return()
}
#' Export deseq folder to excel with default read_deseq
#'
#' @param path Path to deseq files
#' @param outputpath Path to output Excel files
#' @param file_pattern Pattern to identify deseq files.
#' @export
#' @import writexl purrr stringr
deseq_to_excel <- function(path, outputpath, file_pattern = ".tsv$") {
# Remove ending forward slah if it's there
path <- path %>% str_replace(pattern = "/$", replacement = "")
outputpath <- outputpath %>% str_replace(pattern = "/$", replacement = "")
# Read in all files from the path
dir(path, pattern = file_pattern) %>%
# Walk over all path names
iwalk(
# Combine path with filename
~str_glue("{path}/{.x}") %>%
read_deseq() %>%
# Write excel file (replace the file ending)
write_xlsx(path = paste0(outputpath, "/", str_replace(string = .x, pattern = "\\.[^.]*$", replacement = ".xlsx")))
)
}
#' Create a volcano plot from DESeq data set
#'
#' @param deseq_dat DESeq dataset to plot
#' @param label_var Name of variable containing the names for plotting
#' @param label_top_n Annotate the top n genes of the data set
#' @export
#' @import ggplot2 ggrepel magrittr
#' @return ggplot2 object of volcano plot
volcano_plot <- function(deseq_dat, label_var = "GeneName", label_top_n = 20) {
if (!(label_var %in% names(deseq_dat))) {
stop(paste0(label_var, "not in data frame. Please attach it with attach_biomart"))
}
if (!("Significant") %in% names(deseq_dat)) {
warning("Variable 'Significant' not found, added it using 'set_significant' with default values (padj <= 0.05)")
deseq_dat <- deseq_dat %>% set_significant()
}
plot <- deseq_dat %>%
ggplot(aes(log2FoldChange, -log10(padj))) +
geom_point(aes(color = Significant), alpha = 0.3, shape = 1) +
scale_color_manual(values = c("grey", "red")) +
geom_text_repel(
data = deseq_dat %>% filter(Significant == TRUE) %>% arrange(padj) %>% head(label_top_n),
aes_string(label = label_var)
)
return(plot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.