.libPaths("D:/R-3.5.1/library")
#' @import pheatmap gplots tibble tidyr readr dplyr viridis
# options
options(tibble.print_max = 10, tibble.width = Inf)
#' plot heatmap by integrate data from DESeq and enrichment analysis
#'
#' more information
#' @param fileName is the post-processed csv file from enrichment analysis
#' @return a list of tibbles of external gene names from different enrichment pathway
#' @export
get_pathway_namelist <- function(fileName){
#read the enrichment dataset as tibble
enrichment_csv <- file(fileName)
string_list <- scan(enrichment_csv, what = '', sep = '\n')
start <- which(startsWith(string_list, 'Gene/Gene Set Overlap'))
enrichment_tibble <- readr::read_csv(file = fileName, skip = start + 1,
col_types = cols(
`Entrez Gene Id` = col_character())
)
close(enrichment_csv)
# the enrichment.table have data structure like this
# Entrez.Gene.Id Gene.Symbol Gene.Description HALLMARK_E2F_TARGETS
# <dbl> <fct> <fct> <fct>
# 1 7153 TOP2A topoisomerase (DNA) II alpha 170kDa HALLMARK_E2F_TARGETS
# 2 991 CDC20 cell division cycle 20 homolog (S. cerevisiae) HALLMARK_E2F_TARGETS
# 3 10733 PLK4 polo-like kinase 4 HALLMARK_E2F_TARGETS
# 4 6790 AURKA aurora kinase A HALLMARK_E2F_TARGETS
# 5 5347 PLK1 polo-like kinase 1 HALLMARK_E2F_TARGETS
# 6 983 CDK1 cyclin-dependent kinase 1 HALLMARK_E2F_TARGETS
# 7 332 BIRC5 baculoviral IAP repeat containing 5 HALLMARK_E2F_TARGETS
# 8 11004 KIF2C kinesin family member 2C HALLMARK_E2F_TARGETS
# 9 9133 CCNB2 cyclin B2 HALLMARK_E2F_TARGETS
# 10 24137 KIF4A kinesin family member 4A HALLMARK_E2F_TARGETS
# create a number of list corresponding to the number pathway in the enrichment analysis
# First, get a vector of those pathway names
pathway_names_vector <- colnames(enrichment_tibble)[-c(1:3)]
# a list of tibble contain two column, column 1 is gene name
#
pathway_genes_list <- list()
for(i in 1:length(pathway_names_vector)){
pathway_genes_list[[i]] <- dplyr::select(enrichment_tibble, `Gene Symbol`,
pathway_names_vector[i]) %>%
dplyr::filter(.[[2]] != 'NA') %>%
dplyr::select(`Gene Symbol`) %>%
dplyr::pull(var = 1)
}
# assign the pathway name to each subset on the list
names(pathway_genes_list) <- pathway_names_vector
# add one additional gene names vector, which are those genes are not in any
#of those pathway
genes_union <- purrr::reduce(pathway_genes_list, union)
gene_left <- filter(enrichment_tibble, !(`Gene Symbol` %in% genes_union)) %>%
select(`Gene Symbol`)
# add it into the list
pathway_genes_list <- c(pathway_genes_list, left = gene_left)
return(pathway_genes_list)
}
#' get tibbles of different subgrouped genes by pathway
#'
#' more information
#' @param count_matrix the RNA-seq expression level count matrix, typically a
#' normalized count matrix is used
#' @param enrichment the modified enrichment export csv file from xlsx file
#' @param fun the fun used to get lists of genes names from enrichment analysis
#' export
#' @return a list of tibbles of count matrix for different pathways
#' @export
#'
#'
subset_count_maxtrix <- function(count_matrix, enrichment){
# convert count matrix as tibble
# careful when you use data.frame rather than as.data.frame, the colnames is different from matrix
count_matrix_tibble <- tibble::as_tibble(data.frame(rownames(count_matrix),
count_matrix))
colnames(count_matrix_tibble) <- c('gene_name', colnames(count_matrix))
pathway_genes_names_list <- get_pathway_namelist(enrichment)
pathway_names <- names(pathway_genes_names_list)
# get a list of lists of gene names only
# use the lists of gene to subset the count matrix
# initialize the subgroup
count_matrix_subgrouped_list <- list()
# get a list of those subgroup genes sets
for(i in 1:length(pathway_genes_names_list)){
count_matrix_subgrouped_list[[i]] <-
filter(count_matrix_tibble, gene_name %in% pathway_genes_names_list[[i]])
}
result_list <- list('subgroups' = count_matrix_subgrouped_list, 'names' = pathway_names)
return(result_list)
}
#' plot heatmap by integrate data from DESeq and enrichment analysis
#'
#' more information
#' @param count_matrix the RNA-seq expression level count matrix or intensity matrix from microarray
#' @param enrichment_xlsx the post-processed Excel file from enrichment analysis
#' @param file the destination and name you want to save your heatmap, by default is the
#' @param ... other parameters used in pheatmap
#' your current work directory, the default name is heatmap.pdf
#' @return output a pdf file of heatmaps of all the pathways in the enrichment_xlsx
#' @export
#'
#'
hmplot <- function(count_matrix,
enrichment,
file = 'heatmap.pdf',
...){
# get the a list of tibbles of subsets of genes
genes_tibbles_list <- subset_count_maxtrix(count_matrix, enrichment)$subgroups
# get the names of objects in list, will be used as title of heatmaps
pathway_names <- subset_count_maxtrix(count_matrix, enrichment)$names
# transform tibbles into count_matrices with first column as rownames
genes_pre_matrices_list <- lapply(genes_tibbles_list, select, -1)
genes_count_matrices_list <- lapply(genes_pre_matrices_list, data.matrix)
for(i in 1:length(genes_tibbles_list)){
rownames(genes_count_matrices_list[[i]]) <- pull(select(genes_tibbles_list[[i]], 1))
}
#####################################################################
pdf(file)
# plot the heatmap for subsets
for(i in 1:(length(genes_tibbles_list))){
#heatmap(genes_count_matrices.list[[i]], main = pathway_names[i], Rowv = F)
#heatmap.2(genes_count_matrices.list[[i]], main = pathway_names[i], Rowv = F)
pheatmap(mat = genes_count_matrices_list[[i]],
main = pathway_names[i],
...
)
}
####################################
# need to debug: some time cannot write pdf file
graphics.off()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.