knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "man/figures/README-", warning = FALSE, message = FALSE )
library(magrittr) library(ggplot2)
cat( badger::badge_devel("cparsania/parcutils" , color = "blue"), badger::badge_lifecycle() )
The goal of parcutils
is to provide day to day bioinformatics utility functions. Most of the functions in the package are useful for analyzing and visualizing complex RNA-seq studies.
if(require("devtools") && require("BiocManager")){ options(repos = BiocManager::repositories() ) devtools::install_github("cparsania/parcutils") } else{ install.packages(c("devtools","BiocManager")) options(repos = BiocManager::repositories() ) devtools::install_github("cparsania/parcutils") }
count_file <- system.file("extdata","toy_counts.txt" , package = "parcutils") count_data <- readr::read_delim(count_file, delim = "\t") count_data
To run DESeq2, replicates for each sample needs to be grouped.
sample_info <- count_data %>% colnames() %>% .[-1] %>% tibble::tibble(samples = . , groups = rep(c("control" ,"treatment1" , "treatment2") , each = 3)) sample_info
NOTE: Samples which are present in the object 'sample_info' will be considered for differential expressed analysis.
DESeq2
for multiple differential gene comparison.res <- parcutils::run_deseq_analysis(counts = count_data , sample_info = sample_info, column_geneid = "gene_id" , cutoff_lfc = 1, cutoff_pval = 0.05, group_numerator = c("treatment1", "treatment2") , group_denominator = c("control"))
res
res
res
is an object of improved dataframe
- tibble
. Each row in the res
is a differential comparison which can be identified by the value from the column comp
.
res$de_comparisons
Data related to each differential comparison can be found from other columns of res
.
For example, summary of differently expressed genes can be found from the column deg_summmary
res$deg_summmary
As described below there are several helper functions to get data from the res
.
res
using helper functions# get normalised gene expression value for all genes across all samples. parcutils::get_normalised_expression_matrix(x = res, samples = NULL, genes = NULL, summarise_replicates = FALSE) # average gene expression values across relicates parcutils::get_normalised_expression_matrix(x = res, samples = NULL, genes = NULL, summarise_replicates = T, summarise_method = "median") # get fold change values for all genes and all comparisons. q_genes = c("ENSG00000196415:PRTN3", "ENSG00000221988:PPT2", "ENSG00000163138:PACRGL", "ENSG00000183840:GPR39", "ENSG00000146700:SSC4D", "ENSG00000163746:PLSCR2", "ENSG00000155918:RAET1L", "ENSG00000151458:ANKRD50", "ENSG00000167074:TEF", "ENSG00000130159:ECSIT") parcutils::get_fold_change_matrix(x = res, sample_comparisons = res$de_comparisons, genes = q_genes) # get differentially expressed genes for given comparison parcutils::get_genes_by_regulation(x = res, sample_comparison = "treatment1_VS_control", regulation = "both" # can be one of the "up" , "down" , "both", "other", "all" ) # get replicates group data parcutils::.group_replicates_by_sample(res)
res
parcutils::get_pairwise_corr_plot(res, samples =c("control" ,"treatment1"))
parcutils::get_corr_heatbox(x = res, show_corr_values = T, cluster_samples = F)
parcutils::get_pca_plot(x = res, samples =c("control" ,"treatment1" ,"treatment2"))
parcutils::get_diff_gene_count_barplot(x = res)
change color of the bars
parcutils::get_diff_gene_count_barplot(x = res, col_down = "green4")
parcutils::get_volcano_plot(x = res, sample_comparison = "treatment2_VS_control", col_up = "#a40000", col_down = "#16317d", repair_genes = T, col_other = "grey") # change cutoffs parcutils::get_volcano_plot(x = res, repair_genes = T, sample_comparison = "treatment2_VS_control", pval_cutoff = 0.01, log2fc_cutoff = 0.6, col_up = "#a40000", col_down = "#16317d", col_other = "grey")
# all replicates parcutils::get_gene_expression_box_plot(x = res, samples =c("control" ,"treatment1"), group_replicates = FALSE, convert_log2 = T) # summarise replicates parcutils::get_gene_expression_box_plot(x = res, samples =c("control" ,"treatment1"), group_replicates = T, convert_log2 = T)
genes_for_hm = parcutils::get_genes_by_regulation(x = res, sample_comparison = res$de_comparisons[[2]], regulation = "both") # heatmap of normalised gene expression values across samples hm1 <- parcutils::get_gene_expression_heatmap(x = res, samples = c("control","treatment1" , "treatment2") , genes = genes_for_hm %>% names() , convert_zscore = FALSE, convert_log2 = T, summarise_replicates = T, name = "log2(value)" , color_default = F, col = circlize::colorRamp2(breaks = c(-5,0,15), colors = c("#16317d","white","#a40000")), cluster_columns = FALSE) ComplexHeatmap::draw(hm1) # Visualise z-score and show all replicates. hm2 <- parcutils::get_gene_expression_heatmap(x = res, samples = c("control","treatment1") , name = "Z-score", summarise_replicates = F, col = circlize::colorRamp2(breaks = c(-2,0,2), colors = c("#16317d","white","#a40000")),color_default = F, genes = genes_for_hm %>% names() , convert_zscore = TRUE, cluster_columns = FALSE) ComplexHeatmap::draw(hm2) # log2 FC heatamap hm3 <- parcutils::get_fold_change_heatmap(x = res, sample_comparisons = res$de_comparisons, genes = genes_for_hm %>% names() , color_default = F, col = circlize::colorRamp2(breaks = c(-5,0,5), colors = c("#16317d","white","#a40000")), name= "Log2FC") ComplexHeatmap::draw(hm3)
us_plot <- parcutils::plot_deg_upsets(x = res, sample_comparisons = res$de_comparisons) us_plot$treatment1_VS_control_AND_treatment2_VS_control$upset_plot %>% print() # get list of intersecting genes. us_plot$treatment1_VS_control_AND_treatment2_VS_control$upset_intersects %>% print()
# show common up and down genes parcutils::get_fold_change_scatter_plot(x = res, sample_comparisons = res$de_comparisons, point_size = 3,label_size = 3,repair_genes = T) # show common up and down genes parcutils::get_fold_change_scatter_plot(x = res, sample_comparisons = res$de_comparisons, point_size = 3, label_size = 3, repair_genes = T) # show common up genes parcutils::get_fold_change_scatter_plot(x = res, sample_comparisons = res$de_comparisons, point_size = 5, label_size = 4, color_label = "both_up", col_up = "red", repair_genes = T) # show common down genes parcutils::get_fold_change_scatter_plot(x = res, sample_comparisons = res$de_comparisons, point_size = 5, label_size = 4, color_label = "both_down", col_down = "green4", repair_genes = T)
genes_for_lineplot = parcutils::get_genes_by_regulation(x = res, sample_comparison = res$de_comparisons[[2]], regulation = "both") %>% names() # line plot of gene expression values parcutils::get_gene_expression_line_plot(x = res, genes = genes_for_lineplot , samples = c("control","treatment1","treatment2"),summarise_replicates = T, show_average_line = T) + ggplot2::theme(text = ggplot2::element_text(size = 15)) # line plot of gene expression values with k-means clustering parcutils::get_gene_expression_line_plot(x = res, km = 4, genes = genes_for_lineplot , samples = c("control","treatment1","treatment2"),summarise_replicates = T, show_average_line = T) + ggplot2::theme(text = ggplot2::element_text(size = 15)) # line plot of gene expression values with k-means clustering parcutils::get_gene_expression_line_plot(x = res, km = 4, facet_clusters = T, genes = genes_for_lineplot , samples = c("control","treatment1","treatment2"),summarise_replicates = T, show_average_line = T) + ggplot2::theme(text = ggplot2::element_text(size = 15), axis.text.x = ggplot2::element_text(angle = 40,hjust = 0.8)) # Fold change values parcutils::get_fold_change_line_plot(x = res, genes = genes_for_lineplot , line_transparency = 0.5, km = 2,facet_clusters = T, sample_comparisons = c("treatment1_VS_control", "treatment2_VS_control"), average_line_summary_method = "mean", show_average_line = T) + ggplot2::theme(text = ggplot2::element_text(size = 15), axis.text.x = ggplot2::element_text(angle = 40,hjust = 0.8))
go_results <- parcutils::get_go_emap_plot(x = res) # GO results as a table go_results$go_enrichment_output # GO results as an emap plot go_results$go_emap_plots
parcutils::plot_deg_venn(res, sample_comparisons = res$de_comparisons,regulation = "up") parcutils::plot_deg_venn(res, sample_comparisons = res$de_comparisons,regulation = "down") parcutils::plot_deg_venn(res, sample_comparisons = res$de_comparisons,regulation = "both")
star_align_log_file <- system.file("extdata" , "a_Log.final.out" , package = "parcutils") x = parcutils::get_star_align_log_summary(log_file = star_align_log_file) print(x) # plot alignment summary star_align_log_file_dir <- system.file("extdata" , package = "parcutils") star_align_log_files <- fs::dir_ls(star_align_log_file_dir, glob = "*Log.final.out" , recurse = T,type = "file") names(star_align_log_files) <- NULL parcutils::get_star_align_log_summary_plot(x = star_align_log_files, col_total_reads = "red", col_mapped_reads = "blue")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.