knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  warning = FALSE,
  message = FALSE
)
library(magrittr)
library(ggplot2)

parcutils

cat(
badger::badge_devel("cparsania/parcutils" , color = "blue"),
badger::badge_lifecycle()
)

The goal of parcutils is to provide day to day bioinformatics utility functions. Most of the functions in the package are useful for analyzing and visualizing complex RNA-seq studies.

Installation

if(require("devtools") && require("BiocManager")){
  options(repos = BiocManager::repositories() )
  devtools::install_github("cparsania/parcutils")
} else{
  install.packages(c("devtools","BiocManager"))
  options(repos = BiocManager::repositories() )
  devtools::install_github("cparsania/parcutils")
}

RNA-seq analysis

Differential expression analysis

Prepare a count table

count_file <- system.file("extdata","toy_counts.txt" , package = "parcutils")

count_data <- readr::read_delim(count_file, delim = "\t")

count_data 

Group replicates by samples

To run DESeq2, replicates for each sample needs to be grouped.

sample_info <- count_data %>% colnames() %>% .[-1]  %>%
 tibble::tibble(samples = . , groups = rep(c("control" ,"treatment1" , "treatment2") , 
                                           each = 3))
sample_info

NOTE: Samples which are present in the object 'sample_info' will be considered for differential expressed analysis.

Run DESeq2 for multiple differential gene comparison.

res <- parcutils::run_deseq_analysis(counts = count_data ,
                         sample_info = sample_info,
                         column_geneid = "gene_id" ,
                         cutoff_lfc = 1,
                         cutoff_pval = 0.05,
                         group_numerator = c("treatment1", "treatment2") ,
                         group_denominator = c("control"))

Let's have a look in to res

res

res is an object of improved dataframe - tibble. Each row in the res is a differential comparison which can be identified by the value from the column comp.

res$de_comparisons

Data related to each differential comparison can be found from other columns of res.

For example, summary of differently expressed genes can be found from the column deg_summmary

res$deg_summmary

As described below there are several helper functions to get data from the res .

Get data from res using helper functions

# get normalised gene expression value for all genes across all samples. 
parcutils::get_normalised_expression_matrix(x = res, 
                                            samples = NULL,
                                            genes = NULL,
                                            summarise_replicates = FALSE)

# average gene expression values across relicates  
parcutils::get_normalised_expression_matrix(x = res, 
                                            samples = NULL,
                                            genes = NULL,
                                            summarise_replicates = T, 
                                            summarise_method = "median")




# get fold change values for all genes and all comparisons.

q_genes = c("ENSG00000196415:PRTN3", "ENSG00000221988:PPT2", "ENSG00000163138:PACRGL", "ENSG00000183840:GPR39", "ENSG00000146700:SSC4D", "ENSG00000163746:PLSCR2", "ENSG00000155918:RAET1L", "ENSG00000151458:ANKRD50", "ENSG00000167074:TEF", "ENSG00000130159:ECSIT")

parcutils::get_fold_change_matrix(x = res, 
                                  sample_comparisons = res$de_comparisons, 
                                  genes = q_genes)



# get differentially expressed genes for given comparison 

parcutils::get_genes_by_regulation(x = res, 
                                  sample_comparison = "treatment1_VS_control", 
                                  regulation = "both" # can be one of the "up" , "down" , "both", "other", "all"
                                  )

# get replicates group data 

parcutils::.group_replicates_by_sample(res)

Generate several visualizations from res

Visualize pairwise correlation between replicates

parcutils::get_pairwise_corr_plot(res, samples =c("control" ,"treatment1"))

Visualize all sample correlation by heat box

parcutils::get_corr_heatbox(x = res, show_corr_values = T, cluster_samples = F)

Visualize samples by Principle Component Analysis (PCA)

parcutils::get_pca_plot(x = res, 
                        samples  =c("control" ,"treatment1" ,"treatment2"))

Counts of diff expressed genes

parcutils::get_diff_gene_count_barplot(x = res)

change color of the bars

parcutils::get_diff_gene_count_barplot(x = res, col_down = "green4")

Visualize differential expressed genes by volcano plot

parcutils::get_volcano_plot(x = res, sample_comparison = "treatment2_VS_control",
                            col_up = "#a40000",
                            col_down = "#16317d", 
                            repair_genes = T,
                            col_other = "grey")

# change cutoffs 

parcutils::get_volcano_plot(x = res, repair_genes = T,
                            sample_comparison = "treatment2_VS_control",
                            pval_cutoff = 0.01,
                            log2fc_cutoff = 0.6, 
                            col_up = "#a40000",
                            col_down = "#16317d",
                            col_other = "grey")

Visualize gene expression distribution using box plot

# all replicates 
parcutils::get_gene_expression_box_plot(x = res, 
                                        samples =c("control" ,"treatment1"), 
                                        group_replicates = FALSE,
                                        convert_log2 = T)
# summarise  replicates 
parcutils::get_gene_expression_box_plot(x = res, 
                                        samples =c("control" ,"treatment1"), 
                                        group_replicates = T,
                                        convert_log2 = T)

Visualize genes by heatmaps

genes_for_hm = parcutils::get_genes_by_regulation(x = res,
                                                  sample_comparison = res$de_comparisons[[2]], 
                                                  regulation = "both")

# heatmap of normalised gene expression values across samples 

hm1 <- parcutils::get_gene_expression_heatmap(x = res, 
                                       samples = c("control","treatment1" , "treatment2") , 
                                       genes = genes_for_hm %>% names() , 
                                       convert_zscore = FALSE, 
                                       convert_log2 = T, 
                                       summarise_replicates = T,
                                       name = "log2(value)" , color_default = F, 
                                       col = 
                                         circlize::colorRamp2(breaks = c(-5,0,15), colors = c("#16317d","white","#a40000")),
                                       cluster_columns = FALSE)

ComplexHeatmap::draw(hm1)

# Visualise  z-score and show all replicates.

hm2 <- parcutils::get_gene_expression_heatmap(x = res, 
                                       samples = c("control","treatment1") , 
                                       name = "Z-score",
                                       summarise_replicates = F,
                                        col = 
                                         circlize::colorRamp2(breaks = c(-2,0,2), colors = c("#16317d","white","#a40000")),color_default = F,
                                       genes = genes_for_hm %>% names() , 
                                       convert_zscore = TRUE, 
                                       cluster_columns = FALSE)


ComplexHeatmap::draw(hm2)

# log2 FC heatamap
hm3 <- parcutils::get_fold_change_heatmap(x = res, 
                                   sample_comparisons = res$de_comparisons, 
                                   genes = genes_for_hm %>% names() , 
                                   color_default = F, 
                                     col = 
                                         circlize::colorRamp2(breaks = c(-5,0,5), colors = c("#16317d","white","#a40000")),
                                   name= "Log2FC")

ComplexHeatmap::draw(hm3)

Visualize differential genes overlap between comparison

us_plot <- parcutils::plot_deg_upsets(x = res, 
                                      sample_comparisons = res$de_comparisons)

us_plot$treatment1_VS_control_AND_treatment2_VS_control$upset_plot %>% print()

# get list of intersecting genes. 

us_plot$treatment1_VS_control_AND_treatment2_VS_control$upset_intersects %>% print()

Visualize common DE genes between comparison by scatter plot

# show common up and down genes 
parcutils::get_fold_change_scatter_plot(x = res, 
                                                   sample_comparisons = res$de_comparisons, point_size = 3,label_size = 3,repair_genes = T)

# show common up and down genes 
parcutils::get_fold_change_scatter_plot(x = res, 
                                        sample_comparisons = res$de_comparisons, 
                                        point_size = 3,
                                        label_size = 3,
                                        repair_genes = T)

# show common up genes
parcutils::get_fold_change_scatter_plot(x = res, 
                                        sample_comparisons = res$de_comparisons, 
                                        point_size = 5,
                                        label_size = 4,
                                        color_label = "both_up",
                                        col_up = "red",
                                        repair_genes = T)


# show common down genes
parcutils::get_fold_change_scatter_plot(x = res, 
                                        sample_comparisons = res$de_comparisons, 
                                        point_size = 5,
                                        label_size = 4,
                                        color_label = "both_down",
                                        col_down = "green4",
                                        repair_genes = T)

Visualize genes by line plot

genes_for_lineplot = parcutils::get_genes_by_regulation(x = res,
                                                  sample_comparison = res$de_comparisons[[2]], 
                                                  regulation = "both") %>% names()

# line plot of gene expression values
parcutils::get_gene_expression_line_plot(x = res, 
                                   genes = genes_for_lineplot , 
                                   samples = c("control","treatment1","treatment2"),summarise_replicates = T, show_average_line = T) + 
  ggplot2::theme(text = ggplot2::element_text(size = 15))


# line plot of gene expression values with k-means clustering  

parcutils::get_gene_expression_line_plot(x = res, 
                                         km = 4,
                                   genes = genes_for_lineplot , 
                                   samples = c("control","treatment1","treatment2"),summarise_replicates = T, show_average_line = T) + 
  ggplot2::theme(text = ggplot2::element_text(size = 15))

# line plot of gene expression values with k-means clustering  

parcutils::get_gene_expression_line_plot(x = res, 
                                         km = 4, 
                                         facet_clusters = T,
                                   genes = genes_for_lineplot , 
                                   samples = c("control","treatment1","treatment2"),summarise_replicates = T, show_average_line = T) + 
  ggplot2::theme(text = ggplot2::element_text(size = 15), 
                 axis.text.x = ggplot2::element_text(angle = 40,hjust = 0.8))

# Fold change values 

parcutils::get_fold_change_line_plot(x = res, 
                                   genes = genes_for_lineplot , 
                                   line_transparency = 0.5, 
                                   km = 2,facet_clusters = T,
                                     sample_comparisons = c("treatment1_VS_control", "treatment2_VS_control"), 
                                   average_line_summary_method =  "mean",
                                   show_average_line = T) + 
  ggplot2::theme(text = ggplot2::element_text(size = 15),
                 axis.text.x = ggplot2::element_text(angle = 40,hjust = 0.8))

Perform gene ontology analysis and visualization of all UP/DOWN genes from all comparisons in one go.

go_results <- parcutils::get_go_emap_plot(x = res)

# GO results as a table 

go_results$go_enrichment_output

# GO results as an emap plot 

go_results$go_emap_plots

Show overlapping genes through VENN diagram

parcutils::plot_deg_venn(res, sample_comparisons = res$de_comparisons,regulation = "up")

parcutils::plot_deg_venn(res, sample_comparisons = res$de_comparisons,regulation = "down")

parcutils::plot_deg_venn(res, sample_comparisons = res$de_comparisons,regulation = "both")

Other functions

Alignment summary

star_align_log_file <- system.file("extdata" , "a_Log.final.out" , package = "parcutils")

x =  parcutils::get_star_align_log_summary(log_file = star_align_log_file)

print(x)


# plot alignment summary


star_align_log_file_dir <- system.file("extdata" , package = "parcutils")

star_align_log_files <- fs::dir_ls(star_align_log_file_dir, 
                                   glob = "*Log.final.out" ,
                                   recurse = T,type = "file")
names(star_align_log_files) <- NULL
parcutils::get_star_align_log_summary_plot(x = star_align_log_files,
                                col_total_reads = "red", 
                                col_mapped_reads  = "blue") 



cparsania/parcutils documentation built on Oct. 27, 2024, 4:55 a.m.