README.md

mygo

Conducts a GO-term analysis using clusterProfiler and generates an HTML report.

Restrictions & Data Preparations

Currently, only Mus musculus (mouse) datasets are supported.

The input data frame needs the following columns: `ensembl_gene_id`, `q_value` and `fc` (log2 fold change). A `Symbol` column is optional.

Here is an example tibble.

# A tibble: 18,777 x 3
   ensembl_gene_id    q_value       fc
   <chr>                <dbl>    <dbl>
 1 ENSMUSG00000103922  0.998   0.00361
 2 ENSMUSG00000025903  0.443   0.141  
 3 ENSMUSG00000104217  0.443   0.141  
 4 ENSMUSG00000033813  0.443   0.141  
 5 ENSMUSG00000033793  0.718   0.0864 
 6 ENSMUSG00000025905  0.0553  0.334  
 7 ENSMUSG00000025907  0.747   0.0919 
 8 ENSMUSG00000087247  0.282  -3.24   
 9 ENSMUSG00000033740  0.431  -0.221  
10 ENSMUSG00000102135  0.672  -0.547  
# … with 18,767 more rows

Examples

Whole dataset


library(magrittr)
# Build the input data frame for the analysis: one row per gene with the
# columns ensembl_gene_id, q_value and fc.
dat <- readr::read_tsv("test/geneexp_F_CPu.tsv") %>%
  dplyr::rename(fc = `log2(fold_change)`) %>%
  # Keep only rows whose status is "OK"
  dplyr::filter(status == "OK") %>%
  dplyr::select(ensembl_gene_id, q_value, fc)

# Here we do not pass a Symbol column. This won't result in an error.
dat %>%
  mygo::createHTMLReport(
    output_path = file.path(getwd(), "result")
  )

Selected genes

Sometimes you want to run a GO-term analysis for only a small number of genes. In this case, make sure to resolve the proper gene names and to deactivate the GSEA step (`do_gse = FALSE`).

Here is an example call.


# Read the selected genes and keep only the gene name(s) and the value that
# is used as fc.
my_genes <- readxl::read_xlsx("data/dat.xlsx") %>%
  dplyr::select(
    gene_name = `Gene names`,
    fc = `-Log t-test p value`
  ) %>%
  # Entries holding several names separated by ";" become one row per name
  tidyr::separate_rows(gene_name, sep = ";") %>%
  dplyr::distinct(gene_name, .keep_all = TRUE) %>%
  dplyr::mutate(
    gene_name_fixed = rmyknife::get_gene_name_from_synonym(gene_name)
  ) %>%
  rmyknife::attach_ensembl_gene_id_from_name(
    gene_name_var = "gene_name_fixed",
    ensembl_version = 96
  ) %>%
  # The q-values are not needed here, so they are all set to 0
  dplyr::mutate(q_value = 0)

# Create the report for the selected genes with the GSEA step switched off
my_genes %>%
  mygo::createHTMLReport(
    output_path = file.path(getwd(), "result", "c5"),
    save_excel = TRUE,
    do_gse = FALSE,
    use_background = TRUE
  )

Options for Rendering

Debug Run

Internal use only.


# Local test run: build the input data frame with the columns
# ensembl_gene_id, q_value and fc.
my_dat <- readr::read_tsv("test/geneexp_F_CPu.tsv") %>%
  dplyr::rename(fc = `log2(fold_change)`) %>%
  # Keep only rows whose status is "OK"
  dplyr::filter(status == "OK") %>%
  dplyr::select(ensembl_gene_id, q_value, fc)

# Run the report template directly through xaringan, passing the report
# parameters explicitly.
xaringan::infinite_moon_reader(
  moon = "inst/rmd/goterm_report.Rmd",
  cast_from = file.path(getwd(), "inst", "rmd"),
  params = list(
    dat = my_dat,
    output_path = ".",
    save_excel = FALSE,
    significance_cutoff = 0.05,
    simplify_ontologies = FALSE,
    do_gse = FALSE,
    use_background = TRUE,
    store_r_objects = FALSE,
    save_plots_as_pdf = FALSE
  )
)

Debug nfcore-rnaseq-pipeline run


# Test for nfcore-rnaseq-pipeline output
nfcore_pipeline_path <- "/beegfs/scratch/bruening_scratch/pklemm/2020-11-anna-rnaseq/nfcore-rnaseq-pipeline"
dat <- readr::read_csv(
  glue::glue("{nfcore_pipeline_path}/results/DESeq2/npc/deseq_diff/deseq2_diff.csv")
) %>%
  # Pick the needed columns and rename them to the names mygo works with
  dplyr::select(
    ensembl_gene_id = row,
    q_value = padj,
    fc = log2FoldChange,
    Symbol = external_gene_name
  )

# Run the report template on the pipeline result with a cutoff of 0.1
xaringan::infinite_moon_reader(
  moon = "inst/rmd/goterm_report.Rmd",
  cast_from = file.path(getwd(), "inst", "rmd"),
  params = list(
    dat = dat,
    output_path = glue::glue("{nfcore_pipeline_path}/results/goterm-analyses/npc"),
    significance_cutoff = 0.1
  )
)

Installation

# Install devtools first if it is not yet available:
# install.packages('devtools')
# Then install mygo from GitHub.
devtools::install_github("paulklemm/mygo")

Credits

History



paulklemm/mygo documentation built on July 27, 2023, 11:36 a.m.