inst/doc/diffenrich_vignette.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval.after = 'fig.cap'
)
options(knitr.table.format = "html") 

## ---- echo=FALSE, out.height='600px', out.width='600px', dpi=300, fig.cap=cap----
suppressMessages(library(diagram))

## open plot 
par(mar = c(0.5, 0.5, 0.5, 0.5))
openplotmat()
## define number of boxes at each level
elpos <- coordinates (c(3, 1, 1, 2, 2, 1, 1, 1))
## generate matrix of from-to arrow coordinates
fromto <- matrix(ncol = 2, byrow = TRUE,
                 data = c(1, 6,
                          2, 4,
                          3, 7,
                          4, 5,
                          5, 6,
                          5, 7,
                          6, 8,
                          7, 9,
                          8, 10, 
                          9, 10,
                          10, 11,
                          11, 12))
nr <- nrow(fromto)
arrpos <- matrix(ncol = 2, nrow = nr)

## Build flow chart
for (i in 1:nr){
  arrpos[i, ] <- straightarrow(to = elpos[fromto[i, 2], ],
                                from = elpos[fromto[i, 1], ],
                                lwd = 1, arr.pos = 0.6, arr.length = 0.2)
  textellipse(elpos[1,], 0.10, 0.04, lab = "geneList1", box.col = "purple",
              shadow.col = "purple4", shadow.size = 0.005, cex = 0.6)
  textrect(elpos[2,], 0.10, 0.03, lab = "get_kegg()", box.col = "steelblue1",
              shadow.col = "darkblue", shadow.size = 0.005, cex = 0.6)
  textellipse(elpos[3,], 0.10, 0.04, lab = "geneList2", box.col = "purple",
              shadow.col = "purple4", shadow.size = 0.005, cex = 0.6)
  textround(elpos[4,], 0.13, 0.03,lab = "KEGG REST API", box.col = "orange1",
            shadow.col = "orange4", shadow.size = 0.005, cex = 0.5)
  textellipse(elpos[5,], 0.18, 0.06, lab = c("cleaned gene-to-pathway", "map"), box.col = "firebrick1",
              shadow.col = "firebrick4", shadow.size = 0.005, cex = 0.5)
  textrect(elpos[6,], 0.14, 0.03,lab = "pathEnrich(List 1)", box.col = "steelblue1",
            shadow.col = "darkblue", shadow.size = 0.005, cex = 0.5)
  textrect(elpos[7,], 0.14, 0.03, lab = "pathEnrich(List 2)", box.col = "steelblue1",
           shadow.col = "darkblue", shadow.size = 0.005, cex = 0.5)
  textellipse(elpos[8,], 0.12, 0.04, lab = c("pathEnrich","result/object"), box.col = "firebrick1",
              shadow.col = "firebrick4", shadow.size = 0.005, cex = 0.5)
  textellipse(elpos[9,], 0.12, 0.04, lab = c("pathEnrich","result/object"),box.col = "firebrick1",
              shadow.col = "firebrick4", shadow.size = 0.005, cex = 0.5)
  textrect(elpos[10,], 0.10, 0.03, lab = "diffEnrich()", box.col = "steelblue1",
           shadow.col = "darkblue", shadow.size = 0.005, cex = 0.5)
  textellipse(elpos[11,], 0.12, 0.06, lab = c("diffEnrich","result/object"), box.col = "firebrick1",
              shadow.col = "firebrick4", shadow.size = 0.005, cex = 0.5)
  textrect(elpos[12,], 0.17, 0.03, lab = "plotFoldEnrichment()", box.col = "steelblue1",
           shadow.col = "darkblue", shadow.size = 0.005, cex = 0.5)
  #text(0.15,0.05, "Figure 1. diffEnrich Analysis pipeline", cex = 0.15)
  text(0.05,0.87, "Step 1", cex = 0.45, font = 2)
  text(0.05,0.57, "Step 2", cex = 0.45, font = 2)
  text(0.05,0.35, "Step 3", cex = 0.45, font = 2)
  text(0.05,0.10, "Step 4", cex = 0.45, font = 2)
}
cap <- c("Figure 1. diffEnrich Analysis pipeline. Functions within the diffEnrich package are represented by blue rectangles. The data that must be provided by the user is represented by the purple ovals. Data objects generated by a function in diffEnrich are represented by red ovals. The external call of the get_kegg function to the KEGG REST API is represented in yellow.")

## ---- results='hide', echo=FALSE----------------------------------------------
suppressMessages(library(diffEnrich))

## ----get_kegg_exp1------------------------------------------------------------
## run get_kegg() using rat
kegg_rno <- get_kegg('rno')

## ----get_kegg_exp2------------------------------------------------------------
## run get_kegg() using rat
kegg_rno <- get_kegg('rno')

## ----get_kegg_read, eval=FALSE------------------------------------------------
#  ## run get_kegg() using rat
#  kegg_rno <- get_kegg(read = TRUE, path = "/path/to/files", date = "2019-10-17", release = "92")

## ----get_kegg_descriptions, echo=FALSE----------------------------------------
# Define column names
kegg_df_dat <- c("ncbi_to_kegg",
                 "kegg_to_pathway",
                 "pathway_to_species")

kegg_df_dscr <- c(paste0("ncbi gene ID",
                 " <-- mapped to --> ",
                 "KEGG gene ID"),
                 paste0("KEGG gene ID",
                 " <-- mapped to --> ",
                 "KEGG pathway ID"),
                 paste0("KEGG pathway ID",
                 " <-- mapped to --> ",
                 "KEGG pathway species description"))
# Make dataframe
kegg_df <- data.frame("get_kegg list object" = kegg_df_dat, 
                      "Object description" = kegg_df_dscr)

## ----get_kegg_kable, warning=FALSE, echo=FALSE, message=FALSE-----------------
if (require(kableExtra)){
knitr::kable(kegg_df,
             caption = "Table 1. Description of data sets generated from accessing KEGG REST API", row.names = FALSE) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = T)
}

## ----view_genelists-----------------------------------------------------------
## View sample gene lists from package data
head(geneLists$list1)
head(geneLists$list2)

## ----pathEnrich, warning=FALSE------------------------------------------------
# run pathEnrich using kegg_rno
## List 1
list1_pe <- pathEnrich(gk_obj = kegg, gene_list = geneLists$list1, cutoff = 0.05, N = 2)
## list2
list2_pe <- pathEnrich(gk_obj = kegg, gene_list = geneLists$list2, cutoff = 0.05, N = 2) 

## ----pathEnrich_kable, warning=FALSE, echo=FALSE------------------------------
if (require(kableExtra)){
knitr::kable(head(list1_pe$enrich_table),
             caption = "Table 2. First 6 rows of list1_pe data frame generated using pathEnrich", row.names = FALSE) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = T)
}

## -----------------------------------------------------------------------------
summary(list1_pe)

## ----pathEnrich_description, echo=FALSE---------------------------------------
# get column names
names <- colnames(list1_pe$enrich_table)
# Define column names
description <- c("KEGG Pathway Identifier",
                 "Description of KEGG Pathway (provided by KEGG)",
                 "Number of Genes in KEGG Pathway",
                 "Number of Genes from gene list in KEGG Pathway",
                 "Number of Genes in KEGG Database",
                 "Number of Genes from gene list in KEGG Database",
                 "Expected number of genes from list to be in KEGG pathway by chance",
                 "P-value for enrichment within the KEGG pathway for list genes",
                 "Multiple testing adjusted enrichment p-values (default = False Discovery Rate (Benjamini and Hochberg, 1995))",
                 "Ratio of number of genes observed from the gene list annotated to the KEGG pathway to the number of genes expected from the gene list to be annotated to the KEGG pathway if there was no enrichment (i.e. KEGG_PATHWAY_in_list/expected)")
# Make dataframe
df1 <- data.frame(names, description)
colnames(df1) <- c("Column Names", "Column Description") 

## ----pathEnrich_vars_kable, warning=FALSE, echo=FALSE-------------------------
if (require(kableExtra)){
knitr::kable(df1,
             caption = "Table 3. Description of columns is dataframe generated by pathEnrich", row.names = FALSE) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
}

## ----diffEnrich---------------------------------------------------------------
## Perform differential enrichment
diff_enrich <- diffEnrich(list1_pe = list1_pe, list2_pe = list2_pe, method = 'none', cutoff = 0.05)

## ----diffEnrich_kable, warning=FALSE, echo=FALSE------------------------------
if (require(kableExtra)){
knitr::kable(head(diff_enrich$de_table),
             caption = "Table 4. First 6 rows from data frame generated by diffEnrich", row.names = FALSE) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
}

## ----diffEnrich_descriptions, echo=FALSE--------------------------------------
## get vector of column names
de_col_names <- names(diff_enrich$de_table)
de_col_descr <- c("KEGG Pathway Identifier",
                  "Description of KEGG Pathway (provided by KEGG)",
                  "Number of Genes in KEGG Pathway",
                  "Number of Genes in KEGG Database",
                  "Number of Genes from gene list 1 in KEGG Pathway",
                  "Number of Genes from gene list 1 in KEGG Database",
                  "Expected number of genes from list 1 to be in KEGG pathway by chance",
                  "P-value for enrichment of list 1 genes related to KEGG pathway",
                  "Multiple testing adjusted enrichment p-values from gene list 1 (default = False Discovery Rate (Benjamini and Hochberg, 1995))",
                  "Ratio of number of genes observed from gene list 1 annotated to the KEGG pathway to the number of genes expected from gene list 1 annotated to the KEGG pathway if there was no enrichment (i.e. KEGG_PATHWAY_in_list1/expected_list1)",
                  "Number of Genes from gene list 2 in KEGG Pathway",
                  "Number of Genes from gene list 2 in KEGG Database",
                  "Expected number of genes from list 2 to be in KEGG pathway by chance",
                  "P-value for enrichment of list 2 genes related to KEGG pathway",
                  "Multiple testing adjusted enrichment p-values from gene list 2 (default = False Discovery Rate (Benjamini and Hochberg, 1995))",
                  "Ratio of number of genes observed from gene list 2 annotated to the KEGG pathway to the number of genes expected from gene list 2 annotated to the KEGG pathway if there was no enrichment (i.e. KEGG_PATHWAY_in_list2/expected_list2)",
                  "Odds of a gene from list 2 being from this KEGG pathway / Odds of a gene from list 1 being from this KEGG pathway",
                  "P-value for differential enrichment of this KEGG pathway between list 1 and list 2",
                  "Multiple testing adjusted differential enrichment p-values (default = False Discovery Rate (Benjamini and Hochberg, 1995))")
# Make dataframe
de_descr_df <- data.frame(de_col_names, de_col_descr)
colnames(de_descr_df) <- c("Column Names", "Column Description") 

## ----diffEnrich_vars_kable, warning=FALSE, echo=FALSE-------------------------
knitr::kable(de_descr_df,
             caption = "Table 5. Description of columns in dataframe generated by diffEnrich", row.names = FALSE) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)

## -----------------------------------------------------------------------------
summary(diff_enrich)

## ----echo=FALSE---------------------------------------------------------------
cap_plot <- c("Figure 2. Example of a differential enrichment graphic. KEGG pathways are plotted on the y-axis and fold
enrichment is plotted on the x-axis. Each KEGG pathway has a bar depicting
its fold enrichment in list 1 (red) and its fold enrichment in list 2 (blue).
The transparency of the bars correspond to the unadjusted p-value for the
pathway's enrichment in the given list. The p-value presented as text to the
right of each pair of bars is the adjusted p-value (user defined: default is FDR) associated with the
differential enrichment of the pathway between the two lists, and the pathways
are ordered from top to bottom by this p-value (i.e. smallest p-value on top
of plot, and largest p-value on bottom of plot). The dotted line represents a fold enrichment of 1. Finally, the number of genes used
for analysis from each gene list (recall that this number may not be the same as the number of
genes in the user's original list) are reported below their respective p-values
in the legend.")

## ----plotFoldEnrichment, message=FALSE, fig.height=6, fig.width=8, fig.cap=cap_plot----
## Plot fold enrichment
plotFoldEnrichment(de_res = diff_enrich, pval = 0.05, N = 5)

Try the diffEnrich package in your browser

Any scripts or data that you put into this service are public.

diffEnrich documentation built on June 28, 2022, 1:08 a.m.