inst/doc/pcaExplorer.R

## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ----setup, echo=FALSE, warning=FALSE-----------------------------------------
library(knitr)
set.seed(42)
opts_chunk$set(comment=NA,
               fig.align="center",
               warning=FALSE)
stopifnot(requireNamespace("htmltools"))
htmltools::tagList(rmarkdown::html_dependency_font_awesome())

## ----installation, eval=FALSE-------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("pcaExplorer")

## ----installfull, eval=FALSE--------------------------------------------------
#  BiocManager::install("pcaExplorer", dependencies = TRUE)

## ----installation-github, eval=FALSE------------------------------------------
#  library("devtools")
#  install_github("federicomarini/pcaExplorer")

## ----loadlibrary, message=FALSE-----------------------------------------------
library("pcaExplorer")

## -----------------------------------------------------------------------------
citation("pcaExplorer")

## ----eval=FALSE---------------------------------------------------------------
#  BiocManager::install("org.At.tair.db")
#  library("org.At.tair.db")
#  # skipping the steps where you normally would generate your dds_at object...
#  dds_at
#  vst_at <- DESeq2::vst(dds_at)
#  anno_at <- get_annotation_orgdb(dds_at,"org.At.tair.db", idtype = "TAIR")
#  # subset the background to include only the expressed genes
#  bg_ids <- rownames(dds_at)[rowSums(counts(dds_at)) > 0]
#  library(topGO)
#  pca2go_at <- pca2go(vst_at,
#                      annotation = anno_at,
#                      annopkg = "org.At.tair.db",
#                      ensToGeneSymbol = TRUE,
#                      background_genes = bg_ids)
#  # and finally, with all the objects prepared...
#  pcaExplorer(dds = dds_at, dst = vst_at, annotation = anno_at, pca2go = pca2go_at)

## ----installairway, eval=FALSE------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("airway")

## ----loadairway, message=FALSE------------------------------------------------
library(airway)
library(DESeq2)

data(airway)

dds_airway <- DESeqDataSet(airway,design= ~ cell + dex)
dds_airway
rld_airway <- rlogTransformation(dds_airway)
rld_airway

## ----launchairway, eval=FALSE-------------------------------------------------
#  pcaExplorer(dds = dds_airway,
#              dst = rld_airway)

## ----annoairway, message = FALSE----------------------------------------------
library(org.Hs.eg.db)
genenames_airway <- mapIds(org.Hs.eg.db,keys = rownames(dds_airway),column = "SYMBOL",keytype="ENSEMBL")
annotation_airway <- data.frame(gene_name = genenames_airway,
                                row.names = rownames(dds_airway),
                                stringsAsFactors = FALSE)
head(annotation_airway)                                

## ----annofuncs, eval=FALSE----------------------------------------------------
#  anno_df_orgdb <- get_annotation_orgdb(dds = dds_airway,
#                                        orgdb_species = "org.Hs.eg.db",
#                                        idtype = "ENSEMBL")
#  
#  anno_df_biomart <- get_annotation(dds = dds_airway,
#                                    biomart_dataset = "hsapiens_gene_ensembl",
#                                    idtype = "ensembl_gene_id")

## ----echo=FALSE---------------------------------------------------------------
anno_df_orgdb <- get_annotation_orgdb(dds = dds_airway,
                                      orgdb_species = "org.Hs.eg.db",
                                      idtype = "ENSEMBL")

## -----------------------------------------------------------------------------
head(anno_df_orgdb)

## ----launchairwayanno, eval=FALSE---------------------------------------------
#  pcaExplorer(dds = dds_airway,
#              dst = rld_airway,
#              annotation = annotation_airway) # or anno_df_orgdb, or anno_df_biomart

## ----ddsmultifac--------------------------------------------------------------
dds_multifac <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3,betaSD_tissue = 1)

## ----prepsimul----------------------------------------------------------------
dds_multifac <- makeExampleDESeqDataSet_multifac(betaSD_condition = 1,betaSD_tissue = 3)
dds_multifac
rld_multifac <- rlogTransformation(dds_multifac)
rld_multifac
## checking how the samples cluster on the PCA plot
pcaplot(rld_multifac,intgroup = c("condition","tissue"))

## ----launchsimul, eval=FALSE--------------------------------------------------
#  pcaExplorer(dds = dds_multifac,
#              dst = rld_multifac)

## ----func-pcaplot-------------------------------------------------------------
pcaplot(rld_airway,intgroup = c("cell","dex"),ntop = 1000,
        pcX = 1, pcY = 2, title = "airway dataset PCA on samples - PC1 vs PC2")

# on a different set of principal components...
pcaplot(rld_airway,intgroup = c("dex"),ntop = 1000,
        pcX = 1, pcY = 4, title = "airway dataset PCA on samples - PC1 vs PC4",
        ellipse = TRUE)

## ----func-pcaplot3d, eval=FALSE-----------------------------------------------
#  pcaplot3d(rld_airway,intgroup = c("cell","dex"),ntop = 1000,
#          pcX = 1, pcY = 2, pcZ = 3)
#  # will open up in the viewer

## ----func-pcascree------------------------------------------------------------
pcaobj_airway <- prcomp(t(assay(rld_airway)))
pcascree(pcaobj_airway,type="pev",
         title="Proportion of explained proportion of variance - airway dataset")

## ----func-correlatepcs--------------------------------------------------------
res_pcairway <- correlatePCs(pcaobj_airway,colData(dds_airway))

res_pcairway

plotPCcorrs(res_pcairway)

## ----func-hiloadings----------------------------------------------------------
# extract the table of the genes with high loadings
hi_loadings(pcaobj_airway,topN = 10,exprTable=counts(dds_airway))
# or alternatively plot the values
hi_loadings(pcaobj_airway,topN = 10,annotation = annotation_airway)

## ----func-genespca------------------------------------------------------------
groups_airway <- colData(dds_airway)$dex
cols_airway <- scales::hue_pal()(2)[groups_airway]
# with many genes, do not plot the labels of the genes
genespca(rld_airway,ntop=5000,
         choices = c(1,2),
         arrowColors=cols_airway,groupNames=groups_airway,
         alpha = 0.2,
         useRownamesAsLabels=FALSE,
         varname.size = 5
        )
# with a smaller number of genes, plot gene names included in the annotation
genespca(rld_airway,ntop=100,
         choices = c(1,2),
         arrowColors=cols_airway,groupNames=groups_airway,
         alpha = 0.7,
         varname.size = 5,
         annotation = annotation_airway
        )

## ----func-topGOtable, eval=FALSE----------------------------------------------
#  # example not run due to quite long runtime
#  dds_airway <- DESeq(dds_airway)
#  res_airway <- results(dds_airway)
#  res_airway$symbol <- mapIds(org.Hs.eg.db,
#                              keys=row.names(res_airway),
#                              column="SYMBOL",
#                              keytype="ENSEMBL",
#                              multiVals="first")
#  res_airway$entrez <- mapIds(org.Hs.eg.db,
#                              keys=row.names(res_airway),
#                              column="ENTREZID",
#                              keytype="ENSEMBL",
#                              multiVals="first")
#  resOrdered <- as.data.frame(res_airway[order(res_airway$padj),])
#  head(resOrdered)
#  # extract DE genes
#  de_df <- resOrdered[resOrdered$padj < .05 & !is.na(resOrdered$padj),]
#  de_symbols <- de_df$symbol
#  # extract background genes
#  bg_ids <- rownames(dds_airway)[rowSums(counts(dds_airway)) > 0]
#  bg_symbols <- mapIds(org.Hs.eg.db,
#                       keys=bg_ids,
#                       column="SYMBOL",
#                       keytype="ENSEMBL",
#                       multiVals="first")
#  # run the function
#  topgoDE_airway <- topGOtable(de_symbols, bg_symbols,
#                               ontology = "BP",
#                               mapping = "org.Hs.eg.db",
#                               geneID = "symbol")

## ----func-pca2go, eval=FALSE--------------------------------------------------
#  pca2go_airway <- pca2go(rld_airway,
#                          annotation = annotation_airway,
#                          organism = "Hs",
#                          ensToGeneSymbol = TRUE,
#                          background_genes = bg_ids)
#  # for a smooth interactive exploration, use DT::datatable
#  datatable(pca2go_airway$PC1$posLoad)
#  # display it in the normal R session...
#  head(pca2go_airway$PC1$posLoad)
#  # ... or use it for running the app and display in the dedicated tab
#  pcaExplorer(dds_airway,rld_airway,
#              pca2go = pca2go_airway,
#              annotation = annotation_airway)

## ----func, message = FALSE, eval = FALSE--------------------------------------
#  goquick_airway <- limmaquickpca2go(rld_airway,
#                                     pca_ngenes = 10000,
#                                     inputType = "ENSEMBL",
#                                     organism = "Hs")
#  # display it in the normal R session...
#  head(goquick_airway$PC1$posLoad)
#  # ... or use it for running the app and display in the dedicated tab
#  pcaExplorer(dds_airway,rld_airway,
#              pca2go = goquick_airway,
#              annotation = annotation_airway)

## ----func-makedataset---------------------------------------------------------

dds_simu <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3,betaSD_tissue = 0.5)
dds_simu
dds2_simu <- makeExampleDESeqDataSet_multifac(betaSD_condition = 0.5,betaSD_tissue = 2)
dds2_simu

rld_simu <- rlogTransformation(dds_simu)
rld2_simu <- rlogTransformation(dds2_simu)
pcaplot(rld_simu,intgroup = c("condition","tissue")) + 
  ggplot2::ggtitle("Simulated data - Big condition effect, small tissue effect")
pcaplot(rld2_simu,intgroup = c("condition","tissue")) + 
  ggplot2::ggtitle("Simulated data - Small condition effect, bigger tissue effect")

## ----eval=FALSE---------------------------------------------------------------
#  distro_expr(rld_airway,plot_type = "density")
#  distro_expr(rld_airway,plot_type = "violin")

## -----------------------------------------------------------------------------
distro_expr(rld_airway,plot_type = "boxplot")

## -----------------------------------------------------------------------------
dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3,betaSD_tissue = 1)
dst <- DESeq2::rlogTransformation(dds)
set.seed(42)
geneprofiler(dst,paste0("gene",sample(1:1000,20)), plotZ = FALSE)

## ----eval=FALSE---------------------------------------------------------------
#  anno_df_biomart <- get_annotation(dds = dds_airway,
#                                    biomart_dataset = "hsapiens_gene_ensembl",
#                                    idtype = "ensembl_gene_id")
#  
#  anno_df_orgdb <- get_annotation_orgdb(dds = dds_airway,
#                                        orgdb_species = "org.Hs.eg.db",
#                                        idtype = "ENSEMBL")

## -----------------------------------------------------------------------------
# use a subset of the counts to reduce plotting time, it can be time consuming with many samples
pair_corr(counts(dds_airway)[1:100,])

## -----------------------------------------------------------------------------
sessionInfo()

Try the pcaExplorer package in your browser

Any scripts or data that you put into this service are public.

pcaExplorer documentation built on Nov. 8, 2020, 5:29 p.m.