DASC_html.R
In DASC: Detecting hidden batch factors through data adaptive adjustment for biological effects

## ----style, echo = FALSE, results = 'asis'---------------------------------
BiocStyle::markdown()

## ----setup, echo=FALSE, message=FALSE--------------------------------------
library(knitr)
opts_chunk$set(comment=NA, fig.align="center", warning=FALSE)

## Libraries
require(Biobase)
require(NMF)
require(cvxclustr)

## ----installation, eval=FALSE----------------------------------------------
#  source("http://bioconductor.org/biocLite.R")
#  biocLite("DASC")

## ---- message=FALSE, eval=TRUE---------------------------------------------
library(DASC)
data("esGolub")
samples <- c(20,21,28,30)
dat <- exprs(esGolub)[1:100,samples]
pdat <- pData(esGolub)[samples,]

## use nrun = 50 or more for better convergence of results
res <- DASC(edata = dat, pdata = pdat, factor = pdat$Cell, 
                        method = 'ama', type = 3, lambda = 1, 
                        rank = 2:3, nrun = 5, annotation='esGolub Dataset')

## ---- message=FALSE, eval=TRUE---------------------------------------------
## libraries
set.seed(99999)
library(DESeq2)
library(ggplot2)
library(pcaExplorer)

## dataset
rawCounts <- stanfordData$rawCounts
metadata <- stanfordData$metadata

## ---- message=FALSE, eval=TRUE---------------------------------------------
## Using a smaller dataset
idx <- which(metadata$tissue %in% c("adipose", "adrenal", "sigmoid"))
rawCounts <- rawCounts[,idx]
metadata <- metadata[idx,]

## ---- message=FALSE, eval=TRUE---------------------------------------------
head(rawCounts)
head(metadata)

## ---- message=FALSE, eval=TRUE---------------------------------------------
## Normalizing the dataset using DESeq2
dds <- DESeqDataSetFromMatrix(rawCounts, metadata, design = ~ species+tissue)
dds <- estimateSizeFactors(dds)
dat <- counts(dds, normalized = TRUE)
lognormalizedCounts <- log2(dat + 1)

## ---- message=FALSE, eval=TRUE---------------------------------------------
## PCA plot using 
rld.dds <- rlog(dds)
pcaplot(rld.dds, intgroup=c("tissue","species"), ntop=1000, pcX=1, pcY=2)

## ---- message=FALSE, eval=TRUE---------------------------------------------
res <- DASC(edata = dat, pdata = metadata, factor = metadata$tissue,
                method = 'ama', type = 3, lambda = 1, rank = 2:3, nrun = 10,
                annotation = 'Stanford Dataset')

## ---- message=FALSE, eval=TRUE---------------------------------------------
## Consensus plot
consensusmap(res)

## ---- message=FALSE, eval=TRUE---------------------------------------------
## Residual plot
plot(res)

## ---- message=FALSE, eval=TRUE---------------------------------------------
## Batches -- dataset has 6 batches
sample.clust <- data.frame(sample.name = colnames(lognormalizedCounts), 
                            clust = as.vector(predict(res$fit$`2`)), 
                            batch = metadata$seqBatch)
ggplot(data = sample.clust, aes(x=c(1:6), y=clust, color=factor(clust))) + 
    geom_point(size = 4) + xlab("Sample Number") + ylab("Cluster Number")

## ---- message=FALSE, eval=FALSE--------------------------------------------
#  
#  ## not running this part of the code for building package
#  ## Using entire dataset
#  rawCounts <- stanfordData$rawCounts
#  metadata <- stanfordData$metadata
#  dds <- DESeqDataSetFromMatrix(rawCounts, metadata, design = ~ species+tissue)
#  dds <- estimateSizeFactors(dds)
#  dat <- counts(dds, normalized = TRUE)
#  lognormalizedCounts <- log2(dat + 1)
#  
#  ## PCA Plot
#  rld.dds <- rlog(dds)
#  pcaplot(rld.dds, intgroup=c("tissue","species"), ntop = nrow(rld.dds),
#          pcX = 1, pcY = 2)
#  
#  ## Running DASC
#  res <- DASC(edata = dat, pdata = metadata, factor = metadata$tissue,
#                  method = 'ama', type = 3, lambda = 1, rank = 2:10,
#                  nrun = 100, annotation = 'Stanford Dataset')
#  
#  ## Consensus plot
#  consensusmap(res)
#  
#  ## Residual plot
#  plot(res)
#  
#  ## Clustering samples based on batches
#  sample.clust <- data.frame(sample.name = colnames(lognormalizedCounts),
#                              clust = as.vector(predict(res$fit$`10`)),
#                              batch = metadata$seqBatch)
#  ggplot(data = sample.clust, aes(x=c(1:26), y=clust, color=factor(clust))) +
#      geom_point(size = 4) + xlab("Sample Number") + ylab("Cluster Number")

## ---- out.width = "800px", echo=FALSE--------------------------------------
knitr::include_graphics("PCA_Plot_Fig1.png")
knitr::include_graphics("Consensus_Plot_Fig2.png")
knitr::include_graphics("NMF_Rank_Fig3.png")
knitr::include_graphics("ClusterNumber_Fig4.png")

## ---- message=FALSE--------------------------------------------------------
sessionInfo()

Any scripts or data that you put into this service are public.

DASC documentation built on Nov. 17, 2017, 9:36 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

DASC
Detecting hidden batch factors through data adaptive adjustment for biological effects

inst/doc/DASC_html.R
In DASC: Detecting hidden batch factors through data adaptive adjustment for biological effects

Try the DASC package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DASC Detecting hidden batch factors through data adaptive adjustment for biological effects

inst/doc/DASC_html.R In DASC: Detecting hidden batch factors through data adaptive adjustment for biological effects

Try the DASC package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DASC
Detecting hidden batch factors through data adaptive adjustment for biological effects

inst/doc/DASC_html.R
In DASC: Detecting hidden batch factors through data adaptive adjustment for biological effects