inst/doc/DASC_pdf.R

## ----setup, echo=FALSE, results="hide", message=FALSE--------------------
knitr::opts_chunk$set(tidy=FALSE, cache=TRUE, 
                        dev="png", message=FALSE, error=FALSE, warning=TRUE)
## Libraries
require("Biobase")
require("NMF")
require("cvxclustr")

## ----installation, eval=FALSE--------------------------------------------
#  source("http://bioconductor.org/biocLite.R")
#  biocLite("DASC")

## ---- message=FALSE, results='hide', eval=TRUE---------------------------
library(DASC)
data("esGolub")
samples <- c(20,21,28,30)
dat <- exprs(esGolub)[1:100,samples]
pdat <- pData(esGolub)[samples,]

## use nrun = 50 or more for better convergence of results
res <- DASC(edata = dat, pdata = pdat, factor = pdat$Cell, method = 'ama',
                type = 3, lambda = 1, rank = 2:3, nrun = 5,
                annotation="esGolub Dataset")
#consensusmap(res)
#plot(res)

## ---- message=FALSE, eval=TRUE-------------------------------------------
## libraries
set.seed(99999)
library(DESeq2)
library(ggplot2)
library(pcaExplorer)

## dataset
rawCounts <- stanfordData$rawCounts
metadata <- stanfordData$metadata

## ---- message=FALSE, eval=TRUE-------------------------------------------
## Using a smaller dataset
idx <- which(metadata$tissue %in% c("adipose", "adrenal", "sigmoid"))
rawCounts <- rawCounts[,idx]
metadata <- metadata[idx,]

## ---- message=FALSE, eval=TRUE-------------------------------------------
head(rawCounts)
head(metadata)

## ---- message=FALSE, eval=TRUE-------------------------------------------
## Normalizing the dataset using DESeq2
dds <- DESeqDataSetFromMatrix(rawCounts, metadata, design = ~ species+tissue)
dds <- estimateSizeFactors(dds)
dat <- counts(dds, normalized = TRUE)
lognormalizedCounts <- log2(dat + 1)

## ---- message=FALSE, eval=TRUE-------------------------------------------
## PCA plot using 
rld.dds <- rlog(dds)
pcaplot(rld.dds, intgroup=c("tissue","species"), ntop=1000, pcX=1, pcY=2)

## ---- message=FALSE, eval=TRUE-------------------------------------------
res <- DASC(edata = dat, pdata = metadata, factor = metadata$tissue,
                method = 'ama', type = 3, lambda = 1, rank = 2:3, nrun = 10,
                annotation = 'Stanford Dataset')

## ---- message=FALSE, eval=TRUE-------------------------------------------
## Consensus plot
consensusmap(res)

## ---- message=FALSE, eval=TRUE-------------------------------------------
## Residual plot
plot(res)

## ---- message=FALSE, eval=TRUE-------------------------------------------
## Batches -- dataset has 6 batches
sample.clust <- data.frame(sample.name = colnames(lognormalizedCounts), 
                            clust = as.vector(predict(res$fit$`2`)), 
                            batch = metadata$seqBatch)
ggplot(data = sample.clust, aes(x=c(1:6), y=clust, color=factor(clust))) + 
    geom_point(size = 4) + xlab("Sample Number") + ylab("Cluster Number")

## ---- message=FALSE------------------------------------------------------
sessionInfo()

Try the DASC package in your browser

Any scripts or data that you put into this service are public.

DASC documentation built on Nov. 17, 2017, 9:36 a.m.