# limmaVoomTools.R
# Limma-Voon -- differential expression analysis of gene read counts expression data
LimmaVoom.DiffExpress <- function( fnames, fids, m=NULL, groupSet, targetGroup=sort(groupSet)[1], geneColumn="GENE_ID",
intensityColumn="READS_M", keepIntergenics=FALSE, missingGenes="fill", extraColumn=NULL,
average.FUN=sqrtmean, minimumRPKM=1, wt.folds=1, wt.pvalues=1,
adjust.lowReadCounts=TRUE, ...) {
# turn the set of transcript files into one matrix
if ( is.null(m)) {
m <- expressionFileSetToMatrix( fnames=fnames, fids=fids, geneColumn=geneColumn,
intensityColumn=intensityColumn, missingGenes=missingGenes,
keepIntergenics=keepIntergenics)
if ( ! is.null( extraColumn)) {
mExtra <- expressionFileSetToMatrix( fnames=fnames, fids=fids, geneColumn=geneColumn,
intensityColumn=extraColumn, missingGenes=missingGenes,
keepIntergenics=keepIntergenics)
}
}
# drop non-genes...
if ( ! keepIntergenics) {
drops <- grep( "(ng)", rownames(m), fixed=T)
if ( length(drops) > 0) {
m <- m[ -drops, ]
if ( ! is.null( extraColumn)) mExtra <- mExtra[ -drops, ]
}
nonGenes <- subset( getCurrentGeneMap(), REAL_G == FALSE)$GENE_ID
drops <- which( rownames(m) %in% nonGenes)
if ( length(drops) > 0) {
m <- m[ -drops, ]
if ( ! is.null( extraColumn)) mExtra <- mExtra[ -drops, ]
}
}
# the column flags for EdgeR & LimmaVoom are to end up as 1,2, where 1 is the one we want...
cl <- rep( 2, times=length(groupSet))
cl[ groupSet == targetGroup] <- 1
myGrpNames <- rep( targetGroup, times=length(groupSet))
myGrpNames[ cl == 2] <- paste( "Not", targetGroup, sep=" ")
otherGroupNames <- setdiff( groupSet, targetGroup)
if ( length(otherGroupNames) == 1) myGrpNames[ cl == 2] <- otherGroupNames
if ( ! is.null( extraColumn)) {
clExtra <- cl
}
grpFac <- factor( myGrpNames)
grpNames <- levels(grpFac)
mUse <- m
require( edgeR)
NG <- nrow( m)
# to call Limma & Voom, it's a multi step process that shares some steps with EdgeR
ans1 <- DGEList( counts=mUse, group=cl)
ans1 <- calcNormFactors( ans1)
# create the grouping variable L-V wants, and do the log2-CPM scaled transform
lvGroup <- interaction( as.character(cl))
lvModel <- model.matrix( ~0 + lvGroup)
ans2 <- voom( ans1, design=lvModel)
# do the linear model fit
ans3 <- lmFit( ans2, design=lvModel)
# make the contrasts from the group
lvContrast <- makeContrasts( lvGroup1 - lvGroup2, levels=colnames(coef(ans3)))
ans4 <- contrasts.fit( ans3, lvContrast)
# Bayes smoothing
ans4 <- eBayes( ans4)
# extract the DE results
lvOut <- topTable( ans4, number=NG, sort.by="none")
# get what e want/need out
fout <- lvOut[ , 1]
pout <- lvOut[ , 4]
qout <- lvOut[ , 5]
gnames <- rownames( lvOut)
where <- match( gnames, rownames(m), nomatch=0)
mTmp <- m[ where, ]
# calc the average for each group, and put it into 'this group' order
avgM <- t( apply( mTmp, MARGIN=1, function(x) tapply(x, grpFac, FUN=average.FUN)))
if ( colnames(avgM)[1] != targetGroup) avgM <- avgM[ ,c(2,1)]
# the fold change is based on log2 read counts, not RPKM, so we need a different way to
# prevent divide by zero and falsely exagerated FC
if ( adjust.lowReadCounts) {
adjustAns <- lowReadCountAdjustment( fout, pout, avgM[,1], avgM[,2])
fout <- adjustAns$fold.change
pout <- adjustAns$p.value
}
# now we can assess PI values
piout <- piValue( fout, pout)
gprod <- gene2ProductAllSpecies( gnames)
if ( any( gprod == "")) {
tmp <- read.delim( fnames[1], as.is=T)
gnams <- tmp[[ geneColumn]]
gpros <- tmp[[ grep( "product", tolower(colnames(tmp)))]]
needs <- which( gprod == "")
where <- match( gnames[needs], gnams, nomatch=0)
gprod[ needs[ where > 0]] <- gpros[ where]
}
# round to sensible digits of resolution
fout <- round( fout, digits=4)
avgM <- round( avgM, digits=2)
piout <- round( piout, digits=3)
out <- data.frame( gnames, gprod, fout, pout, qout, piout, avgM, stringsAsFactors=F)
colnames(out) <- c( "GENE_ID", "PRODUCT", "LOG2FOLD", "PVALUE", "FDR", "PIVALUE", colnames(avgM))
if ( ! is.null( extraColumn)) {
avgExtra1 <- apply( mExtra[ , which( cl == 1)], MARGIN=1, FUN=average.FUN)
avgExtra2 <- apply( mExtra[ , which( cl == 2)], MARGIN=1, FUN=average.FUN)
out$AVG_EXTRA1 <- avgExtra1
out$AVG_EXTRA2 <- avgExtra2
}
ord <- diffExpressRankOrder( out$LOG2FOLD, out$PVALUE, wt.folds, wt.pvalues)
out <- out[ ord, ]
rownames(out) <- 1:nrow(out)
return( out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.