##########################################################################
##Function provides two moudiles for calculating differential methylation
##########################################################################
##Input: genomic coordinates and data for groups of MEDIPS SETs
##Param: base, values, diff.method, nMSets1, nMSets2, p.adj, n.r.M1, n.r.M2, p.adj, MeDIP, minRowSum
##Output: results of differential methylation analysis plus index in genome wide table
##Requires: edgeR
##Modified: 1/9/2015
##Author: Lukas Chavez, Joern Dietrich
MEDIPS.diffMeth = function(base = NULL, values=NULL, diff.method="ttest", nMSets1=NULL, nMSets2=NULL, n.r.M1=n.r.M1, n.r.M2=n.r.M2, p.adj="bonferroni", MeDIP, minRowSum=10, diffnorm="tmm")
{
##edgeR##
#########
if(diff.method=="edgeR"){
##Extract non-zero MeDIP count windows
cat(paste("Extracting count windows with at least", minRowSum, "reads...\n", sep=" "))
filter= rowSums(values)>=minRowSum
##Extract non-zero coupling factor rows
if(MeDIP){
cat(paste("Extracting non-zero coupling factor windows...\n", sep=" "))
filter=filter & base[,4]!=0
}
cat(paste("Execute edgeR for count data of", sum(filter), "windows...\n", sep=" "))
cat("(Neglecting parameter 'type')\n")
cat("Creating a DGEList object...\n")
edRObj.group=c(rep(1, nMSets1), rep(2, nMSets2))
edRObj.length=c(n.r.M1, n.r.M2)
#d <- edgeR::DGEList(counts = values[filter,], group = edRObj.group, lib.size=edRObj.length)
d <- edgeR::DGEList(counts = values[filter,], group = edRObj.group)
if(diffnorm=="tmm"){
cat("Apply trimmed mean of M-values (TMM) for library sizes normalization...\n")
d=edgeR::calcNormFactors(d)
}
if(diffnorm=="quantile" | diffnorm=="none"){
cat("Skipping trimmed mean of M-values (TMM) library size normalization...\n")
d=edgeR::calcNormFactors(d, method="none")
}
if(diffnorm!="tmm" & diffnorm!="quantile" & diffnorm!="none"){
stop("diffnorm method unknown.")
}
if(nMSets1!=1 | nMSets2!=1){
cat("Estimating common dispersion...\n")
d=edgeR::estimateCommonDisp(d)
cat("Estimating tagwise dispersion...\n")
d=edgeR::estimateTagwiseDisp(d)
cat("Calculating differential coverage...\n")
de.com=edgeR::exactTest(d,pair=c("2","1"))
}
else{
cat("There is no replication, setting dispersion to bcv^2 where bcv=0.01.\n")
cat("Please consider http://www.bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf section 2.9.\n")
bcv = 0.01
cat("Calculating differential coverage...\n")
de.com = suppressWarnings(edgeR::exactTest(d, dispersion=bcv^2, pair=c("2","1")))
}
##Adjusting p.values for multiple testing
cat(paste("Adjusting p.values for multiple testing...\n", sep=" "))
colnames(de.com$table) = c("edgeR.logFC", "edgeR.logCPM", "edgeR.p.value")
diff.results = cbind(de.com$table, edgeR.adj.p.value=p.adjust(de.com$table$edgeR.p.value, p.adj))
rm(de.com, edRObj.group, edRObj.length, d)
gc()
}
##ttest/Score##
#########
else{
##Extract non-zero MeDIP rows
cat(paste("Extracting count windows with at least",minRowSum," reads...\n", sep=" "))
filter= rowSums(values)>=minRowSum
##Extract non-zero coupling factor windows
if(MeDIP){
cat(paste("Extracting non-zero coupling factor windows...\n", sep=" "))
filter=filter & base[,4]!=0
}
#filter NA
#if there is a na in a row, this row is sorted out?!
filter=filter & !is.na(rowSums(values))
cat(paste("Calculating score for", sum(filter), "windows...\n", sep=" "))
ms1=1:nMSets1
ms2=(nMSets1+1):(nMSets1+nMSets2)
##Calculate ratios##
####################
ratio=rowSums(values[filter,ms1]+0.1)/rowSums(values[filter,ms2,drop=F]+0.1)*(nMSets2/nMSets1)
##Calculate p.values##
######################
##Check for constant entries
## Filter out constant entries, as they have no sd
const = apply(X=values[filter,ms1,drop=F],MARGIN=1,FUN=min) - apply(X=values[filter,ms1,drop=F],MARGIN=1,FUN=max) +
apply(X=values[filter,ms2,drop=F],MARGIN=1,FUN=min) - apply(X=values[filter,ms2,drop=F],MARGIN=1,FUN=max) == 0
t.test.p.value = rep(NA, sum(filter))
t.test.p.value[!const]=matTtest(values[filter,][!const,],groups=c(rep(1,nMSets1),rep(2,nMSets2)))$p.value
##Calculate the final score##
#############################
score = (-log10(t.test.p.value)*10)*log(ratio)
##Adjusting p.values for multiple testing
cat(paste("Adjusting p.values for multiple testing...\n", sep=" "))
diff.results = cbind(score.log2.ratio=log2(ratio), score.p.value=t.test.p.value, score.adj.p.value=p.adjust(t.test.p.value, p.adj), score=score)
rm(const, ratio, t.test.p.value, score)
}
return(list(diff.results=diff.results, diff.index=which(filter)))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.